Ruben Wolhandler
number of annotations
e4b7ac0
import streamlit as st
import random
import copy
# Number of leading lines taken from every input file, whichever annotator is
# selected (used as the `[:N]` slice when the data files are loaded).
N = 10
# Short alias for Streamlit's session state, used throughout the script.
state = st.session_state
# File name shared by every prediction dump below.
generated_path = 'generated_predictions.txt'

# Prediction files of the systems being compared.
# NOTE: a further variant (sent_window_0_h_0_clusters) exists but is not loaded.
ORI_RES = f'DUC/results/{generated_path}'
CONTEXT_SENT_0_h_1_RES = f'DUC/output_dir/sent_window_0_clusters/{generated_path}'
CONTEXT_SENT_1_h_1_RES = f'DUC/output_dir/sent_window_1_h_1_clusters/{generated_path}'

# Source documents the predictions were generated from.
source_path = 'DUC/sent_window_1_h_1_clusters/test.source'

# Labels offered for every output.
OPTIONS = ["faithfull", "Not faithfull"]

# Per-annotator start offset into the data files; the annotator list is the
# key order of this mapping.
annotators_dic = {'Ruben': 10, 'Arie': 60}
Annotators = list(annotators_dic)
# Seed the counter once per session; later reruns keep whatever value is stored.
# NOTE(review): nothing in the visible script reads this key - confirm it is still needed.
state.setdefault("number_button_fill", 0)
def annotate(annotator):
    """Store the chosen annotator name in the session state.

    Used as the ``on_click`` callback of the annotator buttons.
    """
    state["annotator"] = annotator
# Until an annotator has been picked, show one button per known annotator,
# each in its own column.
if "annotator" not in state:
    for column, name in zip(st.columns(len(Annotators)), Annotators):
        column.button(f"{name}", on_click=annotate, args=(name,))
def _read_examples(path, head, start, span=50):
    """Return the first ``head`` lines of ``path`` plus ``span`` lines from ``start``.

    The file is read whole, split on newline characters, and closed before
    returning.
    """
    with open(path) as handle:
        lines = handle.read().split('\n')
    return lines[:head] + lines[start:start + span]


# One-time setup, run on the first script pass after an annotator was chosen.
if "annotations" not in state and "annotator" in state:
    # Each annotator gets the shared leading N examples plus a window starting
    # at their own offset.
    offset = annotators_dic[state.annotator]
    columns = [
        _read_examples(path, N, offset)
        for path in (
            source_path,
            ORI_RES,
            # The sent_window_0_h_0 variant is intentionally not loaded.
            CONTEXT_SENT_0_h_1_RES,
            CONTEXT_SENT_1_h_1_RES,
        )
    ]
    # Row layout: (source, ori_res, sent_0_h_1, sent_1_h_1); zip stops at the
    # shortest column.
    files = list(zip(*columns))

    if not files:
        # Nothing to annotate: report it instead of failing on files[0].
        st.error("No examples found for the selected annotator.")
        st.stop()

    # Commit to the session state only after every file was read, so a failed
    # read does not leave a half-initialised session behind.
    # The first entry acts as the header row of the exported annotations.
    state.annotations = {('data_id', 'index', 'output', 'model', 'is_faithfull'): ''}
    state.files = files
    state.current_file = files[0]
    state.counter = 0
    state.submit = 0
def submit(index_0, index_1, index_2):
    """Record the three labels for the current example and move to the next one.

    ``index_0``, ``index_1`` and ``index_2`` are the positions within
    ``state.current_file`` of the outputs that were displayed with the radio
    answers ``state.a``, ``state.b`` and ``state.c`` respectively.  Sorting by
    position undoes the display shuffle, so each label is stored against the
    model that produced the output.

    Once the last example has been submitted, ``state.files`` is empty and
    ``state.current_file`` keeps its last value; further calls do nothing.
    """
    if not state.files:
        # Everything is already annotated; ignore stray clicks.
        return

    ranked = sorted(
        [(index_0, state.a), (index_1, state.b), (index_2, state.c)],
        key=lambda pair: pair[0],
    )
    models = ('ori_res', 'sent_0_h_1', 'sent_1_h_1')

    if state.submit == 0:
        # current_file[1:] holds the model outputs in the same order as `models`.
        for (position, label), output, model in zip(ranked, state.current_file[1:], models):
            state.annotations[state.counter, position, output, model, label] = ''
        state.submit = 1

    if state.submit == 1:
        state.files.remove(state.current_file)
        # New display order for the next example.
        random.shuffle(state.indexes)
        # Advance only while examples remain; indexing an empty list here used
        # to raise IndexError on the final submission.
        if state.files:
            state.current_file = state.files[0]
        state.counter += 1
        state.submit = 0
# Main annotation view.  Three cases:
#   * examples remain       -> show the current example and the label widgets;
#   * data loaded, none left -> tell the annotator everything is done;
#   * data not loaded yet    -> show nothing here.
if "annotator" in state and "files" in state and state.files:
    st.header("Dataset annotation")
    st.header(state.annotator)

    # Row layout: index 0 is the source text, indexes 1-3 are model outputs.
    selected_file = state.current_file
    st.write(f"Source file: {selected_file[0]}")

    # Display order of the three outputs; created once per session and
    # reshuffled by `submit` after every example.
    if 'indexes' not in state:
        state.indexes = [1, 2, 3]
        random.shuffle(state.indexes)

    # One output plus one radio per slot; the answers are stored as
    # state.a / state.b / state.c, which `submit` reads.
    for slot, (label, position) in enumerate(zip("abc", state.indexes)):
        st.write('-' * 50)
        st.write(f"Output: {selected_file[position]}")
        state[label] = st.radio(label, OPTIONS, key=str(slot))
    st.write('-' * 50)

    # Pass a snapshot of the current order as a real tuple, so the callback
    # receives the order that was on screen when the button was rendered.
    st.button('Submit', on_click=submit, args=tuple(state.indexes))
elif "files" in state:
    st.info("Everything annotated.")
# Progress summary and export, available once the session data is initialised.
if 'files' in state and 'annotations' in state:
    # One entry is the header row; every example contributes three entries.
    annotated = (len(state.annotations) - 1) / 3
    remaining = len(state.files)
    st.info(f"Annotated: {annotated}, Remaining: {remaining}")

    # One line per entry: the key tuple, a tab, then the stored value.
    rows = [f"{key}\t{value}" for key, value in state.annotations.items()]
    st.download_button(
        "Download annotations as CSV",
        "\n".join(rows),
        file_name="annotations_faithfull.csv",
    )