Spaces:

rubenwol
/

faithfull_annotation

Runtime error

Ruben Wolhandler

number of annotations

e4b7ac0 about 2 years ago

4.1 kB

	import streamlit as st
	import random
	import copy

	# Number of leading lines of each data file that every annotator receives,
	# regardless of who they are.
	N = 10
	# Short alias for Streamlit's per-session storage.
	state = st.session_state
	generated_path = 'generated_predictions.txt'

	# Prediction files of the systems being compared.
	ORI_RES = 'DUC/results/' + generated_path
	# CONTEXT_SENT_0_h_0_RES = f'/home/nlp/wolhanr/mds_faithfull/data/DUC/output_dir/sent_window_0_h_0_clusters/{generated_path}'
	CONTEXT_SENT_0_h_1_RES = 'DUC/output_dir/sent_window_0_clusters/' + generated_path
	CONTEXT_SENT_1_h_1_RES = 'DUC/output_dir/sent_window_1_h_1_clusters/' + generated_path

	# Source documents, line-aligned with the prediction files above.
	source_path = 'DUC/sent_window_1_h_1_clusters/test.source'

	# Choices offered by each radio widget.
	OPTIONS = [
	    "faithfull",
	    "Not faithfull",
	]
	# Names shown on the annotator selection buttons.
	Annotators = [
	    'Ruben',
	    'Arie',
	]
	# Line offset at which each annotator's private 50-line slice starts.
	annotators_dic = {
	    'Ruben': 10,
	    'Arie': 60,
	}

	# Initialised once per session; not read anywhere else in the visible code.
	if "number_button_fill" not in state:
	    state["number_button_fill"] = 0

	def annotate(annotator):
	    """Store the chosen annotator in the session state.

	    Passed as the ``on_click`` callback of the annotator buttons.

	    Args:
	        annotator: Name of the annotator whose button was clicked.
	    """
	    state["annotator"] = annotator

	# Until an annotator has been chosen, show one button per annotator,
	# each in its own column; clicking one stores the name via ``annotate``.
	if "annotator" not in state:
	    annotator_columns = st.columns(len(Annotators))
	    for column, annotator_name in zip(annotator_columns, Annotators):
	        column.button(f"{annotator_name}", on_click=annotate, args=(annotator_name,))

	def _read_annotator_lines(path, start, per_annotator=50):
	    """Return the lines of *path* assigned to one annotator.

	    The result is the first ``N`` lines of the file followed by the
	    ``per_annotator`` lines beginning at offset ``start``.

	    Args:
	        path: Text file to read, one example per line.
	        start: Offset of the first line of the annotator's own slice.
	        per_annotator: Length of the annotator's own slice.

	    Returns:
	        A list of strings (without trailing newlines).
	    """
	    # ``with`` closes the handle deterministically; the explicit encoding
	    # keeps the result independent of the host locale.
	    with open(path, encoding="utf-8") as handle:
	        lines = handle.read().split('\n')
	    return lines[:N] + lines[start:start + per_annotator]


	# One-time initialisation, run on the first script pass after an annotator
	# has been chosen: create the annotation store and load the examples.
	if "annotations" not in state and "annotator" in state:
	    state.annotations = {}
	    # Annotations are stored as dict keys (values are unused); this first
	    # key serves as the header row of the exported file.
	    state.annotations['data_id', 'index', 'output', 'model', 'is_faithfull'] = ''

	    start = annotators_dic[state.annotator]
	    source = _read_annotator_lines(source_path, start)
	    ori_res = _read_annotator_lines(ORI_RES, start)
	    sent_0_h_1 = _read_annotator_lines(CONTEXT_SENT_0_h_1_RES, start)
	    sent_1_h_1 = _read_annotator_lines(CONTEXT_SENT_1_h_1_RES, start)

	    # Each entry is (source, ori_res output, sent_0_h_1 output, sent_1_h_1 output).
	    state.files = list(zip(source, ori_res, sent_0_h_1, sent_1_h_1))
	    state.current_file = state.files[0]
	    state.counter = 0
	    state.submit = 0


	def submit(index_0, index_1, index_2):
	    """Record the three judgements for the current example and advance.

	    Passed as the ``on_click`` callback of the "Submit" button.

	    Args:
	        index_0: Column of ``state.current_file`` that was displayed first
	            (judged by radio ``a``).
	        index_1: Column displayed second (judged by radio ``b``).
	        index_2: Column displayed third (judged by radio ``c``).

	    Side effects:
	        Adds three keys to ``state.annotations``, removes the current
	        example from ``state.files``, reshuffles ``state.indexes`` and
	        increments ``state.counter``.
	    """
	    if not state.files:
	        # Nothing left to annotate: a late or repeated click must not
	        # record anything or raise.
	        return

	    # Undo the display shuffle: order the judgements by column index so
	    # they line up with the fixed model order below.
	    judgements = sorted(
	        [(index_0, state.a), (index_1, state.b), (index_2, state.c)],
	        key=lambda pair: pair[0],
	    )
	    if state.submit == 0:
	        models = ('ori_res', 'sent_0_h_1', 'sent_1_h_1')
	        for position, ((column, label), model) in enumerate(zip(judgements, models), start=1):
	            state.annotations[state.counter, column, state.current_file[position], model, label] = ''
	        state.submit = 1
	    if state.submit == 1:
	        state.files.remove(state.current_file)
	        random.shuffle(state.indexes)
	        # The original indexed ``state.files[0]`` unconditionally and raised
	        # IndexError once the last example had been submitted.
	        if state.files:
	            state.current_file = state.files[0]
	        state.counter += 1
	        state.submit = 0


	# Main page. The annotation form is shown only while examples remain;
	# "Everything annotated." is shown only once the loaded list is exhausted
	# (previously it appeared before an annotator had even been chosen and
	# never after the last example).
	files_loaded = 'files' in state and "annotator" in state
	if files_loaded and state.files:
	    st.header("Dataset annotation")
	    st.header(state.annotator)
	    selected_file = state.current_file

	    st.write(f"Source file: {selected_file[0]}")

	    # Display order of the three system outputs (columns 1-3 of the
	    # example), shuffled so the annotator cannot tell the systems apart.
	    if 'indexes' not in state:
	        state.indexes = [1, 2, 3]
	        random.shuffle(state.indexes)

	    separator = '-'*50
	    # One radio per displayed output; the answers are stored under
	    # state.a / state.b / state.c, which ``submit`` reads.
	    for position, slot in enumerate(('a', 'b', 'c')):
	        st.write(separator)
	        st.write(f"Output: {selected_file[state.indexes[position]]}")
	        state[slot] = st.radio(slot, OPTIONS, key=f'{position}')
	    st.write(separator)

	    # Pass an immutable snapshot of the current order: ``submit`` reshuffles
	    # ``state.indexes`` in place. ``args`` must be a tuple; the original
	    # parenthesised list only worked because it is unpacked positionally.
	    st.button('Submit', on_click=submit, args=tuple(state.indexes))
	elif files_loaded:
	    st.info("Everything annotated.")

	if 'annotations' in state and 'files' in state:
	    # The annotation dict holds one header key plus three keys per example,
	    # so integer division yields the number of completed examples.
	    st.info(f"Annotated: {(len(state.annotations) - 1) // 3}, Remaining: {len(state.files)}")

	    # NOTE(review): despite the ".csv" name, each row is the tuple key's
	    # repr followed by a tab and the (empty) value.
	    st.download_button(
	        "Download annotations as CSV",
	        "\n".join([f"{k}\t{v}" for k, v in state.annotations.items()]),
	        file_name="annotations_faithfull.csv",
	    )