import html
import os
from typing import AnyStr

import nltk
import streamlit as st
import validators
from transformers import pipeline
from validators import ValidationFailure

from Summarizer import Summarizer


def main() -> None:
    """Render the Terms & Conditions Summarizer Streamlit page.

    Shows the introductory text, lets the user pick a bundled sample or
    paste text / a URL, and, when "Summarize" is pressed, displays an
    abstractive summary followed by the input text with the extracted
    sentences highlighted.
    """
    # Shared by the two sample-file helpers below; relative to the working
    # directory the app is started from.
    samples_dir = './sample-terms-and-conditions/'
    sample_extension = '.txt'

    nltk.download('punkt')

    st.markdown('# Terms & Conditions Summarizer :pencil:')

    # The intro paragraphs are rendered with unsafe_allow_html=True so the
    # <br> tags between the adjacent string pieces produce line breaks.
    st.markdown(
        'Do you also always take the time out of your day to thoroughly read every word of the Terms & Conditions before signing up to an app like the responsible citizen that you are? :thinking_face:<br>'
        'No?<br>'
        "Well don't worry, neither do we! That's why we created a Terms & Conditions Summarization algorithm!",
        unsafe_allow_html=True)

    st.markdown(
        'Just copy-paste that pesky Terms & Conditions text or provide a URL to the text and let our fancy NLP algorithm do the rest!<br>'
        'You will see both an extractive summary (the most important sentences will be highlighted) and an abstractive summary (an actual summary)<br>'
        'Now you can just take a quick glance at the summary and go about the rest of your day assured that no one is abusing your precious personal data :books:',
        unsafe_allow_html=True)

    st.markdown(
        'Want to find out more? :brain:<br>'
        'For details about the extractive part :point_right: https://en.wikipedia.org/wiki/Latent_semantic_analysis<br>'
        'For details about the abstractive part :point_right: https://huggingface.co/ml6team/distilbart-tos-summarizer-tosdr',
        unsafe_allow_html=True)

    @st.cache(allow_output_mutation=True,
              suppress_st_warning=True,
              show_spinner=True)
    def create_pipeline():
        """Build the Hugging Face summarization pipeline (wrapped in st.cache)."""
        with st.spinner('Please wait for the model to load...'):
            terms_and_conditions_pipeline = pipeline(
                task='summarization',
                model='ml6team/distilbart-tos-summarizer-tosdr',
                tokenizer='ml6team/distilbart-tos-summarizer-tosdr'
            )
        return terms_and_conditions_pipeline

    def display_abstractive_summary(summary_sentences: list) -> None:
        """Render each abstractive summary sentence as a bullet point."""
        st.subheader("Abstractive Summary")
        st.markdown('#####')
        for sentence in summary_sentences:
            st.markdown(f"- {sentence}", unsafe_allow_html=True)

    def display_extractive_summary(terms_and_conditions_text: str, summary_sentences: list) -> None:
        """Render the full text with every extracted sentence highlighted.

        Args:
            terms_and_conditions_text: The text the summary was extracted from.
            summary_sentences: Sentences to highlight; each one is looked up
                verbatim in the text.
        """
        st.subheader("Extractive Summary")
        st.markdown('#####')
        # The result is rendered as raw HTML, so escape the user-supplied
        # text first; only the markup added below is meant to be live.
        highlighted_text = html.escape(terms_and_conditions_text, quote=False)
        for sentence in summary_sentences:
            # str.replace with an empty needle would insert the markup
            # between every character.
            if not sentence.strip():
                continue
            # Escape the sentence the same way so it still matches the text.
            escaped_sentence = html.escape(sentence, quote=False)
            highlighted_text = highlighted_text.replace(
                escaped_sentence, f"<mark>{escaped_sentence}</mark>")
        highlighted_text = highlighted_text.replace('\n', '<br>')
        with st.container():
            st.write(highlighted_text, unsafe_allow_html=True)

    def is_valid_url(url: str) -> bool:
        """Return True when ``validators.url`` accepts *url*."""
        result = validators.url(url)
        return not isinstance(result, ValidationFailure)

    def list_all_filenames() -> list:
        """Return the sorted base names of the ``.txt`` files in the sample directory.

        Returns an empty list when the sample directory does not exist, so
        the page still works for pasted text and URLs.
        """
        try:
            entries = os.listdir(samples_dir)
        except FileNotFoundError:
            return []
        # os.listdir order is arbitrary; sort for a stable select box. Only
        # the trailing extension is removed, not every '.txt' in the name.
        return sorted(
            entry[:-len(sample_extension)]
            for entry in entries
            if entry.endswith(sample_extension)
        )

    def fetch_file_contents(filename: str) -> str:
        """Read a sample file given its base name (without extension).

        The name is tried as given first and then lower-cased, so names
        returned by ``list_all_filenames`` resolve on case-sensitive
        filesystems while lower-case-only lookups keep working.
        """
        path = os.path.join(samples_dir, f'{filename}{sample_extension}')
        if not os.path.isfile(path):
            path = os.path.join(samples_dir, f'{filename.lower()}{sample_extension}')
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()

    summarizer: Summarizer = Summarizer(create_pipeline())

    if 'tc_text' not in st.session_state:
        st.session_state['tc_text'] = ''

    if 'sentences_length' not in st.session_state:
        st.session_state['sentences_length'] = Summarizer.DEFAULT_EXTRACTED_ARTICLE_SENTENCES_LENGTH

    if 'sample_choice' not in st.session_state:
        st.session_state['sample_choice'] = ''

    # NOTE(review): this writes an empty string with HTML enabled; it looks
    # like a separator whose markup is missing - confirm the intended content.
    st.write('', unsafe_allow_html=True)

    st.header("Input")

    sentences_length = st.number_input(
        label='Number of sentences to be extracted:',
        min_value=5,
        max_value=15,
        value=st.session_state.sentences_length
    )
    sample_choice = st.selectbox(
        'Choose a sample terms & conditions:',
        list_all_filenames())

    # With no sample files there is nothing selected; keep the current text.
    if sample_choice is not None:
        st.session_state.tc_text = fetch_file_contents(sample_choice)

    tc_text_input = st.text_area(
        value=st.session_state.tc_text,
        label='Terms & conditions content or specify an URL:',
        height=240
    )

    summarize_button = st.button(label='Summarize')

    if summarize_button:
        # Surrounding whitespace would make a pasted URL fail validation.
        tc_text = tc_text_input.strip()
        if tc_text:
            with st.spinner('Summarizing the text is in progress...'):
                if is_valid_url(tc_text):
                    extract_summary_sentences = summarizer.extractive_summary_from_url(tc_text, sentences_length)
                else:
                    extract_summary_sentences = summarizer.extractive_summary_from_text(tc_text, sentences_length)
                abstract_summary_list = summarizer.abstractive_summary(extract_summary_sentences)

            display_abstractive_summary(abstract_summary_list)
            # NOTE(review): for URL input the URL string itself (not the
            # fetched page) is what gets displayed here, so the extracted
            # sentences are unlikely to match - confirm whether Summarizer
            # can expose the fetched text.
            display_extractive_summary(tc_text, extract_summary_sentences)
        else:
            st.warning('Please provide some text or a URL to summarize.')
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()