d0r1h committed on
Commit
fd4c3b5
·
1 Parent(s): a888910

add new app

Browse files
Files changed (3) hide show
  1. app.py +16 -0
  2. requirements.txt +4 -0
  3. summarize.py +36 -0
app.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Gradio front end: wires the Summarizer function to a small web form.
import gradio as gr
from summarize import Summarizer

# Input widgets, listed in the order they are passed to `fn`.
link_box = gr.inputs.Textbox(
    lines=2,
    placeholder="Enter your text...",
    label='YouTube Video Link',
)
model_picker = gr.inputs.Radio(
    ["mT5", "BART", "Pegasus"],
    type="value",
    label='Model',
)

# Single text output that shows whatever string `fn` returns.
summary_box = gr.outputs.Textbox(label="Sar")

# Pre-filled [link, model] rows offered as examples in the UI.
sample_rows = [
    ['https://www.youtube.com/watch?v=A4OmtyaBHFE', 'mT5'],
    ['https://www.youtube.com/watch?v=cU6xVZfkcgo', 'mT5'],
]

interface = gr.Interface(
    fn=Summarizer,
    inputs=[link_box, model_picker],
    outputs=[summary_box],
    title="Youtube Summarizer",
    examples=sample_rows,
)

# Start the app as soon as the script runs; debug=True is kept from the
# original launch call.
interface.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ sentencepiece
4
+ youtube-transcript-api
summarize.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from youtube_transcript_api import YouTubeTranscriptApi
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
+
4
# Hugging Face checkpoint loaded for each model name the caller may pass.
_CHECKPOINTS = {
    "mT5": "csebuetnlp/mT5_multilingual_XLSum",
    "BART": "sshleifer/distilbart-cnn-12-6",
    "Pegasus": "google/pegasus-large",
}

# Returned whenever no usable transcript could be obtained for the video.
_NO_TRANSCRIPT_MESSAGE = (
    "TranscriptsDisabled: Transcript is not available \nTry another video"
)

# Path prefixes of the form /<prefix>/<video id>.
_ID_PATH_PREFIXES = ("embed", "shorts", "live", "v")


def _extract_video_id(link):
    """Return the YouTube video ID contained in *link*, or None if absent."""
    # Imported locally so this block does not depend on a file-level import.
    from urllib.parse import parse_qs, urlparse

    text = (link or "").strip()
    if not text:
        return None
    if "://" not in text:
        # Without a scheme urlparse treats the host as part of the path.
        text = "https://" + text

    try:
        parts = urlparse(text)
        host = (parts.hostname or "").lower()
        query_ids = parse_qs(parts.query).get("v")
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. unbalanced brackets).
        return None

    # Standard form: .../watch?v=<id>[&other=params]
    if query_ids and query_ids[0]:
        return query_ids[0]

    segments = [segment for segment in parts.path.split("/") if segment]
    # Short form: youtu.be/<id>
    if host in ("youtu.be", "www.youtu.be") and segments:
        return segments[0]
    # Path forms: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
    if len(segments) >= 2 and segments[0] in _ID_PATH_PREFIXES:
        return segments[1]
    return None


def Summarizer(link, model):
    """Summarize the transcript of a YouTube video.

    Args:
        link: URL of the video. ``watch?v=<id>`` links (with or without
            extra query parameters), ``youtu.be/<id>`` short links and
            ``/embed/``, ``/shorts/``, ``/live/``, ``/v/`` paths are accepted.
        model: One of ``"mT5"``, ``"BART"`` or ``"Pegasus"``.

    Returns:
        The generated summary, or a human-readable message describing why
        no summary could be produced. Failures are reported through the
        return value rather than raised.
    """
    # Validate both inputs before doing any network or model work.
    video_id = _extract_video_id(link)
    if video_id is None:
        return (
            "InvalidLink: Could not find a YouTube video ID in the link "
            "\nTry another link"
        )

    checkpoint = _CHECKPOINTS.get(model)
    if checkpoint is None:
        return "UnknownModel: Choose one of " + ", ".join(_CHECKPOINTS)

    # Only the transcript fetch is reported as a transcript problem, so that
    # model failures below are not mislabelled.
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
    except Exception:
        return _NO_TRANSCRIPT_MESSAGE

    transcript_text = ' '.join(entry['text'] for entry in transcript)
    if not transcript_text.strip():
        return _NO_TRANSCRIPT_MESSAGE

    try:
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        # Named `seq2seq` so the `model` argument is not overwritten.
        seq2seq = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

        # Input is cut to at most 1024 tokens before generation.
        inputs = tokenizer(
            transcript_text,
            max_length=1024,
            truncation=True,
            return_tensors="pt",
        )
        summary_ids = seq2seq.generate(inputs["input_ids"])
        summary = tokenizer.batch_decode(
            summary_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
    except Exception as exc:
        # Keep the "always return a string" contract, but say what failed.
        return f"SummarizationFailed: {type(exc).__name__}: {exc}"

    return summary[0]