Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
ZennyKenny
committed on
support for ad hoc recording
Browse files
app.py
CHANGED
@@ -2,27 +2,34 @@ import gradio as gr
|
|
2 |
import torch
|
3 |
from transformers import pipeline
|
4 |
import librosa
|
5 |
-
import soundfile as sf
|
6 |
import spaces
|
|
|
7 |
|
8 |
-
def split_audio(
|
9 |
"""Split audio into chunks of chunk_duration seconds."""
|
10 |
-
audio, sr = librosa.load(audio_path, sr=None)
|
11 |
chunks = []
|
12 |
-
for start in range(0, len(
|
13 |
end = start + int(chunk_duration * sr)
|
14 |
-
chunks.append(
|
15 |
-
return chunks
|
16 |
|
17 |
-
def transcribe_long_audio(
|
18 |
"""Transcribe long audio by splitting into smaller chunks."""
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
transcriptions = []
|
21 |
for i, chunk in enumerate(chunks):
|
22 |
temp_path = f"temp_chunk_{i}.wav"
|
23 |
-
sf.write(temp_path, chunk, sr) # Save the chunk
|
24 |
transcription = transcriber(temp_path)["text"]
|
25 |
transcriptions.append(transcription)
|
|
|
26 |
return " ".join(transcriptions)
|
27 |
|
28 |
@spaces.GPU(duration=3)
|
@@ -39,10 +46,10 @@ def main():
|
|
39 |
raise
|
40 |
|
41 |
# Function to process audio
|
42 |
-
def process_audio(
|
43 |
try:
|
44 |
# Transcribe the audio (long-form support)
|
45 |
-
transcription = transcribe_long_audio(
|
46 |
# Summarize the transcription
|
47 |
summary = summarizer(transcription, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
|
48 |
return transcription, summary
|
@@ -53,7 +60,7 @@ def main():
|
|
53 |
with gr.Blocks() as interface:
|
54 |
with gr.Row():
|
55 |
with gr.Column():
|
56 |
-
audio_input = gr.Audio(type="
|
57 |
process_button = gr.Button("Process Audio")
|
58 |
with gr.Column():
|
59 |
transcription_output = gr.Textbox(label="Full Transcription", lines=10)
|
|
|
2 |
import torch
|
3 |
from transformers import pipeline
|
4 |
import librosa
|
5 |
+
import soundfile as sf
|
6 |
import spaces
|
7 |
+
import os
|
8 |
|
9 |
+
def split_audio(audio_data, sr, chunk_duration=30):
    """Split audio into chunks of chunk_duration seconds.

    Args:
        audio_data: Sliceable sequence of samples (anything supporting
            ``len()`` and ``[start:end]`` slicing).
        sr: Sample rate, in samples per second.
        chunk_duration: Length of each chunk in seconds.

    Returns:
        A list of consecutive slices of ``audio_data``. Every slice holds
        ``int(chunk_duration * sr)`` samples except possibly the last one,
        which holds whatever remains. Empty input yields an empty list.

    Raises:
        ValueError: If ``chunk_duration * sr`` truncates to less than one
            sample, which would otherwise produce a zero or negative stride.
    """
    # Computed once: the stride is the same for every chunk.
    chunk_size = int(chunk_duration * sr)
    if chunk_size <= 0:
        raise ValueError(
            "chunk_duration * sr must cover at least one sample, "
            f"got chunk_duration={chunk_duration!r}, sr={sr!r}"
        )
    return [
        audio_data[start:start + chunk_size]
        for start in range(0, len(audio_data), chunk_size)
    ]
|
16 |
|
17 |
+
def transcribe_long_audio(audio_input, transcriber, chunk_duration=30):
    """Transcribe long audio by splitting into smaller chunks.

    Args:
        audio_input: Either a path to an audio file (``str``), or a two-item
            pair of raw audio. Gradio's ``gr.Audio(type="numpy")`` delivers
            the pair as ``(sample_rate, samples)``; the reversed
            ``(samples, sample_rate)`` order is accepted as well.
        transcriber: Callable that takes the path of a WAV file and returns
            a mapping with a ``"text"`` entry.
        chunk_duration: Length of each chunk in seconds.

    Returns:
        The chunk transcriptions joined with single spaces.
    """
    import numbers
    import tempfile

    # Check if audio_input is a file path or raw data
    if isinstance(audio_input, str):
        audio_data, sr = librosa.load(audio_input, sr=None)
    else:  # Raw audio data (e.g., from recording)
        first, second = audio_input
        # The sample rate is the scalar member of the pair; Gradio puts it
        # first, so blindly unpacking as (data, sr) would swap the two.
        if isinstance(first, numbers.Real):
            sr, audio_data = first, second
        else:
            audio_data, sr = first, second

    chunks = split_audio(audio_data, sr, chunk_duration)
    transcriptions = []
    # A private directory per call keeps concurrent requests from clobbering
    # each other's chunk files and guarantees cleanup even when writing or
    # transcribing a chunk raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        for i, chunk in enumerate(chunks):
            temp_path = os.path.join(temp_dir, f"temp_chunk_{i}.wav")
            sf.write(temp_path, chunk, sr)  # Save the chunk as a WAV file
            transcriptions.append(transcriber(temp_path)["text"])
    return " ".join(transcriptions)
|
34 |
|
35 |
@spaces.GPU(duration=3)
|
|
|
46 |
raise
|
47 |
|
48 |
# Function to process audio
|
49 |
+
def process_audio(audio_input):
|
50 |
try:
|
51 |
# Transcribe the audio (long-form support)
|
52 |
+
transcription = transcribe_long_audio(audio_input, transcriber, chunk_duration=30)
|
53 |
# Summarize the transcription
|
54 |
summary = summarizer(transcription, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
|
55 |
return transcription, summary
|
|
|
60 |
with gr.Blocks() as interface:
|
61 |
with gr.Row():
|
62 |
with gr.Column():
|
63 |
+
audio_input = gr.Audio(source="microphone", type="numpy", label="Record or Upload Audio")
|
64 |
process_button = gr.Button("Process Audio")
|
65 |
with gr.Column():
|
66 |
transcription_output = gr.Textbox(label="Full Transcription", lines=10)
|