Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
ZennyKenny
committed on
fix upload error
Browse files
app.py
CHANGED
@@ -16,51 +16,45 @@ def split_audio(audio_data, sr, chunk_duration=30):
|
|
16 |
|
17 |
def transcribe_long_audio(audio_input, transcriber, chunk_duration=30):
|
18 |
"""Transcribe long audio by splitting into smaller chunks."""
|
19 |
-
|
20 |
-
if isinstance(audio_input, str):
|
21 |
audio_data, sr = librosa.load(audio_input, sr=None)
|
22 |
-
else: # Raw audio data (
|
23 |
audio_data, sr = audio_input
|
24 |
|
25 |
chunks = split_audio(audio_data, sr, chunk_duration)
|
26 |
transcriptions = []
|
27 |
for i, chunk in enumerate(chunks):
|
28 |
temp_path = f"temp_chunk_{i}.wav"
|
29 |
-
sf.write(temp_path, chunk, sr) # Save
|
30 |
transcription = transcriber(temp_path)["text"]
|
31 |
transcriptions.append(transcription)
|
32 |
-
os.remove(temp_path) #
|
33 |
return " ".join(transcriptions)
|
34 |
|
35 |
@spaces.GPU(duration=3)
|
36 |
def main():
|
37 |
-
# Force GPU if available, fallback to CPU
|
38 |
device = 0 if torch.cuda.is_available() else -1
|
39 |
|
40 |
try:
|
41 |
-
# Load models with explicit device
|
42 |
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)
|
43 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
44 |
except Exception as e:
|
45 |
print(f"Error loading models: {e}")
|
46 |
raise
|
47 |
|
48 |
-
# Function to process audio
|
49 |
def process_audio(audio_input):
|
50 |
try:
|
51 |
-
# Transcribe the audio (long-form support)
|
52 |
transcription = transcribe_long_audio(audio_input, transcriber, chunk_duration=30)
|
53 |
-
# Summarize the transcription
|
54 |
summary = summarizer(transcription, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
|
55 |
return transcription, summary
|
56 |
except Exception as e:
|
57 |
return f"Error processing audio: {e}", ""
|
58 |
|
59 |
-
# Gradio Interface with Horizontal Layout
|
60 |
with gr.Blocks() as interface:
|
61 |
with gr.Row():
|
62 |
with gr.Column():
|
63 |
-
|
|
|
64 |
process_button = gr.Button("Process Audio")
|
65 |
with gr.Column():
|
66 |
transcription_output = gr.Textbox(label="Full Transcription", lines=10)
|
@@ -72,9 +66,7 @@ def main():
|
|
72 |
outputs=[transcription_output, summary_output]
|
73 |
)
|
74 |
|
75 |
-
# Launch the interface with optional public sharing
|
76 |
interface.launch(share=True)
|
77 |
|
78 |
-
# Run the main function
|
79 |
if __name__ == "__main__":
|
80 |
main()
|
|
|
16 |
|
17 |
def transcribe_long_audio(audio_input, transcriber, chunk_duration=30):
|
18 |
"""Transcribe long audio by splitting into smaller chunks."""
|
19 |
+
if isinstance(audio_input, str): # File path
|
|
|
20 |
audio_data, sr = librosa.load(audio_input, sr=None)
|
21 |
+
else: # Raw audio data (numpy array)
|
22 |
audio_data, sr = audio_input
|
23 |
|
24 |
chunks = split_audio(audio_data, sr, chunk_duration)
|
25 |
transcriptions = []
|
26 |
for i, chunk in enumerate(chunks):
|
27 |
temp_path = f"temp_chunk_{i}.wav"
|
28 |
+
sf.write(temp_path, chunk, sr) # Save chunk as WAV
|
29 |
transcription = transcriber(temp_path)["text"]
|
30 |
transcriptions.append(transcription)
|
31 |
+
os.remove(temp_path) # Cleanup temp files
|
32 |
return " ".join(transcriptions)
|
33 |
|
34 |
@spaces.GPU(duration=3)
|
35 |
def main():
|
|
|
36 |
device = 0 if torch.cuda.is_available() else -1
|
37 |
|
38 |
try:
|
|
|
39 |
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)
|
40 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
41 |
except Exception as e:
|
42 |
print(f"Error loading models: {e}")
|
43 |
raise
|
44 |
|
|
|
45 |
def process_audio(audio_input):
|
46 |
try:
|
|
|
47 |
transcription = transcribe_long_audio(audio_input, transcriber, chunk_duration=30)
|
|
|
48 |
summary = summarizer(transcription, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
|
49 |
return transcription, summary
|
50 |
except Exception as e:
|
51 |
return f"Error processing audio: {e}", ""
|
52 |
|
|
|
53 |
with gr.Blocks() as interface:
|
54 |
with gr.Row():
|
55 |
with gr.Column():
|
56 |
+
# No 'source' argument; recording enabled by default
|
57 |
+
audio_input = gr.Audio(type="numpy", label="Record or Upload Audio")
|
58 |
process_button = gr.Button("Process Audio")
|
59 |
with gr.Column():
|
60 |
transcription_output = gr.Textbox(label="Full Transcription", lines=10)
|
|
|
66 |
outputs=[transcription_output, summary_output]
|
67 |
)
|
68 |
|
|
|
69 |
interface.launch(share=True)
|
70 |
|
|
|
71 |
if __name__ == "__main__":
|
72 |
main()
|