import gradio as gr
import torch
from transformers import pipeline
import librosa
import soundfile as sf
import os
import spaces  # Hugging Face Spaces SDK; provides the spaces.GPU decorator


def split_audio(audio_data, sr, chunk_duration=30):
    """Split audio into chunks of chunk_duration seconds."""
    chunks = []
    for start in range(0, len(audio_data), int(chunk_duration * sr)):
        end = start + int(chunk_duration * sr)
        chunks.append(audio_data[start:end])
    return chunks
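

# Illustrative example (not part of the app logic): 75 s of 16 kHz mono audio
# yields chunks of 480000, 480000, and 240000 samples (30 s + 30 s + 15 s);
# the final chunk is simply shorter, so no padding is needed.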


def transcribe_long_audio(audio_path, transcriber, chunk_duration=30):
    """Transcribe long audio by splitting it into smaller chunks."""
    try:
        # Load the audio file at its native sampling rate
        audio_data, sr = librosa.load(audio_path, sr=None)
        chunks = split_audio(audio_data, sr, chunk_duration)
        transcriptions = []
        for i, chunk in enumerate(chunks):
            chunk_path = f"temp_chunk_{i}.wav"
            sf.write(chunk_path, chunk, sr)  # Save chunk as WAV
            try:
                transcriptions.append(transcriber(chunk_path)["text"])
            finally:
                os.remove(chunk_path)  # Clean up the temp file even if transcription fails
        return " ".join(transcriptions)
    except Exception as e:
        print(f"Error in transcribe_long_audio: {e}")
        return f"Error processing audio: {e}"


def main():
    # Use the first CUDA device if available, otherwise fall back to CPU
    device = 0 if torch.cuda.is_available() else -1
    try:
        transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    except Exception as e:
        print(f"Error loading models: {e}")
        raise

    # Request ZeroGPU per inference call rather than for the whole app;
    # the 120 s budget is an assumption sized for transcription + summarization.
    @spaces.GPU(duration=120)
    def process_audio(audio_input):
        try:
            print(f"Processing uploaded audio: {audio_input}")
            if not isinstance(audio_input, str):
                raise ValueError("Invalid input type. Please upload a valid audio file.")
            if os.path.isdir(audio_input):
                raise ValueError("Input is a directory, not a file.")
            # Transcribe the uploaded audio file, then summarize the transcript
            transcription = transcribe_long_audio(audio_input, transcriber, chunk_duration=30)
            summary = summarizer(transcription, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
            # Return exactly two values to match the two declared outputs
            return transcription, summary
        except Exception as e:
            print(f"Error in process_audio: {e}")
            return f"Error processing audio: {e}", ""

    with gr.Blocks() as interface:
        with gr.Row():
            with gr.Column():
                # Only file uploads are supported (no microphone input)
                audio_input = gr.Audio(type="filepath", label="Upload Audio File")
                process_button = gr.Button("Transcribe Audio")
            with gr.Column():
                transcription_output = gr.Textbox(label="Transcription", lines=10)
                summary_output = gr.Textbox(label="Summary", lines=5)

        process_button.click(
            process_audio,
            inputs=[audio_input],
            outputs=[transcription_output, summary_output],
        )

    interface.launch(share=False)


if __name__ == "__main__":
    main()
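
# To run locally (assumed setup; package names as published on PyPI):
#   pip install gradio torch transformers librosa soundfile spaces
#   python app.py  # "app.py" assumed; use this file's actual name
# Then open the printed local URL (typically http://127.0.0.1:7860)
# and upload an audio file such as a .wav or .mp3.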