ZennyKenny commited on
Commit
e22e17f
·
verified ·
1 Parent(s): e88a1f3

fix upload error

Browse files
Files changed (1) hide show
  1. app.py +6 -14
app.py CHANGED
@@ -16,51 +16,45 @@ def split_audio(audio_data, sr, chunk_duration=30):
16
 
17
  def transcribe_long_audio(audio_input, transcriber, chunk_duration=30):
18
  """Transcribe long audio by splitting into smaller chunks."""
19
- # Check if audio_input is a file path or raw data
20
- if isinstance(audio_input, str):
21
  audio_data, sr = librosa.load(audio_input, sr=None)
22
- else: # Raw audio data (e.g., from recording)
23
  audio_data, sr = audio_input
24
 
25
  chunks = split_audio(audio_data, sr, chunk_duration)
26
  transcriptions = []
27
  for i, chunk in enumerate(chunks):
28
  temp_path = f"temp_chunk_{i}.wav"
29
- sf.write(temp_path, chunk, sr) # Save the chunk as a WAV file
30
  transcription = transcriber(temp_path)["text"]
31
  transcriptions.append(transcription)
32
- os.remove(temp_path) # Clean up temporary files
33
  return " ".join(transcriptions)
34
 
35
  @spaces.GPU(duration=3)
36
  def main():
37
- # Force GPU if available, fallback to CPU
38
  device = 0 if torch.cuda.is_available() else -1
39
 
40
  try:
41
- # Load models with explicit device
42
  transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)
43
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
44
  except Exception as e:
45
  print(f"Error loading models: {e}")
46
  raise
47
 
48
- # Function to process audio
49
  def process_audio(audio_input):
50
  try:
51
- # Transcribe the audio (long-form support)
52
  transcription = transcribe_long_audio(audio_input, transcriber, chunk_duration=30)
53
- # Summarize the transcription
54
  summary = summarizer(transcription, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
55
  return transcription, summary
56
  except Exception as e:
57
  return f"Error processing audio: {e}", ""
58
 
59
- # Gradio Interface with Horizontal Layout
60
  with gr.Blocks() as interface:
61
  with gr.Row():
62
  with gr.Column():
63
- audio_input = gr.Audio(source="microphone", type="numpy", label="Record or Upload Audio")
 
64
  process_button = gr.Button("Process Audio")
65
  with gr.Column():
66
  transcription_output = gr.Textbox(label="Full Transcription", lines=10)
@@ -72,9 +66,7 @@ def main():
72
  outputs=[transcription_output, summary_output]
73
  )
74
 
75
- # Launch the interface with optional public sharing
76
  interface.launch(share=True)
77
 
78
- # Run the main function
79
  if __name__ == "__main__":
80
  main()
 
16
 
17
def transcribe_long_audio(audio_input, transcriber, chunk_duration=30):
    """Transcribe long audio by splitting it into fixed-length chunks.

    Args:
        audio_input: Either a path to an audio file (str) or a Gradio
            ``type="numpy"`` value, i.e. a ``(sample_rate, data)`` tuple.
        transcriber: ASR callable; ``transcriber(path)["text"]`` must return
            the transcription of the WAV file at ``path``.
        chunk_duration: Length of each chunk in seconds (passed to
            ``split_audio``).

    Returns:
        str: The chunk transcriptions joined with single spaces.
    """
    if isinstance(audio_input, str):  # File path (upload)
        audio_data, sr = librosa.load(audio_input, sr=None)
    else:
        # Gradio's numpy audio format is (sample_rate, data) — the REVERSE
        # of librosa.load's (data, sr) order. Unpacking it the other way
        # around passed the sample rate as the waveform.
        sr, audio_data = audio_input

    chunks = split_audio(audio_data, sr, chunk_duration)
    transcriptions = []
    for i, chunk in enumerate(chunks):
        temp_path = f"temp_chunk_{i}.wav"
        sf.write(temp_path, chunk, sr)  # Save chunk as WAV so the pipeline can read it
        try:
            transcriptions.append(transcriber(temp_path)["text"])
        finally:
            # Cleanup temp files even when transcription raises,
            # so failed runs don't litter the working directory.
            os.remove(temp_path)
    return " ".join(transcriptions)
33
 
34
  @spaces.GPU(duration=3)
35
  def main():
 
36
  device = 0 if torch.cuda.is_available() else -1
37
 
38
  try:
 
39
  transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)
40
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
41
  except Exception as e:
42
  print(f"Error loading models: {e}")
43
  raise
44
 
 
45
  def process_audio(audio_input):
46
  try:
 
47
  transcription = transcribe_long_audio(audio_input, transcriber, chunk_duration=30)
 
48
  summary = summarizer(transcription, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
49
  return transcription, summary
50
  except Exception as e:
51
  return f"Error processing audio: {e}", ""
52
 
 
53
  with gr.Blocks() as interface:
54
  with gr.Row():
55
  with gr.Column():
56
+ # No 'source' argument; recording enabled by default
57
+ audio_input = gr.Audio(type="numpy", label="Record or Upload Audio")
58
  process_button = gr.Button("Process Audio")
59
  with gr.Column():
60
  transcription_output = gr.Textbox(label="Full Transcription", lines=10)
 
66
  outputs=[transcription_output, summary_output]
67
  )
68
 
 
69
  interface.launch(share=True)
70
 
 
71
  if __name__ == "__main__":
72
  main()