ZennyKenny committed on
Commit
190e895
·
verified ·
1 Parent(s): e22e17f

close mic so browser doesn't crash

Browse files
Files changed (1) hide show
  1. app.py +30 -14
app.py CHANGED
@@ -16,20 +16,25 @@ def split_audio(audio_data, sr, chunk_duration=30):
16
 
17
  def transcribe_long_audio(audio_input, transcriber, chunk_duration=30):
18
  """Transcribe long audio by splitting into smaller chunks."""
19
- if isinstance(audio_input, str): # File path
20
- audio_data, sr = librosa.load(audio_input, sr=None)
21
- else: # Raw audio data (numpy array)
22
- audio_data, sr = audio_input
 
 
 
23
 
24
- chunks = split_audio(audio_data, sr, chunk_duration)
25
- transcriptions = []
26
- for i, chunk in enumerate(chunks):
27
- temp_path = f"temp_chunk_{i}.wav"
28
- sf.write(temp_path, chunk, sr) # Save chunk as WAV
29
- transcription = transcriber(temp_path)["text"]
30
- transcriptions.append(transcription)
31
- os.remove(temp_path) # Cleanup temp files
32
- return " ".join(transcriptions)
 
 
33
 
34
  @spaces.GPU(duration=3)
35
  def main():
@@ -53,19 +58,30 @@ def main():
53
  with gr.Blocks() as interface:
54
  with gr.Row():
55
  with gr.Column():
56
- # No 'source' argument; recording enabled by default
57
  audio_input = gr.Audio(type="numpy", label="Record or Upload Audio")
58
  process_button = gr.Button("Process Audio")
 
59
  with gr.Column():
60
  transcription_output = gr.Textbox(label="Full Transcription", lines=10)
61
  summary_output = gr.Textbox(label="Summary", lines=5)
62
 
 
 
 
 
63
  process_button.click(
64
  process_audio,
65
  inputs=[audio_input],
66
  outputs=[transcription_output, summary_output]
67
  )
68
 
 
 
 
 
 
 
69
  interface.launch(share=True)
70
 
71
  if __name__ == "__main__":
 
16
 
17
def transcribe_long_audio(audio_input, transcriber, chunk_duration=30):
    """Transcribe long audio by splitting it into smaller chunks.

    Args:
        audio_input: Either a file path (``str``), which is loaded with
            ``librosa.load(..., sr=None)``, or a 2-tuple of raw audio.
            Both ``(samples, sample_rate)`` and the ``(sample_rate, samples)``
            ordering produced by ``gr.Audio(type="numpy")`` are accepted.
        transcriber: Callable that takes the path of a WAV file and returns
            a mapping with a ``"text"`` entry.
        chunk_duration: Chunk length forwarded unchanged to ``split_audio``
            (presumably seconds -- confirm against ``split_audio``).

    Returns:
        The per-chunk transcriptions joined by single spaces. On any failure
        the function does not raise; it returns a string of the form
        ``"Error processing audio: <reason>"`` so the UI can display it.
    """
    # Function-scope imports keep this block self-contained; only the
    # top-of-file imports the original already relied on (os, sf, librosa)
    # are assumed to exist.
    import numbers
    import tempfile

    try:
        if isinstance(audio_input, tuple):  # Recorded audio
            audio_data, sr = audio_input
            # Gradio's numpy audio is (sample_rate, samples). If the first
            # element is the scalar and the second is not, the pair arrived
            # in that order, so swap it into (samples, sample_rate).
            if isinstance(audio_data, numbers.Real) and not isinstance(sr, numbers.Real):
                audio_data, sr = sr, audio_data
        elif isinstance(audio_input, str):  # Uploaded file path
            audio_data, sr = librosa.load(audio_input, sr=None)
        else:
            raise ValueError("Unsupported audio input format.")

        chunks = split_audio(audio_data, sr, chunk_duration)
        transcriptions = []
        # A private temporary directory avoids name collisions between
        # concurrent requests and is removed even when writing or
        # transcribing a chunk raises.
        with tempfile.TemporaryDirectory() as tmp_dir:
            for i, chunk in enumerate(chunks):
                temp_path = os.path.join(tmp_dir, f"temp_chunk_{i}.wav")
                sf.write(temp_path, chunk, sr)  # Save chunk as WAV
                transcriptions.append(transcriber(temp_path)["text"])
        return " ".join(transcriptions)
    except Exception as e:
        # UI boundary: report the failure as text instead of raising,
        # matching the original contract of this function.
        return f"Error processing audio: {e}"
38
 
39
  @spaces.GPU(duration=3)
40
  def main():
 
58
  with gr.Blocks() as interface:
59
  with gr.Row():
60
  with gr.Column():
61
+ # Enable recording or file upload
62
  audio_input = gr.Audio(type="numpy", label="Record or Upload Audio")
63
  process_button = gr.Button("Process Audio")
64
+ stop_button = gr.Button("Stop Recording") # Add Stop Button
65
  with gr.Column():
66
  transcription_output = gr.Textbox(label="Full Transcription", lines=10)
67
  summary_output = gr.Textbox(label="Summary", lines=5)
68
 
69
+ def stop_microphone():
70
+ """Dummy function to simulate stopping the microphone."""
71
+ return "Recording stopped."
72
+
73
  process_button.click(
74
  process_audio,
75
  inputs=[audio_input],
76
  outputs=[transcription_output, summary_output]
77
  )
78
 
79
+ stop_button.click(
80
+ stop_microphone,
81
+ inputs=[],
82
+ outputs=[],
83
+ )
84
+
85
  interface.launch(share=True)
86
 
87
  if __name__ == "__main__":