ZennyKenny committed on
Commit
875dc71
·
verified ·
1 Parent(s): 9036a70

handle audio input

Browse files
Files changed (1) hide show
  1. app.py +11 -17
app.py CHANGED
@@ -19,20 +19,25 @@ def transcribe_long_audio(audio_input, transcriber, chunk_duration=30):
19
  try:
20
  if isinstance(audio_input, tuple): # Recorded audio
21
  audio_data, sr = audio_input # Unpack raw audio data and sample rate
 
 
22
  elif isinstance(audio_input, str): # Uploaded file path
23
- audio_data, sr = librosa.load(audio_input, sr=None)
24
  else:
25
  raise ValueError("Unsupported audio input format.")
26
 
27
- # Split the audio into chunks
 
28
  chunks = split_audio(audio_data, sr, chunk_duration)
29
  transcriptions = []
30
  for i, chunk in enumerate(chunks):
31
- temp_path = f"temp_chunk_{i}.wav"
32
- sf.write(temp_path, chunk, sr) # Save chunk as WAV
33
- transcription = transcriber(temp_path)["text"]
34
  transcriptions.append(transcription)
35
- os.remove(temp_path) # Cleanup temp files
 
 
36
  return " ".join(transcriptions)
37
  except Exception as e:
38
  return f"Error processing audio: {e}"
@@ -62,27 +67,16 @@ def main():
62
  # Enable recording or file upload
63
  audio_input = gr.Audio(type="numpy", label="Record or Upload Audio")
64
  process_button = gr.Button("Process Audio")
65
- stop_button = gr.Button("Stop Recording") # Add Stop Button
66
  with gr.Column():
67
  transcription_output = gr.Textbox(label="Full Transcription", lines=10)
68
  summary_output = gr.Textbox(label="Summary", lines=5)
69
 
70
- def stop_microphone():
71
- """Dummy function to simulate stopping the microphone."""
72
- return "Recording stopped."
73
-
74
  process_button.click(
75
  process_audio,
76
  inputs=[audio_input],
77
  outputs=[transcription_output, summary_output]
78
  )
79
 
80
- stop_button.click(
81
- stop_microphone,
82
- inputs=[],
83
- outputs=[],
84
- )
85
-
86
  interface.launch(share=True)
87
 
88
  if __name__ == "__main__":
 
19
  try:
20
  if isinstance(audio_input, tuple): # Recorded audio
21
  audio_data, sr = audio_input # Unpack raw audio data and sample rate
22
+ temp_path = "recorded_audio.wav"
23
+ sf.write(temp_path, audio_data, sr) # Save recorded audio as a temporary file
24
  elif isinstance(audio_input, str): # Uploaded file path
25
+ temp_path = audio_input # Use the file path directly
26
  else:
27
  raise ValueError("Unsupported audio input format.")
28
 
29
+ # Process the audio file (recorded or uploaded)
30
+ audio_data, sr = librosa.load(temp_path, sr=None)
31
  chunks = split_audio(audio_data, sr, chunk_duration)
32
  transcriptions = []
33
  for i, chunk in enumerate(chunks):
34
+ chunk_path = f"temp_chunk_{i}.wav"
35
+ sf.write(chunk_path, chunk, sr) # Save chunk as WAV
36
+ transcription = transcriber(chunk_path)["text"]
37
  transcriptions.append(transcription)
38
+ os.remove(chunk_path) # Cleanup temp files
39
+ if temp_path == "recorded_audio.wav":
40
+ os.remove(temp_path) # Remove the temporary recorded audio file
41
  return " ".join(transcriptions)
42
  except Exception as e:
43
  return f"Error processing audio: {e}"
 
67
  # Enable recording or file upload
68
  audio_input = gr.Audio(type="numpy", label="Record or Upload Audio")
69
  process_button = gr.Button("Process Audio")
 
70
  with gr.Column():
71
  transcription_output = gr.Textbox(label="Full Transcription", lines=10)
72
  summary_output = gr.Textbox(label="Summary", lines=5)
73
 
 
 
 
 
74
  process_button.click(
75
  process_audio,
76
  inputs=[audio_input],
77
  outputs=[transcription_output, summary_output]
78
  )
79
 
 
 
 
 
 
 
80
  interface.launch(share=True)
81
 
82
  if __name__ == "__main__":