ZennyKenny committed on
Commit e88a1f3 · verified · 1 Parent(s): 0a8f055

support for ad hoc recording

Files changed (1)
app.py +19 -12
app.py CHANGED
@@ -2,27 +2,34 @@ import gradio as gr
 import torch
 from transformers import pipeline
 import librosa
-import soundfile as sf
+import soundfile as sf
 import spaces
+import os
 
-def split_audio(audio_path, chunk_duration=30):
+def split_audio(audio_data, sr, chunk_duration=30):
     """Split audio into chunks of chunk_duration seconds."""
-    audio, sr = librosa.load(audio_path, sr=None)
     chunks = []
-    for start in range(0, len(audio), int(chunk_duration * sr)):
+    for start in range(0, len(audio_data), int(chunk_duration * sr)):
         end = start + int(chunk_duration * sr)
-        chunks.append(audio[start:end])
-    return chunks, sr
+        chunks.append(audio_data[start:end])
+    return chunks
 
-def transcribe_long_audio(audio_path, transcriber, chunk_duration=30):
+def transcribe_long_audio(audio_input, transcriber, chunk_duration=30):
     """Transcribe long audio by splitting into smaller chunks."""
-    chunks, sr = split_audio(audio_path, chunk_duration)
+    # Check if audio_input is a file path or raw data
+    if isinstance(audio_input, str):
+        audio_data, sr = librosa.load(audio_input, sr=None)
+    else:  # Raw audio data (e.g., from recording)
+        audio_data, sr = audio_input
+
+    chunks = split_audio(audio_data, sr, chunk_duration)
     transcriptions = []
     for i, chunk in enumerate(chunks):
         temp_path = f"temp_chunk_{i}.wav"
-        sf.write(temp_path, chunk, sr)  # Save the chunk using soundfile
+        sf.write(temp_path, chunk, sr)  # Save the chunk as a WAV file
         transcription = transcriber(temp_path)["text"]
         transcriptions.append(transcription)
+        os.remove(temp_path)  # Clean up temporary files
     return " ".join(transcriptions)
 
 @spaces.GPU(duration=3)
@@ -39,10 +46,10 @@ def main():
         raise
 
     # Function to process audio
-    def process_audio(audio_file):
+    def process_audio(audio_input):
         try:
             # Transcribe the audio (long-form support)
-            transcription = transcribe_long_audio(audio_file, transcriber, chunk_duration=30)
+            transcription = transcribe_long_audio(audio_input, transcriber, chunk_duration=30)
             # Summarize the transcription
             summary = summarizer(transcription, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
             return transcription, summary
@@ -53,7 +60,7 @@ def main():
     with gr.Blocks() as interface:
         with gr.Row():
             with gr.Column():
-                audio_input = gr.Audio(type="filepath", label="Upload Audio File")
+                audio_input = gr.Audio(source="microphone", type="numpy", label="Record or Upload Audio")
                 process_button = gr.Button("Process Audio")
             with gr.Column():
                 transcription_output = gr.Textbox(label="Full Transcription", lines=10)
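
For reference, a minimal sketch of how the new raw-audio branch of transcribe_long_audio could be exercised outside Gradio. It assumes the updated function from app.py is in scope, and it uses openai/whisper-tiny as a placeholder ASR checkpoint (the commit does not show which model the Space actually loads). The tuple ordering (audio_data, sr) simply follows the unpacking added inside transcribe_long_audio.

import numpy as np
from transformers import pipeline

# Placeholder checkpoint; swap in whatever model the Space really uses.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")

# Synthesize ~65 s of audio so the 30 s chunking yields three chunks.
sr = 16000
t = np.linspace(0, 65, 65 * sr, endpoint=False)
audio_data = (0.1 * np.sin(2 * np.pi * 440.0 * t)).astype(np.float32)

# Raw-data path: pass (audio_data, sr) instead of a file path, matching the
# isinstance(audio_input, str) check introduced in this commit.
text = transcribe_long_audio((audio_data, sr), transcriber, chunk_duration=30)
print(text)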