Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
ZennyKenny
committed on
persistent storage support
Browse files
app.py
CHANGED
@@ -3,8 +3,13 @@ import torch
|
|
3 |
from transformers import pipeline
|
4 |
import librosa
|
5 |
import soundfile as sf
|
6 |
-
import spaces
|
7 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
def split_audio(audio_data, sr, chunk_duration=30):
|
10 |
"""Split audio into chunks of chunk_duration seconds."""
|
@@ -14,24 +19,11 @@ def split_audio(audio_data, sr, chunk_duration=30):
|
|
14 |
chunks.append(audio_data[start:end])
|
15 |
return chunks
|
16 |
|
17 |
-
def transcribe_long_audio(
|
18 |
"""Transcribe long audio by splitting into smaller chunks."""
|
19 |
try:
|
20 |
-
#
|
21 |
-
|
22 |
-
if isinstance(audio_input, tuple): # Recorded audio
|
23 |
-
print("Processing recorded audio...")
|
24 |
-
audio_data, sr = audio_input # Unpack raw audio data and sample rate
|
25 |
-
temp_path = "recorded_audio.wav"
|
26 |
-
sf.write(temp_path, audio_data, sr) # Save recorded audio as a temporary file
|
27 |
-
elif isinstance(audio_input, str): # Uploaded file path
|
28 |
-
print("Processing uploaded audio...")
|
29 |
-
temp_path = audio_input # Use the file path directly
|
30 |
-
else:
|
31 |
-
raise ValueError("Unsupported audio input format.")
|
32 |
-
|
33 |
-
# Process the audio file (recorded or uploaded)
|
34 |
-
audio_data, sr = librosa.load(temp_path, sr=None)
|
35 |
chunks = split_audio(audio_data, sr, chunk_duration)
|
36 |
transcriptions = []
|
37 |
for i, chunk in enumerate(chunks):
|
@@ -40,13 +32,30 @@ def transcribe_long_audio(audio_input, transcriber, chunk_duration=30):
|
|
40 |
transcription = transcriber(chunk_path)["text"]
|
41 |
transcriptions.append(transcription)
|
42 |
os.remove(chunk_path) # Cleanup temp files
|
43 |
-
if temp_path == "recorded_audio.wav":
|
44 |
-
os.remove(temp_path) # Remove the temporary recorded audio file
|
45 |
return " ".join(transcriptions)
|
46 |
except Exception as e:
|
47 |
print(f"Error in transcribe_long_audio: {e}")
|
48 |
return f"Error processing audio: {e}"
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
@spaces.GPU(duration=3)
|
51 |
def main():
|
52 |
device = 0 if torch.cuda.is_available() else -1
|
@@ -60,17 +69,29 @@ def main():
|
|
60 |
|
61 |
def process_audio(audio_input):
|
62 |
try:
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
summary = summarizer(transcription, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
|
65 |
-
|
|
|
|
|
|
|
|
|
66 |
except Exception as e:
|
67 |
print(f"Error in process_audio: {e}")
|
68 |
-
return f"Error processing audio: {e}", ""
|
69 |
-
|
70 |
-
def stop_microphone():
|
71 |
-
"""Simulate stopping the microphone."""
|
72 |
-
print("Microphone stopped.") # Debugging for user feedback
|
73 |
-
return "Microphone stopped. Recording session has ended."
|
74 |
|
75 |
with gr.Blocks() as interface:
|
76 |
with gr.Row():
|
@@ -78,21 +99,15 @@ def main():
|
|
78 |
# Enable recording or file upload
|
79 |
audio_input = gr.Audio(type="numpy", label="Record or Upload Audio")
|
80 |
process_button = gr.Button("Process Audio")
|
81 |
-
stop_button = gr.Button("Stop Recording") # Add Stop Button
|
82 |
with gr.Column():
|
83 |
transcription_output = gr.Textbox(label="Full Transcription", lines=10)
|
84 |
summary_output = gr.Textbox(label="Summary", lines=5)
|
|
|
85 |
|
86 |
process_button.click(
|
87 |
process_audio,
|
88 |
inputs=[audio_input],
|
89 |
-
outputs=[transcription_output, summary_output]
|
90 |
-
)
|
91 |
-
|
92 |
-
stop_button.click(
|
93 |
-
stop_microphone,
|
94 |
-
inputs=[],
|
95 |
-
outputs=[]
|
96 |
)
|
97 |
|
98 |
interface.launch(share=True)
|
|
|
3 |
from transformers import pipeline
|
4 |
import librosa
|
5 |
import soundfile as sf
|
|
|
6 |
import os
|
7 |
+
import uuid
|
8 |
+
|
9 |
+
# Directory where recorded audio clips are persisted between runs.
# HF_HOME points at the Space's persistent storage when available;
# fall back to the current working directory otherwise.
_BASE_DIR = os.getenv("HF_HOME", ".")
OUTPUT_DIR = os.path.join(_BASE_DIR, "recorded_audio_files")
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
13 |
|
14 |
def split_audio(audio_data, sr, chunk_duration=30):
|
15 |
"""Split audio into chunks of chunk_duration seconds."""
|
|
|
19 |
chunks.append(audio_data[start:end])
|
20 |
return chunks
|
21 |
|
22 |
+
def transcribe_long_audio(audio_path, transcriber, chunk_duration=30):
|
23 |
"""Transcribe long audio by splitting into smaller chunks."""
|
24 |
try:
|
25 |
+
# Load the audio file
|
26 |
+
audio_data, sr = librosa.load(audio_path, sr=None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
chunks = split_audio(audio_data, sr, chunk_duration)
|
28 |
transcriptions = []
|
29 |
for i, chunk in enumerate(chunks):
|
|
|
32 |
transcription = transcriber(chunk_path)["text"]
|
33 |
transcriptions.append(transcription)
|
34 |
os.remove(chunk_path) # Cleanup temp files
|
|
|
|
|
35 |
return " ".join(transcriptions)
|
36 |
except Exception as e:
|
37 |
print(f"Error in transcribe_long_audio: {e}")
|
38 |
return f"Error processing audio: {e}"
|
39 |
|
40 |
+
def cleanup_output_dir(max_storage_mb=500):
    """Delete the oldest files in OUTPUT_DIR until its total size fits the budget.

    Args:
        max_storage_mb: storage budget for OUTPUT_DIR, in megabytes.

    Files are evicted oldest-first (by creation time). Any OS error is
    logged and swallowed so cleanup never breaks the calling request.
    """
    try:
        budget_bytes = max_storage_mb * 1024 * 1024
        paths = [os.path.join(OUTPUT_DIR, f) for f in os.listdir(OUTPUT_DIR)]
        total_size = sum(os.path.getsize(p) for p in paths)
        if total_size > budget_bytes:
            # Evict oldest files first until we are back under budget.
            for path in sorted(paths, key=os.path.getctime):
                # BUG FIX: read the size BEFORE removing the file. The original
                # called os.path.getsize() after os.remove(), which raised
                # FileNotFoundError and aborted cleanup after a single deletion.
                size = os.path.getsize(path)
                os.remove(path)
                total_size -= size
                if total_size <= budget_bytes:
                    break
    except Exception as e:
        print(f"Error during cleanup: {e}")
|
58 |
+
|
59 |
@spaces.GPU(duration=3)
|
60 |
def main():
|
61 |
device = 0 if torch.cuda.is_available() else -1
|
|
|
69 |
|
70 |
def process_audio(audio_input):
    """Transcribe and summarize audio coming from the Gradio input.

    Args:
        audio_input: either a ``(sample_rate, data)`` tuple produced by
            ``gr.Audio(type="numpy")`` for microphone recordings, or a
            file-path string for an uploaded file.

    Returns:
        ``(transcription, summary, audio_path)`` on success, or
        ``(error_message, "", "")`` on failure.
    """
    try:
        if isinstance(audio_input, tuple):  # Recorded audio
            print("Handling recorded audio.")
            # BUG FIX: gr.Audio(type="numpy") yields (sample_rate, data),
            # not (data, sample_rate) — the original unpack was swapped,
            # so sf.write received an int as the audio buffer.
            sr, audio_data = audio_input
            # Unique name so concurrent sessions never clobber each other.
            filename = f"recorded_audio_{uuid.uuid4().hex}.wav"
            temp_path = os.path.join(OUTPUT_DIR, filename)
            sf.write(temp_path, audio_data, sr)
        elif isinstance(audio_input, str):  # Uploaded file path
            print("Handling uploaded audio.")
            temp_path = audio_input
        else:
            raise ValueError("Unsupported audio input format.")

        # Transcribe the saved audio file
        transcription = transcribe_long_audio(temp_path, transcriber, chunk_duration=30)
        summary = summarizer(transcription, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]

        # Cleanup old files
        cleanup_output_dir()

        return transcription, summary, temp_path
    except Exception as e:
        print(f"Error in process_audio: {e}")
        return f"Error processing audio: {e}", "", ""
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
with gr.Blocks() as interface:
|
97 |
with gr.Row():
|
|
|
99 |
# Enable recording or file upload
|
100 |
audio_input = gr.Audio(type="numpy", label="Record or Upload Audio")
|
101 |
process_button = gr.Button("Process Audio")
|
|
|
102 |
with gr.Column():
|
103 |
transcription_output = gr.Textbox(label="Full Transcription", lines=10)
|
104 |
summary_output = gr.Textbox(label="Summary", lines=5)
|
105 |
+
audio_output = gr.Audio(label="Playback Processed Audio")
|
106 |
|
107 |
process_button.click(
|
108 |
process_audio,
|
109 |
inputs=[audio_input],
|
110 |
+
outputs=[transcription_output, summary_output, audio_output]
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
)
|
112 |
|
113 |
interface.launch(share=True)
|