Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -2,59 +2,58 @@ import streamlit as st
|
|
2 |
from gradio_client import Client
|
3 |
import re
|
4 |
import os
|
|
|
5 |
|
6 |
st.title("Application de transcription Whisper-JAX 🎙️")
|
7 |
|
8 |
-
#
|
9 |
API_URL = "https://sanchit-gandhi-whisper-jax-spaces.hf.space"
|
10 |
|
11 |
-
#
|
12 |
client = Client(API_URL)
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
28 |
fichier_telecharge = st.file_uploader("Choisissez un fichier audio", type=['mp3', 'wav', 'ogg'])
|
29 |
|
30 |
-
#
|
31 |
if st.button("Transcrire l'audio"):
|
32 |
if fichier_telecharge is not None:
|
33 |
-
#
|
34 |
-
|
35 |
-
with open(chemin_fichier, "wb") as f:
|
36 |
-
f.write(fichier_telecharge.getbuffer())
|
37 |
|
38 |
-
#
|
39 |
try:
|
40 |
-
transcription,
|
41 |
st.write("Transcription avec horodatage :", transcription)
|
42 |
|
43 |
-
#
|
44 |
transcription_sans_horodatages = remove_timestamps(transcription)
|
45 |
st.write("Transcription sans horodatage :", transcription_sans_horodatages)
|
46 |
except Exception as e:
|
47 |
st.error(f"Une erreur est survenue lors de la transcription : {str(e)}")
|
48 |
-
finally:
|
49 |
-
# Nettoyage du fichier temporaire
|
50 |
-
os.remove(chemin_fichier)
|
51 |
else:
|
52 |
st.error("Veuillez télécharger un fichier audio pour continuer.")
|
53 |
|
54 |
-
#
|
55 |
-
def remove_timestamps(
|
56 |
-
#
|
57 |
-
|
58 |
-
#
|
59 |
-
|
60 |
-
return
|
|
|
2 |
from gradio_client import Client
|
3 |
import re
|
4 |
import os
|
5 |
+
import base64
|
6 |
|
7 |
st.title("Application de transcription Whisper-JAX 🎙️")
|
8 |
|
9 |
+
# Base URL of the hosted Gradio Space that this app sends transcription requests to.
API_URL = "https://sanchit-gandhi-whisper-jax-spaces.hf.space"

# Module-level Gradio client bound to API_URL; transcrire_audio() uses it for every request.
client = Client(API_URL)
# NOTE(review): the return value of view_api() is discarded here, so this call
# looks like a leftover from exploring the endpoint list — confirm it is still needed.
client.view_api(return_format="dict")
|
15 |
+
|
16 |
+
# Function to transcribe an audio file using the specified API endpoint
|
17 |
+
def transcrire_audio(file_data, task="transcribe", return_timestamps=True):
    """Send audio bytes to the remote endpoint and return its first two outputs.

    Args:
        file_data: Bytes-like audio content. It is base64-encoded and sent as
            a UTF-8 string.
        task: Task name forwarded unchanged to the endpoint.
        return_timestamps: Flag forwarded unchanged to the endpoint.

    Returns:
        A 2-tuple holding the first and second items of the endpoint response.
        The caller in this file unpacks them as ``(transcription, runtime)``.
    """
    # NOTE(review): confirm that "/predict_1" accepts a base64 string for its
    # audio input; some Gradio endpoints expect a file path or URL instead.
    payload = base64.b64encode(file_data).decode("utf-8")

    result = client.predict(payload, task, return_timestamps, api_name="/predict_1")

    first_output = result[0]
    second_output = result[1]
    return first_output, second_output
|
30 |
+
|
31 |
+
# Defined BEFORE the button handler on purpose: Streamlit executes this script
# from top to bottom on every rerun, so the handler can only call names that
# are already bound when it runs.
def remove_timestamps(text):
    """Return *text* with bracketed timestamp ranges removed.

    Matches ranges of the form ``[HH:MM:SS.mmm -> HH:MM:SS.mmm]`` and also
    ``[MM:SS.mmm -> MM:SS.mmm]`` (hours field optional), together with any
    whitespace that directly follows the closing bracket.

    Args:
        text: Transcription string that may contain timestamp ranges.

    Returns:
        The input string with every matched timestamp range stripped out.
    """
    # One side of the range: optional hours, then minutes, seconds, millis.
    stamp = r"(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}"
    pattern = r"\[" + stamp + r" -> " + stamp + r"\]\s*"
    return re.sub(pattern, "", text)


# Streamlit widget to upload an audio file
fichier_telecharge = st.file_uploader("Choisissez un fichier audio", type=['mp3', 'wav', 'ogg'])

# Button to process the audio file
if st.button("Transcrire l'audio"):
    if fichier_telecharge is not None:
        # Read the uploaded file fully into memory as bytes
        file_data = fichier_telecharge.getvalue()

        try:
            # The second returned value is not displayed, so it is discarded.
            transcription, _ = transcrire_audio(file_data)
            st.write("Transcription avec horodatage :", transcription)

            # Display the same transcription with timestamp ranges stripped
            transcription_sans_horodatages = remove_timestamps(transcription)
            st.write("Transcription sans horodatage :", transcription_sans_horodatages)
        except Exception as e:
            # Top-level UI boundary: surface any failure to the user instead
            # of letting the script crash.
            st.error(f"Une erreur est survenue lors de la transcription : {str(e)}")
    else:
        st.error("Veuillez télécharger un fichier audio pour continuer.")
|