NHZ committed
Commit 3546e74 · verified · 1 Parent(s): 3c82158

Create app.py

Files changed (1)
  1. app.py +81 -0
app.py ADDED
@@ -0,0 +1,81 @@
+ import os
+ import tempfile
+
+ import streamlit as st
+ import whisper
+ import speech_recognition as sr
+ from gtts import gTTS
+ from transformers import pipeline
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.document_loaders import PyPDFLoader
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain.memory import ConversationBufferMemory
+
+ # Initialize models
+ whisper_model = whisper.load_model("base")  # the base model trades accuracy for speed
+ translation_pipeline = pipeline(
+     "translation", model="Helsinki-NLP/opus-mt-ur-en", tokenizer="Helsinki-NLP/opus-mt-ur-en"
+ )
+ urdu_translation_pipeline = pipeline(
+     "translation", model="Helsinki-NLP/opus-mt-en-ur", tokenizer="Helsinki-NLP/opus-mt-en-ur"
+ )
+ embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L6-v2")
+
+ # Streamlit interface
+ st.title("Real-Time Voice-to-Voice First Aid Chatbot")
+
+ uploaded_file = st.file_uploader("Upload a PDF file for First Aid Knowledge", type=["pdf"])
+ if uploaded_file:
+     st.write("Processing PDF...")
+     # PyPDFLoader expects a file path, so persist the uploaded bytes first
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
+         temp_pdf.write(uploaded_file.read())
+         pdf_path = temp_pdf.name
+     loader = PyPDFLoader(pdf_path)
+     documents = loader.load()
+
+     st.write("Creating vector database...")
+     vectorstore = FAISS.from_documents(documents, embedding_model)
+     st.write("Knowledge base ready.")
+
+     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+     chain = ConversationalRetrievalChain.from_llm(
+         llm=None,  # Replace with a valid LLM integration (e.g. an OpenAI or Groq client); the chain cannot answer until one is supplied
+         retriever=vectorstore.as_retriever(),
+         memory=memory,
+     )
+
+     if st.button("Start Chat"):
+         st.write("Listening... Speak now!")
+         recognizer = sr.Recognizer()  # microphone capture requires PyAudio
+
+         with sr.Microphone() as source:
+             st.write("Adjusting for ambient noise, please wait...")
+             recognizer.adjust_for_ambient_noise(source)
+             st.write("You can now speak.")
+
+             while True:
+                 try:
+                     st.write("Listening...")
+                     audio = recognizer.listen(source)
+                     st.write("Processing audio...")
+
+                     # Write the captured audio to disk so Whisper can read it
+                     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
+                         temp_audio.write(audio.get_wav_data())
+                         temp_audio_path = temp_audio.name
+
+                     transcription = whisper_model.transcribe(temp_audio_path)["text"]
+                     st.write(f"You said: {transcription}")
+
+                     # Urdu -> English for retrieval, English -> Urdu for the reply
+                     translated_text = translation_pipeline(transcription)[0]["translation_text"]
+                     st.write(f"Translated Text: {translated_text}")
+
+                     # ConversationalRetrievalChain expects "question" and returns "answer"
+                     response = chain({"question": translated_text})["answer"]
+                     st.write(f"Response: {response}")
+
+                     urdu_response = urdu_translation_pipeline(response)[0]["translation_text"]
+                     st.write(f"Response in Urdu: {urdu_response}")
+
+                     tts = gTTS(urdu_response, lang="ur")
+                     response_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
+                     tts.save(response_audio_path)
+                     os.system(f"mpg123 {response_audio_path}")  # requires the mpg123 player on the host
+
+                 except Exception as e:
+                     st.write(f"Error: {str(e)}")
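Note: the commit leaves llm=None, so the retrieval chain cannot generate answers as written. A minimal sketch of one way to fill that placeholder, assuming the langchain-openai package and an OPENAI_API_KEY in the environment (any LangChain-compatible chat model, such as a Groq client, would slot in the same way; the model name below is illustrative only):

from langchain_openai import ChatOpenAI

# Illustrative only: assumes OPENAI_API_KEY is set in the environment.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    memory=memory,
)

With an LLM supplied, the chain({"question": ...})["answer"] call in the loop above returns answers grounded in the uploaded PDF.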