PKaushik committed on
Commit
4305b2f
·
verified ·
1 Parent(s): 788e2b5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +129 -0
app.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from langchain_groq import ChatGroq
4
+ from langchain_huggingface import HuggingFaceEmbeddings
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.chains.combine_documents import create_stuff_documents_chain
7
+ from langchain_core.prompts import ChatPromptTemplate
8
+ from langchain.chains import create_retrieval_chain
9
+ from langchain_community.vectorstores import FAISS
10
+ from langchain_community.document_loaders import PyPDFLoader
11
+ from dotenv import load_dotenv
12
+ import tempfile
13
+
14
+ # Show title and description.
15
+ st.title("πŸ“„ Document question answering")
16
+ st.write(
17
+ "Upload a document below and ask a question about it – Groq will answer! "
18
+ "To use this app, you need to provide an Groq API key, which you can get [here](https://console.groq.com/keys). "
19
+ )
20
+
21
+ # Ask user for their Groq API key via `st.text_input`.
22
+ # Alternatively, you can store the API key in `./.streamlit/secrets.toml` and access it
23
+ # via `st.secrets`, see https://docs.streamlit.io/develop/concepts/connections/secrets-management
24
+ # Define model options
25
+ model_options = [
26
+ "llama3-8b-8192",
27
+ "llama3-70b-8192",
28
+ "llama-3.1-8b-instant",
29
+ "llama-3.1-70b-versatile",
30
+ "llama-3.2-1b-preview",
31
+ "llama-3.2-3b-preview",
32
+ "llama-3.2-11b-text-preview",
33
+ "llama-3.2-90b-text-preview",
34
+ "mixtral-8x7b-32768",
35
+ "gemma-7b-it",
36
+ "gemma2-9b-it"
37
+ ]
38
+ # Sidebar elements
39
+ with st.sidebar:
40
+ selected_model = st.selectbox("Select any Groq Model", model_options)
41
+ groq_api_key = st.text_input("Groq API Key", type="password")
42
+ if not groq_api_key:
43
+ st.info("Please add your Groq API key to continue.", icon="πŸ—οΈ")
44
+ else:
45
+
46
+ # Create an Groq client.
47
+ llm = ChatGroq(groq_api_key=groq_api_key, model_name=selected_model)
48
+
49
+ prompt = ChatPromptTemplate.from_template(
50
+ """
51
+ Answer the questions based on the provided context only.
52
+ Please provide the most accurate response based on the question.
53
+ <context>
54
+ {context}
55
+ <context>
56
+ Questions: {input}
57
+ """
58
+ )
59
+
60
+ def create_vector_db_out_of_the_uploaded_pdf_file(pdf_file):
61
+
62
+
63
+ if "vector_store" not in st.session_state:
64
+
65
+ with tempfile.NamedTemporaryFile(delete=False) as temp_file:
66
+
67
+ temp_file.write(pdf_file.read())
68
+
69
+ pdf_file_path = temp_file.name
70
+
71
+ st.session_state.embeddings = HuggingFaceEmbeddings(model_name='BAAI/bge-small-en-v1.5', model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': True})
72
+
73
+ st.session_state.loader = PyPDFLoader(pdf_file_path)
74
+
75
+ st.session_state.text_document_from_pdf = st.session_state.loader.load()
76
+
77
+ st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
78
+
79
+ st.session_state.final_document_chunks = st.session_state.text_splitter.split_documents(st.session_state.text_document_from_pdf)
80
+
81
+ st.session_state.vector_store = FAISS.from_documents(st.session_state.final_document_chunks, st.session_state.embeddings)
82
+
83
+
84
+ pdf_input_from_user = st.file_uploader("Upload the PDF file", type=['pdf'])
85
+
86
+
87
+ if pdf_input_from_user is not None:
88
+
89
+ if st.button("Create the Vector DB from the uploaded PDF file"):
90
+
91
+ if pdf_input_from_user is not None:
92
+
93
+ create_vector_db_out_of_the_uploaded_pdf_file(pdf_input_from_user)
94
+
95
+ st.success("Vector Store DB for this PDF file Is Ready")
96
+
97
+ else:
98
+
99
+ st.write("Please upload a PDF file first")
100
+
101
+
102
+ # Main section for question input and results
103
+ if "vector_store" in st.session_state:
104
+
105
+ user_prompt = st.text_input("Enter Your Question related to the uploaded PDF")
106
+
107
+ if st.button('Submit Prompt'):
108
+
109
+ if user_prompt:
110
+
111
+ if "vector_store" in st.session_state:
112
+
113
+ document_chain = create_stuff_documents_chain(llm, prompt)
114
+
115
+ retriever = st.session_state.vector_store.as_retriever()
116
+
117
+ retrieval_chain = create_retrieval_chain(retriever, document_chain)
118
+
119
+ response = retrieval_chain.invoke({'input': user_prompt})
120
+
121
+ st.write(response['answer'])
122
+
123
+ else:
124
+
125
+ st.write("Please embed the document first by uploading a PDF file.")
126
+
127
+ else:
128
+
129
+ st.error('Please write your prompt')