from langchain.chains import RetrievalQA
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.manager import CallbackManager
#from langchain_community.llms import Ollama
#from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
import streamlit as st
import os
import time
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.embeddings import HuggingFaceEmbeddings

model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
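# "all-mpnet-base-v2" produces 768-dimensional sentence embeddings and runs
# acceptably on CPU; set 'normalize_embeddings': True if you want unit-length
# vectors for cosine similarity.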

if not os.path.exists('files'):
    os.mkdir('files')
if not os.path.exists('jj'):
    os.mkdir('jj')
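# 'files' holds the uploaded PDFs; 'jj' is reused below as the Chroma
# persistence directory.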

if 'template' not in st.session_state:
    st.session_state.template = """You are a knowledgeable chatbot, here to help with the user's questions. Your tone should be professional and informative. Keep answers short, and use tables where they help.

    Context: {context}
    History: {history}

    User: {question}
    Chatbot:"""

if 'prompt' not in st.session_state:
    st.session_state.prompt = PromptTemplate(
        input_variables=["history", "context", "question"],
        template=st.session_state.template,
    )

if 'memory' not in st.session_state:
    st.session_state.memory = ConversationBufferMemory(
        memory_key="history",
        return_messages=True,
        input_key="question")
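# memory_key="history" and input_key="question" match the {history} and
# {question} placeholders in the template above; the retriever fills {context}.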

if 'vectorstore' not in st.session_state:
    #st.session_state.vectorstore = Chroma(persist_directory='jj', embedding_function=OllamaEmbeddings(base_url='http://localhost:11434', model="mistral"))
    st.session_state.vectorstore = Chroma(persist_directory='jj', embedding_function=embeddings)
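    # Loading Chroma with persist_directory='jj' means previously indexed PDFs
    # survive app restarts.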

if 'llm' not in st.session_state:
    #st.session_state.llm = Ollama(base_url="http://localhost:11434", model="mistral", verbose=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
    st.session_state.llm = HuggingFaceEndpoint(repo_id="mistralai/Mistral-7B-Instruct-v0.2", temperature=0.9)
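    # HuggingFaceEndpoint calls the hosted Inference API, so a valid
    # HUGGINGFACEHUB_API_TOKEN environment variable (or an explicit
    # huggingfacehub_api_token argument) is required.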

# Initialize session state
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []
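# chat_history is kept in session_state because Streamlit re-runs the whole
# script on every interaction.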

st.title("PDF Chatbot")

# Upload a PDF file
uploaded_file = st.file_uploader("Upload your PDF", type='pdf')

for message in st.session_state.chat_history:
    with st.chat_message(message["role"]):
        st.markdown(message["message"])

if uploaded_file is not None:
    pdf_path = os.path.join("files", uploaded_file.name)
    if not os.path.isfile(pdf_path):
        with st.status("Analyzing your document..."):
            # Save the uploaded PDF so PyPDFLoader can read it from disk
            with open(pdf_path, "wb") as f:
                f.write(uploaded_file.read())
            loader = PyPDFLoader(pdf_path)
            data = loader.load()

            # Initialize text splitter
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=1500,
                chunk_overlap=0,
                length_function=len
            )
            all_splits = text_splitter.split_documents(data)

            # Create and persist the vector store
            #st.session_state.vectorstore = Chroma.from_documents(documents=all_splits, embedding=OllamaEmbeddings(model="mistral"))
            st.session_state.vectorstore = Chroma.from_documents(
                documents=all_splits,
                embedding=embeddings,
                persist_directory='jj'
            )
            st.session_state.vectorstore.persist()

    st.session_state.retriever = st.session_state.vectorstore.as_retriever()
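    # as_retriever() defaults to similarity search over the top 4 chunks;
    # pass search_kwargs={"k": ...} to retrieve more or fewer.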

    # Initialize the QA chain
    if 'qa_chain' not in st.session_state:
        st.session_state.qa_chain = RetrievalQA.from_chain_type(
            llm=st.session_state.llm,
            chain_type='stuff',
            retriever=st.session_state.retriever,
            verbose=True,
            chain_type_kwargs={
                "verbose": True,
                "prompt": st.session_state.prompt,
                "memory": st.session_state.memory,
            }
        )
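    # chain_type='stuff' concatenates every retrieved chunk into the {context}
    # slot of the prompt in a single LLM call, so very long documents can
    # exceed the model's context window.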

    # Chat input
    if user_input := st.chat_input("You:", key="user_input"):
        user_message = {"role": "user", "message": user_input}
        st.session_state.chat_history.append(user_message)
        with st.chat_message("user"):
            st.markdown(user_input)
        with st.chat_message("assistant"):
            with st.spinner("Assistant is typing..."):
                response = st.session_state.qa_chain(user_input)
            message_placeholder = st.empty()
            full_response = ""
            for chunk in response['result'].split():
                full_response += chunk + " "
                time.sleep(0.05)
                # Add a blinking cursor to simulate typing
                message_placeholder.markdown(full_response + "▌")
            message_placeholder.markdown(full_response)
        chatbot_message = {"role": "assistant", "message": response['result']}
        st.session_state.chat_history.append(chatbot_message)
else:
    st.write("Please upload a PDF file.")
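
# To run this locally (assuming the file is saved as app.py):
#   pip install streamlit langchain langchain-community chromadb pypdf sentence-transformers huggingface_hub
#   streamlit run app.py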