cnmoro committed
Commit a70fff0 · verified · 1 Parent(s): 4ece2ef

Update app.py

Files changed (1):
  1. app.py +64 -66
app.py CHANGED
@@ -1,22 +1,23 @@
-import time, aiohttp, asyncio, json, os, multiprocessing, torch
+import time, os, multiprocessing
 from minivectordb.embedding_model import EmbeddingModel
 from minivectordb.vector_database import VectorDatabase
 from text_util_en_pt.cleaner import structurize_text, detect_language, Language
 from webtextcrawler.webtextcrawler import extract_text_from_url
 from duckduckgo_search import DDGS
+import google.generativeai as genai
 import gradio as gr
 
-torch.set_num_threads(2)
-
-openrouter_key = os.environ.get("OPENROUTER_KEY")
+gemini_key = os.environ.get("GEMINI_KEY")
+genai.configure(api_key=gemini_key)
+gemini = genai.GenerativeModel('gemini-pro')
 model = EmbeddingModel(use_quantized_onnx_model=True)
 
-def fetch_links(query, max_results=5):
+def fetch_links(query, max_results=10):
     with DDGS() as ddgs:
         return [r['href'] for r in ddgs.text(keywords=query, max_results=max_results)]
 
 def fetch_texts(links):
-    with multiprocessing.Pool(5) as pool:
+    with multiprocessing.Pool(10) as pool:
         texts = pool.map(extract_text_from_url, links)
     return '\n'.join([t for t in texts if t])
 
@@ -36,83 +37,80 @@ def index_and_search(query, text):
 
     # Retrieval
     start = time.time()
-    search_results = vector_db.find_most_similar(query_embedding, k = 12)
+    search_results = vector_db.find_most_similar(query_embedding, k = 30)
     retrieval_time = time.time() - start
     return '\n'.join([s['sentence'] for s in search_results[2]]), embedding_time, retrieval_time
 
-def retrieval_pipeline(query):
+def generate_search_terms(message, lang):
+    if lang == Language.ptbr:
+        prompt = f"A partir do texto a seguir, gere alguns termos de pesquisa: \"{message}\"\nSua resposta deve ser apenas o termo de busca mais adequado, e nada mais."
+    else:
+        prompt = f"From the following text, generate some search terms: \"{message}\"\nYour answer should be just the most appropriate search term, and nothing else."
+
+    response = gemini.generate_content(prompt)
+    return response.text
+
+async def predict(message, history):
+    full_response = ""
+
+    query_language = detect_language(message)
+
     start = time.time()
-    links = fetch_links(query)
+    full_response += "Generating search terms...\n"
+    yield full_response
+    search_query = generate_search_terms(message, query_language)
+    search_terms_time = time.time() - start
+
+    full_response += f"Search terms: \"{search_query}\"\n"
+    yield full_response
+    full_response += f"Search terms took: {search_terms_time:.4f} seconds\n"
+    yield full_response
+
+    start = time.time()
+    full_response += "\nSearching the web...\n"
+    yield full_response
+    links = fetch_links(search_query)
     websearch_time = time.time() - start
 
+    full_response += f"Web search took: {websearch_time:.4f} seconds\n"
+    yield full_response
+
+    full_response += f"Links visited:\n"
+    yield full_response
+    for link in links:
+        full_response += f"{link}\n"
+    yield full_response
+
+    full_response += "\nExtracting text from web pages...\n"
+    yield full_response
     start = time.time()
     text = fetch_texts(links)
     webcrawl_time = time.time() - start
 
-    context, embedding_time, retrieval_time = index_and_search(query, text)
+    full_response += f"Text extraction took: {webcrawl_time:.4f} seconds\n"
 
-    return context, websearch_time, webcrawl_time, embedding_time, retrieval_time, links
+    full_response += "\nIndexing in vector database and building prompt...\n"
+    yield full_response
 
-async def predict(message, history):
-    context, websearch_time, webcrawl_time, embedding_time, retrieval_time, links = retrieval_pipeline(message)
+    context, embedding_time, retrieval_time = index_and_search(message, text)
 
-    if detect_language(message) == Language.ptbr:
-        prompt = f"Contexto:\n\n{context}\n\nBaseado no contexto, responda: {message}"
+    if query_language == Language.ptbr:
+        prompt = f"Contexto:\n{context}\n\nResponda: \"{message}\"\n(Você pode utilizar o contexto para responder)\n(Sua resposta deve ser completa, detalhada e bem estruturada)"
     else:
-        prompt = f"Context:\n\n{context}\n\nBased on the context, answer: {message}"
-
-    url = "https://openrouter.ai/api/v1/chat/completions"
-    headers = { "Content-Type": "application/json",
-                "Authorization": f"Bearer {openrouter_key}" }
-    body = { "stream": True,
-             "models": [
-                 "mistralai/mistral-7b-instruct:free",
-                 "nousresearch/nous-capybara-7b:free",
-                 "huggingfaceh4/zephyr-7b-beta:free"
-             ],
-             "route": "fallback",
-             "max_tokens": 768,
-             "messages": [
-                 {"role": "user", "content": prompt}
-             ] }
+        prompt = f"Context:\n{context}\n\nAnswer: \"{message}\"\n(You can use the context to answer)\n(Your answer should be complete, detailed and well-structured)"
 
-    full_response = ""
-    async with aiohttp.ClientSession() as session:
-        async with session.post(url, headers=headers, json=body) as response:
-            buffer = ""  # A buffer to hold incomplete lines of data
-            async for chunk in response.content.iter_any():
-                buffer += chunk.decode()
-                while "\n" in buffer:  # Process as long as there are complete lines in the buffer
-                    line, buffer = buffer.split("\n", 1)
-
-                    if line.startswith("data: "):
-                        event_data = line[len("data: "):]
-                        if event_data != '[DONE]':
-                            try:
-                                current_text = json.loads(event_data)['choices'][0]['delta']['content']
-                                full_response += current_text
-                                yield full_response
-                                await asyncio.sleep(0.01)
-                            except Exception:
-                                try:
-                                    current_text = json.loads(event_data)['choices'][0]['text']
-                                    full_response += current_text
-                                    yield full_response
-                                    await asyncio.sleep(0.01)
-                                except Exception:
-                                    pass
-
-    final_metadata_block = ""
+    full_response += f"Embedding time: {embedding_time:.4f} seconds\n"
+    full_response += f"Retrieval from VectorDB time: {retrieval_time:.4f} seconds\n"
+    yield full_response
 
-    final_metadata_block += f"Links visited:\n"
-    for link in links:
-        final_metadata_block += f"{link}\n"
-    final_metadata_block += f"\nWeb search time: {websearch_time:.4f} seconds\n"
-    final_metadata_block += f"\nText extraction: {webcrawl_time:.4f} seconds\n"
-    final_metadata_block += f"\nEmbedding time: {embedding_time:.4f} seconds\n"
-    final_metadata_block += f"\nRetrieval from VectorDB time: {retrieval_time:.4f} seconds"
+    full_response += "\nGenerating response...\n"
+    yield full_response
 
-    yield f"{full_response}\n\n{final_metadata_block}"
+    full_response += "\nResponse: "
+    streaming_response = gemini.generate_content(prompt, stream=True)
+    for sr in streaming_response:
+        full_response += sr.text
+        yield full_response
 
 gr.ChatInterface(
     predict,
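For readers unfamiliar with the SDK this commit switches to, here is a minimal standalone sketch of the two google.generativeai call patterns the new code relies on. The 'gemini-pro' model name and the GEMINI_KEY variable come from the diff above; the prompts are illustrative placeholders, and the SDK must be installed (pip install google-generativeai) with a valid key in the environment.

import os
import google.generativeai as genai

# Configure the client once, as the new module-level code does.
genai.configure(api_key=os.environ.get("GEMINI_KEY"))
gemini = genai.GenerativeModel('gemini-pro')

# Blocking call, as in generate_search_terms(): the whole reply is
# available on response.text once generation completes.
response = gemini.generate_content("Suggest one web search term for: best 2024 laptops")
print(response.text)

# Streaming call, as at the end of predict(): with stream=True the return
# value is iterable and each chunk exposes a .text fragment.
for chunk in gemini.generate_content("Briefly explain vector databases.", stream=True):
    print(chunk.text, end="", flush=True)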
 
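The step-by-step progress messages work because gr.ChatInterface accepts a generator (or async generator) as its function: each successive yield replaces the bot message shown in the UI, which is why predict re-yields the growing full_response after every pipeline stage. A minimal sketch of that contract, with an illustrative function name and text:

import asyncio
import gradio as gr

async def stream_words(message, history):
    # Every yield overwrites the displayed reply, producing a typing effect.
    partial = ""
    for word in "this reply arrives one word at a time".split():
        partial += word + " "
        yield partial
        await asyncio.sleep(0.05)

gr.ChatInterface(stream_words).launch()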