cnmoro committed on
Commit
3094e1a
·
verified ·
1 Parent(s): cd654e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -14
app.py CHANGED
@@ -1,21 +1,17 @@
1
- import time, os, multiprocessing, torch
2
  from minivectordb.embedding_model import EmbeddingModel
3
  from minivectordb.vector_database import VectorDatabase
4
  from text_util_en_pt.cleaner import structurize_text, detect_language, Language
5
  from webtextcrawler.webtextcrawler import extract_text_from_url
6
- from duckduckgo_search import DDGS
7
- import google.generativeai as genai
8
  import gradio as gr
 
9
 
10
  torch.set_num_threads(2)
11
- gemini_key = os.environ.get("GEMINI_KEY")
12
- genai.configure(api_key=gemini_key)
13
- gemini = genai.GenerativeModel('gemini-pro')
14
  model = EmbeddingModel(use_quantized_onnx_model=True)
15
 
16
  def fetch_links(query, max_results=10):
17
- with DDGS() as ddgs:
18
- return [r['href'] for r in ddgs.text(keywords=query, max_results=max_results)]
19
 
20
  def fetch_texts(links):
21
  with multiprocessing.Pool(10) as pool:
@@ -48,8 +44,22 @@ def generate_search_terms(message, lang):
48
  else:
49
  prompt = f"From the following text, generate some search terms: \"{message}\"\nYour answer should be just the most appropriate search term, and nothing else."
50
 
51
- response = gemini.generate_content(prompt)
52
- return response.text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  async def predict(message, history):
55
  full_response = ""
@@ -108,10 +118,45 @@ async def predict(message, history):
108
  yield full_response
109
 
110
  full_response += "\nResponse: "
111
- streaming_response = gemini.generate_content(prompt, stream=True)
112
- for sr in streaming_response:
113
- full_response += sr.text
114
- yield full_response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  gr.ChatInterface(
117
  predict,
 
1
+ import time, os, multiprocessing, torch, requests, asyncio, json, aiohttp
2
  from minivectordb.embedding_model import EmbeddingModel
3
  from minivectordb.vector_database import VectorDatabase
4
  from text_util_en_pt.cleaner import structurize_text, detect_language, Language
5
  from webtextcrawler.webtextcrawler import extract_text_from_url
 
 
6
  import gradio as gr
7
+ from googlesearch import search
8
 
9
  torch.set_num_threads(2)
10
+ openrouter_key = os.environ.get("OPENROUTER_KEY")
 
 
11
  model = EmbeddingModel(use_quantized_onnx_model=True)
12
 
13
  def fetch_links(query, max_results=10):
14
+ return list(search(query, num_results=max_results))
 
15
 
16
  def fetch_texts(links):
17
  with multiprocessing.Pool(10) as pool:
 
44
  else:
45
  prompt = f"From the following text, generate some search terms: \"{message}\"\nYour answer should be just the most appropriate search term, and nothing else."
46
 
47
+ url = "https://openrouter.ai/api/v1/chat/completions"
48
+ headers = { "Content-Type": "application/json",
49
+ "Authorization": f"Bearer {openrouter_key}" }
50
+ body = { "stream": False,
51
+ "models": [
52
+ "mistralai/mistral-7b-instruct:free",
53
+ "openchat/openchat-7b:free"
54
+ ],
55
+ "route": "fallback",
56
+ "max_tokens": 1024,
57
+ "messages": [
58
+ {"role": "user", "content": prompt}
59
+ ] }
60
+
61
+ response = requests.post(url, headers=headers, json=body)
62
+ return response.json()['choices'][0]['message']['content']
63
 
64
  async def predict(message, history):
65
  full_response = ""
 
118
  yield full_response
119
 
120
  full_response += "\nResponse: "
121
+
122
+ url = "https://openrouter.ai/api/v1/chat/completions"
123
+ headers = { "Content-Type": "application/json",
124
+ "Authorization": f"Bearer {openrouter_key}" }
125
+ body = { "stream": True,
126
+ "models": [
127
+ "mistralai/mistral-7b-instruct:free",
128
+ "openchat/openchat-7b:free"
129
+ ],
130
+ "route": "fallback",
131
+ "max_tokens": 1024,
132
+ "messages": [
133
+ {"role": "user", "content": prompt}
134
+ ] }
135
+
136
+ async with aiohttp.ClientSession() as session:
137
+ async with session.post(url, headers=headers, json=body) as response:
138
+ buffer = "" # A buffer to hold incomplete lines of data
139
+ async for chunk in response.content.iter_any():
140
+ buffer += chunk.decode()
141
+ while "\n" in buffer: # Process as long as there are complete lines in the buffer
142
+ line, buffer = buffer.split("\n", 1)
143
+
144
+ if line.startswith("data: "):
145
+ event_data = line[len("data: "):]
146
+ if event_data != '[DONE]':
147
+ try:
148
+ current_text = json.loads(event_data)['choices'][0]['delta']['content']
149
+ full_response += current_text
150
+ yield full_response
151
+ await asyncio.sleep(0.01)
152
+ except Exception:
153
+ try:
154
+ current_text = json.loads(event_data)['choices'][0]['text']
155
+ full_response += current_text
156
+ yield full_response
157
+ await asyncio.sleep(0.01)
158
+ except Exception:
159
+ pass
160
 
161
  gr.ChatInterface(
162
  predict,