Update app.py
app.py CHANGED
@@ -25,7 +25,13 @@ class StopOnTokens(StoppingCriteria):
 @spaces.GPU
 def predict(message, history):
     stop = StopOnTokens()
-
+    conversation = []
+
+    for user, assistant in history:
+        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
+
+    conversation.append({"role": "user", "content": message})
+    prompt = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
     model_inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
     streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
@@ -52,7 +58,7 @@ def predict(message, history):
 
 # Setting up the Gradio chat interface.
 gr.ChatInterface(predict,
-    title="
-    description="
-    examples=['
+    title="SOLAR 10.7B Instruct v1.0",
+    description="Warning. All answers are generated and may contain inaccurate information.",
+    examples=['How do you cook fish?', 'Who is the president of the United States?']
 ).launch() # Launching the web interface.
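
The added lines convert Gradio's `history` (a list of `(user, assistant)` pairs) into the role/content message list that `tokenizer.apply_chat_template` expects, then render the model-specific prompt string. Below is a minimal sketch of that transformation in isolation; the checkpoint id `upstage/SOLAR-10.7B-Instruct-v1.0` is an assumption inferred from the Space's title and is not shown in this diff:

```python
from transformers import AutoTokenizer

# Assumed model id; the diff itself does not name the checkpoint.
tokenizer = AutoTokenizer.from_pretrained("upstage/SOLAR-10.7B-Instruct-v1.0")

# Gradio's ChatInterface passes history as [(user_msg, assistant_msg), ...].
history = [("Hi!", "Hello, how can I help?")]
message = "How do you cook fish?"

# Flatten the tuples into the role/content dicts chat templates expect.
conversation = []
for user, assistant in history:
    conversation.extend([
        {"role": "user", "content": user},
        {"role": "assistant", "content": assistant},
    ])
conversation.append({"role": "user", "content": message})

# Render the model-specific prompt string.
prompt = tokenizer.apply_chat_template(
    conversation, tokenize=False, add_generation_prompt=True
)
print(prompt)
```

`add_generation_prompt=True` appends the template's assistant header, so the model continues as the assistant rather than predicting another user turn.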
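
Both hunks cut off at `generate_kwargs = dict(`, so the diff does not show how the `TextIteratorStreamer` is drained. For reference, the usual pattern is to run `model.generate` in a background thread and yield partial text as it arrives. The sketch below shows that standard pattern, not necessarily this Space's exact code; the sampling values are placeholders:

```python
from threading import Thread
from transformers import StoppingCriteriaList, TextIteratorStreamer

def stream_reply(model, tokenizer, model_inputs, stop):
    # Emit tokens as they are produced instead of waiting for the full reply.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        model_inputs,                                    # input_ids / attention_mask
        streamer=streamer,
        max_new_tokens=1024,                             # placeholder; not shown in the diff
        do_sample=True,
        temperature=0.7,                                 # placeholder; not shown in the diff
        stopping_criteria=StoppingCriteriaList([stop]),  # the StopOnTokens instance from predict()
    )
    # model.generate blocks, so run it in a worker thread and drain the queue here.
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # ChatInterface re-renders the message with each partial string
```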
|