Spaces: Running on Zero
Update
app.py CHANGED
```diff
@@ -34,22 +34,14 @@ model.eval()
 @spaces.GPU(duration=90)
 def generate(
     message: str,
-    chat_history: list[tuple[str, str]],
+    chat_history: list[dict],
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
     top_p: float = 0.9,
     top_k: int = 50,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
-    conversation = []
-    for user, assistant in chat_history:
-        conversation.extend(
-            [
-                {"role": "user", "content": user},
-                {"role": "assistant", "content": assistant},
-            ]
-        )
-    conversation.append({"role": "user", "content": message})
+    conversation = [*chat_history, {"role": "user", "content": message}]
 
     input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
@@ -126,6 +118,7 @@ demo = gr.ChatInterface(
         ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
     ],
     cache_examples=False,
+    type="messages",
     description=DESCRIPTION,
     css_paths="style.css",
     fill_height=True,
```