<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Gradio-Lite: Serverless Gradio Running Entirely in Your Browser</title>
  <meta name="description" content="Gradio-Lite: Serverless Gradio Running Entirely in Your Browser">

  <!--
    Gradio-Lite runtime and stylesheet, loaded from the jsDelivr CDN.
    NOTE(review): both URLs are unversioned, so they track whatever @gradio/lite
    release is current; consider pinning a known-good version.
  -->
  <script type="module" crossorigin src="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.js"></script>
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.css">

  <style>
    /* Remove default page spacing and let the embedded app use the full viewport height. */
    html, body {
      margin: 0;
      padding: 0;
      height: 100%;
    }
  </style>
</head>
<body>
  <!--
    Chat demo: app.py below is the entry point, and the packages listed in
    <gradio-requirements> are installed before it runs. The embedded Python and
    the requirements list are kept at column 0 so their text is passed through
    without extra leading whitespace.
  -->
  <gradio-lite>
<gradio-file name="app.py" entrypoint>
import gradio as gr
from transformers_js_py import pipeline

# Build the text-generation pipeline once, at start-up, so every chat turn
# reuses the same model instance. The options request 4-bit quantized weights
# ("q4") and the WebGPU backend.
# NOTE(review): there is no fallback here if WebGPU is unavailable - confirm
# that this is acceptable for the target browsers.
generator = await pipeline(
    "text-generation",
    "onnx-community/Qwen2.5-0.5B-Instruct",
    { "dtype": "q4", "device": "webgpu" }
)

SYSTEM_PROMPT = "You are a great assistant."


async def chat_response(message, history):
    """Generate the assistant's reply for one chat turn.

    Args:
        message: The text the user just submitted.
        history: Earlier turns of the conversation. The interface is created
            with type="messages", so each entry is expected to be a dict with
            "role" and "content" keys; entries of any other shape are skipped.

    Returns:
        The "content" of the last message in the pipeline's "generated_text".
    """
    messages = [{ "role": "system", "content": SYSTEM_PROMPT }]

    # Replay earlier turns so the model sees the whole conversation instead of
    # only the latest message. Only plain-text user/assistant turns are
    # forwarded, reduced to the two keys a chat message needs.
    for turn in history or []:
        if not isinstance(turn, dict):
            continue
        role = turn.get("role")
        content = turn.get("content")
        if role in ("user", "assistant") and isinstance(content, str):
            messages.append({ "role": role, "content": content })

    messages.append({ "role": "user", "content": message })

    output = await generator(messages, {
        "max_new_tokens": 256,
        "do_sample": True,
        "temperature": 0.3,
    })

    # The reply is read from the last message of the first result.
    response = output[0]["generated_text"][-1]["content"]
    return response


demo = gr.ChatInterface(chat_response, type="messages", autofocus=False)

demo.launch()
</gradio-file>

<gradio-requirements>
transformers-js-py
</gradio-requirements>
  </gradio-lite>
</body>
</html>