import gradio as gr
from transformers import pipeline, AutoTokenizer, BitsAndBytesConfig, AutoModelForCausalLM
import torch
import spaces
MODEL_PATH = "benhaotang/phi4-qwq-sky-t1"
MODEL_URL = f"https://huggingface.co/{MODEL_PATH}"
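# Load the model with Accelerate's automatic device mapping, so layers that do
# not fit on the GPU can be offloaded to CPU RAM or to disk (offload_folder).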
def load_model():
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=False,
        llm_int8_enable_fp32_cpu_offload=True
    )
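    # device_map="auto" lets Accelerate place layers across available devices;
    # offload_folder is where any weights that spill to disk are stored.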
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        device_map="auto",
        torch_dtype=torch.float16,
        offload_folder="offload_folder",
        quantization_config=bnb_config
    )
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    # The model already carries its device map, so the pipeline does not need one.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
    return pipe
pipe = load_model()
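# On ZeroGPU Spaces, functions decorated with @spaces.GPU are allocated a GPU
# for the duration of each call.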
@spaces.GPU
def generate_response(prompt, max_length=1024):
    # Chat-style input: system prompt plus the user's question
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant. You always think step by step."},
        {"role": "user", "content": prompt}
    ]
    outputs = pipe(messages, max_new_tokens=max_length)
# print("Raw output:", outputs) #removed after debugging output format problem is done | |
# Extract just the assistant's response | |
try: | |
# outputs[0]["generated_text"] is already a list of messages | |
message_list = outputs[0]["generated_text"] | |
# Get the last message (assistant's response) | |
assistant_message = message_list[-1] | |
if assistant_message["role"] == "assistant": | |
return assistant_message["content"] | |
except Exception as e: | |
# print(f"Error extracting response: {e}") | |
# If extraction fails, return the raw output | |
return str(outputs[0]["generated_text"]) | |
return outputs[0]["generated_text"] | |
# Example prompt; a raw string keeps the LaTeX backslashes from being treated as escapes
example_prompt = r"""For a scalar field theory with interaction Lagrangian $\mathcal{L}_{int} = g\phi^3 + \lambda\phi^4$:
1. Enumerate all possible 1-loop Feynman diagrams contributing to the scalar propagator
2. For each diagram, write down its loop contribution
3. Provide Mathematica code to calculate these loop amplitudes with dimensional regularization at $d=4-\epsilon$
Please explain your reasoning step by step."""
demo = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(
            label="Enter your question",
            placeholder="Ask me anything...",
            lines=5
        ),
    ],
    outputs=gr.Textbox(label="Response", lines=10),
    title="benhaotang/phi4-qwq-sky-t1",
    description=f"""A merge of a CoT-finetuned phi4 model, aimed at chain-of-thought and science reasoning at small scale.
Model: [benhaotang/phi4-qwq-sky-t1]({MODEL_URL})""",
    examples=[
        [example_prompt]
    ]
)
demo.launch()
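# --- Optional: querying the Space programmatically ---
# A minimal sketch, assuming this app is deployed as a public Space and that
# the Space ID below (hypothetical) matches your deployment; requires
# `pip install gradio_client`. For a gr.Interface, the default endpoint
# is "/predict".
#
# from gradio_client import Client
# client = Client("benhaotang/phi4-qwq-sky-t1")  # hypothetical Space ID
# result = client.predict("Explain dimensional regularization briefly.", api_name="/predict")
# print(result)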