|
import torch |
|
from transformers import AutoTokenizer |
|
from petals import AutoDistributedModelForCausalLM |
|
|
|
# Petals-hosted checkpoint identifier (Hugging Face Hub repo).
model_name = "petals-team/StableBeluga2"



# Slow (SentencePiece) tokenizer; add_bos_token=False so encoding the prompt
# does not prepend a BOS token — the generated ids then start exactly where
# the prompt ids end.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, add_bos_token=False)

# NOTE(review): this loads the model over the Petals distributed swarm —
# remote peers serve the transformer layers, so this call needs network
# access and may take a while on first run.
model = AutoDistributedModelForCausalLM.from_pretrained(model_name)
|
|
|
import gradio as gr |
|
|
|
def generate(input):
    """Sample up to 80 new tokens continuing *input* and return only the new text.

    Args:
        input: Prompt string from the Gradio textbox. (Name kept for
            interface stability even though it shadows the builtin.)

    Returns:
        The decoded continuation, with special tokens (e.g. ``</s>``) removed.
    """
    input_ids = tokenizer(input, return_tensors="pt")["input_ids"]
    outputs = model.generate(input_ids, max_new_tokens=80, do_sample=True, temperature=0.9)
    # Strip the prompt at the *token* level rather than slicing the decoded
    # string by len(input): decoding can normalize whitespace/characters, so
    # a character count is not a reliable prompt boundary.
    new_tokens = outputs[0][input_ids.shape[1]:]
    # skip_special_tokens removes </s> (and any other special tokens)
    # robustly, replacing the manual .replace("</s>", "").
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
|
|
|
# Wrap `generate` in a minimal text-in / text-out web UI and serve it locally.
iface = gr.Interface(
    fn=generate,
    inputs="text",
    outputs="text",
)
iface.launch()