"""Gradio chat demo for the ``stabilityai/stablelm-zephyr-3b`` model.

Loading this module downloads/loads the tokenizer and model, builds a
``gr.Interface`` around ``ChatBot.predict`` and launches it.
"""

import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = 'stabilityai/stablelm-zephyr-3b'
DEFAULT_SYSTEM_PROMPT = "You are an expert analyst and provide assessment:"

# Define the device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    device_map="auto",
)
# NOTE: the model is deliberately NOT moved with ``model.to(device)``.
# ``device_map="auto"`` already places the weights, and moving a dispatched
# model afterwards can warn or fail when modules are offloaded. Inputs are
# sent to ``model.device`` inside ``ChatBot.predict`` instead.


class ChatBot:
    """Single-turn wrapper around the module-level ``model`` and ``tokenizer``."""

    def __init__(self):
        # Kept for interface compatibility; ``predict`` does not read or
        # update it, so every call is an independent single-turn request.
        self.history = []

    def predict(self, user_input: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT) -> str:
        """Generate the model's reply to ``user_input``.

        Args:
            user_input: The user's question or message.
            system_prompt: Extra instruction appended after the user text on
                a new line. A blank value falls back to the default
                instruction.

        Returns:
            The newly generated text only, without the echoed prompt or
            chat-template special tokens.
        """
        # Gradio passes "" for an empty textbox; fall back to the default so
        # the prompt does not end with a dangling "\n:".
        instruction = system_prompt if system_prompt and system_prompt.strip() else DEFAULT_SYSTEM_PROMPT
        # Only add the trailing colon when it is missing (the default
        # instruction already ends with one).
        if not instruction.endswith(":"):
            instruction += ":"

        prompt = [{'role': 'user', 'content': user_input + "\n" + instruction}]
        # assumes this returns an input_ids tensor of shape (1, prompt_len),
        # as the original code relied on -- TODO confirm for newer
        # transformers releases.
        inputs = tokenizer.apply_chat_template(
            prompt,
            add_generation_prompt=True,
            return_tensors='pt',
        )
        prompt_length = inputs.shape[-1]

        # Generate a response using the model. Sampling must be enabled for
        # ``temperature`` to have any effect.
        tokens = model.generate(
            inputs.to(model.device),
            max_new_tokens=250,
            temperature=0.8,
            do_sample=True,
        )

        # The generated sequence starts with the prompt tokens; decode only
        # what comes after them so the UI shows just the answer.
        response_text = tokenizer.decode(
            tokens[0][prompt_length:],
            skip_special_tokens=True,
        )

        # Free up memory
        del tokens
        if device == "cuda":
            torch.cuda.empty_cache()

        return response_text


bot = ChatBot()

title = "👋🏻Welcome to 🌟Tonic's🗽Stable🌟LM 3B🚀Chat"
# Built from adjacent literals purely to keep source lines short; the
# resulting text is the same single markdown string as before.
description = (
    " You can use this Space to test out the current model "
    "[stabilityai/stablelm-zephyr-3b](https://huggingface.co/stabilityai/stablelm-zephyr-3b) "
    "You can also use 😷StableMed⚕️ on your laptop & by cloning this space. 🧬🔬🔍 "
    "Simply click here: Duplicate Space "
    "Join us : 🌟TeamTonic🌟 is always making cool demos! \n"
    "Join our active builder's🛠️community on 👻Discord: "
    "[Discord](https://discord.gg/GWpVpekp) "
    "On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & "
    "[MultiTransformer](https://huggingface.co/MultiTransformer) "
    "On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 "
    "[PolyGPT](https://github.com/tonic-ai/polygpt-alpha) "
)
examples = [
    [
        "What is the proper treatment for buccal herpes?",
        "Please provide information on the most effective antiviral medications and home remedies for treating buccal herpes.",
    ]
]

iface = gr.Interface(
    fn=bot.predict,
    title=title,
    description=description,
    examples=examples,
    inputs=["text", "text"],
    outputs="text",
    theme="ParityError/Anime",
)

iface.launch()