import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import spaces

# ZeroGPU pattern: a CUDA tensor created at startup resolves to the GPU
# only inside functions decorated with @spaces.GPU.
zero = torch.Tensor([0]).cuda()

# Load the model and tokenizer once, at startup, rather than per request.
model_name = "deepapaikar/Katzbot_Llama_7b_QA_10eps"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")


@spaces.GPU
def generate_text(input_text):
    """Generates text with the Katzbot_Llama_7b_QA_10eps model.

    Args:
        input_text (str): The input text used as a prompt.

    Returns:
        str: The generated text.
    """
    inputs = tokenizer(input_text, return_tensors="pt").to(zero.device)
    # Without max_new_tokens, generate() falls back to a short default
    # length and may truncate answers mid-sentence.
    outputs = model.generate(**inputs, max_new_tokens=256)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=5, label="Enter your text here:"),
    outputs=gr.Textbox(lines=5, label="Generated Text:"),
    title="KatzLLaMA",
    description="Enter some text and this app will generate a continuation using the KatzLLaMA model.",
)

if __name__ == "__main__":
    iface.launch(debug=True)
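
# --- Usage sketch (run as a separate script, not inside this app) ---
# Once the Space is deployed, the endpoint can be queried programmatically
# with the gradio_client library. "user/space-name" is a placeholder for
# the actual Space ID; "/predict" is Gradio's default api_name for a
# single-function Interface like the one above.
#
#   from gradio_client import Client
#
#   client = Client("user/space-name")
#   result = client.predict("What programs does Katz School offer?",
#                           api_name="/predict")
#   print(result)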