from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Load the fine-tuned model and tokenizer
model_path = 'model_data/finetuned_gpt'
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
model = GPT2LMHeadModel.from_pretrained(model_path)

# Move the model to GPU if available and switch to inference mode
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()


def generate_text(prompt_text, length, temperature, beams):
    # Tokenize the prompt and move it to the same device as the model
    encoded_prompt = tokenizer.encode(prompt_text, add_special_tokens=False, return_tensors="pt")
    encoded_prompt = encoded_prompt.to(device)

    # Sample up to `length` tokens; with do_sample=True, `beams` is the number
    # of independently sampled sequences (no beam search is performed)
    output_sequences = model.generate(
        input_ids=encoded_prompt,
        max_length=length,
        temperature=temperature,
        top_k=20,
        top_p=0.9,
        repetition_penalty=1.2,
        do_sample=True,
        num_return_sequences=beams,
        pad_token_id=tokenizer.eos_token_id,  # GPT-2 has no pad token; avoids a generation warning
    )

    # Decode the first generated sequence
    generated_sequence = output_sequences[0].tolist()
    text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)

    # Strip the prompt from the front of the generated text
    prompt_decoded = tokenizer.decode(encoded_prompt[0], clean_up_tokenization_spaces=True)
    text = text[len(prompt_decoded):]

    return text.strip()


# Streamlit interface
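# Below is a minimal sketch of the Streamlit interface, assuming this script is
# run with `streamlit run <file>.py`. The widget labels, ranges, and defaults are
# illustrative assumptions, not part of the original code; in a real app the
# model load above would typically be wrapped in @st.cache_resource so it is not
# re-executed on every interaction.
import streamlit as st

st.title("Fine-tuned GPT-2 Text Generator")

prompt = st.text_area("Prompt", value="Once upon a time")
length = st.slider("Max length (tokens)", min_value=20, max_value=512, value=100)
temperature = st.slider("Temperature", min_value=0.1, max_value=2.0, value=0.8)
beams = st.number_input("Number of sampled sequences", min_value=1, max_value=5, value=1)

if st.button("Generate"):
    with st.spinner("Generating..."):
        st.write(generate_text(prompt, length, temperature, int(beams)))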