Pico Collection
Pico is a family of reasoning models designed to reason and self-reflect.
Pico v1 is a work-in-progress model. Based on Phi-3.5 Mini, it has been fine-tuned for automatic chain-of-thought (CoT) reasoning and self-reflection.
When generating an output, Pico produces three sections: a reasoning section, a self-reflection section, and an output section.
Pico v1 struggles with tasks that are not question-answering (small talk, roleplay, etc.).
Here is an example of how you can use it:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Jinja form of the Phi-3.5 chat template (equivalent to build_prompt below).
phi3_template = (
    "{{ bos_token }}"
    "{% for message in messages %}"
    "{{ '<|' + message['role'] + '|>\\n' + message['content'] + '<|end|>\\n' }}"
    "{% endfor %}"
    "{% if add_generation_prompt %}"
    "{{ '<|assistant|>\\n' }}"
    "{% endif %}"
)
phi3_template_eos_token = "<|end|>"


def build_prompt(messages, bos_token="<|start|>", add_generation_prompt=True):
    """Build a prompt string using the Phi-3.5 chat template."""
    prompt = bos_token
    for message in messages:
        prompt += f"<|{message['role']}|>\n{message['content']}\n<|end|>\n"
    if add_generation_prompt:
        prompt += "<|assistant|>\n"
    return prompt


def chat_with_model():
    # Load the model and tokenizer
    model_name = "LucidityAI/Pico-v1-3b"
    print("Loading model and tokenizer...")

    # Enforce GPU usage
    if not torch.cuda.is_available():
        raise RuntimeError("CUDA is not available. Please ensure your GPU and CUDA environment are configured correctly.")
    device = torch.device("cuda")

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to(device)
    print("Model and tokenizer loaded successfully.")

    # Chat loop
    print("Start chatting with the model! Type 'exit' to quit.")
    conversation = []
    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            print("Goodbye!")
            break

        # Append the user's message and build the prompt with the Phi-3.5 template
        conversation.append({"role": "user", "content": user_input})
        prompt = build_prompt(conversation, bos_token=tokenizer.bos_token or "<|start|>")

        # Tokenize the prompt and generate a response
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=1024,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.5,
            pad_token_id=tokenizer.eos_token_id,
        )

        # Decode only the newly generated tokens (everything after the prompt)
        assistant_reply = tokenizer.decode(
            outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True
        ).strip()
        print(f"Model: {assistant_reply}")

        # Append the assistant's reply to the conversation
        conversation.append({"role": "assistant", "content": assistant_reply})


if __name__ == "__main__":
    chat_with_model()
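The reply can then be split into its reasoning, self-reflection, and output sections before display or logging. The sketch below is a minimal illustration only: the headers "Reasoning:", "Self-reflection:", and "Output:" are hypothetical placeholders and should be replaced with whatever markers Pico actually emits.

import re

# Hypothetical section headers; replace with the markers the model actually uses.
SECTION_HEADERS = ["Reasoning:", "Self-reflection:", "Output:"]


def split_sections(reply: str) -> dict:
    """Split a Pico reply into its sections, keyed by header name.

    Falls back to returning the whole reply under "Output" if no headers are found.
    """
    pattern = "(" + "|".join(re.escape(h) for h in SECTION_HEADERS) + ")"
    parts = re.split(pattern, reply)
    # re.split with a capturing group alternates [text, header, body, header, body, ...]
    sections = {header.rstrip(":"): body.strip()
                for header, body in zip(parts[1::2], parts[2::2])}
    return sections or {"Output": reply.strip()}


# Example: show only the final answer, keep the reasoning around for debugging.
# sections = split_sections(assistant_reply)
# print(sections.get("Output", assistant_reply))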
Base model: microsoft/Phi-3.5-mini-instruct
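Because the base model is microsoft/Phi-3.5-mini-instruct, whose tokenizer ships a built-in chat template, the manual build_prompt helper above can usually be replaced by tokenizer.apply_chat_template. A minimal sketch, assuming the fine-tuned repo keeps the base tokenizer's template and that it matches the format Pico was trained on:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("LucidityAI/Pico-v1-3b")

messages = [{"role": "user", "content": "What is 17 * 24?"}]

# Render the conversation with the tokenizer's bundled chat template and
# append the generation prompt, mirroring build_prompt above.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)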