k-mktr's picture
Update app.py
4c2791d verified
import gradio as gr
from functools import lru_cache
import random
import requests
import logging
import config
import plotly.graph_objects as go
from typing import Dict
from leaderboard import (
get_current_leaderboard,
update_leaderboard,
start_backup_thread,
get_leaderboard,
get_elo_leaderboard,
ensure_elo_ratings_initialized
)
import sys
import openai
import threading
import time
from collections import Counter
from model_suggestions import add_suggestion, get_suggestions_html
from release_notes import get_release_notes_html
# Configure the root logger so only ERROR-level (and higher) records are emitted.
logging.basicConfig(level=logging.ERROR)
# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)
# Start the backup thread at import time. The helper comes from the
# `leaderboard` module; what it backs up and how often is not visible here.
start_backup_thread()
def get_available_models():
    """Return the identifier of every approved model.

    The identifier is element 0 of each entry yielded by
    ``config.get_approved_models()``.
    """
    approved_entries = config.get_approved_models()
    return [entry[0] for entry in approved_entries]
# Per-model history of the most recent opponents (at most five each).
recent_opponents = {}


def update_recent_opponents(model_a, model_b):
    """Record that ``model_a`` and ``model_b`` have just faced each other.

    Each model gets the other appended to its history, and the history is
    then trimmed to its five most recent entries.
    """
    for model, opponent in ((model_a, model_b), (model_b, model_a)):
        history = recent_opponents.get(model, [])
        # Keep only the last 5 opponents.
        recent_opponents[model] = (history + [opponent])[-5:]
# Cached model call. Only successful completions are memoised: this helper
# raises on failure, and lru_cache never stores a call that raised.
@lru_cache(maxsize=100)
def _cached_chat_completion(model, prompt):
    """Request a single chat completion and return its text.

    Raises:
        Exception: whatever the OpenAI client raises on failure.
        ValueError: if the completion carries no message content.
    """
    client = openai.OpenAI(
        api_key=config.API_KEY,
        base_url=config.API_URL,
    )
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant. At no point should you reveal your name, identity or team affiliation to the user, especially if asked directly!",
            },
            {
                "role": "user",
                "content": prompt,
            },
        ],
        timeout=100,
    )
    content = response.choices[0].message.content
    if content is None:
        # Callers run substring checks on the result, so None must not leak out.
        raise ValueError("model returned no message content")
    return content


def call_ollama_api(model, prompt):
    """Return the model's reply to ``prompt``, or an error string on failure.

    Successful replies are cached per ``(model, prompt)`` pair (up to 100
    entries). Failures are logged and reported as the fixed string
    ``"Error: Unable to get response from the model."``; they are NOT cached,
    so a transient API error does not stick to that pair.

    Args:
        model: Model identifier passed to the chat-completions endpoint.
        prompt: User message; must be hashable because it is a cache key.

    Returns:
        The reply text, or the error string described above.
    """
    try:
        return _cached_chat_completion(model, prompt)
    except Exception as e:
        logger.error("Error calling Ollama API for model %s: %s", model, e)
        return "Error: Unable to get response from the model."


# Keep the cache-management helpers that the lru_cache-decorated function
# used to expose, so existing callers of them continue to work.
call_ollama_api.cache_clear = _cached_chat_completion.cache_clear
call_ollama_api.cache_info = _cached_chat_completion.cache_info
# Number of battles fought by each model on the current leaderboard
def get_battle_counts():
    """Return a Counter mapping each leaderboard model to wins + losses."""
    return Counter({
        name: record['wins'] + record['losses']
        for name, record in get_current_leaderboard().items()
    })
def generate_responses(prompt):
    """Pick two models for a battle and fetch both of their responses.

    The first model is the available model with the fewest recorded battles.
    The second is drawn at random from the remaining models, excluding the
    first model's recent opponents, with weight ``1 / (battles + 1)`` so
    less-tested models are favoured.

    Args:
        prompt: The user prompt sent to both models.

    Returns:
        ``(response_a, response_b, model_a, model_b)``. When fewer than two
        models are available, both responses are the string
        ``"Error: Not enough models available"`` and both models are ``None``.
    """
    available_models = get_available_models()
    if len(available_models) < 2:
        not_enough = "Error: Not enough models available"
        return not_enough, not_enough, None, None

    battle_counts = get_battle_counts()

    def battles_fought(model):
        # A model absent from the leaderboard has fought zero battles. The
        # same default is used for sorting and weighting so the two agree.
        return battle_counts.get(model, 0)

    # Least-battled model first; it always takes the first slot.
    sorted_models = sorted(available_models, key=battles_fought)
    model_a, *candidates = sorted_models

    # Avoid immediate rematches against model_a's recent opponents.
    recently_faced = recent_opponents.get(model_a, [])
    potential_opponents = [m for m in candidates if m not in recently_faced]
    if not potential_opponents:
        # Everyone was faced recently: forget the history and allow anyone.
        recent_opponents[model_a] = []
        potential_opponents = candidates

    # Weighted random choice for the second model.
    weights = [1 / (battles_fought(m) + 1) for m in potential_opponents]
    model_b = random.choices(potential_opponents, weights=weights, k=1)[0]

    update_recent_opponents(model_a, model_b)

    model_a_response = call_ollama_api(model_a, prompt)
    model_b_response = call_ollama_api(model_b, prompt)
    return model_a_response, model_b_response, model_a, model_b
def battle_arena(prompt):
    """Run one battle for ``prompt`` and build the UI updates for it.

    Returns a 13-tuple in the order expected by ``submit_btn.click``:
    left chat, right chat, left model, right model, left chatbot update,
    right chatbot update, left vote button, right vote button, tie button,
    previous prompt, tie count, model-names row, status box.

    On failure (API error from either model, or fewer than two models
    available) the chats are cleared, voting is disabled and the status box
    shows the error. On success the two models are assigned to the left and
    right sides at random and shown under two distinct nicknames.
    """
    response_a, response_b, model_a, model_b = generate_responses(prompt)

    api_error = "Error: Unable to get response from the model"
    if model_a is None or model_b is None:
        # generate_responses could not pick two models; its response slot
        # carries the explanatory error text.
        failure = response_a
    elif api_error in response_a or api_error in response_b:
        failure = api_error
    else:
        failure = None

    if failure is not None:
        return (
            [], [], None, None,
            gr.update(value=[]),
            gr.update(value=[]),
            gr.update(interactive=False, value="Voting Disabled - API Error"),
            gr.update(interactive=False, value="Voting Disabled - API Error"),
            gr.update(interactive=False, visible=False),
            prompt,
            0,
            gr.update(visible=False),
            gr.update(value=failure, visible=True)
        )

    # Draw two *different* nicknames so the sides and their vote buttons can
    # be told apart; fall back to a single name only if no second one exists.
    unique_nicknames = list(dict.fromkeys(config.model_nicknames))
    if len(unique_nicknames) >= 2:
        nickname_a, nickname_b = random.sample(unique_nicknames, 2)
    else:
        nickname_a = nickname_b = random.choice(config.model_nicknames)

    def as_chat(response):
        # gr.Chatbot "messages" format, including the user's prompt.
        return [
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": response},
        ]

    left = (as_chat(response_a), model_a)
    right = (as_chat(response_b), model_b)
    # Randomise which model lands on which side.
    if not random.choice([True, False]):
        left, right = right, left
    (left_chat, left_name), (right_chat, right_name) = left, right

    # NOTE(review): the emoji in the status string below looks mis-encoded in
    # this copy of the file; it is reproduced unchanged - confirm the encoding.
    return (
        left_chat, right_chat, left_name, right_name,
        gr.update(label=nickname_a, value=left_chat),
        gr.update(label=nickname_b, value=right_chat),
        gr.update(interactive=True, value=f"Vote for {nickname_a}"),
        gr.update(interactive=True, value=f"Vote for {nickname_b}"),
        gr.update(interactive=True, visible=True),
        prompt,
        0,
        gr.update(visible=False),
        gr.update(value="Ready for your vote! πŸ—³οΈ", visible=True)
    )
def record_vote(prompt, left_response, right_response, left_model, right_model, choice):
    """Record the user's vote and return the UI updates that follow it.

    Args:
        prompt: Current prompt text (not used by this function).
        left_response, right_response: Chat contents of the two sides.
        left_model, right_model: Identifiers of the two battling models.
        choice: ``"Left is better"`` makes the left model the winner; any
            other value makes the right model the winner.

    Returns:
        An 8-tuple matching the outputs wired to both vote buttons: status,
        leaderboard, ELO leaderboard, left vote button, right vote button,
        tie button, model-names row, leaderboard chart.
    """
    # Nothing to vote on yet. This branch must return the same number of
    # values as the success branch, one per wired output component.
    if not (left_response and right_response and left_model and right_model):
        return (
            "Please generate responses before voting.",
            gr.update(),                   # Leaderboard unchanged
            gr.update(),                   # ELO leaderboard unchanged
            gr.update(interactive=False),  # Disable left vote button
            gr.update(interactive=False),  # Disable right vote button
            gr.update(interactive=False),  # Disable tie button
            gr.update(visible=False),      # Keep model names hidden
            gr.update()                    # Chart unchanged
        )

    if choice == "Left is better":
        winner, loser = left_model, right_model
    else:
        winner, loser = right_model, left_model

    # Update the leaderboard
    update_leaderboard(winner, loser)

    # NOTE(review): the emoji below look mis-encoded in this copy of the
    # file; the text is reproduced unchanged - confirm the source encoding.
    result_message = f"""
πŸŽ‰ Vote recorded! You're awesome! 🌟
πŸ”΅ In the left corner: {get_human_readable_name(left_model)}
πŸ”΄ In the right corner: {get_human_readable_name(right_model)}
πŸ† And the champion you picked is... {get_human_readable_name(winner)}! πŸ₯‡
"""
    return (
        gr.update(value=result_message, visible=True),  # Show the result
        get_leaderboard(),  # Refresh leaderboard
        get_elo_leaderboard(),  # Refresh ELO leaderboard
        gr.update(interactive=False),  # Disable left vote button
        gr.update(interactive=False),  # Disable right vote button
        gr.update(interactive=False),  # Disable tie button
        gr.update(visible=True),  # Reveal model names
        get_leaderboard_chart()  # Refresh leaderboard chart
    )
def get_leaderboard_chart():
    """Build the Plotly figure summarising every model's performance.

    For each model the score is ``win_rate * (1 - 1 / (battles + 1))``, or 0
    when it has no battles. Models are ordered by (score, battles) descending.
    Wins and losses are drawn as stacked bars, and the score as a line on a
    secondary y-axis.

    The leaderboard data returned by ``get_current_leaderboard()`` is only
    read; scores are kept in local rows rather than written back into it.

    Returns:
        The ``plotly.graph_objects.Figure``.
    """
    battle_results = get_current_leaderboard()

    # One (model, wins, losses, score) row per model.
    rows = []
    for model, results in battle_results.items():
        model_wins = results["wins"]
        model_losses = results["losses"]
        total_battles = model_wins + model_losses
        if total_battles > 0:
            win_rate = model_wins / total_battles
            # Discount the win rate of models with few battles.
            score = win_rate * (1 - 1 / (total_battles + 1))
        else:
            score = 0
        rows.append((model, model_wins, model_losses, score))

    # Best score first; ties broken by number of battles.
    rows.sort(key=lambda row: (row[3], row[1] + row[2]), reverse=True)

    models = [get_human_readable_name(row[0]) for row in rows]
    wins = [row[1] for row in rows]
    losses = [row[2] for row in rows]
    scores = [row[3] for row in rows]

    fig = go.Figure()
    # Stacked Bar chart for Wins and Losses
    fig.add_trace(go.Bar(
        x=models,
        y=wins,
        name='Wins',
        marker_color='#22577a'
    ))
    fig.add_trace(go.Bar(
        x=models,
        y=losses,
        name='Losses',
        marker_color='#38a3a5'
    ))
    # Line chart for Scores
    fig.add_trace(go.Scatter(
        x=models,
        y=scores,
        name='Score',
        yaxis='y2',
        line=dict(color='#ff7f0e', width=2)
    ))
    # Update layout for full-width, increased height, and secondary y-axis
    fig.update_layout(
        title='Model Performance',
        xaxis_title='Models',
        yaxis_title='Number of Battles',
        yaxis2=dict(
            title='Score',
            overlaying='y',
            side='right'
        ),
        barmode='stack',
        height=800,
        width=1450,
        autosize=True,
        legend=dict(
            orientation='h',
            yanchor='bottom',
            y=1.02,
            xanchor='right',
            x=1
        )
    )
    return fig
def new_battle():
    """Reset the Battle Arena for a fresh battle.

    Returns a 13-tuple matching the outputs wired to ``new_battle_btn``:
    prompt input, left chatbot, right chatbot, left model, right model,
    left vote button, right vote button, tie button, status box,
    leaderboard, model-names row, leaderboard chart, tie count.
    """
    # Draw two *different* nicknames so the sides and their vote buttons can
    # be told apart; fall back to a single name only if no second one exists.
    unique_nicknames = list(dict.fromkeys(config.model_nicknames))
    if len(unique_nicknames) >= 2:
        nickname_a, nickname_b = random.sample(unique_nicknames, 2)
    else:
        nickname_a = nickname_b = random.choice(config.model_nicknames)

    return (
        "",  # Reset prompt_input
        gr.update(value=[], label=nickname_a),  # Reset left Chatbot
        gr.update(value=[], label=nickname_b),  # Reset right Chatbot
        None,  # Clear left model
        None,  # Clear right model
        gr.update(interactive=False, value=f"Vote for {nickname_a}"),
        gr.update(interactive=False, value=f"Vote for {nickname_b}"),
        gr.update(interactive=False, visible=False),  # Reset Tie button
        gr.update(value="", visible=False),  # Clear and hide status box
        gr.update(),  # Leaderboard unchanged
        gr.update(visible=False),  # Hide model names
        gr.update(),  # Chart unchanged
        0  # Reset tie_count
    )
# Map a model identifier to its display name
def get_human_readable_name(model_name: str) -> str:
    """Return the display name for ``model_name``.

    The lookup table is built from ``config.get_approved_models()``. If the
    identifier is not in it, the identifier itself is returned.
    """
    display_names = dict(config.get_approved_models())
    if model_name in display_names:
        return display_names[model_name]
    return model_name
# Pick one of the configured example prompts at random
def random_prompt():
    """Return a randomly chosen entry of ``config.example_prompts``."""
    example_prompts = config.example_prompts
    return random.choice(example_prompts)
# Continue a tied battle with another prompt
def continue_conversation(prompt, left_chat, right_chat, left_model, right_model, previous_prompt, tie_count):
    """Send a follow-up prompt to both models after a tie.

    If ``prompt`` is empty or equal to ``previous_prompt``, a random example
    prompt is used instead. The prompt and each model's reply are appended to
    the corresponding chat history in place. The tie counter is incremented;
    once it reaches 3 the tie button is disabled and relabelled.

    Returns:
        ``(left chat update, right chat update, prompt-input update,
        tie-button update, prompt used, new tie count)``.
    """
    prompt_is_stale = (not prompt) or prompt == previous_prompt
    if prompt_is_stale:
        prompt = random.choice(config.example_prompts)

    # Query both models first (left, then right), then extend the histories.
    replies = [call_ollama_api(model, prompt) for model in (left_model, right_model)]
    for chat, reply in zip((left_chat, right_chat), replies):
        chat.extend([
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": reply},
        ])

    tie_count = tie_count + 1
    if tie_count < 3:
        tie_button_state = gr.update(interactive=True)
    else:
        tie_button_state = gr.update(interactive=False, value="Max ties reached. Please vote!")

    return (
        gr.update(value=left_chat),
        gr.update(value=right_chat),
        gr.update(value=""),  # Clear the prompt input
        tie_button_state,
        prompt,  # Becomes the new previous_prompt
        tie_count
    )
# Build the Gradio UI: six tabs plus the event wiring between components and
# the handler functions defined in this file.
# NOTE(review): this copy of the file had lost its indentation; the nesting of
# rows/tabs below is reconstructed from the statement order - confirm it
# against the deployed layout.
with gr.Blocks(css="""
#dice-button {
min-height: 90px;
font-size: 35px;
}
""") as demo:
    # Page header, taken from the config module.
    gr.Markdown(config.ARENA_NAME)
    gr.Markdown(config.ARENA_DESCRIPTION)
    # Leaderboard Tab (first tab); its HTML is filled in by demo.load below.
    with gr.Tab("Leaderboard"):
        leaderboard = gr.HTML(label="Leaderboard")
    # Battle Arena Tab (second tab)
    with gr.Tab("Battle Arena"):
        # Prompt box with the random-prompt button beside it.
        with gr.Row():
            prompt_input = gr.Textbox(
                label="Enter your prompt",
                placeholder="Type your prompt here...",
                scale=20
            )
            random_prompt_btn = gr.Button("🎲", scale=1, elem_id="dice-button")
        gr.Markdown("<br>")
        # The random-prompt button fills the prompt box with an example prompt.
        random_prompt_btn.click(
            random_prompt,
            outputs=prompt_input
        )
        submit_btn = gr.Button("Generate Responses", variant="primary")
        # The two chat panes; each starts with a randomly chosen nickname label.
        with gr.Row():
            left_output = gr.Chatbot(label=random.choice(config.model_nicknames), type="messages")
            right_output = gr.Chatbot(label=random.choice(config.model_nicknames), type="messages")
        # Voting controls; all start disabled and the tie button starts hidden.
        with gr.Row():
            left_vote_btn = gr.Button(f"Vote for {left_output.label}", interactive=False)
            tie_btn = gr.Button("Tie πŸ™ˆ Continue with a new prompt", interactive=False, visible=False)
            right_vote_btn = gr.Button(f"Vote for {right_output.label}", interactive=False)
        # Status box used for progress and result messages.
        result = gr.Textbox(
            label="Status",
            interactive=False,
            value="Generate responses to start the battle! πŸš€",
            visible=True  # Visible from the start
        )
        # Real model names; the row starts hidden and record_vote reveals it.
        with gr.Row(visible=False) as model_names_row:
            left_model = gr.Textbox(label="πŸ”΅ Left Model", interactive=False)
            right_model = gr.Textbox(label="πŸ”΄ Right Model", interactive=False)
        previous_prompt = gr.State("")  # Last prompt sent to the models
        tie_count = gr.State(0)  # Number of ties in the current battle
        new_battle_btn = gr.Button("New Battle")
    # Performance Chart Tab
    with gr.Tab("Performance Chart"):
        leaderboard_chart = gr.Plot(label="Model Performance Chart")
    # ELO Leaderboard Tab
    with gr.Tab("ELO Leaderboard"):
        elo_leaderboard = gr.HTML(label="ELO Leaderboard")
    # Suggest Models Tab: users submit a model URL and see the list of suggestions.
    with gr.Tab("Suggest Models"):
        with gr.Row():
            model_url_input = gr.Textbox(
                label="Model URL",
                placeholder="hf.co/username/model-name-GGUF:Q4_K_M",
                scale=4
            )
            submit_suggestion_btn = gr.Button("Submit Suggestion", scale=1, variant="primary")
        suggestion_status = gr.Markdown("Submit a model to see it listed below!")
        suggestions_list = gr.HTML(get_suggestions_html())
        refresh_suggestions_btn = gr.Button("Refresh List")
        # Submitting stores the suggestion and shows its status, then a
        # follow-up step refreshes the list and clears the URL box.
        submit_suggestion_btn.click(
            add_suggestion,
            inputs=[model_url_input],
            outputs=[suggestion_status]
        ).then(
            lambda: (
                get_suggestions_html(),  # Update suggestions list
                ""  # Clear model URL input
            ),
            outputs=[
                suggestions_list,
                model_url_input
            ]
        )
        refresh_suggestions_btn.click(
            get_suggestions_html,
            outputs=[suggestions_list]
        )
    # Latest Updates Tab: release notes with a manual refresh button.
    with gr.Tab("Latest Updates"):
        release_notes = gr.HTML(get_release_notes_html())
        refresh_notes_btn = gr.Button("Refresh Updates")
        refresh_notes_btn.click(
            get_release_notes_html,
            outputs=[release_notes]
        )
    # Define interactions
    # battle_arena returns 13 values, one per entry below. left_output and
    # right_output are listed twice, so each receives two of the returned values.
    submit_btn.click(
        battle_arena,
        inputs=prompt_input,
        outputs=[
            left_output, right_output, left_model, right_model,
            left_output, right_output, left_vote_btn, right_vote_btn,
            tie_btn, previous_prompt, tie_count, model_names_row, result
        ]
    )
    # Both vote buttons call record_vote with the five inputs plus a fixed
    # choice string; eight output components are wired to each.
    left_vote_btn.click(
        lambda *args: record_vote(*args, "Left is better"),
        inputs=[prompt_input, left_output, right_output, left_model, right_model],
        outputs=[result, leaderboard, elo_leaderboard, left_vote_btn,
                 right_vote_btn, tie_btn, model_names_row, leaderboard_chart]
    )
    right_vote_btn.click(
        lambda *args: record_vote(*args, "Right is better"),
        inputs=[prompt_input, left_output, right_output, left_model, right_model],
        outputs=[result, leaderboard, elo_leaderboard, left_vote_btn,
                 right_vote_btn, tie_btn, model_names_row, leaderboard_chart]
    )
    # A tie continues the same battle with another prompt.
    tie_btn.click(
        continue_conversation,
        inputs=[prompt_input, left_output, right_output, left_model, right_model, previous_prompt, tie_count],
        outputs=[left_output, right_output, prompt_input, tie_btn, previous_prompt, tie_count]
    )
    # new_battle returns 13 values, one per entry below.
    new_battle_btn.click(
        new_battle,
        outputs=[prompt_input, left_output, right_output, left_model,
                 right_model, left_vote_btn, right_vote_btn, tie_btn,
                 result, leaderboard, model_names_row, leaderboard_chart, tie_count]
    )
    # Populate both leaderboards and the chart when the page loads.
    demo.load(get_leaderboard, outputs=leaderboard)
    demo.load(get_elo_leaderboard, outputs=elo_leaderboard)
    demo.load(get_leaderboard_chart, outputs=leaderboard_chart)
def _run_app():
    """Prepare background state, then launch the Gradio app."""
    # ELO ratings must exist before the app starts serving.
    ensure_elo_ratings_initialized()
    # Start the model refresh thread.
    config.start_model_refresh_thread()
    demo.launch(show_api=False)


if __name__ == "__main__":
    _run_app()