import os
import gradio as gr
from huggingface_hub import login, InferenceClient
import spaces

# Authenticate with the Hugging Face Hub (token expected in the TOKEN env var)
api_key = os.getenv("TOKEN")
if not api_key:
    raise RuntimeError("Set the TOKEN environment variable to a Hugging Face access token.")
login(api_key)

# Predefined list of models to compare (can be expanded)
model_options = {
    "Llama-3.1-70B": "meta-llama/Llama-3.1-70B-Instruct",
    "Qwen-2.5-1.5B-Instruct": "Qwen/Qwen2.5-1.5B-Instruct",
    "Llama-3.2-1B": "meta-llama/Llama-3.2-1B",
    "DeepSeek-V2.5": "deepseek-ai/DeepSeek-V2.5",
    "Athene-V2-Chat": "Nexusflow/Athene-V2-Chat",
}

# Initialize clients for models
clients = {name: InferenceClient(repo_id) for name, repo_id in model_options.items()}

# Define the response function
@spaces.GPU
def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
    selected_models,
):
    messages = [{"role": "system", "content": system_message}] + history
    messages.append({"role": "user", "content": message})

    responses = {}

    # Generate responses for each selected model
    for model_name in selected_models:
        client = clients[model_name]
        response = ""
        for token in client.chat_completion(
            messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p
        ):
            # Stream chunks may carry an empty/None delta (e.g., the final chunk); skip those.
            delta = token.choices[0].delta.content
            if delta:
                response += delta
        responses[model_name] = response

    return responses
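
# As a rough illustration (not part of the app flow), respond() could be called
# directly as sketched below; the argument values are placeholders that mirror
# the Gradio inputs defined further down:
#
#   respond(
#       "What is nucleus sampling?",
#       history=[],
#       system_message="You are a helpful assistant.",
#       max_tokens=256,
#       temperature=0.7,
#       top_p=0.95,
#       selected_models=["Llama-3.1-70B", "Qwen-2.5-1.5B-Instruct"],
#   )
#
# It returns a dict mapping each selected model name to that model's full reply.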

# Build Gradio app
def create_demo():
    with gr.Blocks() as demo:
        gr.Markdown("# AI Model Comparison Tool 🌟")
        gr.Markdown(
            """
            Compare responses from two AI models side-by-side.  
            Select two models, ask a question, and compare their responses in real time!
            """
        )

        # Input Section
        with gr.Row():
            system_message = gr.Textbox(
                value="You are a helpful assistant providing answers for technical and customer support queries.",
                label="System message"
            )
            user_message = gr.Textbox(label="Your question", placeholder="Type your question here...")

        with gr.Row():
            max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
            )

        # Model Selection Section
        selected_models = gr.CheckboxGroup(
            choices=list(model_options.keys()),
            label="Select exactly two models to compare",
            value=["Llama-3.1-70B", "Qwen-2.5-1.5B-Instruct"],  # Default models
        )

        # Dynamic Response Section
        response_box1 = gr.Textbox(label="Response from Model 1", interactive=False)
        response_box2 = gr.Textbox(label="Response from Model 2", interactive=False)

        # Function to generate responses
        def generate_responses(
            message, system_message, max_tokens, temperature, top_p, selected_models
        ):
            if len(selected_models) != 2:
                error = "Error: Please select exactly two models to compare."
                return error, error
            responses = respond(
                message, [], system_message, max_tokens, temperature, top_p, selected_models
            )
            return responses.get(selected_models[0], ""), responses.get(selected_models[1], "")

        # Add a button for generating responses
        submit_button = gr.Button("Generate Responses")
        submit_button.click(
            generate_responses,
            inputs=[user_message, system_message, max_tokens, temperature, top_p, selected_models],
            outputs=[response_box1, response_box2],  # Link to response boxes
        )

    return demo

if __name__ == "__main__":
    demo = create_demo()
    demo.launch()
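
# Local run sketch (assumptions: this file is saved as app.py, a valid Hugging Face
# access token is available, and the "spaces" package is only needed when deploying
# to a ZeroGPU Space):
#
#   pip install gradio huggingface_hub spaces
#   export TOKEN=hf_xxx          # placeholder token value
#   python app.py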