Commit: update

Files changed:
- .gradio/certificate.pem +31 -0
- app.py +88 -198
- content.py +30 -0
- css.py +13 -0
- utils.py +13 -0
.gradio/certificate.pem
ADDED
@@ -0,0 +1,31 @@
+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----
app.py
CHANGED
@@ -1,204 +1,94 @@
-import
 import pandas as pd
-from
-        ),
-        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
-        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
-        filter_columns=[
-            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
-            ColumnFilter(
-                AutoEvalColumn.params.name,
-                type="slider",
-                min=0.01,
-                max=150,
-                label="Select the number of parameters (B)",
-            ),
-            ColumnFilter(
-                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
-            ),
-        ],
-        bool_checkboxgroup_label="Hide models",
-        interactive=False,
-    )
-
-
-demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
-    gr.Markdown(
-        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-            with gr.Column():
-                with gr.Row():
-                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-                with gr.Column():
-                    with gr.Accordion(
-                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            finished_eval_table = gr.components.Dataframe(
-                                value=finished_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            running_eval_table = gr.components.Dataframe(
-                                value=running_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-
-                    with gr.Accordion(
-                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            pending_eval_table = gr.components.Dataframe(
-                                value=pending_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-            with gr.Row():
-                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-
-            with gr.Row():
-                with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                    model_type = gr.Dropdown(
-                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                        label="Model type",
-                        multiselect=False,
-                        value=None,
-                        interactive=True,
-                    )
-
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=[i.value.name for i in WeightType],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
-            submit_button = gr.Button("Submit Eval")
-            submission_result = gr.Markdown()
-            submit_button.click(
-                add_new_eval,
-                [
-                    model_name_textbox,
-                    base_model_name_textbox,
-                    revision_name_textbox,
-                    precision,
-                    weight_type,
-                    model_type,
-                ],
-                submission_result,
             )

-    with gr.Row():
-        with gr.Accordion("📙 Citation", open=False):
-            citation_button = gr.Textbox(
-                value=CITATION_BUTTON_TEXT,
-                label=CITATION_BUTTON_LABEL,
-                lines=20,
-                elem_id="citation-button",
-                show_copy_button=True,
-            )

-demo.
+import os
+import json
+import glob
+from collections import defaultdict
 import pandas as pd
+import gradio as gr
+from content import *
+from css import *
+import glob
+
+ARC = "arc"
+HELLASWAG = "hellaswag"
+MMLU = "mmlu"
+TRUTHFULQA = "truthfulqa"
+BENCHMARKS = [ARC, HELLASWAG, MMLU, TRUTHFULQA]
+
+METRICS = ["acc_norm", "acc_norm", "acc_norm", "mc2"]
+
+LANGS = 'ar,bn,ca,da,de,es,eu,fr,gu,hi,hr,hu,hy,id,it,kn,ml,mr,ne,nl,pt,ro,ru,sk,sr,sv,ta,te,uk,vi,zh'.split(',')
+
+LANG_NAME = {
+    'ar': 'Arabic',
+    'bn': 'Bengali',
+    'ca': 'Catalan',
+    'da': 'Danish',
+    'de': 'German',
+    'es': 'Spanish',
+    'eu': 'Basque',
+    'fr': 'French',
+    'gu': 'Gujarati',
+    'hi': 'Hindi',
+    'hr': 'Croatian',
+    'hu': 'Hungarian',
+    'hy': 'Armenian',
+    'id': 'Indonesian',
+    'it': 'Italian',
+    'kn': 'Kannada',
+    'ml': 'Malayalam',
+    'mr': 'Marathi',
+    'ne': 'Nepali',
+    'nl': 'Dutch',
+    'pt': 'Portuguese',
+    'ro': 'Romanian',
+    'ru': 'Russian',
+    'sk': 'Slovak',
+    'sr': 'Serbian',
+    'sv': 'Swedish',
+    'ta': 'Tamil',
+    'te': 'Telugu',
+    'uk': 'Ukrainian',
+    'vi': 'Vietnamese',
+    'zh': 'Chinese'
+}
+
+NONE_COL = "None"
+
+
+COLS = ["Method", "Model" , "SS Easy", "SS Medium", "SS Hard", "MS Easy", "MS Meduium", "MS Hard", "Overall", NONE_COL]
+TYPES = ["str", "str", "number", "number", "number", "number", "number", "number", "number", "number" , "number"]
+
+df = []
+row = ["React", "Qwen-plus" , "10.5", "20.6", "30.4", "10.5", "20.6", "30.4", "20", NONE_COL]
+
+df.append(row)
+df.append(row)
+df.append(row)
+df.append(row)
+df = pd.DataFrame.from_records(df, columns=COLS)
+df = df.sort_values(by=["Method", "Overall"], ascending=False)
+df = df[COLS]
+demo = gr.Blocks(css=CUSTOM_CSS)
 with demo:
     gr.HTML(TITLE)
+    gr.Markdown(INTRO_TEXT, elem_classes="markdown-text")
+    gr.Markdown(HOW_TO, elem_classes="markdown-text")
+    print(TYPES)
+    print(df.columns)
+    with gr.Group():
+        with gr.Tab("Results: Agent"):
+            leaderboard_table_test = gr.components.Dataframe(
+                value=df, datatype=TYPES, interactive=False,
+                column_widths = ["20%"] * len(df.columns)
             )
+        with gr.Tab("Results: RAG-system"):
+            leaderboard_table_val = gr.components.Dataframe(
+                value=df, datatype=TYPES, interactive=False,
+                column_widths=["20%"]
+            )


+    gr.Markdown(CREDIT, elem_classes="markdown-text")
+    gr.Markdown(CITATION, elem_classes="markdown-text")
+
+demo.launch(share=True)
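Note: the new app.py imports `os`, `json`, `glob` (twice), and `defaultdict` but never uses them, and the table is filled with four copies of one hardcoded row. Below is a minimal sketch of how those imports could instead populate the same columns from per-model result files; the `results/*.json` layout and the keys `method`, `model`, `scores`, `overall` are assumptions for illustration, not something this commit defines.

```python
# Sketch only (not part of the commit): build the leaderboard DataFrame from
# per-model JSON result files instead of the hardcoded rows in app.py.
import glob
import json

import pandas as pd

# Column names copied from app.py as committed (including the "MS Meduium" spelling).
COLS = ["Method", "Model", "SS Easy", "SS Medium", "SS Hard",
        "MS Easy", "MS Meduium", "MS Hard", "Overall", "None"]


def load_rows(result_dir: str = "results") -> pd.DataFrame:
    rows = []
    for path in glob.glob(f"{result_dir}/*.json"):
        with open(path) as f:
            entry = json.load(f)
        rows.append(
            [entry["method"], entry["model"]]
            + [entry["scores"][col] for col in COLS[2:8]]  # the six sub-scores
            + [entry["overall"], "None"]  # overall score plus the hidden placeholder column
        )
    return pd.DataFrame.from_records(rows, columns=COLS)


# df = load_rows().sort_values(by=["Method", "Overall"], ascending=False)
```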
content.py
ADDED
@@ -0,0 +1,30 @@
+TITLE = '<h1 align="center" id="space-title">🏆 WebWalkerQA Leaderboard</h1>'
+
+INTRO_TEXT = f"""
+## About
+This leaderboard shows the performance of models on the WebWalkerQA benchmark. The WebWalkerQA benchmark is a collection of question-answering datasets that test the ability of models to answer questions about web pages.
+"""
+
+HOW_TO = f"""
+## How to list your model performance on this leaderboard:
+Send a email to [email protected] or [email protected].
+"""
+
+CREDIT = f"""
+## Credit
+To make this website, we use the following resources:
+- Evaluation code (EleutherAI's lm_evaluation_harness repo)
+- Leaderboard code (Huggingface4's open_llm_leaderboard and repo)
+"""
+
+
+CITATION = f"""
+## Citation
+```
+@misc{{lai2023openllmbenchmark,
+  author = {{Viet Lai and Nghia Trung Ngo and Amir Pouran Ben Veyseh and Franck Dernoncourt and Thien Huu Nguyen}},
+  title={{Open Multilingual LLM Evaluation Leaderboard}},
+  year={{2023}}
+}}
+```
+"""
css.py
ADDED
@@ -0,0 +1,13 @@
+CUSTOM_CSS= """
+/* Hides the final column */
+table td:last-child,
+table th:last-child {
+    display: none;
+}
+# table td:first-child,
+# table th:first-child {
+#     max-width: 400px;
+#     overflow: auto;
+#     white-space: nowrap;
+# }
+"""
utils.py
ADDED
@@ -0,0 +1,13 @@
+# Based on Omar Sanseviero work
+# Make model clickable link
+def make_clickable_model(model_name):
+    # remove user from model name
+    model_name_show = ' '.join(model_name.split('/')[1:])
+
+    link = "https://huggingface.co/" + model_name
+    return f'<a target="_blank" href="{link}">{model_name_show}</a>'
+
+# Make user clickable link
+def make_clickable_user(user_id):
+    link = "https://huggingface.co/" + user_id
+    return f'<a target="_blank" href="{link}">{user_id}</a>'
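utils.py defines two link helpers, but nothing in this commit imports them. If the leaderboard's Model column were to be rendered as clickable links, the usage would presumably look like the sketch below; the example model ids and the `datatype` hint are assumptions, not part of the commit.

```python
# Hypothetical usage of the helpers added in utils.py; app.py does not call
# them in this commit. Assumes the Model column holds "org/model" ids.
import pandas as pd

from utils import make_clickable_model

df = pd.DataFrame({"Model": ["Qwen/Qwen-plus", "some-org/some-model"]})
df["Model"] = df["Model"].apply(make_clickable_model)

# For the links to actually render, the corresponding column of the Gradio
# Dataframe would need an HTML-capable datatype, e.g. datatype=["html", ...].
```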