|
import pandas as pd |
|
from pathlib import Path |
|
from datasets import load_dataset |
|
import numpy as np |
|
import os |
|
import re |
|
|
|
# Display names that get a " *" marker appended in the leaderboard table.
# Currently empty.
UNVERIFIED_MODELS = []

# Display names that get a " ⚠️" marker appended in the leaderboard table.
# Currently empty.
CONTAMINATED_MODELS = []
|
|
|
|
|
def model_hyperlink(link, model_name):
    """Return the HTML snippet used to display ``model_name`` in the table.

    Names longer than 50 characters are cut to their first 47 characters plus
    ``"..."`` before anything else happens, so every comparison below (and the
    visible link text) uses the shortened name.

    Args:
        link: URL used as the anchor target for models without a special case.
        model_name: Model identifier, optionally prefixed with ``"<org>/"``.

    Returns:
        * ``"random"`` and names whose first path component is ``"PoLL"`` are
          returned as plain text.
        * ``"Cohere March 2024"`` and names whose first path component is
          ``openai``, ``Anthropic`` or ``google`` link to the matching
          ``https://huggingface.co/<org>`` page.
        * Every other name links to ``link``.

        ``" *"`` is appended when the name is in ``UNVERIFIED_MODELS`` and
        ``" ⚠️"`` when it is in ``CONTAMINATED_MODELS``.
    """
    if len(model_name) > 50:
        model_name = model_name[:47] + "..."

    def anchor(url):
        # Single place for the link markup shared by all linked variants.
        return (
            f'<a target="_blank" href="{url}" style="color: var(--link-text-color); '
            f'text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
        )

    org = model_name.split("/")[0]
    if model_name == "random":
        output = "random"
    elif model_name == "Cohere March 2024":
        output = anchor("https://huggingface.co/Cohere")
    elif org in ("openai", "Anthropic", "google"):
        output = anchor(f"https://huggingface.co/{org}")
    elif org == "PoLL":
        output = model_name
    else:
        # The generic link is only the fallback. Without this ``else`` the
        # assignment ran unconditionally and overwrote every branch above.
        output = anchor(link)

    if model_name in UNVERIFIED_MODELS:
        output += " *"
    if model_name in CONTAMINATED_MODELS:
        output += " ⚠️"
    return output
|
|
|
def undo_hyperlink(html_string):
    """Extract the first piece of text that sits between a ``>`` and a ``<``.

    Args:
        html_string: Markup such as ``<a href="...">name</a>``.

    Returns:
        The first non-empty run of characters enclosed by ``>`` and ``<``,
        or the literal string ``"No text found"`` when there is no such run.
    """
    found = re.search(r'>[^<]+<', html_string)
    if found is None:
        return "No text found"
    # The match includes the delimiting ">" and "<"; strip one char per side.
    return found.group(0)[1:-1]
|
|
|
|
|
|
|
def load_all_data(data_repo, subdir: str, subsubsets=False):
    """Load every ``*.json`` file of a directory into one DataFrame.

    Args:
        data_repo: Path of the root directory that contains ``subdir``.
        subdir: Directory below ``data_repo`` whose ``*.json`` files are read.
        subsubsets: Accepted for backward compatibility; not used here.

    Returns:
        A ``pandas.DataFrame`` holding the rows of all files plus a ``model``
        column set to the file name without its ``.json`` suffix. Files that
        appear later in the directory listing come first, matching the
        previous prepend-in-a-loop behaviour. An empty DataFrame is returned
        when the directory has no JSON files.
    """
    data_dir = Path(data_repo) / subdir
    suffix = ".json"

    frames = []
    for file_name in os.listdir(data_dir):
        file_path = os.path.join(data_dir, file_name)
        if not (file_name.endswith(suffix) and os.path.isfile(file_path)):
            continue

        # Strip only the trailing suffix; splitting on ".json" would truncate
        # names that contain ".json" somewhere in the middle.
        model_name = file_name[: -len(suffix)]

        model_data = load_dataset("json", data_files=file_path, split="train")
        # NOTE(review): a one-element list only fits a dataset with exactly one
        # row, so each file is presumably a single JSON record - confirm.
        model_data = model_data.add_column("model", [model_name])
        frames.append(pd.DataFrame(model_data))

    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a frame inside the loop; reversing
    # keeps the row order the old ``pd.concat([df2, df])`` loop produced.
    return pd.concat(frames[::-1])
|
|
|
|
|
def prep_df(df):
    """Reshape the raw results frame into the table that is displayed.

    The input needs at least the columns ``model``, ``path`` and ``average``.
    In order, the function:

    1. sorts the columns alphabetically and moves ``model`` to the front,
    2. replaces each ``model`` value with the markup returned by
       ``model_hyperlink`` for ``https://huggingface.co/<path>`` and then
       drops ``path``,
    3. multiplies every column by 100 except ``model`` and any column whose
       name contains ``rank`` or ``confi``,
    4. moves ``average`` to the second position,
    5. renames the known columns to their display titles.

    Args:
        df: Frame with one row per model.

    Returns:
        A new, reordered and renamed DataFrame.
    """

    def move_column(frame, name, position):
        # Re-select all columns with ``name`` relocated to ``position``.
        names = list(frame.columns)
        names.insert(position, names.pop(names.index(name)))
        return frame.loc[:, names]

    alphabetical = df.reindex(sorted(df.columns), axis=1)
    df = move_column(alphabetical, "model", 0)

    # Build the link from "path" first; the column is not needed afterwards.
    df["model"] = df.apply(
        lambda row: model_hyperlink(f"https://huggingface.co/{row['path']}", row['model']),
        axis=1,
    )
    df = df.drop(columns=["path"])

    # Rank and confidence columns keep their raw values; the rest is scaled.
    remaining = df.columns.tolist()
    remaining.remove("model")
    score_columns = [
        name for name in remaining
        if not ("rank" in name or "confi" in name)
    ]
    df[score_columns] = df[score_columns] * 100

    df = move_column(df, "average", 1)

    display_titles = {
        "model": "Model",
        "average": "Average",
        "brainstorm": "Brainstorm",
        "open_qa": "Open QA",
        "closed_qa": "Closed QA",
        "extract": "Extract",
        "generation": "Generation",
        "rewrite": "Rewrite",
        "summarize": "Summarize",
        "classify": "Classify",
        "reasoning_over_numerical_data": "Reasoning Over Numerical Data",
        "multi-document_synthesis": "Multi-Document Synthesis",
        "fact_checking_or_attributed_qa": "Fact Checking or Attributed QA",
    }
    return df.rename(columns=display_titles)
|
|
|
|
|
def sort_by_category(df, category):
    """Return a copy of ``df`` ordered and laid out for one category.

    The helper column names are derived from ``category`` by lower-casing it,
    replacing spaces with underscores and appending ``_rank`` / ``_confi``.

    Rows are sorted by the category's rank (ascending), ties broken by the
    category value (descending). The leading columns of the result are
    ``Rank`` (the renamed rank column) at position 0, ``category`` at
    position 2 and ``95% CI`` (the renamed confi column) at position 3.
    Every remaining column whose name ends in ``rank`` or ``confi`` is removed.

    Args:
        df: Table containing ``category`` and its ``*_rank`` / ``*_confi``
            columns. It is not modified.
        category: Display name of the column to rank by.

    Returns:
        A new DataFrame.
    """

    def relocate(frame, name, position):
        # Re-select all columns with ``name`` moved to ``position``.
        order = list(frame.columns)
        order.insert(position, order.pop(order.index(name)))
        return frame.loc[:, order]

    slug = category.lower().replace(" ", "_")
    col_rank = slug + "_rank"
    col_confi = slug + "_confi"

    table = df.copy()
    table = table.sort_values(by=[col_rank, category], ascending=[True, False])

    table = relocate(table, col_rank, 0).rename(columns={col_rank: "Rank"})
    table = relocate(table, category, 2)
    table = relocate(table, col_confi, 3).rename(columns={col_confi: "95% CI"})

    # "Rank" and "95% CI" survive because of their new names; only the helper
    # columns of the other categories still match these suffixes.
    leftovers = [name for name in table.columns if name.endswith(("rank", "confi"))]
    return table.drop(columns=leftovers)