import os
from pathlib import Path

import numpy as np
import pandas as pd
from datasets import load_dataset

# Columns holding outdated data; they are removed from the table that
# ``load_all_data`` returns.
_OUTDATED_COLUMNS = ["xstest", "anthropic", "summarize_prompted"]


# From Open LLM Leaderboard
def model_hyperlink(link, model_name):
    """Return the text shown for a model in the results table.

    Args:
        link: URL of the model page. It is currently not used in the
            returned value.
        model_name: Name of the model.

    Returns:
        ``"random"`` for the random baseline, otherwise ``model_name``
        formatted as a string.
    """
    # NOTE(review): ``link`` is ignored and the result is plain text, not a
    # hyperlink -- presumably anchor markup around the name was lost at some
    # point; confirm before relying on this producing a clickable link.
    if model_name == "random":
        return "random"
    return f'{model_name}'


def _list_result_files(data_dir):
    """Return the paths of all ``<org>/<name>.json`` files under *data_dir*."""
    result_files = []
    for org in os.listdir(data_dir):
        org_dir = data_dir / org
        if not org_dir.is_dir():
            continue
        result_files.extend(
            org_dir / name
            for name in os.listdir(org_dir)
            if name.endswith(".json") and (org_dir / name).is_file()
        )
    return result_files


def load_all_data(data_repo, subdir: str, subsubsets=False):
    """Load every per-model JSON result file into one results table.

    The directory ``data_repo/subdir`` is expected to contain one folder per
    organisation, each holding ``*.json`` result files. Every file is loaded
    on its own (so files with differing keys can still be combined) and the
    rows are stacked into a single frame.

    Args:
        data_repo: Path of the local directory containing the results.
        subdir: Sub-directory of ``data_repo`` to read.
        subsubsets: Unused; kept so existing callers keep working.

    Returns:
        A ``pandas.DataFrame`` with ``model`` as the first column,
        ``average`` as the second, and the remaining columns in alphabetical
        order, rounded to two decimals. The ``chat_template`` column and the
        outdated ``xstest``, ``anthropic`` and ``summarize_prompted`` columns
        are removed when present. If no result file is found, an empty frame
        with only the ``model`` and ``average`` columns is returned.

    Raises:
        FileNotFoundError: If ``data_repo/subdir`` does not exist.
        ValueError: If the loaded data has no ``model`` column.
    """
    data_dir = Path(data_repo) / subdir

    # Load from the very paths that were listed, so listing and loading can
    # never disagree (e.g. when ``data_repo`` has no trailing separator).
    frames = [
        pd.DataFrame(load_dataset("json", data_files=str(path), split="train"))
        for path in _list_result_files(data_dir)
    ]
    if not frames:
        return pd.DataFrame(columns=["model", "average"])

    # Concatenate once instead of once per file; the list is reversed so the
    # most recently loaded file comes first, as when each frame was prepended.
    df = pd.concat(frames[::-1])

    # Not every result file necessarily carries a chat template.
    df = df.drop(columns=["chat_template"], errors="ignore")

    # "model" first, every other column alphabetically after it.
    score_cols = sorted(df.columns)
    score_cols.remove("model")
    df = df.loc[:, ["model", *score_cols]]

    df[score_cols] = df[score_cols].round(2)
    # The average is taken over all remaining columns, including the outdated
    # ones that are only dropped further down; missing values are skipped.
    average = np.nanmean(df[score_cols].values, axis=1).round(2)
    df.insert(1, "average", average)

    df["model"] = df["model"].apply(
        lambda x: model_hyperlink(f"https://huggingface.co/{x}", x)
    )

    return df.drop(columns=_OUTDATED_COLUMNS, errors="ignore")