natolambert committed on
Commit
874c0c9
·
1 Parent(s): 18596de
Files changed (2) hide show
  1. app.py +7 -4
  2. src/md.py +1 -1
app.py CHANGED
@@ -211,21 +211,24 @@ def regex_table(dataframe, regex, filter_button):
211
 
212
  # if Score exists, round to 2 decimals
213
  if "Score" in data.columns:
214
- data["Score"] = data["Score"].round(2)
215
  if "Average" in data.columns:
216
- data["Average"] = data["Average"].round(1)
217
  # round all others to 1 decimal
218
  for col in data.columns:
219
  if col not in ["", "Model", "Model Type", "Score", "Average"]:
220
- data[col] = data[col].round(1)
221
  return data
222
 
 
 
 
223
 
224
  with gr.Blocks(css=custom_css) as app:
225
  # create tabs for the app, moving the current table to one titled "rewardbench" and the benchmark_text to a tab called "About"
226
  with gr.Row():
227
  with gr.Column(scale=6):
228
- gr.Markdown(TOP_TEXT)
229
  with gr.Column(scale=4):
230
  # search = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
231
  # filter_button = gr.Checkbox(label="Include AI2 training runs (or type ai2 above).", interactive=True)
 
211
 
212
  # if Score exists, round to 2 decimals
213
  if "Score" in data.columns:
214
+ data["Score"] = np.round(np.array(data["Score"].values).astype(float), 2)
215
  if "Average" in data.columns:
216
+ data["Average"] = np.round(np.array(data["Average"].values).astype(float), 1)
217
  # round all others to 1 decimal
218
  for col in data.columns:
219
  if col not in ["", "Model", "Model Type", "Score", "Average"]:
220
+ data[col] = np.round(np.array(data[col].values).astype(float), 1)
221
  return data
222
 
223
+ # import ipdb; ipdb.set_trace()
224
+
225
+ total_models = len(regex_table(rewardbench_data_avg.copy(), "", ["Seq. Classifiers", "DPO", "Custom Classifiers"]).values)
226
 
227
  with gr.Blocks(css=custom_css) as app:
228
  # create tabs for the app, moving the current table to one titled "rewardbench" and the benchmark_text to a tab called "About"
229
  with gr.Row():
230
  with gr.Column(scale=6):
231
+ gr.Markdown(TOP_TEXT.format(str(total_models)))
232
  with gr.Column(scale=4):
233
  # search = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
234
  # filter_button = gr.Checkbox(label="Include AI2 training runs (or type ai2 above).", interactive=True)
src/md.py CHANGED
@@ -97,5 +97,5 @@ For more details, see the [dataset](https://huggingface.co/datasets/allenai/rewa
97
  TOP_TEXT = """
98
  # RewardBench: Evaluating Reward Models
99
  ### Evaluating the capabilities, safety, and pitfalls of reward models
100
- [Code](https://github.com/allenai/reward-bench) | [Eval. Dataset](https://huggingface.co/datasets/allenai/reward-bench) | [Prior Test Sets](https://huggingface.co/datasets/allenai/pref-test-sets) | [Results](https://huggingface.co/datasets/allenai/reward-bench-results) | [Paper](https://arxiv.org/abs/2403.13787)
101
  """
 
97
  TOP_TEXT = """
98
  # RewardBench: Evaluating Reward Models
99
  ### Evaluating the capabilities, safety, and pitfalls of reward models
100
+ [Code](https://github.com/allenai/reward-bench) | [Eval. Dataset](https://huggingface.co/datasets/allenai/reward-bench) | [Prior Test Sets](https://huggingface.co/datasets/allenai/pref-test-sets) | [Results](https://huggingface.co/datasets/allenai/reward-bench-results) | [Paper](https://arxiv.org/abs/2403.13787) | Total models: {}
101
  """