{ "best_metric": 61.36379559447102, "best_model_checkpoint": "/home/jcanete/ft-data/all_results/tar/albeto_base_2/epochs_3_bs_16_lr_5e-5/checkpoint-10500", "epoch": 3.0, "global_step": 16455, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "eval_exact_match": 21.75023651844844, "eval_f1": 37.57447600514485, "step": 300 }, { "epoch": 0.09, "learning_rate": 4.848374354299605e-05, "loss": 3.3269, "step": 500 }, { "epoch": 0.11, "eval_exact_match": 24.66414380321665, "eval_f1": 41.23440962664224, "step": 600 }, { "epoch": 0.16, "eval_exact_match": 32.44087038789026, "eval_f1": 49.56736430711076, "step": 900 }, { "epoch": 0.18, "learning_rate": 4.6967487085992104e-05, "loss": 2.677, "step": 1000 }, { "epoch": 0.22, "eval_exact_match": 34.16272469252602, "eval_f1": 52.17403937631078, "step": 1200 }, { "epoch": 0.27, "learning_rate": 4.544819203889396e-05, "loss": 2.4733, "step": 1500 }, { "epoch": 0.27, "eval_exact_match": 35.496688741721854, "eval_f1": 53.19348275463691, "step": 1500 }, { "epoch": 0.33, "eval_exact_match": 36.40491958372753, "eval_f1": 54.831302693730166, "step": 1800 }, { "epoch": 0.36, "learning_rate": 4.392889699179581e-05, "loss": 2.3891, "step": 2000 }, { "epoch": 0.38, "eval_exact_match": 36.74550614947966, "eval_f1": 54.7644398701711, "step": 2100 }, { "epoch": 0.44, "eval_exact_match": 38.42951750236519, "eval_f1": 55.69824697753973, "step": 2400 }, { "epoch": 0.46, "learning_rate": 4.2409601944697666e-05, "loss": 2.2821, "step": 2500 }, { "epoch": 0.49, "eval_exact_match": 38.41059602649007, "eval_f1": 56.0614093765921, "step": 2700 }, { "epoch": 0.55, "learning_rate": 4.089030689759952e-05, "loss": 2.2559, "step": 3000 }, { "epoch": 0.55, "eval_exact_match": 39.33774834437086, "eval_f1": 57.385105012672504, "step": 3000 }, { "epoch": 0.6, "eval_exact_match": 38.656575212866606, "eval_f1": 56.03551336622864, "step": 3300 }, { "epoch": 0.64, "learning_rate": 3.9371011850501374e-05, "loss": 2.2176, "step": 3500 }, { "epoch": 0.66, "eval_exact_match": 39.517502365184484, "eval_f1": 57.32723067329097, "step": 3600 }, { "epoch": 0.71, "eval_exact_match": 40.397350993377486, "eval_f1": 58.23372912964764, "step": 3900 }, { "epoch": 0.73, "learning_rate": 3.785171680340323e-05, "loss": 2.1779, "step": 4000 }, { "epoch": 0.77, "eval_exact_match": 40.14191106906339, "eval_f1": 58.213042409331, "step": 4200 }, { "epoch": 0.82, "learning_rate": 3.6332421756305075e-05, "loss": 2.1644, "step": 4500 }, { "epoch": 0.82, "eval_exact_match": 40.96499526963103, "eval_f1": 58.918006140859504, "step": 4500 }, { "epoch": 0.88, "eval_exact_match": 41.324503311258276, "eval_f1": 59.21702126158363, "step": 4800 }, { "epoch": 0.91, "learning_rate": 3.481616529930112e-05, "loss": 2.1601, "step": 5000 }, { "epoch": 0.93, "eval_exact_match": 41.40964995269631, "eval_f1": 59.44853824992533, "step": 5100 }, { "epoch": 0.98, "eval_exact_match": 41.97729422894986, "eval_f1": 60.06117101015576, "step": 5400 }, { "epoch": 1.0, "learning_rate": 3.329687025220298e-05, "loss": 2.0731, "step": 5500 }, { "epoch": 1.04, "eval_exact_match": 41.68401135288553, "eval_f1": 59.830806199953315, "step": 5700 }, { "epoch": 1.09, "learning_rate": 3.177757520510483e-05, "loss": 1.8064, "step": 6000 }, { "epoch": 1.09, "eval_exact_match": 42.02459791863765, "eval_f1": 59.84599471093964, "step": 6000 }, { "epoch": 1.15, "eval_exact_match": 41.67455061494797, "eval_f1": 59.31725434528457, "step": 6300 }, { "epoch": 1.19, "learning_rate": 3.0258280158006685e-05, "loss": 1.7892, "step": 6500 }, { "epoch": 1.2, "eval_exact_match": 41.57048249763481, "eval_f1": 60.199256429502846, "step": 6600 }, { "epoch": 1.26, "eval_exact_match": 41.9678334910123, "eval_f1": 59.90500225716347, "step": 6900 }, { "epoch": 1.28, "learning_rate": 2.873898511090854e-05, "loss": 1.7917, "step": 7000 }, { "epoch": 1.31, "eval_exact_match": 42.42194891201514, "eval_f1": 60.72762889522327, "step": 7200 }, { "epoch": 1.37, "learning_rate": 2.722272865390459e-05, "loss": 1.788, "step": 7500 }, { "epoch": 1.37, "eval_exact_match": 42.36518448438978, "eval_f1": 60.809943991859576, "step": 7500 }, { "epoch": 1.42, "eval_exact_match": 42.48817407757805, "eval_f1": 60.79429012527435, "step": 7800 }, { "epoch": 1.46, "learning_rate": 2.5703433606806444e-05, "loss": 1.8415, "step": 8000 }, { "epoch": 1.48, "eval_exact_match": 42.639545884579, "eval_f1": 60.460391744838276, "step": 8100 }, { "epoch": 1.53, "eval_exact_match": 42.620624408703875, "eval_f1": 61.10136266360258, "step": 8400 }, { "epoch": 1.55, "learning_rate": 2.4184138559708297e-05, "loss": 1.798, "step": 8500 }, { "epoch": 1.59, "eval_exact_match": 42.35572374645222, "eval_f1": 60.73530844456789, "step": 8700 }, { "epoch": 1.64, "learning_rate": 2.266484351261015e-05, "loss": 1.817, "step": 9000 }, { "epoch": 1.64, "eval_exact_match": 42.92336802270577, "eval_f1": 60.69895519530886, "step": 9000 }, { "epoch": 1.7, "eval_exact_match": 42.86660359508041, "eval_f1": 60.66334036689526, "step": 9300 }, { "epoch": 1.73, "learning_rate": 2.11485870556062e-05, "loss": 1.7745, "step": 9500 }, { "epoch": 1.75, "eval_exact_match": 43.074739829706715, "eval_f1": 61.09186469003939, "step": 9600 }, { "epoch": 1.8, "eval_exact_match": 42.904446546830656, "eval_f1": 61.13917775897811, "step": 9900 }, { "epoch": 1.82, "learning_rate": 1.9629292008508053e-05, "loss": 1.7932, "step": 10000 }, { "epoch": 1.86, "eval_exact_match": 42.838221381267736, "eval_f1": 61.00012403857813, "step": 10200 }, { "epoch": 1.91, "learning_rate": 1.8109996961409907e-05, "loss": 1.8128, "step": 10500 }, { "epoch": 1.91, "eval_exact_match": 42.81929990539262, "eval_f1": 61.36379559447102, "step": 10500 }, { "epoch": 1.97, "eval_exact_match": 42.50709555345317, "eval_f1": 60.829640531869714, "step": 10800 }, { "epoch": 2.01, "learning_rate": 1.6593740504405955e-05, "loss": 1.756, "step": 11000 }, { "epoch": 2.02, "eval_exact_match": 42.72469252601703, "eval_f1": 61.0949824983516, "step": 11100 }, { "epoch": 2.08, "eval_exact_match": 42.544938505203405, "eval_f1": 60.824356487676354, "step": 11400 }, { "epoch": 2.1, "learning_rate": 1.5074445457307808e-05, "loss": 1.4926, "step": 11500 }, { "epoch": 2.13, "eval_exact_match": 42.18543046357616, "eval_f1": 60.54564764009016, "step": 11700 }, { "epoch": 2.19, "learning_rate": 1.3555150410209664e-05, "loss": 1.4462, "step": 12000 }, { "epoch": 2.19, "eval_exact_match": 42.535477767265846, "eval_f1": 60.63187441471661, "step": 12000 }, { "epoch": 2.24, "eval_exact_match": 42.100283822138124, "eval_f1": 60.76346385090341, "step": 12300 }, { "epoch": 2.28, "learning_rate": 1.2035855363111518e-05, "loss": 1.4906, "step": 12500 }, { "epoch": 2.3, "eval_exact_match": 42.3368022705771, "eval_f1": 60.88032689469819, "step": 12600 }, { "epoch": 2.35, "eval_exact_match": 42.29895931882687, "eval_f1": 60.75186551387642, "step": 12900 }, { "epoch": 2.37, "learning_rate": 1.051656031601337e-05, "loss": 1.4723, "step": 13000 }, { "epoch": 2.41, "eval_exact_match": 42.29895931882687, "eval_f1": 61.06192361554967, "step": 13200 }, { "epoch": 2.46, "learning_rate": 8.997265268915224e-06, "loss": 1.4917, "step": 13500 }, { "epoch": 2.46, "eval_exact_match": 42.232734153263955, "eval_f1": 60.9421036656735, "step": 13500 }, { "epoch": 2.52, "eval_exact_match": 42.39356669820246, "eval_f1": 61.102189737578314, "step": 13800 }, { "epoch": 2.55, "learning_rate": 7.4779702218170765e-06, "loss": 1.4979, "step": 14000 }, { "epoch": 2.57, "eval_exact_match": 42.34626300851466, "eval_f1": 61.05567412635194, "step": 14100 }, { "epoch": 2.63, "eval_exact_match": 42.8003784295175, "eval_f1": 61.240965933718726, "step": 14400 }, { "epoch": 2.64, "learning_rate": 5.95867517471893e-06, "loss": 1.4725, "step": 14500 }, { "epoch": 2.68, "eval_exact_match": 42.3368022705771, "eval_f1": 60.933088700510005, "step": 14700 }, { "epoch": 2.73, "learning_rate": 4.4424187177149806e-06, "loss": 1.4776, "step": 15000 }, { "epoch": 2.73, "eval_exact_match": 42.55439924314096, "eval_f1": 60.96455686411175, "step": 15000 }, { "epoch": 2.79, "eval_exact_match": 42.82876064333018, "eval_f1": 61.07475280902827, "step": 15300 }, { "epoch": 2.83, "learning_rate": 2.9231236706168336e-06, "loss": 1.4236, "step": 15500 }, { "epoch": 2.84, "eval_exact_match": 42.57332071901608, "eval_f1": 61.128070584216076, "step": 15600 }, { "epoch": 2.9, "eval_exact_match": 42.70577105014191, "eval_f1": 61.08493602964073, "step": 15900 }, { "epoch": 2.92, "learning_rate": 1.4038286235186874e-06, "loss": 1.4909, "step": 16000 }, { "epoch": 2.95, "eval_exact_match": 42.82876064333018, "eval_f1": 61.181108132512826, "step": 16200 }, { "epoch": 3.0, "step": 16455, "total_flos": 3819871314614016.0, "train_loss": 1.8848772167761034, "train_runtime": 2200.5584, "train_samples_per_second": 119.625, "train_steps_per_second": 7.478 } ], "max_steps": 16455, "num_train_epochs": 3, "total_flos": 3819871314614016.0, "trial_name": null, "trial_params": null }