|
{
  "best_metric": 61.36379559447102,
  "best_model_checkpoint": "/home/jcanete/ft-data/all_results/tar/albeto_base_2/epochs_3_bs_16_lr_5e-5/checkpoint-10500",
  "epoch": 3.0,
  "global_step": 16455,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "eval_exact_match": 21.75023651844844,
      "eval_f1": 37.57447600514485,
      "step": 300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.848374354299605e-05,
      "loss": 3.3269,
      "step": 500
    },
    {
      "epoch": 0.11,
      "eval_exact_match": 24.66414380321665,
      "eval_f1": 41.23440962664224,
      "step": 600
    },
    {
      "epoch": 0.16,
      "eval_exact_match": 32.44087038789026,
      "eval_f1": 49.56736430711076,
      "step": 900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.6967487085992104e-05,
      "loss": 2.677,
      "step": 1000
    },
    {
      "epoch": 0.22,
      "eval_exact_match": 34.16272469252602,
      "eval_f1": 52.17403937631078,
      "step": 1200
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.544819203889396e-05,
      "loss": 2.4733,
      "step": 1500
    },
    {
      "epoch": 0.27,
      "eval_exact_match": 35.496688741721854,
      "eval_f1": 53.19348275463691,
      "step": 1500
    },
    {
      "epoch": 0.33,
      "eval_exact_match": 36.40491958372753,
      "eval_f1": 54.831302693730166,
      "step": 1800
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.392889699179581e-05,
      "loss": 2.3891,
      "step": 2000
    },
    {
      "epoch": 0.38,
      "eval_exact_match": 36.74550614947966,
      "eval_f1": 54.7644398701711,
      "step": 2100
    },
    {
      "epoch": 0.44,
      "eval_exact_match": 38.42951750236519,
      "eval_f1": 55.69824697753973,
      "step": 2400
    },
    {
      "epoch": 0.46,
      "learning_rate": 4.2409601944697666e-05,
      "loss": 2.2821,
      "step": 2500
    },
    {
      "epoch": 0.49,
      "eval_exact_match": 38.41059602649007,
      "eval_f1": 56.0614093765921,
      "step": 2700
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.089030689759952e-05,
      "loss": 2.2559,
      "step": 3000
    },
    {
      "epoch": 0.55,
      "eval_exact_match": 39.33774834437086,
      "eval_f1": 57.385105012672504,
      "step": 3000
    },
    {
      "epoch": 0.6,
      "eval_exact_match": 38.656575212866606,
      "eval_f1": 56.03551336622864,
      "step": 3300
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.9371011850501374e-05,
      "loss": 2.2176,
      "step": 3500
    },
    {
      "epoch": 0.66,
      "eval_exact_match": 39.517502365184484,
      "eval_f1": 57.32723067329097,
      "step": 3600
    },
    {
      "epoch": 0.71,
      "eval_exact_match": 40.397350993377486,
      "eval_f1": 58.23372912964764,
      "step": 3900
    },
    {
      "epoch": 0.73,
      "learning_rate": 3.785171680340323e-05,
      "loss": 2.1779,
      "step": 4000
    },
    {
      "epoch": 0.77,
      "eval_exact_match": 40.14191106906339,
      "eval_f1": 58.213042409331,
      "step": 4200
    },
    {
      "epoch": 0.82,
      "learning_rate": 3.6332421756305075e-05,
      "loss": 2.1644,
      "step": 4500
    },
    {
      "epoch": 0.82,
      "eval_exact_match": 40.96499526963103,
      "eval_f1": 58.918006140859504,
      "step": 4500
    },
    {
      "epoch": 0.88,
      "eval_exact_match": 41.324503311258276,
      "eval_f1": 59.21702126158363,
      "step": 4800
    },
    {
      "epoch": 0.91,
      "learning_rate": 3.481616529930112e-05,
      "loss": 2.1601,
      "step": 5000
    },
    {
      "epoch": 0.93,
      "eval_exact_match": 41.40964995269631,
      "eval_f1": 59.44853824992533,
      "step": 5100
    },
    {
      "epoch": 0.98,
      "eval_exact_match": 41.97729422894986,
      "eval_f1": 60.06117101015576,
      "step": 5400
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.329687025220298e-05,
      "loss": 2.0731,
      "step": 5500
    },
    {
      "epoch": 1.04,
      "eval_exact_match": 41.68401135288553,
      "eval_f1": 59.830806199953315,
      "step": 5700
    },
    {
      "epoch": 1.09,
      "learning_rate": 3.177757520510483e-05,
      "loss": 1.8064,
      "step": 6000
    },
    {
      "epoch": 1.09,
      "eval_exact_match": 42.02459791863765,
      "eval_f1": 59.84599471093964,
      "step": 6000
    },
    {
      "epoch": 1.15,
      "eval_exact_match": 41.67455061494797,
      "eval_f1": 59.31725434528457,
      "step": 6300
    },
    {
      "epoch": 1.19,
      "learning_rate": 3.0258280158006685e-05,
      "loss": 1.7892,
      "step": 6500
    },
    {
      "epoch": 1.2,
      "eval_exact_match": 41.57048249763481,
      "eval_f1": 60.199256429502846,
      "step": 6600
    },
    {
      "epoch": 1.26,
      "eval_exact_match": 41.9678334910123,
      "eval_f1": 59.90500225716347,
      "step": 6900
    },
    {
      "epoch": 1.28,
      "learning_rate": 2.873898511090854e-05,
      "loss": 1.7917,
      "step": 7000
    },
    {
      "epoch": 1.31,
      "eval_exact_match": 42.42194891201514,
      "eval_f1": 60.72762889522327,
      "step": 7200
    },
    {
      "epoch": 1.37,
      "learning_rate": 2.722272865390459e-05,
      "loss": 1.788,
      "step": 7500
    },
    {
      "epoch": 1.37,
      "eval_exact_match": 42.36518448438978,
      "eval_f1": 60.809943991859576,
      "step": 7500
    },
    {
      "epoch": 1.42,
      "eval_exact_match": 42.48817407757805,
      "eval_f1": 60.79429012527435,
      "step": 7800
    },
    {
      "epoch": 1.46,
      "learning_rate": 2.5703433606806444e-05,
      "loss": 1.8415,
      "step": 8000
    },
    {
      "epoch": 1.48,
      "eval_exact_match": 42.639545884579,
      "eval_f1": 60.460391744838276,
      "step": 8100
    },
    {
      "epoch": 1.53,
      "eval_exact_match": 42.620624408703875,
      "eval_f1": 61.10136266360258,
      "step": 8400
    },
    {
      "epoch": 1.55,
      "learning_rate": 2.4184138559708297e-05,
      "loss": 1.798,
      "step": 8500
    },
    {
      "epoch": 1.59,
      "eval_exact_match": 42.35572374645222,
      "eval_f1": 60.73530844456789,
      "step": 8700
    },
    {
      "epoch": 1.64,
      "learning_rate": 2.266484351261015e-05,
      "loss": 1.817,
      "step": 9000
    },
    {
      "epoch": 1.64,
      "eval_exact_match": 42.92336802270577,
      "eval_f1": 60.69895519530886,
      "step": 9000
    },
    {
      "epoch": 1.7,
      "eval_exact_match": 42.86660359508041,
      "eval_f1": 60.66334036689526,
      "step": 9300
    },
    {
      "epoch": 1.73,
      "learning_rate": 2.11485870556062e-05,
      "loss": 1.7745,
      "step": 9500
    },
    {
      "epoch": 1.75,
      "eval_exact_match": 43.074739829706715,
      "eval_f1": 61.09186469003939,
      "step": 9600
    },
    {
      "epoch": 1.8,
      "eval_exact_match": 42.904446546830656,
      "eval_f1": 61.13917775897811,
      "step": 9900
    },
    {
      "epoch": 1.82,
      "learning_rate": 1.9629292008508053e-05,
      "loss": 1.7932,
      "step": 10000
    },
    {
      "epoch": 1.86,
      "eval_exact_match": 42.838221381267736,
      "eval_f1": 61.00012403857813,
      "step": 10200
    },
    {
      "epoch": 1.91,
      "learning_rate": 1.8109996961409907e-05,
      "loss": 1.8128,
      "step": 10500
    },
    {
      "epoch": 1.91,
      "eval_exact_match": 42.81929990539262,
      "eval_f1": 61.36379559447102,
      "step": 10500
    },
    {
      "epoch": 1.97,
      "eval_exact_match": 42.50709555345317,
      "eval_f1": 60.829640531869714,
      "step": 10800
    },
    {
      "epoch": 2.01,
      "learning_rate": 1.6593740504405955e-05,
      "loss": 1.756,
      "step": 11000
    },
    {
      "epoch": 2.02,
      "eval_exact_match": 42.72469252601703,
      "eval_f1": 61.0949824983516,
      "step": 11100
    },
    {
      "epoch": 2.08,
      "eval_exact_match": 42.544938505203405,
      "eval_f1": 60.824356487676354,
      "step": 11400
    },
    {
      "epoch": 2.1,
      "learning_rate": 1.5074445457307808e-05,
      "loss": 1.4926,
      "step": 11500
    },
    {
      "epoch": 2.13,
      "eval_exact_match": 42.18543046357616,
      "eval_f1": 60.54564764009016,
      "step": 11700
    },
    {
      "epoch": 2.19,
      "learning_rate": 1.3555150410209664e-05,
      "loss": 1.4462,
      "step": 12000
    },
    {
      "epoch": 2.19,
      "eval_exact_match": 42.535477767265846,
      "eval_f1": 60.63187441471661,
      "step": 12000
    },
    {
      "epoch": 2.24,
      "eval_exact_match": 42.100283822138124,
      "eval_f1": 60.76346385090341,
      "step": 12300
    },
    {
      "epoch": 2.28,
      "learning_rate": 1.2035855363111518e-05,
      "loss": 1.4906,
      "step": 12500
    },
    {
      "epoch": 2.3,
      "eval_exact_match": 42.3368022705771,
      "eval_f1": 60.88032689469819,
      "step": 12600
    },
    {
      "epoch": 2.35,
      "eval_exact_match": 42.29895931882687,
      "eval_f1": 60.75186551387642,
      "step": 12900
    },
    {
      "epoch": 2.37,
      "learning_rate": 1.051656031601337e-05,
      "loss": 1.4723,
      "step": 13000
    },
    {
      "epoch": 2.41,
      "eval_exact_match": 42.29895931882687,
      "eval_f1": 61.06192361554967,
      "step": 13200
    },
    {
      "epoch": 2.46,
      "learning_rate": 8.997265268915224e-06,
      "loss": 1.4917,
      "step": 13500
    },
    {
      "epoch": 2.46,
      "eval_exact_match": 42.232734153263955,
      "eval_f1": 60.9421036656735,
      "step": 13500
    },
    {
      "epoch": 2.52,
      "eval_exact_match": 42.39356669820246,
      "eval_f1": 61.102189737578314,
      "step": 13800
    },
    {
      "epoch": 2.55,
      "learning_rate": 7.4779702218170765e-06,
      "loss": 1.4979,
      "step": 14000
    },
    {
      "epoch": 2.57,
      "eval_exact_match": 42.34626300851466,
      "eval_f1": 61.05567412635194,
      "step": 14100
    },
    {
      "epoch": 2.63,
      "eval_exact_match": 42.8003784295175,
      "eval_f1": 61.240965933718726,
      "step": 14400
    },
    {
      "epoch": 2.64,
      "learning_rate": 5.95867517471893e-06,
      "loss": 1.4725,
      "step": 14500
    },
    {
      "epoch": 2.68,
      "eval_exact_match": 42.3368022705771,
      "eval_f1": 60.933088700510005,
      "step": 14700
    },
    {
      "epoch": 2.73,
      "learning_rate": 4.4424187177149806e-06,
      "loss": 1.4776,
      "step": 15000
    },
    {
      "epoch": 2.73,
      "eval_exact_match": 42.55439924314096,
      "eval_f1": 60.96455686411175,
      "step": 15000
    },
    {
      "epoch": 2.79,
      "eval_exact_match": 42.82876064333018,
      "eval_f1": 61.07475280902827,
      "step": 15300
    },
    {
      "epoch": 2.83,
      "learning_rate": 2.9231236706168336e-06,
      "loss": 1.4236,
      "step": 15500
    },
    {
      "epoch": 2.84,
      "eval_exact_match": 42.57332071901608,
      "eval_f1": 61.128070584216076,
      "step": 15600
    },
    {
      "epoch": 2.9,
      "eval_exact_match": 42.70577105014191,
      "eval_f1": 61.08493602964073,
      "step": 15900
    },
    {
      "epoch": 2.92,
      "learning_rate": 1.4038286235186874e-06,
      "loss": 1.4909,
      "step": 16000
    },
    {
      "epoch": 2.95,
      "eval_exact_match": 42.82876064333018,
      "eval_f1": 61.181108132512826,
      "step": 16200
    },
    {
      "epoch": 3.0,
      "step": 16455,
      "total_flos": 3819871314614016.0,
      "train_loss": 1.8848772167761034,
      "train_runtime": 2200.5584,
      "train_samples_per_second": 119.625,
      "train_steps_per_second": 7.478
    }
  ],
  "max_steps": 16455,
  "num_train_epochs": 3,
  "total_flos": 3819871314614016.0,
  "trial_name": null,
  "trial_params": null
}
|
|