|
{ |
|
"best_metric": 0.9310862172434486, |
|
"best_model_checkpoint": "10-convnextv2-base-22k-384-finetuned-spiderTraining100-1000/checkpoint-7400", |
|
"epoch": 9.989875126560918, |
|
"eval_steps": 500, |
|
"global_step": 7400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.7567567567567575e-06, |
|
"loss": 4.7191, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3513513513513515e-05, |
|
"loss": 4.6815, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0270270270270273e-05, |
|
"loss": 4.6441, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.702702702702703e-05, |
|
"loss": 4.5566, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3783783783783784e-05, |
|
"loss": 4.5796, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.0540540540540545e-05, |
|
"loss": 4.6235, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.72972972972973e-05, |
|
"loss": 4.61, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5.405405405405406e-05, |
|
"loss": 4.6244, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 6.0810810810810814e-05, |
|
"loss": 4.6277, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.756756756756757e-05, |
|
"loss": 4.6237, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.432432432432433e-05, |
|
"loss": 4.5135, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.108108108108109e-05, |
|
"loss": 4.4143, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.783783783783784e-05, |
|
"loss": 4.1232, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.45945945945946e-05, |
|
"loss": 3.7123, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00010135135135135136, |
|
"loss": 3.1878, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00010810810810810812, |
|
"loss": 2.7711, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00011486486486486487, |
|
"loss": 2.3975, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00012162162162162163, |
|
"loss": 2.2354, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00012837837837837836, |
|
"loss": 2.0567, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00013513513513513514, |
|
"loss": 1.9001, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00014189189189189188, |
|
"loss": 1.7957, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00014864864864864866, |
|
"loss": 1.683, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0001554054054054054, |
|
"loss": 1.6268, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00016216216216216218, |
|
"loss": 1.6159, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00016891891891891893, |
|
"loss": 1.4989, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00017567567567567568, |
|
"loss": 1.4819, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00018243243243243242, |
|
"loss": 1.4353, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001891891891891892, |
|
"loss": 1.4604, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019594594594594594, |
|
"loss": 1.4115, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00020270270270270272, |
|
"loss": 1.3756, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00020945945945945947, |
|
"loss": 1.3704, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00021621621621621624, |
|
"loss": 1.3525, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.000222972972972973, |
|
"loss": 1.3842, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00022972972972972974, |
|
"loss": 1.3297, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00023648648648648648, |
|
"loss": 1.304, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00024324324324324326, |
|
"loss": 1.4385, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00025, |
|
"loss": 1.3085, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002567567567567567, |
|
"loss": 1.3235, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002635135135135135, |
|
"loss": 1.3504, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0002702702702702703, |
|
"loss": 1.3193, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00027702702702702705, |
|
"loss": 1.2816, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00028378378378378377, |
|
"loss": 1.3683, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00029054054054054054, |
|
"loss": 1.1493, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0002972972972972973, |
|
"loss": 1.3138, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00030405405405405404, |
|
"loss": 1.3149, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0003108108108108108, |
|
"loss": 1.297, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00031756756756756753, |
|
"loss": 1.4255, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00032432432432432436, |
|
"loss": 1.3951, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0003310810810810811, |
|
"loss": 1.2615, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00033783783783783786, |
|
"loss": 1.2754, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0003445945945945946, |
|
"loss": 1.3006, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00035135135135135135, |
|
"loss": 1.444, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0003581081081081081, |
|
"loss": 1.2068, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00036486486486486485, |
|
"loss": 1.4908, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0003716216216216216, |
|
"loss": 1.3093, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0003783783783783784, |
|
"loss": 1.2905, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00038513513513513517, |
|
"loss": 1.4815, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0003918918918918919, |
|
"loss": 1.2824, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00039864864864864866, |
|
"loss": 1.3966, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00040540540540540544, |
|
"loss": 1.3284, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00041216216216216216, |
|
"loss": 1.249, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00041891891891891893, |
|
"loss": 1.2585, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00042567567567567565, |
|
"loss": 1.3343, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0004324324324324325, |
|
"loss": 1.2704, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0004391891891891892, |
|
"loss": 1.3528, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.000445945945945946, |
|
"loss": 1.3853, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0004527027027027027, |
|
"loss": 1.3388, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00045945945945945947, |
|
"loss": 1.4151, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00046621621621621625, |
|
"loss": 1.4184, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00047297297297297297, |
|
"loss": 1.3309, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00047972972972972974, |
|
"loss": 1.4356, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0004864864864864865, |
|
"loss": 1.4564, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0004932432432432432, |
|
"loss": 1.3526, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0005, |
|
"loss": 1.2855, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7249449889977996, |
|
"eval_f1": 0.7228725416685348, |
|
"eval_loss": 0.9570144414901733, |
|
"eval_precision": 0.7636111347187294, |
|
"eval_recall": 0.7236760047474455, |
|
"eval_runtime": 196.5787, |
|
"eval_samples_per_second": 50.86, |
|
"eval_steps_per_second": 1.887, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0004992492492492493, |
|
"loss": 1.2199, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0004984984984984984, |
|
"loss": 1.3408, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0004977477477477478, |
|
"loss": 1.2037, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.000496996996996997, |
|
"loss": 1.3243, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0004962462462462463, |
|
"loss": 1.3089, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0004954954954954955, |
|
"loss": 1.2901, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0004947447447447447, |
|
"loss": 1.2378, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0004939939939939941, |
|
"loss": 1.2818, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0004932432432432432, |
|
"loss": 1.2249, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0004924924924924925, |
|
"loss": 1.2199, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0004917417417417418, |
|
"loss": 1.2566, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0004909909909909909, |
|
"loss": 1.2868, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0004902402402402403, |
|
"loss": 1.4444, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0004894894894894895, |
|
"loss": 1.3055, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0004887387387387388, |
|
"loss": 1.2765, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.000487987987987988, |
|
"loss": 1.2846, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00048723723723723724, |
|
"loss": 1.2887, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0004864864864864865, |
|
"loss": 1.2308, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0004857357357357358, |
|
"loss": 1.2236, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.000484984984984985, |
|
"loss": 1.2052, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0004842342342342342, |
|
"loss": 1.1945, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0004834834834834835, |
|
"loss": 1.2301, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0004827327327327327, |
|
"loss": 1.2469, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00048198198198198204, |
|
"loss": 1.1334, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00048123123123123125, |
|
"loss": 1.2563, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00048048048048048047, |
|
"loss": 1.2526, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00047972972972972974, |
|
"loss": 1.3089, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00047897897897897896, |
|
"loss": 1.1471, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0004782282282282283, |
|
"loss": 1.2159, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0004774774774774775, |
|
"loss": 1.254, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0004767267267267267, |
|
"loss": 1.1875, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.000475975975975976, |
|
"loss": 1.28, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.0004752252252252252, |
|
"loss": 1.2797, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0004744744744744745, |
|
"loss": 1.2614, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00047372372372372375, |
|
"loss": 1.2121, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00047297297297297297, |
|
"loss": 1.2221, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00047222222222222224, |
|
"loss": 1.1854, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00047147147147147146, |
|
"loss": 1.147, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0004707207207207207, |
|
"loss": 1.1233, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00046996996996997, |
|
"loss": 1.1032, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.0004692192192192192, |
|
"loss": 1.1887, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0004684684684684685, |
|
"loss": 1.1418, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0004677177177177177, |
|
"loss": 1.283, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.000466966966966967, |
|
"loss": 1.1715, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00046621621621621625, |
|
"loss": 1.1116, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00046546546546546546, |
|
"loss": 1.1327, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00046471471471471473, |
|
"loss": 1.1484, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00046396396396396395, |
|
"loss": 1.0976, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0004632132132132132, |
|
"loss": 1.2164, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0004624624624624625, |
|
"loss": 1.108, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.0004617117117117117, |
|
"loss": 1.0269, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.000460960960960961, |
|
"loss": 1.1285, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0004602102102102102, |
|
"loss": 1.0542, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00045945945945945947, |
|
"loss": 1.1624, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00045870870870870874, |
|
"loss": 1.1616, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00045795795795795796, |
|
"loss": 1.1036, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00045720720720720723, |
|
"loss": 1.0506, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00045645645645645645, |
|
"loss": 1.1882, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0004557057057057057, |
|
"loss": 1.1138, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.000454954954954955, |
|
"loss": 1.1566, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0004542042042042042, |
|
"loss": 1.1016, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0004534534534534535, |
|
"loss": 1.0219, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.0004527027027027027, |
|
"loss": 1.0964, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.0004519519519519519, |
|
"loss": 1.0864, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00045120120120120124, |
|
"loss": 1.15, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00045045045045045046, |
|
"loss": 1.074, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00044969969969969973, |
|
"loss": 0.9878, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00044894894894894895, |
|
"loss": 1.1312, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00044819819819819816, |
|
"loss": 1.0452, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.0004474474474474475, |
|
"loss": 1.1523, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.0004466966966966967, |
|
"loss": 1.0347, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.000445945945945946, |
|
"loss": 1.1487, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0004451951951951952, |
|
"loss": 1.1014, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0004444444444444444, |
|
"loss": 1.1598, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7715543108621724, |
|
"eval_f1": 0.7705672329120885, |
|
"eval_loss": 0.8161230087280273, |
|
"eval_precision": 0.7970684748506285, |
|
"eval_recall": 0.7687323574914616, |
|
"eval_runtime": 192.2102, |
|
"eval_samples_per_second": 52.016, |
|
"eval_steps_per_second": 1.93, |
|
"step": 1481 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.00044369369369369374, |
|
"loss": 1.0385, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00044294294294294295, |
|
"loss": 0.9801, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0004421921921921922, |
|
"loss": 0.9433, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.00044144144144144144, |
|
"loss": 0.9969, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00044069069069069066, |
|
"loss": 1.0062, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.00043993993993994, |
|
"loss": 0.9232, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.0004391891891891892, |
|
"loss": 1.0435, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.0004384384384384385, |
|
"loss": 0.9815, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.0004376876876876877, |
|
"loss": 0.98, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.0004369369369369369, |
|
"loss": 0.9993, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.00043618618618618623, |
|
"loss": 0.9791, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.00043543543543543545, |
|
"loss": 0.9976, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.00043468468468468467, |
|
"loss": 0.9826, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.00043393393393393394, |
|
"loss": 0.9983, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.00043318318318318316, |
|
"loss": 0.9786, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.0004324324324324325, |
|
"loss": 0.9759, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.0004316816816816817, |
|
"loss": 0.9812, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.0004309309309309309, |
|
"loss": 0.9019, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.0004301801801801802, |
|
"loss": 0.9509, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0004294294294294294, |
|
"loss": 0.8406, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.00042867867867867873, |
|
"loss": 0.8635, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.00042792792792792795, |
|
"loss": 0.9476, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.00042717717717717717, |
|
"loss": 0.9808, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.00042642642642642644, |
|
"loss": 0.8811, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.00042567567567567565, |
|
"loss": 0.9824, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.000424924924924925, |
|
"loss": 0.8877, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.0004241741741741742, |
|
"loss": 0.9078, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.0004234234234234234, |
|
"loss": 0.9928, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 0.0004226726726726727, |
|
"loss": 0.9896, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0004219219219219219, |
|
"loss": 0.9167, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.0004211711711711712, |
|
"loss": 0.9614, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.00042042042042042044, |
|
"loss": 0.9728, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.00041966966966966966, |
|
"loss": 0.8913, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.00041891891891891893, |
|
"loss": 0.9239, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.00041816816816816815, |
|
"loss": 0.8981, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0004174174174174174, |
|
"loss": 0.9159, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.0004166666666666667, |
|
"loss": 0.9693, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.0004159159159159159, |
|
"loss": 0.9409, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.0004151651651651652, |
|
"loss": 0.9313, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.0004144144144144144, |
|
"loss": 0.8637, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.00041366366366366367, |
|
"loss": 0.9116, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.00041291291291291294, |
|
"loss": 0.9368, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.00041216216216216216, |
|
"loss": 0.9111, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.00041141141141141143, |
|
"loss": 0.9446, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.00041066066066066065, |
|
"loss": 0.9185, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0004099099099099099, |
|
"loss": 0.8664, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.0004091591591591592, |
|
"loss": 0.7811, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.0004084084084084084, |
|
"loss": 0.9265, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.0004076576576576577, |
|
"loss": 0.83, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.0004069069069069069, |
|
"loss": 0.8942, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.00040615615615615617, |
|
"loss": 0.9473, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.00040540540540540544, |
|
"loss": 0.9547, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 0.00040465465465465466, |
|
"loss": 0.9377, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 0.00040390390390390393, |
|
"loss": 0.9604, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.00040315315315315314, |
|
"loss": 0.9484, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 0.0004024024024024024, |
|
"loss": 0.8361, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.0004016516516516517, |
|
"loss": 0.8545, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.0004009009009009009, |
|
"loss": 0.9045, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.0004001501501501502, |
|
"loss": 0.8322, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 0.0003993993993993994, |
|
"loss": 0.9181, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 0.00039864864864864866, |
|
"loss": 0.7966, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.00039789789789789794, |
|
"loss": 0.9089, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.00039714714714714715, |
|
"loss": 0.9082, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.0003963963963963964, |
|
"loss": 0.8908, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 0.00039564564564564564, |
|
"loss": 0.8805, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 0.00039489489489489486, |
|
"loss": 0.9338, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.0003941441441441442, |
|
"loss": 0.9174, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.0003933933933933934, |
|
"loss": 0.902, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.00039264264264264267, |
|
"loss": 0.8365, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 0.0003918918918918919, |
|
"loss": 0.7663, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.0003911411411411411, |
|
"loss": 0.9336, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 0.0003903903903903904, |
|
"loss": 0.8055, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.00038963963963963965, |
|
"loss": 0.8888, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0003888888888888889, |
|
"loss": 0.9069, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8292658531706342, |
|
"eval_f1": 0.8288337486077575, |
|
"eval_loss": 0.5952173471450806, |
|
"eval_precision": 0.842670072902701, |
|
"eval_recall": 0.8282744379906413, |
|
"eval_runtime": 192.1316, |
|
"eval_samples_per_second": 52.037, |
|
"eval_steps_per_second": 1.931, |
|
"step": 2222 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 0.00038813813813813814, |
|
"loss": 0.7958, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 0.00038738738738738736, |
|
"loss": 0.7455, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.0003866366366366366, |
|
"loss": 0.7151, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.0003858858858858859, |
|
"loss": 0.7621, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.00038513513513513517, |
|
"loss": 0.8418, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 0.0003843843843843844, |
|
"loss": 0.8065, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 0.0003836336336336336, |
|
"loss": 0.7361, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 0.0003828828828828829, |
|
"loss": 0.7774, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.00038213213213213215, |
|
"loss": 0.7262, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 0.0003813813813813814, |
|
"loss": 0.7989, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.00038063063063063064, |
|
"loss": 0.8062, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 0.00037987987987987985, |
|
"loss": 0.7957, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 0.0003791291291291291, |
|
"loss": 0.7507, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.0003783783783783784, |
|
"loss": 0.7881, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.00037762762762762767, |
|
"loss": 0.7048, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0003768768768768769, |
|
"loss": 0.8162, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 0.0003761261261261261, |
|
"loss": 0.7446, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.00037537537537537537, |
|
"loss": 0.7904, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 0.00037462462462462464, |
|
"loss": 0.7939, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 0.0003738738738738739, |
|
"loss": 0.6914, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.00037312312312312313, |
|
"loss": 0.8431, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 0.00037237237237237235, |
|
"loss": 0.775, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 0.0003716216216216216, |
|
"loss": 0.7682, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 0.0003708708708708709, |
|
"loss": 0.8463, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.00037012012012012016, |
|
"loss": 0.7268, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 0.0003693693693693694, |
|
"loss": 0.7556, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.0003686186186186186, |
|
"loss": 0.7808, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 0.00036786786786786787, |
|
"loss": 0.7615, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.00036711711711711714, |
|
"loss": 0.798, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 0.0003663663663663664, |
|
"loss": 0.7917, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 0.00036561561561561563, |
|
"loss": 0.75, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 0.00036486486486486485, |
|
"loss": 0.6937, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 0.0003641141141141141, |
|
"loss": 0.7988, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 0.0003633633633633634, |
|
"loss": 0.7315, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 0.00036261261261261266, |
|
"loss": 0.7546, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.0003618618618618619, |
|
"loss": 0.7216, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 0.0003611111111111111, |
|
"loss": 0.7669, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.00036036036036036037, |
|
"loss": 0.7127, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 0.00035960960960960964, |
|
"loss": 0.7755, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.0003588588588588589, |
|
"loss": 0.8252, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 0.0003581081081081081, |
|
"loss": 0.7839, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.00035735735735735734, |
|
"loss": 0.7111, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 0.0003566066066066066, |
|
"loss": 0.6958, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 0.00035585585585585583, |
|
"loss": 0.6712, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.00035510510510510516, |
|
"loss": 0.6692, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 0.0003543543543543544, |
|
"loss": 0.7866, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.0003536036036036036, |
|
"loss": 0.7061, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 0.00035285285285285286, |
|
"loss": 0.7742, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 0.0003521021021021021, |
|
"loss": 0.6905, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.00035135135135135135, |
|
"loss": 0.7464, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 0.0003506006006006006, |
|
"loss": 0.7855, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 0.00034984984984984984, |
|
"loss": 0.7871, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 0.0003490990990990991, |
|
"loss": 0.6782, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 0.00034834834834834833, |
|
"loss": 0.836, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.0003475975975975976, |
|
"loss": 0.6404, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 0.00034684684684684687, |
|
"loss": 0.8315, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.0003460960960960961, |
|
"loss": 0.6882, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 0.00034534534534534536, |
|
"loss": 0.7388, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 0.0003445945945945946, |
|
"loss": 0.8343, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.00034384384384384385, |
|
"loss": 0.7021, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 0.0003430930930930931, |
|
"loss": 0.7739, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 0.00034234234234234234, |
|
"loss": 0.7021, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 0.0003415915915915916, |
|
"loss": 0.8136, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 0.0003408408408408408, |
|
"loss": 0.7634, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 0.0003400900900900901, |
|
"loss": 0.7506, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 0.00033933933933933937, |
|
"loss": 0.7432, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 0.0003385885885885886, |
|
"loss": 0.7378, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 0.00033783783783783786, |
|
"loss": 0.6994, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 0.0003370870870870871, |
|
"loss": 0.7371, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 0.00033633633633633635, |
|
"loss": 0.7055, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 0.0003355855855855856, |
|
"loss": 0.703, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 0.00033483483483483483, |
|
"loss": 0.7472, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 0.0003340840840840841, |
|
"loss": 0.8109, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0003333333333333333, |
|
"loss": 0.7519, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8491698339667934, |
|
"eval_f1": 0.849423792771648, |
|
"eval_loss": 0.5195841789245605, |
|
"eval_precision": 0.8611084598890817, |
|
"eval_recall": 0.8482889408623805, |
|
"eval_runtime": 190.693, |
|
"eval_samples_per_second": 52.43, |
|
"eval_steps_per_second": 1.946, |
|
"step": 2963 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 0.0003325825825825826, |
|
"loss": 0.7232, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 0.00033183183183183186, |
|
"loss": 0.5435, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 0.0003310810810810811, |
|
"loss": 0.6165, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.00033033033033033035, |
|
"loss": 0.6642, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 0.00032957957957957957, |
|
"loss": 0.6566, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 0.00032882882882882884, |
|
"loss": 0.6535, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 0.0003280780780780781, |
|
"loss": 0.6825, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.00032732732732732733, |
|
"loss": 0.5755, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 0.0003265765765765766, |
|
"loss": 0.6186, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 0.0003258258258258258, |
|
"loss": 0.6469, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 0.00032507507507507504, |
|
"loss": 0.6201, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 0.00032432432432432436, |
|
"loss": 0.6528, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 0.0003235735735735736, |
|
"loss": 0.7014, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 0.00032282282282282285, |
|
"loss": 0.6835, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.00032207207207207207, |
|
"loss": 0.6827, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 0.0003213213213213213, |
|
"loss": 0.7095, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 0.0003205705705705706, |
|
"loss": 0.6773, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 0.00031981981981981983, |
|
"loss": 0.6967, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.0003190690690690691, |
|
"loss": 0.6751, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 0.0003183183183183183, |
|
"loss": 0.6711, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 0.00031756756756756753, |
|
"loss": 0.5826, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.00031681681681681686, |
|
"loss": 0.6481, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.0003160660660660661, |
|
"loss": 0.6282, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 0.00031531531531531535, |
|
"loss": 0.6441, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 0.00031456456456456456, |
|
"loss": 0.643, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.0003138138138138138, |
|
"loss": 0.6681, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 0.0003130630630630631, |
|
"loss": 0.642, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 0.0003123123123123123, |
|
"loss": 0.6114, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 0.00031156156156156154, |
|
"loss": 0.6465, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.0003108108108108108, |
|
"loss": 0.7339, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 0.00031006006006006003, |
|
"loss": 0.6085, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 0.00030930930930930936, |
|
"loss": 0.6396, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 0.00030855855855855857, |
|
"loss": 0.6864, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 0.0003078078078078078, |
|
"loss": 0.6241, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 0.00030705705705705706, |
|
"loss": 0.634, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.0003063063063063063, |
|
"loss": 0.6854, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 0.0003055555555555556, |
|
"loss": 0.5872, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 0.0003048048048048048, |
|
"loss": 0.5485, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 0.00030405405405405404, |
|
"loss": 0.6161, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 0.0003033033033033033, |
|
"loss": 0.6059, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 0.0003025525525525525, |
|
"loss": 0.6244, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 0.00030180180180180185, |
|
"loss": 0.6937, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 0.00030105105105105107, |
|
"loss": 0.6506, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 0.0003003003003003003, |
|
"loss": 0.6477, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 0.00029954954954954956, |
|
"loss": 0.6439, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 0.0002987987987987988, |
|
"loss": 0.6016, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 0.0002980480480480481, |
|
"loss": 0.5656, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 0.0002972972972972973, |
|
"loss": 0.6457, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.00029654654654654654, |
|
"loss": 0.6378, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 0.0002957957957957958, |
|
"loss": 0.6456, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 0.000295045045045045, |
|
"loss": 0.6649, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 0.0002942942942942943, |
|
"loss": 0.6672, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 0.00029354354354354357, |
|
"loss": 0.6567, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 0.0002927927927927928, |
|
"loss": 0.6411, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 0.00029204204204204206, |
|
"loss": 0.6502, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 0.00029129129129129127, |
|
"loss": 0.6637, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 0.00029054054054054054, |
|
"loss": 0.6339, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 0.0002897897897897898, |
|
"loss": 0.5812, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.00028903903903903903, |
|
"loss": 0.6396, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.0002882882882882883, |
|
"loss": 0.6208, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 0.0002875375375375375, |
|
"loss": 0.6381, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 0.0002867867867867868, |
|
"loss": 0.691, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 0.00028603603603603606, |
|
"loss": 0.7211, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 0.0002852852852852853, |
|
"loss": 0.616, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 0.00028453453453453455, |
|
"loss": 0.5619, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 0.00028378378378378377, |
|
"loss": 0.5765, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 0.00028303303303303304, |
|
"loss": 0.6436, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 0.0002822822822822823, |
|
"loss": 0.6864, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 0.00028153153153153153, |
|
"loss": 0.6217, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 0.0002807807807807808, |
|
"loss": 0.5852, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.00028003003003003, |
|
"loss": 0.6185, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 0.0002792792792792793, |
|
"loss": 0.6803, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 0.00027852852852852856, |
|
"loss": 0.5834, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 0.0002777777777777778, |
|
"loss": 0.6357, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8704740948189638, |
|
"eval_f1": 0.8689097751341385, |
|
"eval_loss": 0.44387924671173096, |
|
"eval_precision": 0.8761662335945799, |
|
"eval_recall": 0.8693479074500065, |
|
"eval_runtime": 191.3953, |
|
"eval_samples_per_second": 52.237, |
|
"eval_steps_per_second": 1.938, |
|
"step": 3703 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 0.00027702702702702705, |
|
"loss": 0.5605, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.00027627627627627627, |
|
"loss": 0.5468, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 0.00027552552552552554, |
|
"loss": 0.5749, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.0002747747747747748, |
|
"loss": 0.53, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 0.000274024024024024, |
|
"loss": 0.5308, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.0002732732732732733, |
|
"loss": 0.5127, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 0.0002725225225225225, |
|
"loss": 0.5424, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.00027177177177177173, |
|
"loss": 0.5608, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 0.00027102102102102106, |
|
"loss": 0.621, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 0.0002702702702702703, |
|
"loss": 0.5018, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.00026951951951951955, |
|
"loss": 0.4879, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 0.00026876876876876876, |
|
"loss": 0.4991, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 0.000268018018018018, |
|
"loss": 0.5633, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 0.0002672672672672673, |
|
"loss": 0.6233, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.0002665165165165165, |
|
"loss": 0.511, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 0.0002657657657657658, |
|
"loss": 0.5036, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 0.000265015015015015, |
|
"loss": 0.5826, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 0.00026426426426426423, |
|
"loss": 0.5605, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 0.0002635135135135135, |
|
"loss": 0.6443, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 0.00026276276276276277, |
|
"loss": 0.582, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.00026201201201201204, |
|
"loss": 0.6232, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 0.00026126126126126126, |
|
"loss": 0.5763, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.0002605105105105105, |
|
"loss": 0.6183, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 0.00025975975975975975, |
|
"loss": 0.5533, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.000259009009009009, |
|
"loss": 0.5633, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 0.0002582582582582583, |
|
"loss": 0.544, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 0.0002575075075075075, |
|
"loss": 0.5882, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 0.0002567567567567567, |
|
"loss": 0.5974, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 0.000256006006006006, |
|
"loss": 0.557, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.00025525525525525527, |
|
"loss": 0.5013, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 0.00025450450450450454, |
|
"loss": 0.5785, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.00025375375375375376, |
|
"loss": 0.558, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.000253003003003003, |
|
"loss": 0.5429, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 0.00025225225225225225, |
|
"loss": 0.5667, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 0.0002515015015015015, |
|
"loss": 0.5386, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 0.0002507507507507508, |
|
"loss": 0.6074, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 0.00025, |
|
"loss": 0.5583, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.0002492492492492492, |
|
"loss": 0.4637, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 0.0002484984984984985, |
|
"loss": 0.5877, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 0.00024774774774774777, |
|
"loss": 0.4403, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 0.00024699699699699704, |
|
"loss": 0.5379, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.00024624624624624625, |
|
"loss": 0.5562, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 0.00024549549549549547, |
|
"loss": 0.5481, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.00024474474474474474, |
|
"loss": 0.5422, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.000243993993993994, |
|
"loss": 0.5545, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 0.00024324324324324326, |
|
"loss": 0.5128, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.0002424924924924925, |
|
"loss": 0.5653, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 0.00024174174174174175, |
|
"loss": 0.5782, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 0.00024099099099099102, |
|
"loss": 0.5709, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 0.00024024024024024023, |
|
"loss": 0.5307, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 0.00023948948948948948, |
|
"loss": 0.569, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.00023873873873873875, |
|
"loss": 0.5939, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 0.000237987987987988, |
|
"loss": 0.5127, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 0.00023723723723723724, |
|
"loss": 0.5174, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 0.00023648648648648648, |
|
"loss": 0.558, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 0.00023573573573573573, |
|
"loss": 0.5006, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.000234984984984985, |
|
"loss": 0.6208, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 0.00023423423423423424, |
|
"loss": 0.554, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 0.0002334834834834835, |
|
"loss": 0.5183, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 0.00023273273273273273, |
|
"loss": 0.5545, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 0.00023198198198198198, |
|
"loss": 0.5011, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 0.00023123123123123125, |
|
"loss": 0.5286, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 0.0002304804804804805, |
|
"loss": 0.4974, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.00022972972972972974, |
|
"loss": 0.576, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 0.00022897897897897898, |
|
"loss": 0.4924, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 0.00022822822822822822, |
|
"loss": 0.5501, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 0.0002274774774774775, |
|
"loss": 0.5307, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 0.00022672672672672674, |
|
"loss": 0.5161, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 0.00022597597597597596, |
|
"loss": 0.5153, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 0.00022522522522522523, |
|
"loss": 0.5222, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 0.00022447447447447447, |
|
"loss": 0.5144, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 0.00022372372372372374, |
|
"loss": 0.5002, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 0.000222972972972973, |
|
"loss": 0.4556, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.0002222222222222222, |
|
"loss": 0.5657, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8890778155631126, |
|
"eval_f1": 0.8884475854159729, |
|
"eval_loss": 0.405392587184906, |
|
"eval_precision": 0.8929255500739524, |
|
"eval_recall": 0.8885232893443815, |
|
"eval_runtime": 191.4068, |
|
"eval_samples_per_second": 52.234, |
|
"eval_steps_per_second": 1.938, |
|
"step": 4444 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 0.00022147147147147148, |
|
"loss": 0.5383, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 0.00022072072072072072, |
|
"loss": 0.444, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 0.00021996996996997, |
|
"loss": 0.4961, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.00021921921921921924, |
|
"loss": 0.5372, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 0.00021846846846846845, |
|
"loss": 0.4424, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 0.00021771771771771773, |
|
"loss": 0.3961, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 0.00021696696696696697, |
|
"loss": 0.4882, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 0.00021621621621621624, |
|
"loss": 0.4517, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.00021546546546546546, |
|
"loss": 0.4742, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 0.0002147147147147147, |
|
"loss": 0.5285, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 0.00021396396396396397, |
|
"loss": 0.4771, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 0.00021321321321321322, |
|
"loss": 0.4278, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 0.0002124624624624625, |
|
"loss": 0.5205, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 0.0002117117117117117, |
|
"loss": 0.5019, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 0.00021096096096096095, |
|
"loss": 0.4504, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 0.00021021021021021022, |
|
"loss": 0.411, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 0.00020945945945945947, |
|
"loss": 0.5114, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 0.0002087087087087087, |
|
"loss": 0.4836, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 0.00020795795795795796, |
|
"loss": 0.4559, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.0002072072072072072, |
|
"loss": 0.4264, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 0.00020645645645645647, |
|
"loss": 0.4078, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 0.00020570570570570572, |
|
"loss": 0.451, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.00020495495495495496, |
|
"loss": 0.407, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 0.0002042042042042042, |
|
"loss": 0.412, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 0.00020345345345345345, |
|
"loss": 0.502, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 0.00020270270270270272, |
|
"loss": 0.4881, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.00020195195195195196, |
|
"loss": 0.4685, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.0002012012012012012, |
|
"loss": 0.4531, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 0.00020045045045045045, |
|
"loss": 0.3962, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.0001996996996996997, |
|
"loss": 0.3741, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.00019894894894894897, |
|
"loss": 0.4502, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 0.0001981981981981982, |
|
"loss": 0.4775, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 0.00019744744744744743, |
|
"loss": 0.5319, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.0001966966966966967, |
|
"loss": 0.5363, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 0.00019594594594594594, |
|
"loss": 0.4511, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 0.0001951951951951952, |
|
"loss": 0.4314, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 0.00019444444444444446, |
|
"loss": 0.405, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 0.00019369369369369368, |
|
"loss": 0.4473, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.00019294294294294295, |
|
"loss": 0.436, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 0.0001921921921921922, |
|
"loss": 0.4574, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 0.00019144144144144144, |
|
"loss": 0.4293, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 0.0001906906906906907, |
|
"loss": 0.3983, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 0.00018993993993993993, |
|
"loss": 0.4439, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 0.0001891891891891892, |
|
"loss": 0.4613, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.00018843843843843844, |
|
"loss": 0.4209, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 0.00018768768768768769, |
|
"loss": 0.5008, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 0.00018693693693693696, |
|
"loss": 0.4341, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 0.00018618618618618617, |
|
"loss": 0.4438, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 0.00018543543543543545, |
|
"loss": 0.3922, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 0.0001846846846846847, |
|
"loss": 0.4661, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 0.00018393393393393393, |
|
"loss": 0.4535, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 0.0001831831831831832, |
|
"loss": 0.4534, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 0.00018243243243243242, |
|
"loss": 0.4782, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 0.0001816816816816817, |
|
"loss": 0.4182, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 0.00018093093093093094, |
|
"loss": 0.4373, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 0.00018018018018018018, |
|
"loss": 0.4867, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 0.00017942942942942945, |
|
"loss": 0.4659, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.00017867867867867867, |
|
"loss": 0.3941, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 0.00017792792792792792, |
|
"loss": 0.4212, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 0.0001771771771771772, |
|
"loss": 0.4637, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 0.00017642642642642643, |
|
"loss": 0.5005, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 0.00017567567567567568, |
|
"loss": 0.4785, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 0.00017492492492492492, |
|
"loss": 0.4328, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 0.00017417417417417416, |
|
"loss": 0.3952, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 0.00017342342342342344, |
|
"loss": 0.4419, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 0.00017267267267267268, |
|
"loss": 0.4173, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 0.00017192192192192192, |
|
"loss": 0.4701, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 0.00017117117117117117, |
|
"loss": 0.4804, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 0.0001704204204204204, |
|
"loss": 0.3814, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 0.00016966966966966968, |
|
"loss": 0.4407, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 0.00016891891891891893, |
|
"loss": 0.4275, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 0.00016816816816816817, |
|
"loss": 0.4784, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 0.00016741741741741742, |
|
"loss": 0.5154, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 0.00016666666666666666, |
|
"loss": 0.4628, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9037807561512302, |
|
"eval_f1": 0.9033003135892885, |
|
"eval_loss": 0.3493529260158539, |
|
"eval_precision": 0.9058549926677517, |
|
"eval_recall": 0.9033230091030984, |
|
"eval_runtime": 191.6635, |
|
"eval_samples_per_second": 52.164, |
|
"eval_steps_per_second": 1.936, |
|
"step": 5185 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 0.00016591591591591593, |
|
"loss": 0.3719, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 0.00016516516516516518, |
|
"loss": 0.3562, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 0.00016441441441441442, |
|
"loss": 0.4041, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.00016366366366366367, |
|
"loss": 0.3744, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 0.0001629129129129129, |
|
"loss": 0.4728, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 0.00016216216216216218, |
|
"loss": 0.4138, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 0.00016141141141141143, |
|
"loss": 0.3801, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.00016066066066066064, |
|
"loss": 0.4113, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 0.00015990990990990991, |
|
"loss": 0.388, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 0.00015915915915915916, |
|
"loss": 0.4006, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.00015840840840840843, |
|
"loss": 0.3767, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 0.00015765765765765767, |
|
"loss": 0.419, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 0.0001569069069069069, |
|
"loss": 0.3563, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 0.00015615615615615616, |
|
"loss": 0.4191, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.0001554054054054054, |
|
"loss": 0.4491, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 0.00015465465465465468, |
|
"loss": 0.3764, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 0.0001539039039039039, |
|
"loss": 0.3798, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 0.00015315315315315314, |
|
"loss": 0.4415, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 0.0001524024024024024, |
|
"loss": 0.3818, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 0.00015165165165165165, |
|
"loss": 0.3974, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 0.00015090090090090093, |
|
"loss": 0.4371, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 0.00015015015015015014, |
|
"loss": 0.4014, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 0.0001493993993993994, |
|
"loss": 0.3844, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 0.00014864864864864866, |
|
"loss": 0.384, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 0.0001478978978978979, |
|
"loss": 0.4619, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 0.00014714714714714715, |
|
"loss": 0.3962, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 0.0001463963963963964, |
|
"loss": 0.4337, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 0.00014564564564564564, |
|
"loss": 0.4088, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 0.0001448948948948949, |
|
"loss": 0.3786, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 0.00014414414414414415, |
|
"loss": 0.4226, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 0.0001433933933933934, |
|
"loss": 0.44, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 0.00014264264264264264, |
|
"loss": 0.4322, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 0.00014189189189189188, |
|
"loss": 0.3269, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 0.00014114114114114116, |
|
"loss": 0.347, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 0.0001403903903903904, |
|
"loss": 0.4496, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 0.00013963963963963964, |
|
"loss": 0.4092, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 0.0001388888888888889, |
|
"loss": 0.3484, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.00013813813813813813, |
|
"loss": 0.4184, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 0.0001373873873873874, |
|
"loss": 0.3967, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 0.00013663663663663665, |
|
"loss": 0.4125, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 0.00013588588588588587, |
|
"loss": 0.401, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 0.00013513513513513514, |
|
"loss": 0.4787, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 0.00013438438438438438, |
|
"loss": 0.3997, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 0.00013363363363363365, |
|
"loss": 0.3598, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 0.0001328828828828829, |
|
"loss": 0.3988, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 0.00013213213213213211, |
|
"loss": 0.3209, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 0.00013138138138138139, |
|
"loss": 0.3528, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 0.00013063063063063063, |
|
"loss": 0.4024, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 0.00012987987987987987, |
|
"loss": 0.3689, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 0.00012912912912912915, |
|
"loss": 0.3908, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 0.00012837837837837836, |
|
"loss": 0.4048, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 0.00012762762762762763, |
|
"loss": 0.3461, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 0.00012687687687687688, |
|
"loss": 0.4064, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 0.00012612612612612612, |
|
"loss": 0.3859, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 0.0001253753753753754, |
|
"loss": 0.3992, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 0.0001246246246246246, |
|
"loss": 0.3116, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 0.00012387387387387388, |
|
"loss": 0.3627, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 0.00012312312312312313, |
|
"loss": 0.3946, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 0.00012237237237237237, |
|
"loss": 0.3297, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 0.00012162162162162163, |
|
"loss": 0.4199, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 0.00012087087087087087, |
|
"loss": 0.4164, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 0.00012012012012012012, |
|
"loss": 0.3763, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 0.00011936936936936938, |
|
"loss": 0.3401, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 0.00011861861861861862, |
|
"loss": 0.3524, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 0.00011786786786786786, |
|
"loss": 0.4263, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 0.00011711711711711712, |
|
"loss": 0.3855, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 0.00011636636636636637, |
|
"loss": 0.3678, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 0.00011561561561561562, |
|
"loss": 0.3067, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 0.00011486486486486487, |
|
"loss": 0.3637, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 0.00011411411411411411, |
|
"loss": 0.4032, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 0.00011336336336336337, |
|
"loss": 0.3832, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 0.00011261261261261261, |
|
"loss": 0.4019, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 0.00011186186186186187, |
|
"loss": 0.4264, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 0.0001111111111111111, |
|
"loss": 0.3256, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9149829965993198, |
|
"eval_f1": 0.9145293036070032, |
|
"eval_loss": 0.3138176500797272, |
|
"eval_precision": 0.9170611788545995, |
|
"eval_recall": 0.9140792071407026, |
|
"eval_runtime": 191.5713, |
|
"eval_samples_per_second": 52.189, |
|
"eval_steps_per_second": 1.937, |
|
"step": 5926 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.00011036036036036036, |
|
"loss": 0.3767, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 0.00010960960960960962, |
|
"loss": 0.3465, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 0.00010885885885885886, |
|
"loss": 0.3514, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 0.00010810810810810812, |
|
"loss": 0.3637, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 0.00010735735735735735, |
|
"loss": 0.3834, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 0.00010660660660660661, |
|
"loss": 0.2656, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 0.00010585585585585585, |
|
"loss": 0.3559, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 0.00010510510510510511, |
|
"loss": 0.3491, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 0.00010435435435435436, |
|
"loss": 0.4012, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 0.0001036036036036036, |
|
"loss": 0.3757, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 0.00010285285285285286, |
|
"loss": 0.3678, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 0.0001021021021021021, |
|
"loss": 0.3667, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 0.00010135135135135136, |
|
"loss": 0.3421, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 0.0001006006006006006, |
|
"loss": 0.3924, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 9.984984984984985e-05, |
|
"loss": 0.2621, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 9.90990990990991e-05, |
|
"loss": 0.3959, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 9.834834834834835e-05, |
|
"loss": 0.3191, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 9.75975975975976e-05, |
|
"loss": 0.3449, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 9.684684684684684e-05, |
|
"loss": 0.3142, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 9.60960960960961e-05, |
|
"loss": 0.3093, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 9.534534534534535e-05, |
|
"loss": 0.3446, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 9.45945945945946e-05, |
|
"loss": 0.2899, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 9.384384384384384e-05, |
|
"loss": 0.3068, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 9.309309309309309e-05, |
|
"loss": 0.3611, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 9.234234234234235e-05, |
|
"loss": 0.2767, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 9.15915915915916e-05, |
|
"loss": 0.4126, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 9.084084084084085e-05, |
|
"loss": 0.3741, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 9.009009009009009e-05, |
|
"loss": 0.3326, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 8.933933933933934e-05, |
|
"loss": 0.3034, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 8.85885885885886e-05, |
|
"loss": 0.3002, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 8.783783783783784e-05, |
|
"loss": 0.3728, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 8.708708708708708e-05, |
|
"loss": 0.3149, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 8.633633633633634e-05, |
|
"loss": 0.3122, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 8.558558558558558e-05, |
|
"loss": 0.3404, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 8.483483483483484e-05, |
|
"loss": 0.3521, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 8.408408408408409e-05, |
|
"loss": 0.3154, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 8.333333333333333e-05, |
|
"loss": 0.3654, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 8.258258258258259e-05, |
|
"loss": 0.3591, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 8.183183183183183e-05, |
|
"loss": 0.3284, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 8.108108108108109e-05, |
|
"loss": 0.2774, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 8.033033033033032e-05, |
|
"loss": 0.2813, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 7.957957957957958e-05, |
|
"loss": 0.3347, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 7.882882882882884e-05, |
|
"loss": 0.2895, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 7.807807807807808e-05, |
|
"loss": 0.3634, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 7.732732732732734e-05, |
|
"loss": 0.3155, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 7.657657657657657e-05, |
|
"loss": 0.3407, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 7.582582582582583e-05, |
|
"loss": 0.318, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 7.507507507507507e-05, |
|
"loss": 0.2963, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 7.432432432432433e-05, |
|
"loss": 0.3049, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 7.357357357357357e-05, |
|
"loss": 0.3612, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 7.282282282282282e-05, |
|
"loss": 0.3931, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 7.207207207207208e-05, |
|
"loss": 0.3213, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 7.132132132132132e-05, |
|
"loss": 0.3216, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 7.057057057057058e-05, |
|
"loss": 0.341, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 6.981981981981982e-05, |
|
"loss": 0.2771, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 6.906906906906907e-05, |
|
"loss": 0.3066, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 6.831831831831832e-05, |
|
"loss": 0.2938, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 6.756756756756757e-05, |
|
"loss": 0.3225, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 6.681681681681683e-05, |
|
"loss": 0.2781, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 6.606606606606606e-05, |
|
"loss": 0.3331, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"learning_rate": 6.531531531531531e-05, |
|
"loss": 0.3598, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 6.456456456456457e-05, |
|
"loss": 0.325, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 6.381381381381382e-05, |
|
"loss": 0.3053, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 6.306306306306306e-05, |
|
"loss": 0.2996, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 6.23123123123123e-05, |
|
"loss": 0.3505, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 6.156156156156156e-05, |
|
"loss": 0.2732, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 6.0810810810810814e-05, |
|
"loss": 0.3205, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 6.006006006006006e-05, |
|
"loss": 0.3117, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 5.930930930930931e-05, |
|
"loss": 0.3402, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 5.855855855855856e-05, |
|
"loss": 0.3251, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 5.780780780780781e-05, |
|
"loss": 0.3538, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 5.7057057057057056e-05, |
|
"loss": 0.3323, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 5.630630630630631e-05, |
|
"loss": 0.2546, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 5.555555555555555e-05, |
|
"loss": 0.3567, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9256851370274055, |
|
"eval_f1": 0.9250707611820068, |
|
"eval_loss": 0.27885687351226807, |
|
"eval_precision": 0.9262127050820336, |
|
"eval_recall": 0.9250555525343819, |
|
"eval_runtime": 194.8139, |
|
"eval_samples_per_second": 51.321, |
|
"eval_steps_per_second": 1.904, |
|
"step": 6666 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 5.480480480480481e-05, |
|
"loss": 0.2732, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 5.405405405405406e-05, |
|
"loss": 0.316, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 5.3303303303303305e-05, |
|
"loss": 0.2812, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 5.2552552552552556e-05, |
|
"loss": 0.3061, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 5.18018018018018e-05, |
|
"loss": 0.2993, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 5.105105105105105e-05, |
|
"loss": 0.323, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 5.03003003003003e-05, |
|
"loss": 0.2922, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 4.954954954954955e-05, |
|
"loss": 0.3282, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 4.87987987987988e-05, |
|
"loss": 0.2847, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 4.804804804804805e-05, |
|
"loss": 0.2945, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 4.72972972972973e-05, |
|
"loss": 0.2525, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 4.6546546546546544e-05, |
|
"loss": 0.2907, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 4.57957957957958e-05, |
|
"loss": 0.3036, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 4.5045045045045046e-05, |
|
"loss": 0.2495, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 4.42942942942943e-05, |
|
"loss": 0.2752, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 4.354354354354354e-05, |
|
"loss": 0.3388, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 4.279279279279279e-05, |
|
"loss": 0.3166, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 4.204204204204204e-05, |
|
"loss": 0.3494, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 4.1291291291291294e-05, |
|
"loss": 0.2602, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 4.0540540540540545e-05, |
|
"loss": 0.2571, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 3.978978978978979e-05, |
|
"loss": 0.3087, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 3.903903903903904e-05, |
|
"loss": 0.2927, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 3.8288288288288285e-05, |
|
"loss": 0.2868, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 3.7537537537537536e-05, |
|
"loss": 0.2722, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 3.678678678678679e-05, |
|
"loss": 0.249, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 3.603603603603604e-05, |
|
"loss": 0.2773, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 3.528528528528529e-05, |
|
"loss": 0.2947, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 3.453453453453453e-05, |
|
"loss": 0.3421, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 3.3783783783783784e-05, |
|
"loss": 0.2547, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 3.303303303303303e-05, |
|
"loss": 0.2803, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 3.2282282282282286e-05, |
|
"loss": 0.2675, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 3.153153153153153e-05, |
|
"loss": 0.2654, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 3.078078078078078e-05, |
|
"loss": 0.2709, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 3.003003003003003e-05, |
|
"loss": 0.2359, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 2.927927927927928e-05, |
|
"loss": 0.2535, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 2.8528528528528528e-05, |
|
"loss": 0.2867, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 2.7777777777777776e-05, |
|
"loss": 0.254, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 2.702702702702703e-05, |
|
"loss": 0.2382, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 2.6276276276276278e-05, |
|
"loss": 0.2711, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"learning_rate": 2.5525525525525525e-05, |
|
"loss": 0.2785, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 2.4774774774774777e-05, |
|
"loss": 0.2695, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 2.4024024024024024e-05, |
|
"loss": 0.3143, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 2.3273273273273272e-05, |
|
"loss": 0.2781, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 2.2522522522522523e-05, |
|
"loss": 0.254, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 2.177177177177177e-05, |
|
"loss": 0.2371, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 2.102102102102102e-05, |
|
"loss": 0.2558, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 2.0270270270270273e-05, |
|
"loss": 0.2748, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"learning_rate": 1.951951951951952e-05, |
|
"loss": 0.3006, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 1.8768768768768768e-05, |
|
"loss": 0.3219, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 1.801801801801802e-05, |
|
"loss": 0.3013, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 1.7267267267267267e-05, |
|
"loss": 0.2612, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 9.69, |
|
"learning_rate": 1.6516516516516514e-05, |
|
"loss": 0.2406, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 1.5765765765765765e-05, |
|
"loss": 0.2628, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 1.5015015015015015e-05, |
|
"loss": 0.2798, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 1.4264264264264264e-05, |
|
"loss": 0.2725, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 1.3513513513513515e-05, |
|
"loss": 0.3193, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 1.2762762762762763e-05, |
|
"loss": 0.277, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 1.2012012012012012e-05, |
|
"loss": 0.3349, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 1.1261261261261261e-05, |
|
"loss": 0.272, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 1.051051051051051e-05, |
|
"loss": 0.3017, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"learning_rate": 9.75975975975976e-06, |
|
"loss": 0.3224, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 9.00900900900901e-06, |
|
"loss": 0.2935, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 8.258258258258257e-06, |
|
"loss": 0.3441, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 7.507507507507507e-06, |
|
"loss": 0.3263, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"learning_rate": 6.7567567567567575e-06, |
|
"loss": 0.2503, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 6.006006006006006e-06, |
|
"loss": 0.316, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 5.255255255255255e-06, |
|
"loss": 0.309, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 4.504504504504505e-06, |
|
"loss": 0.2497, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 3.7537537537537537e-06, |
|
"loss": 0.206, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 3.003003003003003e-06, |
|
"loss": 0.2813, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 2.2522522522522524e-06, |
|
"loss": 0.3494, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 1.5015015015015015e-06, |
|
"loss": 0.3268, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"learning_rate": 7.507507507507508e-07, |
|
"loss": 0.2431, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 0.0, |
|
"loss": 0.3313, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_accuracy": 0.9310862172434486, |
|
"eval_f1": 0.9305170890009876, |
|
"eval_loss": 0.26112696528434753, |
|
"eval_precision": 0.9311947598825216, |
|
"eval_recall": 0.9304846180354004, |
|
"eval_runtime": 191.2068, |
|
"eval_samples_per_second": 52.289, |
|
"eval_steps_per_second": 1.94, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"step": 7400, |
|
"total_flos": 1.8619741375601993e+20, |
|
"train_loss": 0.7571127768142804, |
|
"train_runtime": 37998.3887, |
|
"train_samples_per_second": 21.049, |
|
"train_steps_per_second": 0.195 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 7400, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 1.8619741375601993e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|