{ "best_metric": 0.9310862172434486, "best_model_checkpoint": "10-convnextv2-base-22k-384-finetuned-spiderTraining100-1000/checkpoint-7400", "epoch": 9.989875126560918, "eval_steps": 500, "global_step": 7400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 6.7567567567567575e-06, "loss": 4.7191, "step": 10 }, { "epoch": 0.03, "learning_rate": 1.3513513513513515e-05, "loss": 4.6815, "step": 20 }, { "epoch": 0.04, "learning_rate": 2.0270270270270273e-05, "loss": 4.6441, "step": 30 }, { "epoch": 0.05, "learning_rate": 2.702702702702703e-05, "loss": 4.5566, "step": 40 }, { "epoch": 0.07, "learning_rate": 3.3783783783783784e-05, "loss": 4.5796, "step": 50 }, { "epoch": 0.08, "learning_rate": 4.0540540540540545e-05, "loss": 4.6235, "step": 60 }, { "epoch": 0.09, "learning_rate": 4.72972972972973e-05, "loss": 4.61, "step": 70 }, { "epoch": 0.11, "learning_rate": 5.405405405405406e-05, "loss": 4.6244, "step": 80 }, { "epoch": 0.12, "learning_rate": 6.0810810810810814e-05, "loss": 4.6277, "step": 90 }, { "epoch": 0.13, "learning_rate": 6.756756756756757e-05, "loss": 4.6237, "step": 100 }, { "epoch": 0.15, "learning_rate": 7.432432432432433e-05, "loss": 4.5135, "step": 110 }, { "epoch": 0.16, "learning_rate": 8.108108108108109e-05, "loss": 4.4143, "step": 120 }, { "epoch": 0.18, "learning_rate": 8.783783783783784e-05, "loss": 4.1232, "step": 130 }, { "epoch": 0.19, "learning_rate": 9.45945945945946e-05, "loss": 3.7123, "step": 140 }, { "epoch": 0.2, "learning_rate": 0.00010135135135135136, "loss": 3.1878, "step": 150 }, { "epoch": 0.22, "learning_rate": 0.00010810810810810812, "loss": 2.7711, "step": 160 }, { "epoch": 0.23, "learning_rate": 0.00011486486486486487, "loss": 2.3975, "step": 170 }, { "epoch": 0.24, "learning_rate": 0.00012162162162162163, "loss": 2.2354, "step": 180 }, { "epoch": 0.26, "learning_rate": 0.00012837837837837836, "loss": 2.0567, "step": 190 }, { "epoch": 0.27, "learning_rate": 0.00013513513513513514, "loss": 1.9001, "step": 200 }, { "epoch": 0.28, "learning_rate": 0.00014189189189189188, "loss": 1.7957, "step": 210 }, { "epoch": 0.3, "learning_rate": 0.00014864864864864866, "loss": 1.683, "step": 220 }, { "epoch": 0.31, "learning_rate": 0.0001554054054054054, "loss": 1.6268, "step": 230 }, { "epoch": 0.32, "learning_rate": 0.00016216216216216218, "loss": 1.6159, "step": 240 }, { "epoch": 0.34, "learning_rate": 0.00016891891891891893, "loss": 1.4989, "step": 250 }, { "epoch": 0.35, "learning_rate": 0.00017567567567567568, "loss": 1.4819, "step": 260 }, { "epoch": 0.36, "learning_rate": 0.00018243243243243242, "loss": 1.4353, "step": 270 }, { "epoch": 0.38, "learning_rate": 0.0001891891891891892, "loss": 1.4604, "step": 280 }, { "epoch": 0.39, "learning_rate": 0.00019594594594594594, "loss": 1.4115, "step": 290 }, { "epoch": 0.4, "learning_rate": 0.00020270270270270272, "loss": 1.3756, "step": 300 }, { "epoch": 0.42, "learning_rate": 0.00020945945945945947, "loss": 1.3704, "step": 310 }, { "epoch": 0.43, "learning_rate": 0.00021621621621621624, "loss": 1.3525, "step": 320 }, { "epoch": 0.45, "learning_rate": 0.000222972972972973, "loss": 1.3842, "step": 330 }, { "epoch": 0.46, "learning_rate": 0.00022972972972972974, "loss": 1.3297, "step": 340 }, { "epoch": 0.47, "learning_rate": 0.00023648648648648648, "loss": 1.304, "step": 350 }, { "epoch": 0.49, "learning_rate": 0.00024324324324324326, "loss": 1.4385, "step": 360 }, { "epoch": 0.5, "learning_rate": 0.00025, "loss": 1.3085, "step": 370 }, { "epoch": 0.51, "learning_rate": 0.0002567567567567567, "loss": 1.3235, "step": 380 }, { "epoch": 0.53, "learning_rate": 0.0002635135135135135, "loss": 1.3504, "step": 390 }, { "epoch": 0.54, "learning_rate": 0.0002702702702702703, "loss": 1.3193, "step": 400 }, { "epoch": 0.55, "learning_rate": 0.00027702702702702705, "loss": 1.2816, "step": 410 }, { "epoch": 0.57, "learning_rate": 0.00028378378378378377, "loss": 1.3683, "step": 420 }, { "epoch": 0.58, "learning_rate": 0.00029054054054054054, "loss": 1.1493, "step": 430 }, { "epoch": 0.59, "learning_rate": 0.0002972972972972973, "loss": 1.3138, "step": 440 }, { "epoch": 0.61, "learning_rate": 0.00030405405405405404, "loss": 1.3149, "step": 450 }, { "epoch": 0.62, "learning_rate": 0.0003108108108108108, "loss": 1.297, "step": 460 }, { "epoch": 0.63, "learning_rate": 0.00031756756756756753, "loss": 1.4255, "step": 470 }, { "epoch": 0.65, "learning_rate": 0.00032432432432432436, "loss": 1.3951, "step": 480 }, { "epoch": 0.66, "learning_rate": 0.0003310810810810811, "loss": 1.2615, "step": 490 }, { "epoch": 0.67, "learning_rate": 0.00033783783783783786, "loss": 1.2754, "step": 500 }, { "epoch": 0.69, "learning_rate": 0.0003445945945945946, "loss": 1.3006, "step": 510 }, { "epoch": 0.7, "learning_rate": 0.00035135135135135135, "loss": 1.444, "step": 520 }, { "epoch": 0.72, "learning_rate": 0.0003581081081081081, "loss": 1.2068, "step": 530 }, { "epoch": 0.73, "learning_rate": 0.00036486486486486485, "loss": 1.4908, "step": 540 }, { "epoch": 0.74, "learning_rate": 0.0003716216216216216, "loss": 1.3093, "step": 550 }, { "epoch": 0.76, "learning_rate": 0.0003783783783783784, "loss": 1.2905, "step": 560 }, { "epoch": 0.77, "learning_rate": 0.00038513513513513517, "loss": 1.4815, "step": 570 }, { "epoch": 0.78, "learning_rate": 0.0003918918918918919, "loss": 1.2824, "step": 580 }, { "epoch": 0.8, "learning_rate": 0.00039864864864864866, "loss": 1.3966, "step": 590 }, { "epoch": 0.81, "learning_rate": 0.00040540540540540544, "loss": 1.3284, "step": 600 }, { "epoch": 0.82, "learning_rate": 0.00041216216216216216, "loss": 1.249, "step": 610 }, { "epoch": 0.84, "learning_rate": 0.00041891891891891893, "loss": 1.2585, "step": 620 }, { "epoch": 0.85, "learning_rate": 0.00042567567567567565, "loss": 1.3343, "step": 630 }, { "epoch": 0.86, "learning_rate": 0.0004324324324324325, "loss": 1.2704, "step": 640 }, { "epoch": 0.88, "learning_rate": 0.0004391891891891892, "loss": 1.3528, "step": 650 }, { "epoch": 0.89, "learning_rate": 0.000445945945945946, "loss": 1.3853, "step": 660 }, { "epoch": 0.9, "learning_rate": 0.0004527027027027027, "loss": 1.3388, "step": 670 }, { "epoch": 0.92, "learning_rate": 0.00045945945945945947, "loss": 1.4151, "step": 680 }, { "epoch": 0.93, "learning_rate": 0.00046621621621621625, "loss": 1.4184, "step": 690 }, { "epoch": 0.94, "learning_rate": 0.00047297297297297297, "loss": 1.3309, "step": 700 }, { "epoch": 0.96, "learning_rate": 0.00047972972972972974, "loss": 1.4356, "step": 710 }, { "epoch": 0.97, "learning_rate": 0.0004864864864864865, "loss": 1.4564, "step": 720 }, { "epoch": 0.99, "learning_rate": 0.0004932432432432432, "loss": 1.3526, "step": 730 }, { "epoch": 1.0, "learning_rate": 0.0005, "loss": 1.2855, "step": 740 }, { "epoch": 1.0, "eval_accuracy": 0.7249449889977996, "eval_f1": 0.7228725416685348, "eval_loss": 0.9570144414901733, "eval_precision": 0.7636111347187294, "eval_recall": 0.7236760047474455, "eval_runtime": 196.5787, "eval_samples_per_second": 50.86, "eval_steps_per_second": 1.887, "step": 740 }, { "epoch": 1.01, "learning_rate": 0.0004992492492492493, "loss": 1.2199, "step": 750 }, { "epoch": 1.03, "learning_rate": 0.0004984984984984984, "loss": 1.3408, "step": 760 }, { "epoch": 1.04, "learning_rate": 0.0004977477477477478, "loss": 1.2037, "step": 770 }, { "epoch": 1.05, "learning_rate": 0.000496996996996997, "loss": 1.3243, "step": 780 }, { "epoch": 1.07, "learning_rate": 0.0004962462462462463, "loss": 1.3089, "step": 790 }, { "epoch": 1.08, "learning_rate": 0.0004954954954954955, "loss": 1.2901, "step": 800 }, { "epoch": 1.09, "learning_rate": 0.0004947447447447447, "loss": 1.2378, "step": 810 }, { "epoch": 1.11, "learning_rate": 0.0004939939939939941, "loss": 1.2818, "step": 820 }, { "epoch": 1.12, "learning_rate": 0.0004932432432432432, "loss": 1.2249, "step": 830 }, { "epoch": 1.13, "learning_rate": 0.0004924924924924925, "loss": 1.2199, "step": 840 }, { "epoch": 1.15, "learning_rate": 0.0004917417417417418, "loss": 1.2566, "step": 850 }, { "epoch": 1.16, "learning_rate": 0.0004909909909909909, "loss": 1.2868, "step": 860 }, { "epoch": 1.17, "learning_rate": 0.0004902402402402403, "loss": 1.4444, "step": 870 }, { "epoch": 1.19, "learning_rate": 0.0004894894894894895, "loss": 1.3055, "step": 880 }, { "epoch": 1.2, "learning_rate": 0.0004887387387387388, "loss": 1.2765, "step": 890 }, { "epoch": 1.21, "learning_rate": 0.000487987987987988, "loss": 1.2846, "step": 900 }, { "epoch": 1.23, "learning_rate": 0.00048723723723723724, "loss": 1.2887, "step": 910 }, { "epoch": 1.24, "learning_rate": 0.0004864864864864865, "loss": 1.2308, "step": 920 }, { "epoch": 1.26, "learning_rate": 0.0004857357357357358, "loss": 1.2236, "step": 930 }, { "epoch": 1.27, "learning_rate": 0.000484984984984985, "loss": 1.2052, "step": 940 }, { "epoch": 1.28, "learning_rate": 0.0004842342342342342, "loss": 1.1945, "step": 950 }, { "epoch": 1.3, "learning_rate": 0.0004834834834834835, "loss": 1.2301, "step": 960 }, { "epoch": 1.31, "learning_rate": 0.0004827327327327327, "loss": 1.2469, "step": 970 }, { "epoch": 1.32, "learning_rate": 0.00048198198198198204, "loss": 1.1334, "step": 980 }, { "epoch": 1.34, "learning_rate": 0.00048123123123123125, "loss": 1.2563, "step": 990 }, { "epoch": 1.35, "learning_rate": 0.00048048048048048047, "loss": 1.2526, "step": 1000 }, { "epoch": 1.36, "learning_rate": 0.00047972972972972974, "loss": 1.3089, "step": 1010 }, { "epoch": 1.38, "learning_rate": 0.00047897897897897896, "loss": 1.1471, "step": 1020 }, { "epoch": 1.39, "learning_rate": 0.0004782282282282283, "loss": 1.2159, "step": 1030 }, { "epoch": 1.4, "learning_rate": 0.0004774774774774775, "loss": 1.254, "step": 1040 }, { "epoch": 1.42, "learning_rate": 0.0004767267267267267, "loss": 1.1875, "step": 1050 }, { "epoch": 1.43, "learning_rate": 0.000475975975975976, "loss": 1.28, "step": 1060 }, { "epoch": 1.44, "learning_rate": 0.0004752252252252252, "loss": 1.2797, "step": 1070 }, { "epoch": 1.46, "learning_rate": 0.0004744744744744745, "loss": 1.2614, "step": 1080 }, { "epoch": 1.47, "learning_rate": 0.00047372372372372375, "loss": 1.2121, "step": 1090 }, { "epoch": 1.48, "learning_rate": 0.00047297297297297297, "loss": 1.2221, "step": 1100 }, { "epoch": 1.5, "learning_rate": 0.00047222222222222224, "loss": 1.1854, "step": 1110 }, { "epoch": 1.51, "learning_rate": 0.00047147147147147146, "loss": 1.147, "step": 1120 }, { "epoch": 1.53, "learning_rate": 0.0004707207207207207, "loss": 1.1233, "step": 1130 }, { "epoch": 1.54, "learning_rate": 0.00046996996996997, "loss": 1.1032, "step": 1140 }, { "epoch": 1.55, "learning_rate": 0.0004692192192192192, "loss": 1.1887, "step": 1150 }, { "epoch": 1.57, "learning_rate": 0.0004684684684684685, "loss": 1.1418, "step": 1160 }, { "epoch": 1.58, "learning_rate": 0.0004677177177177177, "loss": 1.283, "step": 1170 }, { "epoch": 1.59, "learning_rate": 0.000466966966966967, "loss": 1.1715, "step": 1180 }, { "epoch": 1.61, "learning_rate": 0.00046621621621621625, "loss": 1.1116, "step": 1190 }, { "epoch": 1.62, "learning_rate": 0.00046546546546546546, "loss": 1.1327, "step": 1200 }, { "epoch": 1.63, "learning_rate": 0.00046471471471471473, "loss": 1.1484, "step": 1210 }, { "epoch": 1.65, "learning_rate": 0.00046396396396396395, "loss": 1.0976, "step": 1220 }, { "epoch": 1.66, "learning_rate": 0.0004632132132132132, "loss": 1.2164, "step": 1230 }, { "epoch": 1.67, "learning_rate": 0.0004624624624624625, "loss": 1.108, "step": 1240 }, { "epoch": 1.69, "learning_rate": 0.0004617117117117117, "loss": 1.0269, "step": 1250 }, { "epoch": 1.7, "learning_rate": 0.000460960960960961, "loss": 1.1285, "step": 1260 }, { "epoch": 1.71, "learning_rate": 0.0004602102102102102, "loss": 1.0542, "step": 1270 }, { "epoch": 1.73, "learning_rate": 0.00045945945945945947, "loss": 1.1624, "step": 1280 }, { "epoch": 1.74, "learning_rate": 0.00045870870870870874, "loss": 1.1616, "step": 1290 }, { "epoch": 1.75, "learning_rate": 0.00045795795795795796, "loss": 1.1036, "step": 1300 }, { "epoch": 1.77, "learning_rate": 0.00045720720720720723, "loss": 1.0506, "step": 1310 }, { "epoch": 1.78, "learning_rate": 0.00045645645645645645, "loss": 1.1882, "step": 1320 }, { "epoch": 1.8, "learning_rate": 0.0004557057057057057, "loss": 1.1138, "step": 1330 }, { "epoch": 1.81, "learning_rate": 0.000454954954954955, "loss": 1.1566, "step": 1340 }, { "epoch": 1.82, "learning_rate": 0.0004542042042042042, "loss": 1.1016, "step": 1350 }, { "epoch": 1.84, "learning_rate": 0.0004534534534534535, "loss": 1.0219, "step": 1360 }, { "epoch": 1.85, "learning_rate": 0.0004527027027027027, "loss": 1.0964, "step": 1370 }, { "epoch": 1.86, "learning_rate": 0.0004519519519519519, "loss": 1.0864, "step": 1380 }, { "epoch": 1.88, "learning_rate": 0.00045120120120120124, "loss": 1.15, "step": 1390 }, { "epoch": 1.89, "learning_rate": 0.00045045045045045046, "loss": 1.074, "step": 1400 }, { "epoch": 1.9, "learning_rate": 0.00044969969969969973, "loss": 0.9878, "step": 1410 }, { "epoch": 1.92, "learning_rate": 0.00044894894894894895, "loss": 1.1312, "step": 1420 }, { "epoch": 1.93, "learning_rate": 0.00044819819819819816, "loss": 1.0452, "step": 1430 }, { "epoch": 1.94, "learning_rate": 0.0004474474474474475, "loss": 1.1523, "step": 1440 }, { "epoch": 1.96, "learning_rate": 0.0004466966966966967, "loss": 1.0347, "step": 1450 }, { "epoch": 1.97, "learning_rate": 0.000445945945945946, "loss": 1.1487, "step": 1460 }, { "epoch": 1.98, "learning_rate": 0.0004451951951951952, "loss": 1.1014, "step": 1470 }, { "epoch": 2.0, "learning_rate": 0.0004444444444444444, "loss": 1.1598, "step": 1480 }, { "epoch": 2.0, "eval_accuracy": 0.7715543108621724, "eval_f1": 0.7705672329120885, "eval_loss": 0.8161230087280273, "eval_precision": 0.7970684748506285, "eval_recall": 0.7687323574914616, "eval_runtime": 192.2102, "eval_samples_per_second": 52.016, "eval_steps_per_second": 1.93, "step": 1481 }, { "epoch": 2.01, "learning_rate": 0.00044369369369369374, "loss": 1.0385, "step": 1490 }, { "epoch": 2.02, "learning_rate": 0.00044294294294294295, "loss": 0.9801, "step": 1500 }, { "epoch": 2.04, "learning_rate": 0.0004421921921921922, "loss": 0.9433, "step": 1510 }, { "epoch": 2.05, "learning_rate": 0.00044144144144144144, "loss": 0.9969, "step": 1520 }, { "epoch": 2.07, "learning_rate": 0.00044069069069069066, "loss": 1.0062, "step": 1530 }, { "epoch": 2.08, "learning_rate": 0.00043993993993994, "loss": 0.9232, "step": 1540 }, { "epoch": 2.09, "learning_rate": 0.0004391891891891892, "loss": 1.0435, "step": 1550 }, { "epoch": 2.11, "learning_rate": 0.0004384384384384385, "loss": 0.9815, "step": 1560 }, { "epoch": 2.12, "learning_rate": 0.0004376876876876877, "loss": 0.98, "step": 1570 }, { "epoch": 2.13, "learning_rate": 0.0004369369369369369, "loss": 0.9993, "step": 1580 }, { "epoch": 2.15, "learning_rate": 0.00043618618618618623, "loss": 0.9791, "step": 1590 }, { "epoch": 2.16, "learning_rate": 0.00043543543543543545, "loss": 0.9976, "step": 1600 }, { "epoch": 2.17, "learning_rate": 0.00043468468468468467, "loss": 0.9826, "step": 1610 }, { "epoch": 2.19, "learning_rate": 0.00043393393393393394, "loss": 0.9983, "step": 1620 }, { "epoch": 2.2, "learning_rate": 0.00043318318318318316, "loss": 0.9786, "step": 1630 }, { "epoch": 2.21, "learning_rate": 0.0004324324324324325, "loss": 0.9759, "step": 1640 }, { "epoch": 2.23, "learning_rate": 0.0004316816816816817, "loss": 0.9812, "step": 1650 }, { "epoch": 2.24, "learning_rate": 0.0004309309309309309, "loss": 0.9019, "step": 1660 }, { "epoch": 2.25, "learning_rate": 0.0004301801801801802, "loss": 0.9509, "step": 1670 }, { "epoch": 2.27, "learning_rate": 0.0004294294294294294, "loss": 0.8406, "step": 1680 }, { "epoch": 2.28, "learning_rate": 0.00042867867867867873, "loss": 0.8635, "step": 1690 }, { "epoch": 2.29, "learning_rate": 0.00042792792792792795, "loss": 0.9476, "step": 1700 }, { "epoch": 2.31, "learning_rate": 0.00042717717717717717, "loss": 0.9808, "step": 1710 }, { "epoch": 2.32, "learning_rate": 0.00042642642642642644, "loss": 0.8811, "step": 1720 }, { "epoch": 2.34, "learning_rate": 0.00042567567567567565, "loss": 0.9824, "step": 1730 }, { "epoch": 2.35, "learning_rate": 0.000424924924924925, "loss": 0.8877, "step": 1740 }, { "epoch": 2.36, "learning_rate": 0.0004241741741741742, "loss": 0.9078, "step": 1750 }, { "epoch": 2.38, "learning_rate": 0.0004234234234234234, "loss": 0.9928, "step": 1760 }, { "epoch": 2.39, "learning_rate": 0.0004226726726726727, "loss": 0.9896, "step": 1770 }, { "epoch": 2.4, "learning_rate": 0.0004219219219219219, "loss": 0.9167, "step": 1780 }, { "epoch": 2.42, "learning_rate": 0.0004211711711711712, "loss": 0.9614, "step": 1790 }, { "epoch": 2.43, "learning_rate": 0.00042042042042042044, "loss": 0.9728, "step": 1800 }, { "epoch": 2.44, "learning_rate": 0.00041966966966966966, "loss": 0.8913, "step": 1810 }, { "epoch": 2.46, "learning_rate": 0.00041891891891891893, "loss": 0.9239, "step": 1820 }, { "epoch": 2.47, "learning_rate": 0.00041816816816816815, "loss": 0.8981, "step": 1830 }, { "epoch": 2.48, "learning_rate": 0.0004174174174174174, "loss": 0.9159, "step": 1840 }, { "epoch": 2.5, "learning_rate": 0.0004166666666666667, "loss": 0.9693, "step": 1850 }, { "epoch": 2.51, "learning_rate": 0.0004159159159159159, "loss": 0.9409, "step": 1860 }, { "epoch": 2.52, "learning_rate": 0.0004151651651651652, "loss": 0.9313, "step": 1870 }, { "epoch": 2.54, "learning_rate": 0.0004144144144144144, "loss": 0.8637, "step": 1880 }, { "epoch": 2.55, "learning_rate": 0.00041366366366366367, "loss": 0.9116, "step": 1890 }, { "epoch": 2.56, "learning_rate": 0.00041291291291291294, "loss": 0.9368, "step": 1900 }, { "epoch": 2.58, "learning_rate": 0.00041216216216216216, "loss": 0.9111, "step": 1910 }, { "epoch": 2.59, "learning_rate": 0.00041141141141141143, "loss": 0.9446, "step": 1920 }, { "epoch": 2.61, "learning_rate": 0.00041066066066066065, "loss": 0.9185, "step": 1930 }, { "epoch": 2.62, "learning_rate": 0.0004099099099099099, "loss": 0.8664, "step": 1940 }, { "epoch": 2.63, "learning_rate": 0.0004091591591591592, "loss": 0.7811, "step": 1950 }, { "epoch": 2.65, "learning_rate": 0.0004084084084084084, "loss": 0.9265, "step": 1960 }, { "epoch": 2.66, "learning_rate": 0.0004076576576576577, "loss": 0.83, "step": 1970 }, { "epoch": 2.67, "learning_rate": 0.0004069069069069069, "loss": 0.8942, "step": 1980 }, { "epoch": 2.69, "learning_rate": 0.00040615615615615617, "loss": 0.9473, "step": 1990 }, { "epoch": 2.7, "learning_rate": 0.00040540540540540544, "loss": 0.9547, "step": 2000 }, { "epoch": 2.71, "learning_rate": 0.00040465465465465466, "loss": 0.9377, "step": 2010 }, { "epoch": 2.73, "learning_rate": 0.00040390390390390393, "loss": 0.9604, "step": 2020 }, { "epoch": 2.74, "learning_rate": 0.00040315315315315314, "loss": 0.9484, "step": 2030 }, { "epoch": 2.75, "learning_rate": 0.0004024024024024024, "loss": 0.8361, "step": 2040 }, { "epoch": 2.77, "learning_rate": 0.0004016516516516517, "loss": 0.8545, "step": 2050 }, { "epoch": 2.78, "learning_rate": 0.0004009009009009009, "loss": 0.9045, "step": 2060 }, { "epoch": 2.79, "learning_rate": 0.0004001501501501502, "loss": 0.8322, "step": 2070 }, { "epoch": 2.81, "learning_rate": 0.0003993993993993994, "loss": 0.9181, "step": 2080 }, { "epoch": 2.82, "learning_rate": 0.00039864864864864866, "loss": 0.7966, "step": 2090 }, { "epoch": 2.83, "learning_rate": 0.00039789789789789794, "loss": 0.9089, "step": 2100 }, { "epoch": 2.85, "learning_rate": 0.00039714714714714715, "loss": 0.9082, "step": 2110 }, { "epoch": 2.86, "learning_rate": 0.0003963963963963964, "loss": 0.8908, "step": 2120 }, { "epoch": 2.88, "learning_rate": 0.00039564564564564564, "loss": 0.8805, "step": 2130 }, { "epoch": 2.89, "learning_rate": 0.00039489489489489486, "loss": 0.9338, "step": 2140 }, { "epoch": 2.9, "learning_rate": 0.0003941441441441442, "loss": 0.9174, "step": 2150 }, { "epoch": 2.92, "learning_rate": 0.0003933933933933934, "loss": 0.902, "step": 2160 }, { "epoch": 2.93, "learning_rate": 0.00039264264264264267, "loss": 0.8365, "step": 2170 }, { "epoch": 2.94, "learning_rate": 0.0003918918918918919, "loss": 0.7663, "step": 2180 }, { "epoch": 2.96, "learning_rate": 0.0003911411411411411, "loss": 0.9336, "step": 2190 }, { "epoch": 2.97, "learning_rate": 0.0003903903903903904, "loss": 0.8055, "step": 2200 }, { "epoch": 2.98, "learning_rate": 0.00038963963963963965, "loss": 0.8888, "step": 2210 }, { "epoch": 3.0, "learning_rate": 0.0003888888888888889, "loss": 0.9069, "step": 2220 }, { "epoch": 3.0, "eval_accuracy": 0.8292658531706342, "eval_f1": 0.8288337486077575, "eval_loss": 0.5952173471450806, "eval_precision": 0.842670072902701, "eval_recall": 0.8282744379906413, "eval_runtime": 192.1316, "eval_samples_per_second": 52.037, "eval_steps_per_second": 1.931, "step": 2222 }, { "epoch": 3.01, "learning_rate": 0.00038813813813813814, "loss": 0.7958, "step": 2230 }, { "epoch": 3.02, "learning_rate": 0.00038738738738738736, "loss": 0.7455, "step": 2240 }, { "epoch": 3.04, "learning_rate": 0.0003866366366366366, "loss": 0.7151, "step": 2250 }, { "epoch": 3.05, "learning_rate": 0.0003858858858858859, "loss": 0.7621, "step": 2260 }, { "epoch": 3.06, "learning_rate": 0.00038513513513513517, "loss": 0.8418, "step": 2270 }, { "epoch": 3.08, "learning_rate": 0.0003843843843843844, "loss": 0.8065, "step": 2280 }, { "epoch": 3.09, "learning_rate": 0.0003836336336336336, "loss": 0.7361, "step": 2290 }, { "epoch": 3.1, "learning_rate": 0.0003828828828828829, "loss": 0.7774, "step": 2300 }, { "epoch": 3.12, "learning_rate": 0.00038213213213213215, "loss": 0.7262, "step": 2310 }, { "epoch": 3.13, "learning_rate": 0.0003813813813813814, "loss": 0.7989, "step": 2320 }, { "epoch": 3.15, "learning_rate": 0.00038063063063063064, "loss": 0.8062, "step": 2330 }, { "epoch": 3.16, "learning_rate": 0.00037987987987987985, "loss": 0.7957, "step": 2340 }, { "epoch": 3.17, "learning_rate": 0.0003791291291291291, "loss": 0.7507, "step": 2350 }, { "epoch": 3.19, "learning_rate": 0.0003783783783783784, "loss": 0.7881, "step": 2360 }, { "epoch": 3.2, "learning_rate": 0.00037762762762762767, "loss": 0.7048, "step": 2370 }, { "epoch": 3.21, "learning_rate": 0.0003768768768768769, "loss": 0.8162, "step": 2380 }, { "epoch": 3.23, "learning_rate": 0.0003761261261261261, "loss": 0.7446, "step": 2390 }, { "epoch": 3.24, "learning_rate": 0.00037537537537537537, "loss": 0.7904, "step": 2400 }, { "epoch": 3.25, "learning_rate": 0.00037462462462462464, "loss": 0.7939, "step": 2410 }, { "epoch": 3.27, "learning_rate": 0.0003738738738738739, "loss": 0.6914, "step": 2420 }, { "epoch": 3.28, "learning_rate": 0.00037312312312312313, "loss": 0.8431, "step": 2430 }, { "epoch": 3.29, "learning_rate": 0.00037237237237237235, "loss": 0.775, "step": 2440 }, { "epoch": 3.31, "learning_rate": 0.0003716216216216216, "loss": 0.7682, "step": 2450 }, { "epoch": 3.32, "learning_rate": 0.0003708708708708709, "loss": 0.8463, "step": 2460 }, { "epoch": 3.33, "learning_rate": 0.00037012012012012016, "loss": 0.7268, "step": 2470 }, { "epoch": 3.35, "learning_rate": 0.0003693693693693694, "loss": 0.7556, "step": 2480 }, { "epoch": 3.36, "learning_rate": 0.0003686186186186186, "loss": 0.7808, "step": 2490 }, { "epoch": 3.37, "learning_rate": 0.00036786786786786787, "loss": 0.7615, "step": 2500 }, { "epoch": 3.39, "learning_rate": 0.00036711711711711714, "loss": 0.798, "step": 2510 }, { "epoch": 3.4, "learning_rate": 0.0003663663663663664, "loss": 0.7917, "step": 2520 }, { "epoch": 3.42, "learning_rate": 0.00036561561561561563, "loss": 0.75, "step": 2530 }, { "epoch": 3.43, "learning_rate": 0.00036486486486486485, "loss": 0.6937, "step": 2540 }, { "epoch": 3.44, "learning_rate": 0.0003641141141141141, "loss": 0.7988, "step": 2550 }, { "epoch": 3.46, "learning_rate": 0.0003633633633633634, "loss": 0.7315, "step": 2560 }, { "epoch": 3.47, "learning_rate": 0.00036261261261261266, "loss": 0.7546, "step": 2570 }, { "epoch": 3.48, "learning_rate": 0.0003618618618618619, "loss": 0.7216, "step": 2580 }, { "epoch": 3.5, "learning_rate": 0.0003611111111111111, "loss": 0.7669, "step": 2590 }, { "epoch": 3.51, "learning_rate": 0.00036036036036036037, "loss": 0.7127, "step": 2600 }, { "epoch": 3.52, "learning_rate": 0.00035960960960960964, "loss": 0.7755, "step": 2610 }, { "epoch": 3.54, "learning_rate": 0.0003588588588588589, "loss": 0.8252, "step": 2620 }, { "epoch": 3.55, "learning_rate": 0.0003581081081081081, "loss": 0.7839, "step": 2630 }, { "epoch": 3.56, "learning_rate": 0.00035735735735735734, "loss": 0.7111, "step": 2640 }, { "epoch": 3.58, "learning_rate": 0.0003566066066066066, "loss": 0.6958, "step": 2650 }, { "epoch": 3.59, "learning_rate": 0.00035585585585585583, "loss": 0.6712, "step": 2660 }, { "epoch": 3.6, "learning_rate": 0.00035510510510510516, "loss": 0.6692, "step": 2670 }, { "epoch": 3.62, "learning_rate": 0.0003543543543543544, "loss": 0.7866, "step": 2680 }, { "epoch": 3.63, "learning_rate": 0.0003536036036036036, "loss": 0.7061, "step": 2690 }, { "epoch": 3.64, "learning_rate": 0.00035285285285285286, "loss": 0.7742, "step": 2700 }, { "epoch": 3.66, "learning_rate": 0.0003521021021021021, "loss": 0.6905, "step": 2710 }, { "epoch": 3.67, "learning_rate": 0.00035135135135135135, "loss": 0.7464, "step": 2720 }, { "epoch": 3.69, "learning_rate": 0.0003506006006006006, "loss": 0.7855, "step": 2730 }, { "epoch": 3.7, "learning_rate": 0.00034984984984984984, "loss": 0.7871, "step": 2740 }, { "epoch": 3.71, "learning_rate": 0.0003490990990990991, "loss": 0.6782, "step": 2750 }, { "epoch": 3.73, "learning_rate": 0.00034834834834834833, "loss": 0.836, "step": 2760 }, { "epoch": 3.74, "learning_rate": 0.0003475975975975976, "loss": 0.6404, "step": 2770 }, { "epoch": 3.75, "learning_rate": 0.00034684684684684687, "loss": 0.8315, "step": 2780 }, { "epoch": 3.77, "learning_rate": 0.0003460960960960961, "loss": 0.6882, "step": 2790 }, { "epoch": 3.78, "learning_rate": 0.00034534534534534536, "loss": 0.7388, "step": 2800 }, { "epoch": 3.79, "learning_rate": 0.0003445945945945946, "loss": 0.8343, "step": 2810 }, { "epoch": 3.81, "learning_rate": 0.00034384384384384385, "loss": 0.7021, "step": 2820 }, { "epoch": 3.82, "learning_rate": 0.0003430930930930931, "loss": 0.7739, "step": 2830 }, { "epoch": 3.83, "learning_rate": 0.00034234234234234234, "loss": 0.7021, "step": 2840 }, { "epoch": 3.85, "learning_rate": 0.0003415915915915916, "loss": 0.8136, "step": 2850 }, { "epoch": 3.86, "learning_rate": 0.0003408408408408408, "loss": 0.7634, "step": 2860 }, { "epoch": 3.87, "learning_rate": 0.0003400900900900901, "loss": 0.7506, "step": 2870 }, { "epoch": 3.89, "learning_rate": 0.00033933933933933937, "loss": 0.7432, "step": 2880 }, { "epoch": 3.9, "learning_rate": 0.0003385885885885886, "loss": 0.7378, "step": 2890 }, { "epoch": 3.91, "learning_rate": 0.00033783783783783786, "loss": 0.6994, "step": 2900 }, { "epoch": 3.93, "learning_rate": 0.0003370870870870871, "loss": 0.7371, "step": 2910 }, { "epoch": 3.94, "learning_rate": 0.00033633633633633635, "loss": 0.7055, "step": 2920 }, { "epoch": 3.96, "learning_rate": 0.0003355855855855856, "loss": 0.703, "step": 2930 }, { "epoch": 3.97, "learning_rate": 0.00033483483483483483, "loss": 0.7472, "step": 2940 }, { "epoch": 3.98, "learning_rate": 0.0003340840840840841, "loss": 0.8109, "step": 2950 }, { "epoch": 4.0, "learning_rate": 0.0003333333333333333, "loss": 0.7519, "step": 2960 }, { "epoch": 4.0, "eval_accuracy": 0.8491698339667934, "eval_f1": 0.849423792771648, "eval_loss": 0.5195841789245605, "eval_precision": 0.8611084598890817, "eval_recall": 0.8482889408623805, "eval_runtime": 190.693, "eval_samples_per_second": 52.43, "eval_steps_per_second": 1.946, "step": 2963 }, { "epoch": 4.01, "learning_rate": 0.0003325825825825826, "loss": 0.7232, "step": 2970 }, { "epoch": 4.02, "learning_rate": 0.00033183183183183186, "loss": 0.5435, "step": 2980 }, { "epoch": 4.04, "learning_rate": 0.0003310810810810811, "loss": 0.6165, "step": 2990 }, { "epoch": 4.05, "learning_rate": 0.00033033033033033035, "loss": 0.6642, "step": 3000 }, { "epoch": 4.06, "learning_rate": 0.00032957957957957957, "loss": 0.6566, "step": 3010 }, { "epoch": 4.08, "learning_rate": 0.00032882882882882884, "loss": 0.6535, "step": 3020 }, { "epoch": 4.09, "learning_rate": 0.0003280780780780781, "loss": 0.6825, "step": 3030 }, { "epoch": 4.1, "learning_rate": 0.00032732732732732733, "loss": 0.5755, "step": 3040 }, { "epoch": 4.12, "learning_rate": 0.0003265765765765766, "loss": 0.6186, "step": 3050 }, { "epoch": 4.13, "learning_rate": 0.0003258258258258258, "loss": 0.6469, "step": 3060 }, { "epoch": 4.14, "learning_rate": 0.00032507507507507504, "loss": 0.6201, "step": 3070 }, { "epoch": 4.16, "learning_rate": 0.00032432432432432436, "loss": 0.6528, "step": 3080 }, { "epoch": 4.17, "learning_rate": 0.0003235735735735736, "loss": 0.7014, "step": 3090 }, { "epoch": 4.18, "learning_rate": 0.00032282282282282285, "loss": 0.6835, "step": 3100 }, { "epoch": 4.2, "learning_rate": 0.00032207207207207207, "loss": 0.6827, "step": 3110 }, { "epoch": 4.21, "learning_rate": 0.0003213213213213213, "loss": 0.7095, "step": 3120 }, { "epoch": 4.23, "learning_rate": 0.0003205705705705706, "loss": 0.6773, "step": 3130 }, { "epoch": 4.24, "learning_rate": 0.00031981981981981983, "loss": 0.6967, "step": 3140 }, { "epoch": 4.25, "learning_rate": 0.0003190690690690691, "loss": 0.6751, "step": 3150 }, { "epoch": 4.27, "learning_rate": 0.0003183183183183183, "loss": 0.6711, "step": 3160 }, { "epoch": 4.28, "learning_rate": 0.00031756756756756753, "loss": 0.5826, "step": 3170 }, { "epoch": 4.29, "learning_rate": 0.00031681681681681686, "loss": 0.6481, "step": 3180 }, { "epoch": 4.31, "learning_rate": 0.0003160660660660661, "loss": 0.6282, "step": 3190 }, { "epoch": 4.32, "learning_rate": 0.00031531531531531535, "loss": 0.6441, "step": 3200 }, { "epoch": 4.33, "learning_rate": 0.00031456456456456456, "loss": 0.643, "step": 3210 }, { "epoch": 4.35, "learning_rate": 0.0003138138138138138, "loss": 0.6681, "step": 3220 }, { "epoch": 4.36, "learning_rate": 0.0003130630630630631, "loss": 0.642, "step": 3230 }, { "epoch": 4.37, "learning_rate": 0.0003123123123123123, "loss": 0.6114, "step": 3240 }, { "epoch": 4.39, "learning_rate": 0.00031156156156156154, "loss": 0.6465, "step": 3250 }, { "epoch": 4.4, "learning_rate": 0.0003108108108108108, "loss": 0.7339, "step": 3260 }, { "epoch": 4.41, "learning_rate": 0.00031006006006006003, "loss": 0.6085, "step": 3270 }, { "epoch": 4.43, "learning_rate": 0.00030930930930930936, "loss": 0.6396, "step": 3280 }, { "epoch": 4.44, "learning_rate": 0.00030855855855855857, "loss": 0.6864, "step": 3290 }, { "epoch": 4.45, "learning_rate": 0.0003078078078078078, "loss": 0.6241, "step": 3300 }, { "epoch": 4.47, "learning_rate": 0.00030705705705705706, "loss": 0.634, "step": 3310 }, { "epoch": 4.48, "learning_rate": 0.0003063063063063063, "loss": 0.6854, "step": 3320 }, { "epoch": 4.5, "learning_rate": 0.0003055555555555556, "loss": 0.5872, "step": 3330 }, { "epoch": 4.51, "learning_rate": 0.0003048048048048048, "loss": 0.5485, "step": 3340 }, { "epoch": 4.52, "learning_rate": 0.00030405405405405404, "loss": 0.6161, "step": 3350 }, { "epoch": 4.54, "learning_rate": 0.0003033033033033033, "loss": 0.6059, "step": 3360 }, { "epoch": 4.55, "learning_rate": 0.0003025525525525525, "loss": 0.6244, "step": 3370 }, { "epoch": 4.56, "learning_rate": 0.00030180180180180185, "loss": 0.6937, "step": 3380 }, { "epoch": 4.58, "learning_rate": 0.00030105105105105107, "loss": 0.6506, "step": 3390 }, { "epoch": 4.59, "learning_rate": 0.0003003003003003003, "loss": 0.6477, "step": 3400 }, { "epoch": 4.6, "learning_rate": 0.00029954954954954956, "loss": 0.6439, "step": 3410 }, { "epoch": 4.62, "learning_rate": 0.0002987987987987988, "loss": 0.6016, "step": 3420 }, { "epoch": 4.63, "learning_rate": 0.0002980480480480481, "loss": 0.5656, "step": 3430 }, { "epoch": 4.64, "learning_rate": 0.0002972972972972973, "loss": 0.6457, "step": 3440 }, { "epoch": 4.66, "learning_rate": 0.00029654654654654654, "loss": 0.6378, "step": 3450 }, { "epoch": 4.67, "learning_rate": 0.0002957957957957958, "loss": 0.6456, "step": 3460 }, { "epoch": 4.68, "learning_rate": 0.000295045045045045, "loss": 0.6649, "step": 3470 }, { "epoch": 4.7, "learning_rate": 0.0002942942942942943, "loss": 0.6672, "step": 3480 }, { "epoch": 4.71, "learning_rate": 0.00029354354354354357, "loss": 0.6567, "step": 3490 }, { "epoch": 4.72, "learning_rate": 0.0002927927927927928, "loss": 0.6411, "step": 3500 }, { "epoch": 4.74, "learning_rate": 0.00029204204204204206, "loss": 0.6502, "step": 3510 }, { "epoch": 4.75, "learning_rate": 0.00029129129129129127, "loss": 0.6637, "step": 3520 }, { "epoch": 4.77, "learning_rate": 0.00029054054054054054, "loss": 0.6339, "step": 3530 }, { "epoch": 4.78, "learning_rate": 0.0002897897897897898, "loss": 0.5812, "step": 3540 }, { "epoch": 4.79, "learning_rate": 0.00028903903903903903, "loss": 0.6396, "step": 3550 }, { "epoch": 4.81, "learning_rate": 0.0002882882882882883, "loss": 0.6208, "step": 3560 }, { "epoch": 4.82, "learning_rate": 0.0002875375375375375, "loss": 0.6381, "step": 3570 }, { "epoch": 4.83, "learning_rate": 0.0002867867867867868, "loss": 0.691, "step": 3580 }, { "epoch": 4.85, "learning_rate": 0.00028603603603603606, "loss": 0.7211, "step": 3590 }, { "epoch": 4.86, "learning_rate": 0.0002852852852852853, "loss": 0.616, "step": 3600 }, { "epoch": 4.87, "learning_rate": 0.00028453453453453455, "loss": 0.5619, "step": 3610 }, { "epoch": 4.89, "learning_rate": 0.00028378378378378377, "loss": 0.5765, "step": 3620 }, { "epoch": 4.9, "learning_rate": 0.00028303303303303304, "loss": 0.6436, "step": 3630 }, { "epoch": 4.91, "learning_rate": 0.0002822822822822823, "loss": 0.6864, "step": 3640 }, { "epoch": 4.93, "learning_rate": 0.00028153153153153153, "loss": 0.6217, "step": 3650 }, { "epoch": 4.94, "learning_rate": 0.0002807807807807808, "loss": 0.5852, "step": 3660 }, { "epoch": 4.95, "learning_rate": 0.00028003003003003, "loss": 0.6185, "step": 3670 }, { "epoch": 4.97, "learning_rate": 0.0002792792792792793, "loss": 0.6803, "step": 3680 }, { "epoch": 4.98, "learning_rate": 0.00027852852852852856, "loss": 0.5834, "step": 3690 }, { "epoch": 4.99, "learning_rate": 0.0002777777777777778, "loss": 0.6357, "step": 3700 }, { "epoch": 5.0, "eval_accuracy": 0.8704740948189638, "eval_f1": 0.8689097751341385, "eval_loss": 0.44387924671173096, "eval_precision": 0.8761662335945799, "eval_recall": 0.8693479074500065, "eval_runtime": 191.3953, "eval_samples_per_second": 52.237, "eval_steps_per_second": 1.938, "step": 3703 }, { "epoch": 5.01, "learning_rate": 0.00027702702702702705, "loss": 0.5605, "step": 3710 }, { "epoch": 5.02, "learning_rate": 0.00027627627627627627, "loss": 0.5468, "step": 3720 }, { "epoch": 5.04, "learning_rate": 0.00027552552552552554, "loss": 0.5749, "step": 3730 }, { "epoch": 5.05, "learning_rate": 0.0002747747747747748, "loss": 0.53, "step": 3740 }, { "epoch": 5.06, "learning_rate": 0.000274024024024024, "loss": 0.5308, "step": 3750 }, { "epoch": 5.08, "learning_rate": 0.0002732732732732733, "loss": 0.5127, "step": 3760 }, { "epoch": 5.09, "learning_rate": 0.0002725225225225225, "loss": 0.5424, "step": 3770 }, { "epoch": 5.1, "learning_rate": 0.00027177177177177173, "loss": 0.5608, "step": 3780 }, { "epoch": 5.12, "learning_rate": 0.00027102102102102106, "loss": 0.621, "step": 3790 }, { "epoch": 5.13, "learning_rate": 0.0002702702702702703, "loss": 0.5018, "step": 3800 }, { "epoch": 5.14, "learning_rate": 0.00026951951951951955, "loss": 0.4879, "step": 3810 }, { "epoch": 5.16, "learning_rate": 0.00026876876876876876, "loss": 0.4991, "step": 3820 }, { "epoch": 5.17, "learning_rate": 0.000268018018018018, "loss": 0.5633, "step": 3830 }, { "epoch": 5.18, "learning_rate": 0.0002672672672672673, "loss": 0.6233, "step": 3840 }, { "epoch": 5.2, "learning_rate": 0.0002665165165165165, "loss": 0.511, "step": 3850 }, { "epoch": 5.21, "learning_rate": 0.0002657657657657658, "loss": 0.5036, "step": 3860 }, { "epoch": 5.22, "learning_rate": 0.000265015015015015, "loss": 0.5826, "step": 3870 }, { "epoch": 5.24, "learning_rate": 0.00026426426426426423, "loss": 0.5605, "step": 3880 }, { "epoch": 5.25, "learning_rate": 0.0002635135135135135, "loss": 0.6443, "step": 3890 }, { "epoch": 5.26, "learning_rate": 0.00026276276276276277, "loss": 0.582, "step": 3900 }, { "epoch": 5.28, "learning_rate": 0.00026201201201201204, "loss": 0.6232, "step": 3910 }, { "epoch": 5.29, "learning_rate": 0.00026126126126126126, "loss": 0.5763, "step": 3920 }, { "epoch": 5.31, "learning_rate": 0.0002605105105105105, "loss": 0.6183, "step": 3930 }, { "epoch": 5.32, "learning_rate": 0.00025975975975975975, "loss": 0.5533, "step": 3940 }, { "epoch": 5.33, "learning_rate": 0.000259009009009009, "loss": 0.5633, "step": 3950 }, { "epoch": 5.35, "learning_rate": 0.0002582582582582583, "loss": 0.544, "step": 3960 }, { "epoch": 5.36, "learning_rate": 0.0002575075075075075, "loss": 0.5882, "step": 3970 }, { "epoch": 5.37, "learning_rate": 0.0002567567567567567, "loss": 0.5974, "step": 3980 }, { "epoch": 5.39, "learning_rate": 0.000256006006006006, "loss": 0.557, "step": 3990 }, { "epoch": 5.4, "learning_rate": 0.00025525525525525527, "loss": 0.5013, "step": 4000 }, { "epoch": 5.41, "learning_rate": 0.00025450450450450454, "loss": 0.5785, "step": 4010 }, { "epoch": 5.43, "learning_rate": 0.00025375375375375376, "loss": 0.558, "step": 4020 }, { "epoch": 5.44, "learning_rate": 0.000253003003003003, "loss": 0.5429, "step": 4030 }, { "epoch": 5.45, "learning_rate": 0.00025225225225225225, "loss": 0.5667, "step": 4040 }, { "epoch": 5.47, "learning_rate": 0.0002515015015015015, "loss": 0.5386, "step": 4050 }, { "epoch": 5.48, "learning_rate": 0.0002507507507507508, "loss": 0.6074, "step": 4060 }, { "epoch": 5.49, "learning_rate": 0.00025, "loss": 0.5583, "step": 4070 }, { "epoch": 5.51, "learning_rate": 0.0002492492492492492, "loss": 0.4637, "step": 4080 }, { "epoch": 5.52, "learning_rate": 0.0002484984984984985, "loss": 0.5877, "step": 4090 }, { "epoch": 5.53, "learning_rate": 0.00024774774774774777, "loss": 0.4403, "step": 4100 }, { "epoch": 5.55, "learning_rate": 0.00024699699699699704, "loss": 0.5379, "step": 4110 }, { "epoch": 5.56, "learning_rate": 0.00024624624624624625, "loss": 0.5562, "step": 4120 }, { "epoch": 5.58, "learning_rate": 0.00024549549549549547, "loss": 0.5481, "step": 4130 }, { "epoch": 5.59, "learning_rate": 0.00024474474474474474, "loss": 0.5422, "step": 4140 }, { "epoch": 5.6, "learning_rate": 0.000243993993993994, "loss": 0.5545, "step": 4150 }, { "epoch": 5.62, "learning_rate": 0.00024324324324324326, "loss": 0.5128, "step": 4160 }, { "epoch": 5.63, "learning_rate": 0.0002424924924924925, "loss": 0.5653, "step": 4170 }, { "epoch": 5.64, "learning_rate": 0.00024174174174174175, "loss": 0.5782, "step": 4180 }, { "epoch": 5.66, "learning_rate": 0.00024099099099099102, "loss": 0.5709, "step": 4190 }, { "epoch": 5.67, "learning_rate": 0.00024024024024024023, "loss": 0.5307, "step": 4200 }, { "epoch": 5.68, "learning_rate": 0.00023948948948948948, "loss": 0.569, "step": 4210 }, { "epoch": 5.7, "learning_rate": 0.00023873873873873875, "loss": 0.5939, "step": 4220 }, { "epoch": 5.71, "learning_rate": 0.000237987987987988, "loss": 0.5127, "step": 4230 }, { "epoch": 5.72, "learning_rate": 0.00023723723723723724, "loss": 0.5174, "step": 4240 }, { "epoch": 5.74, "learning_rate": 0.00023648648648648648, "loss": 0.558, "step": 4250 }, { "epoch": 5.75, "learning_rate": 0.00023573573573573573, "loss": 0.5006, "step": 4260 }, { "epoch": 5.76, "learning_rate": 0.000234984984984985, "loss": 0.6208, "step": 4270 }, { "epoch": 5.78, "learning_rate": 0.00023423423423423424, "loss": 0.554, "step": 4280 }, { "epoch": 5.79, "learning_rate": 0.0002334834834834835, "loss": 0.5183, "step": 4290 }, { "epoch": 5.8, "learning_rate": 0.00023273273273273273, "loss": 0.5545, "step": 4300 }, { "epoch": 5.82, "learning_rate": 0.00023198198198198198, "loss": 0.5011, "step": 4310 }, { "epoch": 5.83, "learning_rate": 0.00023123123123123125, "loss": 0.5286, "step": 4320 }, { "epoch": 5.85, "learning_rate": 0.0002304804804804805, "loss": 0.4974, "step": 4330 }, { "epoch": 5.86, "learning_rate": 0.00022972972972972974, "loss": 0.576, "step": 4340 }, { "epoch": 5.87, "learning_rate": 0.00022897897897897898, "loss": 0.4924, "step": 4350 }, { "epoch": 5.89, "learning_rate": 0.00022822822822822822, "loss": 0.5501, "step": 4360 }, { "epoch": 5.9, "learning_rate": 0.0002274774774774775, "loss": 0.5307, "step": 4370 }, { "epoch": 5.91, "learning_rate": 0.00022672672672672674, "loss": 0.5161, "step": 4380 }, { "epoch": 5.93, "learning_rate": 0.00022597597597597596, "loss": 0.5153, "step": 4390 }, { "epoch": 5.94, "learning_rate": 0.00022522522522522523, "loss": 0.5222, "step": 4400 }, { "epoch": 5.95, "learning_rate": 0.00022447447447447447, "loss": 0.5144, "step": 4410 }, { "epoch": 5.97, "learning_rate": 0.00022372372372372374, "loss": 0.5002, "step": 4420 }, { "epoch": 5.98, "learning_rate": 0.000222972972972973, "loss": 0.4556, "step": 4430 }, { "epoch": 5.99, "learning_rate": 0.0002222222222222222, "loss": 0.5657, "step": 4440 }, { "epoch": 6.0, "eval_accuracy": 0.8890778155631126, "eval_f1": 0.8884475854159729, "eval_loss": 0.405392587184906, "eval_precision": 0.8929255500739524, "eval_recall": 0.8885232893443815, "eval_runtime": 191.4068, "eval_samples_per_second": 52.234, "eval_steps_per_second": 1.938, "step": 4444 }, { "epoch": 6.01, "learning_rate": 0.00022147147147147148, "loss": 0.5383, "step": 4450 }, { "epoch": 6.02, "learning_rate": 0.00022072072072072072, "loss": 0.444, "step": 4460 }, { "epoch": 6.03, "learning_rate": 0.00021996996996997, "loss": 0.4961, "step": 4470 }, { "epoch": 6.05, "learning_rate": 0.00021921921921921924, "loss": 0.5372, "step": 4480 }, { "epoch": 6.06, "learning_rate": 0.00021846846846846845, "loss": 0.4424, "step": 4490 }, { "epoch": 6.07, "learning_rate": 0.00021771771771771773, "loss": 0.3961, "step": 4500 }, { "epoch": 6.09, "learning_rate": 0.00021696696696696697, "loss": 0.4882, "step": 4510 }, { "epoch": 6.1, "learning_rate": 0.00021621621621621624, "loss": 0.4517, "step": 4520 }, { "epoch": 6.12, "learning_rate": 0.00021546546546546546, "loss": 0.4742, "step": 4530 }, { "epoch": 6.13, "learning_rate": 0.0002147147147147147, "loss": 0.5285, "step": 4540 }, { "epoch": 6.14, "learning_rate": 0.00021396396396396397, "loss": 0.4771, "step": 4550 }, { "epoch": 6.16, "learning_rate": 0.00021321321321321322, "loss": 0.4278, "step": 4560 }, { "epoch": 6.17, "learning_rate": 0.0002124624624624625, "loss": 0.5205, "step": 4570 }, { "epoch": 6.18, "learning_rate": 0.0002117117117117117, "loss": 0.5019, "step": 4580 }, { "epoch": 6.2, "learning_rate": 0.00021096096096096095, "loss": 0.4504, "step": 4590 }, { "epoch": 6.21, "learning_rate": 0.00021021021021021022, "loss": 0.411, "step": 4600 }, { "epoch": 6.22, "learning_rate": 0.00020945945945945947, "loss": 0.5114, "step": 4610 }, { "epoch": 6.24, "learning_rate": 0.0002087087087087087, "loss": 0.4836, "step": 4620 }, { "epoch": 6.25, "learning_rate": 0.00020795795795795796, "loss": 0.4559, "step": 4630 }, { "epoch": 6.26, "learning_rate": 0.0002072072072072072, "loss": 0.4264, "step": 4640 }, { "epoch": 6.28, "learning_rate": 0.00020645645645645647, "loss": 0.4078, "step": 4650 }, { "epoch": 6.29, "learning_rate": 0.00020570570570570572, "loss": 0.451, "step": 4660 }, { "epoch": 6.3, "learning_rate": 0.00020495495495495496, "loss": 0.407, "step": 4670 }, { "epoch": 6.32, "learning_rate": 0.0002042042042042042, "loss": 0.412, "step": 4680 }, { "epoch": 6.33, "learning_rate": 0.00020345345345345345, "loss": 0.502, "step": 4690 }, { "epoch": 6.34, "learning_rate": 0.00020270270270270272, "loss": 0.4881, "step": 4700 }, { "epoch": 6.36, "learning_rate": 0.00020195195195195196, "loss": 0.4685, "step": 4710 }, { "epoch": 6.37, "learning_rate": 0.0002012012012012012, "loss": 0.4531, "step": 4720 }, { "epoch": 6.39, "learning_rate": 0.00020045045045045045, "loss": 0.3962, "step": 4730 }, { "epoch": 6.4, "learning_rate": 0.0001996996996996997, "loss": 0.3741, "step": 4740 }, { "epoch": 6.41, "learning_rate": 0.00019894894894894897, "loss": 0.4502, "step": 4750 }, { "epoch": 6.43, "learning_rate": 0.0001981981981981982, "loss": 0.4775, "step": 4760 }, { "epoch": 6.44, "learning_rate": 0.00019744744744744743, "loss": 0.5319, "step": 4770 }, { "epoch": 6.45, "learning_rate": 0.0001966966966966967, "loss": 0.5363, "step": 4780 }, { "epoch": 6.47, "learning_rate": 0.00019594594594594594, "loss": 0.4511, "step": 4790 }, { "epoch": 6.48, "learning_rate": 0.0001951951951951952, "loss": 0.4314, "step": 4800 }, { "epoch": 6.49, "learning_rate": 0.00019444444444444446, "loss": 0.405, "step": 4810 }, { "epoch": 6.51, "learning_rate": 0.00019369369369369368, "loss": 0.4473, "step": 4820 }, { "epoch": 6.52, "learning_rate": 0.00019294294294294295, "loss": 0.436, "step": 4830 }, { "epoch": 6.53, "learning_rate": 0.0001921921921921922, "loss": 0.4574, "step": 4840 }, { "epoch": 6.55, "learning_rate": 0.00019144144144144144, "loss": 0.4293, "step": 4850 }, { "epoch": 6.56, "learning_rate": 0.0001906906906906907, "loss": 0.3983, "step": 4860 }, { "epoch": 6.57, "learning_rate": 0.00018993993993993993, "loss": 0.4439, "step": 4870 }, { "epoch": 6.59, "learning_rate": 0.0001891891891891892, "loss": 0.4613, "step": 4880 }, { "epoch": 6.6, "learning_rate": 0.00018843843843843844, "loss": 0.4209, "step": 4890 }, { "epoch": 6.61, "learning_rate": 0.00018768768768768769, "loss": 0.5008, "step": 4900 }, { "epoch": 6.63, "learning_rate": 0.00018693693693693696, "loss": 0.4341, "step": 4910 }, { "epoch": 6.64, "learning_rate": 0.00018618618618618617, "loss": 0.4438, "step": 4920 }, { "epoch": 6.66, "learning_rate": 0.00018543543543543545, "loss": 0.3922, "step": 4930 }, { "epoch": 6.67, "learning_rate": 0.0001846846846846847, "loss": 0.4661, "step": 4940 }, { "epoch": 6.68, "learning_rate": 0.00018393393393393393, "loss": 0.4535, "step": 4950 }, { "epoch": 6.7, "learning_rate": 0.0001831831831831832, "loss": 0.4534, "step": 4960 }, { "epoch": 6.71, "learning_rate": 0.00018243243243243242, "loss": 0.4782, "step": 4970 }, { "epoch": 6.72, "learning_rate": 0.0001816816816816817, "loss": 0.4182, "step": 4980 }, { "epoch": 6.74, "learning_rate": 0.00018093093093093094, "loss": 0.4373, "step": 4990 }, { "epoch": 6.75, "learning_rate": 0.00018018018018018018, "loss": 0.4867, "step": 5000 }, { "epoch": 6.76, "learning_rate": 0.00017942942942942945, "loss": 0.4659, "step": 5010 }, { "epoch": 6.78, "learning_rate": 0.00017867867867867867, "loss": 0.3941, "step": 5020 }, { "epoch": 6.79, "learning_rate": 0.00017792792792792792, "loss": 0.4212, "step": 5030 }, { "epoch": 6.8, "learning_rate": 0.0001771771771771772, "loss": 0.4637, "step": 5040 }, { "epoch": 6.82, "learning_rate": 0.00017642642642642643, "loss": 0.5005, "step": 5050 }, { "epoch": 6.83, "learning_rate": 0.00017567567567567568, "loss": 0.4785, "step": 5060 }, { "epoch": 6.84, "learning_rate": 0.00017492492492492492, "loss": 0.4328, "step": 5070 }, { "epoch": 6.86, "learning_rate": 0.00017417417417417416, "loss": 0.3952, "step": 5080 }, { "epoch": 6.87, "learning_rate": 0.00017342342342342344, "loss": 0.4419, "step": 5090 }, { "epoch": 6.88, "learning_rate": 0.00017267267267267268, "loss": 0.4173, "step": 5100 }, { "epoch": 6.9, "learning_rate": 0.00017192192192192192, "loss": 0.4701, "step": 5110 }, { "epoch": 6.91, "learning_rate": 0.00017117117117117117, "loss": 0.4804, "step": 5120 }, { "epoch": 6.93, "learning_rate": 0.0001704204204204204, "loss": 0.3814, "step": 5130 }, { "epoch": 6.94, "learning_rate": 0.00016966966966966968, "loss": 0.4407, "step": 5140 }, { "epoch": 6.95, "learning_rate": 0.00016891891891891893, "loss": 0.4275, "step": 5150 }, { "epoch": 6.97, "learning_rate": 0.00016816816816816817, "loss": 0.4784, "step": 5160 }, { "epoch": 6.98, "learning_rate": 0.00016741741741741742, "loss": 0.5154, "step": 5170 }, { "epoch": 6.99, "learning_rate": 0.00016666666666666666, "loss": 0.4628, "step": 5180 }, { "epoch": 7.0, "eval_accuracy": 0.9037807561512302, "eval_f1": 0.9033003135892885, "eval_loss": 0.3493529260158539, "eval_precision": 0.9058549926677517, "eval_recall": 0.9033230091030984, "eval_runtime": 191.6635, "eval_samples_per_second": 52.164, "eval_steps_per_second": 1.936, "step": 5185 }, { "epoch": 7.01, "learning_rate": 0.00016591591591591593, "loss": 0.3719, "step": 5190 }, { "epoch": 7.02, "learning_rate": 0.00016516516516516518, "loss": 0.3562, "step": 5200 }, { "epoch": 7.03, "learning_rate": 0.00016441441441441442, "loss": 0.4041, "step": 5210 }, { "epoch": 7.05, "learning_rate": 0.00016366366366366367, "loss": 0.3744, "step": 5220 }, { "epoch": 7.06, "learning_rate": 0.0001629129129129129, "loss": 0.4728, "step": 5230 }, { "epoch": 7.07, "learning_rate": 0.00016216216216216218, "loss": 0.4138, "step": 5240 }, { "epoch": 7.09, "learning_rate": 0.00016141141141141143, "loss": 0.3801, "step": 5250 }, { "epoch": 7.1, "learning_rate": 0.00016066066066066064, "loss": 0.4113, "step": 5260 }, { "epoch": 7.11, "learning_rate": 0.00015990990990990991, "loss": 0.388, "step": 5270 }, { "epoch": 7.13, "learning_rate": 0.00015915915915915916, "loss": 0.4006, "step": 5280 }, { "epoch": 7.14, "learning_rate": 0.00015840840840840843, "loss": 0.3767, "step": 5290 }, { "epoch": 7.15, "learning_rate": 0.00015765765765765767, "loss": 0.419, "step": 5300 }, { "epoch": 7.17, "learning_rate": 0.0001569069069069069, "loss": 0.3563, "step": 5310 }, { "epoch": 7.18, "learning_rate": 0.00015615615615615616, "loss": 0.4191, "step": 5320 }, { "epoch": 7.2, "learning_rate": 0.0001554054054054054, "loss": 0.4491, "step": 5330 }, { "epoch": 7.21, "learning_rate": 0.00015465465465465468, "loss": 0.3764, "step": 5340 }, { "epoch": 7.22, "learning_rate": 0.0001539039039039039, "loss": 0.3798, "step": 5350 }, { "epoch": 7.24, "learning_rate": 0.00015315315315315314, "loss": 0.4415, "step": 5360 }, { "epoch": 7.25, "learning_rate": 0.0001524024024024024, "loss": 0.3818, "step": 5370 }, { "epoch": 7.26, "learning_rate": 0.00015165165165165165, "loss": 0.3974, "step": 5380 }, { "epoch": 7.28, "learning_rate": 0.00015090090090090093, "loss": 0.4371, "step": 5390 }, { "epoch": 7.29, "learning_rate": 0.00015015015015015014, "loss": 0.4014, "step": 5400 }, { "epoch": 7.3, "learning_rate": 0.0001493993993993994, "loss": 0.3844, "step": 5410 }, { "epoch": 7.32, "learning_rate": 0.00014864864864864866, "loss": 0.384, "step": 5420 }, { "epoch": 7.33, "learning_rate": 0.0001478978978978979, "loss": 0.4619, "step": 5430 }, { "epoch": 7.34, "learning_rate": 0.00014714714714714715, "loss": 0.3962, "step": 5440 }, { "epoch": 7.36, "learning_rate": 0.0001463963963963964, "loss": 0.4337, "step": 5450 }, { "epoch": 7.37, "learning_rate": 0.00014564564564564564, "loss": 0.4088, "step": 5460 }, { "epoch": 7.38, "learning_rate": 0.0001448948948948949, "loss": 0.3786, "step": 5470 }, { "epoch": 7.4, "learning_rate": 0.00014414414414414415, "loss": 0.4226, "step": 5480 }, { "epoch": 7.41, "learning_rate": 0.0001433933933933934, "loss": 0.44, "step": 5490 }, { "epoch": 7.42, "learning_rate": 0.00014264264264264264, "loss": 0.4322, "step": 5500 }, { "epoch": 7.44, "learning_rate": 0.00014189189189189188, "loss": 0.3269, "step": 5510 }, { "epoch": 7.45, "learning_rate": 0.00014114114114114116, "loss": 0.347, "step": 5520 }, { "epoch": 7.47, "learning_rate": 0.0001403903903903904, "loss": 0.4496, "step": 5530 }, { "epoch": 7.48, "learning_rate": 0.00013963963963963964, "loss": 0.4092, "step": 5540 }, { "epoch": 7.49, "learning_rate": 0.0001388888888888889, "loss": 0.3484, "step": 5550 }, { "epoch": 7.51, "learning_rate": 0.00013813813813813813, "loss": 0.4184, "step": 5560 }, { "epoch": 7.52, "learning_rate": 0.0001373873873873874, "loss": 0.3967, "step": 5570 }, { "epoch": 7.53, "learning_rate": 0.00013663663663663665, "loss": 0.4125, "step": 5580 }, { "epoch": 7.55, "learning_rate": 0.00013588588588588587, "loss": 0.401, "step": 5590 }, { "epoch": 7.56, "learning_rate": 0.00013513513513513514, "loss": 0.4787, "step": 5600 }, { "epoch": 7.57, "learning_rate": 0.00013438438438438438, "loss": 0.3997, "step": 5610 }, { "epoch": 7.59, "learning_rate": 0.00013363363363363365, "loss": 0.3598, "step": 5620 }, { "epoch": 7.6, "learning_rate": 0.0001328828828828829, "loss": 0.3988, "step": 5630 }, { "epoch": 7.61, "learning_rate": 0.00013213213213213211, "loss": 0.3209, "step": 5640 }, { "epoch": 7.63, "learning_rate": 0.00013138138138138139, "loss": 0.3528, "step": 5650 }, { "epoch": 7.64, "learning_rate": 0.00013063063063063063, "loss": 0.4024, "step": 5660 }, { "epoch": 7.65, "learning_rate": 0.00012987987987987987, "loss": 0.3689, "step": 5670 }, { "epoch": 7.67, "learning_rate": 0.00012912912912912915, "loss": 0.3908, "step": 5680 }, { "epoch": 7.68, "learning_rate": 0.00012837837837837836, "loss": 0.4048, "step": 5690 }, { "epoch": 7.69, "learning_rate": 0.00012762762762762763, "loss": 0.3461, "step": 5700 }, { "epoch": 7.71, "learning_rate": 0.00012687687687687688, "loss": 0.4064, "step": 5710 }, { "epoch": 7.72, "learning_rate": 0.00012612612612612612, "loss": 0.3859, "step": 5720 }, { "epoch": 7.74, "learning_rate": 0.0001253753753753754, "loss": 0.3992, "step": 5730 }, { "epoch": 7.75, "learning_rate": 0.0001246246246246246, "loss": 0.3116, "step": 5740 }, { "epoch": 7.76, "learning_rate": 0.00012387387387387388, "loss": 0.3627, "step": 5750 }, { "epoch": 7.78, "learning_rate": 0.00012312312312312313, "loss": 0.3946, "step": 5760 }, { "epoch": 7.79, "learning_rate": 0.00012237237237237237, "loss": 0.3297, "step": 5770 }, { "epoch": 7.8, "learning_rate": 0.00012162162162162163, "loss": 0.4199, "step": 5780 }, { "epoch": 7.82, "learning_rate": 0.00012087087087087087, "loss": 0.4164, "step": 5790 }, { "epoch": 7.83, "learning_rate": 0.00012012012012012012, "loss": 0.3763, "step": 5800 }, { "epoch": 7.84, "learning_rate": 0.00011936936936936938, "loss": 0.3401, "step": 5810 }, { "epoch": 7.86, "learning_rate": 0.00011861861861861862, "loss": 0.3524, "step": 5820 }, { "epoch": 7.87, "learning_rate": 0.00011786786786786786, "loss": 0.4263, "step": 5830 }, { "epoch": 7.88, "learning_rate": 0.00011711711711711712, "loss": 0.3855, "step": 5840 }, { "epoch": 7.9, "learning_rate": 0.00011636636636636637, "loss": 0.3678, "step": 5850 }, { "epoch": 7.91, "learning_rate": 0.00011561561561561562, "loss": 0.3067, "step": 5860 }, { "epoch": 7.92, "learning_rate": 0.00011486486486486487, "loss": 0.3637, "step": 5870 }, { "epoch": 7.94, "learning_rate": 0.00011411411411411411, "loss": 0.4032, "step": 5880 }, { "epoch": 7.95, "learning_rate": 0.00011336336336336337, "loss": 0.3832, "step": 5890 }, { "epoch": 7.96, "learning_rate": 0.00011261261261261261, "loss": 0.4019, "step": 5900 }, { "epoch": 7.98, "learning_rate": 0.00011186186186186187, "loss": 0.4264, "step": 5910 }, { "epoch": 7.99, "learning_rate": 0.0001111111111111111, "loss": 0.3256, "step": 5920 }, { "epoch": 8.0, "eval_accuracy": 0.9149829965993198, "eval_f1": 0.9145293036070032, "eval_loss": 0.3138176500797272, "eval_precision": 0.9170611788545995, "eval_recall": 0.9140792071407026, "eval_runtime": 191.5713, "eval_samples_per_second": 52.189, "eval_steps_per_second": 1.937, "step": 5926 }, { "epoch": 8.01, "learning_rate": 0.00011036036036036036, "loss": 0.3767, "step": 5930 }, { "epoch": 8.02, "learning_rate": 0.00010960960960960962, "loss": 0.3465, "step": 5940 }, { "epoch": 8.03, "learning_rate": 0.00010885885885885886, "loss": 0.3514, "step": 5950 }, { "epoch": 8.05, "learning_rate": 0.00010810810810810812, "loss": 0.3637, "step": 5960 }, { "epoch": 8.06, "learning_rate": 0.00010735735735735735, "loss": 0.3834, "step": 5970 }, { "epoch": 8.07, "learning_rate": 0.00010660660660660661, "loss": 0.2656, "step": 5980 }, { "epoch": 8.09, "learning_rate": 0.00010585585585585585, "loss": 0.3559, "step": 5990 }, { "epoch": 8.1, "learning_rate": 0.00010510510510510511, "loss": 0.3491, "step": 6000 }, { "epoch": 8.11, "learning_rate": 0.00010435435435435436, "loss": 0.4012, "step": 6010 }, { "epoch": 8.13, "learning_rate": 0.0001036036036036036, "loss": 0.3757, "step": 6020 }, { "epoch": 8.14, "learning_rate": 0.00010285285285285286, "loss": 0.3678, "step": 6030 }, { "epoch": 8.15, "learning_rate": 0.0001021021021021021, "loss": 0.3667, "step": 6040 }, { "epoch": 8.17, "learning_rate": 0.00010135135135135136, "loss": 0.3421, "step": 6050 }, { "epoch": 8.18, "learning_rate": 0.0001006006006006006, "loss": 0.3924, "step": 6060 }, { "epoch": 8.19, "learning_rate": 9.984984984984985e-05, "loss": 0.2621, "step": 6070 }, { "epoch": 8.21, "learning_rate": 9.90990990990991e-05, "loss": 0.3959, "step": 6080 }, { "epoch": 8.22, "learning_rate": 9.834834834834835e-05, "loss": 0.3191, "step": 6090 }, { "epoch": 8.23, "learning_rate": 9.75975975975976e-05, "loss": 0.3449, "step": 6100 }, { "epoch": 8.25, "learning_rate": 9.684684684684684e-05, "loss": 0.3142, "step": 6110 }, { "epoch": 8.26, "learning_rate": 9.60960960960961e-05, "loss": 0.3093, "step": 6120 }, { "epoch": 8.28, "learning_rate": 9.534534534534535e-05, "loss": 0.3446, "step": 6130 }, { "epoch": 8.29, "learning_rate": 9.45945945945946e-05, "loss": 0.2899, "step": 6140 }, { "epoch": 8.3, "learning_rate": 9.384384384384384e-05, "loss": 0.3068, "step": 6150 }, { "epoch": 8.32, "learning_rate": 9.309309309309309e-05, "loss": 0.3611, "step": 6160 }, { "epoch": 8.33, "learning_rate": 9.234234234234235e-05, "loss": 0.2767, "step": 6170 }, { "epoch": 8.34, "learning_rate": 9.15915915915916e-05, "loss": 0.4126, "step": 6180 }, { "epoch": 8.36, "learning_rate": 9.084084084084085e-05, "loss": 0.3741, "step": 6190 }, { "epoch": 8.37, "learning_rate": 9.009009009009009e-05, "loss": 0.3326, "step": 6200 }, { "epoch": 8.38, "learning_rate": 8.933933933933934e-05, "loss": 0.3034, "step": 6210 }, { "epoch": 8.4, "learning_rate": 8.85885885885886e-05, "loss": 0.3002, "step": 6220 }, { "epoch": 8.41, "learning_rate": 8.783783783783784e-05, "loss": 0.3728, "step": 6230 }, { "epoch": 8.42, "learning_rate": 8.708708708708708e-05, "loss": 0.3149, "step": 6240 }, { "epoch": 8.44, "learning_rate": 8.633633633633634e-05, "loss": 0.3122, "step": 6250 }, { "epoch": 8.45, "learning_rate": 8.558558558558558e-05, "loss": 0.3404, "step": 6260 }, { "epoch": 8.46, "learning_rate": 8.483483483483484e-05, "loss": 0.3521, "step": 6270 }, { "epoch": 8.48, "learning_rate": 8.408408408408409e-05, "loss": 0.3154, "step": 6280 }, { "epoch": 8.49, "learning_rate": 8.333333333333333e-05, "loss": 0.3654, "step": 6290 }, { "epoch": 8.5, "learning_rate": 8.258258258258259e-05, "loss": 0.3591, "step": 6300 }, { "epoch": 8.52, "learning_rate": 8.183183183183183e-05, "loss": 0.3284, "step": 6310 }, { "epoch": 8.53, "learning_rate": 8.108108108108109e-05, "loss": 0.2774, "step": 6320 }, { "epoch": 8.55, "learning_rate": 8.033033033033032e-05, "loss": 0.2813, "step": 6330 }, { "epoch": 8.56, "learning_rate": 7.957957957957958e-05, "loss": 0.3347, "step": 6340 }, { "epoch": 8.57, "learning_rate": 7.882882882882884e-05, "loss": 0.2895, "step": 6350 }, { "epoch": 8.59, "learning_rate": 7.807807807807808e-05, "loss": 0.3634, "step": 6360 }, { "epoch": 8.6, "learning_rate": 7.732732732732734e-05, "loss": 0.3155, "step": 6370 }, { "epoch": 8.61, "learning_rate": 7.657657657657657e-05, "loss": 0.3407, "step": 6380 }, { "epoch": 8.63, "learning_rate": 7.582582582582583e-05, "loss": 0.318, "step": 6390 }, { "epoch": 8.64, "learning_rate": 7.507507507507507e-05, "loss": 0.2963, "step": 6400 }, { "epoch": 8.65, "learning_rate": 7.432432432432433e-05, "loss": 0.3049, "step": 6410 }, { "epoch": 8.67, "learning_rate": 7.357357357357357e-05, "loss": 0.3612, "step": 6420 }, { "epoch": 8.68, "learning_rate": 7.282282282282282e-05, "loss": 0.3931, "step": 6430 }, { "epoch": 8.69, "learning_rate": 7.207207207207208e-05, "loss": 0.3213, "step": 6440 }, { "epoch": 8.71, "learning_rate": 7.132132132132132e-05, "loss": 0.3216, "step": 6450 }, { "epoch": 8.72, "learning_rate": 7.057057057057058e-05, "loss": 0.341, "step": 6460 }, { "epoch": 8.73, "learning_rate": 6.981981981981982e-05, "loss": 0.2771, "step": 6470 }, { "epoch": 8.75, "learning_rate": 6.906906906906907e-05, "loss": 0.3066, "step": 6480 }, { "epoch": 8.76, "learning_rate": 6.831831831831832e-05, "loss": 0.2938, "step": 6490 }, { "epoch": 8.77, "learning_rate": 6.756756756756757e-05, "loss": 0.3225, "step": 6500 }, { "epoch": 8.79, "learning_rate": 6.681681681681683e-05, "loss": 0.2781, "step": 6510 }, { "epoch": 8.8, "learning_rate": 6.606606606606606e-05, "loss": 0.3331, "step": 6520 }, { "epoch": 8.82, "learning_rate": 6.531531531531531e-05, "loss": 0.3598, "step": 6530 }, { "epoch": 8.83, "learning_rate": 6.456456456456457e-05, "loss": 0.325, "step": 6540 }, { "epoch": 8.84, "learning_rate": 6.381381381381382e-05, "loss": 0.3053, "step": 6550 }, { "epoch": 8.86, "learning_rate": 6.306306306306306e-05, "loss": 0.2996, "step": 6560 }, { "epoch": 8.87, "learning_rate": 6.23123123123123e-05, "loss": 0.3505, "step": 6570 }, { "epoch": 8.88, "learning_rate": 6.156156156156156e-05, "loss": 0.2732, "step": 6580 }, { "epoch": 8.9, "learning_rate": 6.0810810810810814e-05, "loss": 0.3205, "step": 6590 }, { "epoch": 8.91, "learning_rate": 6.006006006006006e-05, "loss": 0.3117, "step": 6600 }, { "epoch": 8.92, "learning_rate": 5.930930930930931e-05, "loss": 0.3402, "step": 6610 }, { "epoch": 8.94, "learning_rate": 5.855855855855856e-05, "loss": 0.3251, "step": 6620 }, { "epoch": 8.95, "learning_rate": 5.780780780780781e-05, "loss": 0.3538, "step": 6630 }, { "epoch": 8.96, "learning_rate": 5.7057057057057056e-05, "loss": 0.3323, "step": 6640 }, { "epoch": 8.98, "learning_rate": 5.630630630630631e-05, "loss": 0.2546, "step": 6650 }, { "epoch": 8.99, "learning_rate": 5.555555555555555e-05, "loss": 0.3567, "step": 6660 }, { "epoch": 9.0, "eval_accuracy": 0.9256851370274055, "eval_f1": 0.9250707611820068, "eval_loss": 0.27885687351226807, "eval_precision": 0.9262127050820336, "eval_recall": 0.9250555525343819, "eval_runtime": 194.8139, "eval_samples_per_second": 51.321, "eval_steps_per_second": 1.904, "step": 6666 }, { "epoch": 9.0, "learning_rate": 5.480480480480481e-05, "loss": 0.2732, "step": 6670 }, { "epoch": 9.02, "learning_rate": 5.405405405405406e-05, "loss": 0.316, "step": 6680 }, { "epoch": 9.03, "learning_rate": 5.3303303303303305e-05, "loss": 0.2812, "step": 6690 }, { "epoch": 9.04, "learning_rate": 5.2552552552552556e-05, "loss": 0.3061, "step": 6700 }, { "epoch": 9.06, "learning_rate": 5.18018018018018e-05, "loss": 0.2993, "step": 6710 }, { "epoch": 9.07, "learning_rate": 5.105105105105105e-05, "loss": 0.323, "step": 6720 }, { "epoch": 9.09, "learning_rate": 5.03003003003003e-05, "loss": 0.2922, "step": 6730 }, { "epoch": 9.1, "learning_rate": 4.954954954954955e-05, "loss": 0.3282, "step": 6740 }, { "epoch": 9.11, "learning_rate": 4.87987987987988e-05, "loss": 0.2847, "step": 6750 }, { "epoch": 9.13, "learning_rate": 4.804804804804805e-05, "loss": 0.2945, "step": 6760 }, { "epoch": 9.14, "learning_rate": 4.72972972972973e-05, "loss": 0.2525, "step": 6770 }, { "epoch": 9.15, "learning_rate": 4.6546546546546544e-05, "loss": 0.2907, "step": 6780 }, { "epoch": 9.17, "learning_rate": 4.57957957957958e-05, "loss": 0.3036, "step": 6790 }, { "epoch": 9.18, "learning_rate": 4.5045045045045046e-05, "loss": 0.2495, "step": 6800 }, { "epoch": 9.19, "learning_rate": 4.42942942942943e-05, "loss": 0.2752, "step": 6810 }, { "epoch": 9.21, "learning_rate": 4.354354354354354e-05, "loss": 0.3388, "step": 6820 }, { "epoch": 9.22, "learning_rate": 4.279279279279279e-05, "loss": 0.3166, "step": 6830 }, { "epoch": 9.23, "learning_rate": 4.204204204204204e-05, "loss": 0.3494, "step": 6840 }, { "epoch": 9.25, "learning_rate": 4.1291291291291294e-05, "loss": 0.2602, "step": 6850 }, { "epoch": 9.26, "learning_rate": 4.0540540540540545e-05, "loss": 0.2571, "step": 6860 }, { "epoch": 9.27, "learning_rate": 3.978978978978979e-05, "loss": 0.3087, "step": 6870 }, { "epoch": 9.29, "learning_rate": 3.903903903903904e-05, "loss": 0.2927, "step": 6880 }, { "epoch": 9.3, "learning_rate": 3.8288288288288285e-05, "loss": 0.2868, "step": 6890 }, { "epoch": 9.31, "learning_rate": 3.7537537537537536e-05, "loss": 0.2722, "step": 6900 }, { "epoch": 9.33, "learning_rate": 3.678678678678679e-05, "loss": 0.249, "step": 6910 }, { "epoch": 9.34, "learning_rate": 3.603603603603604e-05, "loss": 0.2773, "step": 6920 }, { "epoch": 9.36, "learning_rate": 3.528528528528529e-05, "loss": 0.2947, "step": 6930 }, { "epoch": 9.37, "learning_rate": 3.453453453453453e-05, "loss": 0.3421, "step": 6940 }, { "epoch": 9.38, "learning_rate": 3.3783783783783784e-05, "loss": 0.2547, "step": 6950 }, { "epoch": 9.4, "learning_rate": 3.303303303303303e-05, "loss": 0.2803, "step": 6960 }, { "epoch": 9.41, "learning_rate": 3.2282282282282286e-05, "loss": 0.2675, "step": 6970 }, { "epoch": 9.42, "learning_rate": 3.153153153153153e-05, "loss": 0.2654, "step": 6980 }, { "epoch": 9.44, "learning_rate": 3.078078078078078e-05, "loss": 0.2709, "step": 6990 }, { "epoch": 9.45, "learning_rate": 3.003003003003003e-05, "loss": 0.2359, "step": 7000 }, { "epoch": 9.46, "learning_rate": 2.927927927927928e-05, "loss": 0.2535, "step": 7010 }, { "epoch": 9.48, "learning_rate": 2.8528528528528528e-05, "loss": 0.2867, "step": 7020 }, { "epoch": 9.49, "learning_rate": 2.7777777777777776e-05, "loss": 0.254, "step": 7030 }, { "epoch": 9.5, "learning_rate": 2.702702702702703e-05, "loss": 0.2382, "step": 7040 }, { "epoch": 9.52, "learning_rate": 2.6276276276276278e-05, "loss": 0.2711, "step": 7050 }, { "epoch": 9.53, "learning_rate": 2.5525525525525525e-05, "loss": 0.2785, "step": 7060 }, { "epoch": 9.54, "learning_rate": 2.4774774774774777e-05, "loss": 0.2695, "step": 7070 }, { "epoch": 9.56, "learning_rate": 2.4024024024024024e-05, "loss": 0.3143, "step": 7080 }, { "epoch": 9.57, "learning_rate": 2.3273273273273272e-05, "loss": 0.2781, "step": 7090 }, { "epoch": 9.58, "learning_rate": 2.2522522522522523e-05, "loss": 0.254, "step": 7100 }, { "epoch": 9.6, "learning_rate": 2.177177177177177e-05, "loss": 0.2371, "step": 7110 }, { "epoch": 9.61, "learning_rate": 2.102102102102102e-05, "loss": 0.2558, "step": 7120 }, { "epoch": 9.63, "learning_rate": 2.0270270270270273e-05, "loss": 0.2748, "step": 7130 }, { "epoch": 9.64, "learning_rate": 1.951951951951952e-05, "loss": 0.3006, "step": 7140 }, { "epoch": 9.65, "learning_rate": 1.8768768768768768e-05, "loss": 0.3219, "step": 7150 }, { "epoch": 9.67, "learning_rate": 1.801801801801802e-05, "loss": 0.3013, "step": 7160 }, { "epoch": 9.68, "learning_rate": 1.7267267267267267e-05, "loss": 0.2612, "step": 7170 }, { "epoch": 9.69, "learning_rate": 1.6516516516516514e-05, "loss": 0.2406, "step": 7180 }, { "epoch": 9.71, "learning_rate": 1.5765765765765765e-05, "loss": 0.2628, "step": 7190 }, { "epoch": 9.72, "learning_rate": 1.5015015015015015e-05, "loss": 0.2798, "step": 7200 }, { "epoch": 9.73, "learning_rate": 1.4264264264264264e-05, "loss": 0.2725, "step": 7210 }, { "epoch": 9.75, "learning_rate": 1.3513513513513515e-05, "loss": 0.3193, "step": 7220 }, { "epoch": 9.76, "learning_rate": 1.2762762762762763e-05, "loss": 0.277, "step": 7230 }, { "epoch": 9.77, "learning_rate": 1.2012012012012012e-05, "loss": 0.3349, "step": 7240 }, { "epoch": 9.79, "learning_rate": 1.1261261261261261e-05, "loss": 0.272, "step": 7250 }, { "epoch": 9.8, "learning_rate": 1.051051051051051e-05, "loss": 0.3017, "step": 7260 }, { "epoch": 9.81, "learning_rate": 9.75975975975976e-06, "loss": 0.3224, "step": 7270 }, { "epoch": 9.83, "learning_rate": 9.00900900900901e-06, "loss": 0.2935, "step": 7280 }, { "epoch": 9.84, "learning_rate": 8.258258258258257e-06, "loss": 0.3441, "step": 7290 }, { "epoch": 9.85, "learning_rate": 7.507507507507507e-06, "loss": 0.3263, "step": 7300 }, { "epoch": 9.87, "learning_rate": 6.7567567567567575e-06, "loss": 0.2503, "step": 7310 }, { "epoch": 9.88, "learning_rate": 6.006006006006006e-06, "loss": 0.316, "step": 7320 }, { "epoch": 9.9, "learning_rate": 5.255255255255255e-06, "loss": 0.309, "step": 7330 }, { "epoch": 9.91, "learning_rate": 4.504504504504505e-06, "loss": 0.2497, "step": 7340 }, { "epoch": 9.92, "learning_rate": 3.7537537537537537e-06, "loss": 0.206, "step": 7350 }, { "epoch": 9.94, "learning_rate": 3.003003003003003e-06, "loss": 0.2813, "step": 7360 }, { "epoch": 9.95, "learning_rate": 2.2522522522522524e-06, "loss": 0.3494, "step": 7370 }, { "epoch": 9.96, "learning_rate": 1.5015015015015015e-06, "loss": 0.3268, "step": 7380 }, { "epoch": 9.98, "learning_rate": 7.507507507507508e-07, "loss": 0.2431, "step": 7390 }, { "epoch": 9.99, "learning_rate": 0.0, "loss": 0.3313, "step": 7400 }, { "epoch": 9.99, "eval_accuracy": 0.9310862172434486, "eval_f1": 0.9305170890009876, "eval_loss": 0.26112696528434753, "eval_precision": 0.9311947598825216, "eval_recall": 0.9304846180354004, "eval_runtime": 191.2068, "eval_samples_per_second": 52.289, "eval_steps_per_second": 1.94, "step": 7400 }, { "epoch": 9.99, "step": 7400, "total_flos": 1.8619741375601993e+20, "train_loss": 0.7571127768142804, "train_runtime": 37998.3887, "train_samples_per_second": 21.049, "train_steps_per_second": 0.195 } ], "logging_steps": 10, "max_steps": 7400, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.8619741375601993e+20, "trial_name": null, "trial_params": null }