diff --git "a/data_lm/perceived/model/trainer_state.json" "b/data_lm/perceived/model/trainer_state.json" new file mode 100644--- /dev/null +++ "b/data_lm/perceived/model/trainer_state.json" @@ -0,0 +1,54900 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 50.0, + "global_step": 4544750, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 5.5008526321579835e-11, + "loss": 4.768, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 1.1001705264315967e-10, + "loss": 4.784, + "step": 1000 + }, + { + "epoch": 0.02, + "learning_rate": 1.6502557896473952e-10, + "loss": 4.757, + "step": 1500 + }, + { + "epoch": 0.02, + "learning_rate": 2.2003410528631934e-10, + "loss": 4.7771, + "step": 2000 + }, + { + "epoch": 0.03, + "learning_rate": 2.750426316078992e-10, + "loss": 4.7514, + "step": 2500 + }, + { + "epoch": 0.03, + "learning_rate": 3.3005115792947905e-10, + "loss": 4.7511, + "step": 3000 + }, + { + "epoch": 0.04, + "learning_rate": 3.850596842510589e-10, + "loss": 4.7648, + "step": 3500 + }, + { + "epoch": 0.04, + "learning_rate": 4.400682105726387e-10, + "loss": 4.7847, + "step": 4000 + }, + { + "epoch": 0.05, + "learning_rate": 4.950767368942185e-10, + "loss": 4.7525, + "step": 4500 + }, + { + "epoch": 0.06, + "learning_rate": 5.500852632157984e-10, + "loss": 4.7737, + "step": 5000 + }, + { + "epoch": 0.06, + "learning_rate": 6.050937895373783e-10, + "loss": 4.764, + "step": 5500 + }, + { + "epoch": 0.07, + "learning_rate": 6.601023158589581e-10, + "loss": 4.7703, + "step": 6000 + }, + { + "epoch": 0.07, + "learning_rate": 7.151108421805379e-10, + "loss": 4.7538, + "step": 6500 + }, + { + "epoch": 0.08, + "learning_rate": 7.701193685021178e-10, + "loss": 4.7734, + "step": 7000 + }, + { + "epoch": 0.08, + "learning_rate": 8.251278948236977e-10, + "loss": 4.759, + "step": 7500 + }, + { + "epoch": 0.09, + "learning_rate": 8.801364211452774e-10, + "loss": 4.7597, + "step": 8000 + }, + { + "epoch": 0.09, + "learning_rate": 9.351449474668573e-10, + "loss": 4.7476, + "step": 8500 + }, + { + "epoch": 0.1, + "learning_rate": 9.90153473788437e-10, + "loss": 4.7311, + "step": 9000 + }, + { + "epoch": 0.1, + "learning_rate": 1.045162000110017e-09, + "loss": 4.7374, + "step": 9500 + }, + { + "epoch": 0.11, + "learning_rate": 1.1001705264315968e-09, + "loss": 4.7695, + "step": 10000 + }, + { + "epoch": 0.12, + "learning_rate": 1.1551790527531766e-09, + "loss": 4.7508, + "step": 10500 + }, + { + "epoch": 0.12, + "learning_rate": 1.2101875790747566e-09, + "loss": 4.7638, + "step": 11000 + }, + { + "epoch": 0.13, + "learning_rate": 1.2651961053963364e-09, + "loss": 4.7644, + "step": 11500 + }, + { + "epoch": 0.13, + "learning_rate": 1.3202046317179162e-09, + "loss": 4.7728, + "step": 12000 + }, + { + "epoch": 0.14, + "learning_rate": 1.3752131580394962e-09, + "loss": 4.7544, + "step": 12500 + }, + { + "epoch": 0.14, + "learning_rate": 1.4302216843610758e-09, + "loss": 4.7541, + "step": 13000 + }, + { + "epoch": 0.15, + "learning_rate": 1.4852302106826558e-09, + "loss": 4.7371, + "step": 13500 + }, + { + "epoch": 0.15, + "learning_rate": 1.5402387370042356e-09, + "loss": 4.7518, + "step": 14000 + }, + { + "epoch": 0.16, + "learning_rate": 1.5952472633258156e-09, + "loss": 4.7438, + "step": 14500 + }, + { + "epoch": 0.17, + "learning_rate": 1.6502557896473954e-09, + "loss": 4.7609, + "step": 15000 + }, + { + "epoch": 0.17, + "learning_rate": 1.705264315968975e-09, + "loss": 4.7393, + "step": 15500 + }, + { + "epoch": 0.18, + "learning_rate": 1.7602728422905547e-09, + "loss": 4.7297, + "step": 16000 + }, + { + "epoch": 0.18, + "learning_rate": 1.8152813686121347e-09, + "loss": 4.7173, + "step": 16500 + }, + { + "epoch": 0.19, + "learning_rate": 1.8702898949337145e-09, + "loss": 4.7246, + "step": 17000 + }, + { + "epoch": 0.19, + "learning_rate": 1.9252984212552945e-09, + "loss": 4.762, + "step": 17500 + }, + { + "epoch": 0.2, + "learning_rate": 1.980306947576874e-09, + "loss": 4.7414, + "step": 18000 + }, + { + "epoch": 0.2, + "learning_rate": 2.035315473898454e-09, + "loss": 4.7654, + "step": 18500 + }, + { + "epoch": 0.21, + "learning_rate": 2.090324000220034e-09, + "loss": 4.7208, + "step": 19000 + }, + { + "epoch": 0.21, + "learning_rate": 2.1453325265416137e-09, + "loss": 4.715, + "step": 19500 + }, + { + "epoch": 0.22, + "learning_rate": 2.2003410528631937e-09, + "loss": 4.7226, + "step": 20000 + }, + { + "epoch": 0.23, + "learning_rate": 2.2553495791847737e-09, + "loss": 4.7242, + "step": 20500 + }, + { + "epoch": 0.23, + "learning_rate": 2.3103581055063532e-09, + "loss": 4.7273, + "step": 21000 + }, + { + "epoch": 0.24, + "learning_rate": 2.3653666318279332e-09, + "loss": 4.7234, + "step": 21500 + }, + { + "epoch": 0.24, + "learning_rate": 2.4203751581495132e-09, + "loss": 4.7021, + "step": 22000 + }, + { + "epoch": 0.25, + "learning_rate": 2.475383684471093e-09, + "loss": 4.6943, + "step": 22500 + }, + { + "epoch": 0.25, + "learning_rate": 2.530392210792673e-09, + "loss": 4.6991, + "step": 23000 + }, + { + "epoch": 0.26, + "learning_rate": 2.585400737114253e-09, + "loss": 4.7006, + "step": 23500 + }, + { + "epoch": 0.26, + "learning_rate": 2.6404092634358324e-09, + "loss": 4.7126, + "step": 24000 + }, + { + "epoch": 0.27, + "learning_rate": 2.6954177897574124e-09, + "loss": 4.7212, + "step": 24500 + }, + { + "epoch": 0.28, + "learning_rate": 2.7504263160789924e-09, + "loss": 4.6977, + "step": 25000 + }, + { + "epoch": 0.28, + "learning_rate": 2.805434842400572e-09, + "loss": 4.7103, + "step": 25500 + }, + { + "epoch": 0.29, + "learning_rate": 2.8604433687221516e-09, + "loss": 4.7067, + "step": 26000 + }, + { + "epoch": 0.29, + "learning_rate": 2.9154518950437316e-09, + "loss": 4.6784, + "step": 26500 + }, + { + "epoch": 0.3, + "learning_rate": 2.9704604213653115e-09, + "loss": 4.6981, + "step": 27000 + }, + { + "epoch": 0.3, + "learning_rate": 3.025468947686891e-09, + "loss": 4.6792, + "step": 27500 + }, + { + "epoch": 0.31, + "learning_rate": 3.080477474008471e-09, + "loss": 4.6849, + "step": 28000 + }, + { + "epoch": 0.31, + "learning_rate": 3.135486000330051e-09, + "loss": 4.6782, + "step": 28500 + }, + { + "epoch": 0.32, + "learning_rate": 3.190494526651631e-09, + "loss": 4.6557, + "step": 29000 + }, + { + "epoch": 0.32, + "learning_rate": 3.2455030529732107e-09, + "loss": 4.648, + "step": 29500 + }, + { + "epoch": 0.33, + "learning_rate": 3.3005115792947907e-09, + "loss": 4.6865, + "step": 30000 + }, + { + "epoch": 0.34, + "learning_rate": 3.3555201056163703e-09, + "loss": 4.6514, + "step": 30500 + }, + { + "epoch": 0.34, + "learning_rate": 3.41052863193795e-09, + "loss": 4.6514, + "step": 31000 + }, + { + "epoch": 0.35, + "learning_rate": 3.4655371582595303e-09, + "loss": 4.6764, + "step": 31500 + }, + { + "epoch": 0.35, + "learning_rate": 3.5205456845811094e-09, + "loss": 4.6575, + "step": 32000 + }, + { + "epoch": 0.36, + "learning_rate": 3.57555421090269e-09, + "loss": 4.6421, + "step": 32500 + }, + { + "epoch": 0.36, + "learning_rate": 3.6305627372242694e-09, + "loss": 4.6165, + "step": 33000 + }, + { + "epoch": 0.37, + "learning_rate": 3.6855712635458494e-09, + "loss": 4.6427, + "step": 33500 + }, + { + "epoch": 0.37, + "learning_rate": 3.740579789867429e-09, + "loss": 4.6247, + "step": 34000 + }, + { + "epoch": 0.38, + "learning_rate": 3.795588316189009e-09, + "loss": 4.623, + "step": 34500 + }, + { + "epoch": 0.39, + "learning_rate": 3.850596842510589e-09, + "loss": 4.6339, + "step": 35000 + }, + { + "epoch": 0.39, + "learning_rate": 3.905605368832169e-09, + "loss": 4.6452, + "step": 35500 + }, + { + "epoch": 0.4, + "learning_rate": 3.960613895153748e-09, + "loss": 4.6301, + "step": 36000 + }, + { + "epoch": 0.4, + "learning_rate": 4.015622421475329e-09, + "loss": 4.6345, + "step": 36500 + }, + { + "epoch": 0.41, + "learning_rate": 4.070630947796908e-09, + "loss": 4.6117, + "step": 37000 + }, + { + "epoch": 0.41, + "learning_rate": 4.125639474118488e-09, + "loss": 4.6143, + "step": 37500 + }, + { + "epoch": 0.42, + "learning_rate": 4.180648000440068e-09, + "loss": 4.5898, + "step": 38000 + }, + { + "epoch": 0.42, + "learning_rate": 4.235656526761648e-09, + "loss": 4.6068, + "step": 38500 + }, + { + "epoch": 0.43, + "learning_rate": 4.290665053083227e-09, + "loss": 4.6021, + "step": 39000 + }, + { + "epoch": 0.43, + "learning_rate": 4.345673579404808e-09, + "loss": 4.6156, + "step": 39500 + }, + { + "epoch": 0.44, + "learning_rate": 4.400682105726387e-09, + "loss": 4.5951, + "step": 40000 + }, + { + "epoch": 0.45, + "learning_rate": 4.455690632047967e-09, + "loss": 4.5732, + "step": 40500 + }, + { + "epoch": 0.45, + "learning_rate": 4.510699158369547e-09, + "loss": 4.571, + "step": 41000 + }, + { + "epoch": 0.46, + "learning_rate": 4.5657076846911265e-09, + "loss": 4.5747, + "step": 41500 + }, + { + "epoch": 0.46, + "learning_rate": 4.6207162110127065e-09, + "loss": 4.5716, + "step": 42000 + }, + { + "epoch": 0.47, + "learning_rate": 4.6757247373342865e-09, + "loss": 4.5713, + "step": 42500 + }, + { + "epoch": 0.47, + "learning_rate": 4.7307332636558665e-09, + "loss": 4.5905, + "step": 43000 + }, + { + "epoch": 0.48, + "learning_rate": 4.785741789977446e-09, + "loss": 4.5881, + "step": 43500 + }, + { + "epoch": 0.48, + "learning_rate": 4.8407503162990265e-09, + "loss": 4.5747, + "step": 44000 + }, + { + "epoch": 0.49, + "learning_rate": 4.895758842620606e-09, + "loss": 4.5705, + "step": 44500 + }, + { + "epoch": 0.5, + "learning_rate": 4.950767368942186e-09, + "loss": 4.5505, + "step": 45000 + }, + { + "epoch": 0.5, + "learning_rate": 5.005775895263766e-09, + "loss": 4.5615, + "step": 45500 + }, + { + "epoch": 0.51, + "learning_rate": 5.060784421585346e-09, + "loss": 4.5524, + "step": 46000 + }, + { + "epoch": 0.51, + "learning_rate": 5.115792947906925e-09, + "loss": 4.5373, + "step": 46500 + }, + { + "epoch": 0.52, + "learning_rate": 5.170801474228506e-09, + "loss": 4.5184, + "step": 47000 + }, + { + "epoch": 0.52, + "learning_rate": 5.225810000550085e-09, + "loss": 4.533, + "step": 47500 + }, + { + "epoch": 0.53, + "learning_rate": 5.280818526871665e-09, + "loss": 4.5517, + "step": 48000 + }, + { + "epoch": 0.53, + "learning_rate": 5.335827053193245e-09, + "loss": 4.5389, + "step": 48500 + }, + { + "epoch": 0.54, + "learning_rate": 5.390835579514825e-09, + "loss": 4.5272, + "step": 49000 + }, + { + "epoch": 0.54, + "learning_rate": 5.445844105836404e-09, + "loss": 4.553, + "step": 49500 + }, + { + "epoch": 0.55, + "learning_rate": 5.500852632157985e-09, + "loss": 4.5444, + "step": 50000 + }, + { + "epoch": 0.56, + "learning_rate": 5.555861158479564e-09, + "loss": 4.541, + "step": 50500 + }, + { + "epoch": 0.56, + "learning_rate": 5.610869684801144e-09, + "loss": 4.5163, + "step": 51000 + }, + { + "epoch": 0.57, + "learning_rate": 5.665878211122724e-09, + "loss": 4.5052, + "step": 51500 + }, + { + "epoch": 0.57, + "learning_rate": 5.720886737444303e-09, + "loss": 4.5229, + "step": 52000 + }, + { + "epoch": 0.58, + "learning_rate": 5.775895263765883e-09, + "loss": 4.5193, + "step": 52500 + }, + { + "epoch": 0.58, + "learning_rate": 5.830903790087463e-09, + "loss": 4.5054, + "step": 53000 + }, + { + "epoch": 0.59, + "learning_rate": 5.885912316409043e-09, + "loss": 4.5003, + "step": 53500 + }, + { + "epoch": 0.59, + "learning_rate": 5.940920842730623e-09, + "loss": 4.5198, + "step": 54000 + }, + { + "epoch": 0.6, + "learning_rate": 5.995929369052203e-09, + "loss": 4.5211, + "step": 54500 + }, + { + "epoch": 0.61, + "learning_rate": 6.050937895373782e-09, + "loss": 4.5238, + "step": 55000 + }, + { + "epoch": 0.61, + "learning_rate": 6.105946421695362e-09, + "loss": 4.509, + "step": 55500 + }, + { + "epoch": 0.62, + "learning_rate": 6.160954948016942e-09, + "loss": 4.4888, + "step": 56000 + }, + { + "epoch": 0.62, + "learning_rate": 6.215963474338522e-09, + "loss": 4.4952, + "step": 56500 + }, + { + "epoch": 0.63, + "learning_rate": 6.270972000660102e-09, + "loss": 4.5136, + "step": 57000 + }, + { + "epoch": 0.63, + "learning_rate": 6.325980526981682e-09, + "loss": 4.5058, + "step": 57500 + }, + { + "epoch": 0.64, + "learning_rate": 6.380989053303262e-09, + "loss": 4.4879, + "step": 58000 + }, + { + "epoch": 0.64, + "learning_rate": 6.4359975796248414e-09, + "loss": 4.4921, + "step": 58500 + }, + { + "epoch": 0.65, + "learning_rate": 6.491006105946421e-09, + "loss": 4.4858, + "step": 59000 + }, + { + "epoch": 0.65, + "learning_rate": 6.546014632268001e-09, + "loss": 4.4903, + "step": 59500 + }, + { + "epoch": 0.66, + "learning_rate": 6.601023158589581e-09, + "loss": 4.4859, + "step": 60000 + }, + { + "epoch": 0.67, + "learning_rate": 6.6560316849111606e-09, + "loss": 4.4696, + "step": 60500 + }, + { + "epoch": 0.67, + "learning_rate": 6.7110402112327406e-09, + "loss": 4.4711, + "step": 61000 + }, + { + "epoch": 0.68, + "learning_rate": 6.766048737554321e-09, + "loss": 4.4755, + "step": 61500 + }, + { + "epoch": 0.68, + "learning_rate": 6.8210572638759e-09, + "loss": 4.4784, + "step": 62000 + }, + { + "epoch": 0.69, + "learning_rate": 6.87606579019748e-09, + "loss": 4.4525, + "step": 62500 + }, + { + "epoch": 0.69, + "learning_rate": 6.9310743165190606e-09, + "loss": 4.459, + "step": 63000 + }, + { + "epoch": 0.7, + "learning_rate": 6.9860828428406406e-09, + "loss": 4.4746, + "step": 63500 + }, + { + "epoch": 0.7, + "learning_rate": 7.041091369162219e-09, + "loss": 4.4519, + "step": 64000 + }, + { + "epoch": 0.71, + "learning_rate": 7.0960998954838e-09, + "loss": 4.4633, + "step": 64500 + }, + { + "epoch": 0.72, + "learning_rate": 7.15110842180538e-09, + "loss": 4.4463, + "step": 65000 + }, + { + "epoch": 0.72, + "learning_rate": 7.20611694812696e-09, + "loss": 4.4813, + "step": 65500 + }, + { + "epoch": 0.73, + "learning_rate": 7.261125474448539e-09, + "loss": 4.4452, + "step": 66000 + }, + { + "epoch": 0.73, + "learning_rate": 7.316134000770119e-09, + "loss": 4.4415, + "step": 66500 + }, + { + "epoch": 0.74, + "learning_rate": 7.371142527091699e-09, + "loss": 4.4505, + "step": 67000 + }, + { + "epoch": 0.74, + "learning_rate": 7.42615105341328e-09, + "loss": 4.4413, + "step": 67500 + }, + { + "epoch": 0.75, + "learning_rate": 7.481159579734858e-09, + "loss": 4.4267, + "step": 68000 + }, + { + "epoch": 0.75, + "learning_rate": 7.536168106056439e-09, + "loss": 4.4385, + "step": 68500 + }, + { + "epoch": 0.76, + "learning_rate": 7.591176632378018e-09, + "loss": 4.4343, + "step": 69000 + }, + { + "epoch": 0.76, + "learning_rate": 7.646185158699599e-09, + "loss": 4.419, + "step": 69500 + }, + { + "epoch": 0.77, + "learning_rate": 7.701193685021178e-09, + "loss": 4.4365, + "step": 70000 + }, + { + "epoch": 0.78, + "learning_rate": 7.756202211342757e-09, + "loss": 4.4294, + "step": 70500 + }, + { + "epoch": 0.78, + "learning_rate": 7.811210737664338e-09, + "loss": 4.4121, + "step": 71000 + }, + { + "epoch": 0.79, + "learning_rate": 7.866219263985917e-09, + "loss": 4.419, + "step": 71500 + }, + { + "epoch": 0.79, + "learning_rate": 7.921227790307496e-09, + "loss": 4.4186, + "step": 72000 + }, + { + "epoch": 0.8, + "learning_rate": 7.976236316629077e-09, + "loss": 4.4212, + "step": 72500 + }, + { + "epoch": 0.8, + "learning_rate": 8.031244842950658e-09, + "loss": 4.4224, + "step": 73000 + }, + { + "epoch": 0.81, + "learning_rate": 8.086253369272236e-09, + "loss": 4.4272, + "step": 73500 + }, + { + "epoch": 0.81, + "learning_rate": 8.141261895593816e-09, + "loss": 4.411, + "step": 74000 + }, + { + "epoch": 0.82, + "learning_rate": 8.196270421915397e-09, + "loss": 4.4217, + "step": 74500 + }, + { + "epoch": 0.83, + "learning_rate": 8.251278948236976e-09, + "loss": 4.3936, + "step": 75000 + }, + { + "epoch": 0.83, + "learning_rate": 8.306287474558556e-09, + "loss": 4.4151, + "step": 75500 + }, + { + "epoch": 0.84, + "learning_rate": 8.361296000880136e-09, + "loss": 4.394, + "step": 76000 + }, + { + "epoch": 0.84, + "learning_rate": 8.416304527201716e-09, + "loss": 4.4027, + "step": 76500 + }, + { + "epoch": 0.85, + "learning_rate": 8.471313053523296e-09, + "loss": 4.3853, + "step": 77000 + }, + { + "epoch": 0.85, + "learning_rate": 8.526321579844875e-09, + "loss": 4.3918, + "step": 77500 + }, + { + "epoch": 0.86, + "learning_rate": 8.581330106166455e-09, + "loss": 4.3874, + "step": 78000 + }, + { + "epoch": 0.86, + "learning_rate": 8.636338632488035e-09, + "loss": 4.3888, + "step": 78500 + }, + { + "epoch": 0.87, + "learning_rate": 8.691347158809616e-09, + "loss": 4.4081, + "step": 79000 + }, + { + "epoch": 0.87, + "learning_rate": 8.746355685131194e-09, + "loss": 4.3998, + "step": 79500 + }, + { + "epoch": 0.88, + "learning_rate": 8.801364211452775e-09, + "loss": 4.4128, + "step": 80000 + }, + { + "epoch": 0.89, + "learning_rate": 8.856372737774355e-09, + "loss": 4.4045, + "step": 80500 + }, + { + "epoch": 0.89, + "learning_rate": 8.911381264095935e-09, + "loss": 4.3757, + "step": 81000 + }, + { + "epoch": 0.9, + "learning_rate": 8.966389790417514e-09, + "loss": 4.3741, + "step": 81500 + }, + { + "epoch": 0.9, + "learning_rate": 9.021398316739095e-09, + "loss": 4.3908, + "step": 82000 + }, + { + "epoch": 0.91, + "learning_rate": 9.076406843060674e-09, + "loss": 4.3689, + "step": 82500 + }, + { + "epoch": 0.91, + "learning_rate": 9.131415369382253e-09, + "loss": 4.3711, + "step": 83000 + }, + { + "epoch": 0.92, + "learning_rate": 9.186423895703834e-09, + "loss": 4.3888, + "step": 83500 + }, + { + "epoch": 0.92, + "learning_rate": 9.241432422025413e-09, + "loss": 4.377, + "step": 84000 + }, + { + "epoch": 0.93, + "learning_rate": 9.296440948346994e-09, + "loss": 4.3702, + "step": 84500 + }, + { + "epoch": 0.94, + "learning_rate": 9.351449474668573e-09, + "loss": 4.3912, + "step": 85000 + }, + { + "epoch": 0.94, + "learning_rate": 9.406458000990152e-09, + "loss": 4.3761, + "step": 85500 + }, + { + "epoch": 0.95, + "learning_rate": 9.461466527311733e-09, + "loss": 4.3941, + "step": 86000 + }, + { + "epoch": 0.95, + "learning_rate": 9.516475053633314e-09, + "loss": 4.3642, + "step": 86500 + }, + { + "epoch": 0.96, + "learning_rate": 9.571483579954891e-09, + "loss": 4.354, + "step": 87000 + }, + { + "epoch": 0.96, + "learning_rate": 9.626492106276472e-09, + "loss": 4.3474, + "step": 87500 + }, + { + "epoch": 0.97, + "learning_rate": 9.681500632598053e-09, + "loss": 4.3485, + "step": 88000 + }, + { + "epoch": 0.97, + "learning_rate": 9.736509158919632e-09, + "loss": 4.3483, + "step": 88500 + }, + { + "epoch": 0.98, + "learning_rate": 9.791517685241211e-09, + "loss": 4.35, + "step": 89000 + }, + { + "epoch": 0.98, + "learning_rate": 9.846526211562792e-09, + "loss": 4.3379, + "step": 89500 + }, + { + "epoch": 0.99, + "learning_rate": 9.901534737884371e-09, + "loss": 4.3628, + "step": 90000 + }, + { + "epoch": 1.0, + "learning_rate": 9.956543264205952e-09, + "loss": 4.3589, + "step": 90500 + }, + { + "epoch": 1.0, + "eval_loss": 4.289782524108887, + "eval_runtime": 6.1366, + "eval_samples_per_second": 253.236, + "step": 90895 + }, + { + "epoch": 1.0, + "learning_rate": 1.0011551790527531e-08, + "loss": 4.362, + "step": 91000 + }, + { + "epoch": 1.01, + "learning_rate": 1.006656031684911e-08, + "loss": 4.3533, + "step": 91500 + }, + { + "epoch": 1.01, + "learning_rate": 1.0121568843170691e-08, + "loss": 4.3506, + "step": 92000 + }, + { + "epoch": 1.02, + "learning_rate": 1.017657736949227e-08, + "loss": 4.3542, + "step": 92500 + }, + { + "epoch": 1.02, + "learning_rate": 1.023158589581385e-08, + "loss": 4.3379, + "step": 93000 + }, + { + "epoch": 1.03, + "learning_rate": 1.028659442213543e-08, + "loss": 4.3561, + "step": 93500 + }, + { + "epoch": 1.03, + "learning_rate": 1.0341602948457011e-08, + "loss": 4.3405, + "step": 94000 + }, + { + "epoch": 1.04, + "learning_rate": 1.039661147477859e-08, + "loss": 4.3303, + "step": 94500 + }, + { + "epoch": 1.05, + "learning_rate": 1.045162000110017e-08, + "loss": 4.3082, + "step": 95000 + }, + { + "epoch": 1.05, + "learning_rate": 1.050662852742175e-08, + "loss": 4.343, + "step": 95500 + }, + { + "epoch": 1.06, + "learning_rate": 1.056163705374333e-08, + "loss": 4.3371, + "step": 96000 + }, + { + "epoch": 1.06, + "learning_rate": 1.0616645580064909e-08, + "loss": 4.3386, + "step": 96500 + }, + { + "epoch": 1.07, + "learning_rate": 1.067165410638649e-08, + "loss": 4.2928, + "step": 97000 + }, + { + "epoch": 1.07, + "learning_rate": 1.0726662632708069e-08, + "loss": 4.3474, + "step": 97500 + }, + { + "epoch": 1.08, + "learning_rate": 1.078167115902965e-08, + "loss": 4.3359, + "step": 98000 + }, + { + "epoch": 1.08, + "learning_rate": 1.0836679685351229e-08, + "loss": 4.3129, + "step": 98500 + }, + { + "epoch": 1.09, + "learning_rate": 1.0891688211672808e-08, + "loss": 4.3129, + "step": 99000 + }, + { + "epoch": 1.09, + "learning_rate": 1.0946696737994389e-08, + "loss": 4.3069, + "step": 99500 + }, + { + "epoch": 1.1, + "learning_rate": 1.100170526431597e-08, + "loss": 4.3064, + "step": 100000 + }, + { + "epoch": 1.11, + "learning_rate": 1.1056713790637549e-08, + "loss": 4.3297, + "step": 100500 + }, + { + "epoch": 1.11, + "learning_rate": 1.1111722316959128e-08, + "loss": 4.3102, + "step": 101000 + }, + { + "epoch": 1.12, + "learning_rate": 1.1166730843280709e-08, + "loss": 4.3026, + "step": 101500 + }, + { + "epoch": 1.12, + "learning_rate": 1.1221739369602288e-08, + "loss": 4.3202, + "step": 102000 + }, + { + "epoch": 1.13, + "learning_rate": 1.1276747895923867e-08, + "loss": 4.3356, + "step": 102500 + }, + { + "epoch": 1.13, + "learning_rate": 1.1331756422245448e-08, + "loss": 4.3202, + "step": 103000 + }, + { + "epoch": 1.14, + "learning_rate": 1.1386764948567027e-08, + "loss": 4.3178, + "step": 103500 + }, + { + "epoch": 1.14, + "learning_rate": 1.1441773474888606e-08, + "loss": 4.2979, + "step": 104000 + }, + { + "epoch": 1.15, + "learning_rate": 1.1496782001210187e-08, + "loss": 4.3109, + "step": 104500 + }, + { + "epoch": 1.16, + "learning_rate": 1.1551790527531766e-08, + "loss": 4.3, + "step": 105000 + }, + { + "epoch": 1.16, + "learning_rate": 1.1606799053853347e-08, + "loss": 4.3124, + "step": 105500 + }, + { + "epoch": 1.17, + "learning_rate": 1.1661807580174926e-08, + "loss": 4.2976, + "step": 106000 + }, + { + "epoch": 1.17, + "learning_rate": 1.1716816106496507e-08, + "loss": 4.2828, + "step": 106500 + }, + { + "epoch": 1.18, + "learning_rate": 1.1771824632818086e-08, + "loss": 4.3044, + "step": 107000 + }, + { + "epoch": 1.18, + "learning_rate": 1.1826833159139667e-08, + "loss": 4.2915, + "step": 107500 + }, + { + "epoch": 1.19, + "learning_rate": 1.1881841685461246e-08, + "loss": 4.3062, + "step": 108000 + }, + { + "epoch": 1.19, + "learning_rate": 1.1936850211782825e-08, + "loss": 4.2922, + "step": 108500 + }, + { + "epoch": 1.2, + "learning_rate": 1.1991858738104406e-08, + "loss": 4.2828, + "step": 109000 + }, + { + "epoch": 1.2, + "learning_rate": 1.2046867264425985e-08, + "loss": 4.2928, + "step": 109500 + }, + { + "epoch": 1.21, + "learning_rate": 1.2101875790747565e-08, + "loss": 4.2987, + "step": 110000 + }, + { + "epoch": 1.22, + "learning_rate": 1.2156884317069145e-08, + "loss": 4.2907, + "step": 110500 + }, + { + "epoch": 1.22, + "learning_rate": 1.2211892843390725e-08, + "loss": 4.2922, + "step": 111000 + }, + { + "epoch": 1.23, + "learning_rate": 1.2266901369712304e-08, + "loss": 4.2973, + "step": 111500 + }, + { + "epoch": 1.23, + "learning_rate": 1.2321909896033885e-08, + "loss": 4.2901, + "step": 112000 + }, + { + "epoch": 1.24, + "learning_rate": 1.2376918422355465e-08, + "loss": 4.283, + "step": 112500 + }, + { + "epoch": 1.24, + "learning_rate": 1.2431926948677045e-08, + "loss": 4.2771, + "step": 113000 + }, + { + "epoch": 1.25, + "learning_rate": 1.2486935474998624e-08, + "loss": 4.2953, + "step": 113500 + }, + { + "epoch": 1.25, + "learning_rate": 1.2541944001320205e-08, + "loss": 4.2705, + "step": 114000 + }, + { + "epoch": 1.26, + "learning_rate": 1.2596952527641782e-08, + "loss": 4.2946, + "step": 114500 + }, + { + "epoch": 1.27, + "learning_rate": 1.2651961053963364e-08, + "loss": 4.2666, + "step": 115000 + }, + { + "epoch": 1.27, + "learning_rate": 1.2706969580284944e-08, + "loss": 4.2594, + "step": 115500 + }, + { + "epoch": 1.28, + "learning_rate": 1.2761978106606524e-08, + "loss": 4.2728, + "step": 116000 + }, + { + "epoch": 1.28, + "learning_rate": 1.2816986632928104e-08, + "loss": 4.2912, + "step": 116500 + }, + { + "epoch": 1.29, + "learning_rate": 1.2871995159249683e-08, + "loss": 4.2638, + "step": 117000 + }, + { + "epoch": 1.29, + "learning_rate": 1.2927003685571264e-08, + "loss": 4.254, + "step": 117500 + }, + { + "epoch": 1.3, + "learning_rate": 1.2982012211892843e-08, + "loss": 4.2631, + "step": 118000 + }, + { + "epoch": 1.3, + "learning_rate": 1.3037020738214422e-08, + "loss": 4.2645, + "step": 118500 + }, + { + "epoch": 1.31, + "learning_rate": 1.3092029264536003e-08, + "loss": 4.2674, + "step": 119000 + }, + { + "epoch": 1.31, + "learning_rate": 1.3147037790857582e-08, + "loss": 4.2591, + "step": 119500 + }, + { + "epoch": 1.32, + "learning_rate": 1.3202046317179163e-08, + "loss": 4.2593, + "step": 120000 + }, + { + "epoch": 1.33, + "learning_rate": 1.3257054843500742e-08, + "loss": 4.2737, + "step": 120500 + }, + { + "epoch": 1.33, + "learning_rate": 1.3312063369822321e-08, + "loss": 4.2487, + "step": 121000 + }, + { + "epoch": 1.34, + "learning_rate": 1.3367071896143902e-08, + "loss": 4.2739, + "step": 121500 + }, + { + "epoch": 1.34, + "learning_rate": 1.3422080422465481e-08, + "loss": 4.2685, + "step": 122000 + }, + { + "epoch": 1.35, + "learning_rate": 1.347708894878706e-08, + "loss": 4.248, + "step": 122500 + }, + { + "epoch": 1.35, + "learning_rate": 1.3532097475108643e-08, + "loss": 4.2452, + "step": 123000 + }, + { + "epoch": 1.36, + "learning_rate": 1.358710600143022e-08, + "loss": 4.2696, + "step": 123500 + }, + { + "epoch": 1.36, + "learning_rate": 1.36421145277518e-08, + "loss": 4.2638, + "step": 124000 + }, + { + "epoch": 1.37, + "learning_rate": 1.3697123054073382e-08, + "loss": 4.2639, + "step": 124500 + }, + { + "epoch": 1.38, + "learning_rate": 1.375213158039496e-08, + "loss": 4.2381, + "step": 125000 + }, + { + "epoch": 1.38, + "learning_rate": 1.3807140106716542e-08, + "loss": 4.2466, + "step": 125500 + }, + { + "epoch": 1.39, + "learning_rate": 1.3862148633038121e-08, + "loss": 4.2518, + "step": 126000 + }, + { + "epoch": 1.39, + "learning_rate": 1.3917157159359699e-08, + "loss": 4.2385, + "step": 126500 + }, + { + "epoch": 1.4, + "learning_rate": 1.3972165685681281e-08, + "loss": 4.2427, + "step": 127000 + }, + { + "epoch": 1.4, + "learning_rate": 1.402717421200286e-08, + "loss": 4.2388, + "step": 127500 + }, + { + "epoch": 1.41, + "learning_rate": 1.4082182738324438e-08, + "loss": 4.2439, + "step": 128000 + }, + { + "epoch": 1.41, + "learning_rate": 1.413719126464602e-08, + "loss": 4.2515, + "step": 128500 + }, + { + "epoch": 1.42, + "learning_rate": 1.41921997909676e-08, + "loss": 4.2376, + "step": 129000 + }, + { + "epoch": 1.42, + "learning_rate": 1.424720831728918e-08, + "loss": 4.2544, + "step": 129500 + }, + { + "epoch": 1.43, + "learning_rate": 1.430221684361076e-08, + "loss": 4.2362, + "step": 130000 + }, + { + "epoch": 1.44, + "learning_rate": 1.4357225369932339e-08, + "loss": 4.2354, + "step": 130500 + }, + { + "epoch": 1.44, + "learning_rate": 1.441223389625392e-08, + "loss": 4.2289, + "step": 131000 + }, + { + "epoch": 1.45, + "learning_rate": 1.4467242422575499e-08, + "loss": 4.2352, + "step": 131500 + }, + { + "epoch": 1.45, + "learning_rate": 1.4522250948897078e-08, + "loss": 4.2446, + "step": 132000 + }, + { + "epoch": 1.46, + "learning_rate": 1.4577259475218659e-08, + "loss": 4.2215, + "step": 132500 + }, + { + "epoch": 1.46, + "learning_rate": 1.4632268001540238e-08, + "loss": 4.2311, + "step": 133000 + }, + { + "epoch": 1.47, + "learning_rate": 1.4687276527861817e-08, + "loss": 4.2194, + "step": 133500 + }, + { + "epoch": 1.47, + "learning_rate": 1.4742285054183398e-08, + "loss": 4.2424, + "step": 134000 + }, + { + "epoch": 1.48, + "learning_rate": 1.4797293580504977e-08, + "loss": 4.2409, + "step": 134500 + }, + { + "epoch": 1.49, + "learning_rate": 1.485230210682656e-08, + "loss": 4.2386, + "step": 135000 + }, + { + "epoch": 1.49, + "learning_rate": 1.4907310633148137e-08, + "loss": 4.231, + "step": 135500 + }, + { + "epoch": 1.5, + "learning_rate": 1.4962319159469716e-08, + "loss": 4.2323, + "step": 136000 + }, + { + "epoch": 1.5, + "learning_rate": 1.50173276857913e-08, + "loss": 4.2426, + "step": 136500 + }, + { + "epoch": 1.51, + "learning_rate": 1.5072336212112878e-08, + "loss": 4.2334, + "step": 137000 + }, + { + "epoch": 1.51, + "learning_rate": 1.5127344738434457e-08, + "loss": 4.2404, + "step": 137500 + }, + { + "epoch": 1.52, + "learning_rate": 1.5182353264756036e-08, + "loss": 4.2379, + "step": 138000 + }, + { + "epoch": 1.52, + "learning_rate": 1.5237361791077615e-08, + "loss": 4.2294, + "step": 138500 + }, + { + "epoch": 1.53, + "learning_rate": 1.5292370317399198e-08, + "loss": 4.2298, + "step": 139000 + }, + { + "epoch": 1.53, + "learning_rate": 1.5347378843720777e-08, + "loss": 4.2258, + "step": 139500 + }, + { + "epoch": 1.54, + "learning_rate": 1.5402387370042356e-08, + "loss": 4.2313, + "step": 140000 + }, + { + "epoch": 1.55, + "learning_rate": 1.5457395896363935e-08, + "loss": 4.2273, + "step": 140500 + }, + { + "epoch": 1.55, + "learning_rate": 1.5512404422685514e-08, + "loss": 4.2185, + "step": 141000 + }, + { + "epoch": 1.56, + "learning_rate": 1.5567412949007094e-08, + "loss": 4.2187, + "step": 141500 + }, + { + "epoch": 1.56, + "learning_rate": 1.5622421475328676e-08, + "loss": 4.2292, + "step": 142000 + }, + { + "epoch": 1.57, + "learning_rate": 1.5677430001650255e-08, + "loss": 4.2364, + "step": 142500 + }, + { + "epoch": 1.57, + "learning_rate": 1.5732438527971834e-08, + "loss": 4.1938, + "step": 143000 + }, + { + "epoch": 1.58, + "learning_rate": 1.5787447054293414e-08, + "loss": 4.2166, + "step": 143500 + }, + { + "epoch": 1.58, + "learning_rate": 1.5842455580614993e-08, + "loss": 4.2128, + "step": 144000 + }, + { + "epoch": 1.59, + "learning_rate": 1.5897464106936575e-08, + "loss": 4.2198, + "step": 144500 + }, + { + "epoch": 1.6, + "learning_rate": 1.5952472633258154e-08, + "loss": 4.2407, + "step": 145000 + }, + { + "epoch": 1.6, + "learning_rate": 1.6007481159579734e-08, + "loss": 4.2087, + "step": 145500 + }, + { + "epoch": 1.61, + "learning_rate": 1.6062489685901316e-08, + "loss": 4.2192, + "step": 146000 + }, + { + "epoch": 1.61, + "learning_rate": 1.6117498212222895e-08, + "loss": 4.2053, + "step": 146500 + }, + { + "epoch": 1.62, + "learning_rate": 1.617250673854447e-08, + "loss": 4.2251, + "step": 147000 + }, + { + "epoch": 1.62, + "learning_rate": 1.6227515264866054e-08, + "loss": 4.2181, + "step": 147500 + }, + { + "epoch": 1.63, + "learning_rate": 1.6282523791187633e-08, + "loss": 4.2102, + "step": 148000 + }, + { + "epoch": 1.63, + "learning_rate": 1.6337532317509215e-08, + "loss": 4.2054, + "step": 148500 + }, + { + "epoch": 1.64, + "learning_rate": 1.6392540843830794e-08, + "loss": 4.2147, + "step": 149000 + }, + { + "epoch": 1.64, + "learning_rate": 1.6447549370152374e-08, + "loss": 4.1979, + "step": 149500 + }, + { + "epoch": 1.65, + "learning_rate": 1.6502557896473953e-08, + "loss": 4.1898, + "step": 150000 + }, + { + "epoch": 1.66, + "learning_rate": 1.6557566422795532e-08, + "loss": 4.2091, + "step": 150500 + }, + { + "epoch": 1.66, + "learning_rate": 1.661257494911711e-08, + "loss": 4.193, + "step": 151000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6667583475438694e-08, + "loss": 4.1914, + "step": 151500 + }, + { + "epoch": 1.67, + "learning_rate": 1.6722592001760273e-08, + "loss": 4.2179, + "step": 152000 + }, + { + "epoch": 1.68, + "learning_rate": 1.6777600528081852e-08, + "loss": 4.2042, + "step": 152500 + }, + { + "epoch": 1.68, + "learning_rate": 1.683260905440343e-08, + "loss": 4.2024, + "step": 153000 + }, + { + "epoch": 1.69, + "learning_rate": 1.688761758072501e-08, + "loss": 4.198, + "step": 153500 + }, + { + "epoch": 1.69, + "learning_rate": 1.6942626107046593e-08, + "loss": 4.1979, + "step": 154000 + }, + { + "epoch": 1.7, + "learning_rate": 1.6997634633368172e-08, + "loss": 4.1953, + "step": 154500 + }, + { + "epoch": 1.71, + "learning_rate": 1.705264315968975e-08, + "loss": 4.194, + "step": 155000 + }, + { + "epoch": 1.71, + "learning_rate": 1.710765168601133e-08, + "loss": 4.2025, + "step": 155500 + }, + { + "epoch": 1.72, + "learning_rate": 1.716266021233291e-08, + "loss": 4.2024, + "step": 156000 + }, + { + "epoch": 1.72, + "learning_rate": 1.721766873865449e-08, + "loss": 4.1942, + "step": 156500 + }, + { + "epoch": 1.73, + "learning_rate": 1.727267726497607e-08, + "loss": 4.1708, + "step": 157000 + }, + { + "epoch": 1.73, + "learning_rate": 1.732768579129765e-08, + "loss": 4.1987, + "step": 157500 + }, + { + "epoch": 1.74, + "learning_rate": 1.7382694317619233e-08, + "loss": 4.183, + "step": 158000 + }, + { + "epoch": 1.74, + "learning_rate": 1.7437702843940812e-08, + "loss": 4.1875, + "step": 158500 + }, + { + "epoch": 1.75, + "learning_rate": 1.7492711370262388e-08, + "loss": 4.1796, + "step": 159000 + }, + { + "epoch": 1.75, + "learning_rate": 1.754771989658397e-08, + "loss": 4.1838, + "step": 159500 + }, + { + "epoch": 1.76, + "learning_rate": 1.760272842290555e-08, + "loss": 4.1826, + "step": 160000 + }, + { + "epoch": 1.77, + "learning_rate": 1.765773694922713e-08, + "loss": 4.1893, + "step": 160500 + }, + { + "epoch": 1.77, + "learning_rate": 1.771274547554871e-08, + "loss": 4.1898, + "step": 161000 + }, + { + "epoch": 1.78, + "learning_rate": 1.776775400187029e-08, + "loss": 4.1741, + "step": 161500 + }, + { + "epoch": 1.78, + "learning_rate": 1.782276252819187e-08, + "loss": 4.2011, + "step": 162000 + }, + { + "epoch": 1.79, + "learning_rate": 1.787777105451345e-08, + "loss": 4.2012, + "step": 162500 + }, + { + "epoch": 1.79, + "learning_rate": 1.7932779580835028e-08, + "loss": 4.1947, + "step": 163000 + }, + { + "epoch": 1.8, + "learning_rate": 1.798778810715661e-08, + "loss": 4.1843, + "step": 163500 + }, + { + "epoch": 1.8, + "learning_rate": 1.804279663347819e-08, + "loss": 4.1737, + "step": 164000 + }, + { + "epoch": 1.81, + "learning_rate": 1.809780515979977e-08, + "loss": 4.1953, + "step": 164500 + }, + { + "epoch": 1.82, + "learning_rate": 1.8152813686121348e-08, + "loss": 4.1943, + "step": 165000 + }, + { + "epoch": 1.82, + "learning_rate": 1.8207822212442927e-08, + "loss": 4.1691, + "step": 165500 + }, + { + "epoch": 1.83, + "learning_rate": 1.8262830738764506e-08, + "loss": 4.153, + "step": 166000 + }, + { + "epoch": 1.83, + "learning_rate": 1.831783926508609e-08, + "loss": 4.1801, + "step": 166500 + }, + { + "epoch": 1.84, + "learning_rate": 1.8372847791407668e-08, + "loss": 4.1811, + "step": 167000 + }, + { + "epoch": 1.84, + "learning_rate": 1.8427856317729247e-08, + "loss": 4.1735, + "step": 167500 + }, + { + "epoch": 1.85, + "learning_rate": 1.8482864844050826e-08, + "loss": 4.1614, + "step": 168000 + }, + { + "epoch": 1.85, + "learning_rate": 1.8537873370372405e-08, + "loss": 4.1711, + "step": 168500 + }, + { + "epoch": 1.86, + "learning_rate": 1.8592881896693988e-08, + "loss": 4.1783, + "step": 169000 + }, + { + "epoch": 1.86, + "learning_rate": 1.8647890423015567e-08, + "loss": 4.17, + "step": 169500 + }, + { + "epoch": 1.87, + "learning_rate": 1.8702898949337146e-08, + "loss": 4.1725, + "step": 170000 + }, + { + "epoch": 1.88, + "learning_rate": 1.875790747565873e-08, + "loss": 4.1727, + "step": 170500 + }, + { + "epoch": 1.88, + "learning_rate": 1.8812916001980304e-08, + "loss": 4.17, + "step": 171000 + }, + { + "epoch": 1.89, + "learning_rate": 1.8867924528301887e-08, + "loss": 4.1768, + "step": 171500 + }, + { + "epoch": 1.89, + "learning_rate": 1.8922933054623466e-08, + "loss": 4.1607, + "step": 172000 + }, + { + "epoch": 1.9, + "learning_rate": 1.8977941580945045e-08, + "loss": 4.1826, + "step": 172500 + }, + { + "epoch": 1.9, + "learning_rate": 1.9032950107266628e-08, + "loss": 4.1884, + "step": 173000 + }, + { + "epoch": 1.91, + "learning_rate": 1.9087958633588207e-08, + "loss": 4.1573, + "step": 173500 + }, + { + "epoch": 1.91, + "learning_rate": 1.9142967159909783e-08, + "loss": 4.1502, + "step": 174000 + }, + { + "epoch": 1.92, + "learning_rate": 1.9197975686231365e-08, + "loss": 4.1692, + "step": 174500 + }, + { + "epoch": 1.93, + "learning_rate": 1.9252984212552944e-08, + "loss": 4.1763, + "step": 175000 + }, + { + "epoch": 1.93, + "learning_rate": 1.9307992738874523e-08, + "loss": 4.1595, + "step": 175500 + }, + { + "epoch": 1.94, + "learning_rate": 1.9363001265196106e-08, + "loss": 4.1727, + "step": 176000 + }, + { + "epoch": 1.94, + "learning_rate": 1.9418009791517685e-08, + "loss": 4.1723, + "step": 176500 + }, + { + "epoch": 1.95, + "learning_rate": 1.9473018317839264e-08, + "loss": 4.183, + "step": 177000 + }, + { + "epoch": 1.95, + "learning_rate": 1.9528026844160843e-08, + "loss": 4.1635, + "step": 177500 + }, + { + "epoch": 1.96, + "learning_rate": 1.9583035370482423e-08, + "loss": 4.1573, + "step": 178000 + }, + { + "epoch": 1.96, + "learning_rate": 1.9638043896804005e-08, + "loss": 4.1551, + "step": 178500 + }, + { + "epoch": 1.97, + "learning_rate": 1.9693052423125584e-08, + "loss": 4.1479, + "step": 179000 + }, + { + "epoch": 1.97, + "learning_rate": 1.9748060949447163e-08, + "loss": 4.1719, + "step": 179500 + }, + { + "epoch": 1.98, + "learning_rate": 1.9803069475768743e-08, + "loss": 4.1469, + "step": 180000 + }, + { + "epoch": 1.99, + "learning_rate": 1.9858078002090322e-08, + "loss": 4.1515, + "step": 180500 + }, + { + "epoch": 1.99, + "learning_rate": 1.9913086528411904e-08, + "loss": 4.1634, + "step": 181000 + }, + { + "epoch": 2.0, + "learning_rate": 1.9968095054733483e-08, + "loss": 4.1733, + "step": 181500 + }, + { + "epoch": 2.0, + "eval_loss": 4.123291492462158, + "eval_runtime": 6.1426, + "eval_samples_per_second": 252.988, + "step": 181790 + }, + { + "epoch": 2.0, + "learning_rate": 2.0023103581055063e-08, + "loss": 4.159, + "step": 182000 + }, + { + "epoch": 2.01, + "learning_rate": 2.0078112107376645e-08, + "loss": 4.1671, + "step": 182500 + }, + { + "epoch": 2.01, + "learning_rate": 2.013312063369822e-08, + "loss": 4.157, + "step": 183000 + }, + { + "epoch": 2.02, + "learning_rate": 2.01881291600198e-08, + "loss": 4.1638, + "step": 183500 + }, + { + "epoch": 2.02, + "learning_rate": 2.0243137686341383e-08, + "loss": 4.1781, + "step": 184000 + }, + { + "epoch": 2.03, + "learning_rate": 2.0298146212662962e-08, + "loss": 4.1482, + "step": 184500 + }, + { + "epoch": 2.04, + "learning_rate": 2.035315473898454e-08, + "loss": 4.1569, + "step": 185000 + }, + { + "epoch": 2.04, + "learning_rate": 2.0408163265306123e-08, + "loss": 4.1737, + "step": 185500 + }, + { + "epoch": 2.05, + "learning_rate": 2.04631717916277e-08, + "loss": 4.1626, + "step": 186000 + }, + { + "epoch": 2.05, + "learning_rate": 2.0518180317949282e-08, + "loss": 4.1507, + "step": 186500 + }, + { + "epoch": 2.06, + "learning_rate": 2.057318884427086e-08, + "loss": 4.1541, + "step": 187000 + }, + { + "epoch": 2.06, + "learning_rate": 2.062819737059244e-08, + "loss": 4.1576, + "step": 187500 + }, + { + "epoch": 2.07, + "learning_rate": 2.0683205896914023e-08, + "loss": 4.1452, + "step": 188000 + }, + { + "epoch": 2.07, + "learning_rate": 2.0738214423235602e-08, + "loss": 4.1473, + "step": 188500 + }, + { + "epoch": 2.08, + "learning_rate": 2.079322294955718e-08, + "loss": 4.1498, + "step": 189000 + }, + { + "epoch": 2.08, + "learning_rate": 2.084823147587876e-08, + "loss": 4.1441, + "step": 189500 + }, + { + "epoch": 2.09, + "learning_rate": 2.090324000220034e-08, + "loss": 4.1445, + "step": 190000 + }, + { + "epoch": 2.1, + "learning_rate": 2.0958248528521922e-08, + "loss": 4.1388, + "step": 190500 + }, + { + "epoch": 2.1, + "learning_rate": 2.10132570548435e-08, + "loss": 4.1458, + "step": 191000 + }, + { + "epoch": 2.11, + "learning_rate": 2.106826558116508e-08, + "loss": 4.1566, + "step": 191500 + }, + { + "epoch": 2.11, + "learning_rate": 2.112327410748666e-08, + "loss": 4.1697, + "step": 192000 + }, + { + "epoch": 2.12, + "learning_rate": 2.1178282633808238e-08, + "loss": 4.154, + "step": 192500 + }, + { + "epoch": 2.12, + "learning_rate": 2.1233291160129818e-08, + "loss": 4.1516, + "step": 193000 + }, + { + "epoch": 2.13, + "learning_rate": 2.12882996864514e-08, + "loss": 4.1357, + "step": 193500 + }, + { + "epoch": 2.13, + "learning_rate": 2.134330821277298e-08, + "loss": 4.1551, + "step": 194000 + }, + { + "epoch": 2.14, + "learning_rate": 2.1398316739094558e-08, + "loss": 4.1199, + "step": 194500 + }, + { + "epoch": 2.15, + "learning_rate": 2.1453325265416137e-08, + "loss": 4.1343, + "step": 195000 + }, + { + "epoch": 2.15, + "learning_rate": 2.1508333791737717e-08, + "loss": 4.1348, + "step": 195500 + }, + { + "epoch": 2.16, + "learning_rate": 2.15633423180593e-08, + "loss": 4.1537, + "step": 196000 + }, + { + "epoch": 2.16, + "learning_rate": 2.1618350844380878e-08, + "loss": 4.1408, + "step": 196500 + }, + { + "epoch": 2.17, + "learning_rate": 2.1673359370702457e-08, + "loss": 4.1233, + "step": 197000 + }, + { + "epoch": 2.17, + "learning_rate": 2.172836789702404e-08, + "loss": 4.1434, + "step": 197500 + }, + { + "epoch": 2.18, + "learning_rate": 2.1783376423345616e-08, + "loss": 4.1379, + "step": 198000 + }, + { + "epoch": 2.18, + "learning_rate": 2.1838384949667195e-08, + "loss": 4.1286, + "step": 198500 + }, + { + "epoch": 2.19, + "learning_rate": 2.1893393475988777e-08, + "loss": 4.1609, + "step": 199000 + }, + { + "epoch": 2.19, + "learning_rate": 2.1948402002310357e-08, + "loss": 4.1414, + "step": 199500 + }, + { + "epoch": 2.2, + "learning_rate": 2.200341052863194e-08, + "loss": 4.1278, + "step": 200000 + }, + { + "epoch": 2.21, + "learning_rate": 2.2058419054953518e-08, + "loss": 4.14, + "step": 200500 + }, + { + "epoch": 2.21, + "learning_rate": 2.2113427581275097e-08, + "loss": 4.1567, + "step": 201000 + }, + { + "epoch": 2.22, + "learning_rate": 2.2168436107596677e-08, + "loss": 4.132, + "step": 201500 + }, + { + "epoch": 2.22, + "learning_rate": 2.2223444633918256e-08, + "loss": 4.1439, + "step": 202000 + }, + { + "epoch": 2.23, + "learning_rate": 2.2278453160239835e-08, + "loss": 4.1418, + "step": 202500 + }, + { + "epoch": 2.23, + "learning_rate": 2.2333461686561417e-08, + "loss": 4.1574, + "step": 203000 + }, + { + "epoch": 2.24, + "learning_rate": 2.2388470212882997e-08, + "loss": 4.1295, + "step": 203500 + }, + { + "epoch": 2.24, + "learning_rate": 2.2443478739204576e-08, + "loss": 4.1422, + "step": 204000 + }, + { + "epoch": 2.25, + "learning_rate": 2.2498487265526155e-08, + "loss": 4.1128, + "step": 204500 + }, + { + "epoch": 2.26, + "learning_rate": 2.2553495791847734e-08, + "loss": 4.1337, + "step": 205000 + }, + { + "epoch": 2.26, + "learning_rate": 2.2608504318169317e-08, + "loss": 4.141, + "step": 205500 + }, + { + "epoch": 2.27, + "learning_rate": 2.2663512844490896e-08, + "loss": 4.1178, + "step": 206000 + }, + { + "epoch": 2.27, + "learning_rate": 2.2718521370812475e-08, + "loss": 4.1296, + "step": 206500 + }, + { + "epoch": 2.28, + "learning_rate": 2.2773529897134054e-08, + "loss": 4.1411, + "step": 207000 + }, + { + "epoch": 2.28, + "learning_rate": 2.2828538423455633e-08, + "loss": 4.1232, + "step": 207500 + }, + { + "epoch": 2.29, + "learning_rate": 2.2883546949777212e-08, + "loss": 4.1447, + "step": 208000 + }, + { + "epoch": 2.29, + "learning_rate": 2.2938555476098795e-08, + "loss": 4.152, + "step": 208500 + }, + { + "epoch": 2.3, + "learning_rate": 2.2993564002420374e-08, + "loss": 4.1064, + "step": 209000 + }, + { + "epoch": 2.3, + "learning_rate": 2.3048572528741957e-08, + "loss": 4.1464, + "step": 209500 + }, + { + "epoch": 2.31, + "learning_rate": 2.3103581055063532e-08, + "loss": 4.1286, + "step": 210000 + }, + { + "epoch": 2.32, + "learning_rate": 2.315858958138511e-08, + "loss": 4.1136, + "step": 210500 + }, + { + "epoch": 2.32, + "learning_rate": 2.3213598107706694e-08, + "loss": 4.1417, + "step": 211000 + }, + { + "epoch": 2.33, + "learning_rate": 2.3268606634028273e-08, + "loss": 4.1176, + "step": 211500 + }, + { + "epoch": 2.33, + "learning_rate": 2.3323615160349852e-08, + "loss": 4.1272, + "step": 212000 + }, + { + "epoch": 2.34, + "learning_rate": 2.3378623686671435e-08, + "loss": 4.1374, + "step": 212500 + }, + { + "epoch": 2.34, + "learning_rate": 2.3433632212993014e-08, + "loss": 4.1144, + "step": 213000 + }, + { + "epoch": 2.35, + "learning_rate": 2.348864073931459e-08, + "loss": 4.1247, + "step": 213500 + }, + { + "epoch": 2.35, + "learning_rate": 2.3543649265636172e-08, + "loss": 4.131, + "step": 214000 + }, + { + "epoch": 2.36, + "learning_rate": 2.359865779195775e-08, + "loss": 4.1254, + "step": 214500 + }, + { + "epoch": 2.37, + "learning_rate": 2.3653666318279334e-08, + "loss": 4.1133, + "step": 215000 + }, + { + "epoch": 2.37, + "learning_rate": 2.3708674844600913e-08, + "loss": 4.1169, + "step": 215500 + }, + { + "epoch": 2.38, + "learning_rate": 2.3763683370922492e-08, + "loss": 4.1224, + "step": 216000 + }, + { + "epoch": 2.38, + "learning_rate": 2.381869189724407e-08, + "loss": 4.116, + "step": 216500 + }, + { + "epoch": 2.39, + "learning_rate": 2.387370042356565e-08, + "loss": 4.1025, + "step": 217000 + }, + { + "epoch": 2.39, + "learning_rate": 2.392870894988723e-08, + "loss": 4.1161, + "step": 217500 + }, + { + "epoch": 2.4, + "learning_rate": 2.3983717476208812e-08, + "loss": 4.1103, + "step": 218000 + }, + { + "epoch": 2.4, + "learning_rate": 2.403872600253039e-08, + "loss": 4.1311, + "step": 218500 + }, + { + "epoch": 2.41, + "learning_rate": 2.409373452885197e-08, + "loss": 4.1039, + "step": 219000 + }, + { + "epoch": 2.41, + "learning_rate": 2.414874305517355e-08, + "loss": 4.1102, + "step": 219500 + }, + { + "epoch": 2.42, + "learning_rate": 2.420375158149513e-08, + "loss": 4.1115, + "step": 220000 + }, + { + "epoch": 2.43, + "learning_rate": 2.425876010781671e-08, + "loss": 4.1203, + "step": 220500 + }, + { + "epoch": 2.43, + "learning_rate": 2.431376863413829e-08, + "loss": 4.1062, + "step": 221000 + }, + { + "epoch": 2.44, + "learning_rate": 2.436877716045987e-08, + "loss": 4.1049, + "step": 221500 + }, + { + "epoch": 2.44, + "learning_rate": 2.442378568678145e-08, + "loss": 4.1047, + "step": 222000 + }, + { + "epoch": 2.45, + "learning_rate": 2.4478794213103028e-08, + "loss": 4.0975, + "step": 222500 + }, + { + "epoch": 2.45, + "learning_rate": 2.4533802739424607e-08, + "loss": 4.1031, + "step": 223000 + }, + { + "epoch": 2.46, + "learning_rate": 2.458881126574619e-08, + "loss": 4.1039, + "step": 223500 + }, + { + "epoch": 2.46, + "learning_rate": 2.464381979206777e-08, + "loss": 4.1193, + "step": 224000 + }, + { + "epoch": 2.47, + "learning_rate": 2.469882831838935e-08, + "loss": 4.1225, + "step": 224500 + }, + { + "epoch": 2.48, + "learning_rate": 2.475383684471093e-08, + "loss": 4.1232, + "step": 225000 + }, + { + "epoch": 2.48, + "learning_rate": 2.4808845371032507e-08, + "loss": 4.1206, + "step": 225500 + }, + { + "epoch": 2.49, + "learning_rate": 2.486385389735409e-08, + "loss": 4.12, + "step": 226000 + }, + { + "epoch": 2.49, + "learning_rate": 2.4918862423675668e-08, + "loss": 4.112, + "step": 226500 + }, + { + "epoch": 2.5, + "learning_rate": 2.4973870949997247e-08, + "loss": 4.1199, + "step": 227000 + }, + { + "epoch": 2.5, + "learning_rate": 2.5028879476318827e-08, + "loss": 4.1248, + "step": 227500 + }, + { + "epoch": 2.51, + "learning_rate": 2.508388800264041e-08, + "loss": 4.1182, + "step": 228000 + }, + { + "epoch": 2.51, + "learning_rate": 2.5138896528961988e-08, + "loss": 4.1075, + "step": 228500 + }, + { + "epoch": 2.52, + "learning_rate": 2.5193905055283564e-08, + "loss": 4.1157, + "step": 229000 + }, + { + "epoch": 2.52, + "learning_rate": 2.5248913581605147e-08, + "loss": 4.0953, + "step": 229500 + }, + { + "epoch": 2.53, + "learning_rate": 2.530392210792673e-08, + "loss": 4.089, + "step": 230000 + }, + { + "epoch": 2.54, + "learning_rate": 2.535893063424831e-08, + "loss": 4.0914, + "step": 230500 + }, + { + "epoch": 2.54, + "learning_rate": 2.5413939160569887e-08, + "loss": 4.1025, + "step": 231000 + }, + { + "epoch": 2.55, + "learning_rate": 2.5468947686891466e-08, + "loss": 4.1051, + "step": 231500 + }, + { + "epoch": 2.55, + "learning_rate": 2.552395621321305e-08, + "loss": 4.0837, + "step": 232000 + }, + { + "epoch": 2.56, + "learning_rate": 2.5578964739534625e-08, + "loss": 4.1203, + "step": 232500 + }, + { + "epoch": 2.56, + "learning_rate": 2.5633973265856207e-08, + "loss": 4.098, + "step": 233000 + }, + { + "epoch": 2.57, + "learning_rate": 2.568898179217779e-08, + "loss": 4.0965, + "step": 233500 + }, + { + "epoch": 2.57, + "learning_rate": 2.5743990318499366e-08, + "loss": 4.1057, + "step": 234000 + }, + { + "epoch": 2.58, + "learning_rate": 2.5798998844820945e-08, + "loss": 4.0846, + "step": 234500 + }, + { + "epoch": 2.59, + "learning_rate": 2.5854007371142527e-08, + "loss": 4.1125, + "step": 235000 + }, + { + "epoch": 2.59, + "learning_rate": 2.5909015897464103e-08, + "loss": 4.1225, + "step": 235500 + }, + { + "epoch": 2.6, + "learning_rate": 2.5964024423785686e-08, + "loss": 4.0813, + "step": 236000 + }, + { + "epoch": 2.6, + "learning_rate": 2.6019032950107268e-08, + "loss": 4.1011, + "step": 236500 + }, + { + "epoch": 2.61, + "learning_rate": 2.6074041476428844e-08, + "loss": 4.095, + "step": 237000 + }, + { + "epoch": 2.61, + "learning_rate": 2.6129050002750423e-08, + "loss": 4.0847, + "step": 237500 + }, + { + "epoch": 2.62, + "learning_rate": 2.6184058529072006e-08, + "loss": 4.0935, + "step": 238000 + }, + { + "epoch": 2.62, + "learning_rate": 2.623906705539358e-08, + "loss": 4.0988, + "step": 238500 + }, + { + "epoch": 2.63, + "learning_rate": 2.6294075581715164e-08, + "loss": 4.1106, + "step": 239000 + }, + { + "epoch": 2.63, + "learning_rate": 2.6349084108036746e-08, + "loss": 4.0884, + "step": 239500 + }, + { + "epoch": 2.64, + "learning_rate": 2.6404092634358326e-08, + "loss": 4.1179, + "step": 240000 + }, + { + "epoch": 2.65, + "learning_rate": 2.64591011606799e-08, + "loss": 4.0912, + "step": 240500 + }, + { + "epoch": 2.65, + "learning_rate": 2.6514109687001484e-08, + "loss": 4.0808, + "step": 241000 + }, + { + "epoch": 2.66, + "learning_rate": 2.6569118213323066e-08, + "loss": 4.1192, + "step": 241500 + }, + { + "epoch": 2.66, + "learning_rate": 2.6624126739644642e-08, + "loss": 4.0961, + "step": 242000 + }, + { + "epoch": 2.67, + "learning_rate": 2.6679135265966225e-08, + "loss": 4.0915, + "step": 242500 + }, + { + "epoch": 2.67, + "learning_rate": 2.6734143792287804e-08, + "loss": 4.0894, + "step": 243000 + }, + { + "epoch": 2.68, + "learning_rate": 2.6789152318609383e-08, + "loss": 4.0981, + "step": 243500 + }, + { + "epoch": 2.68, + "learning_rate": 2.6844160844930962e-08, + "loss": 4.0863, + "step": 244000 + }, + { + "epoch": 2.69, + "learning_rate": 2.6899169371252545e-08, + "loss": 4.1067, + "step": 244500 + }, + { + "epoch": 2.7, + "learning_rate": 2.695417789757412e-08, + "loss": 4.1008, + "step": 245000 + }, + { + "epoch": 2.7, + "learning_rate": 2.7009186423895703e-08, + "loss": 4.1062, + "step": 245500 + }, + { + "epoch": 2.71, + "learning_rate": 2.7064194950217286e-08, + "loss": 4.0995, + "step": 246000 + }, + { + "epoch": 2.71, + "learning_rate": 2.711920347653886e-08, + "loss": 4.0928, + "step": 246500 + }, + { + "epoch": 2.72, + "learning_rate": 2.717421200286044e-08, + "loss": 4.11, + "step": 247000 + }, + { + "epoch": 2.72, + "learning_rate": 2.7229220529182023e-08, + "loss": 4.094, + "step": 247500 + }, + { + "epoch": 2.73, + "learning_rate": 2.72842290555036e-08, + "loss": 4.0777, + "step": 248000 + }, + { + "epoch": 2.73, + "learning_rate": 2.733923758182518e-08, + "loss": 4.1137, + "step": 248500 + }, + { + "epoch": 2.74, + "learning_rate": 2.7394246108146764e-08, + "loss": 4.0861, + "step": 249000 + }, + { + "epoch": 2.74, + "learning_rate": 2.7449254634468343e-08, + "loss": 4.0958, + "step": 249500 + }, + { + "epoch": 2.75, + "learning_rate": 2.750426316078992e-08, + "loss": 4.0915, + "step": 250000 + }, + { + "epoch": 2.76, + "learning_rate": 2.75592716871115e-08, + "loss": 4.079, + "step": 250500 + }, + { + "epoch": 2.76, + "learning_rate": 2.7614280213433084e-08, + "loss": 4.0976, + "step": 251000 + }, + { + "epoch": 2.77, + "learning_rate": 2.766928873975466e-08, + "loss": 4.0696, + "step": 251500 + }, + { + "epoch": 2.77, + "learning_rate": 2.7724297266076242e-08, + "loss": 4.0911, + "step": 252000 + }, + { + "epoch": 2.78, + "learning_rate": 2.777930579239782e-08, + "loss": 4.0644, + "step": 252500 + }, + { + "epoch": 2.78, + "learning_rate": 2.7834314318719397e-08, + "loss": 4.0755, + "step": 253000 + }, + { + "epoch": 2.79, + "learning_rate": 2.788932284504098e-08, + "loss": 4.0652, + "step": 253500 + }, + { + "epoch": 2.79, + "learning_rate": 2.7944331371362562e-08, + "loss": 4.0744, + "step": 254000 + }, + { + "epoch": 2.8, + "learning_rate": 2.7999339897684138e-08, + "loss": 4.0751, + "step": 254500 + }, + { + "epoch": 2.81, + "learning_rate": 2.805434842400572e-08, + "loss": 4.0602, + "step": 255000 + }, + { + "epoch": 2.81, + "learning_rate": 2.81093569503273e-08, + "loss": 4.0901, + "step": 255500 + }, + { + "epoch": 2.82, + "learning_rate": 2.8164365476648876e-08, + "loss": 4.0726, + "step": 256000 + }, + { + "epoch": 2.82, + "learning_rate": 2.8219374002970458e-08, + "loss": 4.074, + "step": 256500 + }, + { + "epoch": 2.83, + "learning_rate": 2.827438252929204e-08, + "loss": 4.0823, + "step": 257000 + }, + { + "epoch": 2.83, + "learning_rate": 2.8329391055613616e-08, + "loss": 4.0885, + "step": 257500 + }, + { + "epoch": 2.84, + "learning_rate": 2.83843995819352e-08, + "loss": 4.0899, + "step": 258000 + }, + { + "epoch": 2.84, + "learning_rate": 2.8439408108256778e-08, + "loss": 4.0722, + "step": 258500 + }, + { + "epoch": 2.85, + "learning_rate": 2.849441663457836e-08, + "loss": 4.0792, + "step": 259000 + }, + { + "epoch": 2.85, + "learning_rate": 2.8549425160899936e-08, + "loss": 4.0765, + "step": 259500 + }, + { + "epoch": 2.86, + "learning_rate": 2.860443368722152e-08, + "loss": 4.0724, + "step": 260000 + }, + { + "epoch": 2.87, + "learning_rate": 2.86594422135431e-08, + "loss": 4.0767, + "step": 260500 + }, + { + "epoch": 2.87, + "learning_rate": 2.8714450739864677e-08, + "loss": 4.0731, + "step": 261000 + }, + { + "epoch": 2.88, + "learning_rate": 2.8769459266186256e-08, + "loss": 4.0742, + "step": 261500 + }, + { + "epoch": 2.88, + "learning_rate": 2.882446779250784e-08, + "loss": 4.0782, + "step": 262000 + }, + { + "epoch": 2.89, + "learning_rate": 2.8879476318829415e-08, + "loss": 4.092, + "step": 262500 + }, + { + "epoch": 2.89, + "learning_rate": 2.8934484845150997e-08, + "loss": 4.0789, + "step": 263000 + }, + { + "epoch": 2.9, + "learning_rate": 2.898949337147258e-08, + "loss": 4.0854, + "step": 263500 + }, + { + "epoch": 2.9, + "learning_rate": 2.9044501897794156e-08, + "loss": 4.0897, + "step": 264000 + }, + { + "epoch": 2.91, + "learning_rate": 2.9099510424115735e-08, + "loss": 4.0724, + "step": 264500 + }, + { + "epoch": 2.92, + "learning_rate": 2.9154518950437317e-08, + "loss": 4.0951, + "step": 265000 + }, + { + "epoch": 2.92, + "learning_rate": 2.9209527476758893e-08, + "loss": 4.087, + "step": 265500 + }, + { + "epoch": 2.93, + "learning_rate": 2.9264536003080476e-08, + "loss": 4.0687, + "step": 266000 + }, + { + "epoch": 2.93, + "learning_rate": 2.9319544529402058e-08, + "loss": 4.085, + "step": 266500 + }, + { + "epoch": 2.94, + "learning_rate": 2.9374553055723634e-08, + "loss": 4.0707, + "step": 267000 + }, + { + "epoch": 2.94, + "learning_rate": 2.9429561582045216e-08, + "loss": 4.0662, + "step": 267500 + }, + { + "epoch": 2.95, + "learning_rate": 2.9484570108366796e-08, + "loss": 4.0841, + "step": 268000 + }, + { + "epoch": 2.95, + "learning_rate": 2.9539578634688378e-08, + "loss": 4.0762, + "step": 268500 + }, + { + "epoch": 2.96, + "learning_rate": 2.9594587161009954e-08, + "loss": 4.0742, + "step": 269000 + }, + { + "epoch": 2.96, + "learning_rate": 2.9649595687331536e-08, + "loss": 4.0795, + "step": 269500 + }, + { + "epoch": 2.97, + "learning_rate": 2.970460421365312e-08, + "loss": 4.0605, + "step": 270000 + }, + { + "epoch": 2.98, + "learning_rate": 2.9759612739974695e-08, + "loss": 4.1081, + "step": 270500 + }, + { + "epoch": 2.98, + "learning_rate": 2.9814621266296274e-08, + "loss": 4.0718, + "step": 271000 + }, + { + "epoch": 2.99, + "learning_rate": 2.9869629792617856e-08, + "loss": 4.0627, + "step": 271500 + }, + { + "epoch": 2.99, + "learning_rate": 2.992463831893943e-08, + "loss": 4.092, + "step": 272000 + }, + { + "epoch": 3.0, + "learning_rate": 2.9979646845261015e-08, + "loss": 4.0619, + "step": 272500 + }, + { + "epoch": 3.0, + "eval_loss": 4.056740760803223, + "eval_runtime": 6.1366, + "eval_samples_per_second": 253.236, + "step": 272685 + }, + { + "epoch": 3.0, + "learning_rate": 3.00346553715826e-08, + "loss": 4.0703, + "step": 273000 + }, + { + "epoch": 3.01, + "learning_rate": 3.008966389790417e-08, + "loss": 4.0576, + "step": 273500 + }, + { + "epoch": 3.01, + "learning_rate": 3.0144672424225755e-08, + "loss": 4.0756, + "step": 274000 + }, + { + "epoch": 3.02, + "learning_rate": 3.019968095054734e-08, + "loss": 4.0657, + "step": 274500 + }, + { + "epoch": 3.03, + "learning_rate": 3.0254689476868914e-08, + "loss": 4.0519, + "step": 275000 + }, + { + "epoch": 3.03, + "learning_rate": 3.030969800319049e-08, + "loss": 4.0658, + "step": 275500 + }, + { + "epoch": 3.04, + "learning_rate": 3.036470652951207e-08, + "loss": 4.0678, + "step": 276000 + }, + { + "epoch": 3.04, + "learning_rate": 3.041971505583365e-08, + "loss": 4.0844, + "step": 276500 + }, + { + "epoch": 3.05, + "learning_rate": 3.047472358215523e-08, + "loss": 4.0661, + "step": 277000 + }, + { + "epoch": 3.05, + "learning_rate": 3.052973210847681e-08, + "loss": 4.0723, + "step": 277500 + }, + { + "epoch": 3.06, + "learning_rate": 3.0584740634798395e-08, + "loss": 4.0739, + "step": 278000 + }, + { + "epoch": 3.06, + "learning_rate": 3.063974916111997e-08, + "loss": 4.0801, + "step": 278500 + }, + { + "epoch": 3.07, + "learning_rate": 3.0694757687441554e-08, + "loss": 4.0804, + "step": 279000 + }, + { + "epoch": 3.07, + "learning_rate": 3.0749766213763136e-08, + "loss": 4.0707, + "step": 279500 + }, + { + "epoch": 3.08, + "learning_rate": 3.080477474008471e-08, + "loss": 4.0445, + "step": 280000 + }, + { + "epoch": 3.09, + "learning_rate": 3.0859783266406295e-08, + "loss": 4.046, + "step": 280500 + }, + { + "epoch": 3.09, + "learning_rate": 3.091479179272787e-08, + "loss": 4.0697, + "step": 281000 + }, + { + "epoch": 3.1, + "learning_rate": 3.0969800319049446e-08, + "loss": 4.0503, + "step": 281500 + }, + { + "epoch": 3.1, + "learning_rate": 3.102480884537103e-08, + "loss": 4.038, + "step": 282000 + }, + { + "epoch": 3.11, + "learning_rate": 3.107981737169261e-08, + "loss": 4.0421, + "step": 282500 + }, + { + "epoch": 3.11, + "learning_rate": 3.113482589801419e-08, + "loss": 4.0489, + "step": 283000 + }, + { + "epoch": 3.12, + "learning_rate": 3.118983442433577e-08, + "loss": 4.0613, + "step": 283500 + }, + { + "epoch": 3.12, + "learning_rate": 3.124484295065735e-08, + "loss": 4.0681, + "step": 284000 + }, + { + "epoch": 3.13, + "learning_rate": 3.129985147697893e-08, + "loss": 4.0751, + "step": 284500 + }, + { + "epoch": 3.14, + "learning_rate": 3.135486000330051e-08, + "loss": 4.0464, + "step": 285000 + }, + { + "epoch": 3.14, + "learning_rate": 3.140986852962209e-08, + "loss": 4.0619, + "step": 285500 + }, + { + "epoch": 3.15, + "learning_rate": 3.146487705594367e-08, + "loss": 4.065, + "step": 286000 + }, + { + "epoch": 3.15, + "learning_rate": 3.151988558226525e-08, + "loss": 4.0525, + "step": 286500 + }, + { + "epoch": 3.16, + "learning_rate": 3.157489410858683e-08, + "loss": 4.0533, + "step": 287000 + }, + { + "epoch": 3.16, + "learning_rate": 3.162990263490841e-08, + "loss": 4.0558, + "step": 287500 + }, + { + "epoch": 3.17, + "learning_rate": 3.1684911161229985e-08, + "loss": 4.0897, + "step": 288000 + }, + { + "epoch": 3.17, + "learning_rate": 3.173991968755157e-08, + "loss": 4.0699, + "step": 288500 + }, + { + "epoch": 3.18, + "learning_rate": 3.179492821387315e-08, + "loss": 4.0454, + "step": 289000 + }, + { + "epoch": 3.18, + "learning_rate": 3.1849936740194726e-08, + "loss": 4.073, + "step": 289500 + }, + { + "epoch": 3.19, + "learning_rate": 3.190494526651631e-08, + "loss": 4.0347, + "step": 290000 + }, + { + "epoch": 3.2, + "learning_rate": 3.195995379283789e-08, + "loss": 4.0387, + "step": 290500 + }, + { + "epoch": 3.2, + "learning_rate": 3.201496231915947e-08, + "loss": 4.0866, + "step": 291000 + }, + { + "epoch": 3.21, + "learning_rate": 3.206997084548105e-08, + "loss": 4.0629, + "step": 291500 + }, + { + "epoch": 3.21, + "learning_rate": 3.212497937180263e-08, + "loss": 4.0451, + "step": 292000 + }, + { + "epoch": 3.22, + "learning_rate": 3.217998789812421e-08, + "loss": 4.0698, + "step": 292500 + }, + { + "epoch": 3.22, + "learning_rate": 3.223499642444579e-08, + "loss": 4.06, + "step": 293000 + }, + { + "epoch": 3.23, + "learning_rate": 3.2290004950767366e-08, + "loss": 4.0609, + "step": 293500 + }, + { + "epoch": 3.23, + "learning_rate": 3.234501347708894e-08, + "loss": 4.06, + "step": 294000 + }, + { + "epoch": 3.24, + "learning_rate": 3.2400022003410525e-08, + "loss": 4.0346, + "step": 294500 + }, + { + "epoch": 3.25, + "learning_rate": 3.245503052973211e-08, + "loss": 4.0518, + "step": 295000 + }, + { + "epoch": 3.25, + "learning_rate": 3.251003905605368e-08, + "loss": 4.0434, + "step": 295500 + }, + { + "epoch": 3.26, + "learning_rate": 3.2565047582375265e-08, + "loss": 4.0486, + "step": 296000 + }, + { + "epoch": 3.26, + "learning_rate": 3.262005610869685e-08, + "loss": 4.0567, + "step": 296500 + }, + { + "epoch": 3.27, + "learning_rate": 3.267506463501843e-08, + "loss": 4.0553, + "step": 297000 + }, + { + "epoch": 3.27, + "learning_rate": 3.2730073161340006e-08, + "loss": 4.0523, + "step": 297500 + }, + { + "epoch": 3.28, + "learning_rate": 3.278508168766159e-08, + "loss": 4.041, + "step": 298000 + }, + { + "epoch": 3.28, + "learning_rate": 3.284009021398317e-08, + "loss": 4.0354, + "step": 298500 + }, + { + "epoch": 3.29, + "learning_rate": 3.289509874030475e-08, + "loss": 4.0617, + "step": 299000 + }, + { + "epoch": 3.3, + "learning_rate": 3.295010726662632e-08, + "loss": 4.0707, + "step": 299500 + }, + { + "epoch": 3.3, + "learning_rate": 3.3005115792947905e-08, + "loss": 4.0604, + "step": 300000 + }, + { + "epoch": 3.31, + "learning_rate": 3.306012431926948e-08, + "loss": 4.0456, + "step": 300500 + }, + { + "epoch": 3.31, + "learning_rate": 3.3115132845591064e-08, + "loss": 4.0447, + "step": 301000 + }, + { + "epoch": 3.32, + "learning_rate": 3.3170141371912646e-08, + "loss": 4.0557, + "step": 301500 + }, + { + "epoch": 3.32, + "learning_rate": 3.322514989823422e-08, + "loss": 4.0457, + "step": 302000 + }, + { + "epoch": 3.33, + "learning_rate": 3.3280158424555805e-08, + "loss": 4.0368, + "step": 302500 + }, + { + "epoch": 3.33, + "learning_rate": 3.333516695087739e-08, + "loss": 4.0553, + "step": 303000 + }, + { + "epoch": 3.34, + "learning_rate": 3.339017547719896e-08, + "loss": 4.0562, + "step": 303500 + }, + { + "epoch": 3.34, + "learning_rate": 3.3445184003520545e-08, + "loss": 4.0501, + "step": 304000 + }, + { + "epoch": 3.35, + "learning_rate": 3.350019252984213e-08, + "loss": 4.0603, + "step": 304500 + }, + { + "epoch": 3.36, + "learning_rate": 3.3555201056163704e-08, + "loss": 4.0607, + "step": 305000 + }, + { + "epoch": 3.36, + "learning_rate": 3.361020958248528e-08, + "loss": 4.0504, + "step": 305500 + }, + { + "epoch": 3.37, + "learning_rate": 3.366521810880686e-08, + "loss": 4.0367, + "step": 306000 + }, + { + "epoch": 3.37, + "learning_rate": 3.3720226635128444e-08, + "loss": 4.0498, + "step": 306500 + }, + { + "epoch": 3.38, + "learning_rate": 3.377523516145002e-08, + "loss": 4.0457, + "step": 307000 + }, + { + "epoch": 3.38, + "learning_rate": 3.38302436877716e-08, + "loss": 4.0459, + "step": 307500 + }, + { + "epoch": 3.39, + "learning_rate": 3.3885252214093185e-08, + "loss": 4.0495, + "step": 308000 + }, + { + "epoch": 3.39, + "learning_rate": 3.394026074041476e-08, + "loss": 4.0432, + "step": 308500 + }, + { + "epoch": 3.4, + "learning_rate": 3.3995269266736344e-08, + "loss": 4.0577, + "step": 309000 + }, + { + "epoch": 3.41, + "learning_rate": 3.4050277793057926e-08, + "loss": 4.0555, + "step": 309500 + }, + { + "epoch": 3.41, + "learning_rate": 3.41052863193795e-08, + "loss": 4.0387, + "step": 310000 + }, + { + "epoch": 3.42, + "learning_rate": 3.4160294845701084e-08, + "loss": 4.0537, + "step": 310500 + }, + { + "epoch": 3.42, + "learning_rate": 3.421530337202266e-08, + "loss": 4.0493, + "step": 311000 + }, + { + "epoch": 3.43, + "learning_rate": 3.427031189834424e-08, + "loss": 4.04, + "step": 311500 + }, + { + "epoch": 3.43, + "learning_rate": 3.432532042466582e-08, + "loss": 4.0484, + "step": 312000 + }, + { + "epoch": 3.44, + "learning_rate": 3.43803289509874e-08, + "loss": 4.0613, + "step": 312500 + }, + { + "epoch": 3.44, + "learning_rate": 3.443533747730898e-08, + "loss": 4.0247, + "step": 313000 + }, + { + "epoch": 3.45, + "learning_rate": 3.449034600363056e-08, + "loss": 4.017, + "step": 313500 + }, + { + "epoch": 3.45, + "learning_rate": 3.454535452995214e-08, + "loss": 4.044, + "step": 314000 + }, + { + "epoch": 3.46, + "learning_rate": 3.460036305627372e-08, + "loss": 4.0464, + "step": 314500 + }, + { + "epoch": 3.47, + "learning_rate": 3.46553715825953e-08, + "loss": 4.0469, + "step": 315000 + }, + { + "epoch": 3.47, + "learning_rate": 3.471038010891688e-08, + "loss": 4.0392, + "step": 315500 + }, + { + "epoch": 3.48, + "learning_rate": 3.4765388635238465e-08, + "loss": 4.0456, + "step": 316000 + }, + { + "epoch": 3.48, + "learning_rate": 3.482039716156004e-08, + "loss": 4.0338, + "step": 316500 + }, + { + "epoch": 3.49, + "learning_rate": 3.4875405687881624e-08, + "loss": 4.032, + "step": 317000 + }, + { + "epoch": 3.49, + "learning_rate": 3.49304142142032e-08, + "loss": 4.0359, + "step": 317500 + }, + { + "epoch": 3.5, + "learning_rate": 3.4985422740524775e-08, + "loss": 4.0514, + "step": 318000 + }, + { + "epoch": 3.5, + "learning_rate": 3.504043126684636e-08, + "loss": 4.0503, + "step": 318500 + }, + { + "epoch": 3.51, + "learning_rate": 3.509543979316794e-08, + "loss": 4.0402, + "step": 319000 + }, + { + "epoch": 3.52, + "learning_rate": 3.5150448319489516e-08, + "loss": 4.0275, + "step": 319500 + }, + { + "epoch": 3.52, + "learning_rate": 3.52054568458111e-08, + "loss": 4.06, + "step": 320000 + }, + { + "epoch": 3.53, + "learning_rate": 3.526046537213268e-08, + "loss": 4.0523, + "step": 320500 + }, + { + "epoch": 3.53, + "learning_rate": 3.531547389845426e-08, + "loss": 4.0402, + "step": 321000 + }, + { + "epoch": 3.54, + "learning_rate": 3.537048242477584e-08, + "loss": 4.0494, + "step": 321500 + }, + { + "epoch": 3.54, + "learning_rate": 3.542549095109742e-08, + "loss": 4.0467, + "step": 322000 + }, + { + "epoch": 3.55, + "learning_rate": 3.5480499477419e-08, + "loss": 4.0423, + "step": 322500 + }, + { + "epoch": 3.55, + "learning_rate": 3.553550800374058e-08, + "loss": 4.0594, + "step": 323000 + }, + { + "epoch": 3.56, + "learning_rate": 3.5590516530062156e-08, + "loss": 4.031, + "step": 323500 + }, + { + "epoch": 3.56, + "learning_rate": 3.564552505638374e-08, + "loss": 4.0452, + "step": 324000 + }, + { + "epoch": 3.57, + "learning_rate": 3.5700533582705314e-08, + "loss": 4.0303, + "step": 324500 + }, + { + "epoch": 3.58, + "learning_rate": 3.57555421090269e-08, + "loss": 4.0428, + "step": 325000 + }, + { + "epoch": 3.58, + "learning_rate": 3.581055063534848e-08, + "loss": 4.0235, + "step": 325500 + }, + { + "epoch": 3.59, + "learning_rate": 3.5865559161670055e-08, + "loss": 4.0525, + "step": 326000 + }, + { + "epoch": 3.59, + "learning_rate": 3.592056768799164e-08, + "loss": 4.0301, + "step": 326500 + }, + { + "epoch": 3.6, + "learning_rate": 3.597557621431322e-08, + "loss": 4.0356, + "step": 327000 + }, + { + "epoch": 3.6, + "learning_rate": 3.6030584740634796e-08, + "loss": 4.0469, + "step": 327500 + }, + { + "epoch": 3.61, + "learning_rate": 3.608559326695638e-08, + "loss": 4.0563, + "step": 328000 + }, + { + "epoch": 3.61, + "learning_rate": 3.614060179327796e-08, + "loss": 4.0312, + "step": 328500 + }, + { + "epoch": 3.62, + "learning_rate": 3.619561031959954e-08, + "loss": 4.0429, + "step": 329000 + }, + { + "epoch": 3.63, + "learning_rate": 3.625061884592111e-08, + "loss": 4.0158, + "step": 329500 + }, + { + "epoch": 3.63, + "learning_rate": 3.6305627372242695e-08, + "loss": 4.0303, + "step": 330000 + }, + { + "epoch": 3.64, + "learning_rate": 3.636063589856427e-08, + "loss": 4.026, + "step": 330500 + }, + { + "epoch": 3.64, + "learning_rate": 3.6415644424885854e-08, + "loss": 4.0521, + "step": 331000 + }, + { + "epoch": 3.65, + "learning_rate": 3.6470652951207436e-08, + "loss": 4.0328, + "step": 331500 + }, + { + "epoch": 3.65, + "learning_rate": 3.652566147752901e-08, + "loss": 4.0433, + "step": 332000 + }, + { + "epoch": 3.66, + "learning_rate": 3.6580670003850594e-08, + "loss": 4.0237, + "step": 332500 + }, + { + "epoch": 3.66, + "learning_rate": 3.663567853017218e-08, + "loss": 4.0241, + "step": 333000 + }, + { + "epoch": 3.67, + "learning_rate": 3.669068705649376e-08, + "loss": 4.0381, + "step": 333500 + }, + { + "epoch": 3.67, + "learning_rate": 3.6745695582815335e-08, + "loss": 4.0307, + "step": 334000 + }, + { + "epoch": 3.68, + "learning_rate": 3.680070410913692e-08, + "loss": 4.0286, + "step": 334500 + }, + { + "epoch": 3.69, + "learning_rate": 3.6855712635458494e-08, + "loss": 4.0291, + "step": 335000 + }, + { + "epoch": 3.69, + "learning_rate": 3.6910721161780076e-08, + "loss": 4.0137, + "step": 335500 + }, + { + "epoch": 3.7, + "learning_rate": 3.696572968810165e-08, + "loss": 4.0364, + "step": 336000 + }, + { + "epoch": 3.7, + "learning_rate": 3.7020738214423234e-08, + "loss": 4.0147, + "step": 336500 + }, + { + "epoch": 3.71, + "learning_rate": 3.707574674074481e-08, + "loss": 4.0135, + "step": 337000 + }, + { + "epoch": 3.71, + "learning_rate": 3.713075526706639e-08, + "loss": 4.0234, + "step": 337500 + }, + { + "epoch": 3.72, + "learning_rate": 3.7185763793387975e-08, + "loss": 4.0387, + "step": 338000 + }, + { + "epoch": 3.72, + "learning_rate": 3.724077231970955e-08, + "loss": 4.0183, + "step": 338500 + }, + { + "epoch": 3.73, + "learning_rate": 3.7295780846031134e-08, + "loss": 4.0375, + "step": 339000 + }, + { + "epoch": 3.74, + "learning_rate": 3.7350789372352716e-08, + "loss": 4.0222, + "step": 339500 + }, + { + "epoch": 3.74, + "learning_rate": 3.740579789867429e-08, + "loss": 4.0139, + "step": 340000 + }, + { + "epoch": 3.75, + "learning_rate": 3.7460806424995874e-08, + "loss": 4.0278, + "step": 340500 + }, + { + "epoch": 3.75, + "learning_rate": 3.751581495131746e-08, + "loss": 4.0418, + "step": 341000 + }, + { + "epoch": 3.76, + "learning_rate": 3.757082347763903e-08, + "loss": 4.016, + "step": 341500 + }, + { + "epoch": 3.76, + "learning_rate": 3.762583200396061e-08, + "loss": 4.0273, + "step": 342000 + }, + { + "epoch": 3.77, + "learning_rate": 3.768084053028219e-08, + "loss": 4.0246, + "step": 342500 + }, + { + "epoch": 3.77, + "learning_rate": 3.7735849056603774e-08, + "loss": 4.0259, + "step": 343000 + }, + { + "epoch": 3.78, + "learning_rate": 3.779085758292535e-08, + "loss": 4.0245, + "step": 343500 + }, + { + "epoch": 3.78, + "learning_rate": 3.784586610924693e-08, + "loss": 4.0109, + "step": 344000 + }, + { + "epoch": 3.79, + "learning_rate": 3.7900874635568514e-08, + "loss": 4.0318, + "step": 344500 + }, + { + "epoch": 3.8, + "learning_rate": 3.795588316189009e-08, + "loss": 4.0287, + "step": 345000 + }, + { + "epoch": 3.8, + "learning_rate": 3.801089168821167e-08, + "loss": 4.0193, + "step": 345500 + }, + { + "epoch": 3.81, + "learning_rate": 3.8065900214533255e-08, + "loss": 4.0221, + "step": 346000 + }, + { + "epoch": 3.81, + "learning_rate": 3.812090874085483e-08, + "loss": 4.0096, + "step": 346500 + }, + { + "epoch": 3.82, + "learning_rate": 3.8175917267176413e-08, + "loss": 4.0385, + "step": 347000 + }, + { + "epoch": 3.82, + "learning_rate": 3.823092579349799e-08, + "loss": 4.0195, + "step": 347500 + }, + { + "epoch": 3.83, + "learning_rate": 3.8285934319819565e-08, + "loss": 4.0207, + "step": 348000 + }, + { + "epoch": 3.83, + "learning_rate": 3.834094284614115e-08, + "loss": 4.0367, + "step": 348500 + }, + { + "epoch": 3.84, + "learning_rate": 3.839595137246273e-08, + "loss": 4.0268, + "step": 349000 + }, + { + "epoch": 3.85, + "learning_rate": 3.8450959898784306e-08, + "loss": 4.0501, + "step": 349500 + }, + { + "epoch": 3.85, + "learning_rate": 3.850596842510589e-08, + "loss": 4.0366, + "step": 350000 + }, + { + "epoch": 3.86, + "learning_rate": 3.856097695142747e-08, + "loss": 4.0161, + "step": 350500 + }, + { + "epoch": 3.86, + "learning_rate": 3.861598547774905e-08, + "loss": 4.0321, + "step": 351000 + }, + { + "epoch": 3.87, + "learning_rate": 3.867099400407063e-08, + "loss": 4.0502, + "step": 351500 + }, + { + "epoch": 3.87, + "learning_rate": 3.872600253039221e-08, + "loss": 4.0289, + "step": 352000 + }, + { + "epoch": 3.88, + "learning_rate": 3.8781011056713794e-08, + "loss": 4.0096, + "step": 352500 + }, + { + "epoch": 3.88, + "learning_rate": 3.883601958303537e-08, + "loss": 4.0201, + "step": 353000 + }, + { + "epoch": 3.89, + "learning_rate": 3.8891028109356946e-08, + "loss": 4.0336, + "step": 353500 + }, + { + "epoch": 3.89, + "learning_rate": 3.894603663567853e-08, + "loss": 4.0297, + "step": 354000 + }, + { + "epoch": 3.9, + "learning_rate": 3.9001045162000104e-08, + "loss": 4.0228, + "step": 354500 + }, + { + "epoch": 3.91, + "learning_rate": 3.905605368832169e-08, + "loss": 4.0394, + "step": 355000 + }, + { + "epoch": 3.91, + "learning_rate": 3.911106221464327e-08, + "loss": 4.0329, + "step": 355500 + }, + { + "epoch": 3.92, + "learning_rate": 3.9166070740964845e-08, + "loss": 4.0428, + "step": 356000 + }, + { + "epoch": 3.92, + "learning_rate": 3.922107926728643e-08, + "loss": 4.0207, + "step": 356500 + }, + { + "epoch": 3.93, + "learning_rate": 3.927608779360801e-08, + "loss": 4.0077, + "step": 357000 + }, + { + "epoch": 3.93, + "learning_rate": 3.9331096319929586e-08, + "loss": 4.0057, + "step": 357500 + }, + { + "epoch": 3.94, + "learning_rate": 3.938610484625117e-08, + "loss": 4.0271, + "step": 358000 + }, + { + "epoch": 3.94, + "learning_rate": 3.944111337257275e-08, + "loss": 4.0287, + "step": 358500 + }, + { + "epoch": 3.95, + "learning_rate": 3.949612189889433e-08, + "loss": 4.0055, + "step": 359000 + }, + { + "epoch": 3.96, + "learning_rate": 3.955113042521591e-08, + "loss": 4.008, + "step": 359500 + }, + { + "epoch": 3.96, + "learning_rate": 3.9606138951537485e-08, + "loss": 4.0103, + "step": 360000 + }, + { + "epoch": 3.97, + "learning_rate": 3.966114747785906e-08, + "loss": 4.031, + "step": 360500 + }, + { + "epoch": 3.97, + "learning_rate": 3.9716156004180643e-08, + "loss": 4.0047, + "step": 361000 + }, + { + "epoch": 3.98, + "learning_rate": 3.9771164530502226e-08, + "loss": 4.0167, + "step": 361500 + }, + { + "epoch": 3.98, + "learning_rate": 3.982617305682381e-08, + "loss": 4.0196, + "step": 362000 + }, + { + "epoch": 3.99, + "learning_rate": 3.9881181583145384e-08, + "loss": 4.0136, + "step": 362500 + }, + { + "epoch": 3.99, + "learning_rate": 3.993619010946697e-08, + "loss": 4.0117, + "step": 363000 + }, + { + "epoch": 4.0, + "learning_rate": 3.999119863578855e-08, + "loss": 4.0042, + "step": 363500 + }, + { + "epoch": 4.0, + "eval_loss": 4.0121588706970215, + "eval_runtime": 6.1397, + "eval_samples_per_second": 253.106, + "step": 363580 + }, + { + "epoch": 4.0, + "learning_rate": 4.0046207162110125e-08, + "loss": 4.0071, + "step": 364000 + }, + { + "epoch": 4.01, + "learning_rate": 4.010121568843171e-08, + "loss": 4.033, + "step": 364500 + }, + { + "epoch": 4.02, + "learning_rate": 4.015622421475329e-08, + "loss": 4.0074, + "step": 365000 + }, + { + "epoch": 4.02, + "learning_rate": 4.0211232741074866e-08, + "loss": 4.0232, + "step": 365500 + }, + { + "epoch": 4.03, + "learning_rate": 4.026624126739644e-08, + "loss": 4.0049, + "step": 366000 + }, + { + "epoch": 4.03, + "learning_rate": 4.0321249793718024e-08, + "loss": 4.0017, + "step": 366500 + }, + { + "epoch": 4.04, + "learning_rate": 4.03762583200396e-08, + "loss": 4.0143, + "step": 367000 + }, + { + "epoch": 4.04, + "learning_rate": 4.043126684636118e-08, + "loss": 4.0409, + "step": 367500 + }, + { + "epoch": 4.05, + "learning_rate": 4.0486275372682765e-08, + "loss": 4.0269, + "step": 368000 + }, + { + "epoch": 4.05, + "learning_rate": 4.054128389900434e-08, + "loss": 3.9749, + "step": 368500 + }, + { + "epoch": 4.06, + "learning_rate": 4.0596292425325923e-08, + "loss": 4.0078, + "step": 369000 + }, + { + "epoch": 4.07, + "learning_rate": 4.0651300951647506e-08, + "loss": 4.0135, + "step": 369500 + }, + { + "epoch": 4.07, + "learning_rate": 4.070630947796908e-08, + "loss": 4.0113, + "step": 370000 + }, + { + "epoch": 4.08, + "learning_rate": 4.0761318004290664e-08, + "loss": 4.0086, + "step": 370500 + }, + { + "epoch": 4.08, + "learning_rate": 4.081632653061225e-08, + "loss": 3.9849, + "step": 371000 + }, + { + "epoch": 4.09, + "learning_rate": 4.087133505693382e-08, + "loss": 4.0202, + "step": 371500 + }, + { + "epoch": 4.09, + "learning_rate": 4.09263435832554e-08, + "loss": 4.0197, + "step": 372000 + }, + { + "epoch": 4.1, + "learning_rate": 4.098135210957698e-08, + "loss": 4.0029, + "step": 372500 + }, + { + "epoch": 4.1, + "learning_rate": 4.1036360635898563e-08, + "loss": 3.9933, + "step": 373000 + }, + { + "epoch": 4.11, + "learning_rate": 4.109136916222014e-08, + "loss": 4.0088, + "step": 373500 + }, + { + "epoch": 4.11, + "learning_rate": 4.114637768854172e-08, + "loss": 4.0098, + "step": 374000 + }, + { + "epoch": 4.12, + "learning_rate": 4.1201386214863304e-08, + "loss": 4.0065, + "step": 374500 + }, + { + "epoch": 4.13, + "learning_rate": 4.125639474118488e-08, + "loss": 4.0075, + "step": 375000 + }, + { + "epoch": 4.13, + "learning_rate": 4.131140326750646e-08, + "loss": 4.0129, + "step": 375500 + }, + { + "epoch": 4.14, + "learning_rate": 4.1366411793828045e-08, + "loss": 4.0109, + "step": 376000 + }, + { + "epoch": 4.14, + "learning_rate": 4.142142032014962e-08, + "loss": 4.0215, + "step": 376500 + }, + { + "epoch": 4.15, + "learning_rate": 4.1476428846471203e-08, + "loss": 4.0045, + "step": 377000 + }, + { + "epoch": 4.15, + "learning_rate": 4.153143737279278e-08, + "loss": 4.0102, + "step": 377500 + }, + { + "epoch": 4.16, + "learning_rate": 4.158644589911436e-08, + "loss": 4.0199, + "step": 378000 + }, + { + "epoch": 4.16, + "learning_rate": 4.164145442543594e-08, + "loss": 3.9962, + "step": 378500 + }, + { + "epoch": 4.17, + "learning_rate": 4.169646295175752e-08, + "loss": 4.002, + "step": 379000 + }, + { + "epoch": 4.18, + "learning_rate": 4.1751471478079096e-08, + "loss": 3.9867, + "step": 379500 + }, + { + "epoch": 4.18, + "learning_rate": 4.180648000440068e-08, + "loss": 3.9963, + "step": 380000 + }, + { + "epoch": 4.19, + "learning_rate": 4.186148853072226e-08, + "loss": 4.0038, + "step": 380500 + }, + { + "epoch": 4.19, + "learning_rate": 4.1916497057043843e-08, + "loss": 3.9885, + "step": 381000 + }, + { + "epoch": 4.2, + "learning_rate": 4.197150558336542e-08, + "loss": 4.0027, + "step": 381500 + }, + { + "epoch": 4.2, + "learning_rate": 4.2026514109687e-08, + "loss": 4.0353, + "step": 382000 + }, + { + "epoch": 4.21, + "learning_rate": 4.2081522636008584e-08, + "loss": 4.0029, + "step": 382500 + }, + { + "epoch": 4.21, + "learning_rate": 4.213653116233016e-08, + "loss": 4.005, + "step": 383000 + }, + { + "epoch": 4.22, + "learning_rate": 4.219153968865174e-08, + "loss": 4.0055, + "step": 383500 + }, + { + "epoch": 4.22, + "learning_rate": 4.224654821497332e-08, + "loss": 3.9999, + "step": 384000 + }, + { + "epoch": 4.23, + "learning_rate": 4.2301556741294894e-08, + "loss": 3.9951, + "step": 384500 + }, + { + "epoch": 4.24, + "learning_rate": 4.2356565267616477e-08, + "loss": 4.0008, + "step": 385000 + }, + { + "epoch": 4.24, + "learning_rate": 4.241157379393806e-08, + "loss": 3.9898, + "step": 385500 + }, + { + "epoch": 4.25, + "learning_rate": 4.2466582320259635e-08, + "loss": 4.002, + "step": 386000 + }, + { + "epoch": 4.25, + "learning_rate": 4.252159084658122e-08, + "loss": 4.0249, + "step": 386500 + }, + { + "epoch": 4.26, + "learning_rate": 4.25765993729028e-08, + "loss": 4.0081, + "step": 387000 + }, + { + "epoch": 4.26, + "learning_rate": 4.2631607899224376e-08, + "loss": 4.0123, + "step": 387500 + }, + { + "epoch": 4.27, + "learning_rate": 4.268661642554596e-08, + "loss": 4.0046, + "step": 388000 + }, + { + "epoch": 4.27, + "learning_rate": 4.274162495186754e-08, + "loss": 4.0146, + "step": 388500 + }, + { + "epoch": 4.28, + "learning_rate": 4.2796633478189117e-08, + "loss": 4.0301, + "step": 389000 + }, + { + "epoch": 4.29, + "learning_rate": 4.28516420045107e-08, + "loss": 3.9999, + "step": 389500 + }, + { + "epoch": 4.29, + "learning_rate": 4.2906650530832275e-08, + "loss": 4.0287, + "step": 390000 + }, + { + "epoch": 4.3, + "learning_rate": 4.296165905715386e-08, + "loss": 3.9945, + "step": 390500 + }, + { + "epoch": 4.3, + "learning_rate": 4.3016667583475433e-08, + "loss": 3.9994, + "step": 391000 + }, + { + "epoch": 4.31, + "learning_rate": 4.3071676109797016e-08, + "loss": 4.0263, + "step": 391500 + }, + { + "epoch": 4.31, + "learning_rate": 4.31266846361186e-08, + "loss": 3.9797, + "step": 392000 + }, + { + "epoch": 4.32, + "learning_rate": 4.3181693162440174e-08, + "loss": 4.0001, + "step": 392500 + }, + { + "epoch": 4.32, + "learning_rate": 4.3236701688761757e-08, + "loss": 4.011, + "step": 393000 + }, + { + "epoch": 4.33, + "learning_rate": 4.329171021508334e-08, + "loss": 4.0018, + "step": 393500 + }, + { + "epoch": 4.33, + "learning_rate": 4.3346718741404915e-08, + "loss": 3.9987, + "step": 394000 + }, + { + "epoch": 4.34, + "learning_rate": 4.34017272677265e-08, + "loss": 3.989, + "step": 394500 + }, + { + "epoch": 4.35, + "learning_rate": 4.345673579404808e-08, + "loss": 4.0141, + "step": 395000 + }, + { + "epoch": 4.35, + "learning_rate": 4.3511744320369656e-08, + "loss": 3.9949, + "step": 395500 + }, + { + "epoch": 4.36, + "learning_rate": 4.356675284669123e-08, + "loss": 3.9979, + "step": 396000 + }, + { + "epoch": 4.36, + "learning_rate": 4.3621761373012814e-08, + "loss": 3.9793, + "step": 396500 + }, + { + "epoch": 4.37, + "learning_rate": 4.367676989933439e-08, + "loss": 4.0263, + "step": 397000 + }, + { + "epoch": 4.37, + "learning_rate": 4.373177842565597e-08, + "loss": 4.0045, + "step": 397500 + }, + { + "epoch": 4.38, + "learning_rate": 4.3786786951977555e-08, + "loss": 4.0005, + "step": 398000 + }, + { + "epoch": 4.38, + "learning_rate": 4.384179547829913e-08, + "loss": 4.0029, + "step": 398500 + }, + { + "epoch": 4.39, + "learning_rate": 4.389680400462071e-08, + "loss": 3.9951, + "step": 399000 + }, + { + "epoch": 4.4, + "learning_rate": 4.3951812530942296e-08, + "loss": 4.0049, + "step": 399500 + }, + { + "epoch": 4.4, + "learning_rate": 4.400682105726388e-08, + "loss": 3.9915, + "step": 400000 + }, + { + "epoch": 4.41, + "learning_rate": 4.4061829583585454e-08, + "loss": 4.0023, + "step": 400500 + }, + { + "epoch": 4.41, + "learning_rate": 4.4116838109907037e-08, + "loss": 4.003, + "step": 401000 + }, + { + "epoch": 4.42, + "learning_rate": 4.417184663622861e-08, + "loss": 3.9966, + "step": 401500 + }, + { + "epoch": 4.42, + "learning_rate": 4.4226855162550195e-08, + "loss": 4.0086, + "step": 402000 + }, + { + "epoch": 4.43, + "learning_rate": 4.428186368887177e-08, + "loss": 4.0102, + "step": 402500 + }, + { + "epoch": 4.43, + "learning_rate": 4.433687221519335e-08, + "loss": 4.0021, + "step": 403000 + }, + { + "epoch": 4.44, + "learning_rate": 4.439188074151493e-08, + "loss": 3.9932, + "step": 403500 + }, + { + "epoch": 4.44, + "learning_rate": 4.444688926783651e-08, + "loss": 3.9922, + "step": 404000 + }, + { + "epoch": 4.45, + "learning_rate": 4.4501897794158094e-08, + "loss": 3.9927, + "step": 404500 + }, + { + "epoch": 4.46, + "learning_rate": 4.455690632047967e-08, + "loss": 4.008, + "step": 405000 + }, + { + "epoch": 4.46, + "learning_rate": 4.461191484680125e-08, + "loss": 3.9971, + "step": 405500 + }, + { + "epoch": 4.47, + "learning_rate": 4.4666923373122835e-08, + "loss": 4.0077, + "step": 406000 + }, + { + "epoch": 4.47, + "learning_rate": 4.472193189944441e-08, + "loss": 3.9928, + "step": 406500 + }, + { + "epoch": 4.48, + "learning_rate": 4.477694042576599e-08, + "loss": 3.9987, + "step": 407000 + }, + { + "epoch": 4.48, + "learning_rate": 4.4831948952087576e-08, + "loss": 3.9989, + "step": 407500 + }, + { + "epoch": 4.49, + "learning_rate": 4.488695747840915e-08, + "loss": 4.0057, + "step": 408000 + }, + { + "epoch": 4.49, + "learning_rate": 4.494196600473073e-08, + "loss": 3.9884, + "step": 408500 + }, + { + "epoch": 4.5, + "learning_rate": 4.499697453105231e-08, + "loss": 4.0138, + "step": 409000 + }, + { + "epoch": 4.51, + "learning_rate": 4.505198305737389e-08, + "loss": 4.0081, + "step": 409500 + }, + { + "epoch": 4.51, + "learning_rate": 4.510699158369547e-08, + "loss": 3.9936, + "step": 410000 + }, + { + "epoch": 4.52, + "learning_rate": 4.516200011001705e-08, + "loss": 4.0194, + "step": 410500 + }, + { + "epoch": 4.52, + "learning_rate": 4.521700863633863e-08, + "loss": 3.9893, + "step": 411000 + }, + { + "epoch": 4.53, + "learning_rate": 4.527201716266021e-08, + "loss": 3.9932, + "step": 411500 + }, + { + "epoch": 4.53, + "learning_rate": 4.532702568898179e-08, + "loss": 3.9974, + "step": 412000 + }, + { + "epoch": 4.54, + "learning_rate": 4.5382034215303374e-08, + "loss": 3.9908, + "step": 412500 + }, + { + "epoch": 4.54, + "learning_rate": 4.543704274162495e-08, + "loss": 3.9843, + "step": 413000 + }, + { + "epoch": 4.55, + "learning_rate": 4.549205126794653e-08, + "loss": 4.0037, + "step": 413500 + }, + { + "epoch": 4.55, + "learning_rate": 4.554705979426811e-08, + "loss": 3.9912, + "step": 414000 + }, + { + "epoch": 4.56, + "learning_rate": 4.5602068320589684e-08, + "loss": 4.0016, + "step": 414500 + }, + { + "epoch": 4.57, + "learning_rate": 4.5657076846911267e-08, + "loss": 3.9823, + "step": 415000 + }, + { + "epoch": 4.57, + "learning_rate": 4.571208537323285e-08, + "loss": 3.9904, + "step": 415500 + }, + { + "epoch": 4.58, + "learning_rate": 4.5767093899554425e-08, + "loss": 3.9957, + "step": 416000 + }, + { + "epoch": 4.58, + "learning_rate": 4.582210242587601e-08, + "loss": 3.9918, + "step": 416500 + }, + { + "epoch": 4.59, + "learning_rate": 4.587711095219759e-08, + "loss": 3.981, + "step": 417000 + }, + { + "epoch": 4.59, + "learning_rate": 4.5932119478519166e-08, + "loss": 3.988, + "step": 417500 + }, + { + "epoch": 4.6, + "learning_rate": 4.598712800484075e-08, + "loss": 3.9751, + "step": 418000 + }, + { + "epoch": 4.6, + "learning_rate": 4.604213653116233e-08, + "loss": 3.9723, + "step": 418500 + }, + { + "epoch": 4.61, + "learning_rate": 4.609714505748391e-08, + "loss": 3.9709, + "step": 419000 + }, + { + "epoch": 4.62, + "learning_rate": 4.615215358380549e-08, + "loss": 3.9787, + "step": 419500 + }, + { + "epoch": 4.62, + "learning_rate": 4.6207162110127065e-08, + "loss": 3.988, + "step": 420000 + }, + { + "epoch": 4.63, + "learning_rate": 4.626217063644865e-08, + "loss": 3.9831, + "step": 420500 + }, + { + "epoch": 4.63, + "learning_rate": 4.631717916277022e-08, + "loss": 3.99, + "step": 421000 + }, + { + "epoch": 4.64, + "learning_rate": 4.6372187689091806e-08, + "loss": 3.9824, + "step": 421500 + }, + { + "epoch": 4.64, + "learning_rate": 4.642719621541339e-08, + "loss": 3.9747, + "step": 422000 + }, + { + "epoch": 4.65, + "learning_rate": 4.6482204741734964e-08, + "loss": 3.9933, + "step": 422500 + }, + { + "epoch": 4.65, + "learning_rate": 4.6537213268056547e-08, + "loss": 4.0043, + "step": 423000 + }, + { + "epoch": 4.66, + "learning_rate": 4.659222179437813e-08, + "loss": 3.9815, + "step": 423500 + }, + { + "epoch": 4.66, + "learning_rate": 4.6647230320699705e-08, + "loss": 3.9837, + "step": 424000 + }, + { + "epoch": 4.67, + "learning_rate": 4.670223884702129e-08, + "loss": 3.9899, + "step": 424500 + }, + { + "epoch": 4.68, + "learning_rate": 4.675724737334287e-08, + "loss": 4.0, + "step": 425000 + }, + { + "epoch": 4.68, + "learning_rate": 4.6812255899664446e-08, + "loss": 3.9825, + "step": 425500 + }, + { + "epoch": 4.69, + "learning_rate": 4.686726442598603e-08, + "loss": 3.9884, + "step": 426000 + }, + { + "epoch": 4.69, + "learning_rate": 4.6922272952307604e-08, + "loss": 3.9689, + "step": 426500 + }, + { + "epoch": 4.7, + "learning_rate": 4.697728147862918e-08, + "loss": 3.9748, + "step": 427000 + }, + { + "epoch": 4.7, + "learning_rate": 4.703229000495076e-08, + "loss": 3.9784, + "step": 427500 + }, + { + "epoch": 4.71, + "learning_rate": 4.7087298531272345e-08, + "loss": 3.9905, + "step": 428000 + }, + { + "epoch": 4.71, + "learning_rate": 4.714230705759393e-08, + "loss": 3.974, + "step": 428500 + }, + { + "epoch": 4.72, + "learning_rate": 4.71973155839155e-08, + "loss": 3.9935, + "step": 429000 + }, + { + "epoch": 4.73, + "learning_rate": 4.7252324110237086e-08, + "loss": 3.9933, + "step": 429500 + }, + { + "epoch": 4.73, + "learning_rate": 4.730733263655867e-08, + "loss": 3.973, + "step": 430000 + }, + { + "epoch": 4.74, + "learning_rate": 4.7362341162880244e-08, + "loss": 3.9661, + "step": 430500 + }, + { + "epoch": 4.74, + "learning_rate": 4.7417349689201826e-08, + "loss": 3.9842, + "step": 431000 + }, + { + "epoch": 4.75, + "learning_rate": 4.747235821552341e-08, + "loss": 3.9886, + "step": 431500 + }, + { + "epoch": 4.75, + "learning_rate": 4.7527366741844985e-08, + "loss": 3.9817, + "step": 432000 + }, + { + "epoch": 4.76, + "learning_rate": 4.758237526816656e-08, + "loss": 3.9832, + "step": 432500 + }, + { + "epoch": 4.76, + "learning_rate": 4.763738379448814e-08, + "loss": 3.9892, + "step": 433000 + }, + { + "epoch": 4.77, + "learning_rate": 4.769239232080972e-08, + "loss": 3.9949, + "step": 433500 + }, + { + "epoch": 4.77, + "learning_rate": 4.77474008471313e-08, + "loss": 3.9738, + "step": 434000 + }, + { + "epoch": 4.78, + "learning_rate": 4.7802409373452884e-08, + "loss": 3.9813, + "step": 434500 + }, + { + "epoch": 4.79, + "learning_rate": 4.785741789977446e-08, + "loss": 3.9875, + "step": 435000 + }, + { + "epoch": 4.79, + "learning_rate": 4.791242642609604e-08, + "loss": 4.0019, + "step": 435500 + }, + { + "epoch": 4.8, + "learning_rate": 4.7967434952417625e-08, + "loss": 3.9713, + "step": 436000 + }, + { + "epoch": 4.8, + "learning_rate": 4.80224434787392e-08, + "loss": 3.9735, + "step": 436500 + }, + { + "epoch": 4.81, + "learning_rate": 4.807745200506078e-08, + "loss": 3.995, + "step": 437000 + }, + { + "epoch": 4.81, + "learning_rate": 4.8132460531382366e-08, + "loss": 3.9831, + "step": 437500 + }, + { + "epoch": 4.82, + "learning_rate": 4.818746905770394e-08, + "loss": 3.9909, + "step": 438000 + }, + { + "epoch": 4.82, + "learning_rate": 4.824247758402552e-08, + "loss": 3.9871, + "step": 438500 + }, + { + "epoch": 4.83, + "learning_rate": 4.82974861103471e-08, + "loss": 3.9827, + "step": 439000 + }, + { + "epoch": 4.84, + "learning_rate": 4.835249463666868e-08, + "loss": 3.9868, + "step": 439500 + }, + { + "epoch": 4.84, + "learning_rate": 4.840750316299026e-08, + "loss": 3.9791, + "step": 440000 + }, + { + "epoch": 4.85, + "learning_rate": 4.846251168931184e-08, + "loss": 3.992, + "step": 440500 + }, + { + "epoch": 4.85, + "learning_rate": 4.851752021563342e-08, + "loss": 3.995, + "step": 441000 + }, + { + "epoch": 4.86, + "learning_rate": 4.8572528741955e-08, + "loss": 3.974, + "step": 441500 + }, + { + "epoch": 4.86, + "learning_rate": 4.862753726827658e-08, + "loss": 3.9685, + "step": 442000 + }, + { + "epoch": 4.87, + "learning_rate": 4.8682545794598164e-08, + "loss": 3.994, + "step": 442500 + }, + { + "epoch": 4.87, + "learning_rate": 4.873755432091974e-08, + "loss": 3.9945, + "step": 443000 + }, + { + "epoch": 4.88, + "learning_rate": 4.879256284724132e-08, + "loss": 3.9687, + "step": 443500 + }, + { + "epoch": 4.88, + "learning_rate": 4.88475713735629e-08, + "loss": 3.9684, + "step": 444000 + }, + { + "epoch": 4.89, + "learning_rate": 4.890257989988448e-08, + "loss": 3.9765, + "step": 444500 + }, + { + "epoch": 4.9, + "learning_rate": 4.8957588426206056e-08, + "loss": 3.971, + "step": 445000 + }, + { + "epoch": 4.9, + "learning_rate": 4.901259695252764e-08, + "loss": 3.9697, + "step": 445500 + }, + { + "epoch": 4.91, + "learning_rate": 4.9067605478849215e-08, + "loss": 3.9693, + "step": 446000 + }, + { + "epoch": 4.91, + "learning_rate": 4.91226140051708e-08, + "loss": 3.9733, + "step": 446500 + }, + { + "epoch": 4.92, + "learning_rate": 4.917762253149238e-08, + "loss": 3.9804, + "step": 447000 + }, + { + "epoch": 4.92, + "learning_rate": 4.923263105781396e-08, + "loss": 3.9777, + "step": 447500 + }, + { + "epoch": 4.93, + "learning_rate": 4.928763958413554e-08, + "loss": 3.979, + "step": 448000 + }, + { + "epoch": 4.93, + "learning_rate": 4.934264811045712e-08, + "loss": 3.9641, + "step": 448500 + }, + { + "epoch": 4.94, + "learning_rate": 4.93976566367787e-08, + "loss": 3.9728, + "step": 449000 + }, + { + "epoch": 4.95, + "learning_rate": 4.945266516310028e-08, + "loss": 3.9872, + "step": 449500 + }, + { + "epoch": 4.95, + "learning_rate": 4.950767368942186e-08, + "loss": 3.9514, + "step": 450000 + }, + { + "epoch": 4.96, + "learning_rate": 4.956268221574344e-08, + "loss": 3.974, + "step": 450500 + }, + { + "epoch": 4.96, + "learning_rate": 4.961769074206501e-08, + "loss": 3.9851, + "step": 451000 + }, + { + "epoch": 4.97, + "learning_rate": 4.9672699268386596e-08, + "loss": 3.974, + "step": 451500 + }, + { + "epoch": 4.97, + "learning_rate": 4.972770779470818e-08, + "loss": 3.9641, + "step": 452000 + }, + { + "epoch": 4.98, + "learning_rate": 4.9782716321029754e-08, + "loss": 3.9679, + "step": 452500 + }, + { + "epoch": 4.98, + "learning_rate": 4.9837724847351336e-08, + "loss": 3.9805, + "step": 453000 + }, + { + "epoch": 4.99, + "learning_rate": 4.989273337367292e-08, + "loss": 3.9635, + "step": 453500 + }, + { + "epoch": 4.99, + "learning_rate": 4.9947741899994495e-08, + "loss": 3.9655, + "step": 454000 + }, + { + "epoch": 5.0, + "eval_loss": 3.980429172515869, + "eval_runtime": 6.1307, + "eval_samples_per_second": 253.477, + "step": 454475 + }, + { + "epoch": 5.0, + "learning_rate": 5.000275042631608e-08, + "loss": 3.9819, + "step": 454500 + }, + { + "epoch": 5.01, + "learning_rate": 5.005775895263765e-08, + "loss": 3.9567, + "step": 455000 + }, + { + "epoch": 5.01, + "learning_rate": 5.011276747895924e-08, + "loss": 3.9931, + "step": 455500 + }, + { + "epoch": 5.02, + "learning_rate": 5.016777600528082e-08, + "loss": 3.9623, + "step": 456000 + }, + { + "epoch": 5.02, + "learning_rate": 5.0222784531602394e-08, + "loss": 3.9674, + "step": 456500 + }, + { + "epoch": 5.03, + "learning_rate": 5.0277793057923976e-08, + "loss": 3.955, + "step": 457000 + }, + { + "epoch": 5.03, + "learning_rate": 5.033280158424555e-08, + "loss": 3.9906, + "step": 457500 + }, + { + "epoch": 5.04, + "learning_rate": 5.038781011056713e-08, + "loss": 3.9723, + "step": 458000 + }, + { + "epoch": 5.04, + "learning_rate": 5.044281863688872e-08, + "loss": 3.9704, + "step": 458500 + }, + { + "epoch": 5.05, + "learning_rate": 5.049782716321029e-08, + "loss": 3.9716, + "step": 459000 + }, + { + "epoch": 5.06, + "learning_rate": 5.055283568953187e-08, + "loss": 3.9701, + "step": 459500 + }, + { + "epoch": 5.06, + "learning_rate": 5.060784421585346e-08, + "loss": 3.9642, + "step": 460000 + }, + { + "epoch": 5.07, + "learning_rate": 5.0662852742175034e-08, + "loss": 3.9735, + "step": 460500 + }, + { + "epoch": 5.07, + "learning_rate": 5.071786126849662e-08, + "loss": 3.9809, + "step": 461000 + }, + { + "epoch": 5.08, + "learning_rate": 5.07728697948182e-08, + "loss": 3.9641, + "step": 461500 + }, + { + "epoch": 5.08, + "learning_rate": 5.0827878321139775e-08, + "loss": 3.9653, + "step": 462000 + }, + { + "epoch": 5.09, + "learning_rate": 5.088288684746136e-08, + "loss": 3.9794, + "step": 462500 + }, + { + "epoch": 5.09, + "learning_rate": 5.093789537378293e-08, + "loss": 3.9835, + "step": 463000 + }, + { + "epoch": 5.1, + "learning_rate": 5.099290390010451e-08, + "loss": 3.988, + "step": 463500 + }, + { + "epoch": 5.1, + "learning_rate": 5.10479124264261e-08, + "loss": 3.964, + "step": 464000 + }, + { + "epoch": 5.11, + "learning_rate": 5.1102920952747674e-08, + "loss": 3.9694, + "step": 464500 + }, + { + "epoch": 5.12, + "learning_rate": 5.115792947906925e-08, + "loss": 3.9647, + "step": 465000 + }, + { + "epoch": 5.12, + "learning_rate": 5.121293800539084e-08, + "loss": 3.9671, + "step": 465500 + }, + { + "epoch": 5.13, + "learning_rate": 5.1267946531712415e-08, + "loss": 3.9661, + "step": 466000 + }, + { + "epoch": 5.13, + "learning_rate": 5.132295505803399e-08, + "loss": 3.9645, + "step": 466500 + }, + { + "epoch": 5.14, + "learning_rate": 5.137796358435558e-08, + "loss": 3.9872, + "step": 467000 + }, + { + "epoch": 5.14, + "learning_rate": 5.1432972110677155e-08, + "loss": 3.9663, + "step": 467500 + }, + { + "epoch": 5.15, + "learning_rate": 5.148798063699873e-08, + "loss": 3.9515, + "step": 468000 + }, + { + "epoch": 5.15, + "learning_rate": 5.1542989163320314e-08, + "loss": 3.9619, + "step": 468500 + }, + { + "epoch": 5.16, + "learning_rate": 5.159799768964189e-08, + "loss": 3.9639, + "step": 469000 + }, + { + "epoch": 5.17, + "learning_rate": 5.1653006215963466e-08, + "loss": 3.9766, + "step": 469500 + }, + { + "epoch": 5.17, + "learning_rate": 5.1708014742285055e-08, + "loss": 3.9871, + "step": 470000 + }, + { + "epoch": 5.18, + "learning_rate": 5.176302326860663e-08, + "loss": 3.9717, + "step": 470500 + }, + { + "epoch": 5.18, + "learning_rate": 5.1818031794928206e-08, + "loss": 3.9862, + "step": 471000 + }, + { + "epoch": 5.19, + "learning_rate": 5.1873040321249795e-08, + "loss": 3.9745, + "step": 471500 + }, + { + "epoch": 5.19, + "learning_rate": 5.192804884757137e-08, + "loss": 3.977, + "step": 472000 + }, + { + "epoch": 5.2, + "learning_rate": 5.198305737389295e-08, + "loss": 3.9838, + "step": 472500 + }, + { + "epoch": 5.2, + "learning_rate": 5.2038065900214536e-08, + "loss": 3.9731, + "step": 473000 + }, + { + "epoch": 5.21, + "learning_rate": 5.209307442653611e-08, + "loss": 3.9583, + "step": 473500 + }, + { + "epoch": 5.21, + "learning_rate": 5.214808295285769e-08, + "loss": 3.9543, + "step": 474000 + }, + { + "epoch": 5.22, + "learning_rate": 5.220309147917927e-08, + "loss": 3.974, + "step": 474500 + }, + { + "epoch": 5.23, + "learning_rate": 5.2258100005500846e-08, + "loss": 3.9668, + "step": 475000 + }, + { + "epoch": 5.23, + "learning_rate": 5.231310853182242e-08, + "loss": 3.9816, + "step": 475500 + }, + { + "epoch": 5.24, + "learning_rate": 5.236811705814401e-08, + "loss": 3.9574, + "step": 476000 + }, + { + "epoch": 5.24, + "learning_rate": 5.242312558446559e-08, + "loss": 3.9651, + "step": 476500 + }, + { + "epoch": 5.25, + "learning_rate": 5.247813411078716e-08, + "loss": 3.9695, + "step": 477000 + }, + { + "epoch": 5.25, + "learning_rate": 5.253314263710875e-08, + "loss": 3.9782, + "step": 477500 + }, + { + "epoch": 5.26, + "learning_rate": 5.258815116343033e-08, + "loss": 3.9788, + "step": 478000 + }, + { + "epoch": 5.26, + "learning_rate": 5.2643159689751904e-08, + "loss": 3.9607, + "step": 478500 + }, + { + "epoch": 5.27, + "learning_rate": 5.269816821607349e-08, + "loss": 3.9679, + "step": 479000 + }, + { + "epoch": 5.28, + "learning_rate": 5.275317674239507e-08, + "loss": 3.9604, + "step": 479500 + }, + { + "epoch": 5.28, + "learning_rate": 5.280818526871665e-08, + "loss": 3.9783, + "step": 480000 + }, + { + "epoch": 5.29, + "learning_rate": 5.286319379503823e-08, + "loss": 3.9557, + "step": 480500 + }, + { + "epoch": 5.29, + "learning_rate": 5.29182023213598e-08, + "loss": 3.981, + "step": 481000 + }, + { + "epoch": 5.3, + "learning_rate": 5.297321084768139e-08, + "loss": 3.9644, + "step": 481500 + }, + { + "epoch": 5.3, + "learning_rate": 5.302821937400297e-08, + "loss": 3.9616, + "step": 482000 + }, + { + "epoch": 5.31, + "learning_rate": 5.3083227900324544e-08, + "loss": 3.9705, + "step": 482500 + }, + { + "epoch": 5.31, + "learning_rate": 5.313823642664613e-08, + "loss": 3.9667, + "step": 483000 + }, + { + "epoch": 5.32, + "learning_rate": 5.319324495296771e-08, + "loss": 3.9628, + "step": 483500 + }, + { + "epoch": 5.32, + "learning_rate": 5.3248253479289285e-08, + "loss": 3.9661, + "step": 484000 + }, + { + "epoch": 5.33, + "learning_rate": 5.3303262005610874e-08, + "loss": 3.9593, + "step": 484500 + }, + { + "epoch": 5.34, + "learning_rate": 5.335827053193245e-08, + "loss": 3.9695, + "step": 485000 + }, + { + "epoch": 5.34, + "learning_rate": 5.3413279058254025e-08, + "loss": 3.9734, + "step": 485500 + }, + { + "epoch": 5.35, + "learning_rate": 5.346828758457561e-08, + "loss": 3.9713, + "step": 486000 + }, + { + "epoch": 5.35, + "learning_rate": 5.3523296110897184e-08, + "loss": 3.9626, + "step": 486500 + }, + { + "epoch": 5.36, + "learning_rate": 5.3578304637218766e-08, + "loss": 3.9468, + "step": 487000 + }, + { + "epoch": 5.36, + "learning_rate": 5.363331316354035e-08, + "loss": 3.9713, + "step": 487500 + }, + { + "epoch": 5.37, + "learning_rate": 5.3688321689861925e-08, + "loss": 3.9502, + "step": 488000 + }, + { + "epoch": 5.37, + "learning_rate": 5.37433302161835e-08, + "loss": 3.9582, + "step": 488500 + }, + { + "epoch": 5.38, + "learning_rate": 5.379833874250509e-08, + "loss": 3.9792, + "step": 489000 + }, + { + "epoch": 5.39, + "learning_rate": 5.3853347268826665e-08, + "loss": 3.9665, + "step": 489500 + }, + { + "epoch": 5.39, + "learning_rate": 5.390835579514824e-08, + "loss": 3.9455, + "step": 490000 + }, + { + "epoch": 5.4, + "learning_rate": 5.396336432146983e-08, + "loss": 3.9625, + "step": 490500 + }, + { + "epoch": 5.4, + "learning_rate": 5.4018372847791406e-08, + "loss": 3.967, + "step": 491000 + }, + { + "epoch": 5.41, + "learning_rate": 5.407338137411298e-08, + "loss": 3.9528, + "step": 491500 + }, + { + "epoch": 5.41, + "learning_rate": 5.412838990043457e-08, + "loss": 3.9586, + "step": 492000 + }, + { + "epoch": 5.42, + "learning_rate": 5.418339842675615e-08, + "loss": 3.9547, + "step": 492500 + }, + { + "epoch": 5.42, + "learning_rate": 5.423840695307772e-08, + "loss": 3.9765, + "step": 493000 + }, + { + "epoch": 5.43, + "learning_rate": 5.4293415479399305e-08, + "loss": 3.9561, + "step": 493500 + }, + { + "epoch": 5.43, + "learning_rate": 5.434842400572088e-08, + "loss": 3.9474, + "step": 494000 + }, + { + "epoch": 5.44, + "learning_rate": 5.440343253204246e-08, + "loss": 3.9687, + "step": 494500 + }, + { + "epoch": 5.45, + "learning_rate": 5.4458441058364046e-08, + "loss": 3.9501, + "step": 495000 + }, + { + "epoch": 5.45, + "learning_rate": 5.451344958468562e-08, + "loss": 3.9448, + "step": 495500 + }, + { + "epoch": 5.46, + "learning_rate": 5.45684581110072e-08, + "loss": 3.9588, + "step": 496000 + }, + { + "epoch": 5.46, + "learning_rate": 5.462346663732879e-08, + "loss": 3.9652, + "step": 496500 + }, + { + "epoch": 5.47, + "learning_rate": 5.467847516365036e-08, + "loss": 3.952, + "step": 497000 + }, + { + "epoch": 5.47, + "learning_rate": 5.473348368997194e-08, + "loss": 3.9503, + "step": 497500 + }, + { + "epoch": 5.48, + "learning_rate": 5.478849221629353e-08, + "loss": 3.9532, + "step": 498000 + }, + { + "epoch": 5.48, + "learning_rate": 5.4843500742615104e-08, + "loss": 3.9668, + "step": 498500 + }, + { + "epoch": 5.49, + "learning_rate": 5.4898509268936686e-08, + "loss": 3.9745, + "step": 499000 + }, + { + "epoch": 5.5, + "learning_rate": 5.495351779525826e-08, + "loss": 3.9625, + "step": 499500 + }, + { + "epoch": 5.5, + "learning_rate": 5.500852632157984e-08, + "loss": 3.9581, + "step": 500000 + }, + { + "epoch": 5.51, + "learning_rate": 5.506353484790143e-08, + "loss": 3.9555, + "step": 500500 + }, + { + "epoch": 5.51, + "learning_rate": 5.5118543374223e-08, + "loss": 3.9846, + "step": 501000 + }, + { + "epoch": 5.52, + "learning_rate": 5.517355190054458e-08, + "loss": 3.9805, + "step": 501500 + }, + { + "epoch": 5.52, + "learning_rate": 5.522856042686617e-08, + "loss": 3.9715, + "step": 502000 + }, + { + "epoch": 5.53, + "learning_rate": 5.5283568953187744e-08, + "loss": 3.963, + "step": 502500 + }, + { + "epoch": 5.53, + "learning_rate": 5.533857747950932e-08, + "loss": 3.9431, + "step": 503000 + }, + { + "epoch": 5.54, + "learning_rate": 5.539358600583091e-08, + "loss": 3.9637, + "step": 503500 + }, + { + "epoch": 5.54, + "learning_rate": 5.5448594532152484e-08, + "loss": 3.9447, + "step": 504000 + }, + { + "epoch": 5.55, + "learning_rate": 5.550360305847406e-08, + "loss": 3.9546, + "step": 504500 + }, + { + "epoch": 5.56, + "learning_rate": 5.555861158479564e-08, + "loss": 3.9714, + "step": 505000 + }, + { + "epoch": 5.56, + "learning_rate": 5.561362011111722e-08, + "loss": 3.9531, + "step": 505500 + }, + { + "epoch": 5.57, + "learning_rate": 5.5668628637438795e-08, + "loss": 3.9678, + "step": 506000 + }, + { + "epoch": 5.57, + "learning_rate": 5.5723637163760384e-08, + "loss": 3.9499, + "step": 506500 + }, + { + "epoch": 5.58, + "learning_rate": 5.577864569008196e-08, + "loss": 3.947, + "step": 507000 + }, + { + "epoch": 5.58, + "learning_rate": 5.5833654216403535e-08, + "loss": 3.961, + "step": 507500 + }, + { + "epoch": 5.59, + "learning_rate": 5.5888662742725124e-08, + "loss": 3.95, + "step": 508000 + }, + { + "epoch": 5.59, + "learning_rate": 5.59436712690467e-08, + "loss": 3.9695, + "step": 508500 + }, + { + "epoch": 5.6, + "learning_rate": 5.5998679795368276e-08, + "loss": 3.9634, + "step": 509000 + }, + { + "epoch": 5.61, + "learning_rate": 5.6053688321689865e-08, + "loss": 3.9519, + "step": 509500 + }, + { + "epoch": 5.61, + "learning_rate": 5.610869684801144e-08, + "loss": 3.9536, + "step": 510000 + }, + { + "epoch": 5.62, + "learning_rate": 5.616370537433302e-08, + "loss": 3.9448, + "step": 510500 + }, + { + "epoch": 5.62, + "learning_rate": 5.62187139006546e-08, + "loss": 3.9632, + "step": 511000 + }, + { + "epoch": 5.63, + "learning_rate": 5.6273722426976175e-08, + "loss": 3.9457, + "step": 511500 + }, + { + "epoch": 5.63, + "learning_rate": 5.632873095329775e-08, + "loss": 3.9622, + "step": 512000 + }, + { + "epoch": 5.64, + "learning_rate": 5.638373947961934e-08, + "loss": 3.9404, + "step": 512500 + }, + { + "epoch": 5.64, + "learning_rate": 5.6438748005940916e-08, + "loss": 3.9654, + "step": 513000 + }, + { + "epoch": 5.65, + "learning_rate": 5.649375653226249e-08, + "loss": 3.9565, + "step": 513500 + }, + { + "epoch": 5.65, + "learning_rate": 5.654876505858408e-08, + "loss": 3.9585, + "step": 514000 + }, + { + "epoch": 5.66, + "learning_rate": 5.660377358490566e-08, + "loss": 3.9617, + "step": 514500 + }, + { + "epoch": 5.67, + "learning_rate": 5.665878211122723e-08, + "loss": 3.9598, + "step": 515000 + }, + { + "epoch": 5.67, + "learning_rate": 5.671379063754882e-08, + "loss": 3.9674, + "step": 515500 + }, + { + "epoch": 5.68, + "learning_rate": 5.67687991638704e-08, + "loss": 3.9676, + "step": 516000 + }, + { + "epoch": 5.68, + "learning_rate": 5.682380769019198e-08, + "loss": 3.9224, + "step": 516500 + }, + { + "epoch": 5.69, + "learning_rate": 5.6878816216513556e-08, + "loss": 3.9673, + "step": 517000 + }, + { + "epoch": 5.69, + "learning_rate": 5.693382474283513e-08, + "loss": 3.9491, + "step": 517500 + }, + { + "epoch": 5.7, + "learning_rate": 5.698883326915672e-08, + "loss": 3.9543, + "step": 518000 + }, + { + "epoch": 5.7, + "learning_rate": 5.70438417954783e-08, + "loss": 3.9433, + "step": 518500 + }, + { + "epoch": 5.71, + "learning_rate": 5.709885032179987e-08, + "loss": 3.9444, + "step": 519000 + }, + { + "epoch": 5.72, + "learning_rate": 5.715385884812146e-08, + "loss": 3.9704, + "step": 519500 + }, + { + "epoch": 5.72, + "learning_rate": 5.720886737444304e-08, + "loss": 3.9654, + "step": 520000 + }, + { + "epoch": 5.73, + "learning_rate": 5.7263875900764614e-08, + "loss": 3.9717, + "step": 520500 + }, + { + "epoch": 5.73, + "learning_rate": 5.73188844270862e-08, + "loss": 3.9549, + "step": 521000 + }, + { + "epoch": 5.74, + "learning_rate": 5.737389295340778e-08, + "loss": 3.9556, + "step": 521500 + }, + { + "epoch": 5.74, + "learning_rate": 5.7428901479729354e-08, + "loss": 3.964, + "step": 522000 + }, + { + "epoch": 5.75, + "learning_rate": 5.748391000605094e-08, + "loss": 3.9617, + "step": 522500 + }, + { + "epoch": 5.75, + "learning_rate": 5.753891853237251e-08, + "loss": 3.9238, + "step": 523000 + }, + { + "epoch": 5.76, + "learning_rate": 5.759392705869409e-08, + "loss": 3.974, + "step": 523500 + }, + { + "epoch": 5.76, + "learning_rate": 5.764893558501568e-08, + "loss": 3.9519, + "step": 524000 + }, + { + "epoch": 5.77, + "learning_rate": 5.7703944111337254e-08, + "loss": 3.9472, + "step": 524500 + }, + { + "epoch": 5.78, + "learning_rate": 5.775895263765883e-08, + "loss": 3.9688, + "step": 525000 + }, + { + "epoch": 5.78, + "learning_rate": 5.781396116398042e-08, + "loss": 3.9611, + "step": 525500 + }, + { + "epoch": 5.79, + "learning_rate": 5.7868969690301994e-08, + "loss": 3.9406, + "step": 526000 + }, + { + "epoch": 5.79, + "learning_rate": 5.792397821662357e-08, + "loss": 3.9517, + "step": 526500 + }, + { + "epoch": 5.8, + "learning_rate": 5.797898674294516e-08, + "loss": 3.9668, + "step": 527000 + }, + { + "epoch": 5.8, + "learning_rate": 5.8033995269266735e-08, + "loss": 3.9495, + "step": 527500 + }, + { + "epoch": 5.81, + "learning_rate": 5.808900379558831e-08, + "loss": 3.94, + "step": 528000 + }, + { + "epoch": 5.81, + "learning_rate": 5.8144012321909894e-08, + "loss": 3.9477, + "step": 528500 + }, + { + "epoch": 5.82, + "learning_rate": 5.819902084823147e-08, + "loss": 3.9443, + "step": 529000 + }, + { + "epoch": 5.83, + "learning_rate": 5.825402937455305e-08, + "loss": 3.9334, + "step": 529500 + }, + { + "epoch": 5.83, + "learning_rate": 5.8309037900874634e-08, + "loss": 3.9379, + "step": 530000 + }, + { + "epoch": 5.84, + "learning_rate": 5.836404642719621e-08, + "loss": 3.9645, + "step": 530500 + }, + { + "epoch": 5.84, + "learning_rate": 5.8419054953517786e-08, + "loss": 3.9347, + "step": 531000 + }, + { + "epoch": 5.85, + "learning_rate": 5.8474063479839375e-08, + "loss": 3.9638, + "step": 531500 + }, + { + "epoch": 5.85, + "learning_rate": 5.852907200616095e-08, + "loss": 3.963, + "step": 532000 + }, + { + "epoch": 5.86, + "learning_rate": 5.858408053248253e-08, + "loss": 3.9338, + "step": 532500 + }, + { + "epoch": 5.86, + "learning_rate": 5.8639089058804116e-08, + "loss": 3.9373, + "step": 533000 + }, + { + "epoch": 5.87, + "learning_rate": 5.869409758512569e-08, + "loss": 3.9303, + "step": 533500 + }, + { + "epoch": 5.87, + "learning_rate": 5.874910611144727e-08, + "loss": 3.9594, + "step": 534000 + }, + { + "epoch": 5.88, + "learning_rate": 5.880411463776886e-08, + "loss": 3.9326, + "step": 534500 + }, + { + "epoch": 5.89, + "learning_rate": 5.885912316409043e-08, + "loss": 3.9415, + "step": 535000 + }, + { + "epoch": 5.89, + "learning_rate": 5.8914131690412015e-08, + "loss": 3.9484, + "step": 535500 + }, + { + "epoch": 5.9, + "learning_rate": 5.896914021673359e-08, + "loss": 3.9398, + "step": 536000 + }, + { + "epoch": 5.9, + "learning_rate": 5.902414874305517e-08, + "loss": 3.9405, + "step": 536500 + }, + { + "epoch": 5.91, + "learning_rate": 5.9079157269376756e-08, + "loss": 3.9471, + "step": 537000 + }, + { + "epoch": 5.91, + "learning_rate": 5.913416579569833e-08, + "loss": 3.953, + "step": 537500 + }, + { + "epoch": 5.92, + "learning_rate": 5.918917432201991e-08, + "loss": 3.9506, + "step": 538000 + }, + { + "epoch": 5.92, + "learning_rate": 5.92441828483415e-08, + "loss": 3.9436, + "step": 538500 + }, + { + "epoch": 5.93, + "learning_rate": 5.929919137466307e-08, + "loss": 3.9281, + "step": 539000 + }, + { + "epoch": 5.94, + "learning_rate": 5.935419990098465e-08, + "loss": 3.9576, + "step": 539500 + }, + { + "epoch": 5.94, + "learning_rate": 5.940920842730624e-08, + "loss": 3.9382, + "step": 540000 + }, + { + "epoch": 5.95, + "learning_rate": 5.9464216953627813e-08, + "loss": 3.9441, + "step": 540500 + }, + { + "epoch": 5.95, + "learning_rate": 5.951922547994939e-08, + "loss": 3.9373, + "step": 541000 + }, + { + "epoch": 5.96, + "learning_rate": 5.957423400627097e-08, + "loss": 3.9462, + "step": 541500 + }, + { + "epoch": 5.96, + "learning_rate": 5.962924253259255e-08, + "loss": 3.9592, + "step": 542000 + }, + { + "epoch": 5.97, + "learning_rate": 5.968425105891412e-08, + "loss": 3.941, + "step": 542500 + }, + { + "epoch": 5.97, + "learning_rate": 5.973925958523571e-08, + "loss": 3.9576, + "step": 543000 + }, + { + "epoch": 5.98, + "learning_rate": 5.979426811155729e-08, + "loss": 3.9445, + "step": 543500 + }, + { + "epoch": 5.98, + "learning_rate": 5.984927663787886e-08, + "loss": 3.9583, + "step": 544000 + }, + { + "epoch": 5.99, + "learning_rate": 5.990428516420045e-08, + "loss": 3.9579, + "step": 544500 + }, + { + "epoch": 6.0, + "learning_rate": 5.995929369052203e-08, + "loss": 3.9562, + "step": 545000 + }, + { + "epoch": 6.0, + "eval_loss": 3.9576685428619385, + "eval_runtime": 6.1306, + "eval_samples_per_second": 253.481, + "step": 545370 + }, + { + "epoch": 6.0, + "learning_rate": 6.00143022168436e-08, + "loss": 3.9328, + "step": 545500 + }, + { + "epoch": 6.01, + "learning_rate": 6.00693107431652e-08, + "loss": 3.9461, + "step": 546000 + }, + { + "epoch": 6.01, + "learning_rate": 6.012431926948677e-08, + "loss": 3.9389, + "step": 546500 + }, + { + "epoch": 6.02, + "learning_rate": 6.017932779580835e-08, + "loss": 3.9322, + "step": 547000 + }, + { + "epoch": 6.02, + "learning_rate": 6.023433632212994e-08, + "loss": 3.9461, + "step": 547500 + }, + { + "epoch": 6.03, + "learning_rate": 6.028934484845151e-08, + "loss": 3.9675, + "step": 548000 + }, + { + "epoch": 6.03, + "learning_rate": 6.034435337477309e-08, + "loss": 3.9524, + "step": 548500 + }, + { + "epoch": 6.04, + "learning_rate": 6.039936190109468e-08, + "loss": 3.9484, + "step": 549000 + }, + { + "epoch": 6.05, + "learning_rate": 6.045437042741625e-08, + "loss": 3.9521, + "step": 549500 + }, + { + "epoch": 6.05, + "learning_rate": 6.050937895373783e-08, + "loss": 3.9518, + "step": 550000 + }, + { + "epoch": 6.06, + "learning_rate": 6.05643874800594e-08, + "loss": 3.951, + "step": 550500 + }, + { + "epoch": 6.06, + "learning_rate": 6.061939600638098e-08, + "loss": 3.94, + "step": 551000 + }, + { + "epoch": 6.07, + "learning_rate": 6.067440453270256e-08, + "loss": 3.9571, + "step": 551500 + }, + { + "epoch": 6.07, + "learning_rate": 6.072941305902414e-08, + "loss": 3.9502, + "step": 552000 + }, + { + "epoch": 6.08, + "learning_rate": 6.078442158534572e-08, + "loss": 3.9648, + "step": 552500 + }, + { + "epoch": 6.08, + "learning_rate": 6.08394301116673e-08, + "loss": 3.9206, + "step": 553000 + }, + { + "epoch": 6.09, + "learning_rate": 6.089443863798889e-08, + "loss": 3.9481, + "step": 553500 + }, + { + "epoch": 6.09, + "learning_rate": 6.094944716431046e-08, + "loss": 3.9347, + "step": 554000 + }, + { + "epoch": 6.1, + "learning_rate": 6.100445569063205e-08, + "loss": 3.9768, + "step": 554500 + }, + { + "epoch": 6.11, + "learning_rate": 6.105946421695363e-08, + "loss": 3.9426, + "step": 555000 + }, + { + "epoch": 6.11, + "learning_rate": 6.11144727432752e-08, + "loss": 3.9284, + "step": 555500 + }, + { + "epoch": 6.12, + "learning_rate": 6.116948126959679e-08, + "loss": 3.9415, + "step": 556000 + }, + { + "epoch": 6.12, + "learning_rate": 6.122448979591837e-08, + "loss": 3.9329, + "step": 556500 + }, + { + "epoch": 6.13, + "learning_rate": 6.127949832223994e-08, + "loss": 3.9476, + "step": 557000 + }, + { + "epoch": 6.13, + "learning_rate": 6.133450684856153e-08, + "loss": 3.941, + "step": 557500 + }, + { + "epoch": 6.14, + "learning_rate": 6.138951537488311e-08, + "loss": 3.9352, + "step": 558000 + }, + { + "epoch": 6.14, + "learning_rate": 6.144452390120468e-08, + "loss": 3.9353, + "step": 558500 + }, + { + "epoch": 6.15, + "learning_rate": 6.149953242752627e-08, + "loss": 3.9361, + "step": 559000 + }, + { + "epoch": 6.16, + "learning_rate": 6.155454095384785e-08, + "loss": 3.9676, + "step": 559500 + }, + { + "epoch": 6.16, + "learning_rate": 6.160954948016942e-08, + "loss": 3.9426, + "step": 560000 + }, + { + "epoch": 6.17, + "learning_rate": 6.166455800649101e-08, + "loss": 3.9516, + "step": 560500 + }, + { + "epoch": 6.17, + "learning_rate": 6.171956653281259e-08, + "loss": 3.9637, + "step": 561000 + }, + { + "epoch": 6.18, + "learning_rate": 6.177457505913417e-08, + "loss": 3.9363, + "step": 561500 + }, + { + "epoch": 6.18, + "learning_rate": 6.182958358545574e-08, + "loss": 3.9608, + "step": 562000 + }, + { + "epoch": 6.19, + "learning_rate": 6.188459211177732e-08, + "loss": 3.9412, + "step": 562500 + }, + { + "epoch": 6.19, + "learning_rate": 6.193960063809889e-08, + "loss": 3.9411, + "step": 563000 + }, + { + "epoch": 6.2, + "learning_rate": 6.199460916442048e-08, + "loss": 3.9509, + "step": 563500 + }, + { + "epoch": 6.2, + "learning_rate": 6.204961769074206e-08, + "loss": 3.9322, + "step": 564000 + }, + { + "epoch": 6.21, + "learning_rate": 6.210462621706363e-08, + "loss": 3.9118, + "step": 564500 + }, + { + "epoch": 6.22, + "learning_rate": 6.215963474338522e-08, + "loss": 3.9509, + "step": 565000 + }, + { + "epoch": 6.22, + "learning_rate": 6.22146432697068e-08, + "loss": 3.9439, + "step": 565500 + }, + { + "epoch": 6.23, + "learning_rate": 6.226965179602837e-08, + "loss": 3.9287, + "step": 566000 + }, + { + "epoch": 6.23, + "learning_rate": 6.232466032234996e-08, + "loss": 3.9262, + "step": 566500 + }, + { + "epoch": 6.24, + "learning_rate": 6.237966884867154e-08, + "loss": 3.9332, + "step": 567000 + }, + { + "epoch": 6.24, + "learning_rate": 6.243467737499312e-08, + "loss": 3.9483, + "step": 567500 + }, + { + "epoch": 6.25, + "learning_rate": 6.24896859013147e-08, + "loss": 3.929, + "step": 568000 + }, + { + "epoch": 6.25, + "learning_rate": 6.254469442763628e-08, + "loss": 3.9239, + "step": 568500 + }, + { + "epoch": 6.26, + "learning_rate": 6.259970295395786e-08, + "loss": 3.9452, + "step": 569000 + }, + { + "epoch": 6.27, + "learning_rate": 6.265471148027944e-08, + "loss": 3.9175, + "step": 569500 + }, + { + "epoch": 6.27, + "learning_rate": 6.270972000660102e-08, + "loss": 3.958, + "step": 570000 + }, + { + "epoch": 6.28, + "learning_rate": 6.27647285329226e-08, + "loss": 3.9289, + "step": 570500 + }, + { + "epoch": 6.28, + "learning_rate": 6.281973705924419e-08, + "loss": 3.9381, + "step": 571000 + }, + { + "epoch": 6.29, + "learning_rate": 6.287474558556576e-08, + "loss": 3.9398, + "step": 571500 + }, + { + "epoch": 6.29, + "learning_rate": 6.292975411188734e-08, + "loss": 3.9404, + "step": 572000 + }, + { + "epoch": 6.3, + "learning_rate": 6.298476263820893e-08, + "loss": 3.9404, + "step": 572500 + }, + { + "epoch": 6.3, + "learning_rate": 6.30397711645305e-08, + "loss": 3.935, + "step": 573000 + }, + { + "epoch": 6.31, + "learning_rate": 6.309477969085208e-08, + "loss": 3.9377, + "step": 573500 + }, + { + "epoch": 6.31, + "learning_rate": 6.314978821717365e-08, + "loss": 3.9401, + "step": 574000 + }, + { + "epoch": 6.32, + "learning_rate": 6.320479674349524e-08, + "loss": 3.9466, + "step": 574500 + }, + { + "epoch": 6.33, + "learning_rate": 6.325980526981682e-08, + "loss": 3.9326, + "step": 575000 + }, + { + "epoch": 6.33, + "learning_rate": 6.33148137961384e-08, + "loss": 3.9202, + "step": 575500 + }, + { + "epoch": 6.34, + "learning_rate": 6.336982232245997e-08, + "loss": 3.9389, + "step": 576000 + }, + { + "epoch": 6.34, + "learning_rate": 6.342483084878156e-08, + "loss": 3.9515, + "step": 576500 + }, + { + "epoch": 6.35, + "learning_rate": 6.347983937510314e-08, + "loss": 3.9255, + "step": 577000 + }, + { + "epoch": 6.35, + "learning_rate": 6.353484790142471e-08, + "loss": 3.9249, + "step": 577500 + }, + { + "epoch": 6.36, + "learning_rate": 6.35898564277463e-08, + "loss": 3.9361, + "step": 578000 + }, + { + "epoch": 6.36, + "learning_rate": 6.364486495406788e-08, + "loss": 3.9522, + "step": 578500 + }, + { + "epoch": 6.37, + "learning_rate": 6.369987348038945e-08, + "loss": 3.9233, + "step": 579000 + }, + { + "epoch": 6.38, + "learning_rate": 6.375488200671104e-08, + "loss": 3.9269, + "step": 579500 + }, + { + "epoch": 6.38, + "learning_rate": 6.380989053303262e-08, + "loss": 3.935, + "step": 580000 + }, + { + "epoch": 6.39, + "learning_rate": 6.386489905935419e-08, + "loss": 3.9263, + "step": 580500 + }, + { + "epoch": 6.39, + "learning_rate": 6.391990758567578e-08, + "loss": 3.9252, + "step": 581000 + }, + { + "epoch": 6.4, + "learning_rate": 6.397491611199736e-08, + "loss": 3.931, + "step": 581500 + }, + { + "epoch": 6.4, + "learning_rate": 6.402992463831893e-08, + "loss": 3.9309, + "step": 582000 + }, + { + "epoch": 6.41, + "learning_rate": 6.408493316464052e-08, + "loss": 3.9348, + "step": 582500 + }, + { + "epoch": 6.41, + "learning_rate": 6.41399416909621e-08, + "loss": 3.9203, + "step": 583000 + }, + { + "epoch": 6.42, + "learning_rate": 6.419495021728367e-08, + "loss": 3.9343, + "step": 583500 + }, + { + "epoch": 6.42, + "learning_rate": 6.424995874360526e-08, + "loss": 3.9201, + "step": 584000 + }, + { + "epoch": 6.43, + "learning_rate": 6.430496726992684e-08, + "loss": 3.9472, + "step": 584500 + }, + { + "epoch": 6.44, + "learning_rate": 6.435997579624842e-08, + "loss": 3.9412, + "step": 585000 + }, + { + "epoch": 6.44, + "learning_rate": 6.441498432257e-08, + "loss": 3.9461, + "step": 585500 + }, + { + "epoch": 6.45, + "learning_rate": 6.446999284889158e-08, + "loss": 3.9392, + "step": 586000 + }, + { + "epoch": 6.45, + "learning_rate": 6.452500137521316e-08, + "loss": 3.9402, + "step": 586500 + }, + { + "epoch": 6.46, + "learning_rate": 6.458000990153473e-08, + "loss": 3.9349, + "step": 587000 + }, + { + "epoch": 6.46, + "learning_rate": 6.463501842785631e-08, + "loss": 3.9292, + "step": 587500 + }, + { + "epoch": 6.47, + "learning_rate": 6.469002695417788e-08, + "loss": 3.9409, + "step": 588000 + }, + { + "epoch": 6.47, + "learning_rate": 6.474503548049947e-08, + "loss": 3.9253, + "step": 588500 + }, + { + "epoch": 6.48, + "learning_rate": 6.480004400682105e-08, + "loss": 3.9516, + "step": 589000 + }, + { + "epoch": 6.49, + "learning_rate": 6.485505253314263e-08, + "loss": 3.9209, + "step": 589500 + }, + { + "epoch": 6.49, + "learning_rate": 6.491006105946421e-08, + "loss": 3.9083, + "step": 590000 + }, + { + "epoch": 6.5, + "learning_rate": 6.496506958578579e-08, + "loss": 3.9227, + "step": 590500 + }, + { + "epoch": 6.5, + "learning_rate": 6.502007811210737e-08, + "loss": 3.9363, + "step": 591000 + }, + { + "epoch": 6.51, + "learning_rate": 6.507508663842895e-08, + "loss": 3.9302, + "step": 591500 + }, + { + "epoch": 6.51, + "learning_rate": 6.513009516475053e-08, + "loss": 3.9293, + "step": 592000 + }, + { + "epoch": 6.52, + "learning_rate": 6.518510369107212e-08, + "loss": 3.9304, + "step": 592500 + }, + { + "epoch": 6.52, + "learning_rate": 6.52401122173937e-08, + "loss": 3.9351, + "step": 593000 + }, + { + "epoch": 6.53, + "learning_rate": 6.529512074371527e-08, + "loss": 3.927, + "step": 593500 + }, + { + "epoch": 6.54, + "learning_rate": 6.535012927003686e-08, + "loss": 3.9318, + "step": 594000 + }, + { + "epoch": 6.54, + "learning_rate": 6.540513779635844e-08, + "loss": 3.9341, + "step": 594500 + }, + { + "epoch": 6.55, + "learning_rate": 6.546014632268001e-08, + "loss": 3.9156, + "step": 595000 + }, + { + "epoch": 6.55, + "learning_rate": 6.55151548490016e-08, + "loss": 3.9396, + "step": 595500 + }, + { + "epoch": 6.56, + "learning_rate": 6.557016337532318e-08, + "loss": 3.9281, + "step": 596000 + }, + { + "epoch": 6.56, + "learning_rate": 6.562517190164475e-08, + "loss": 3.9291, + "step": 596500 + }, + { + "epoch": 6.57, + "learning_rate": 6.568018042796634e-08, + "loss": 3.9369, + "step": 597000 + }, + { + "epoch": 6.57, + "learning_rate": 6.573518895428792e-08, + "loss": 3.9168, + "step": 597500 + }, + { + "epoch": 6.58, + "learning_rate": 6.57901974806095e-08, + "loss": 3.9322, + "step": 598000 + }, + { + "epoch": 6.58, + "learning_rate": 6.584520600693107e-08, + "loss": 3.9493, + "step": 598500 + }, + { + "epoch": 6.59, + "learning_rate": 6.590021453325265e-08, + "loss": 3.9286, + "step": 599000 + }, + { + "epoch": 6.6, + "learning_rate": 6.595522305957422e-08, + "loss": 3.9198, + "step": 599500 + }, + { + "epoch": 6.6, + "learning_rate": 6.601023158589581e-08, + "loss": 3.9297, + "step": 600000 + }, + { + "epoch": 6.61, + "learning_rate": 6.606524011221739e-08, + "loss": 3.9379, + "step": 600500 + }, + { + "epoch": 6.61, + "learning_rate": 6.612024863853896e-08, + "loss": 3.9321, + "step": 601000 + }, + { + "epoch": 6.62, + "learning_rate": 6.617525716486055e-08, + "loss": 3.9202, + "step": 601500 + }, + { + "epoch": 6.62, + "learning_rate": 6.623026569118213e-08, + "loss": 3.9229, + "step": 602000 + }, + { + "epoch": 6.63, + "learning_rate": 6.62852742175037e-08, + "loss": 3.9386, + "step": 602500 + }, + { + "epoch": 6.63, + "learning_rate": 6.634028274382529e-08, + "loss": 3.9369, + "step": 603000 + }, + { + "epoch": 6.64, + "learning_rate": 6.639529127014687e-08, + "loss": 3.9228, + "step": 603500 + }, + { + "epoch": 6.65, + "learning_rate": 6.645029979646844e-08, + "loss": 3.9425, + "step": 604000 + }, + { + "epoch": 6.65, + "learning_rate": 6.650530832279003e-08, + "loss": 3.911, + "step": 604500 + }, + { + "epoch": 6.66, + "learning_rate": 6.656031684911161e-08, + "loss": 3.9405, + "step": 605000 + }, + { + "epoch": 6.66, + "learning_rate": 6.661532537543318e-08, + "loss": 3.9182, + "step": 605500 + }, + { + "epoch": 6.67, + "learning_rate": 6.667033390175477e-08, + "loss": 3.9491, + "step": 606000 + }, + { + "epoch": 6.67, + "learning_rate": 6.672534242807635e-08, + "loss": 3.9242, + "step": 606500 + }, + { + "epoch": 6.68, + "learning_rate": 6.678035095439793e-08, + "loss": 3.9226, + "step": 607000 + }, + { + "epoch": 6.68, + "learning_rate": 6.683535948071951e-08, + "loss": 3.9386, + "step": 607500 + }, + { + "epoch": 6.69, + "learning_rate": 6.689036800704109e-08, + "loss": 3.923, + "step": 608000 + }, + { + "epoch": 6.69, + "learning_rate": 6.694537653336267e-08, + "loss": 3.9361, + "step": 608500 + }, + { + "epoch": 6.7, + "learning_rate": 6.700038505968426e-08, + "loss": 3.9267, + "step": 609000 + }, + { + "epoch": 6.71, + "learning_rate": 6.705539358600583e-08, + "loss": 3.9354, + "step": 609500 + }, + { + "epoch": 6.71, + "learning_rate": 6.711040211232741e-08, + "loss": 3.9226, + "step": 610000 + }, + { + "epoch": 6.72, + "learning_rate": 6.716541063864898e-08, + "loss": 3.9149, + "step": 610500 + }, + { + "epoch": 6.72, + "learning_rate": 6.722041916497056e-08, + "loss": 3.93, + "step": 611000 + }, + { + "epoch": 6.73, + "learning_rate": 6.727542769129215e-08, + "loss": 3.9193, + "step": 611500 + }, + { + "epoch": 6.73, + "learning_rate": 6.733043621761372e-08, + "loss": 3.9372, + "step": 612000 + }, + { + "epoch": 6.74, + "learning_rate": 6.73854447439353e-08, + "loss": 3.9306, + "step": 612500 + }, + { + "epoch": 6.74, + "learning_rate": 6.744045327025689e-08, + "loss": 3.9304, + "step": 613000 + }, + { + "epoch": 6.75, + "learning_rate": 6.749546179657846e-08, + "loss": 3.9305, + "step": 613500 + }, + { + "epoch": 6.76, + "learning_rate": 6.755047032290004e-08, + "loss": 3.9368, + "step": 614000 + }, + { + "epoch": 6.76, + "learning_rate": 6.760547884922163e-08, + "loss": 3.9285, + "step": 614500 + }, + { + "epoch": 6.77, + "learning_rate": 6.76604873755432e-08, + "loss": 3.934, + "step": 615000 + }, + { + "epoch": 6.77, + "learning_rate": 6.771549590186478e-08, + "loss": 3.9292, + "step": 615500 + }, + { + "epoch": 6.78, + "learning_rate": 6.777050442818637e-08, + "loss": 3.9402, + "step": 616000 + }, + { + "epoch": 6.78, + "learning_rate": 6.782551295450795e-08, + "loss": 3.947, + "step": 616500 + }, + { + "epoch": 6.79, + "learning_rate": 6.788052148082952e-08, + "loss": 3.9345, + "step": 617000 + }, + { + "epoch": 6.79, + "learning_rate": 6.793553000715111e-08, + "loss": 3.9416, + "step": 617500 + }, + { + "epoch": 6.8, + "learning_rate": 6.799053853347269e-08, + "loss": 3.9245, + "step": 618000 + }, + { + "epoch": 6.8, + "learning_rate": 6.804554705979426e-08, + "loss": 3.919, + "step": 618500 + }, + { + "epoch": 6.81, + "learning_rate": 6.810055558611585e-08, + "loss": 3.904, + "step": 619000 + }, + { + "epoch": 6.82, + "learning_rate": 6.815556411243743e-08, + "loss": 3.9165, + "step": 619500 + }, + { + "epoch": 6.82, + "learning_rate": 6.8210572638759e-08, + "loss": 3.923, + "step": 620000 + }, + { + "epoch": 6.83, + "learning_rate": 6.826558116508059e-08, + "loss": 3.9316, + "step": 620500 + }, + { + "epoch": 6.83, + "learning_rate": 6.832058969140217e-08, + "loss": 3.9293, + "step": 621000 + }, + { + "epoch": 6.84, + "learning_rate": 6.837559821772374e-08, + "loss": 3.9369, + "step": 621500 + }, + { + "epoch": 6.84, + "learning_rate": 6.843060674404532e-08, + "loss": 3.9225, + "step": 622000 + }, + { + "epoch": 6.85, + "learning_rate": 6.848561527036691e-08, + "loss": 3.9419, + "step": 622500 + }, + { + "epoch": 6.85, + "learning_rate": 6.854062379668849e-08, + "loss": 3.9314, + "step": 623000 + }, + { + "epoch": 6.86, + "learning_rate": 6.859563232301006e-08, + "loss": 3.9441, + "step": 623500 + }, + { + "epoch": 6.87, + "learning_rate": 6.865064084933164e-08, + "loss": 3.9144, + "step": 624000 + }, + { + "epoch": 6.87, + "learning_rate": 6.870564937565321e-08, + "loss": 3.9274, + "step": 624500 + }, + { + "epoch": 6.88, + "learning_rate": 6.87606579019748e-08, + "loss": 3.9134, + "step": 625000 + }, + { + "epoch": 6.88, + "learning_rate": 6.881566642829638e-08, + "loss": 3.9179, + "step": 625500 + }, + { + "epoch": 6.89, + "learning_rate": 6.887067495461795e-08, + "loss": 3.9024, + "step": 626000 + }, + { + "epoch": 6.89, + "learning_rate": 6.892568348093954e-08, + "loss": 3.9225, + "step": 626500 + }, + { + "epoch": 6.9, + "learning_rate": 6.898069200726112e-08, + "loss": 3.9324, + "step": 627000 + }, + { + "epoch": 6.9, + "learning_rate": 6.90357005335827e-08, + "loss": 3.9262, + "step": 627500 + }, + { + "epoch": 6.91, + "learning_rate": 6.909070905990428e-08, + "loss": 3.9258, + "step": 628000 + }, + { + "epoch": 6.91, + "learning_rate": 6.914571758622586e-08, + "loss": 3.9372, + "step": 628500 + }, + { + "epoch": 6.92, + "learning_rate": 6.920072611254744e-08, + "loss": 3.9198, + "step": 629000 + }, + { + "epoch": 6.93, + "learning_rate": 6.925573463886902e-08, + "loss": 3.9342, + "step": 629500 + }, + { + "epoch": 6.93, + "learning_rate": 6.93107431651906e-08, + "loss": 3.9058, + "step": 630000 + }, + { + "epoch": 6.94, + "learning_rate": 6.936575169151219e-08, + "loss": 3.9207, + "step": 630500 + }, + { + "epoch": 6.94, + "learning_rate": 6.942076021783377e-08, + "loss": 3.9388, + "step": 631000 + }, + { + "epoch": 6.95, + "learning_rate": 6.947576874415534e-08, + "loss": 3.9307, + "step": 631500 + }, + { + "epoch": 6.95, + "learning_rate": 6.953077727047693e-08, + "loss": 3.9357, + "step": 632000 + }, + { + "epoch": 6.96, + "learning_rate": 6.95857857967985e-08, + "loss": 3.9173, + "step": 632500 + }, + { + "epoch": 6.96, + "learning_rate": 6.964079432312008e-08, + "loss": 3.9104, + "step": 633000 + }, + { + "epoch": 6.97, + "learning_rate": 6.969580284944167e-08, + "loss": 3.9464, + "step": 633500 + }, + { + "epoch": 6.98, + "learning_rate": 6.975081137576325e-08, + "loss": 3.9087, + "step": 634000 + }, + { + "epoch": 6.98, + "learning_rate": 6.980581990208482e-08, + "loss": 3.9162, + "step": 634500 + }, + { + "epoch": 6.99, + "learning_rate": 6.98608284284064e-08, + "loss": 3.9259, + "step": 635000 + }, + { + "epoch": 6.99, + "learning_rate": 6.991583695472797e-08, + "loss": 3.9292, + "step": 635500 + }, + { + "epoch": 7.0, + "learning_rate": 6.997084548104955e-08, + "loss": 3.9056, + "step": 636000 + }, + { + "epoch": 7.0, + "eval_loss": 3.940394878387451, + "eval_runtime": 6.1436, + "eval_samples_per_second": 252.945, + "step": 636265 + }, + { + "epoch": 7.0, + "learning_rate": 7.002585400737114e-08, + "loss": 3.9303, + "step": 636500 + }, + { + "epoch": 7.01, + "learning_rate": 7.008086253369272e-08, + "loss": 3.9269, + "step": 637000 + }, + { + "epoch": 7.01, + "learning_rate": 7.013587106001429e-08, + "loss": 3.9401, + "step": 637500 + }, + { + "epoch": 7.02, + "learning_rate": 7.019087958633588e-08, + "loss": 3.9193, + "step": 638000 + }, + { + "epoch": 7.02, + "learning_rate": 7.024588811265746e-08, + "loss": 3.9222, + "step": 638500 + }, + { + "epoch": 7.03, + "learning_rate": 7.030089663897903e-08, + "loss": 3.9185, + "step": 639000 + }, + { + "epoch": 7.04, + "learning_rate": 7.035590516530062e-08, + "loss": 3.9251, + "step": 639500 + }, + { + "epoch": 7.04, + "learning_rate": 7.04109136916222e-08, + "loss": 3.9128, + "step": 640000 + }, + { + "epoch": 7.05, + "learning_rate": 7.046592221794377e-08, + "loss": 3.9191, + "step": 640500 + }, + { + "epoch": 7.05, + "learning_rate": 7.052093074426536e-08, + "loss": 3.9081, + "step": 641000 + }, + { + "epoch": 7.06, + "learning_rate": 7.057593927058694e-08, + "loss": 3.9258, + "step": 641500 + }, + { + "epoch": 7.06, + "learning_rate": 7.063094779690851e-08, + "loss": 3.9314, + "step": 642000 + }, + { + "epoch": 7.07, + "learning_rate": 7.06859563232301e-08, + "loss": 3.9366, + "step": 642500 + }, + { + "epoch": 7.07, + "learning_rate": 7.074096484955168e-08, + "loss": 3.9039, + "step": 643000 + }, + { + "epoch": 7.08, + "learning_rate": 7.079597337587325e-08, + "loss": 3.918, + "step": 643500 + }, + { + "epoch": 7.09, + "learning_rate": 7.085098190219484e-08, + "loss": 3.9195, + "step": 644000 + }, + { + "epoch": 7.09, + "learning_rate": 7.090599042851642e-08, + "loss": 3.9363, + "step": 644500 + }, + { + "epoch": 7.1, + "learning_rate": 7.0960998954838e-08, + "loss": 3.9192, + "step": 645000 + }, + { + "epoch": 7.1, + "learning_rate": 7.101600748115958e-08, + "loss": 3.9334, + "step": 645500 + }, + { + "epoch": 7.11, + "learning_rate": 7.107101600748116e-08, + "loss": 3.9039, + "step": 646000 + }, + { + "epoch": 7.11, + "learning_rate": 7.112602453380274e-08, + "loss": 3.9188, + "step": 646500 + }, + { + "epoch": 7.12, + "learning_rate": 7.118103306012431e-08, + "loss": 3.9307, + "step": 647000 + }, + { + "epoch": 7.12, + "learning_rate": 7.123604158644589e-08, + "loss": 3.9368, + "step": 647500 + }, + { + "epoch": 7.13, + "learning_rate": 7.129105011276748e-08, + "loss": 3.9192, + "step": 648000 + }, + { + "epoch": 7.13, + "learning_rate": 7.134605863908905e-08, + "loss": 3.9148, + "step": 648500 + }, + { + "epoch": 7.14, + "learning_rate": 7.140106716541063e-08, + "loss": 3.9037, + "step": 649000 + }, + { + "epoch": 7.15, + "learning_rate": 7.145607569173222e-08, + "loss": 3.901, + "step": 649500 + }, + { + "epoch": 7.15, + "learning_rate": 7.15110842180538e-08, + "loss": 3.9074, + "step": 650000 + }, + { + "epoch": 7.16, + "learning_rate": 7.156609274437537e-08, + "loss": 3.9267, + "step": 650500 + }, + { + "epoch": 7.16, + "learning_rate": 7.162110127069696e-08, + "loss": 3.9173, + "step": 651000 + }, + { + "epoch": 7.17, + "learning_rate": 7.167610979701853e-08, + "loss": 3.9384, + "step": 651500 + }, + { + "epoch": 7.17, + "learning_rate": 7.173111832334011e-08, + "loss": 3.9389, + "step": 652000 + }, + { + "epoch": 7.18, + "learning_rate": 7.17861268496617e-08, + "loss": 3.9181, + "step": 652500 + }, + { + "epoch": 7.18, + "learning_rate": 7.184113537598328e-08, + "loss": 3.9165, + "step": 653000 + }, + { + "epoch": 7.19, + "learning_rate": 7.189614390230485e-08, + "loss": 3.8961, + "step": 653500 + }, + { + "epoch": 7.2, + "learning_rate": 7.195115242862644e-08, + "loss": 3.9135, + "step": 654000 + }, + { + "epoch": 7.2, + "learning_rate": 7.200616095494802e-08, + "loss": 3.9025, + "step": 654500 + }, + { + "epoch": 7.21, + "learning_rate": 7.206116948126959e-08, + "loss": 3.9301, + "step": 655000 + }, + { + "epoch": 7.21, + "learning_rate": 7.211617800759118e-08, + "loss": 3.8983, + "step": 655500 + }, + { + "epoch": 7.22, + "learning_rate": 7.217118653391276e-08, + "loss": 3.924, + "step": 656000 + }, + { + "epoch": 7.22, + "learning_rate": 7.222619506023433e-08, + "loss": 3.9168, + "step": 656500 + }, + { + "epoch": 7.23, + "learning_rate": 7.228120358655592e-08, + "loss": 3.9028, + "step": 657000 + }, + { + "epoch": 7.23, + "learning_rate": 7.23362121128775e-08, + "loss": 3.9169, + "step": 657500 + }, + { + "epoch": 7.24, + "learning_rate": 7.239122063919907e-08, + "loss": 3.906, + "step": 658000 + }, + { + "epoch": 7.24, + "learning_rate": 7.244622916552065e-08, + "loss": 3.9178, + "step": 658500 + }, + { + "epoch": 7.25, + "learning_rate": 7.250123769184223e-08, + "loss": 3.923, + "step": 659000 + }, + { + "epoch": 7.26, + "learning_rate": 7.25562462181638e-08, + "loss": 3.9157, + "step": 659500 + }, + { + "epoch": 7.26, + "learning_rate": 7.261125474448539e-08, + "loss": 3.9085, + "step": 660000 + }, + { + "epoch": 7.27, + "learning_rate": 7.266626327080697e-08, + "loss": 3.9407, + "step": 660500 + }, + { + "epoch": 7.27, + "learning_rate": 7.272127179712854e-08, + "loss": 3.9164, + "step": 661000 + }, + { + "epoch": 7.28, + "learning_rate": 7.277628032345013e-08, + "loss": 3.9124, + "step": 661500 + }, + { + "epoch": 7.28, + "learning_rate": 7.283128884977171e-08, + "loss": 3.9136, + "step": 662000 + }, + { + "epoch": 7.29, + "learning_rate": 7.288629737609328e-08, + "loss": 3.9222, + "step": 662500 + }, + { + "epoch": 7.29, + "learning_rate": 7.294130590241487e-08, + "loss": 3.9177, + "step": 663000 + }, + { + "epoch": 7.3, + "learning_rate": 7.299631442873645e-08, + "loss": 3.9184, + "step": 663500 + }, + { + "epoch": 7.31, + "learning_rate": 7.305132295505802e-08, + "loss": 3.9249, + "step": 664000 + }, + { + "epoch": 7.31, + "learning_rate": 7.310633148137961e-08, + "loss": 3.9227, + "step": 664500 + }, + { + "epoch": 7.32, + "learning_rate": 7.316134000770119e-08, + "loss": 3.9117, + "step": 665000 + }, + { + "epoch": 7.32, + "learning_rate": 7.321634853402276e-08, + "loss": 3.9063, + "step": 665500 + }, + { + "epoch": 7.33, + "learning_rate": 7.327135706034435e-08, + "loss": 3.9121, + "step": 666000 + }, + { + "epoch": 7.33, + "learning_rate": 7.332636558666593e-08, + "loss": 3.9133, + "step": 666500 + }, + { + "epoch": 7.34, + "learning_rate": 7.338137411298752e-08, + "loss": 3.9194, + "step": 667000 + }, + { + "epoch": 7.34, + "learning_rate": 7.34363826393091e-08, + "loss": 3.9119, + "step": 667500 + }, + { + "epoch": 7.35, + "learning_rate": 7.349139116563067e-08, + "loss": 3.9068, + "step": 668000 + }, + { + "epoch": 7.35, + "learning_rate": 7.354639969195226e-08, + "loss": 3.8997, + "step": 668500 + }, + { + "epoch": 7.36, + "learning_rate": 7.360140821827384e-08, + "loss": 3.9227, + "step": 669000 + }, + { + "epoch": 7.37, + "learning_rate": 7.365641674459541e-08, + "loss": 3.9088, + "step": 669500 + }, + { + "epoch": 7.37, + "learning_rate": 7.371142527091699e-08, + "loss": 3.8975, + "step": 670000 + }, + { + "epoch": 7.38, + "learning_rate": 7.376643379723858e-08, + "loss": 3.9216, + "step": 670500 + }, + { + "epoch": 7.38, + "learning_rate": 7.382144232356015e-08, + "loss": 3.9021, + "step": 671000 + }, + { + "epoch": 7.39, + "learning_rate": 7.387645084988173e-08, + "loss": 3.9231, + "step": 671500 + }, + { + "epoch": 7.39, + "learning_rate": 7.39314593762033e-08, + "loss": 3.9226, + "step": 672000 + }, + { + "epoch": 7.4, + "learning_rate": 7.398646790252488e-08, + "loss": 3.9387, + "step": 672500 + }, + { + "epoch": 7.4, + "learning_rate": 7.404147642884647e-08, + "loss": 3.9327, + "step": 673000 + }, + { + "epoch": 7.41, + "learning_rate": 7.409648495516804e-08, + "loss": 3.9036, + "step": 673500 + }, + { + "epoch": 7.42, + "learning_rate": 7.415149348148962e-08, + "loss": 3.9243, + "step": 674000 + }, + { + "epoch": 7.42, + "learning_rate": 7.420650200781121e-08, + "loss": 3.9086, + "step": 674500 + }, + { + "epoch": 7.43, + "learning_rate": 7.426151053413279e-08, + "loss": 3.9141, + "step": 675000 + }, + { + "epoch": 7.43, + "learning_rate": 7.431651906045436e-08, + "loss": 3.9254, + "step": 675500 + }, + { + "epoch": 7.44, + "learning_rate": 7.437152758677595e-08, + "loss": 3.906, + "step": 676000 + }, + { + "epoch": 7.44, + "learning_rate": 7.442653611309753e-08, + "loss": 3.9208, + "step": 676500 + }, + { + "epoch": 7.45, + "learning_rate": 7.44815446394191e-08, + "loss": 3.9241, + "step": 677000 + }, + { + "epoch": 7.45, + "learning_rate": 7.453655316574069e-08, + "loss": 3.9148, + "step": 677500 + }, + { + "epoch": 7.46, + "learning_rate": 7.459156169206227e-08, + "loss": 3.9087, + "step": 678000 + }, + { + "epoch": 7.46, + "learning_rate": 7.464657021838384e-08, + "loss": 3.9096, + "step": 678500 + }, + { + "epoch": 7.47, + "learning_rate": 7.470157874470543e-08, + "loss": 3.9391, + "step": 679000 + }, + { + "epoch": 7.48, + "learning_rate": 7.475658727102701e-08, + "loss": 3.9227, + "step": 679500 + }, + { + "epoch": 7.48, + "learning_rate": 7.481159579734858e-08, + "loss": 3.9108, + "step": 680000 + }, + { + "epoch": 7.49, + "learning_rate": 7.486660432367017e-08, + "loss": 3.9147, + "step": 680500 + }, + { + "epoch": 7.49, + "learning_rate": 7.492161284999175e-08, + "loss": 3.9291, + "step": 681000 + }, + { + "epoch": 7.5, + "learning_rate": 7.497662137631332e-08, + "loss": 3.9131, + "step": 681500 + }, + { + "epoch": 7.5, + "learning_rate": 7.503162990263491e-08, + "loss": 3.9299, + "step": 682000 + }, + { + "epoch": 7.51, + "learning_rate": 7.508663842895649e-08, + "loss": 3.9156, + "step": 682500 + }, + { + "epoch": 7.51, + "learning_rate": 7.514164695527807e-08, + "loss": 3.9146, + "step": 683000 + }, + { + "epoch": 7.52, + "learning_rate": 7.519665548159964e-08, + "loss": 3.9249, + "step": 683500 + }, + { + "epoch": 7.53, + "learning_rate": 7.525166400792122e-08, + "loss": 3.9217, + "step": 684000 + }, + { + "epoch": 7.53, + "learning_rate": 7.530667253424279e-08, + "loss": 3.8964, + "step": 684500 + }, + { + "epoch": 7.54, + "learning_rate": 7.536168106056438e-08, + "loss": 3.9092, + "step": 685000 + }, + { + "epoch": 7.54, + "learning_rate": 7.541668958688596e-08, + "loss": 3.8986, + "step": 685500 + }, + { + "epoch": 7.55, + "learning_rate": 7.547169811320755e-08, + "loss": 3.9074, + "step": 686000 + }, + { + "epoch": 7.55, + "learning_rate": 7.552670663952912e-08, + "loss": 3.9234, + "step": 686500 + }, + { + "epoch": 7.56, + "learning_rate": 7.55817151658507e-08, + "loss": 3.8932, + "step": 687000 + }, + { + "epoch": 7.56, + "learning_rate": 7.563672369217229e-08, + "loss": 3.9228, + "step": 687500 + }, + { + "epoch": 7.57, + "learning_rate": 7.569173221849386e-08, + "loss": 3.9148, + "step": 688000 + }, + { + "epoch": 7.57, + "learning_rate": 7.574674074481544e-08, + "loss": 3.9152, + "step": 688500 + }, + { + "epoch": 7.58, + "learning_rate": 7.580174927113703e-08, + "loss": 3.8918, + "step": 689000 + }, + { + "epoch": 7.59, + "learning_rate": 7.58567577974586e-08, + "loss": 3.921, + "step": 689500 + }, + { + "epoch": 7.59, + "learning_rate": 7.591176632378018e-08, + "loss": 3.8818, + "step": 690000 + }, + { + "epoch": 7.6, + "learning_rate": 7.596677485010177e-08, + "loss": 3.93, + "step": 690500 + }, + { + "epoch": 7.6, + "learning_rate": 7.602178337642335e-08, + "loss": 3.9163, + "step": 691000 + }, + { + "epoch": 7.61, + "learning_rate": 7.607679190274492e-08, + "loss": 3.8964, + "step": 691500 + }, + { + "epoch": 7.61, + "learning_rate": 7.613180042906651e-08, + "loss": 3.9159, + "step": 692000 + }, + { + "epoch": 7.62, + "learning_rate": 7.618680895538809e-08, + "loss": 3.9261, + "step": 692500 + }, + { + "epoch": 7.62, + "learning_rate": 7.624181748170966e-08, + "loss": 3.9159, + "step": 693000 + }, + { + "epoch": 7.63, + "learning_rate": 7.629682600803125e-08, + "loss": 3.9089, + "step": 693500 + }, + { + "epoch": 7.64, + "learning_rate": 7.635183453435283e-08, + "loss": 3.8955, + "step": 694000 + }, + { + "epoch": 7.64, + "learning_rate": 7.64068430606744e-08, + "loss": 3.9165, + "step": 694500 + }, + { + "epoch": 7.65, + "learning_rate": 7.646185158699598e-08, + "loss": 3.9015, + "step": 695000 + }, + { + "epoch": 7.65, + "learning_rate": 7.651686011331755e-08, + "loss": 3.9221, + "step": 695500 + }, + { + "epoch": 7.66, + "learning_rate": 7.657186863963913e-08, + "loss": 3.9051, + "step": 696000 + }, + { + "epoch": 7.66, + "learning_rate": 7.662687716596072e-08, + "loss": 3.8975, + "step": 696500 + }, + { + "epoch": 7.67, + "learning_rate": 7.66818856922823e-08, + "loss": 3.897, + "step": 697000 + }, + { + "epoch": 7.67, + "learning_rate": 7.673689421860387e-08, + "loss": 3.8943, + "step": 697500 + }, + { + "epoch": 7.68, + "learning_rate": 7.679190274492546e-08, + "loss": 3.9132, + "step": 698000 + }, + { + "epoch": 7.68, + "learning_rate": 7.684691127124704e-08, + "loss": 3.9125, + "step": 698500 + }, + { + "epoch": 7.69, + "learning_rate": 7.690191979756861e-08, + "loss": 3.8882, + "step": 699000 + }, + { + "epoch": 7.7, + "learning_rate": 7.69569283238902e-08, + "loss": 3.9115, + "step": 699500 + }, + { + "epoch": 7.7, + "learning_rate": 7.701193685021178e-08, + "loss": 3.9181, + "step": 700000 + }, + { + "epoch": 7.71, + "learning_rate": 7.706694537653335e-08, + "loss": 3.9306, + "step": 700500 + }, + { + "epoch": 7.71, + "learning_rate": 7.712195390285494e-08, + "loss": 3.8962, + "step": 701000 + }, + { + "epoch": 7.72, + "learning_rate": 7.717696242917652e-08, + "loss": 3.9223, + "step": 701500 + }, + { + "epoch": 7.72, + "learning_rate": 7.72319709554981e-08, + "loss": 3.8991, + "step": 702000 + }, + { + "epoch": 7.73, + "learning_rate": 7.728697948181968e-08, + "loss": 3.9178, + "step": 702500 + }, + { + "epoch": 7.73, + "learning_rate": 7.734198800814126e-08, + "loss": 3.9014, + "step": 703000 + }, + { + "epoch": 7.74, + "learning_rate": 7.739699653446283e-08, + "loss": 3.9259, + "step": 703500 + }, + { + "epoch": 7.75, + "learning_rate": 7.745200506078442e-08, + "loss": 3.9172, + "step": 704000 + }, + { + "epoch": 7.75, + "learning_rate": 7.7507013587106e-08, + "loss": 3.916, + "step": 704500 + }, + { + "epoch": 7.76, + "learning_rate": 7.756202211342759e-08, + "loss": 3.8928, + "step": 705000 + }, + { + "epoch": 7.76, + "learning_rate": 7.761703063974916e-08, + "loss": 3.8943, + "step": 705500 + }, + { + "epoch": 7.77, + "learning_rate": 7.767203916607074e-08, + "loss": 3.9075, + "step": 706000 + }, + { + "epoch": 7.77, + "learning_rate": 7.772704769239232e-08, + "loss": 3.9107, + "step": 706500 + }, + { + "epoch": 7.78, + "learning_rate": 7.778205621871389e-08, + "loss": 3.9118, + "step": 707000 + }, + { + "epoch": 7.78, + "learning_rate": 7.783706474503548e-08, + "loss": 3.9033, + "step": 707500 + }, + { + "epoch": 7.79, + "learning_rate": 7.789207327135706e-08, + "loss": 3.9055, + "step": 708000 + }, + { + "epoch": 7.79, + "learning_rate": 7.794708179767863e-08, + "loss": 3.9084, + "step": 708500 + }, + { + "epoch": 7.8, + "learning_rate": 7.800209032400021e-08, + "loss": 3.9016, + "step": 709000 + }, + { + "epoch": 7.81, + "learning_rate": 7.80570988503218e-08, + "loss": 3.9138, + "step": 709500 + }, + { + "epoch": 7.81, + "learning_rate": 7.811210737664337e-08, + "loss": 3.9096, + "step": 710000 + }, + { + "epoch": 7.82, + "learning_rate": 7.816711590296495e-08, + "loss": 3.8901, + "step": 710500 + }, + { + "epoch": 7.82, + "learning_rate": 7.822212442928654e-08, + "loss": 3.906, + "step": 711000 + }, + { + "epoch": 7.83, + "learning_rate": 7.827713295560811e-08, + "loss": 3.8939, + "step": 711500 + }, + { + "epoch": 7.83, + "learning_rate": 7.833214148192969e-08, + "loss": 3.9009, + "step": 712000 + }, + { + "epoch": 7.84, + "learning_rate": 7.838715000825128e-08, + "loss": 3.9171, + "step": 712500 + }, + { + "epoch": 7.84, + "learning_rate": 7.844215853457286e-08, + "loss": 3.9023, + "step": 713000 + }, + { + "epoch": 7.85, + "learning_rate": 7.849716706089443e-08, + "loss": 3.9002, + "step": 713500 + }, + { + "epoch": 7.86, + "learning_rate": 7.855217558721602e-08, + "loss": 3.9156, + "step": 714000 + }, + { + "epoch": 7.86, + "learning_rate": 7.86071841135376e-08, + "loss": 3.9115, + "step": 714500 + }, + { + "epoch": 7.87, + "learning_rate": 7.866219263985917e-08, + "loss": 3.9076, + "step": 715000 + }, + { + "epoch": 7.87, + "learning_rate": 7.871720116618076e-08, + "loss": 3.8795, + "step": 715500 + }, + { + "epoch": 7.88, + "learning_rate": 7.877220969250234e-08, + "loss": 3.9118, + "step": 716000 + }, + { + "epoch": 7.88, + "learning_rate": 7.882721821882391e-08, + "loss": 3.8955, + "step": 716500 + }, + { + "epoch": 7.89, + "learning_rate": 7.88822267451455e-08, + "loss": 3.9018, + "step": 717000 + }, + { + "epoch": 7.89, + "learning_rate": 7.893723527146708e-08, + "loss": 3.9139, + "step": 717500 + }, + { + "epoch": 7.9, + "learning_rate": 7.899224379778865e-08, + "loss": 3.9105, + "step": 718000 + }, + { + "epoch": 7.9, + "learning_rate": 7.904725232411024e-08, + "loss": 3.9065, + "step": 718500 + }, + { + "epoch": 7.91, + "learning_rate": 7.910226085043182e-08, + "loss": 3.9276, + "step": 719000 + }, + { + "epoch": 7.92, + "learning_rate": 7.91572693767534e-08, + "loss": 3.9266, + "step": 719500 + }, + { + "epoch": 7.92, + "learning_rate": 7.921227790307497e-08, + "loss": 3.9034, + "step": 720000 + }, + { + "epoch": 7.93, + "learning_rate": 7.926728642939655e-08, + "loss": 3.9024, + "step": 720500 + }, + { + "epoch": 7.93, + "learning_rate": 7.932229495571812e-08, + "loss": 3.9129, + "step": 721000 + }, + { + "epoch": 7.94, + "learning_rate": 7.937730348203971e-08, + "loss": 3.8932, + "step": 721500 + }, + { + "epoch": 7.94, + "learning_rate": 7.943231200836129e-08, + "loss": 3.8973, + "step": 722000 + }, + { + "epoch": 7.95, + "learning_rate": 7.948732053468286e-08, + "loss": 3.8973, + "step": 722500 + }, + { + "epoch": 7.95, + "learning_rate": 7.954232906100445e-08, + "loss": 3.9007, + "step": 723000 + }, + { + "epoch": 7.96, + "learning_rate": 7.959733758732603e-08, + "loss": 3.8944, + "step": 723500 + }, + { + "epoch": 7.97, + "learning_rate": 7.965234611364762e-08, + "loss": 3.9182, + "step": 724000 + }, + { + "epoch": 7.97, + "learning_rate": 7.970735463996919e-08, + "loss": 3.8951, + "step": 724500 + }, + { + "epoch": 7.98, + "learning_rate": 7.976236316629077e-08, + "loss": 3.9124, + "step": 725000 + }, + { + "epoch": 7.98, + "learning_rate": 7.981737169261236e-08, + "loss": 3.9019, + "step": 725500 + }, + { + "epoch": 7.99, + "learning_rate": 7.987238021893393e-08, + "loss": 3.8971, + "step": 726000 + }, + { + "epoch": 7.99, + "learning_rate": 7.992738874525551e-08, + "loss": 3.9142, + "step": 726500 + }, + { + "epoch": 8.0, + "learning_rate": 7.99823972715771e-08, + "loss": 3.8871, + "step": 727000 + }, + { + "epoch": 8.0, + "eval_loss": 3.926957845687866, + "eval_runtime": 6.1331, + "eval_samples_per_second": 253.379, + "step": 727160 + }, + { + "epoch": 8.0, + "learning_rate": 8.003740579789867e-08, + "loss": 3.8918, + "step": 727500 + }, + { + "epoch": 8.01, + "learning_rate": 8.009241432422025e-08, + "loss": 3.9025, + "step": 728000 + }, + { + "epoch": 8.01, + "learning_rate": 8.014742285054184e-08, + "loss": 3.9083, + "step": 728500 + }, + { + "epoch": 8.02, + "learning_rate": 8.020243137686342e-08, + "loss": 3.9043, + "step": 729000 + }, + { + "epoch": 8.03, + "learning_rate": 8.025743990318499e-08, + "loss": 3.8933, + "step": 729500 + }, + { + "epoch": 8.03, + "learning_rate": 8.031244842950658e-08, + "loss": 3.9053, + "step": 730000 + }, + { + "epoch": 8.04, + "learning_rate": 8.036745695582816e-08, + "loss": 3.9155, + "step": 730500 + }, + { + "epoch": 8.04, + "learning_rate": 8.042246548214973e-08, + "loss": 3.9173, + "step": 731000 + }, + { + "epoch": 8.05, + "learning_rate": 8.047747400847131e-08, + "loss": 3.9124, + "step": 731500 + }, + { + "epoch": 8.05, + "learning_rate": 8.053248253479288e-08, + "loss": 3.913, + "step": 732000 + }, + { + "epoch": 8.06, + "learning_rate": 8.058749106111446e-08, + "loss": 3.9008, + "step": 732500 + }, + { + "epoch": 8.06, + "learning_rate": 8.064249958743605e-08, + "loss": 3.901, + "step": 733000 + }, + { + "epoch": 8.07, + "learning_rate": 8.069750811375762e-08, + "loss": 3.88, + "step": 733500 + }, + { + "epoch": 8.08, + "learning_rate": 8.07525166400792e-08, + "loss": 3.9202, + "step": 734000 + }, + { + "epoch": 8.08, + "learning_rate": 8.080752516640079e-08, + "loss": 3.9036, + "step": 734500 + }, + { + "epoch": 8.09, + "learning_rate": 8.086253369272237e-08, + "loss": 3.8783, + "step": 735000 + }, + { + "epoch": 8.09, + "learning_rate": 8.091754221904394e-08, + "loss": 3.9001, + "step": 735500 + }, + { + "epoch": 8.1, + "learning_rate": 8.097255074536553e-08, + "loss": 3.8894, + "step": 736000 + }, + { + "epoch": 8.1, + "learning_rate": 8.10275592716871e-08, + "loss": 3.8991, + "step": 736500 + }, + { + "epoch": 8.11, + "learning_rate": 8.108256779800868e-08, + "loss": 3.894, + "step": 737000 + }, + { + "epoch": 8.11, + "learning_rate": 8.113757632433027e-08, + "loss": 3.8955, + "step": 737500 + }, + { + "epoch": 8.12, + "learning_rate": 8.119258485065185e-08, + "loss": 3.9309, + "step": 738000 + }, + { + "epoch": 8.12, + "learning_rate": 8.124759337697342e-08, + "loss": 3.8995, + "step": 738500 + }, + { + "epoch": 8.13, + "learning_rate": 8.130260190329501e-08, + "loss": 3.9121, + "step": 739000 + }, + { + "epoch": 8.14, + "learning_rate": 8.135761042961659e-08, + "loss": 3.8867, + "step": 739500 + }, + { + "epoch": 8.14, + "learning_rate": 8.141261895593816e-08, + "loss": 3.9147, + "step": 740000 + }, + { + "epoch": 8.15, + "learning_rate": 8.146762748225975e-08, + "loss": 3.8934, + "step": 740500 + }, + { + "epoch": 8.15, + "learning_rate": 8.152263600858133e-08, + "loss": 3.9009, + "step": 741000 + }, + { + "epoch": 8.16, + "learning_rate": 8.15776445349029e-08, + "loss": 3.8814, + "step": 741500 + }, + { + "epoch": 8.16, + "learning_rate": 8.16326530612245e-08, + "loss": 3.8974, + "step": 742000 + }, + { + "epoch": 8.17, + "learning_rate": 8.168766158754607e-08, + "loss": 3.8935, + "step": 742500 + }, + { + "epoch": 8.17, + "learning_rate": 8.174267011386765e-08, + "loss": 3.8936, + "step": 743000 + }, + { + "epoch": 8.18, + "learning_rate": 8.179767864018922e-08, + "loss": 3.9022, + "step": 743500 + }, + { + "epoch": 8.19, + "learning_rate": 8.18526871665108e-08, + "loss": 3.8802, + "step": 744000 + }, + { + "epoch": 8.19, + "learning_rate": 8.190769569283239e-08, + "loss": 3.9066, + "step": 744500 + }, + { + "epoch": 8.2, + "learning_rate": 8.196270421915396e-08, + "loss": 3.886, + "step": 745000 + }, + { + "epoch": 8.2, + "learning_rate": 8.201771274547554e-08, + "loss": 3.8857, + "step": 745500 + }, + { + "epoch": 8.21, + "learning_rate": 8.207272127179713e-08, + "loss": 3.9002, + "step": 746000 + }, + { + "epoch": 8.21, + "learning_rate": 8.21277297981187e-08, + "loss": 3.8968, + "step": 746500 + }, + { + "epoch": 8.22, + "learning_rate": 8.218273832444028e-08, + "loss": 3.8912, + "step": 747000 + }, + { + "epoch": 8.22, + "learning_rate": 8.223774685076187e-08, + "loss": 3.9007, + "step": 747500 + }, + { + "epoch": 8.23, + "learning_rate": 8.229275537708344e-08, + "loss": 3.9005, + "step": 748000 + }, + { + "epoch": 8.23, + "learning_rate": 8.234776390340502e-08, + "loss": 3.8994, + "step": 748500 + }, + { + "epoch": 8.24, + "learning_rate": 8.240277242972661e-08, + "loss": 3.8879, + "step": 749000 + }, + { + "epoch": 8.25, + "learning_rate": 8.245778095604818e-08, + "loss": 3.8951, + "step": 749500 + }, + { + "epoch": 8.25, + "learning_rate": 8.251278948236976e-08, + "loss": 3.9098, + "step": 750000 + }, + { + "epoch": 8.26, + "learning_rate": 8.256779800869135e-08, + "loss": 3.8986, + "step": 750500 + }, + { + "epoch": 8.26, + "learning_rate": 8.262280653501293e-08, + "loss": 3.8956, + "step": 751000 + }, + { + "epoch": 8.27, + "learning_rate": 8.26778150613345e-08, + "loss": 3.8887, + "step": 751500 + }, + { + "epoch": 8.27, + "learning_rate": 8.273282358765609e-08, + "loss": 3.9047, + "step": 752000 + }, + { + "epoch": 8.28, + "learning_rate": 8.278783211397767e-08, + "loss": 3.8919, + "step": 752500 + }, + { + "epoch": 8.28, + "learning_rate": 8.284284064029924e-08, + "loss": 3.9062, + "step": 753000 + }, + { + "epoch": 8.29, + "learning_rate": 8.289784916662083e-08, + "loss": 3.8922, + "step": 753500 + }, + { + "epoch": 8.3, + "learning_rate": 8.295285769294241e-08, + "loss": 3.8911, + "step": 754000 + }, + { + "epoch": 8.3, + "learning_rate": 8.300786621926398e-08, + "loss": 3.8721, + "step": 754500 + }, + { + "epoch": 8.31, + "learning_rate": 8.306287474558556e-08, + "loss": 3.9009, + "step": 755000 + }, + { + "epoch": 8.31, + "learning_rate": 8.311788327190715e-08, + "loss": 3.8953, + "step": 755500 + }, + { + "epoch": 8.32, + "learning_rate": 8.317289179822872e-08, + "loss": 3.9044, + "step": 756000 + }, + { + "epoch": 8.32, + "learning_rate": 8.32279003245503e-08, + "loss": 3.9114, + "step": 756500 + }, + { + "epoch": 8.33, + "learning_rate": 8.328290885087188e-08, + "loss": 3.8909, + "step": 757000 + }, + { + "epoch": 8.33, + "learning_rate": 8.333791737719345e-08, + "loss": 3.9084, + "step": 757500 + }, + { + "epoch": 8.34, + "learning_rate": 8.339292590351504e-08, + "loss": 3.9077, + "step": 758000 + }, + { + "epoch": 8.34, + "learning_rate": 8.344793442983662e-08, + "loss": 3.8981, + "step": 758500 + }, + { + "epoch": 8.35, + "learning_rate": 8.350294295615819e-08, + "loss": 3.9014, + "step": 759000 + }, + { + "epoch": 8.36, + "learning_rate": 8.355795148247978e-08, + "loss": 3.882, + "step": 759500 + }, + { + "epoch": 8.36, + "learning_rate": 8.361296000880136e-08, + "loss": 3.8898, + "step": 760000 + }, + { + "epoch": 8.37, + "learning_rate": 8.366796853512293e-08, + "loss": 3.9083, + "step": 760500 + }, + { + "epoch": 8.37, + "learning_rate": 8.372297706144452e-08, + "loss": 3.8963, + "step": 761000 + }, + { + "epoch": 8.38, + "learning_rate": 8.37779855877661e-08, + "loss": 3.9029, + "step": 761500 + }, + { + "epoch": 8.38, + "learning_rate": 8.383299411408769e-08, + "loss": 3.9072, + "step": 762000 + }, + { + "epoch": 8.39, + "learning_rate": 8.388800264040926e-08, + "loss": 3.8948, + "step": 762500 + }, + { + "epoch": 8.39, + "learning_rate": 8.394301116673084e-08, + "loss": 3.8935, + "step": 763000 + }, + { + "epoch": 8.4, + "learning_rate": 8.399801969305243e-08, + "loss": 3.891, + "step": 763500 + }, + { + "epoch": 8.41, + "learning_rate": 8.4053028219374e-08, + "loss": 3.8859, + "step": 764000 + }, + { + "epoch": 8.41, + "learning_rate": 8.410803674569558e-08, + "loss": 3.8979, + "step": 764500 + }, + { + "epoch": 8.42, + "learning_rate": 8.416304527201717e-08, + "loss": 3.9019, + "step": 765000 + }, + { + "epoch": 8.42, + "learning_rate": 8.421805379833874e-08, + "loss": 3.9051, + "step": 765500 + }, + { + "epoch": 8.43, + "learning_rate": 8.427306232466032e-08, + "loss": 3.8916, + "step": 766000 + }, + { + "epoch": 8.43, + "learning_rate": 8.432807085098191e-08, + "loss": 3.9007, + "step": 766500 + }, + { + "epoch": 8.44, + "learning_rate": 8.438307937730348e-08, + "loss": 3.8985, + "step": 767000 + }, + { + "epoch": 8.44, + "learning_rate": 8.443808790362506e-08, + "loss": 3.8977, + "step": 767500 + }, + { + "epoch": 8.45, + "learning_rate": 8.449309642994664e-08, + "loss": 3.8982, + "step": 768000 + }, + { + "epoch": 8.45, + "learning_rate": 8.454810495626821e-08, + "loss": 3.9051, + "step": 768500 + }, + { + "epoch": 8.46, + "learning_rate": 8.460311348258979e-08, + "loss": 3.8998, + "step": 769000 + }, + { + "epoch": 8.47, + "learning_rate": 8.465812200891138e-08, + "loss": 3.8904, + "step": 769500 + }, + { + "epoch": 8.47, + "learning_rate": 8.471313053523295e-08, + "loss": 3.8885, + "step": 770000 + }, + { + "epoch": 8.48, + "learning_rate": 8.476813906155453e-08, + "loss": 3.8949, + "step": 770500 + }, + { + "epoch": 8.48, + "learning_rate": 8.482314758787612e-08, + "loss": 3.8934, + "step": 771000 + }, + { + "epoch": 8.49, + "learning_rate": 8.48781561141977e-08, + "loss": 3.896, + "step": 771500 + }, + { + "epoch": 8.49, + "learning_rate": 8.493316464051927e-08, + "loss": 3.8946, + "step": 772000 + }, + { + "epoch": 8.5, + "learning_rate": 8.498817316684086e-08, + "loss": 3.8903, + "step": 772500 + }, + { + "epoch": 8.5, + "learning_rate": 8.504318169316243e-08, + "loss": 3.9084, + "step": 773000 + }, + { + "epoch": 8.51, + "learning_rate": 8.509819021948401e-08, + "loss": 3.8922, + "step": 773500 + }, + { + "epoch": 8.52, + "learning_rate": 8.51531987458056e-08, + "loss": 3.9014, + "step": 774000 + }, + { + "epoch": 8.52, + "learning_rate": 8.520820727212718e-08, + "loss": 3.8909, + "step": 774500 + }, + { + "epoch": 8.53, + "learning_rate": 8.526321579844875e-08, + "loss": 3.893, + "step": 775000 + }, + { + "epoch": 8.53, + "learning_rate": 8.531822432477034e-08, + "loss": 3.8987, + "step": 775500 + }, + { + "epoch": 8.54, + "learning_rate": 8.537323285109192e-08, + "loss": 3.8829, + "step": 776000 + }, + { + "epoch": 8.54, + "learning_rate": 8.542824137741349e-08, + "loss": 3.8944, + "step": 776500 + }, + { + "epoch": 8.55, + "learning_rate": 8.548324990373508e-08, + "loss": 3.8891, + "step": 777000 + }, + { + "epoch": 8.55, + "learning_rate": 8.553825843005666e-08, + "loss": 3.8932, + "step": 777500 + }, + { + "epoch": 8.56, + "learning_rate": 8.559326695637823e-08, + "loss": 3.9029, + "step": 778000 + }, + { + "epoch": 8.56, + "learning_rate": 8.564827548269982e-08, + "loss": 3.8749, + "step": 778500 + }, + { + "epoch": 8.57, + "learning_rate": 8.57032840090214e-08, + "loss": 3.9014, + "step": 779000 + }, + { + "epoch": 8.58, + "learning_rate": 8.575829253534297e-08, + "loss": 3.8893, + "step": 779500 + }, + { + "epoch": 8.58, + "learning_rate": 8.581330106166455e-08, + "loss": 3.8894, + "step": 780000 + }, + { + "epoch": 8.59, + "learning_rate": 8.586830958798613e-08, + "loss": 3.9082, + "step": 780500 + }, + { + "epoch": 8.59, + "learning_rate": 8.592331811430771e-08, + "loss": 3.8854, + "step": 781000 + }, + { + "epoch": 8.6, + "learning_rate": 8.597832664062929e-08, + "loss": 3.8751, + "step": 781500 + }, + { + "epoch": 8.6, + "learning_rate": 8.603333516695087e-08, + "loss": 3.8986, + "step": 782000 + }, + { + "epoch": 8.61, + "learning_rate": 8.608834369327246e-08, + "loss": 3.889, + "step": 782500 + }, + { + "epoch": 8.61, + "learning_rate": 8.614335221959403e-08, + "loss": 3.9028, + "step": 783000 + }, + { + "epoch": 8.62, + "learning_rate": 8.619836074591561e-08, + "loss": 3.9117, + "step": 783500 + }, + { + "epoch": 8.63, + "learning_rate": 8.62533692722372e-08, + "loss": 3.8982, + "step": 784000 + }, + { + "epoch": 8.63, + "learning_rate": 8.630837779855877e-08, + "loss": 3.879, + "step": 784500 + }, + { + "epoch": 8.64, + "learning_rate": 8.636338632488035e-08, + "loss": 3.8861, + "step": 785000 + }, + { + "epoch": 8.64, + "learning_rate": 8.641839485120194e-08, + "loss": 3.8881, + "step": 785500 + }, + { + "epoch": 8.65, + "learning_rate": 8.647340337752351e-08, + "loss": 3.8935, + "step": 786000 + }, + { + "epoch": 8.65, + "learning_rate": 8.652841190384509e-08, + "loss": 3.8953, + "step": 786500 + }, + { + "epoch": 8.66, + "learning_rate": 8.658342043016668e-08, + "loss": 3.8813, + "step": 787000 + }, + { + "epoch": 8.66, + "learning_rate": 8.663842895648825e-08, + "loss": 3.9062, + "step": 787500 + }, + { + "epoch": 8.67, + "learning_rate": 8.669343748280983e-08, + "loss": 3.8921, + "step": 788000 + }, + { + "epoch": 8.67, + "learning_rate": 8.674844600913142e-08, + "loss": 3.8953, + "step": 788500 + }, + { + "epoch": 8.68, + "learning_rate": 8.6803454535453e-08, + "loss": 3.8898, + "step": 789000 + }, + { + "epoch": 8.69, + "learning_rate": 8.685846306177457e-08, + "loss": 3.8872, + "step": 789500 + }, + { + "epoch": 8.69, + "learning_rate": 8.691347158809616e-08, + "loss": 3.8914, + "step": 790000 + }, + { + "epoch": 8.7, + "learning_rate": 8.696848011441774e-08, + "loss": 3.8833, + "step": 790500 + }, + { + "epoch": 8.7, + "learning_rate": 8.702348864073931e-08, + "loss": 3.8932, + "step": 791000 + }, + { + "epoch": 8.71, + "learning_rate": 8.707849716706089e-08, + "loss": 3.8925, + "step": 791500 + }, + { + "epoch": 8.71, + "learning_rate": 8.713350569338246e-08, + "loss": 3.8878, + "step": 792000 + }, + { + "epoch": 8.72, + "learning_rate": 8.718851421970404e-08, + "loss": 3.9026, + "step": 792500 + }, + { + "epoch": 8.72, + "learning_rate": 8.724352274602563e-08, + "loss": 3.91, + "step": 793000 + }, + { + "epoch": 8.73, + "learning_rate": 8.72985312723472e-08, + "loss": 3.8778, + "step": 793500 + }, + { + "epoch": 8.74, + "learning_rate": 8.735353979866878e-08, + "loss": 3.8947, + "step": 794000 + }, + { + "epoch": 8.74, + "learning_rate": 8.740854832499037e-08, + "loss": 3.8945, + "step": 794500 + }, + { + "epoch": 8.75, + "learning_rate": 8.746355685131194e-08, + "loss": 3.8887, + "step": 795000 + }, + { + "epoch": 8.75, + "learning_rate": 8.751856537763352e-08, + "loss": 3.8829, + "step": 795500 + }, + { + "epoch": 8.76, + "learning_rate": 8.757357390395511e-08, + "loss": 3.8842, + "step": 796000 + }, + { + "epoch": 8.76, + "learning_rate": 8.762858243027669e-08, + "loss": 3.8862, + "step": 796500 + }, + { + "epoch": 8.77, + "learning_rate": 8.768359095659826e-08, + "loss": 3.8884, + "step": 797000 + }, + { + "epoch": 8.77, + "learning_rate": 8.773859948291985e-08, + "loss": 3.8705, + "step": 797500 + }, + { + "epoch": 8.78, + "learning_rate": 8.779360800924143e-08, + "loss": 3.8802, + "step": 798000 + }, + { + "epoch": 8.78, + "learning_rate": 8.784861653556302e-08, + "loss": 3.898, + "step": 798500 + }, + { + "epoch": 8.79, + "learning_rate": 8.790362506188459e-08, + "loss": 3.9, + "step": 799000 + }, + { + "epoch": 8.8, + "learning_rate": 8.795863358820617e-08, + "loss": 3.8913, + "step": 799500 + }, + { + "epoch": 8.8, + "learning_rate": 8.801364211452776e-08, + "loss": 3.9104, + "step": 800000 + }, + { + "epoch": 8.81, + "learning_rate": 8.806865064084933e-08, + "loss": 3.8795, + "step": 800500 + }, + { + "epoch": 8.81, + "learning_rate": 8.812365916717091e-08, + "loss": 3.8898, + "step": 801000 + }, + { + "epoch": 8.82, + "learning_rate": 8.81786676934925e-08, + "loss": 3.9109, + "step": 801500 + }, + { + "epoch": 8.82, + "learning_rate": 8.823367621981407e-08, + "loss": 3.8936, + "step": 802000 + }, + { + "epoch": 8.83, + "learning_rate": 8.828868474613565e-08, + "loss": 3.8818, + "step": 802500 + }, + { + "epoch": 8.83, + "learning_rate": 8.834369327245722e-08, + "loss": 3.8757, + "step": 803000 + }, + { + "epoch": 8.84, + "learning_rate": 8.839870179877881e-08, + "loss": 3.8839, + "step": 803500 + }, + { + "epoch": 8.85, + "learning_rate": 8.845371032510039e-08, + "loss": 3.8843, + "step": 804000 + }, + { + "epoch": 8.85, + "learning_rate": 8.850871885142197e-08, + "loss": 3.8792, + "step": 804500 + }, + { + "epoch": 8.86, + "learning_rate": 8.856372737774354e-08, + "loss": 3.8905, + "step": 805000 + }, + { + "epoch": 8.86, + "learning_rate": 8.861873590406512e-08, + "loss": 3.8775, + "step": 805500 + }, + { + "epoch": 8.87, + "learning_rate": 8.86737444303867e-08, + "loss": 3.879, + "step": 806000 + }, + { + "epoch": 8.87, + "learning_rate": 8.872875295670828e-08, + "loss": 3.8906, + "step": 806500 + }, + { + "epoch": 8.88, + "learning_rate": 8.878376148302986e-08, + "loss": 3.9063, + "step": 807000 + }, + { + "epoch": 8.88, + "learning_rate": 8.883877000935145e-08, + "loss": 3.8853, + "step": 807500 + }, + { + "epoch": 8.89, + "learning_rate": 8.889377853567302e-08, + "loss": 3.8906, + "step": 808000 + }, + { + "epoch": 8.89, + "learning_rate": 8.89487870619946e-08, + "loss": 3.8863, + "step": 808500 + }, + { + "epoch": 8.9, + "learning_rate": 8.900379558831619e-08, + "loss": 3.889, + "step": 809000 + }, + { + "epoch": 8.91, + "learning_rate": 8.905880411463776e-08, + "loss": 3.8813, + "step": 809500 + }, + { + "epoch": 8.91, + "learning_rate": 8.911381264095934e-08, + "loss": 3.8928, + "step": 810000 + }, + { + "epoch": 8.92, + "learning_rate": 8.916882116728093e-08, + "loss": 3.9089, + "step": 810500 + }, + { + "epoch": 8.92, + "learning_rate": 8.92238296936025e-08, + "loss": 3.8768, + "step": 811000 + }, + { + "epoch": 8.93, + "learning_rate": 8.927883821992408e-08, + "loss": 3.9027, + "step": 811500 + }, + { + "epoch": 8.93, + "learning_rate": 8.933384674624567e-08, + "loss": 3.8835, + "step": 812000 + }, + { + "epoch": 8.94, + "learning_rate": 8.938885527256725e-08, + "loss": 3.8869, + "step": 812500 + }, + { + "epoch": 8.94, + "learning_rate": 8.944386379888882e-08, + "loss": 3.8905, + "step": 813000 + }, + { + "epoch": 8.95, + "learning_rate": 8.949887232521041e-08, + "loss": 3.8784, + "step": 813500 + }, + { + "epoch": 8.96, + "learning_rate": 8.955388085153199e-08, + "loss": 3.8828, + "step": 814000 + }, + { + "epoch": 8.96, + "learning_rate": 8.960888937785356e-08, + "loss": 3.8996, + "step": 814500 + }, + { + "epoch": 8.97, + "learning_rate": 8.966389790417515e-08, + "loss": 3.9052, + "step": 815000 + }, + { + "epoch": 8.97, + "learning_rate": 8.971890643049673e-08, + "loss": 3.8879, + "step": 815500 + }, + { + "epoch": 8.98, + "learning_rate": 8.97739149568183e-08, + "loss": 3.8851, + "step": 816000 + }, + { + "epoch": 8.98, + "learning_rate": 8.982892348313988e-08, + "loss": 3.9055, + "step": 816500 + }, + { + "epoch": 8.99, + "learning_rate": 8.988393200946145e-08, + "loss": 3.8858, + "step": 817000 + }, + { + "epoch": 8.99, + "learning_rate": 8.993894053578304e-08, + "loss": 3.881, + "step": 817500 + }, + { + "epoch": 9.0, + "learning_rate": 8.999394906210462e-08, + "loss": 3.9195, + "step": 818000 + }, + { + "epoch": 9.0, + "eval_loss": 3.91526460647583, + "eval_runtime": 6.1324, + "eval_samples_per_second": 253.406, + "step": 818055 + }, + { + "epoch": 9.0, + "learning_rate": 9.00489575884262e-08, + "loss": 3.8931, + "step": 818500 + }, + { + "epoch": 9.01, + "learning_rate": 9.010396611474778e-08, + "loss": 3.8859, + "step": 819000 + }, + { + "epoch": 9.02, + "learning_rate": 9.015897464106936e-08, + "loss": 3.8897, + "step": 819500 + }, + { + "epoch": 9.02, + "learning_rate": 9.021398316739094e-08, + "loss": 3.8872, + "step": 820000 + }, + { + "epoch": 9.03, + "learning_rate": 9.026899169371253e-08, + "loss": 3.8912, + "step": 820500 + }, + { + "epoch": 9.03, + "learning_rate": 9.03240002200341e-08, + "loss": 3.8934, + "step": 821000 + }, + { + "epoch": 9.04, + "learning_rate": 9.037900874635568e-08, + "loss": 3.8843, + "step": 821500 + }, + { + "epoch": 9.04, + "learning_rate": 9.043401727267727e-08, + "loss": 3.8948, + "step": 822000 + }, + { + "epoch": 9.05, + "learning_rate": 9.048902579899884e-08, + "loss": 3.866, + "step": 822500 + }, + { + "epoch": 9.05, + "learning_rate": 9.054403432532042e-08, + "loss": 3.8823, + "step": 823000 + }, + { + "epoch": 9.06, + "learning_rate": 9.059904285164201e-08, + "loss": 3.8994, + "step": 823500 + }, + { + "epoch": 9.07, + "learning_rate": 9.065405137796358e-08, + "loss": 3.881, + "step": 824000 + }, + { + "epoch": 9.07, + "learning_rate": 9.070905990428516e-08, + "loss": 3.8865, + "step": 824500 + }, + { + "epoch": 9.08, + "learning_rate": 9.076406843060675e-08, + "loss": 3.8763, + "step": 825000 + }, + { + "epoch": 9.08, + "learning_rate": 9.081907695692832e-08, + "loss": 3.9057, + "step": 825500 + }, + { + "epoch": 9.09, + "learning_rate": 9.08740854832499e-08, + "loss": 3.8752, + "step": 826000 + }, + { + "epoch": 9.09, + "learning_rate": 9.092909400957149e-08, + "loss": 3.8823, + "step": 826500 + }, + { + "epoch": 9.1, + "learning_rate": 9.098410253589306e-08, + "loss": 3.8974, + "step": 827000 + }, + { + "epoch": 9.1, + "learning_rate": 9.103911106221464e-08, + "loss": 3.8731, + "step": 827500 + }, + { + "epoch": 9.11, + "learning_rate": 9.109411958853622e-08, + "loss": 3.887, + "step": 828000 + }, + { + "epoch": 9.11, + "learning_rate": 9.114912811485779e-08, + "loss": 3.8689, + "step": 828500 + }, + { + "epoch": 9.12, + "learning_rate": 9.120413664117937e-08, + "loss": 3.9069, + "step": 829000 + }, + { + "epoch": 9.13, + "learning_rate": 9.125914516750096e-08, + "loss": 3.8876, + "step": 829500 + }, + { + "epoch": 9.13, + "learning_rate": 9.131415369382253e-08, + "loss": 3.8935, + "step": 830000 + }, + { + "epoch": 9.14, + "learning_rate": 9.136916222014411e-08, + "loss": 3.8853, + "step": 830500 + }, + { + "epoch": 9.14, + "learning_rate": 9.14241707464657e-08, + "loss": 3.8828, + "step": 831000 + }, + { + "epoch": 9.15, + "learning_rate": 9.147917927278727e-08, + "loss": 3.8912, + "step": 831500 + }, + { + "epoch": 9.15, + "learning_rate": 9.153418779910885e-08, + "loss": 3.8987, + "step": 832000 + }, + { + "epoch": 9.16, + "learning_rate": 9.158919632543044e-08, + "loss": 3.8755, + "step": 832500 + }, + { + "epoch": 9.16, + "learning_rate": 9.164420485175201e-08, + "loss": 3.8698, + "step": 833000 + }, + { + "epoch": 9.17, + "learning_rate": 9.169921337807359e-08, + "loss": 3.8809, + "step": 833500 + }, + { + "epoch": 9.18, + "learning_rate": 9.175422190439518e-08, + "loss": 3.8716, + "step": 834000 + }, + { + "epoch": 9.18, + "learning_rate": 9.180923043071676e-08, + "loss": 3.8921, + "step": 834500 + }, + { + "epoch": 9.19, + "learning_rate": 9.186423895703833e-08, + "loss": 3.8844, + "step": 835000 + }, + { + "epoch": 9.19, + "learning_rate": 9.191924748335992e-08, + "loss": 3.879, + "step": 835500 + }, + { + "epoch": 9.2, + "learning_rate": 9.19742560096815e-08, + "loss": 3.8695, + "step": 836000 + }, + { + "epoch": 9.2, + "learning_rate": 9.202926453600309e-08, + "loss": 3.8923, + "step": 836500 + }, + { + "epoch": 9.21, + "learning_rate": 9.208427306232466e-08, + "loss": 3.8497, + "step": 837000 + }, + { + "epoch": 9.21, + "learning_rate": 9.213928158864624e-08, + "loss": 3.8885, + "step": 837500 + }, + { + "epoch": 9.22, + "learning_rate": 9.219429011496783e-08, + "loss": 3.8694, + "step": 838000 + }, + { + "epoch": 9.22, + "learning_rate": 9.22492986412894e-08, + "loss": 3.8745, + "step": 838500 + }, + { + "epoch": 9.23, + "learning_rate": 9.230430716761098e-08, + "loss": 3.8806, + "step": 839000 + }, + { + "epoch": 9.24, + "learning_rate": 9.235931569393255e-08, + "loss": 3.8787, + "step": 839500 + }, + { + "epoch": 9.24, + "learning_rate": 9.241432422025413e-08, + "loss": 3.8871, + "step": 840000 + }, + { + "epoch": 9.25, + "learning_rate": 9.246933274657572e-08, + "loss": 3.8722, + "step": 840500 + }, + { + "epoch": 9.25, + "learning_rate": 9.25243412728973e-08, + "loss": 3.8858, + "step": 841000 + }, + { + "epoch": 9.26, + "learning_rate": 9.257934979921887e-08, + "loss": 3.8961, + "step": 841500 + }, + { + "epoch": 9.26, + "learning_rate": 9.263435832554045e-08, + "loss": 3.8958, + "step": 842000 + }, + { + "epoch": 9.27, + "learning_rate": 9.268936685186204e-08, + "loss": 3.8843, + "step": 842500 + }, + { + "epoch": 9.27, + "learning_rate": 9.274437537818361e-08, + "loss": 3.872, + "step": 843000 + }, + { + "epoch": 9.28, + "learning_rate": 9.279938390450519e-08, + "loss": 3.8803, + "step": 843500 + }, + { + "epoch": 9.29, + "learning_rate": 9.285439243082678e-08, + "loss": 3.8821, + "step": 844000 + }, + { + "epoch": 9.29, + "learning_rate": 9.290940095714835e-08, + "loss": 3.8959, + "step": 844500 + }, + { + "epoch": 9.3, + "learning_rate": 9.296440948346993e-08, + "loss": 3.8836, + "step": 845000 + }, + { + "epoch": 9.3, + "learning_rate": 9.301941800979152e-08, + "loss": 3.8783, + "step": 845500 + }, + { + "epoch": 9.31, + "learning_rate": 9.307442653611309e-08, + "loss": 3.8917, + "step": 846000 + }, + { + "epoch": 9.31, + "learning_rate": 9.312943506243467e-08, + "loss": 3.9004, + "step": 846500 + }, + { + "epoch": 9.32, + "learning_rate": 9.318444358875626e-08, + "loss": 3.8687, + "step": 847000 + }, + { + "epoch": 9.32, + "learning_rate": 9.323945211507783e-08, + "loss": 3.8708, + "step": 847500 + }, + { + "epoch": 9.33, + "learning_rate": 9.329446064139941e-08, + "loss": 3.8681, + "step": 848000 + }, + { + "epoch": 9.33, + "learning_rate": 9.3349469167721e-08, + "loss": 3.8812, + "step": 848500 + }, + { + "epoch": 9.34, + "learning_rate": 9.340447769404257e-08, + "loss": 3.889, + "step": 849000 + }, + { + "epoch": 9.35, + "learning_rate": 9.345948622036415e-08, + "loss": 3.905, + "step": 849500 + }, + { + "epoch": 9.35, + "learning_rate": 9.351449474668574e-08, + "loss": 3.8923, + "step": 850000 + }, + { + "epoch": 9.36, + "learning_rate": 9.356950327300732e-08, + "loss": 3.8838, + "step": 850500 + }, + { + "epoch": 9.36, + "learning_rate": 9.362451179932889e-08, + "loss": 3.8681, + "step": 851000 + }, + { + "epoch": 9.37, + "learning_rate": 9.367952032565048e-08, + "loss": 3.8939, + "step": 851500 + }, + { + "epoch": 9.37, + "learning_rate": 9.373452885197206e-08, + "loss": 3.8911, + "step": 852000 + }, + { + "epoch": 9.38, + "learning_rate": 9.378953737829363e-08, + "loss": 3.8759, + "step": 852500 + }, + { + "epoch": 9.38, + "learning_rate": 9.384454590461521e-08, + "loss": 3.8749, + "step": 853000 + }, + { + "epoch": 9.39, + "learning_rate": 9.389955443093678e-08, + "loss": 3.8796, + "step": 853500 + }, + { + "epoch": 9.4, + "learning_rate": 9.395456295725836e-08, + "loss": 3.8843, + "step": 854000 + }, + { + "epoch": 9.4, + "learning_rate": 9.400957148357995e-08, + "loss": 3.8791, + "step": 854500 + }, + { + "epoch": 9.41, + "learning_rate": 9.406458000990152e-08, + "loss": 3.8743, + "step": 855000 + }, + { + "epoch": 9.41, + "learning_rate": 9.411958853622311e-08, + "loss": 3.8948, + "step": 855500 + }, + { + "epoch": 9.42, + "learning_rate": 9.417459706254469e-08, + "loss": 3.8576, + "step": 856000 + }, + { + "epoch": 9.42, + "learning_rate": 9.422960558886627e-08, + "loss": 3.8904, + "step": 856500 + }, + { + "epoch": 9.43, + "learning_rate": 9.428461411518785e-08, + "loss": 3.8703, + "step": 857000 + }, + { + "epoch": 9.43, + "learning_rate": 9.433962264150943e-08, + "loss": 3.8838, + "step": 857500 + }, + { + "epoch": 9.44, + "learning_rate": 9.4394631167831e-08, + "loss": 3.8972, + "step": 858000 + }, + { + "epoch": 9.44, + "learning_rate": 9.44496396941526e-08, + "loss": 3.876, + "step": 858500 + }, + { + "epoch": 9.45, + "learning_rate": 9.450464822047417e-08, + "loss": 3.8738, + "step": 859000 + }, + { + "epoch": 9.46, + "learning_rate": 9.455965674679575e-08, + "loss": 3.8905, + "step": 859500 + }, + { + "epoch": 9.46, + "learning_rate": 9.461466527311734e-08, + "loss": 3.8696, + "step": 860000 + }, + { + "epoch": 9.47, + "learning_rate": 9.466967379943891e-08, + "loss": 3.8876, + "step": 860500 + }, + { + "epoch": 9.47, + "learning_rate": 9.472468232576049e-08, + "loss": 3.8806, + "step": 861000 + }, + { + "epoch": 9.48, + "learning_rate": 9.477969085208208e-08, + "loss": 3.8752, + "step": 861500 + }, + { + "epoch": 9.48, + "learning_rate": 9.483469937840365e-08, + "loss": 3.8944, + "step": 862000 + }, + { + "epoch": 9.49, + "learning_rate": 9.488970790472523e-08, + "loss": 3.8768, + "step": 862500 + }, + { + "epoch": 9.49, + "learning_rate": 9.494471643104682e-08, + "loss": 3.8779, + "step": 863000 + }, + { + "epoch": 9.5, + "learning_rate": 9.49997249573684e-08, + "loss": 3.8678, + "step": 863500 + }, + { + "epoch": 9.51, + "learning_rate": 9.505473348368997e-08, + "loss": 3.8701, + "step": 864000 + }, + { + "epoch": 9.51, + "learning_rate": 9.510974201001155e-08, + "loss": 3.8868, + "step": 864500 + }, + { + "epoch": 9.52, + "learning_rate": 9.516475053633312e-08, + "loss": 3.877, + "step": 865000 + }, + { + "epoch": 9.52, + "learning_rate": 9.52197590626547e-08, + "loss": 3.863, + "step": 865500 + }, + { + "epoch": 9.53, + "learning_rate": 9.527476758897629e-08, + "loss": 3.881, + "step": 866000 + }, + { + "epoch": 9.53, + "learning_rate": 9.532977611529786e-08, + "loss": 3.8917, + "step": 866500 + }, + { + "epoch": 9.54, + "learning_rate": 9.538478464161944e-08, + "loss": 3.8803, + "step": 867000 + }, + { + "epoch": 9.54, + "learning_rate": 9.543979316794103e-08, + "loss": 3.8846, + "step": 867500 + }, + { + "epoch": 9.55, + "learning_rate": 9.54948016942626e-08, + "loss": 3.8789, + "step": 868000 + }, + { + "epoch": 9.55, + "learning_rate": 9.554981022058418e-08, + "loss": 3.8757, + "step": 868500 + }, + { + "epoch": 9.56, + "learning_rate": 9.560481874690577e-08, + "loss": 3.8924, + "step": 869000 + }, + { + "epoch": 9.57, + "learning_rate": 9.565982727322734e-08, + "loss": 3.8773, + "step": 869500 + }, + { + "epoch": 9.57, + "learning_rate": 9.571483579954892e-08, + "loss": 3.8751, + "step": 870000 + }, + { + "epoch": 9.58, + "learning_rate": 9.576984432587051e-08, + "loss": 3.8848, + "step": 870500 + }, + { + "epoch": 9.58, + "learning_rate": 9.582485285219208e-08, + "loss": 3.863, + "step": 871000 + }, + { + "epoch": 9.59, + "learning_rate": 9.587986137851366e-08, + "loss": 3.8641, + "step": 871500 + }, + { + "epoch": 9.59, + "learning_rate": 9.593486990483525e-08, + "loss": 3.8632, + "step": 872000 + }, + { + "epoch": 9.6, + "learning_rate": 9.598987843115683e-08, + "loss": 3.8858, + "step": 872500 + }, + { + "epoch": 9.6, + "learning_rate": 9.60448869574784e-08, + "loss": 3.8765, + "step": 873000 + }, + { + "epoch": 9.61, + "learning_rate": 9.609989548379999e-08, + "loss": 3.8702, + "step": 873500 + }, + { + "epoch": 9.62, + "learning_rate": 9.615490401012157e-08, + "loss": 3.8629, + "step": 874000 + }, + { + "epoch": 9.62, + "learning_rate": 9.620991253644316e-08, + "loss": 3.8748, + "step": 874500 + }, + { + "epoch": 9.63, + "learning_rate": 9.626492106276473e-08, + "loss": 3.8688, + "step": 875000 + }, + { + "epoch": 9.63, + "learning_rate": 9.631992958908631e-08, + "loss": 3.8858, + "step": 875500 + }, + { + "epoch": 9.64, + "learning_rate": 9.637493811540788e-08, + "loss": 3.8649, + "step": 876000 + }, + { + "epoch": 9.64, + "learning_rate": 9.642994664172946e-08, + "loss": 3.893, + "step": 876500 + }, + { + "epoch": 9.65, + "learning_rate": 9.648495516805103e-08, + "loss": 3.8773, + "step": 877000 + }, + { + "epoch": 9.65, + "learning_rate": 9.653996369437262e-08, + "loss": 3.8813, + "step": 877500 + }, + { + "epoch": 9.66, + "learning_rate": 9.65949722206942e-08, + "loss": 3.8754, + "step": 878000 + }, + { + "epoch": 9.66, + "learning_rate": 9.664998074701578e-08, + "loss": 3.8753, + "step": 878500 + }, + { + "epoch": 9.67, + "learning_rate": 9.670498927333736e-08, + "loss": 3.8904, + "step": 879000 + }, + { + "epoch": 9.68, + "learning_rate": 9.675999779965894e-08, + "loss": 3.9013, + "step": 879500 + }, + { + "epoch": 9.68, + "learning_rate": 9.681500632598052e-08, + "loss": 3.8645, + "step": 880000 + }, + { + "epoch": 9.69, + "learning_rate": 9.68700148523021e-08, + "loss": 3.875, + "step": 880500 + }, + { + "epoch": 9.69, + "learning_rate": 9.692502337862368e-08, + "loss": 3.8759, + "step": 881000 + }, + { + "epoch": 9.7, + "learning_rate": 9.698003190494526e-08, + "loss": 3.8723, + "step": 881500 + }, + { + "epoch": 9.7, + "learning_rate": 9.703504043126685e-08, + "loss": 3.8717, + "step": 882000 + }, + { + "epoch": 9.71, + "learning_rate": 9.709004895758842e-08, + "loss": 3.8703, + "step": 882500 + }, + { + "epoch": 9.71, + "learning_rate": 9.714505748391e-08, + "loss": 3.8701, + "step": 883000 + }, + { + "epoch": 9.72, + "learning_rate": 9.720006601023159e-08, + "loss": 3.8526, + "step": 883500 + }, + { + "epoch": 9.73, + "learning_rate": 9.725507453655316e-08, + "loss": 3.8551, + "step": 884000 + }, + { + "epoch": 9.73, + "learning_rate": 9.731008306287474e-08, + "loss": 3.8786, + "step": 884500 + }, + { + "epoch": 9.74, + "learning_rate": 9.736509158919633e-08, + "loss": 3.8725, + "step": 885000 + }, + { + "epoch": 9.74, + "learning_rate": 9.74201001155179e-08, + "loss": 3.8839, + "step": 885500 + }, + { + "epoch": 9.75, + "learning_rate": 9.747510864183948e-08, + "loss": 3.8654, + "step": 886000 + }, + { + "epoch": 9.75, + "learning_rate": 9.753011716816107e-08, + "loss": 3.8946, + "step": 886500 + }, + { + "epoch": 9.76, + "learning_rate": 9.758512569448264e-08, + "loss": 3.8675, + "step": 887000 + }, + { + "epoch": 9.76, + "learning_rate": 9.764013422080422e-08, + "loss": 3.8737, + "step": 887500 + }, + { + "epoch": 9.77, + "learning_rate": 9.76951427471258e-08, + "loss": 3.876, + "step": 888000 + }, + { + "epoch": 9.78, + "learning_rate": 9.775015127344739e-08, + "loss": 3.8848, + "step": 888500 + }, + { + "epoch": 9.78, + "learning_rate": 9.780515979976896e-08, + "loss": 3.8654, + "step": 889000 + }, + { + "epoch": 9.79, + "learning_rate": 9.786016832609054e-08, + "loss": 3.8669, + "step": 889500 + }, + { + "epoch": 9.79, + "learning_rate": 9.791517685241211e-08, + "loss": 3.8551, + "step": 890000 + }, + { + "epoch": 9.8, + "learning_rate": 9.797018537873369e-08, + "loss": 3.8773, + "step": 890500 + }, + { + "epoch": 9.8, + "learning_rate": 9.802519390505528e-08, + "loss": 3.8834, + "step": 891000 + }, + { + "epoch": 9.81, + "learning_rate": 9.808020243137685e-08, + "loss": 3.8759, + "step": 891500 + }, + { + "epoch": 9.81, + "learning_rate": 9.813521095769843e-08, + "loss": 3.864, + "step": 892000 + }, + { + "epoch": 9.82, + "learning_rate": 9.819021948402002e-08, + "loss": 3.8698, + "step": 892500 + }, + { + "epoch": 9.82, + "learning_rate": 9.82452280103416e-08, + "loss": 3.8783, + "step": 893000 + }, + { + "epoch": 9.83, + "learning_rate": 9.830023653666318e-08, + "loss": 3.8792, + "step": 893500 + }, + { + "epoch": 9.84, + "learning_rate": 9.835524506298476e-08, + "loss": 3.8725, + "step": 894000 + }, + { + "epoch": 9.84, + "learning_rate": 9.841025358930634e-08, + "loss": 3.8855, + "step": 894500 + }, + { + "epoch": 9.85, + "learning_rate": 9.846526211562792e-08, + "loss": 3.8717, + "step": 895000 + }, + { + "epoch": 9.85, + "learning_rate": 9.85202706419495e-08, + "loss": 3.8812, + "step": 895500 + }, + { + "epoch": 9.86, + "learning_rate": 9.857527916827108e-08, + "loss": 3.8659, + "step": 896000 + }, + { + "epoch": 9.86, + "learning_rate": 9.863028769459267e-08, + "loss": 3.8765, + "step": 896500 + }, + { + "epoch": 9.87, + "learning_rate": 9.868529622091424e-08, + "loss": 3.8749, + "step": 897000 + }, + { + "epoch": 9.87, + "learning_rate": 9.874030474723582e-08, + "loss": 3.8877, + "step": 897500 + }, + { + "epoch": 9.88, + "learning_rate": 9.87953132735574e-08, + "loss": 3.8622, + "step": 898000 + }, + { + "epoch": 9.89, + "learning_rate": 9.885032179987898e-08, + "loss": 3.8751, + "step": 898500 + }, + { + "epoch": 9.89, + "learning_rate": 9.890533032620056e-08, + "loss": 3.8655, + "step": 899000 + }, + { + "epoch": 9.9, + "learning_rate": 9.896033885252215e-08, + "loss": 3.895, + "step": 899500 + }, + { + "epoch": 9.9, + "learning_rate": 9.901534737884372e-08, + "loss": 3.888, + "step": 900000 + }, + { + "epoch": 9.91, + "learning_rate": 9.90703559051653e-08, + "loss": 3.8801, + "step": 900500 + }, + { + "epoch": 9.91, + "learning_rate": 9.912536443148687e-08, + "loss": 3.8707, + "step": 901000 + }, + { + "epoch": 9.92, + "learning_rate": 9.918037295780845e-08, + "loss": 3.89, + "step": 901500 + }, + { + "epoch": 9.92, + "learning_rate": 9.923538148413003e-08, + "loss": 3.8869, + "step": 902000 + }, + { + "epoch": 9.93, + "learning_rate": 9.929039001045162e-08, + "loss": 3.8724, + "step": 902500 + }, + { + "epoch": 9.93, + "learning_rate": 9.934539853677319e-08, + "loss": 3.8889, + "step": 903000 + }, + { + "epoch": 9.94, + "learning_rate": 9.940040706309477e-08, + "loss": 3.8517, + "step": 903500 + }, + { + "epoch": 9.95, + "learning_rate": 9.945541558941636e-08, + "loss": 3.8743, + "step": 904000 + }, + { + "epoch": 9.95, + "learning_rate": 9.951042411573793e-08, + "loss": 3.8948, + "step": 904500 + }, + { + "epoch": 9.96, + "learning_rate": 9.956543264205951e-08, + "loss": 3.8811, + "step": 905000 + }, + { + "epoch": 9.96, + "learning_rate": 9.96204411683811e-08, + "loss": 3.8664, + "step": 905500 + }, + { + "epoch": 9.97, + "learning_rate": 9.967544969470267e-08, + "loss": 3.8681, + "step": 906000 + }, + { + "epoch": 9.97, + "learning_rate": 9.973045822102425e-08, + "loss": 3.8666, + "step": 906500 + }, + { + "epoch": 9.98, + "learning_rate": 9.978546674734584e-08, + "loss": 3.8888, + "step": 907000 + }, + { + "epoch": 9.98, + "learning_rate": 9.984047527366741e-08, + "loss": 3.8618, + "step": 907500 + }, + { + "epoch": 9.99, + "learning_rate": 9.989548379998899e-08, + "loss": 3.8665, + "step": 908000 + }, + { + "epoch": 10.0, + "learning_rate": 9.995049232631058e-08, + "loss": 3.8727, + "step": 908500 + }, + { + "epoch": 10.0, + "eval_loss": 3.9042184352874756, + "eval_runtime": 6.1344, + "eval_samples_per_second": 253.326, + "step": 908950 + }, + { + "epoch": 10.0, + "learning_rate": 9.999862478684196e-08, + "loss": 3.8701, + "step": 909000 + }, + { + "epoch": 10.01, + "learning_rate": 9.998487265526157e-08, + "loss": 3.8771, + "step": 909500 + }, + { + "epoch": 10.01, + "learning_rate": 9.997112052368116e-08, + "loss": 3.8777, + "step": 910000 + }, + { + "epoch": 10.02, + "learning_rate": 9.995736839210077e-08, + "loss": 3.8868, + "step": 910500 + }, + { + "epoch": 10.02, + "learning_rate": 9.994361626052038e-08, + "loss": 3.8467, + "step": 911000 + }, + { + "epoch": 10.03, + "learning_rate": 9.992986412893998e-08, + "loss": 3.8789, + "step": 911500 + }, + { + "epoch": 10.03, + "learning_rate": 9.991611199735959e-08, + "loss": 3.8684, + "step": 912000 + }, + { + "epoch": 10.04, + "learning_rate": 9.99023598657792e-08, + "loss": 3.8753, + "step": 912500 + }, + { + "epoch": 10.04, + "learning_rate": 9.98886077341988e-08, + "loss": 3.8548, + "step": 913000 + }, + { + "epoch": 10.05, + "learning_rate": 9.98748556026184e-08, + "loss": 3.8719, + "step": 913500 + }, + { + "epoch": 10.06, + "learning_rate": 9.986110347103801e-08, + "loss": 3.8869, + "step": 914000 + }, + { + "epoch": 10.06, + "learning_rate": 9.984735133945761e-08, + "loss": 3.8617, + "step": 914500 + }, + { + "epoch": 10.07, + "learning_rate": 9.983359920787722e-08, + "loss": 3.878, + "step": 915000 + }, + { + "epoch": 10.07, + "learning_rate": 9.981984707629683e-08, + "loss": 3.864, + "step": 915500 + }, + { + "epoch": 10.08, + "learning_rate": 9.980609494471642e-08, + "loss": 3.8714, + "step": 916000 + }, + { + "epoch": 10.08, + "learning_rate": 9.979234281313603e-08, + "loss": 3.8534, + "step": 916500 + }, + { + "epoch": 10.09, + "learning_rate": 9.977859068155564e-08, + "loss": 3.8678, + "step": 917000 + }, + { + "epoch": 10.09, + "learning_rate": 9.976483854997524e-08, + "loss": 3.8713, + "step": 917500 + }, + { + "epoch": 10.1, + "learning_rate": 9.975108641839485e-08, + "loss": 3.8683, + "step": 918000 + }, + { + "epoch": 10.11, + "learning_rate": 9.973733428681446e-08, + "loss": 3.8884, + "step": 918500 + }, + { + "epoch": 10.11, + "learning_rate": 9.972358215523405e-08, + "loss": 3.8626, + "step": 919000 + }, + { + "epoch": 10.12, + "learning_rate": 9.970983002365366e-08, + "loss": 3.873, + "step": 919500 + }, + { + "epoch": 10.12, + "learning_rate": 9.969607789207327e-08, + "loss": 3.8694, + "step": 920000 + }, + { + "epoch": 10.13, + "learning_rate": 9.968232576049287e-08, + "loss": 3.8691, + "step": 920500 + }, + { + "epoch": 10.13, + "learning_rate": 9.966857362891248e-08, + "loss": 3.8766, + "step": 921000 + }, + { + "epoch": 10.14, + "learning_rate": 9.965482149733209e-08, + "loss": 3.8944, + "step": 921500 + }, + { + "epoch": 10.14, + "learning_rate": 9.964106936575168e-08, + "loss": 3.8497, + "step": 922000 + }, + { + "epoch": 10.15, + "learning_rate": 9.962731723417129e-08, + "loss": 3.8611, + "step": 922500 + }, + { + "epoch": 10.15, + "learning_rate": 9.96135651025909e-08, + "loss": 3.8727, + "step": 923000 + }, + { + "epoch": 10.16, + "learning_rate": 9.95998129710105e-08, + "loss": 3.8585, + "step": 923500 + }, + { + "epoch": 10.17, + "learning_rate": 9.958606083943011e-08, + "loss": 3.8616, + "step": 924000 + }, + { + "epoch": 10.17, + "learning_rate": 9.957230870784972e-08, + "loss": 3.8469, + "step": 924500 + }, + { + "epoch": 10.18, + "learning_rate": 9.955855657626931e-08, + "loss": 3.8673, + "step": 925000 + }, + { + "epoch": 10.18, + "learning_rate": 9.954480444468892e-08, + "loss": 3.8815, + "step": 925500 + }, + { + "epoch": 10.19, + "learning_rate": 9.953105231310853e-08, + "loss": 3.8865, + "step": 926000 + }, + { + "epoch": 10.19, + "learning_rate": 9.951730018152813e-08, + "loss": 3.8616, + "step": 926500 + }, + { + "epoch": 10.2, + "learning_rate": 9.950354804994774e-08, + "loss": 3.8531, + "step": 927000 + }, + { + "epoch": 10.2, + "learning_rate": 9.948979591836735e-08, + "loss": 3.8767, + "step": 927500 + }, + { + "epoch": 10.21, + "learning_rate": 9.947604378678694e-08, + "loss": 3.878, + "step": 928000 + }, + { + "epoch": 10.22, + "learning_rate": 9.946229165520655e-08, + "loss": 3.8771, + "step": 928500 + }, + { + "epoch": 10.22, + "learning_rate": 9.944853952362616e-08, + "loss": 3.8742, + "step": 929000 + }, + { + "epoch": 10.23, + "learning_rate": 9.943478739204576e-08, + "loss": 3.8609, + "step": 929500 + }, + { + "epoch": 10.23, + "learning_rate": 9.942103526046537e-08, + "loss": 3.867, + "step": 930000 + }, + { + "epoch": 10.24, + "learning_rate": 9.940728312888498e-08, + "loss": 3.8851, + "step": 930500 + }, + { + "epoch": 10.24, + "learning_rate": 9.939353099730457e-08, + "loss": 3.8482, + "step": 931000 + }, + { + "epoch": 10.25, + "learning_rate": 9.937977886572418e-08, + "loss": 3.8677, + "step": 931500 + }, + { + "epoch": 10.25, + "learning_rate": 9.936602673414379e-08, + "loss": 3.8713, + "step": 932000 + }, + { + "epoch": 10.26, + "learning_rate": 9.935227460256339e-08, + "loss": 3.8697, + "step": 932500 + }, + { + "epoch": 10.26, + "learning_rate": 9.9338522470983e-08, + "loss": 3.8768, + "step": 933000 + }, + { + "epoch": 10.27, + "learning_rate": 9.93247703394026e-08, + "loss": 3.8682, + "step": 933500 + }, + { + "epoch": 10.28, + "learning_rate": 9.93110182078222e-08, + "loss": 3.8627, + "step": 934000 + }, + { + "epoch": 10.28, + "learning_rate": 9.929726607624181e-08, + "loss": 3.8777, + "step": 934500 + }, + { + "epoch": 10.29, + "learning_rate": 9.928351394466142e-08, + "loss": 3.8933, + "step": 935000 + }, + { + "epoch": 10.29, + "learning_rate": 9.926976181308102e-08, + "loss": 3.8538, + "step": 935500 + }, + { + "epoch": 10.3, + "learning_rate": 9.925600968150063e-08, + "loss": 3.8656, + "step": 936000 + }, + { + "epoch": 10.3, + "learning_rate": 9.924225754992023e-08, + "loss": 3.8555, + "step": 936500 + }, + { + "epoch": 10.31, + "learning_rate": 9.922850541833983e-08, + "loss": 3.8903, + "step": 937000 + }, + { + "epoch": 10.31, + "learning_rate": 9.921475328675944e-08, + "loss": 3.8552, + "step": 937500 + }, + { + "epoch": 10.32, + "learning_rate": 9.920100115517905e-08, + "loss": 3.8667, + "step": 938000 + }, + { + "epoch": 10.33, + "learning_rate": 9.918724902359865e-08, + "loss": 3.8532, + "step": 938500 + }, + { + "epoch": 10.33, + "learning_rate": 9.917349689201825e-08, + "loss": 3.842, + "step": 939000 + }, + { + "epoch": 10.34, + "learning_rate": 9.915974476043786e-08, + "loss": 3.8711, + "step": 939500 + }, + { + "epoch": 10.34, + "learning_rate": 9.914599262885746e-08, + "loss": 3.8498, + "step": 940000 + }, + { + "epoch": 10.35, + "learning_rate": 9.913224049727707e-08, + "loss": 3.8502, + "step": 940500 + }, + { + "epoch": 10.35, + "learning_rate": 9.911848836569668e-08, + "loss": 3.8692, + "step": 941000 + }, + { + "epoch": 10.36, + "learning_rate": 9.910473623411628e-08, + "loss": 3.8643, + "step": 941500 + }, + { + "epoch": 10.36, + "learning_rate": 9.909098410253588e-08, + "loss": 3.8646, + "step": 942000 + }, + { + "epoch": 10.37, + "learning_rate": 9.90772319709555e-08, + "loss": 3.8596, + "step": 942500 + }, + { + "epoch": 10.37, + "learning_rate": 9.90634798393751e-08, + "loss": 3.8669, + "step": 943000 + }, + { + "epoch": 10.38, + "learning_rate": 9.90497277077947e-08, + "loss": 3.8637, + "step": 943500 + }, + { + "epoch": 10.39, + "learning_rate": 9.903597557621431e-08, + "loss": 3.8543, + "step": 944000 + }, + { + "epoch": 10.39, + "learning_rate": 9.902222344463392e-08, + "loss": 3.8847, + "step": 944500 + }, + { + "epoch": 10.4, + "learning_rate": 9.900847131305353e-08, + "loss": 3.8667, + "step": 945000 + }, + { + "epoch": 10.4, + "learning_rate": 9.899471918147312e-08, + "loss": 3.8794, + "step": 945500 + }, + { + "epoch": 10.41, + "learning_rate": 9.898096704989273e-08, + "loss": 3.8772, + "step": 946000 + }, + { + "epoch": 10.41, + "learning_rate": 9.896721491831234e-08, + "loss": 3.8748, + "step": 946500 + }, + { + "epoch": 10.42, + "learning_rate": 9.895346278673194e-08, + "loss": 3.8735, + "step": 947000 + }, + { + "epoch": 10.42, + "learning_rate": 9.893971065515155e-08, + "loss": 3.8614, + "step": 947500 + }, + { + "epoch": 10.43, + "learning_rate": 9.892595852357116e-08, + "loss": 3.8754, + "step": 948000 + }, + { + "epoch": 10.44, + "learning_rate": 9.891220639199075e-08, + "loss": 3.8776, + "step": 948500 + }, + { + "epoch": 10.44, + "learning_rate": 9.889845426041036e-08, + "loss": 3.8592, + "step": 949000 + }, + { + "epoch": 10.45, + "learning_rate": 9.888470212882997e-08, + "loss": 3.8724, + "step": 949500 + }, + { + "epoch": 10.45, + "learning_rate": 9.887094999724957e-08, + "loss": 3.874, + "step": 950000 + }, + { + "epoch": 10.46, + "learning_rate": 9.885719786566918e-08, + "loss": 3.8758, + "step": 950500 + }, + { + "epoch": 10.46, + "learning_rate": 9.884344573408879e-08, + "loss": 3.8518, + "step": 951000 + }, + { + "epoch": 10.47, + "learning_rate": 9.882969360250838e-08, + "loss": 3.8717, + "step": 951500 + }, + { + "epoch": 10.47, + "learning_rate": 9.881594147092799e-08, + "loss": 3.8688, + "step": 952000 + }, + { + "epoch": 10.48, + "learning_rate": 9.88021893393476e-08, + "loss": 3.8604, + "step": 952500 + }, + { + "epoch": 10.48, + "learning_rate": 9.87884372077672e-08, + "loss": 3.86, + "step": 953000 + }, + { + "epoch": 10.49, + "learning_rate": 9.877468507618681e-08, + "loss": 3.8734, + "step": 953500 + }, + { + "epoch": 10.5, + "learning_rate": 9.876093294460642e-08, + "loss": 3.8658, + "step": 954000 + }, + { + "epoch": 10.5, + "learning_rate": 9.874718081302601e-08, + "loss": 3.8501, + "step": 954500 + }, + { + "epoch": 10.51, + "learning_rate": 9.873342868144562e-08, + "loss": 3.8703, + "step": 955000 + }, + { + "epoch": 10.51, + "learning_rate": 9.871967654986523e-08, + "loss": 3.8658, + "step": 955500 + }, + { + "epoch": 10.52, + "learning_rate": 9.870592441828483e-08, + "loss": 3.8704, + "step": 956000 + }, + { + "epoch": 10.52, + "learning_rate": 9.869217228670444e-08, + "loss": 3.883, + "step": 956500 + }, + { + "epoch": 10.53, + "learning_rate": 9.867842015512405e-08, + "loss": 3.8654, + "step": 957000 + }, + { + "epoch": 10.53, + "learning_rate": 9.866466802354364e-08, + "loss": 3.8446, + "step": 957500 + }, + { + "epoch": 10.54, + "learning_rate": 9.865091589196325e-08, + "loss": 3.8637, + "step": 958000 + }, + { + "epoch": 10.55, + "learning_rate": 9.863716376038286e-08, + "loss": 3.8563, + "step": 958500 + }, + { + "epoch": 10.55, + "learning_rate": 9.862341162880246e-08, + "loss": 3.8579, + "step": 959000 + }, + { + "epoch": 10.56, + "learning_rate": 9.860965949722207e-08, + "loss": 3.8813, + "step": 959500 + }, + { + "epoch": 10.56, + "learning_rate": 9.859590736564168e-08, + "loss": 3.8509, + "step": 960000 + }, + { + "epoch": 10.57, + "learning_rate": 9.858215523406127e-08, + "loss": 3.8784, + "step": 960500 + }, + { + "epoch": 10.57, + "learning_rate": 9.856840310248088e-08, + "loss": 3.8797, + "step": 961000 + }, + { + "epoch": 10.58, + "learning_rate": 9.855465097090049e-08, + "loss": 3.8665, + "step": 961500 + }, + { + "epoch": 10.58, + "learning_rate": 9.854089883932009e-08, + "loss": 3.8666, + "step": 962000 + }, + { + "epoch": 10.59, + "learning_rate": 9.85271467077397e-08, + "loss": 3.8468, + "step": 962500 + }, + { + "epoch": 10.59, + "learning_rate": 9.85133945761593e-08, + "loss": 3.8637, + "step": 963000 + }, + { + "epoch": 10.6, + "learning_rate": 9.84996424445789e-08, + "loss": 3.8757, + "step": 963500 + }, + { + "epoch": 10.61, + "learning_rate": 9.848589031299851e-08, + "loss": 3.864, + "step": 964000 + }, + { + "epoch": 10.61, + "learning_rate": 9.847213818141812e-08, + "loss": 3.8569, + "step": 964500 + }, + { + "epoch": 10.62, + "learning_rate": 9.845838604983772e-08, + "loss": 3.8758, + "step": 965000 + }, + { + "epoch": 10.62, + "learning_rate": 9.844463391825733e-08, + "loss": 3.8931, + "step": 965500 + }, + { + "epoch": 10.63, + "learning_rate": 9.843088178667693e-08, + "loss": 3.8637, + "step": 966000 + }, + { + "epoch": 10.63, + "learning_rate": 9.841712965509653e-08, + "loss": 3.8612, + "step": 966500 + }, + { + "epoch": 10.64, + "learning_rate": 9.840337752351614e-08, + "loss": 3.8609, + "step": 967000 + }, + { + "epoch": 10.64, + "learning_rate": 9.838962539193575e-08, + "loss": 3.8681, + "step": 967500 + }, + { + "epoch": 10.65, + "learning_rate": 9.837587326035535e-08, + "loss": 3.8522, + "step": 968000 + }, + { + "epoch": 10.66, + "learning_rate": 9.836212112877495e-08, + "loss": 3.8582, + "step": 968500 + }, + { + "epoch": 10.66, + "learning_rate": 9.834836899719456e-08, + "loss": 3.8594, + "step": 969000 + }, + { + "epoch": 10.67, + "learning_rate": 9.833461686561416e-08, + "loss": 3.8481, + "step": 969500 + }, + { + "epoch": 10.67, + "learning_rate": 9.832086473403377e-08, + "loss": 3.8386, + "step": 970000 + }, + { + "epoch": 10.68, + "learning_rate": 9.830711260245338e-08, + "loss": 3.8644, + "step": 970500 + }, + { + "epoch": 10.68, + "learning_rate": 9.829336047087298e-08, + "loss": 3.8727, + "step": 971000 + }, + { + "epoch": 10.69, + "learning_rate": 9.827960833929258e-08, + "loss": 3.8721, + "step": 971500 + }, + { + "epoch": 10.69, + "learning_rate": 9.82658562077122e-08, + "loss": 3.8689, + "step": 972000 + }, + { + "epoch": 10.7, + "learning_rate": 9.825210407613179e-08, + "loss": 3.8601, + "step": 972500 + }, + { + "epoch": 10.7, + "learning_rate": 9.82383519445514e-08, + "loss": 3.8681, + "step": 973000 + }, + { + "epoch": 10.71, + "learning_rate": 9.822459981297101e-08, + "loss": 3.8565, + "step": 973500 + }, + { + "epoch": 10.72, + "learning_rate": 9.82108476813906e-08, + "loss": 3.8703, + "step": 974000 + }, + { + "epoch": 10.72, + "learning_rate": 9.819709554981021e-08, + "loss": 3.8704, + "step": 974500 + }, + { + "epoch": 10.73, + "learning_rate": 9.818334341822982e-08, + "loss": 3.8545, + "step": 975000 + }, + { + "epoch": 10.73, + "learning_rate": 9.816959128664942e-08, + "loss": 3.8456, + "step": 975500 + }, + { + "epoch": 10.74, + "learning_rate": 9.815583915506903e-08, + "loss": 3.8623, + "step": 976000 + }, + { + "epoch": 10.74, + "learning_rate": 9.814208702348864e-08, + "loss": 3.863, + "step": 976500 + }, + { + "epoch": 10.75, + "learning_rate": 9.812833489190823e-08, + "loss": 3.8543, + "step": 977000 + }, + { + "epoch": 10.75, + "learning_rate": 9.811458276032784e-08, + "loss": 3.8538, + "step": 977500 + }, + { + "epoch": 10.76, + "learning_rate": 9.810083062874745e-08, + "loss": 3.856, + "step": 978000 + }, + { + "epoch": 10.77, + "learning_rate": 9.808707849716705e-08, + "loss": 3.867, + "step": 978500 + }, + { + "epoch": 10.77, + "learning_rate": 9.807332636558666e-08, + "loss": 3.8787, + "step": 979000 + }, + { + "epoch": 10.78, + "learning_rate": 9.805957423400627e-08, + "loss": 3.8575, + "step": 979500 + }, + { + "epoch": 10.78, + "learning_rate": 9.804582210242586e-08, + "loss": 3.8658, + "step": 980000 + }, + { + "epoch": 10.79, + "learning_rate": 9.803206997084547e-08, + "loss": 3.8507, + "step": 980500 + }, + { + "epoch": 10.79, + "learning_rate": 9.801831783926508e-08, + "loss": 3.8549, + "step": 981000 + }, + { + "epoch": 10.8, + "learning_rate": 9.800456570768468e-08, + "loss": 3.88, + "step": 981500 + }, + { + "epoch": 10.8, + "learning_rate": 9.799081357610429e-08, + "loss": 3.8592, + "step": 982000 + }, + { + "epoch": 10.81, + "learning_rate": 9.79770614445239e-08, + "loss": 3.8429, + "step": 982500 + }, + { + "epoch": 10.81, + "learning_rate": 9.79633093129435e-08, + "loss": 3.8448, + "step": 983000 + }, + { + "epoch": 10.82, + "learning_rate": 9.79495571813631e-08, + "loss": 3.8565, + "step": 983500 + }, + { + "epoch": 10.83, + "learning_rate": 9.793580504978271e-08, + "loss": 3.8517, + "step": 984000 + }, + { + "epoch": 10.83, + "learning_rate": 9.792205291820231e-08, + "loss": 3.8523, + "step": 984500 + }, + { + "epoch": 10.84, + "learning_rate": 9.790830078662192e-08, + "loss": 3.8534, + "step": 985000 + }, + { + "epoch": 10.84, + "learning_rate": 9.789454865504153e-08, + "loss": 3.8734, + "step": 985500 + }, + { + "epoch": 10.85, + "learning_rate": 9.788079652346112e-08, + "loss": 3.8567, + "step": 986000 + }, + { + "epoch": 10.85, + "learning_rate": 9.786704439188073e-08, + "loss": 3.8718, + "step": 986500 + }, + { + "epoch": 10.86, + "learning_rate": 9.785329226030034e-08, + "loss": 3.8782, + "step": 987000 + }, + { + "epoch": 10.86, + "learning_rate": 9.783954012871994e-08, + "loss": 3.8746, + "step": 987500 + }, + { + "epoch": 10.87, + "learning_rate": 9.782578799713955e-08, + "loss": 3.8683, + "step": 988000 + }, + { + "epoch": 10.88, + "learning_rate": 9.781203586555916e-08, + "loss": 3.8782, + "step": 988500 + }, + { + "epoch": 10.88, + "learning_rate": 9.779828373397877e-08, + "loss": 3.8757, + "step": 989000 + }, + { + "epoch": 10.89, + "learning_rate": 9.778453160239836e-08, + "loss": 3.8643, + "step": 989500 + }, + { + "epoch": 10.89, + "learning_rate": 9.777077947081797e-08, + "loss": 3.8603, + "step": 990000 + }, + { + "epoch": 10.9, + "learning_rate": 9.775702733923758e-08, + "loss": 3.8593, + "step": 990500 + }, + { + "epoch": 10.9, + "learning_rate": 9.774327520765718e-08, + "loss": 3.8638, + "step": 991000 + }, + { + "epoch": 10.91, + "learning_rate": 9.772952307607679e-08, + "loss": 3.8731, + "step": 991500 + }, + { + "epoch": 10.91, + "learning_rate": 9.77157709444964e-08, + "loss": 3.8735, + "step": 992000 + }, + { + "epoch": 10.92, + "learning_rate": 9.7702018812916e-08, + "loss": 3.859, + "step": 992500 + }, + { + "epoch": 10.92, + "learning_rate": 9.76882666813356e-08, + "loss": 3.8499, + "step": 993000 + }, + { + "epoch": 10.93, + "learning_rate": 9.767451454975521e-08, + "loss": 3.858, + "step": 993500 + }, + { + "epoch": 10.94, + "learning_rate": 9.766076241817482e-08, + "loss": 3.8575, + "step": 994000 + }, + { + "epoch": 10.94, + "learning_rate": 9.764701028659443e-08, + "loss": 3.8623, + "step": 994500 + }, + { + "epoch": 10.95, + "learning_rate": 9.763325815501402e-08, + "loss": 3.8699, + "step": 995000 + }, + { + "epoch": 10.95, + "learning_rate": 9.761950602343363e-08, + "loss": 3.8741, + "step": 995500 + }, + { + "epoch": 10.96, + "learning_rate": 9.760575389185324e-08, + "loss": 3.8574, + "step": 996000 + }, + { + "epoch": 10.96, + "learning_rate": 9.759200176027284e-08, + "loss": 3.8764, + "step": 996500 + }, + { + "epoch": 10.97, + "learning_rate": 9.757824962869245e-08, + "loss": 3.8547, + "step": 997000 + }, + { + "epoch": 10.97, + "learning_rate": 9.756449749711206e-08, + "loss": 3.8585, + "step": 997500 + }, + { + "epoch": 10.98, + "learning_rate": 9.755074536553165e-08, + "loss": 3.8546, + "step": 998000 + }, + { + "epoch": 10.99, + "learning_rate": 9.753699323395126e-08, + "loss": 3.8531, + "step": 998500 + }, + { + "epoch": 10.99, + "learning_rate": 9.752324110237087e-08, + "loss": 3.8498, + "step": 999000 + }, + { + "epoch": 11.0, + "learning_rate": 9.750948897079047e-08, + "loss": 3.8766, + "step": 999500 + }, + { + "epoch": 11.0, + "eval_loss": 3.895866870880127, + "eval_runtime": 6.1351, + "eval_samples_per_second": 253.299, + "step": 999845 + }, + { + "epoch": 11.0, + "learning_rate": 9.749573683921008e-08, + "loss": 3.8554, + "step": 1000000 + }, + { + "epoch": 11.01, + "learning_rate": 9.748198470762969e-08, + "loss": 3.8874, + "step": 1000500 + }, + { + "epoch": 11.01, + "learning_rate": 9.746823257604928e-08, + "loss": 3.8435, + "step": 1001000 + }, + { + "epoch": 11.02, + "learning_rate": 9.74544804444689e-08, + "loss": 3.8729, + "step": 1001500 + }, + { + "epoch": 11.02, + "learning_rate": 9.74407283128885e-08, + "loss": 3.8792, + "step": 1002000 + }, + { + "epoch": 11.03, + "learning_rate": 9.74269761813081e-08, + "loss": 3.8472, + "step": 1002500 + }, + { + "epoch": 11.03, + "learning_rate": 9.741322404972771e-08, + "loss": 3.8571, + "step": 1003000 + }, + { + "epoch": 11.04, + "learning_rate": 9.739947191814732e-08, + "loss": 3.8463, + "step": 1003500 + }, + { + "epoch": 11.05, + "learning_rate": 9.738571978656691e-08, + "loss": 3.8666, + "step": 1004000 + }, + { + "epoch": 11.05, + "learning_rate": 9.737196765498652e-08, + "loss": 3.8495, + "step": 1004500 + }, + { + "epoch": 11.06, + "learning_rate": 9.735821552340613e-08, + "loss": 3.869, + "step": 1005000 + }, + { + "epoch": 11.06, + "learning_rate": 9.734446339182573e-08, + "loss": 3.8619, + "step": 1005500 + }, + { + "epoch": 11.07, + "learning_rate": 9.733071126024534e-08, + "loss": 3.8636, + "step": 1006000 + }, + { + "epoch": 11.07, + "learning_rate": 9.731695912866495e-08, + "loss": 3.8448, + "step": 1006500 + }, + { + "epoch": 11.08, + "learning_rate": 9.730320699708454e-08, + "loss": 3.8611, + "step": 1007000 + }, + { + "epoch": 11.08, + "learning_rate": 9.728945486550415e-08, + "loss": 3.8451, + "step": 1007500 + }, + { + "epoch": 11.09, + "learning_rate": 9.727570273392376e-08, + "loss": 3.875, + "step": 1008000 + }, + { + "epoch": 11.1, + "learning_rate": 9.726195060234336e-08, + "loss": 3.8701, + "step": 1008500 + }, + { + "epoch": 11.1, + "learning_rate": 9.724819847076297e-08, + "loss": 3.8766, + "step": 1009000 + }, + { + "epoch": 11.11, + "learning_rate": 9.723444633918258e-08, + "loss": 3.8508, + "step": 1009500 + }, + { + "epoch": 11.11, + "learning_rate": 9.722069420760217e-08, + "loss": 3.8487, + "step": 1010000 + }, + { + "epoch": 11.12, + "learning_rate": 9.720694207602178e-08, + "loss": 3.8628, + "step": 1010500 + }, + { + "epoch": 11.12, + "learning_rate": 9.719318994444139e-08, + "loss": 3.8508, + "step": 1011000 + }, + { + "epoch": 11.13, + "learning_rate": 9.717943781286099e-08, + "loss": 3.8699, + "step": 1011500 + }, + { + "epoch": 11.13, + "learning_rate": 9.71656856812806e-08, + "loss": 3.8638, + "step": 1012000 + }, + { + "epoch": 11.14, + "learning_rate": 9.71519335497002e-08, + "loss": 3.8429, + "step": 1012500 + }, + { + "epoch": 11.14, + "learning_rate": 9.71381814181198e-08, + "loss": 3.8388, + "step": 1013000 + }, + { + "epoch": 11.15, + "learning_rate": 9.712442928653941e-08, + "loss": 3.8491, + "step": 1013500 + }, + { + "epoch": 11.16, + "learning_rate": 9.711067715495902e-08, + "loss": 3.8575, + "step": 1014000 + }, + { + "epoch": 11.16, + "learning_rate": 9.709692502337862e-08, + "loss": 3.8515, + "step": 1014500 + }, + { + "epoch": 11.17, + "learning_rate": 9.708317289179823e-08, + "loss": 3.8726, + "step": 1015000 + }, + { + "epoch": 11.17, + "learning_rate": 9.706942076021782e-08, + "loss": 3.8666, + "step": 1015500 + }, + { + "epoch": 11.18, + "learning_rate": 9.705566862863743e-08, + "loss": 3.8772, + "step": 1016000 + }, + { + "epoch": 11.18, + "learning_rate": 9.704191649705704e-08, + "loss": 3.8504, + "step": 1016500 + }, + { + "epoch": 11.19, + "learning_rate": 9.702816436547664e-08, + "loss": 3.8383, + "step": 1017000 + }, + { + "epoch": 11.19, + "learning_rate": 9.701441223389625e-08, + "loss": 3.8617, + "step": 1017500 + }, + { + "epoch": 11.2, + "learning_rate": 9.700066010231586e-08, + "loss": 3.842, + "step": 1018000 + }, + { + "epoch": 11.21, + "learning_rate": 9.698690797073545e-08, + "loss": 3.8596, + "step": 1018500 + }, + { + "epoch": 11.21, + "learning_rate": 9.697315583915506e-08, + "loss": 3.8662, + "step": 1019000 + }, + { + "epoch": 11.22, + "learning_rate": 9.695940370757467e-08, + "loss": 3.8617, + "step": 1019500 + }, + { + "epoch": 11.22, + "learning_rate": 9.694565157599427e-08, + "loss": 3.8479, + "step": 1020000 + }, + { + "epoch": 11.23, + "learning_rate": 9.693189944441388e-08, + "loss": 3.8726, + "step": 1020500 + }, + { + "epoch": 11.23, + "learning_rate": 9.691814731283349e-08, + "loss": 3.8639, + "step": 1021000 + }, + { + "epoch": 11.24, + "learning_rate": 9.690439518125308e-08, + "loss": 3.8646, + "step": 1021500 + }, + { + "epoch": 11.24, + "learning_rate": 9.689064304967269e-08, + "loss": 3.8641, + "step": 1022000 + }, + { + "epoch": 11.25, + "learning_rate": 9.68768909180923e-08, + "loss": 3.8669, + "step": 1022500 + }, + { + "epoch": 11.25, + "learning_rate": 9.68631387865119e-08, + "loss": 3.8502, + "step": 1023000 + }, + { + "epoch": 11.26, + "learning_rate": 9.68493866549315e-08, + "loss": 3.8813, + "step": 1023500 + }, + { + "epoch": 11.27, + "learning_rate": 9.683563452335112e-08, + "loss": 3.8592, + "step": 1024000 + }, + { + "epoch": 11.27, + "learning_rate": 9.682188239177071e-08, + "loss": 3.8553, + "step": 1024500 + }, + { + "epoch": 11.28, + "learning_rate": 9.680813026019032e-08, + "loss": 3.8477, + "step": 1025000 + }, + { + "epoch": 11.28, + "learning_rate": 9.679437812860993e-08, + "loss": 3.8418, + "step": 1025500 + }, + { + "epoch": 11.29, + "learning_rate": 9.678062599702953e-08, + "loss": 3.8389, + "step": 1026000 + }, + { + "epoch": 11.29, + "learning_rate": 9.676687386544914e-08, + "loss": 3.8577, + "step": 1026500 + }, + { + "epoch": 11.3, + "learning_rate": 9.675312173386875e-08, + "loss": 3.8561, + "step": 1027000 + }, + { + "epoch": 11.3, + "learning_rate": 9.673936960228834e-08, + "loss": 3.8564, + "step": 1027500 + }, + { + "epoch": 11.31, + "learning_rate": 9.672561747070795e-08, + "loss": 3.8499, + "step": 1028000 + }, + { + "epoch": 11.32, + "learning_rate": 9.671186533912756e-08, + "loss": 3.8395, + "step": 1028500 + }, + { + "epoch": 11.32, + "learning_rate": 9.669811320754716e-08, + "loss": 3.8522, + "step": 1029000 + }, + { + "epoch": 11.33, + "learning_rate": 9.668436107596677e-08, + "loss": 3.8551, + "step": 1029500 + }, + { + "epoch": 11.33, + "learning_rate": 9.667060894438637e-08, + "loss": 3.8493, + "step": 1030000 + }, + { + "epoch": 11.34, + "learning_rate": 9.665685681280597e-08, + "loss": 3.8701, + "step": 1030500 + }, + { + "epoch": 11.34, + "learning_rate": 9.664310468122558e-08, + "loss": 3.8614, + "step": 1031000 + }, + { + "epoch": 11.35, + "learning_rate": 9.662935254964519e-08, + "loss": 3.8588, + "step": 1031500 + }, + { + "epoch": 11.35, + "learning_rate": 9.661560041806479e-08, + "loss": 3.8544, + "step": 1032000 + }, + { + "epoch": 11.36, + "learning_rate": 9.66018482864844e-08, + "loss": 3.8473, + "step": 1032500 + }, + { + "epoch": 11.36, + "learning_rate": 9.6588096154904e-08, + "loss": 3.8432, + "step": 1033000 + }, + { + "epoch": 11.37, + "learning_rate": 9.65743440233236e-08, + "loss": 3.8371, + "step": 1033500 + }, + { + "epoch": 11.38, + "learning_rate": 9.656059189174321e-08, + "loss": 3.8503, + "step": 1034000 + }, + { + "epoch": 11.38, + "learning_rate": 9.654683976016282e-08, + "loss": 3.8621, + "step": 1034500 + }, + { + "epoch": 11.39, + "learning_rate": 9.653308762858242e-08, + "loss": 3.8454, + "step": 1035000 + }, + { + "epoch": 11.39, + "learning_rate": 9.651933549700202e-08, + "loss": 3.8663, + "step": 1035500 + }, + { + "epoch": 11.4, + "learning_rate": 9.650558336542163e-08, + "loss": 3.8392, + "step": 1036000 + }, + { + "epoch": 11.4, + "learning_rate": 9.649183123384124e-08, + "loss": 3.8508, + "step": 1036500 + }, + { + "epoch": 11.41, + "learning_rate": 9.647807910226084e-08, + "loss": 3.849, + "step": 1037000 + }, + { + "epoch": 11.41, + "learning_rate": 9.646432697068045e-08, + "loss": 3.8781, + "step": 1037500 + }, + { + "epoch": 11.42, + "learning_rate": 9.645057483910006e-08, + "loss": 3.8599, + "step": 1038000 + }, + { + "epoch": 11.43, + "learning_rate": 9.643682270751965e-08, + "loss": 3.8613, + "step": 1038500 + }, + { + "epoch": 11.43, + "learning_rate": 9.642307057593926e-08, + "loss": 3.853, + "step": 1039000 + }, + { + "epoch": 11.44, + "learning_rate": 9.640931844435887e-08, + "loss": 3.8524, + "step": 1039500 + }, + { + "epoch": 11.44, + "learning_rate": 9.639556631277848e-08, + "loss": 3.8483, + "step": 1040000 + }, + { + "epoch": 11.45, + "learning_rate": 9.638181418119808e-08, + "loss": 3.8465, + "step": 1040500 + }, + { + "epoch": 11.45, + "learning_rate": 9.636806204961769e-08, + "loss": 3.8485, + "step": 1041000 + }, + { + "epoch": 11.46, + "learning_rate": 9.63543099180373e-08, + "loss": 3.8652, + "step": 1041500 + }, + { + "epoch": 11.46, + "learning_rate": 9.63405577864569e-08, + "loss": 3.8547, + "step": 1042000 + }, + { + "epoch": 11.47, + "learning_rate": 9.63268056548765e-08, + "loss": 3.8508, + "step": 1042500 + }, + { + "epoch": 11.47, + "learning_rate": 9.631305352329611e-08, + "loss": 3.8684, + "step": 1043000 + }, + { + "epoch": 11.48, + "learning_rate": 9.629930139171572e-08, + "loss": 3.8516, + "step": 1043500 + }, + { + "epoch": 11.49, + "learning_rate": 9.628554926013532e-08, + "loss": 3.8452, + "step": 1044000 + }, + { + "epoch": 11.49, + "learning_rate": 9.627179712855493e-08, + "loss": 3.8465, + "step": 1044500 + }, + { + "epoch": 11.5, + "learning_rate": 9.625804499697454e-08, + "loss": 3.8386, + "step": 1045000 + }, + { + "epoch": 11.5, + "learning_rate": 9.624429286539413e-08, + "loss": 3.8555, + "step": 1045500 + }, + { + "epoch": 11.51, + "learning_rate": 9.623054073381374e-08, + "loss": 3.8634, + "step": 1046000 + }, + { + "epoch": 11.51, + "learning_rate": 9.621678860223335e-08, + "loss": 3.8556, + "step": 1046500 + }, + { + "epoch": 11.52, + "learning_rate": 9.620303647065295e-08, + "loss": 3.8601, + "step": 1047000 + }, + { + "epoch": 11.52, + "learning_rate": 9.618928433907256e-08, + "loss": 3.8625, + "step": 1047500 + }, + { + "epoch": 11.53, + "learning_rate": 9.617553220749217e-08, + "loss": 3.8585, + "step": 1048000 + }, + { + "epoch": 11.54, + "learning_rate": 9.616178007591176e-08, + "loss": 3.8576, + "step": 1048500 + }, + { + "epoch": 11.54, + "learning_rate": 9.614802794433137e-08, + "loss": 3.853, + "step": 1049000 + }, + { + "epoch": 11.55, + "learning_rate": 9.613427581275098e-08, + "loss": 3.8545, + "step": 1049500 + }, + { + "epoch": 11.55, + "learning_rate": 9.612052368117058e-08, + "loss": 3.85, + "step": 1050000 + }, + { + "epoch": 11.56, + "learning_rate": 9.610677154959019e-08, + "loss": 3.8578, + "step": 1050500 + }, + { + "epoch": 11.56, + "learning_rate": 9.60930194180098e-08, + "loss": 3.8581, + "step": 1051000 + }, + { + "epoch": 11.57, + "learning_rate": 9.607926728642939e-08, + "loss": 3.8568, + "step": 1051500 + }, + { + "epoch": 11.57, + "learning_rate": 9.6065515154849e-08, + "loss": 3.8609, + "step": 1052000 + }, + { + "epoch": 11.58, + "learning_rate": 9.605176302326861e-08, + "loss": 3.8379, + "step": 1052500 + }, + { + "epoch": 11.58, + "learning_rate": 9.60380108916882e-08, + "loss": 3.8529, + "step": 1053000 + }, + { + "epoch": 11.59, + "learning_rate": 9.602425876010782e-08, + "loss": 3.8429, + "step": 1053500 + }, + { + "epoch": 11.6, + "learning_rate": 9.601050662852742e-08, + "loss": 3.8343, + "step": 1054000 + }, + { + "epoch": 11.6, + "learning_rate": 9.599675449694702e-08, + "loss": 3.8305, + "step": 1054500 + }, + { + "epoch": 11.61, + "learning_rate": 9.598300236536663e-08, + "loss": 3.8729, + "step": 1055000 + }, + { + "epoch": 11.61, + "learning_rate": 9.596925023378624e-08, + "loss": 3.8549, + "step": 1055500 + }, + { + "epoch": 11.62, + "learning_rate": 9.595549810220584e-08, + "loss": 3.8515, + "step": 1056000 + }, + { + "epoch": 11.62, + "learning_rate": 9.594174597062544e-08, + "loss": 3.8538, + "step": 1056500 + }, + { + "epoch": 11.63, + "learning_rate": 9.592799383904505e-08, + "loss": 3.8531, + "step": 1057000 + }, + { + "epoch": 11.63, + "learning_rate": 9.591424170746465e-08, + "loss": 3.8471, + "step": 1057500 + }, + { + "epoch": 11.64, + "learning_rate": 9.590048957588426e-08, + "loss": 3.8409, + "step": 1058000 + }, + { + "epoch": 11.65, + "learning_rate": 9.588673744430387e-08, + "loss": 3.8684, + "step": 1058500 + }, + { + "epoch": 11.65, + "learning_rate": 9.587298531272347e-08, + "loss": 3.8542, + "step": 1059000 + }, + { + "epoch": 11.66, + "learning_rate": 9.585923318114307e-08, + "loss": 3.8459, + "step": 1059500 + }, + { + "epoch": 11.66, + "learning_rate": 9.584548104956268e-08, + "loss": 3.8347, + "step": 1060000 + }, + { + "epoch": 11.67, + "learning_rate": 9.583172891798228e-08, + "loss": 3.8486, + "step": 1060500 + }, + { + "epoch": 11.67, + "learning_rate": 9.581797678640189e-08, + "loss": 3.8488, + "step": 1061000 + }, + { + "epoch": 11.68, + "learning_rate": 9.58042246548215e-08, + "loss": 3.8714, + "step": 1061500 + }, + { + "epoch": 11.68, + "learning_rate": 9.57904725232411e-08, + "loss": 3.8863, + "step": 1062000 + }, + { + "epoch": 11.69, + "learning_rate": 9.57767203916607e-08, + "loss": 3.8484, + "step": 1062500 + }, + { + "epoch": 11.69, + "learning_rate": 9.576296826008031e-08, + "loss": 3.853, + "step": 1063000 + }, + { + "epoch": 11.7, + "learning_rate": 9.574921612849991e-08, + "loss": 3.8334, + "step": 1063500 + }, + { + "epoch": 11.71, + "learning_rate": 9.573546399691952e-08, + "loss": 3.8506, + "step": 1064000 + }, + { + "epoch": 11.71, + "learning_rate": 9.572171186533913e-08, + "loss": 3.8327, + "step": 1064500 + }, + { + "epoch": 11.72, + "learning_rate": 9.570795973375872e-08, + "loss": 3.8461, + "step": 1065000 + }, + { + "epoch": 11.72, + "learning_rate": 9.569420760217833e-08, + "loss": 3.8453, + "step": 1065500 + }, + { + "epoch": 11.73, + "learning_rate": 9.568045547059794e-08, + "loss": 3.8445, + "step": 1066000 + }, + { + "epoch": 11.73, + "learning_rate": 9.566670333901754e-08, + "loss": 3.8568, + "step": 1066500 + }, + { + "epoch": 11.74, + "learning_rate": 9.565295120743715e-08, + "loss": 3.8575, + "step": 1067000 + }, + { + "epoch": 11.74, + "learning_rate": 9.563919907585676e-08, + "loss": 3.8499, + "step": 1067500 + }, + { + "epoch": 11.75, + "learning_rate": 9.562544694427635e-08, + "loss": 3.8452, + "step": 1068000 + }, + { + "epoch": 11.76, + "learning_rate": 9.561169481269596e-08, + "loss": 3.8769, + "step": 1068500 + }, + { + "epoch": 11.76, + "learning_rate": 9.559794268111557e-08, + "loss": 3.8759, + "step": 1069000 + }, + { + "epoch": 11.77, + "learning_rate": 9.558419054953517e-08, + "loss": 3.8518, + "step": 1069500 + }, + { + "epoch": 11.77, + "learning_rate": 9.557043841795478e-08, + "loss": 3.8602, + "step": 1070000 + }, + { + "epoch": 11.78, + "learning_rate": 9.555668628637439e-08, + "loss": 3.8502, + "step": 1070500 + }, + { + "epoch": 11.78, + "learning_rate": 9.554293415479398e-08, + "loss": 3.851, + "step": 1071000 + }, + { + "epoch": 11.79, + "learning_rate": 9.552918202321359e-08, + "loss": 3.8361, + "step": 1071500 + }, + { + "epoch": 11.79, + "learning_rate": 9.55154298916332e-08, + "loss": 3.854, + "step": 1072000 + }, + { + "epoch": 11.8, + "learning_rate": 9.55016777600528e-08, + "loss": 3.8515, + "step": 1072500 + }, + { + "epoch": 11.8, + "learning_rate": 9.548792562847241e-08, + "loss": 3.8648, + "step": 1073000 + }, + { + "epoch": 11.81, + "learning_rate": 9.547417349689202e-08, + "loss": 3.842, + "step": 1073500 + }, + { + "epoch": 11.82, + "learning_rate": 9.546042136531161e-08, + "loss": 3.861, + "step": 1074000 + }, + { + "epoch": 11.82, + "learning_rate": 9.544666923373122e-08, + "loss": 3.845, + "step": 1074500 + }, + { + "epoch": 11.83, + "learning_rate": 9.543291710215083e-08, + "loss": 3.8699, + "step": 1075000 + }, + { + "epoch": 11.83, + "learning_rate": 9.541916497057043e-08, + "loss": 3.8418, + "step": 1075500 + }, + { + "epoch": 11.84, + "learning_rate": 9.540541283899004e-08, + "loss": 3.852, + "step": 1076000 + }, + { + "epoch": 11.84, + "learning_rate": 9.539166070740965e-08, + "loss": 3.851, + "step": 1076500 + }, + { + "epoch": 11.85, + "learning_rate": 9.537790857582924e-08, + "loss": 3.8313, + "step": 1077000 + }, + { + "epoch": 11.85, + "learning_rate": 9.536415644424885e-08, + "loss": 3.8516, + "step": 1077500 + }, + { + "epoch": 11.86, + "learning_rate": 9.535040431266846e-08, + "loss": 3.8357, + "step": 1078000 + }, + { + "epoch": 11.87, + "learning_rate": 9.533665218108806e-08, + "loss": 3.8461, + "step": 1078500 + }, + { + "epoch": 11.87, + "learning_rate": 9.532290004950767e-08, + "loss": 3.8608, + "step": 1079000 + }, + { + "epoch": 11.88, + "learning_rate": 9.530914791792728e-08, + "loss": 3.8533, + "step": 1079500 + }, + { + "epoch": 11.88, + "learning_rate": 9.529539578634687e-08, + "loss": 3.8361, + "step": 1080000 + }, + { + "epoch": 11.89, + "learning_rate": 9.528164365476648e-08, + "loss": 3.8455, + "step": 1080500 + }, + { + "epoch": 11.89, + "learning_rate": 9.526789152318609e-08, + "loss": 3.8398, + "step": 1081000 + }, + { + "epoch": 11.9, + "learning_rate": 9.525413939160569e-08, + "loss": 3.8553, + "step": 1081500 + }, + { + "epoch": 11.9, + "learning_rate": 9.52403872600253e-08, + "loss": 3.8275, + "step": 1082000 + }, + { + "epoch": 11.91, + "learning_rate": 9.52266351284449e-08, + "loss": 3.8512, + "step": 1082500 + }, + { + "epoch": 11.91, + "learning_rate": 9.52128829968645e-08, + "loss": 3.8606, + "step": 1083000 + }, + { + "epoch": 11.92, + "learning_rate": 9.519913086528411e-08, + "loss": 3.858, + "step": 1083500 + }, + { + "epoch": 11.93, + "learning_rate": 9.518537873370372e-08, + "loss": 3.8465, + "step": 1084000 + }, + { + "epoch": 11.93, + "learning_rate": 9.517162660212332e-08, + "loss": 3.8482, + "step": 1084500 + }, + { + "epoch": 11.94, + "learning_rate": 9.515787447054293e-08, + "loss": 3.8532, + "step": 1085000 + }, + { + "epoch": 11.94, + "learning_rate": 9.514412233896254e-08, + "loss": 3.843, + "step": 1085500 + }, + { + "epoch": 11.95, + "learning_rate": 9.513037020738213e-08, + "loss": 3.85, + "step": 1086000 + }, + { + "epoch": 11.95, + "learning_rate": 9.511661807580174e-08, + "loss": 3.8532, + "step": 1086500 + }, + { + "epoch": 11.96, + "learning_rate": 9.510286594422135e-08, + "loss": 3.8317, + "step": 1087000 + }, + { + "epoch": 11.96, + "learning_rate": 9.508911381264096e-08, + "loss": 3.8499, + "step": 1087500 + }, + { + "epoch": 11.97, + "learning_rate": 9.507536168106056e-08, + "loss": 3.8736, + "step": 1088000 + }, + { + "epoch": 11.98, + "learning_rate": 9.506160954948016e-08, + "loss": 3.8361, + "step": 1088500 + }, + { + "epoch": 11.98, + "learning_rate": 9.504785741789977e-08, + "loss": 3.851, + "step": 1089000 + }, + { + "epoch": 11.99, + "learning_rate": 9.503410528631938e-08, + "loss": 3.8527, + "step": 1089500 + }, + { + "epoch": 11.99, + "learning_rate": 9.502035315473898e-08, + "loss": 3.8495, + "step": 1090000 + }, + { + "epoch": 12.0, + "learning_rate": 9.500660102315859e-08, + "loss": 3.8611, + "step": 1090500 + }, + { + "epoch": 12.0, + "eval_loss": 3.8887219429016113, + "eval_runtime": 6.1376, + "eval_samples_per_second": 253.195, + "step": 1090740 + }, + { + "epoch": 12.0, + "learning_rate": 9.49928488915782e-08, + "loss": 3.8234, + "step": 1091000 + }, + { + "epoch": 12.01, + "learning_rate": 9.49790967599978e-08, + "loss": 3.8437, + "step": 1091500 + }, + { + "epoch": 12.01, + "learning_rate": 9.49653446284174e-08, + "loss": 3.8494, + "step": 1092000 + }, + { + "epoch": 12.02, + "learning_rate": 9.495159249683701e-08, + "loss": 3.8533, + "step": 1092500 + }, + { + "epoch": 12.02, + "learning_rate": 9.493784036525661e-08, + "loss": 3.8646, + "step": 1093000 + }, + { + "epoch": 12.03, + "learning_rate": 9.492408823367622e-08, + "loss": 3.8291, + "step": 1093500 + }, + { + "epoch": 12.04, + "learning_rate": 9.491033610209583e-08, + "loss": 3.8515, + "step": 1094000 + }, + { + "epoch": 12.04, + "learning_rate": 9.489658397051542e-08, + "loss": 3.85, + "step": 1094500 + }, + { + "epoch": 12.05, + "learning_rate": 9.488283183893503e-08, + "loss": 3.8505, + "step": 1095000 + }, + { + "epoch": 12.05, + "learning_rate": 9.486907970735464e-08, + "loss": 3.8356, + "step": 1095500 + }, + { + "epoch": 12.06, + "learning_rate": 9.485532757577424e-08, + "loss": 3.8407, + "step": 1096000 + }, + { + "epoch": 12.06, + "learning_rate": 9.484157544419385e-08, + "loss": 3.8344, + "step": 1096500 + }, + { + "epoch": 12.07, + "learning_rate": 9.482782331261346e-08, + "loss": 3.8445, + "step": 1097000 + }, + { + "epoch": 12.07, + "learning_rate": 9.481407118103305e-08, + "loss": 3.8602, + "step": 1097500 + }, + { + "epoch": 12.08, + "learning_rate": 9.480031904945266e-08, + "loss": 3.8586, + "step": 1098000 + }, + { + "epoch": 12.09, + "learning_rate": 9.478656691787227e-08, + "loss": 3.8459, + "step": 1098500 + }, + { + "epoch": 12.09, + "learning_rate": 9.477281478629187e-08, + "loss": 3.8466, + "step": 1099000 + }, + { + "epoch": 12.1, + "learning_rate": 9.475906265471148e-08, + "loss": 3.8512, + "step": 1099500 + }, + { + "epoch": 12.1, + "learning_rate": 9.474531052313109e-08, + "loss": 3.8545, + "step": 1100000 + }, + { + "epoch": 12.11, + "learning_rate": 9.473155839155068e-08, + "loss": 3.8633, + "step": 1100500 + }, + { + "epoch": 12.11, + "learning_rate": 9.471780625997029e-08, + "loss": 3.8301, + "step": 1101000 + }, + { + "epoch": 12.12, + "learning_rate": 9.47040541283899e-08, + "loss": 3.8708, + "step": 1101500 + }, + { + "epoch": 12.12, + "learning_rate": 9.46903019968095e-08, + "loss": 3.8366, + "step": 1102000 + }, + { + "epoch": 12.13, + "learning_rate": 9.467654986522911e-08, + "loss": 3.8454, + "step": 1102500 + }, + { + "epoch": 12.13, + "learning_rate": 9.466279773364872e-08, + "loss": 3.8364, + "step": 1103000 + }, + { + "epoch": 12.14, + "learning_rate": 9.464904560206831e-08, + "loss": 3.8277, + "step": 1103500 + }, + { + "epoch": 12.15, + "learning_rate": 9.463529347048792e-08, + "loss": 3.8566, + "step": 1104000 + }, + { + "epoch": 12.15, + "learning_rate": 9.462154133890753e-08, + "loss": 3.8562, + "step": 1104500 + }, + { + "epoch": 12.16, + "learning_rate": 9.460778920732713e-08, + "loss": 3.8211, + "step": 1105000 + }, + { + "epoch": 12.16, + "learning_rate": 9.459403707574674e-08, + "loss": 3.8784, + "step": 1105500 + }, + { + "epoch": 12.17, + "learning_rate": 9.458028494416635e-08, + "loss": 3.8409, + "step": 1106000 + }, + { + "epoch": 12.17, + "learning_rate": 9.456653281258594e-08, + "loss": 3.8558, + "step": 1106500 + }, + { + "epoch": 12.18, + "learning_rate": 9.455278068100555e-08, + "loss": 3.8583, + "step": 1107000 + }, + { + "epoch": 12.18, + "learning_rate": 9.453902854942516e-08, + "loss": 3.8558, + "step": 1107500 + }, + { + "epoch": 12.19, + "learning_rate": 9.452527641784476e-08, + "loss": 3.8456, + "step": 1108000 + }, + { + "epoch": 12.2, + "learning_rate": 9.451152428626437e-08, + "loss": 3.8566, + "step": 1108500 + }, + { + "epoch": 12.2, + "learning_rate": 9.449777215468398e-08, + "loss": 3.8432, + "step": 1109000 + }, + { + "epoch": 12.21, + "learning_rate": 9.448402002310357e-08, + "loss": 3.8462, + "step": 1109500 + }, + { + "epoch": 12.21, + "learning_rate": 9.447026789152318e-08, + "loss": 3.8483, + "step": 1110000 + }, + { + "epoch": 12.22, + "learning_rate": 9.445651575994279e-08, + "loss": 3.8542, + "step": 1110500 + }, + { + "epoch": 12.22, + "learning_rate": 9.444276362836239e-08, + "loss": 3.8275, + "step": 1111000 + }, + { + "epoch": 12.23, + "learning_rate": 9.4429011496782e-08, + "loss": 3.8589, + "step": 1111500 + }, + { + "epoch": 12.23, + "learning_rate": 9.44152593652016e-08, + "loss": 3.8548, + "step": 1112000 + }, + { + "epoch": 12.24, + "learning_rate": 9.44015072336212e-08, + "loss": 3.8475, + "step": 1112500 + }, + { + "epoch": 12.24, + "learning_rate": 9.438775510204081e-08, + "loss": 3.8447, + "step": 1113000 + }, + { + "epoch": 12.25, + "learning_rate": 9.437400297046042e-08, + "loss": 3.8368, + "step": 1113500 + }, + { + "epoch": 12.26, + "learning_rate": 9.436025083888002e-08, + "loss": 3.8414, + "step": 1114000 + }, + { + "epoch": 12.26, + "learning_rate": 9.434649870729963e-08, + "loss": 3.8453, + "step": 1114500 + }, + { + "epoch": 12.27, + "learning_rate": 9.433274657571924e-08, + "loss": 3.8514, + "step": 1115000 + }, + { + "epoch": 12.27, + "learning_rate": 9.431899444413883e-08, + "loss": 3.8609, + "step": 1115500 + }, + { + "epoch": 12.28, + "learning_rate": 9.430524231255844e-08, + "loss": 3.8691, + "step": 1116000 + }, + { + "epoch": 12.28, + "learning_rate": 9.429149018097805e-08, + "loss": 3.8496, + "step": 1116500 + }, + { + "epoch": 12.29, + "learning_rate": 9.427773804939765e-08, + "loss": 3.8407, + "step": 1117000 + }, + { + "epoch": 12.29, + "learning_rate": 9.426398591781726e-08, + "loss": 3.854, + "step": 1117500 + }, + { + "epoch": 12.3, + "learning_rate": 9.425023378623686e-08, + "loss": 3.8544, + "step": 1118000 + }, + { + "epoch": 12.31, + "learning_rate": 9.423648165465646e-08, + "loss": 3.8364, + "step": 1118500 + }, + { + "epoch": 12.31, + "learning_rate": 9.422272952307607e-08, + "loss": 3.8483, + "step": 1119000 + }, + { + "epoch": 12.32, + "learning_rate": 9.420897739149568e-08, + "loss": 3.8348, + "step": 1119500 + }, + { + "epoch": 12.32, + "learning_rate": 9.419522525991528e-08, + "loss": 3.8491, + "step": 1120000 + }, + { + "epoch": 12.33, + "learning_rate": 9.418147312833488e-08, + "loss": 3.8641, + "step": 1120500 + }, + { + "epoch": 12.33, + "learning_rate": 9.41677209967545e-08, + "loss": 3.8354, + "step": 1121000 + }, + { + "epoch": 12.34, + "learning_rate": 9.415396886517409e-08, + "loss": 3.8441, + "step": 1121500 + }, + { + "epoch": 12.34, + "learning_rate": 9.41402167335937e-08, + "loss": 3.8482, + "step": 1122000 + }, + { + "epoch": 12.35, + "learning_rate": 9.412646460201331e-08, + "loss": 3.8572, + "step": 1122500 + }, + { + "epoch": 12.35, + "learning_rate": 9.41127124704329e-08, + "loss": 3.8433, + "step": 1123000 + }, + { + "epoch": 12.36, + "learning_rate": 9.409896033885251e-08, + "loss": 3.8443, + "step": 1123500 + }, + { + "epoch": 12.37, + "learning_rate": 9.408520820727212e-08, + "loss": 3.8391, + "step": 1124000 + }, + { + "epoch": 12.37, + "learning_rate": 9.407145607569172e-08, + "loss": 3.8617, + "step": 1124500 + }, + { + "epoch": 12.38, + "learning_rate": 9.405770394411133e-08, + "loss": 3.8388, + "step": 1125000 + }, + { + "epoch": 12.38, + "learning_rate": 9.404395181253094e-08, + "loss": 3.8581, + "step": 1125500 + }, + { + "epoch": 12.39, + "learning_rate": 9.403019968095053e-08, + "loss": 3.8504, + "step": 1126000 + }, + { + "epoch": 12.39, + "learning_rate": 9.401644754937014e-08, + "loss": 3.8322, + "step": 1126500 + }, + { + "epoch": 12.4, + "learning_rate": 9.400269541778975e-08, + "loss": 3.8383, + "step": 1127000 + }, + { + "epoch": 12.4, + "learning_rate": 9.398894328620935e-08, + "loss": 3.8522, + "step": 1127500 + }, + { + "epoch": 12.41, + "learning_rate": 9.397519115462896e-08, + "loss": 3.8329, + "step": 1128000 + }, + { + "epoch": 12.42, + "learning_rate": 9.396143902304857e-08, + "loss": 3.8356, + "step": 1128500 + }, + { + "epoch": 12.42, + "learning_rate": 9.394768689146816e-08, + "loss": 3.8424, + "step": 1129000 + }, + { + "epoch": 12.43, + "learning_rate": 9.393393475988777e-08, + "loss": 3.8158, + "step": 1129500 + }, + { + "epoch": 12.43, + "learning_rate": 9.392018262830738e-08, + "loss": 3.8494, + "step": 1130000 + }, + { + "epoch": 12.44, + "learning_rate": 9.390643049672698e-08, + "loss": 3.8515, + "step": 1130500 + }, + { + "epoch": 12.44, + "learning_rate": 9.389267836514659e-08, + "loss": 3.8338, + "step": 1131000 + }, + { + "epoch": 12.45, + "learning_rate": 9.38789262335662e-08, + "loss": 3.8524, + "step": 1131500 + }, + { + "epoch": 12.45, + "learning_rate": 9.38651741019858e-08, + "loss": 3.8483, + "step": 1132000 + }, + { + "epoch": 12.46, + "learning_rate": 9.38514219704054e-08, + "loss": 3.8332, + "step": 1132500 + }, + { + "epoch": 12.46, + "learning_rate": 9.383766983882501e-08, + "loss": 3.8514, + "step": 1133000 + }, + { + "epoch": 12.47, + "learning_rate": 9.382391770724461e-08, + "loss": 3.8435, + "step": 1133500 + }, + { + "epoch": 12.48, + "learning_rate": 9.381016557566422e-08, + "loss": 3.8634, + "step": 1134000 + }, + { + "epoch": 12.48, + "learning_rate": 9.379641344408383e-08, + "loss": 3.8614, + "step": 1134500 + }, + { + "epoch": 12.49, + "learning_rate": 9.378266131250344e-08, + "loss": 3.8595, + "step": 1135000 + }, + { + "epoch": 12.49, + "learning_rate": 9.376890918092303e-08, + "loss": 3.8436, + "step": 1135500 + }, + { + "epoch": 12.5, + "learning_rate": 9.375515704934264e-08, + "loss": 3.8546, + "step": 1136000 + }, + { + "epoch": 12.5, + "learning_rate": 9.374140491776225e-08, + "loss": 3.8584, + "step": 1136500 + }, + { + "epoch": 12.51, + "learning_rate": 9.372765278618186e-08, + "loss": 3.847, + "step": 1137000 + }, + { + "epoch": 12.51, + "learning_rate": 9.371390065460146e-08, + "loss": 3.8429, + "step": 1137500 + }, + { + "epoch": 12.52, + "learning_rate": 9.370014852302107e-08, + "loss": 3.8421, + "step": 1138000 + }, + { + "epoch": 12.53, + "learning_rate": 9.368639639144068e-08, + "loss": 3.8301, + "step": 1138500 + }, + { + "epoch": 12.53, + "learning_rate": 9.367264425986027e-08, + "loss": 3.8522, + "step": 1139000 + }, + { + "epoch": 12.54, + "learning_rate": 9.365889212827988e-08, + "loss": 3.8434, + "step": 1139500 + }, + { + "epoch": 12.54, + "learning_rate": 9.364513999669949e-08, + "loss": 3.8342, + "step": 1140000 + }, + { + "epoch": 12.55, + "learning_rate": 9.36313878651191e-08, + "loss": 3.8511, + "step": 1140500 + }, + { + "epoch": 12.55, + "learning_rate": 9.36176357335387e-08, + "loss": 3.8365, + "step": 1141000 + }, + { + "epoch": 12.56, + "learning_rate": 9.36038836019583e-08, + "loss": 3.8405, + "step": 1141500 + }, + { + "epoch": 12.56, + "learning_rate": 9.359013147037791e-08, + "loss": 3.8439, + "step": 1142000 + }, + { + "epoch": 12.57, + "learning_rate": 9.357637933879751e-08, + "loss": 3.856, + "step": 1142500 + }, + { + "epoch": 12.57, + "learning_rate": 9.356262720721712e-08, + "loss": 3.8466, + "step": 1143000 + }, + { + "epoch": 12.58, + "learning_rate": 9.354887507563673e-08, + "loss": 3.8499, + "step": 1143500 + }, + { + "epoch": 12.59, + "learning_rate": 9.353512294405633e-08, + "loss": 3.8465, + "step": 1144000 + }, + { + "epoch": 12.59, + "learning_rate": 9.352137081247593e-08, + "loss": 3.8395, + "step": 1144500 + }, + { + "epoch": 12.6, + "learning_rate": 9.350761868089554e-08, + "loss": 3.8371, + "step": 1145000 + }, + { + "epoch": 12.6, + "learning_rate": 9.349386654931514e-08, + "loss": 3.8399, + "step": 1145500 + }, + { + "epoch": 12.61, + "learning_rate": 9.348011441773475e-08, + "loss": 3.8291, + "step": 1146000 + }, + { + "epoch": 12.61, + "learning_rate": 9.346636228615436e-08, + "loss": 3.8304, + "step": 1146500 + }, + { + "epoch": 12.62, + "learning_rate": 9.345261015457396e-08, + "loss": 3.8305, + "step": 1147000 + }, + { + "epoch": 12.62, + "learning_rate": 9.343885802299356e-08, + "loss": 3.8472, + "step": 1147500 + }, + { + "epoch": 12.63, + "learning_rate": 9.342510589141317e-08, + "loss": 3.8415, + "step": 1148000 + }, + { + "epoch": 12.64, + "learning_rate": 9.341135375983277e-08, + "loss": 3.8467, + "step": 1148500 + }, + { + "epoch": 12.64, + "learning_rate": 9.339760162825238e-08, + "loss": 3.8542, + "step": 1149000 + }, + { + "epoch": 12.65, + "learning_rate": 9.338384949667199e-08, + "loss": 3.8424, + "step": 1149500 + }, + { + "epoch": 12.65, + "learning_rate": 9.337009736509158e-08, + "loss": 3.8254, + "step": 1150000 + }, + { + "epoch": 12.66, + "learning_rate": 9.33563452335112e-08, + "loss": 3.8499, + "step": 1150500 + }, + { + "epoch": 12.66, + "learning_rate": 9.33425931019308e-08, + "loss": 3.8562, + "step": 1151000 + }, + { + "epoch": 12.67, + "learning_rate": 9.33288409703504e-08, + "loss": 3.8467, + "step": 1151500 + }, + { + "epoch": 12.67, + "learning_rate": 9.331508883877001e-08, + "loss": 3.8452, + "step": 1152000 + }, + { + "epoch": 12.68, + "learning_rate": 9.330133670718962e-08, + "loss": 3.8336, + "step": 1152500 + }, + { + "epoch": 12.68, + "learning_rate": 9.328758457560921e-08, + "loss": 3.823, + "step": 1153000 + }, + { + "epoch": 12.69, + "learning_rate": 9.327383244402882e-08, + "loss": 3.8553, + "step": 1153500 + }, + { + "epoch": 12.7, + "learning_rate": 9.326008031244843e-08, + "loss": 3.8567, + "step": 1154000 + }, + { + "epoch": 12.7, + "learning_rate": 9.324632818086803e-08, + "loss": 3.8446, + "step": 1154500 + }, + { + "epoch": 12.71, + "learning_rate": 9.323257604928764e-08, + "loss": 3.8422, + "step": 1155000 + }, + { + "epoch": 12.71, + "learning_rate": 9.321882391770725e-08, + "loss": 3.8255, + "step": 1155500 + }, + { + "epoch": 12.72, + "learning_rate": 9.320507178612684e-08, + "loss": 3.8435, + "step": 1156000 + }, + { + "epoch": 12.72, + "learning_rate": 9.319131965454645e-08, + "loss": 3.849, + "step": 1156500 + }, + { + "epoch": 12.73, + "learning_rate": 9.317756752296606e-08, + "loss": 3.8247, + "step": 1157000 + }, + { + "epoch": 12.73, + "learning_rate": 9.316381539138566e-08, + "loss": 3.8498, + "step": 1157500 + }, + { + "epoch": 12.74, + "learning_rate": 9.315006325980527e-08, + "loss": 3.8387, + "step": 1158000 + }, + { + "epoch": 12.75, + "learning_rate": 9.313631112822488e-08, + "loss": 3.851, + "step": 1158500 + }, + { + "epoch": 12.75, + "learning_rate": 9.312255899664447e-08, + "loss": 3.8589, + "step": 1159000 + }, + { + "epoch": 12.76, + "learning_rate": 9.310880686506408e-08, + "loss": 3.8516, + "step": 1159500 + }, + { + "epoch": 12.76, + "learning_rate": 9.309505473348369e-08, + "loss": 3.8563, + "step": 1160000 + }, + { + "epoch": 12.77, + "learning_rate": 9.308130260190329e-08, + "loss": 3.8329, + "step": 1160500 + }, + { + "epoch": 12.77, + "learning_rate": 9.30675504703229e-08, + "loss": 3.8258, + "step": 1161000 + }, + { + "epoch": 12.78, + "learning_rate": 9.305379833874251e-08, + "loss": 3.8369, + "step": 1161500 + }, + { + "epoch": 12.78, + "learning_rate": 9.30400462071621e-08, + "loss": 3.8504, + "step": 1162000 + }, + { + "epoch": 12.79, + "learning_rate": 9.302629407558171e-08, + "loss": 3.853, + "step": 1162500 + }, + { + "epoch": 12.79, + "learning_rate": 9.301254194400132e-08, + "loss": 3.8362, + "step": 1163000 + }, + { + "epoch": 12.8, + "learning_rate": 9.299878981242092e-08, + "loss": 3.8469, + "step": 1163500 + }, + { + "epoch": 12.81, + "learning_rate": 9.298503768084053e-08, + "loss": 3.8514, + "step": 1164000 + }, + { + "epoch": 12.81, + "learning_rate": 9.297128554926012e-08, + "loss": 3.8272, + "step": 1164500 + }, + { + "epoch": 12.82, + "learning_rate": 9.295753341767973e-08, + "loss": 3.8303, + "step": 1165000 + }, + { + "epoch": 12.82, + "learning_rate": 9.294378128609934e-08, + "loss": 3.8392, + "step": 1165500 + }, + { + "epoch": 12.83, + "learning_rate": 9.293002915451894e-08, + "loss": 3.8377, + "step": 1166000 + }, + { + "epoch": 12.83, + "learning_rate": 9.291627702293855e-08, + "loss": 3.8435, + "step": 1166500 + }, + { + "epoch": 12.84, + "learning_rate": 9.290252489135816e-08, + "loss": 3.8245, + "step": 1167000 + }, + { + "epoch": 12.84, + "learning_rate": 9.288877275977775e-08, + "loss": 3.8507, + "step": 1167500 + }, + { + "epoch": 12.85, + "learning_rate": 9.287502062819736e-08, + "loss": 3.8472, + "step": 1168000 + }, + { + "epoch": 12.86, + "learning_rate": 9.286126849661697e-08, + "loss": 3.8193, + "step": 1168500 + }, + { + "epoch": 12.86, + "learning_rate": 9.284751636503657e-08, + "loss": 3.837, + "step": 1169000 + }, + { + "epoch": 12.87, + "learning_rate": 9.283376423345618e-08, + "loss": 3.8436, + "step": 1169500 + }, + { + "epoch": 12.87, + "learning_rate": 9.282001210187579e-08, + "loss": 3.839, + "step": 1170000 + }, + { + "epoch": 12.88, + "learning_rate": 9.280625997029538e-08, + "loss": 3.8458, + "step": 1170500 + }, + { + "epoch": 12.88, + "learning_rate": 9.279250783871499e-08, + "loss": 3.8394, + "step": 1171000 + }, + { + "epoch": 12.89, + "learning_rate": 9.27787557071346e-08, + "loss": 3.853, + "step": 1171500 + }, + { + "epoch": 12.89, + "learning_rate": 9.27650035755542e-08, + "loss": 3.8484, + "step": 1172000 + }, + { + "epoch": 12.9, + "learning_rate": 9.275125144397381e-08, + "loss": 3.8281, + "step": 1172500 + }, + { + "epoch": 12.91, + "learning_rate": 9.273749931239342e-08, + "loss": 3.8462, + "step": 1173000 + }, + { + "epoch": 12.91, + "learning_rate": 9.272374718081301e-08, + "loss": 3.8299, + "step": 1173500 + }, + { + "epoch": 12.92, + "learning_rate": 9.270999504923262e-08, + "loss": 3.833, + "step": 1174000 + }, + { + "epoch": 12.92, + "learning_rate": 9.269624291765223e-08, + "loss": 3.8459, + "step": 1174500 + }, + { + "epoch": 12.93, + "learning_rate": 9.268249078607183e-08, + "loss": 3.8403, + "step": 1175000 + }, + { + "epoch": 12.93, + "learning_rate": 9.266873865449144e-08, + "loss": 3.8356, + "step": 1175500 + }, + { + "epoch": 12.94, + "learning_rate": 9.265498652291105e-08, + "loss": 3.8462, + "step": 1176000 + }, + { + "epoch": 12.94, + "learning_rate": 9.264123439133064e-08, + "loss": 3.8358, + "step": 1176500 + }, + { + "epoch": 12.95, + "learning_rate": 9.262748225975025e-08, + "loss": 3.8372, + "step": 1177000 + }, + { + "epoch": 12.95, + "learning_rate": 9.261373012816986e-08, + "loss": 3.8385, + "step": 1177500 + }, + { + "epoch": 12.96, + "learning_rate": 9.259997799658946e-08, + "loss": 3.8371, + "step": 1178000 + }, + { + "epoch": 12.97, + "learning_rate": 9.258622586500907e-08, + "loss": 3.8283, + "step": 1178500 + }, + { + "epoch": 12.97, + "learning_rate": 9.257247373342868e-08, + "loss": 3.8499, + "step": 1179000 + }, + { + "epoch": 12.98, + "learning_rate": 9.255872160184827e-08, + "loss": 3.8439, + "step": 1179500 + }, + { + "epoch": 12.98, + "learning_rate": 9.254496947026788e-08, + "loss": 3.8382, + "step": 1180000 + }, + { + "epoch": 12.99, + "learning_rate": 9.253121733868749e-08, + "loss": 3.8405, + "step": 1180500 + }, + { + "epoch": 12.99, + "learning_rate": 9.251746520710709e-08, + "loss": 3.8276, + "step": 1181000 + }, + { + "epoch": 13.0, + "learning_rate": 9.25037130755267e-08, + "loss": 3.8529, + "step": 1181500 + }, + { + "epoch": 13.0, + "eval_loss": 3.883164405822754, + "eval_runtime": 6.142, + "eval_samples_per_second": 253.014, + "step": 1181635 + }, + { + "epoch": 13.0, + "learning_rate": 9.24899609439463e-08, + "loss": 3.8435, + "step": 1182000 + }, + { + "epoch": 13.01, + "learning_rate": 9.247620881236591e-08, + "loss": 3.826, + "step": 1182500 + }, + { + "epoch": 13.02, + "learning_rate": 9.246245668078551e-08, + "loss": 3.8357, + "step": 1183000 + }, + { + "epoch": 13.02, + "learning_rate": 9.244870454920512e-08, + "loss": 3.8327, + "step": 1183500 + }, + { + "epoch": 13.03, + "learning_rate": 9.243495241762473e-08, + "loss": 3.8556, + "step": 1184000 + }, + { + "epoch": 13.03, + "learning_rate": 9.242120028604434e-08, + "loss": 3.8313, + "step": 1184500 + }, + { + "epoch": 13.04, + "learning_rate": 9.240744815446393e-08, + "loss": 3.858, + "step": 1185000 + }, + { + "epoch": 13.04, + "learning_rate": 9.239369602288354e-08, + "loss": 3.854, + "step": 1185500 + }, + { + "epoch": 13.05, + "learning_rate": 9.237994389130315e-08, + "loss": 3.846, + "step": 1186000 + }, + { + "epoch": 13.05, + "learning_rate": 9.236619175972275e-08, + "loss": 3.8472, + "step": 1186500 + }, + { + "epoch": 13.06, + "learning_rate": 9.235243962814236e-08, + "loss": 3.8436, + "step": 1187000 + }, + { + "epoch": 13.06, + "learning_rate": 9.233868749656197e-08, + "loss": 3.8398, + "step": 1187500 + }, + { + "epoch": 13.07, + "learning_rate": 9.232493536498158e-08, + "loss": 3.8173, + "step": 1188000 + }, + { + "epoch": 13.08, + "learning_rate": 9.231118323340117e-08, + "loss": 3.8494, + "step": 1188500 + }, + { + "epoch": 13.08, + "learning_rate": 9.229743110182078e-08, + "loss": 3.836, + "step": 1189000 + }, + { + "epoch": 13.09, + "learning_rate": 9.228367897024039e-08, + "loss": 3.8333, + "step": 1189500 + }, + { + "epoch": 13.09, + "learning_rate": 9.226992683865999e-08, + "loss": 3.842, + "step": 1190000 + }, + { + "epoch": 13.1, + "learning_rate": 9.22561747070796e-08, + "loss": 3.8228, + "step": 1190500 + }, + { + "epoch": 13.1, + "learning_rate": 9.224242257549921e-08, + "loss": 3.8329, + "step": 1191000 + }, + { + "epoch": 13.11, + "learning_rate": 9.22286704439188e-08, + "loss": 3.8283, + "step": 1191500 + }, + { + "epoch": 13.11, + "learning_rate": 9.221491831233841e-08, + "loss": 3.8551, + "step": 1192000 + }, + { + "epoch": 13.12, + "learning_rate": 9.220116618075802e-08, + "loss": 3.8317, + "step": 1192500 + }, + { + "epoch": 13.13, + "learning_rate": 9.218741404917762e-08, + "loss": 3.8255, + "step": 1193000 + }, + { + "epoch": 13.13, + "learning_rate": 9.217366191759723e-08, + "loss": 3.8387, + "step": 1193500 + }, + { + "epoch": 13.14, + "learning_rate": 9.215990978601684e-08, + "loss": 3.8346, + "step": 1194000 + }, + { + "epoch": 13.14, + "learning_rate": 9.214615765443643e-08, + "loss": 3.8476, + "step": 1194500 + }, + { + "epoch": 13.15, + "learning_rate": 9.213240552285604e-08, + "loss": 3.8553, + "step": 1195000 + }, + { + "epoch": 13.15, + "learning_rate": 9.211865339127565e-08, + "loss": 3.8554, + "step": 1195500 + }, + { + "epoch": 13.16, + "learning_rate": 9.210490125969525e-08, + "loss": 3.8496, + "step": 1196000 + }, + { + "epoch": 13.16, + "learning_rate": 9.209114912811486e-08, + "loss": 3.859, + "step": 1196500 + }, + { + "epoch": 13.17, + "learning_rate": 9.207739699653447e-08, + "loss": 3.8418, + "step": 1197000 + }, + { + "epoch": 13.17, + "learning_rate": 9.206364486495406e-08, + "loss": 3.824, + "step": 1197500 + }, + { + "epoch": 13.18, + "learning_rate": 9.204989273337367e-08, + "loss": 3.8371, + "step": 1198000 + }, + { + "epoch": 13.19, + "learning_rate": 9.203614060179328e-08, + "loss": 3.8478, + "step": 1198500 + }, + { + "epoch": 13.19, + "learning_rate": 9.202238847021288e-08, + "loss": 3.8318, + "step": 1199000 + }, + { + "epoch": 13.2, + "learning_rate": 9.200863633863249e-08, + "loss": 3.8384, + "step": 1199500 + }, + { + "epoch": 13.2, + "learning_rate": 9.19948842070521e-08, + "loss": 3.8368, + "step": 1200000 + }, + { + "epoch": 13.21, + "learning_rate": 9.198113207547169e-08, + "loss": 3.8501, + "step": 1200500 + }, + { + "epoch": 13.21, + "learning_rate": 9.19673799438913e-08, + "loss": 3.8391, + "step": 1201000 + }, + { + "epoch": 13.22, + "learning_rate": 9.195362781231091e-08, + "loss": 3.8356, + "step": 1201500 + }, + { + "epoch": 13.22, + "learning_rate": 9.19398756807305e-08, + "loss": 3.8369, + "step": 1202000 + }, + { + "epoch": 13.23, + "learning_rate": 9.192612354915012e-08, + "loss": 3.826, + "step": 1202500 + }, + { + "epoch": 13.24, + "learning_rate": 9.191237141756973e-08, + "loss": 3.8188, + "step": 1203000 + }, + { + "epoch": 13.24, + "learning_rate": 9.189861928598932e-08, + "loss": 3.8421, + "step": 1203500 + }, + { + "epoch": 13.25, + "learning_rate": 9.188486715440893e-08, + "loss": 3.8414, + "step": 1204000 + }, + { + "epoch": 13.25, + "learning_rate": 9.187111502282854e-08, + "loss": 3.8285, + "step": 1204500 + }, + { + "epoch": 13.26, + "learning_rate": 9.185736289124814e-08, + "loss": 3.8447, + "step": 1205000 + }, + { + "epoch": 13.26, + "learning_rate": 9.184361075966775e-08, + "loss": 3.83, + "step": 1205500 + }, + { + "epoch": 13.27, + "learning_rate": 9.182985862808735e-08, + "loss": 3.8213, + "step": 1206000 + }, + { + "epoch": 13.27, + "learning_rate": 9.181610649650695e-08, + "loss": 3.8473, + "step": 1206500 + }, + { + "epoch": 13.28, + "learning_rate": 9.180235436492656e-08, + "loss": 3.8394, + "step": 1207000 + }, + { + "epoch": 13.28, + "learning_rate": 9.178860223334617e-08, + "loss": 3.8413, + "step": 1207500 + }, + { + "epoch": 13.29, + "learning_rate": 9.177485010176577e-08, + "loss": 3.8429, + "step": 1208000 + }, + { + "epoch": 13.3, + "learning_rate": 9.176109797018538e-08, + "loss": 3.8222, + "step": 1208500 + }, + { + "epoch": 13.3, + "learning_rate": 9.174734583860498e-08, + "loss": 3.833, + "step": 1209000 + }, + { + "epoch": 13.31, + "learning_rate": 9.173359370702458e-08, + "loss": 3.8409, + "step": 1209500 + }, + { + "epoch": 13.31, + "learning_rate": 9.171984157544419e-08, + "loss": 3.8438, + "step": 1210000 + }, + { + "epoch": 13.32, + "learning_rate": 9.17060894438638e-08, + "loss": 3.8277, + "step": 1210500 + }, + { + "epoch": 13.32, + "learning_rate": 9.16923373122834e-08, + "loss": 3.8349, + "step": 1211000 + }, + { + "epoch": 13.33, + "learning_rate": 9.1678585180703e-08, + "loss": 3.8444, + "step": 1211500 + }, + { + "epoch": 13.33, + "learning_rate": 9.166483304912261e-08, + "loss": 3.8565, + "step": 1212000 + }, + { + "epoch": 13.34, + "learning_rate": 9.165108091754221e-08, + "loss": 3.8534, + "step": 1212500 + }, + { + "epoch": 13.35, + "learning_rate": 9.163732878596182e-08, + "loss": 3.8493, + "step": 1213000 + }, + { + "epoch": 13.35, + "learning_rate": 9.162357665438143e-08, + "loss": 3.8298, + "step": 1213500 + }, + { + "epoch": 13.36, + "learning_rate": 9.160982452280102e-08, + "loss": 3.8399, + "step": 1214000 + }, + { + "epoch": 13.36, + "learning_rate": 9.159607239122063e-08, + "loss": 3.8411, + "step": 1214500 + }, + { + "epoch": 13.37, + "learning_rate": 9.158232025964024e-08, + "loss": 3.8151, + "step": 1215000 + }, + { + "epoch": 13.37, + "learning_rate": 9.156856812805984e-08, + "loss": 3.8279, + "step": 1215500 + }, + { + "epoch": 13.38, + "learning_rate": 9.155481599647945e-08, + "loss": 3.8292, + "step": 1216000 + }, + { + "epoch": 13.38, + "learning_rate": 9.154106386489906e-08, + "loss": 3.8525, + "step": 1216500 + }, + { + "epoch": 13.39, + "learning_rate": 9.152731173331865e-08, + "loss": 3.8336, + "step": 1217000 + }, + { + "epoch": 13.39, + "learning_rate": 9.151355960173826e-08, + "loss": 3.8598, + "step": 1217500 + }, + { + "epoch": 13.4, + "learning_rate": 9.149980747015787e-08, + "loss": 3.8331, + "step": 1218000 + }, + { + "epoch": 13.41, + "learning_rate": 9.148605533857747e-08, + "loss": 3.8237, + "step": 1218500 + }, + { + "epoch": 13.41, + "learning_rate": 9.147230320699708e-08, + "loss": 3.8359, + "step": 1219000 + }, + { + "epoch": 13.42, + "learning_rate": 9.145855107541669e-08, + "loss": 3.8295, + "step": 1219500 + }, + { + "epoch": 13.42, + "learning_rate": 9.144479894383628e-08, + "loss": 3.8374, + "step": 1220000 + }, + { + "epoch": 13.43, + "learning_rate": 9.14310468122559e-08, + "loss": 3.8434, + "step": 1220500 + }, + { + "epoch": 13.43, + "learning_rate": 9.14172946806755e-08, + "loss": 3.8352, + "step": 1221000 + }, + { + "epoch": 13.44, + "learning_rate": 9.14035425490951e-08, + "loss": 3.8392, + "step": 1221500 + }, + { + "epoch": 13.44, + "learning_rate": 9.138979041751471e-08, + "loss": 3.842, + "step": 1222000 + }, + { + "epoch": 13.45, + "learning_rate": 9.137603828593432e-08, + "loss": 3.8447, + "step": 1222500 + }, + { + "epoch": 13.46, + "learning_rate": 9.136228615435391e-08, + "loss": 3.8526, + "step": 1223000 + }, + { + "epoch": 13.46, + "learning_rate": 9.134853402277352e-08, + "loss": 3.8412, + "step": 1223500 + }, + { + "epoch": 13.47, + "learning_rate": 9.133478189119313e-08, + "loss": 3.836, + "step": 1224000 + }, + { + "epoch": 13.47, + "learning_rate": 9.132102975961273e-08, + "loss": 3.8414, + "step": 1224500 + }, + { + "epoch": 13.48, + "learning_rate": 9.130727762803234e-08, + "loss": 3.8434, + "step": 1225000 + }, + { + "epoch": 13.48, + "learning_rate": 9.129352549645195e-08, + "loss": 3.8256, + "step": 1225500 + }, + { + "epoch": 13.49, + "learning_rate": 9.127977336487154e-08, + "loss": 3.8252, + "step": 1226000 + }, + { + "epoch": 13.49, + "learning_rate": 9.126602123329115e-08, + "loss": 3.8345, + "step": 1226500 + }, + { + "epoch": 13.5, + "learning_rate": 9.125226910171076e-08, + "loss": 3.8522, + "step": 1227000 + }, + { + "epoch": 13.5, + "learning_rate": 9.123851697013036e-08, + "loss": 3.8464, + "step": 1227500 + }, + { + "epoch": 13.51, + "learning_rate": 9.122476483854997e-08, + "loss": 3.8329, + "step": 1228000 + }, + { + "epoch": 13.52, + "learning_rate": 9.121101270696958e-08, + "loss": 3.8437, + "step": 1228500 + }, + { + "epoch": 13.52, + "learning_rate": 9.119726057538917e-08, + "loss": 3.8389, + "step": 1229000 + }, + { + "epoch": 13.53, + "learning_rate": 9.118350844380878e-08, + "loss": 3.8353, + "step": 1229500 + }, + { + "epoch": 13.53, + "learning_rate": 9.116975631222839e-08, + "loss": 3.8294, + "step": 1230000 + }, + { + "epoch": 13.54, + "learning_rate": 9.115600418064799e-08, + "loss": 3.8433, + "step": 1230500 + }, + { + "epoch": 13.54, + "learning_rate": 9.11422520490676e-08, + "loss": 3.8331, + "step": 1231000 + }, + { + "epoch": 13.55, + "learning_rate": 9.11284999174872e-08, + "loss": 3.8299, + "step": 1231500 + }, + { + "epoch": 13.55, + "learning_rate": 9.111474778590682e-08, + "loss": 3.8405, + "step": 1232000 + }, + { + "epoch": 13.56, + "learning_rate": 9.110099565432641e-08, + "loss": 3.8342, + "step": 1232500 + }, + { + "epoch": 13.57, + "learning_rate": 9.108724352274602e-08, + "loss": 3.8416, + "step": 1233000 + }, + { + "epoch": 13.57, + "learning_rate": 9.107349139116563e-08, + "loss": 3.837, + "step": 1233500 + }, + { + "epoch": 13.58, + "learning_rate": 9.105973925958524e-08, + "loss": 3.8327, + "step": 1234000 + }, + { + "epoch": 13.58, + "learning_rate": 9.104598712800484e-08, + "loss": 3.8413, + "step": 1234500 + }, + { + "epoch": 13.59, + "learning_rate": 9.103223499642445e-08, + "loss": 3.8333, + "step": 1235000 + }, + { + "epoch": 13.59, + "learning_rate": 9.101848286484405e-08, + "loss": 3.8149, + "step": 1235500 + }, + { + "epoch": 13.6, + "learning_rate": 9.100473073326365e-08, + "loss": 3.8377, + "step": 1236000 + }, + { + "epoch": 13.6, + "learning_rate": 9.099097860168326e-08, + "loss": 3.8467, + "step": 1236500 + }, + { + "epoch": 13.61, + "learning_rate": 9.097722647010287e-08, + "loss": 3.8293, + "step": 1237000 + }, + { + "epoch": 13.61, + "learning_rate": 9.096347433852248e-08, + "loss": 3.837, + "step": 1237500 + }, + { + "epoch": 13.62, + "learning_rate": 9.094972220694207e-08, + "loss": 3.8359, + "step": 1238000 + }, + { + "epoch": 13.63, + "learning_rate": 9.093597007536168e-08, + "loss": 3.824, + "step": 1238500 + }, + { + "epoch": 13.63, + "learning_rate": 9.09222179437813e-08, + "loss": 3.8252, + "step": 1239000 + }, + { + "epoch": 13.64, + "learning_rate": 9.090846581220089e-08, + "loss": 3.8431, + "step": 1239500 + }, + { + "epoch": 13.64, + "learning_rate": 9.08947136806205e-08, + "loss": 3.855, + "step": 1240000 + }, + { + "epoch": 13.65, + "learning_rate": 9.08809615490401e-08, + "loss": 3.8327, + "step": 1240500 + }, + { + "epoch": 13.65, + "learning_rate": 9.08672094174597e-08, + "loss": 3.8392, + "step": 1241000 + }, + { + "epoch": 13.66, + "learning_rate": 9.085345728587931e-08, + "loss": 3.8306, + "step": 1241500 + }, + { + "epoch": 13.66, + "learning_rate": 9.083970515429891e-08, + "loss": 3.8445, + "step": 1242000 + }, + { + "epoch": 13.67, + "learning_rate": 9.082595302271852e-08, + "loss": 3.8407, + "step": 1242500 + }, + { + "epoch": 13.68, + "learning_rate": 9.081220089113813e-08, + "loss": 3.8331, + "step": 1243000 + }, + { + "epoch": 13.68, + "learning_rate": 9.079844875955772e-08, + "loss": 3.8528, + "step": 1243500 + }, + { + "epoch": 13.69, + "learning_rate": 9.078469662797733e-08, + "loss": 3.8492, + "step": 1244000 + }, + { + "epoch": 13.69, + "learning_rate": 9.077094449639694e-08, + "loss": 3.835, + "step": 1244500 + }, + { + "epoch": 13.7, + "learning_rate": 9.075719236481654e-08, + "loss": 3.8104, + "step": 1245000 + }, + { + "epoch": 13.7, + "learning_rate": 9.074344023323615e-08, + "loss": 3.8294, + "step": 1245500 + }, + { + "epoch": 13.71, + "learning_rate": 9.072968810165576e-08, + "loss": 3.8243, + "step": 1246000 + }, + { + "epoch": 13.71, + "learning_rate": 9.071593597007535e-08, + "loss": 3.822, + "step": 1246500 + }, + { + "epoch": 13.72, + "learning_rate": 9.070218383849496e-08, + "loss": 3.8266, + "step": 1247000 + }, + { + "epoch": 13.72, + "learning_rate": 9.068843170691457e-08, + "loss": 3.8195, + "step": 1247500 + }, + { + "epoch": 13.73, + "learning_rate": 9.067467957533417e-08, + "loss": 3.8157, + "step": 1248000 + }, + { + "epoch": 13.74, + "learning_rate": 9.066092744375378e-08, + "loss": 3.8359, + "step": 1248500 + }, + { + "epoch": 13.74, + "learning_rate": 9.064717531217339e-08, + "loss": 3.8356, + "step": 1249000 + }, + { + "epoch": 13.75, + "learning_rate": 9.063342318059298e-08, + "loss": 3.8221, + "step": 1249500 + }, + { + "epoch": 13.75, + "learning_rate": 9.061967104901259e-08, + "loss": 3.8219, + "step": 1250000 + }, + { + "epoch": 13.76, + "learning_rate": 9.06059189174322e-08, + "loss": 3.8214, + "step": 1250500 + }, + { + "epoch": 13.76, + "learning_rate": 9.05921667858518e-08, + "loss": 3.8301, + "step": 1251000 + }, + { + "epoch": 13.77, + "learning_rate": 9.057841465427141e-08, + "loss": 3.8324, + "step": 1251500 + }, + { + "epoch": 13.77, + "learning_rate": 9.056466252269102e-08, + "loss": 3.8312, + "step": 1252000 + }, + { + "epoch": 13.78, + "learning_rate": 9.055091039111061e-08, + "loss": 3.8204, + "step": 1252500 + }, + { + "epoch": 13.79, + "learning_rate": 9.053715825953022e-08, + "loss": 3.8401, + "step": 1253000 + }, + { + "epoch": 13.79, + "learning_rate": 9.052340612794983e-08, + "loss": 3.8298, + "step": 1253500 + }, + { + "epoch": 13.8, + "learning_rate": 9.050965399636943e-08, + "loss": 3.839, + "step": 1254000 + }, + { + "epoch": 13.8, + "learning_rate": 9.049590186478904e-08, + "loss": 3.85, + "step": 1254500 + }, + { + "epoch": 13.81, + "learning_rate": 9.048214973320865e-08, + "loss": 3.8128, + "step": 1255000 + }, + { + "epoch": 13.81, + "learning_rate": 9.046839760162824e-08, + "loss": 3.8177, + "step": 1255500 + }, + { + "epoch": 13.82, + "learning_rate": 9.045464547004785e-08, + "loss": 3.8591, + "step": 1256000 + }, + { + "epoch": 13.82, + "learning_rate": 9.044089333846746e-08, + "loss": 3.8283, + "step": 1256500 + }, + { + "epoch": 13.83, + "learning_rate": 9.042714120688706e-08, + "loss": 3.8344, + "step": 1257000 + }, + { + "epoch": 13.83, + "learning_rate": 9.041338907530667e-08, + "loss": 3.8203, + "step": 1257500 + }, + { + "epoch": 13.84, + "learning_rate": 9.039963694372628e-08, + "loss": 3.816, + "step": 1258000 + }, + { + "epoch": 13.85, + "learning_rate": 9.038588481214587e-08, + "loss": 3.8433, + "step": 1258500 + }, + { + "epoch": 13.85, + "learning_rate": 9.037213268056548e-08, + "loss": 3.8174, + "step": 1259000 + }, + { + "epoch": 13.86, + "learning_rate": 9.035838054898509e-08, + "loss": 3.8359, + "step": 1259500 + }, + { + "epoch": 13.86, + "learning_rate": 9.034462841740469e-08, + "loss": 3.8325, + "step": 1260000 + }, + { + "epoch": 13.87, + "learning_rate": 9.03308762858243e-08, + "loss": 3.8263, + "step": 1260500 + }, + { + "epoch": 13.87, + "learning_rate": 9.03171241542439e-08, + "loss": 3.8161, + "step": 1261000 + }, + { + "epoch": 13.88, + "learning_rate": 9.03033720226635e-08, + "loss": 3.851, + "step": 1261500 + }, + { + "epoch": 13.88, + "learning_rate": 9.028961989108311e-08, + "loss": 3.8266, + "step": 1262000 + }, + { + "epoch": 13.89, + "learning_rate": 9.027586775950272e-08, + "loss": 3.8362, + "step": 1262500 + }, + { + "epoch": 13.9, + "learning_rate": 9.026211562792232e-08, + "loss": 3.814, + "step": 1263000 + }, + { + "epoch": 13.9, + "learning_rate": 9.024836349634193e-08, + "loss": 3.8394, + "step": 1263500 + }, + { + "epoch": 13.91, + "learning_rate": 9.023461136476154e-08, + "loss": 3.8369, + "step": 1264000 + }, + { + "epoch": 13.91, + "learning_rate": 9.022085923318113e-08, + "loss": 3.8324, + "step": 1264500 + }, + { + "epoch": 13.92, + "learning_rate": 9.020710710160074e-08, + "loss": 3.847, + "step": 1265000 + }, + { + "epoch": 13.92, + "learning_rate": 9.019335497002035e-08, + "loss": 3.859, + "step": 1265500 + }, + { + "epoch": 13.93, + "learning_rate": 9.017960283843995e-08, + "loss": 3.8282, + "step": 1266000 + }, + { + "epoch": 13.93, + "learning_rate": 9.016585070685956e-08, + "loss": 3.8372, + "step": 1266500 + }, + { + "epoch": 13.94, + "learning_rate": 9.015209857527917e-08, + "loss": 3.8276, + "step": 1267000 + }, + { + "epoch": 13.94, + "learning_rate": 9.013834644369876e-08, + "loss": 3.8427, + "step": 1267500 + }, + { + "epoch": 13.95, + "learning_rate": 9.012459431211837e-08, + "loss": 3.8229, + "step": 1268000 + }, + { + "epoch": 13.96, + "learning_rate": 9.011084218053798e-08, + "loss": 3.8208, + "step": 1268500 + }, + { + "epoch": 13.96, + "learning_rate": 9.009709004895758e-08, + "loss": 3.8364, + "step": 1269000 + }, + { + "epoch": 13.97, + "learning_rate": 9.008333791737719e-08, + "loss": 3.8278, + "step": 1269500 + }, + { + "epoch": 13.97, + "learning_rate": 9.00695857857968e-08, + "loss": 3.8244, + "step": 1270000 + }, + { + "epoch": 13.98, + "learning_rate": 9.005583365421639e-08, + "loss": 3.849, + "step": 1270500 + }, + { + "epoch": 13.98, + "learning_rate": 9.0042081522636e-08, + "loss": 3.8212, + "step": 1271000 + }, + { + "epoch": 13.99, + "learning_rate": 9.002832939105561e-08, + "loss": 3.8329, + "step": 1271500 + }, + { + "epoch": 13.99, + "learning_rate": 9.00145772594752e-08, + "loss": 3.8335, + "step": 1272000 + }, + { + "epoch": 14.0, + "learning_rate": 9.000082512789482e-08, + "loss": 3.8233, + "step": 1272500 + }, + { + "epoch": 14.0, + "eval_loss": 3.8761990070343018, + "eval_runtime": 6.1312, + "eval_samples_per_second": 253.458, + "step": 1272530 + }, + { + "epoch": 14.01, + "learning_rate": 8.998707299631442e-08, + "loss": 3.8429, + "step": 1273000 + }, + { + "epoch": 14.01, + "learning_rate": 8.997332086473402e-08, + "loss": 3.8518, + "step": 1273500 + }, + { + "epoch": 14.02, + "learning_rate": 8.995956873315363e-08, + "loss": 3.8246, + "step": 1274000 + }, + { + "epoch": 14.02, + "learning_rate": 8.994581660157324e-08, + "loss": 3.8219, + "step": 1274500 + }, + { + "epoch": 14.03, + "learning_rate": 8.993206446999284e-08, + "loss": 3.8167, + "step": 1275000 + }, + { + "epoch": 14.03, + "learning_rate": 8.991831233841244e-08, + "loss": 3.8455, + "step": 1275500 + }, + { + "epoch": 14.04, + "learning_rate": 8.990456020683205e-08, + "loss": 3.814, + "step": 1276000 + }, + { + "epoch": 14.04, + "learning_rate": 8.989080807525165e-08, + "loss": 3.8528, + "step": 1276500 + }, + { + "epoch": 14.05, + "learning_rate": 8.987705594367126e-08, + "loss": 3.8351, + "step": 1277000 + }, + { + "epoch": 14.05, + "learning_rate": 8.986330381209087e-08, + "loss": 3.8345, + "step": 1277500 + }, + { + "epoch": 14.06, + "learning_rate": 8.984955168051046e-08, + "loss": 3.8136, + "step": 1278000 + }, + { + "epoch": 14.07, + "learning_rate": 8.983579954893007e-08, + "loss": 3.8299, + "step": 1278500 + }, + { + "epoch": 14.07, + "learning_rate": 8.982204741734968e-08, + "loss": 3.8173, + "step": 1279000 + }, + { + "epoch": 14.08, + "learning_rate": 8.980829528576929e-08, + "loss": 3.8066, + "step": 1279500 + }, + { + "epoch": 14.08, + "learning_rate": 8.979454315418889e-08, + "loss": 3.8486, + "step": 1280000 + }, + { + "epoch": 14.09, + "learning_rate": 8.97807910226085e-08, + "loss": 3.8196, + "step": 1280500 + }, + { + "epoch": 14.09, + "learning_rate": 8.976703889102811e-08, + "loss": 3.8276, + "step": 1281000 + }, + { + "epoch": 14.1, + "learning_rate": 8.975328675944772e-08, + "loss": 3.8144, + "step": 1281500 + }, + { + "epoch": 14.1, + "learning_rate": 8.973953462786731e-08, + "loss": 3.8227, + "step": 1282000 + }, + { + "epoch": 14.11, + "learning_rate": 8.972578249628692e-08, + "loss": 3.8422, + "step": 1282500 + }, + { + "epoch": 14.12, + "learning_rate": 8.971203036470653e-08, + "loss": 3.8273, + "step": 1283000 + }, + { + "epoch": 14.12, + "learning_rate": 8.969827823312613e-08, + "loss": 3.8154, + "step": 1283500 + }, + { + "epoch": 14.13, + "learning_rate": 8.968452610154574e-08, + "loss": 3.8165, + "step": 1284000 + }, + { + "epoch": 14.13, + "learning_rate": 8.967077396996535e-08, + "loss": 3.8358, + "step": 1284500 + }, + { + "epoch": 14.14, + "learning_rate": 8.965702183838496e-08, + "loss": 3.8182, + "step": 1285000 + }, + { + "epoch": 14.14, + "learning_rate": 8.964326970680455e-08, + "loss": 3.8434, + "step": 1285500 + }, + { + "epoch": 14.15, + "learning_rate": 8.962951757522416e-08, + "loss": 3.8237, + "step": 1286000 + }, + { + "epoch": 14.15, + "learning_rate": 8.961576544364377e-08, + "loss": 3.8382, + "step": 1286500 + }, + { + "epoch": 14.16, + "learning_rate": 8.960201331206337e-08, + "loss": 3.8302, + "step": 1287000 + }, + { + "epoch": 14.16, + "learning_rate": 8.958826118048298e-08, + "loss": 3.8336, + "step": 1287500 + }, + { + "epoch": 14.17, + "learning_rate": 8.957450904890259e-08, + "loss": 3.8055, + "step": 1288000 + }, + { + "epoch": 14.18, + "learning_rate": 8.956075691732218e-08, + "loss": 3.8228, + "step": 1288500 + }, + { + "epoch": 14.18, + "learning_rate": 8.954700478574179e-08, + "loss": 3.8555, + "step": 1289000 + }, + { + "epoch": 14.19, + "learning_rate": 8.95332526541614e-08, + "loss": 3.8289, + "step": 1289500 + }, + { + "epoch": 14.19, + "learning_rate": 8.9519500522581e-08, + "loss": 3.8338, + "step": 1290000 + }, + { + "epoch": 14.2, + "learning_rate": 8.95057483910006e-08, + "loss": 3.8181, + "step": 1290500 + }, + { + "epoch": 14.2, + "learning_rate": 8.949199625942022e-08, + "loss": 3.8304, + "step": 1291000 + }, + { + "epoch": 14.21, + "learning_rate": 8.947824412783981e-08, + "loss": 3.8418, + "step": 1291500 + }, + { + "epoch": 14.21, + "learning_rate": 8.946449199625942e-08, + "loss": 3.8576, + "step": 1292000 + }, + { + "epoch": 14.22, + "learning_rate": 8.945073986467903e-08, + "loss": 3.8289, + "step": 1292500 + }, + { + "epoch": 14.23, + "learning_rate": 8.943698773309863e-08, + "loss": 3.8199, + "step": 1293000 + }, + { + "epoch": 14.23, + "learning_rate": 8.942323560151824e-08, + "loss": 3.8392, + "step": 1293500 + }, + { + "epoch": 14.24, + "learning_rate": 8.940948346993784e-08, + "loss": 3.8288, + "step": 1294000 + }, + { + "epoch": 14.24, + "learning_rate": 8.939573133835744e-08, + "loss": 3.8401, + "step": 1294500 + }, + { + "epoch": 14.25, + "learning_rate": 8.938197920677705e-08, + "loss": 3.8449, + "step": 1295000 + }, + { + "epoch": 14.25, + "learning_rate": 8.936822707519666e-08, + "loss": 3.8269, + "step": 1295500 + }, + { + "epoch": 14.26, + "learning_rate": 8.935447494361626e-08, + "loss": 3.8395, + "step": 1296000 + }, + { + "epoch": 14.26, + "learning_rate": 8.934072281203587e-08, + "loss": 3.8267, + "step": 1296500 + }, + { + "epoch": 14.27, + "learning_rate": 8.932697068045547e-08, + "loss": 3.839, + "step": 1297000 + }, + { + "epoch": 14.27, + "learning_rate": 8.931321854887507e-08, + "loss": 3.8286, + "step": 1297500 + }, + { + "epoch": 14.28, + "learning_rate": 8.929946641729468e-08, + "loss": 3.8479, + "step": 1298000 + }, + { + "epoch": 14.29, + "learning_rate": 8.928571428571429e-08, + "loss": 3.8156, + "step": 1298500 + }, + { + "epoch": 14.29, + "learning_rate": 8.927196215413389e-08, + "loss": 3.8269, + "step": 1299000 + }, + { + "epoch": 14.3, + "learning_rate": 8.92582100225535e-08, + "loss": 3.8423, + "step": 1299500 + }, + { + "epoch": 14.3, + "learning_rate": 8.92444578909731e-08, + "loss": 3.8312, + "step": 1300000 + }, + { + "epoch": 14.31, + "learning_rate": 8.92307057593927e-08, + "loss": 3.8257, + "step": 1300500 + }, + { + "epoch": 14.31, + "learning_rate": 8.921695362781231e-08, + "loss": 3.8231, + "step": 1301000 + }, + { + "epoch": 14.32, + "learning_rate": 8.920320149623192e-08, + "loss": 3.8352, + "step": 1301500 + }, + { + "epoch": 14.32, + "learning_rate": 8.918944936465151e-08, + "loss": 3.8058, + "step": 1302000 + }, + { + "epoch": 14.33, + "learning_rate": 8.917569723307112e-08, + "loss": 3.8538, + "step": 1302500 + }, + { + "epoch": 14.34, + "learning_rate": 8.916194510149073e-08, + "loss": 3.819, + "step": 1303000 + }, + { + "epoch": 14.34, + "learning_rate": 8.914819296991033e-08, + "loss": 3.8431, + "step": 1303500 + }, + { + "epoch": 14.35, + "learning_rate": 8.913444083832994e-08, + "loss": 3.8362, + "step": 1304000 + }, + { + "epoch": 14.35, + "learning_rate": 8.912068870674955e-08, + "loss": 3.8366, + "step": 1304500 + }, + { + "epoch": 14.36, + "learning_rate": 8.910693657516914e-08, + "loss": 3.8475, + "step": 1305000 + }, + { + "epoch": 14.36, + "learning_rate": 8.909318444358875e-08, + "loss": 3.8437, + "step": 1305500 + }, + { + "epoch": 14.37, + "learning_rate": 8.907943231200836e-08, + "loss": 3.827, + "step": 1306000 + }, + { + "epoch": 14.37, + "learning_rate": 8.906568018042796e-08, + "loss": 3.8453, + "step": 1306500 + }, + { + "epoch": 14.38, + "learning_rate": 8.905192804884757e-08, + "loss": 3.8273, + "step": 1307000 + }, + { + "epoch": 14.38, + "learning_rate": 8.903817591726718e-08, + "loss": 3.8175, + "step": 1307500 + }, + { + "epoch": 14.39, + "learning_rate": 8.902442378568677e-08, + "loss": 3.8297, + "step": 1308000 + }, + { + "epoch": 14.4, + "learning_rate": 8.901067165410638e-08, + "loss": 3.823, + "step": 1308500 + }, + { + "epoch": 14.4, + "learning_rate": 8.899691952252599e-08, + "loss": 3.83, + "step": 1309000 + }, + { + "epoch": 14.41, + "learning_rate": 8.898316739094559e-08, + "loss": 3.8234, + "step": 1309500 + }, + { + "epoch": 14.41, + "learning_rate": 8.89694152593652e-08, + "loss": 3.8344, + "step": 1310000 + }, + { + "epoch": 14.42, + "learning_rate": 8.895566312778481e-08, + "loss": 3.8282, + "step": 1310500 + }, + { + "epoch": 14.42, + "learning_rate": 8.89419109962044e-08, + "loss": 3.8331, + "step": 1311000 + }, + { + "epoch": 14.43, + "learning_rate": 8.892815886462401e-08, + "loss": 3.8289, + "step": 1311500 + }, + { + "epoch": 14.43, + "learning_rate": 8.891440673304362e-08, + "loss": 3.82, + "step": 1312000 + }, + { + "epoch": 14.44, + "learning_rate": 8.890065460146322e-08, + "loss": 3.8294, + "step": 1312500 + }, + { + "epoch": 14.45, + "learning_rate": 8.888690246988283e-08, + "loss": 3.8244, + "step": 1313000 + }, + { + "epoch": 14.45, + "learning_rate": 8.887315033830244e-08, + "loss": 3.8232, + "step": 1313500 + }, + { + "epoch": 14.46, + "learning_rate": 8.885939820672203e-08, + "loss": 3.8166, + "step": 1314000 + }, + { + "epoch": 14.46, + "learning_rate": 8.884564607514164e-08, + "loss": 3.8492, + "step": 1314500 + }, + { + "epoch": 14.47, + "learning_rate": 8.883189394356124e-08, + "loss": 3.8119, + "step": 1315000 + }, + { + "epoch": 14.47, + "learning_rate": 8.881814181198085e-08, + "loss": 3.8253, + "step": 1315500 + }, + { + "epoch": 14.48, + "learning_rate": 8.880438968040046e-08, + "loss": 3.8214, + "step": 1316000 + }, + { + "epoch": 14.48, + "learning_rate": 8.879063754882005e-08, + "loss": 3.8221, + "step": 1316500 + }, + { + "epoch": 14.49, + "learning_rate": 8.877688541723966e-08, + "loss": 3.8234, + "step": 1317000 + }, + { + "epoch": 14.49, + "learning_rate": 8.876313328565927e-08, + "loss": 3.8347, + "step": 1317500 + }, + { + "epoch": 14.5, + "learning_rate": 8.874938115407887e-08, + "loss": 3.8159, + "step": 1318000 + }, + { + "epoch": 14.51, + "learning_rate": 8.873562902249848e-08, + "loss": 3.847, + "step": 1318500 + }, + { + "epoch": 14.51, + "learning_rate": 8.872187689091809e-08, + "loss": 3.824, + "step": 1319000 + }, + { + "epoch": 14.52, + "learning_rate": 8.870812475933768e-08, + "loss": 3.8217, + "step": 1319500 + }, + { + "epoch": 14.52, + "learning_rate": 8.869437262775729e-08, + "loss": 3.8344, + "step": 1320000 + }, + { + "epoch": 14.53, + "learning_rate": 8.86806204961769e-08, + "loss": 3.8279, + "step": 1320500 + }, + { + "epoch": 14.53, + "learning_rate": 8.86668683645965e-08, + "loss": 3.8317, + "step": 1321000 + }, + { + "epoch": 14.54, + "learning_rate": 8.865311623301611e-08, + "loss": 3.8273, + "step": 1321500 + }, + { + "epoch": 14.54, + "learning_rate": 8.863936410143572e-08, + "loss": 3.8307, + "step": 1322000 + }, + { + "epoch": 14.55, + "learning_rate": 8.862561196985531e-08, + "loss": 3.8392, + "step": 1322500 + }, + { + "epoch": 14.56, + "learning_rate": 8.861185983827492e-08, + "loss": 3.8205, + "step": 1323000 + }, + { + "epoch": 14.56, + "learning_rate": 8.859810770669453e-08, + "loss": 3.836, + "step": 1323500 + }, + { + "epoch": 14.57, + "learning_rate": 8.858435557511413e-08, + "loss": 3.8433, + "step": 1324000 + }, + { + "epoch": 14.57, + "learning_rate": 8.857060344353374e-08, + "loss": 3.8135, + "step": 1324500 + }, + { + "epoch": 14.58, + "learning_rate": 8.855685131195335e-08, + "loss": 3.8281, + "step": 1325000 + }, + { + "epoch": 14.58, + "learning_rate": 8.854309918037294e-08, + "loss": 3.8375, + "step": 1325500 + }, + { + "epoch": 14.59, + "learning_rate": 8.852934704879255e-08, + "loss": 3.8107, + "step": 1326000 + }, + { + "epoch": 14.59, + "learning_rate": 8.851559491721216e-08, + "loss": 3.8195, + "step": 1326500 + }, + { + "epoch": 14.6, + "learning_rate": 8.850184278563177e-08, + "loss": 3.8231, + "step": 1327000 + }, + { + "epoch": 14.6, + "learning_rate": 8.848809065405137e-08, + "loss": 3.8288, + "step": 1327500 + }, + { + "epoch": 14.61, + "learning_rate": 8.847433852247098e-08, + "loss": 3.8235, + "step": 1328000 + }, + { + "epoch": 14.62, + "learning_rate": 8.846058639089059e-08, + "loss": 3.8334, + "step": 1328500 + }, + { + "epoch": 14.62, + "learning_rate": 8.84468342593102e-08, + "loss": 3.8329, + "step": 1329000 + }, + { + "epoch": 14.63, + "learning_rate": 8.843308212772979e-08, + "loss": 3.8304, + "step": 1329500 + }, + { + "epoch": 14.63, + "learning_rate": 8.84193299961494e-08, + "loss": 3.8205, + "step": 1330000 + }, + { + "epoch": 14.64, + "learning_rate": 8.840557786456901e-08, + "loss": 3.8253, + "step": 1330500 + }, + { + "epoch": 14.64, + "learning_rate": 8.83918257329886e-08, + "loss": 3.8297, + "step": 1331000 + }, + { + "epoch": 14.65, + "learning_rate": 8.837807360140821e-08, + "loss": 3.8213, + "step": 1331500 + }, + { + "epoch": 14.65, + "learning_rate": 8.836432146982782e-08, + "loss": 3.8419, + "step": 1332000 + }, + { + "epoch": 14.66, + "learning_rate": 8.835056933824743e-08, + "loss": 3.8432, + "step": 1332500 + }, + { + "epoch": 14.67, + "learning_rate": 8.833681720666703e-08, + "loss": 3.8321, + "step": 1333000 + }, + { + "epoch": 14.67, + "learning_rate": 8.832306507508664e-08, + "loss": 3.8338, + "step": 1333500 + }, + { + "epoch": 14.68, + "learning_rate": 8.830931294350625e-08, + "loss": 3.8249, + "step": 1334000 + }, + { + "epoch": 14.68, + "learning_rate": 8.829556081192584e-08, + "loss": 3.8469, + "step": 1334500 + }, + { + "epoch": 14.69, + "learning_rate": 8.828180868034545e-08, + "loss": 3.81, + "step": 1335000 + }, + { + "epoch": 14.69, + "learning_rate": 8.826805654876506e-08, + "loss": 3.8178, + "step": 1335500 + }, + { + "epoch": 14.7, + "learning_rate": 8.825430441718466e-08, + "loss": 3.8459, + "step": 1336000 + }, + { + "epoch": 14.7, + "learning_rate": 8.824055228560427e-08, + "loss": 3.8249, + "step": 1336500 + }, + { + "epoch": 14.71, + "learning_rate": 8.822680015402388e-08, + "loss": 3.8528, + "step": 1337000 + }, + { + "epoch": 14.71, + "learning_rate": 8.821304802244347e-08, + "loss": 3.8539, + "step": 1337500 + }, + { + "epoch": 14.72, + "learning_rate": 8.819929589086308e-08, + "loss": 3.8353, + "step": 1338000 + }, + { + "epoch": 14.73, + "learning_rate": 8.818554375928269e-08, + "loss": 3.8222, + "step": 1338500 + }, + { + "epoch": 14.73, + "learning_rate": 8.817179162770229e-08, + "loss": 3.8415, + "step": 1339000 + }, + { + "epoch": 14.74, + "learning_rate": 8.81580394961219e-08, + "loss": 3.8315, + "step": 1339500 + }, + { + "epoch": 14.74, + "learning_rate": 8.814428736454151e-08, + "loss": 3.8287, + "step": 1340000 + }, + { + "epoch": 14.75, + "learning_rate": 8.81305352329611e-08, + "loss": 3.811, + "step": 1340500 + }, + { + "epoch": 14.75, + "learning_rate": 8.811678310138071e-08, + "loss": 3.8284, + "step": 1341000 + }, + { + "epoch": 14.76, + "learning_rate": 8.810303096980032e-08, + "loss": 3.8333, + "step": 1341500 + }, + { + "epoch": 14.76, + "learning_rate": 8.808927883821992e-08, + "loss": 3.8285, + "step": 1342000 + }, + { + "epoch": 14.77, + "learning_rate": 8.807552670663953e-08, + "loss": 3.8316, + "step": 1342500 + }, + { + "epoch": 14.78, + "learning_rate": 8.806177457505914e-08, + "loss": 3.8229, + "step": 1343000 + }, + { + "epoch": 14.78, + "learning_rate": 8.804802244347873e-08, + "loss": 3.8303, + "step": 1343500 + }, + { + "epoch": 14.79, + "learning_rate": 8.803427031189834e-08, + "loss": 3.8332, + "step": 1344000 + }, + { + "epoch": 14.79, + "learning_rate": 8.802051818031795e-08, + "loss": 3.8348, + "step": 1344500 + }, + { + "epoch": 14.8, + "learning_rate": 8.800676604873755e-08, + "loss": 3.8206, + "step": 1345000 + }, + { + "epoch": 14.8, + "learning_rate": 8.799301391715716e-08, + "loss": 3.811, + "step": 1345500 + }, + { + "epoch": 14.81, + "learning_rate": 8.797926178557677e-08, + "loss": 3.8215, + "step": 1346000 + }, + { + "epoch": 14.81, + "learning_rate": 8.796550965399636e-08, + "loss": 3.8161, + "step": 1346500 + }, + { + "epoch": 14.82, + "learning_rate": 8.795175752241597e-08, + "loss": 3.8191, + "step": 1347000 + }, + { + "epoch": 14.82, + "learning_rate": 8.793800539083558e-08, + "loss": 3.8497, + "step": 1347500 + }, + { + "epoch": 14.83, + "learning_rate": 8.792425325925518e-08, + "loss": 3.8152, + "step": 1348000 + }, + { + "epoch": 14.84, + "learning_rate": 8.791050112767479e-08, + "loss": 3.8222, + "step": 1348500 + }, + { + "epoch": 14.84, + "learning_rate": 8.78967489960944e-08, + "loss": 3.8175, + "step": 1349000 + }, + { + "epoch": 14.85, + "learning_rate": 8.788299686451399e-08, + "loss": 3.8339, + "step": 1349500 + }, + { + "epoch": 14.85, + "learning_rate": 8.78692447329336e-08, + "loss": 3.8322, + "step": 1350000 + }, + { + "epoch": 14.86, + "learning_rate": 8.785549260135321e-08, + "loss": 3.8071, + "step": 1350500 + }, + { + "epoch": 14.86, + "learning_rate": 8.784174046977281e-08, + "loss": 3.8367, + "step": 1351000 + }, + { + "epoch": 14.87, + "learning_rate": 8.782798833819242e-08, + "loss": 3.8149, + "step": 1351500 + }, + { + "epoch": 14.87, + "learning_rate": 8.781423620661203e-08, + "loss": 3.8086, + "step": 1352000 + }, + { + "epoch": 14.88, + "learning_rate": 8.780048407503162e-08, + "loss": 3.8263, + "step": 1352500 + }, + { + "epoch": 14.89, + "learning_rate": 8.778673194345123e-08, + "loss": 3.8311, + "step": 1353000 + }, + { + "epoch": 14.89, + "learning_rate": 8.777297981187084e-08, + "loss": 3.823, + "step": 1353500 + }, + { + "epoch": 14.9, + "learning_rate": 8.775922768029044e-08, + "loss": 3.8287, + "step": 1354000 + }, + { + "epoch": 14.9, + "learning_rate": 8.774547554871005e-08, + "loss": 3.8153, + "step": 1354500 + }, + { + "epoch": 14.91, + "learning_rate": 8.773172341712966e-08, + "loss": 3.8134, + "step": 1355000 + }, + { + "epoch": 14.91, + "learning_rate": 8.771797128554925e-08, + "loss": 3.8061, + "step": 1355500 + }, + { + "epoch": 14.92, + "learning_rate": 8.770421915396886e-08, + "loss": 3.8165, + "step": 1356000 + }, + { + "epoch": 14.92, + "learning_rate": 8.769046702238847e-08, + "loss": 3.8212, + "step": 1356500 + }, + { + "epoch": 14.93, + "learning_rate": 8.767671489080807e-08, + "loss": 3.8316, + "step": 1357000 + }, + { + "epoch": 14.93, + "learning_rate": 8.766296275922768e-08, + "loss": 3.8384, + "step": 1357500 + }, + { + "epoch": 14.94, + "learning_rate": 8.764921062764728e-08, + "loss": 3.8277, + "step": 1358000 + }, + { + "epoch": 14.95, + "learning_rate": 8.763545849606688e-08, + "loss": 3.8165, + "step": 1358500 + }, + { + "epoch": 14.95, + "learning_rate": 8.762170636448649e-08, + "loss": 3.8392, + "step": 1359000 + }, + { + "epoch": 14.96, + "learning_rate": 8.76079542329061e-08, + "loss": 3.8291, + "step": 1359500 + }, + { + "epoch": 14.96, + "learning_rate": 8.75942021013257e-08, + "loss": 3.824, + "step": 1360000 + }, + { + "epoch": 14.97, + "learning_rate": 8.75804499697453e-08, + "loss": 3.8107, + "step": 1360500 + }, + { + "epoch": 14.97, + "learning_rate": 8.756669783816491e-08, + "loss": 3.8143, + "step": 1361000 + }, + { + "epoch": 14.98, + "learning_rate": 8.755294570658451e-08, + "loss": 3.8302, + "step": 1361500 + }, + { + "epoch": 14.98, + "learning_rate": 8.753919357500412e-08, + "loss": 3.8302, + "step": 1362000 + }, + { + "epoch": 14.99, + "learning_rate": 8.752544144342373e-08, + "loss": 3.8267, + "step": 1362500 + }, + { + "epoch": 15.0, + "learning_rate": 8.751168931184333e-08, + "loss": 3.8379, + "step": 1363000 + }, + { + "epoch": 15.0, + "eval_loss": 3.8711750507354736, + "eval_runtime": 6.1393, + "eval_samples_per_second": 253.125, + "step": 1363425 + }, + { + "epoch": 15.0, + "learning_rate": 8.749793718026293e-08, + "loss": 3.819, + "step": 1363500 + }, + { + "epoch": 15.01, + "learning_rate": 8.748418504868254e-08, + "loss": 3.8339, + "step": 1364000 + }, + { + "epoch": 15.01, + "learning_rate": 8.747043291710214e-08, + "loss": 3.8204, + "step": 1364500 + }, + { + "epoch": 15.02, + "learning_rate": 8.745668078552175e-08, + "loss": 3.8354, + "step": 1365000 + }, + { + "epoch": 15.02, + "learning_rate": 8.744292865394136e-08, + "loss": 3.8203, + "step": 1365500 + }, + { + "epoch": 15.03, + "learning_rate": 8.742917652236096e-08, + "loss": 3.8207, + "step": 1366000 + }, + { + "epoch": 15.03, + "learning_rate": 8.741542439078056e-08, + "loss": 3.8104, + "step": 1366500 + }, + { + "epoch": 15.04, + "learning_rate": 8.740167225920017e-08, + "loss": 3.8263, + "step": 1367000 + }, + { + "epoch": 15.04, + "learning_rate": 8.738792012761977e-08, + "loss": 3.8139, + "step": 1367500 + }, + { + "epoch": 15.05, + "learning_rate": 8.737416799603938e-08, + "loss": 3.8409, + "step": 1368000 + }, + { + "epoch": 15.06, + "learning_rate": 8.736041586445899e-08, + "loss": 3.7963, + "step": 1368500 + }, + { + "epoch": 15.06, + "learning_rate": 8.734666373287858e-08, + "loss": 3.8098, + "step": 1369000 + }, + { + "epoch": 15.07, + "learning_rate": 8.73329116012982e-08, + "loss": 3.8256, + "step": 1369500 + }, + { + "epoch": 15.07, + "learning_rate": 8.73191594697178e-08, + "loss": 3.8247, + "step": 1370000 + }, + { + "epoch": 15.08, + "learning_rate": 8.73054073381374e-08, + "loss": 3.8166, + "step": 1370500 + }, + { + "epoch": 15.08, + "learning_rate": 8.729165520655701e-08, + "loss": 3.8518, + "step": 1371000 + }, + { + "epoch": 15.09, + "learning_rate": 8.727790307497662e-08, + "loss": 3.8313, + "step": 1371500 + }, + { + "epoch": 15.09, + "learning_rate": 8.726415094339621e-08, + "loss": 3.8232, + "step": 1372000 + }, + { + "epoch": 15.1, + "learning_rate": 8.725039881181582e-08, + "loss": 3.8179, + "step": 1372500 + }, + { + "epoch": 15.11, + "learning_rate": 8.723664668023543e-08, + "loss": 3.8299, + "step": 1373000 + }, + { + "epoch": 15.11, + "learning_rate": 8.722289454865503e-08, + "loss": 3.8266, + "step": 1373500 + }, + { + "epoch": 15.12, + "learning_rate": 8.720914241707464e-08, + "loss": 3.8215, + "step": 1374000 + }, + { + "epoch": 15.12, + "learning_rate": 8.719539028549425e-08, + "loss": 3.8317, + "step": 1374500 + }, + { + "epoch": 15.13, + "learning_rate": 8.718163815391384e-08, + "loss": 3.8269, + "step": 1375000 + }, + { + "epoch": 15.13, + "learning_rate": 8.716788602233345e-08, + "loss": 3.84, + "step": 1375500 + }, + { + "epoch": 15.14, + "learning_rate": 8.715413389075306e-08, + "loss": 3.8286, + "step": 1376000 + }, + { + "epoch": 15.14, + "learning_rate": 8.714038175917267e-08, + "loss": 3.8404, + "step": 1376500 + }, + { + "epoch": 15.15, + "learning_rate": 8.712662962759227e-08, + "loss": 3.8253, + "step": 1377000 + }, + { + "epoch": 15.15, + "learning_rate": 8.711287749601188e-08, + "loss": 3.824, + "step": 1377500 + }, + { + "epoch": 15.16, + "learning_rate": 8.709912536443149e-08, + "loss": 3.8241, + "step": 1378000 + }, + { + "epoch": 15.17, + "learning_rate": 8.708537323285108e-08, + "loss": 3.803, + "step": 1378500 + }, + { + "epoch": 15.17, + "learning_rate": 8.707162110127069e-08, + "loss": 3.8223, + "step": 1379000 + }, + { + "epoch": 15.18, + "learning_rate": 8.70578689696903e-08, + "loss": 3.8301, + "step": 1379500 + }, + { + "epoch": 15.18, + "learning_rate": 8.704411683810991e-08, + "loss": 3.8231, + "step": 1380000 + }, + { + "epoch": 15.19, + "learning_rate": 8.703036470652951e-08, + "loss": 3.8293, + "step": 1380500 + }, + { + "epoch": 15.19, + "learning_rate": 8.701661257494912e-08, + "loss": 3.8172, + "step": 1381000 + }, + { + "epoch": 15.2, + "learning_rate": 8.700286044336873e-08, + "loss": 3.8155, + "step": 1381500 + }, + { + "epoch": 15.2, + "learning_rate": 8.698910831178833e-08, + "loss": 3.8332, + "step": 1382000 + }, + { + "epoch": 15.21, + "learning_rate": 8.697535618020793e-08, + "loss": 3.8303, + "step": 1382500 + }, + { + "epoch": 15.22, + "learning_rate": 8.696160404862754e-08, + "loss": 3.834, + "step": 1383000 + }, + { + "epoch": 15.22, + "learning_rate": 8.694785191704715e-08, + "loss": 3.831, + "step": 1383500 + }, + { + "epoch": 15.23, + "learning_rate": 8.693409978546675e-08, + "loss": 3.8489, + "step": 1384000 + }, + { + "epoch": 15.23, + "learning_rate": 8.692034765388636e-08, + "loss": 3.8309, + "step": 1384500 + }, + { + "epoch": 15.24, + "learning_rate": 8.690659552230596e-08, + "loss": 3.8224, + "step": 1385000 + }, + { + "epoch": 15.24, + "learning_rate": 8.689284339072556e-08, + "loss": 3.8189, + "step": 1385500 + }, + { + "epoch": 15.25, + "learning_rate": 8.687909125914517e-08, + "loss": 3.8271, + "step": 1386000 + }, + { + "epoch": 15.25, + "learning_rate": 8.686533912756478e-08, + "loss": 3.7946, + "step": 1386500 + }, + { + "epoch": 15.26, + "learning_rate": 8.685158699598438e-08, + "loss": 3.8178, + "step": 1387000 + }, + { + "epoch": 15.26, + "learning_rate": 8.683783486440398e-08, + "loss": 3.8126, + "step": 1387500 + }, + { + "epoch": 15.27, + "learning_rate": 8.68240827328236e-08, + "loss": 3.8171, + "step": 1388000 + }, + { + "epoch": 15.28, + "learning_rate": 8.681033060124319e-08, + "loss": 3.8246, + "step": 1388500 + }, + { + "epoch": 15.28, + "learning_rate": 8.67965784696628e-08, + "loss": 3.8354, + "step": 1389000 + }, + { + "epoch": 15.29, + "learning_rate": 8.678282633808241e-08, + "loss": 3.8337, + "step": 1389500 + }, + { + "epoch": 15.29, + "learning_rate": 8.6769074206502e-08, + "loss": 3.8168, + "step": 1390000 + }, + { + "epoch": 15.3, + "learning_rate": 8.675532207492161e-08, + "loss": 3.8245, + "step": 1390500 + }, + { + "epoch": 15.3, + "learning_rate": 8.674156994334121e-08, + "loss": 3.815, + "step": 1391000 + }, + { + "epoch": 15.31, + "learning_rate": 8.672781781176082e-08, + "loss": 3.8064, + "step": 1391500 + }, + { + "epoch": 15.31, + "learning_rate": 8.671406568018043e-08, + "loss": 3.8283, + "step": 1392000 + }, + { + "epoch": 15.32, + "learning_rate": 8.670031354860003e-08, + "loss": 3.8198, + "step": 1392500 + }, + { + "epoch": 15.33, + "learning_rate": 8.668656141701963e-08, + "loss": 3.8265, + "step": 1393000 + }, + { + "epoch": 15.33, + "learning_rate": 8.667280928543924e-08, + "loss": 3.8383, + "step": 1393500 + }, + { + "epoch": 15.34, + "learning_rate": 8.665905715385884e-08, + "loss": 3.8405, + "step": 1394000 + }, + { + "epoch": 15.34, + "learning_rate": 8.664530502227845e-08, + "loss": 3.8221, + "step": 1394500 + }, + { + "epoch": 15.35, + "learning_rate": 8.663155289069806e-08, + "loss": 3.8046, + "step": 1395000 + }, + { + "epoch": 15.35, + "learning_rate": 8.661780075911765e-08, + "loss": 3.8153, + "step": 1395500 + }, + { + "epoch": 15.36, + "learning_rate": 8.660404862753726e-08, + "loss": 3.8158, + "step": 1396000 + }, + { + "epoch": 15.36, + "learning_rate": 8.659029649595687e-08, + "loss": 3.8261, + "step": 1396500 + }, + { + "epoch": 15.37, + "learning_rate": 8.657654436437647e-08, + "loss": 3.8058, + "step": 1397000 + }, + { + "epoch": 15.37, + "learning_rate": 8.656279223279608e-08, + "loss": 3.8047, + "step": 1397500 + }, + { + "epoch": 15.38, + "learning_rate": 8.654904010121569e-08, + "loss": 3.8213, + "step": 1398000 + }, + { + "epoch": 15.39, + "learning_rate": 8.653528796963528e-08, + "loss": 3.8377, + "step": 1398500 + }, + { + "epoch": 15.39, + "learning_rate": 8.65215358380549e-08, + "loss": 3.8303, + "step": 1399000 + }, + { + "epoch": 15.4, + "learning_rate": 8.65077837064745e-08, + "loss": 3.8232, + "step": 1399500 + }, + { + "epoch": 15.4, + "learning_rate": 8.64940315748941e-08, + "loss": 3.8248, + "step": 1400000 + }, + { + "epoch": 15.41, + "learning_rate": 8.648027944331371e-08, + "loss": 3.8455, + "step": 1400500 + }, + { + "epoch": 15.41, + "learning_rate": 8.646652731173332e-08, + "loss": 3.8225, + "step": 1401000 + }, + { + "epoch": 15.42, + "learning_rate": 8.645277518015291e-08, + "loss": 3.8282, + "step": 1401500 + }, + { + "epoch": 15.42, + "learning_rate": 8.643902304857252e-08, + "loss": 3.8275, + "step": 1402000 + }, + { + "epoch": 15.43, + "learning_rate": 8.642527091699213e-08, + "loss": 3.8292, + "step": 1402500 + }, + { + "epoch": 15.44, + "learning_rate": 8.641151878541173e-08, + "loss": 3.8259, + "step": 1403000 + }, + { + "epoch": 15.44, + "learning_rate": 8.639776665383134e-08, + "loss": 3.83, + "step": 1403500 + }, + { + "epoch": 15.45, + "learning_rate": 8.638401452225095e-08, + "loss": 3.8268, + "step": 1404000 + }, + { + "epoch": 15.45, + "learning_rate": 8.637026239067054e-08, + "loss": 3.8071, + "step": 1404500 + }, + { + "epoch": 15.46, + "learning_rate": 8.635651025909015e-08, + "loss": 3.8132, + "step": 1405000 + }, + { + "epoch": 15.46, + "learning_rate": 8.634275812750976e-08, + "loss": 3.8084, + "step": 1405500 + }, + { + "epoch": 15.47, + "learning_rate": 8.632900599592936e-08, + "loss": 3.8209, + "step": 1406000 + }, + { + "epoch": 15.47, + "learning_rate": 8.631525386434897e-08, + "loss": 3.825, + "step": 1406500 + }, + { + "epoch": 15.48, + "learning_rate": 8.630150173276858e-08, + "loss": 3.8182, + "step": 1407000 + }, + { + "epoch": 15.48, + "learning_rate": 8.628774960118817e-08, + "loss": 3.8156, + "step": 1407500 + }, + { + "epoch": 15.49, + "learning_rate": 8.627399746960778e-08, + "loss": 3.802, + "step": 1408000 + }, + { + "epoch": 15.5, + "learning_rate": 8.626024533802739e-08, + "loss": 3.8365, + "step": 1408500 + }, + { + "epoch": 15.5, + "learning_rate": 8.624649320644699e-08, + "loss": 3.815, + "step": 1409000 + }, + { + "epoch": 15.51, + "learning_rate": 8.62327410748666e-08, + "loss": 3.8232, + "step": 1409500 + }, + { + "epoch": 15.51, + "learning_rate": 8.621898894328621e-08, + "loss": 3.8285, + "step": 1410000 + }, + { + "epoch": 15.52, + "learning_rate": 8.62052368117058e-08, + "loss": 3.825, + "step": 1410500 + }, + { + "epoch": 15.52, + "learning_rate": 8.619148468012541e-08, + "loss": 3.8201, + "step": 1411000 + }, + { + "epoch": 15.53, + "learning_rate": 8.617773254854502e-08, + "loss": 3.8256, + "step": 1411500 + }, + { + "epoch": 15.53, + "learning_rate": 8.616398041696462e-08, + "loss": 3.8158, + "step": 1412000 + }, + { + "epoch": 15.54, + "learning_rate": 8.615022828538423e-08, + "loss": 3.8097, + "step": 1412500 + }, + { + "epoch": 15.55, + "learning_rate": 8.613647615380384e-08, + "loss": 3.8391, + "step": 1413000 + }, + { + "epoch": 15.55, + "learning_rate": 8.612272402222343e-08, + "loss": 3.8291, + "step": 1413500 + }, + { + "epoch": 15.56, + "learning_rate": 8.610897189064304e-08, + "loss": 3.8295, + "step": 1414000 + }, + { + "epoch": 15.56, + "learning_rate": 8.609521975906265e-08, + "loss": 3.8199, + "step": 1414500 + }, + { + "epoch": 15.57, + "learning_rate": 8.608146762748225e-08, + "loss": 3.8236, + "step": 1415000 + }, + { + "epoch": 15.57, + "learning_rate": 8.606771549590186e-08, + "loss": 3.8074, + "step": 1415500 + }, + { + "epoch": 15.58, + "learning_rate": 8.605396336432147e-08, + "loss": 3.8209, + "step": 1416000 + }, + { + "epoch": 15.58, + "learning_rate": 8.604021123274106e-08, + "loss": 3.8179, + "step": 1416500 + }, + { + "epoch": 15.59, + "learning_rate": 8.602645910116067e-08, + "loss": 3.8325, + "step": 1417000 + }, + { + "epoch": 15.59, + "learning_rate": 8.601270696958028e-08, + "loss": 3.8338, + "step": 1417500 + }, + { + "epoch": 15.6, + "learning_rate": 8.599895483799988e-08, + "loss": 3.8088, + "step": 1418000 + }, + { + "epoch": 15.61, + "learning_rate": 8.598520270641949e-08, + "loss": 3.8141, + "step": 1418500 + }, + { + "epoch": 15.61, + "learning_rate": 8.59714505748391e-08, + "loss": 3.8325, + "step": 1419000 + }, + { + "epoch": 15.62, + "learning_rate": 8.595769844325869e-08, + "loss": 3.8181, + "step": 1419500 + }, + { + "epoch": 15.62, + "learning_rate": 8.59439463116783e-08, + "loss": 3.8324, + "step": 1420000 + }, + { + "epoch": 15.63, + "learning_rate": 8.593019418009791e-08, + "loss": 3.8067, + "step": 1420500 + }, + { + "epoch": 15.63, + "learning_rate": 8.59164420485175e-08, + "loss": 3.7967, + "step": 1421000 + }, + { + "epoch": 15.64, + "learning_rate": 8.590268991693712e-08, + "loss": 3.816, + "step": 1421500 + }, + { + "epoch": 15.64, + "learning_rate": 8.588893778535673e-08, + "loss": 3.82, + "step": 1422000 + }, + { + "epoch": 15.65, + "learning_rate": 8.587518565377632e-08, + "loss": 3.8263, + "step": 1422500 + }, + { + "epoch": 15.66, + "learning_rate": 8.586143352219593e-08, + "loss": 3.8351, + "step": 1423000 + }, + { + "epoch": 15.66, + "learning_rate": 8.584768139061554e-08, + "loss": 3.8124, + "step": 1423500 + }, + { + "epoch": 15.67, + "learning_rate": 8.583392925903515e-08, + "loss": 3.8133, + "step": 1424000 + }, + { + "epoch": 15.67, + "learning_rate": 8.582017712745475e-08, + "loss": 3.8214, + "step": 1424500 + }, + { + "epoch": 15.68, + "learning_rate": 8.580642499587435e-08, + "loss": 3.8209, + "step": 1425000 + }, + { + "epoch": 15.68, + "learning_rate": 8.579267286429396e-08, + "loss": 3.8368, + "step": 1425500 + }, + { + "epoch": 15.69, + "learning_rate": 8.577892073271356e-08, + "loss": 3.8159, + "step": 1426000 + }, + { + "epoch": 15.69, + "learning_rate": 8.576516860113317e-08, + "loss": 3.7983, + "step": 1426500 + }, + { + "epoch": 15.7, + "learning_rate": 8.575141646955278e-08, + "loss": 3.8144, + "step": 1427000 + }, + { + "epoch": 15.7, + "learning_rate": 8.573766433797239e-08, + "loss": 3.8218, + "step": 1427500 + }, + { + "epoch": 15.71, + "learning_rate": 8.572391220639198e-08, + "loss": 3.799, + "step": 1428000 + }, + { + "epoch": 15.72, + "learning_rate": 8.57101600748116e-08, + "loss": 3.8186, + "step": 1428500 + }, + { + "epoch": 15.72, + "learning_rate": 8.56964079432312e-08, + "loss": 3.8238, + "step": 1429000 + }, + { + "epoch": 15.73, + "learning_rate": 8.568265581165081e-08, + "loss": 3.8201, + "step": 1429500 + }, + { + "epoch": 15.73, + "learning_rate": 8.566890368007041e-08, + "loss": 3.8304, + "step": 1430000 + }, + { + "epoch": 15.74, + "learning_rate": 8.565515154849002e-08, + "loss": 3.8215, + "step": 1430500 + }, + { + "epoch": 15.74, + "learning_rate": 8.564139941690963e-08, + "loss": 3.8245, + "step": 1431000 + }, + { + "epoch": 15.75, + "learning_rate": 8.562764728532922e-08, + "loss": 3.8111, + "step": 1431500 + }, + { + "epoch": 15.75, + "learning_rate": 8.561389515374883e-08, + "loss": 3.8059, + "step": 1432000 + }, + { + "epoch": 15.76, + "learning_rate": 8.560014302216844e-08, + "loss": 3.8297, + "step": 1432500 + }, + { + "epoch": 15.77, + "learning_rate": 8.558639089058804e-08, + "loss": 3.815, + "step": 1433000 + }, + { + "epoch": 15.77, + "learning_rate": 8.557263875900765e-08, + "loss": 3.816, + "step": 1433500 + }, + { + "epoch": 15.78, + "learning_rate": 8.555888662742726e-08, + "loss": 3.8185, + "step": 1434000 + }, + { + "epoch": 15.78, + "learning_rate": 8.554513449584685e-08, + "loss": 3.8295, + "step": 1434500 + }, + { + "epoch": 15.79, + "learning_rate": 8.553138236426646e-08, + "loss": 3.8245, + "step": 1435000 + }, + { + "epoch": 15.79, + "learning_rate": 8.551763023268607e-08, + "loss": 3.8294, + "step": 1435500 + }, + { + "epoch": 15.8, + "learning_rate": 8.550387810110567e-08, + "loss": 3.8202, + "step": 1436000 + }, + { + "epoch": 15.8, + "learning_rate": 8.549012596952528e-08, + "loss": 3.8467, + "step": 1436500 + }, + { + "epoch": 15.81, + "learning_rate": 8.547637383794489e-08, + "loss": 3.8229, + "step": 1437000 + }, + { + "epoch": 15.81, + "learning_rate": 8.546262170636448e-08, + "loss": 3.8318, + "step": 1437500 + }, + { + "epoch": 15.82, + "learning_rate": 8.544886957478409e-08, + "loss": 3.8036, + "step": 1438000 + }, + { + "epoch": 15.83, + "learning_rate": 8.54351174432037e-08, + "loss": 3.8267, + "step": 1438500 + }, + { + "epoch": 15.83, + "learning_rate": 8.54213653116233e-08, + "loss": 3.841, + "step": 1439000 + }, + { + "epoch": 15.84, + "learning_rate": 8.54076131800429e-08, + "loss": 3.8275, + "step": 1439500 + }, + { + "epoch": 15.84, + "learning_rate": 8.539386104846252e-08, + "loss": 3.8073, + "step": 1440000 + }, + { + "epoch": 15.85, + "learning_rate": 8.538010891688211e-08, + "loss": 3.8203, + "step": 1440500 + }, + { + "epoch": 15.85, + "learning_rate": 8.536635678530172e-08, + "loss": 3.8233, + "step": 1441000 + }, + { + "epoch": 15.86, + "learning_rate": 8.535260465372133e-08, + "loss": 3.828, + "step": 1441500 + }, + { + "epoch": 15.86, + "learning_rate": 8.533885252214093e-08, + "loss": 3.8295, + "step": 1442000 + }, + { + "epoch": 15.87, + "learning_rate": 8.532510039056054e-08, + "loss": 3.8235, + "step": 1442500 + }, + { + "epoch": 15.88, + "learning_rate": 8.531134825898015e-08, + "loss": 3.814, + "step": 1443000 + }, + { + "epoch": 15.88, + "learning_rate": 8.529759612739974e-08, + "loss": 3.8075, + "step": 1443500 + }, + { + "epoch": 15.89, + "learning_rate": 8.528384399581935e-08, + "loss": 3.8301, + "step": 1444000 + }, + { + "epoch": 15.89, + "learning_rate": 8.527009186423896e-08, + "loss": 3.8131, + "step": 1444500 + }, + { + "epoch": 15.9, + "learning_rate": 8.525633973265856e-08, + "loss": 3.815, + "step": 1445000 + }, + { + "epoch": 15.9, + "learning_rate": 8.524258760107817e-08, + "loss": 3.826, + "step": 1445500 + }, + { + "epoch": 15.91, + "learning_rate": 8.522883546949778e-08, + "loss": 3.7978, + "step": 1446000 + }, + { + "epoch": 15.91, + "learning_rate": 8.521508333791737e-08, + "loss": 3.8198, + "step": 1446500 + }, + { + "epoch": 15.92, + "learning_rate": 8.520133120633698e-08, + "loss": 3.8134, + "step": 1447000 + }, + { + "epoch": 15.92, + "learning_rate": 8.518757907475659e-08, + "loss": 3.8186, + "step": 1447500 + }, + { + "epoch": 15.93, + "learning_rate": 8.517382694317619e-08, + "loss": 3.818, + "step": 1448000 + }, + { + "epoch": 15.94, + "learning_rate": 8.51600748115958e-08, + "loss": 3.8001, + "step": 1448500 + }, + { + "epoch": 15.94, + "learning_rate": 8.51463226800154e-08, + "loss": 3.8267, + "step": 1449000 + }, + { + "epoch": 15.95, + "learning_rate": 8.5132570548435e-08, + "loss": 3.8269, + "step": 1449500 + }, + { + "epoch": 15.95, + "learning_rate": 8.511881841685461e-08, + "loss": 3.8159, + "step": 1450000 + }, + { + "epoch": 15.96, + "learning_rate": 8.510506628527422e-08, + "loss": 3.815, + "step": 1450500 + }, + { + "epoch": 15.96, + "learning_rate": 8.509131415369382e-08, + "loss": 3.8191, + "step": 1451000 + }, + { + "epoch": 15.97, + "learning_rate": 8.507756202211342e-08, + "loss": 3.8218, + "step": 1451500 + }, + { + "epoch": 15.97, + "learning_rate": 8.506380989053303e-08, + "loss": 3.8213, + "step": 1452000 + }, + { + "epoch": 15.98, + "learning_rate": 8.505005775895263e-08, + "loss": 3.8265, + "step": 1452500 + }, + { + "epoch": 15.99, + "learning_rate": 8.503630562737224e-08, + "loss": 3.8337, + "step": 1453000 + }, + { + "epoch": 15.99, + "learning_rate": 8.502255349579185e-08, + "loss": 3.8047, + "step": 1453500 + }, + { + "epoch": 16.0, + "learning_rate": 8.500880136421145e-08, + "loss": 3.8296, + "step": 1454000 + }, + { + "epoch": 16.0, + "eval_loss": 3.867938995361328, + "eval_runtime": 6.1324, + "eval_samples_per_second": 253.41, + "step": 1454320 + }, + { + "epoch": 16.0, + "learning_rate": 8.499504923263105e-08, + "loss": 3.8063, + "step": 1454500 + }, + { + "epoch": 16.01, + "learning_rate": 8.498129710105066e-08, + "loss": 3.8215, + "step": 1455000 + }, + { + "epoch": 16.01, + "learning_rate": 8.496754496947026e-08, + "loss": 3.8266, + "step": 1455500 + }, + { + "epoch": 16.02, + "learning_rate": 8.495379283788987e-08, + "loss": 3.8106, + "step": 1456000 + }, + { + "epoch": 16.02, + "learning_rate": 8.494004070630948e-08, + "loss": 3.811, + "step": 1456500 + }, + { + "epoch": 16.03, + "learning_rate": 8.492628857472907e-08, + "loss": 3.8191, + "step": 1457000 + }, + { + "epoch": 16.03, + "learning_rate": 8.491253644314868e-08, + "loss": 3.8184, + "step": 1457500 + }, + { + "epoch": 16.04, + "learning_rate": 8.48987843115683e-08, + "loss": 3.7913, + "step": 1458000 + }, + { + "epoch": 16.05, + "learning_rate": 8.488503217998789e-08, + "loss": 3.827, + "step": 1458500 + }, + { + "epoch": 16.05, + "learning_rate": 8.48712800484075e-08, + "loss": 3.8153, + "step": 1459000 + }, + { + "epoch": 16.06, + "learning_rate": 8.485752791682711e-08, + "loss": 3.8196, + "step": 1459500 + }, + { + "epoch": 16.06, + "learning_rate": 8.48437757852467e-08, + "loss": 3.8183, + "step": 1460000 + }, + { + "epoch": 16.07, + "learning_rate": 8.483002365366631e-08, + "loss": 3.8109, + "step": 1460500 + }, + { + "epoch": 16.07, + "learning_rate": 8.481627152208592e-08, + "loss": 3.8173, + "step": 1461000 + }, + { + "epoch": 16.08, + "learning_rate": 8.480251939050552e-08, + "loss": 3.8081, + "step": 1461500 + }, + { + "epoch": 16.08, + "learning_rate": 8.478876725892513e-08, + "loss": 3.8132, + "step": 1462000 + }, + { + "epoch": 16.09, + "learning_rate": 8.477501512734474e-08, + "loss": 3.8262, + "step": 1462500 + }, + { + "epoch": 16.1, + "learning_rate": 8.476126299576433e-08, + "loss": 3.8229, + "step": 1463000 + }, + { + "epoch": 16.1, + "learning_rate": 8.474751086418394e-08, + "loss": 3.8127, + "step": 1463500 + }, + { + "epoch": 16.11, + "learning_rate": 8.473375873260355e-08, + "loss": 3.8149, + "step": 1464000 + }, + { + "epoch": 16.11, + "learning_rate": 8.472000660102315e-08, + "loss": 3.7961, + "step": 1464500 + }, + { + "epoch": 16.12, + "learning_rate": 8.470625446944276e-08, + "loss": 3.8306, + "step": 1465000 + }, + { + "epoch": 16.12, + "learning_rate": 8.469250233786235e-08, + "loss": 3.8204, + "step": 1465500 + }, + { + "epoch": 16.13, + "learning_rate": 8.467875020628196e-08, + "loss": 3.8135, + "step": 1466000 + }, + { + "epoch": 16.13, + "learning_rate": 8.466499807470157e-08, + "loss": 3.8081, + "step": 1466500 + }, + { + "epoch": 16.14, + "learning_rate": 8.465124594312117e-08, + "loss": 3.8348, + "step": 1467000 + }, + { + "epoch": 16.15, + "learning_rate": 8.463749381154078e-08, + "loss": 3.8197, + "step": 1467500 + }, + { + "epoch": 16.15, + "learning_rate": 8.462374167996039e-08, + "loss": 3.8224, + "step": 1468000 + }, + { + "epoch": 16.16, + "learning_rate": 8.460998954837998e-08, + "loss": 3.8009, + "step": 1468500 + }, + { + "epoch": 16.16, + "learning_rate": 8.459623741679959e-08, + "loss": 3.8142, + "step": 1469000 + }, + { + "epoch": 16.17, + "learning_rate": 8.45824852852192e-08, + "loss": 3.8072, + "step": 1469500 + }, + { + "epoch": 16.17, + "learning_rate": 8.45687331536388e-08, + "loss": 3.8411, + "step": 1470000 + }, + { + "epoch": 16.18, + "learning_rate": 8.455498102205841e-08, + "loss": 3.8058, + "step": 1470500 + }, + { + "epoch": 16.18, + "learning_rate": 8.454122889047802e-08, + "loss": 3.82, + "step": 1471000 + }, + { + "epoch": 16.19, + "learning_rate": 8.452747675889763e-08, + "loss": 3.8322, + "step": 1471500 + }, + { + "epoch": 16.19, + "learning_rate": 8.451372462731722e-08, + "loss": 3.8097, + "step": 1472000 + }, + { + "epoch": 16.2, + "learning_rate": 8.449997249573683e-08, + "loss": 3.8139, + "step": 1472500 + }, + { + "epoch": 16.21, + "learning_rate": 8.448622036415644e-08, + "loss": 3.8154, + "step": 1473000 + }, + { + "epoch": 16.21, + "learning_rate": 8.447246823257604e-08, + "loss": 3.8077, + "step": 1473500 + }, + { + "epoch": 16.22, + "learning_rate": 8.445871610099565e-08, + "loss": 3.8279, + "step": 1474000 + }, + { + "epoch": 16.22, + "learning_rate": 8.444496396941526e-08, + "loss": 3.8178, + "step": 1474500 + }, + { + "epoch": 16.23, + "learning_rate": 8.443121183783487e-08, + "loss": 3.8317, + "step": 1475000 + }, + { + "epoch": 16.23, + "learning_rate": 8.441745970625446e-08, + "loss": 3.8131, + "step": 1475500 + }, + { + "epoch": 16.24, + "learning_rate": 8.440370757467407e-08, + "loss": 3.814, + "step": 1476000 + }, + { + "epoch": 16.24, + "learning_rate": 8.438995544309368e-08, + "loss": 3.8155, + "step": 1476500 + }, + { + "epoch": 16.25, + "learning_rate": 8.437620331151329e-08, + "loss": 3.8094, + "step": 1477000 + }, + { + "epoch": 16.26, + "learning_rate": 8.436245117993289e-08, + "loss": 3.8151, + "step": 1477500 + }, + { + "epoch": 16.26, + "learning_rate": 8.43486990483525e-08, + "loss": 3.8262, + "step": 1478000 + }, + { + "epoch": 16.27, + "learning_rate": 8.43349469167721e-08, + "loss": 3.8172, + "step": 1478500 + }, + { + "epoch": 16.27, + "learning_rate": 8.43211947851917e-08, + "loss": 3.811, + "step": 1479000 + }, + { + "epoch": 16.28, + "learning_rate": 8.430744265361131e-08, + "loss": 3.8097, + "step": 1479500 + }, + { + "epoch": 16.28, + "learning_rate": 8.429369052203092e-08, + "loss": 3.8284, + "step": 1480000 + }, + { + "epoch": 16.29, + "learning_rate": 8.427993839045052e-08, + "loss": 3.8205, + "step": 1480500 + }, + { + "epoch": 16.29, + "learning_rate": 8.426618625887012e-08, + "loss": 3.8206, + "step": 1481000 + }, + { + "epoch": 16.3, + "learning_rate": 8.425243412728973e-08, + "loss": 3.805, + "step": 1481500 + }, + { + "epoch": 16.3, + "learning_rate": 8.423868199570933e-08, + "loss": 3.8002, + "step": 1482000 + }, + { + "epoch": 16.31, + "learning_rate": 8.422492986412894e-08, + "loss": 3.826, + "step": 1482500 + }, + { + "epoch": 16.32, + "learning_rate": 8.421117773254855e-08, + "loss": 3.8249, + "step": 1483000 + }, + { + "epoch": 16.32, + "learning_rate": 8.419742560096814e-08, + "loss": 3.8073, + "step": 1483500 + }, + { + "epoch": 16.33, + "learning_rate": 8.418367346938775e-08, + "loss": 3.8248, + "step": 1484000 + }, + { + "epoch": 16.33, + "learning_rate": 8.416992133780736e-08, + "loss": 3.8351, + "step": 1484500 + }, + { + "epoch": 16.34, + "learning_rate": 8.415616920622696e-08, + "loss": 3.839, + "step": 1485000 + }, + { + "epoch": 16.34, + "learning_rate": 8.414241707464657e-08, + "loss": 3.8135, + "step": 1485500 + }, + { + "epoch": 16.35, + "learning_rate": 8.412866494306618e-08, + "loss": 3.8355, + "step": 1486000 + }, + { + "epoch": 16.35, + "learning_rate": 8.411491281148577e-08, + "loss": 3.8078, + "step": 1486500 + }, + { + "epoch": 16.36, + "learning_rate": 8.410116067990538e-08, + "loss": 3.8177, + "step": 1487000 + }, + { + "epoch": 16.37, + "learning_rate": 8.408740854832499e-08, + "loss": 3.8221, + "step": 1487500 + }, + { + "epoch": 16.37, + "learning_rate": 8.407365641674459e-08, + "loss": 3.8262, + "step": 1488000 + }, + { + "epoch": 16.38, + "learning_rate": 8.40599042851642e-08, + "loss": 3.8176, + "step": 1488500 + }, + { + "epoch": 16.38, + "learning_rate": 8.404615215358381e-08, + "loss": 3.8234, + "step": 1489000 + }, + { + "epoch": 16.39, + "learning_rate": 8.40324000220034e-08, + "loss": 3.8211, + "step": 1489500 + }, + { + "epoch": 16.39, + "learning_rate": 8.401864789042301e-08, + "loss": 3.8328, + "step": 1490000 + }, + { + "epoch": 16.4, + "learning_rate": 8.400489575884262e-08, + "loss": 3.8035, + "step": 1490500 + }, + { + "epoch": 16.4, + "learning_rate": 8.399114362726222e-08, + "loss": 3.8202, + "step": 1491000 + }, + { + "epoch": 16.41, + "learning_rate": 8.397739149568183e-08, + "loss": 3.7977, + "step": 1491500 + }, + { + "epoch": 16.41, + "learning_rate": 8.396363936410144e-08, + "loss": 3.8308, + "step": 1492000 + }, + { + "epoch": 16.42, + "learning_rate": 8.394988723252103e-08, + "loss": 3.8182, + "step": 1492500 + }, + { + "epoch": 16.43, + "learning_rate": 8.393613510094064e-08, + "loss": 3.8137, + "step": 1493000 + }, + { + "epoch": 16.43, + "learning_rate": 8.392238296936025e-08, + "loss": 3.7955, + "step": 1493500 + }, + { + "epoch": 16.44, + "learning_rate": 8.390863083777985e-08, + "loss": 3.8029, + "step": 1494000 + }, + { + "epoch": 16.44, + "learning_rate": 8.389487870619946e-08, + "loss": 3.8189, + "step": 1494500 + }, + { + "epoch": 16.45, + "learning_rate": 8.388112657461907e-08, + "loss": 3.8303, + "step": 1495000 + }, + { + "epoch": 16.45, + "learning_rate": 8.386737444303866e-08, + "loss": 3.8199, + "step": 1495500 + }, + { + "epoch": 16.46, + "learning_rate": 8.385362231145827e-08, + "loss": 3.8183, + "step": 1496000 + }, + { + "epoch": 16.46, + "learning_rate": 8.383987017987788e-08, + "loss": 3.8091, + "step": 1496500 + }, + { + "epoch": 16.47, + "learning_rate": 8.382611804829748e-08, + "loss": 3.8141, + "step": 1497000 + }, + { + "epoch": 16.48, + "learning_rate": 8.381236591671709e-08, + "loss": 3.8107, + "step": 1497500 + }, + { + "epoch": 16.48, + "learning_rate": 8.37986137851367e-08, + "loss": 3.8077, + "step": 1498000 + }, + { + "epoch": 16.49, + "learning_rate": 8.378486165355629e-08, + "loss": 3.8042, + "step": 1498500 + }, + { + "epoch": 16.49, + "learning_rate": 8.37711095219759e-08, + "loss": 3.8148, + "step": 1499000 + }, + { + "epoch": 16.5, + "learning_rate": 8.375735739039551e-08, + "loss": 3.8195, + "step": 1499500 + }, + { + "epoch": 16.5, + "learning_rate": 8.374360525881511e-08, + "loss": 3.8122, + "step": 1500000 + }, + { + "epoch": 16.51, + "learning_rate": 8.372985312723472e-08, + "loss": 3.8212, + "step": 1500500 + }, + { + "epoch": 16.51, + "learning_rate": 8.371610099565433e-08, + "loss": 3.8143, + "step": 1501000 + }, + { + "epoch": 16.52, + "learning_rate": 8.370234886407392e-08, + "loss": 3.8178, + "step": 1501500 + }, + { + "epoch": 16.52, + "learning_rate": 8.368859673249353e-08, + "loss": 3.8171, + "step": 1502000 + }, + { + "epoch": 16.53, + "learning_rate": 8.367484460091314e-08, + "loss": 3.7978, + "step": 1502500 + }, + { + "epoch": 16.54, + "learning_rate": 8.366109246933274e-08, + "loss": 3.8218, + "step": 1503000 + }, + { + "epoch": 16.54, + "learning_rate": 8.364734033775235e-08, + "loss": 3.802, + "step": 1503500 + }, + { + "epoch": 16.55, + "learning_rate": 8.363358820617196e-08, + "loss": 3.8081, + "step": 1504000 + }, + { + "epoch": 16.55, + "learning_rate": 8.361983607459155e-08, + "loss": 3.8245, + "step": 1504500 + }, + { + "epoch": 16.56, + "learning_rate": 8.360608394301116e-08, + "loss": 3.8207, + "step": 1505000 + }, + { + "epoch": 16.56, + "learning_rate": 8.359233181143077e-08, + "loss": 3.8157, + "step": 1505500 + }, + { + "epoch": 16.57, + "learning_rate": 8.357857967985037e-08, + "loss": 3.822, + "step": 1506000 + }, + { + "epoch": 16.57, + "learning_rate": 8.356482754826998e-08, + "loss": 3.8244, + "step": 1506500 + }, + { + "epoch": 16.58, + "learning_rate": 8.355107541668959e-08, + "loss": 3.8074, + "step": 1507000 + }, + { + "epoch": 16.59, + "learning_rate": 8.353732328510918e-08, + "loss": 3.8139, + "step": 1507500 + }, + { + "epoch": 16.59, + "learning_rate": 8.352357115352879e-08, + "loss": 3.8167, + "step": 1508000 + }, + { + "epoch": 16.6, + "learning_rate": 8.35098190219484e-08, + "loss": 3.7909, + "step": 1508500 + }, + { + "epoch": 16.6, + "learning_rate": 8.3496066890368e-08, + "loss": 3.809, + "step": 1509000 + }, + { + "epoch": 16.61, + "learning_rate": 8.34823147587876e-08, + "loss": 3.8155, + "step": 1509500 + }, + { + "epoch": 16.61, + "learning_rate": 8.346856262720722e-08, + "loss": 3.838, + "step": 1510000 + }, + { + "epoch": 16.62, + "learning_rate": 8.345481049562681e-08, + "loss": 3.81, + "step": 1510500 + }, + { + "epoch": 16.62, + "learning_rate": 8.344105836404642e-08, + "loss": 3.8336, + "step": 1511000 + }, + { + "epoch": 16.63, + "learning_rate": 8.342730623246603e-08, + "loss": 3.8026, + "step": 1511500 + }, + { + "epoch": 16.63, + "learning_rate": 8.341355410088563e-08, + "loss": 3.8077, + "step": 1512000 + }, + { + "epoch": 16.64, + "learning_rate": 8.339980196930524e-08, + "loss": 3.8022, + "step": 1512500 + }, + { + "epoch": 16.65, + "learning_rate": 8.338604983772484e-08, + "loss": 3.8183, + "step": 1513000 + }, + { + "epoch": 16.65, + "learning_rate": 8.337229770614444e-08, + "loss": 3.8237, + "step": 1513500 + }, + { + "epoch": 16.66, + "learning_rate": 8.335854557456405e-08, + "loss": 3.8232, + "step": 1514000 + }, + { + "epoch": 16.66, + "learning_rate": 8.334479344298366e-08, + "loss": 3.8215, + "step": 1514500 + }, + { + "epoch": 16.67, + "learning_rate": 8.333104131140326e-08, + "loss": 3.8177, + "step": 1515000 + }, + { + "epoch": 16.67, + "learning_rate": 8.331728917982286e-08, + "loss": 3.8132, + "step": 1515500 + }, + { + "epoch": 16.68, + "learning_rate": 8.330353704824247e-08, + "loss": 3.815, + "step": 1516000 + }, + { + "epoch": 16.68, + "learning_rate": 8.328978491666207e-08, + "loss": 3.8021, + "step": 1516500 + }, + { + "epoch": 16.69, + "learning_rate": 8.327603278508168e-08, + "loss": 3.8201, + "step": 1517000 + }, + { + "epoch": 16.7, + "learning_rate": 8.326228065350129e-08, + "loss": 3.8174, + "step": 1517500 + }, + { + "epoch": 16.7, + "learning_rate": 8.324852852192089e-08, + "loss": 3.8086, + "step": 1518000 + }, + { + "epoch": 16.71, + "learning_rate": 8.32347763903405e-08, + "loss": 3.7831, + "step": 1518500 + }, + { + "epoch": 16.71, + "learning_rate": 8.32210242587601e-08, + "loss": 3.8169, + "step": 1519000 + }, + { + "epoch": 16.72, + "learning_rate": 8.32072721271797e-08, + "loss": 3.8041, + "step": 1519500 + }, + { + "epoch": 16.72, + "learning_rate": 8.319351999559931e-08, + "loss": 3.8056, + "step": 1520000 + }, + { + "epoch": 16.73, + "learning_rate": 8.317976786401892e-08, + "loss": 3.8088, + "step": 1520500 + }, + { + "epoch": 16.73, + "learning_rate": 8.316601573243851e-08, + "loss": 3.8156, + "step": 1521000 + }, + { + "epoch": 16.74, + "learning_rate": 8.315226360085812e-08, + "loss": 3.8172, + "step": 1521500 + }, + { + "epoch": 16.74, + "learning_rate": 8.313851146927773e-08, + "loss": 3.8142, + "step": 1522000 + }, + { + "epoch": 16.75, + "learning_rate": 8.312475933769734e-08, + "loss": 3.7944, + "step": 1522500 + }, + { + "epoch": 16.76, + "learning_rate": 8.311100720611694e-08, + "loss": 3.8203, + "step": 1523000 + }, + { + "epoch": 16.76, + "learning_rate": 8.309725507453655e-08, + "loss": 3.8098, + "step": 1523500 + }, + { + "epoch": 16.77, + "learning_rate": 8.308350294295616e-08, + "loss": 3.8026, + "step": 1524000 + }, + { + "epoch": 16.77, + "learning_rate": 8.306975081137577e-08, + "loss": 3.8283, + "step": 1524500 + }, + { + "epoch": 16.78, + "learning_rate": 8.305599867979536e-08, + "loss": 3.7991, + "step": 1525000 + }, + { + "epoch": 16.78, + "learning_rate": 8.304224654821497e-08, + "loss": 3.801, + "step": 1525500 + }, + { + "epoch": 16.79, + "learning_rate": 8.302849441663458e-08, + "loss": 3.8296, + "step": 1526000 + }, + { + "epoch": 16.79, + "learning_rate": 8.301474228505419e-08, + "loss": 3.8056, + "step": 1526500 + }, + { + "epoch": 16.8, + "learning_rate": 8.300099015347379e-08, + "loss": 3.8248, + "step": 1527000 + }, + { + "epoch": 16.81, + "learning_rate": 8.29872380218934e-08, + "loss": 3.8135, + "step": 1527500 + }, + { + "epoch": 16.81, + "learning_rate": 8.2973485890313e-08, + "loss": 3.8361, + "step": 1528000 + }, + { + "epoch": 16.82, + "learning_rate": 8.29597337587326e-08, + "loss": 3.8299, + "step": 1528500 + }, + { + "epoch": 16.82, + "learning_rate": 8.294598162715221e-08, + "loss": 3.8374, + "step": 1529000 + }, + { + "epoch": 16.83, + "learning_rate": 8.293222949557182e-08, + "loss": 3.8095, + "step": 1529500 + }, + { + "epoch": 16.83, + "learning_rate": 8.291847736399142e-08, + "loss": 3.8198, + "step": 1530000 + }, + { + "epoch": 16.84, + "learning_rate": 8.290472523241103e-08, + "loss": 3.8068, + "step": 1530500 + }, + { + "epoch": 16.84, + "learning_rate": 8.289097310083064e-08, + "loss": 3.8264, + "step": 1531000 + }, + { + "epoch": 16.85, + "learning_rate": 8.287722096925023e-08, + "loss": 3.8222, + "step": 1531500 + }, + { + "epoch": 16.85, + "learning_rate": 8.286346883766984e-08, + "loss": 3.824, + "step": 1532000 + }, + { + "epoch": 16.86, + "learning_rate": 8.284971670608945e-08, + "loss": 3.8305, + "step": 1532500 + }, + { + "epoch": 16.87, + "learning_rate": 8.283596457450905e-08, + "loss": 3.8065, + "step": 1533000 + }, + { + "epoch": 16.87, + "learning_rate": 8.282221244292866e-08, + "loss": 3.8001, + "step": 1533500 + }, + { + "epoch": 16.88, + "learning_rate": 8.280846031134827e-08, + "loss": 3.8058, + "step": 1534000 + }, + { + "epoch": 16.88, + "learning_rate": 8.279470817976786e-08, + "loss": 3.8292, + "step": 1534500 + }, + { + "epoch": 16.89, + "learning_rate": 8.278095604818747e-08, + "loss": 3.8223, + "step": 1535000 + }, + { + "epoch": 16.89, + "learning_rate": 8.276720391660708e-08, + "loss": 3.8232, + "step": 1535500 + }, + { + "epoch": 16.9, + "learning_rate": 8.275345178502668e-08, + "loss": 3.8234, + "step": 1536000 + }, + { + "epoch": 16.9, + "learning_rate": 8.273969965344629e-08, + "loss": 3.8135, + "step": 1536500 + }, + { + "epoch": 16.91, + "learning_rate": 8.27259475218659e-08, + "loss": 3.7976, + "step": 1537000 + }, + { + "epoch": 16.92, + "learning_rate": 8.271219539028549e-08, + "loss": 3.813, + "step": 1537500 + }, + { + "epoch": 16.92, + "learning_rate": 8.26984432587051e-08, + "loss": 3.8222, + "step": 1538000 + }, + { + "epoch": 16.93, + "learning_rate": 8.268469112712471e-08, + "loss": 3.8058, + "step": 1538500 + }, + { + "epoch": 16.93, + "learning_rate": 8.26709389955443e-08, + "loss": 3.797, + "step": 1539000 + }, + { + "epoch": 16.94, + "learning_rate": 8.265718686396391e-08, + "loss": 3.8028, + "step": 1539500 + }, + { + "epoch": 16.94, + "learning_rate": 8.264343473238351e-08, + "loss": 3.8118, + "step": 1540000 + }, + { + "epoch": 16.95, + "learning_rate": 8.262968260080312e-08, + "loss": 3.8241, + "step": 1540500 + }, + { + "epoch": 16.95, + "learning_rate": 8.261593046922273e-08, + "loss": 3.8202, + "step": 1541000 + }, + { + "epoch": 16.96, + "learning_rate": 8.260217833764233e-08, + "loss": 3.8157, + "step": 1541500 + }, + { + "epoch": 16.96, + "learning_rate": 8.258842620606194e-08, + "loss": 3.8186, + "step": 1542000 + }, + { + "epoch": 16.97, + "learning_rate": 8.257467407448154e-08, + "loss": 3.7993, + "step": 1542500 + }, + { + "epoch": 16.98, + "learning_rate": 8.256092194290114e-08, + "loss": 3.8206, + "step": 1543000 + }, + { + "epoch": 16.98, + "learning_rate": 8.254716981132075e-08, + "loss": 3.8121, + "step": 1543500 + }, + { + "epoch": 16.99, + "learning_rate": 8.253341767974036e-08, + "loss": 3.8117, + "step": 1544000 + }, + { + "epoch": 16.99, + "learning_rate": 8.251966554815996e-08, + "loss": 3.8319, + "step": 1544500 + }, + { + "epoch": 17.0, + "learning_rate": 8.250591341657956e-08, + "loss": 3.8055, + "step": 1545000 + }, + { + "epoch": 17.0, + "eval_loss": 3.863797903060913, + "eval_runtime": 6.1486, + "eval_samples_per_second": 252.74, + "step": 1545215 + }, + { + "epoch": 17.0, + "learning_rate": 8.249216128499917e-08, + "loss": 3.8056, + "step": 1545500 + }, + { + "epoch": 17.01, + "learning_rate": 8.247840915341877e-08, + "loss": 3.8228, + "step": 1546000 + }, + { + "epoch": 17.01, + "learning_rate": 8.246465702183838e-08, + "loss": 3.814, + "step": 1546500 + }, + { + "epoch": 17.02, + "learning_rate": 8.245090489025799e-08, + "loss": 3.8356, + "step": 1547000 + }, + { + "epoch": 17.03, + "learning_rate": 8.243715275867759e-08, + "loss": 3.8054, + "step": 1547500 + }, + { + "epoch": 17.03, + "learning_rate": 8.24234006270972e-08, + "loss": 3.8348, + "step": 1548000 + }, + { + "epoch": 17.04, + "learning_rate": 8.24096484955168e-08, + "loss": 3.8178, + "step": 1548500 + }, + { + "epoch": 17.04, + "learning_rate": 8.23958963639364e-08, + "loss": 3.8074, + "step": 1549000 + }, + { + "epoch": 17.05, + "learning_rate": 8.238214423235601e-08, + "loss": 3.8183, + "step": 1549500 + }, + { + "epoch": 17.05, + "learning_rate": 8.236839210077562e-08, + "loss": 3.8092, + "step": 1550000 + }, + { + "epoch": 17.06, + "learning_rate": 8.235463996919521e-08, + "loss": 3.82, + "step": 1550500 + }, + { + "epoch": 17.06, + "learning_rate": 8.234088783761482e-08, + "loss": 3.8148, + "step": 1551000 + }, + { + "epoch": 17.07, + "learning_rate": 8.232713570603443e-08, + "loss": 3.8271, + "step": 1551500 + }, + { + "epoch": 17.07, + "learning_rate": 8.231338357445403e-08, + "loss": 3.7969, + "step": 1552000 + }, + { + "epoch": 17.08, + "learning_rate": 8.229963144287364e-08, + "loss": 3.8158, + "step": 1552500 + }, + { + "epoch": 17.09, + "learning_rate": 8.228587931129325e-08, + "loss": 3.7993, + "step": 1553000 + }, + { + "epoch": 17.09, + "learning_rate": 8.227212717971284e-08, + "loss": 3.8134, + "step": 1553500 + }, + { + "epoch": 17.1, + "learning_rate": 8.225837504813245e-08, + "loss": 3.808, + "step": 1554000 + }, + { + "epoch": 17.1, + "learning_rate": 8.224462291655206e-08, + "loss": 3.8102, + "step": 1554500 + }, + { + "epoch": 17.11, + "learning_rate": 8.223087078497166e-08, + "loss": 3.8079, + "step": 1555000 + }, + { + "epoch": 17.11, + "learning_rate": 8.221711865339127e-08, + "loss": 3.7953, + "step": 1555500 + }, + { + "epoch": 17.12, + "learning_rate": 8.220336652181088e-08, + "loss": 3.8064, + "step": 1556000 + }, + { + "epoch": 17.12, + "learning_rate": 8.218961439023047e-08, + "loss": 3.8192, + "step": 1556500 + }, + { + "epoch": 17.13, + "learning_rate": 8.217586225865008e-08, + "loss": 3.8122, + "step": 1557000 + }, + { + "epoch": 17.14, + "learning_rate": 8.216211012706969e-08, + "loss": 3.8117, + "step": 1557500 + }, + { + "epoch": 17.14, + "learning_rate": 8.214835799548929e-08, + "loss": 3.8003, + "step": 1558000 + }, + { + "epoch": 17.15, + "learning_rate": 8.21346058639089e-08, + "loss": 3.8231, + "step": 1558500 + }, + { + "epoch": 17.15, + "learning_rate": 8.212085373232851e-08, + "loss": 3.8048, + "step": 1559000 + }, + { + "epoch": 17.16, + "learning_rate": 8.21071016007481e-08, + "loss": 3.8077, + "step": 1559500 + }, + { + "epoch": 17.16, + "learning_rate": 8.209334946916771e-08, + "loss": 3.8346, + "step": 1560000 + }, + { + "epoch": 17.17, + "learning_rate": 8.207959733758732e-08, + "loss": 3.806, + "step": 1560500 + }, + { + "epoch": 17.17, + "learning_rate": 8.206584520600692e-08, + "loss": 3.8002, + "step": 1561000 + }, + { + "epoch": 17.18, + "learning_rate": 8.205209307442653e-08, + "loss": 3.8166, + "step": 1561500 + }, + { + "epoch": 17.18, + "learning_rate": 8.203834094284614e-08, + "loss": 3.8159, + "step": 1562000 + }, + { + "epoch": 17.19, + "learning_rate": 8.202458881126573e-08, + "loss": 3.8072, + "step": 1562500 + }, + { + "epoch": 17.2, + "learning_rate": 8.201083667968534e-08, + "loss": 3.817, + "step": 1563000 + }, + { + "epoch": 17.2, + "learning_rate": 8.199708454810495e-08, + "loss": 3.8219, + "step": 1563500 + }, + { + "epoch": 17.21, + "learning_rate": 8.198333241652455e-08, + "loss": 3.8106, + "step": 1564000 + }, + { + "epoch": 17.21, + "learning_rate": 8.196958028494416e-08, + "loss": 3.8281, + "step": 1564500 + }, + { + "epoch": 17.22, + "learning_rate": 8.195582815336377e-08, + "loss": 3.8231, + "step": 1565000 + }, + { + "epoch": 17.22, + "learning_rate": 8.194207602178336e-08, + "loss": 3.8251, + "step": 1565500 + }, + { + "epoch": 17.23, + "learning_rate": 8.192832389020297e-08, + "loss": 3.7855, + "step": 1566000 + }, + { + "epoch": 17.23, + "learning_rate": 8.191457175862258e-08, + "loss": 3.8052, + "step": 1566500 + }, + { + "epoch": 17.24, + "learning_rate": 8.190081962704218e-08, + "loss": 3.804, + "step": 1567000 + }, + { + "epoch": 17.25, + "learning_rate": 8.188706749546179e-08, + "loss": 3.8133, + "step": 1567500 + }, + { + "epoch": 17.25, + "learning_rate": 8.18733153638814e-08, + "loss": 3.8133, + "step": 1568000 + }, + { + "epoch": 17.26, + "learning_rate": 8.1859563232301e-08, + "loss": 3.8117, + "step": 1568500 + }, + { + "epoch": 17.26, + "learning_rate": 8.18458111007206e-08, + "loss": 3.8164, + "step": 1569000 + }, + { + "epoch": 17.27, + "learning_rate": 8.183205896914021e-08, + "loss": 3.8293, + "step": 1569500 + }, + { + "epoch": 17.27, + "learning_rate": 8.181830683755982e-08, + "loss": 3.8111, + "step": 1570000 + }, + { + "epoch": 17.28, + "learning_rate": 8.180455470597942e-08, + "loss": 3.8175, + "step": 1570500 + }, + { + "epoch": 17.28, + "learning_rate": 8.179080257439903e-08, + "loss": 3.8184, + "step": 1571000 + }, + { + "epoch": 17.29, + "learning_rate": 8.177705044281863e-08, + "loss": 3.809, + "step": 1571500 + }, + { + "epoch": 17.29, + "learning_rate": 8.176329831123824e-08, + "loss": 3.8166, + "step": 1572000 + }, + { + "epoch": 17.3, + "learning_rate": 8.174954617965784e-08, + "loss": 3.7916, + "step": 1572500 + }, + { + "epoch": 17.31, + "learning_rate": 8.173579404807745e-08, + "loss": 3.8001, + "step": 1573000 + }, + { + "epoch": 17.31, + "learning_rate": 8.172204191649706e-08, + "loss": 3.7957, + "step": 1573500 + }, + { + "epoch": 17.32, + "learning_rate": 8.170828978491667e-08, + "loss": 3.8287, + "step": 1574000 + }, + { + "epoch": 17.32, + "learning_rate": 8.169453765333626e-08, + "loss": 3.7886, + "step": 1574500 + }, + { + "epoch": 17.33, + "learning_rate": 8.168078552175587e-08, + "loss": 3.7996, + "step": 1575000 + }, + { + "epoch": 17.33, + "learning_rate": 8.166703339017548e-08, + "loss": 3.8156, + "step": 1575500 + }, + { + "epoch": 17.34, + "learning_rate": 8.165328125859508e-08, + "loss": 3.8299, + "step": 1576000 + }, + { + "epoch": 17.34, + "learning_rate": 8.163952912701469e-08, + "loss": 3.7958, + "step": 1576500 + }, + { + "epoch": 17.35, + "learning_rate": 8.16257769954343e-08, + "loss": 3.8098, + "step": 1577000 + }, + { + "epoch": 17.36, + "learning_rate": 8.16120248638539e-08, + "loss": 3.8173, + "step": 1577500 + }, + { + "epoch": 17.36, + "learning_rate": 8.15982727322735e-08, + "loss": 3.8085, + "step": 1578000 + }, + { + "epoch": 17.37, + "learning_rate": 8.158452060069311e-08, + "loss": 3.8139, + "step": 1578500 + }, + { + "epoch": 17.37, + "learning_rate": 8.157076846911271e-08, + "loss": 3.8063, + "step": 1579000 + }, + { + "epoch": 17.38, + "learning_rate": 8.155701633753232e-08, + "loss": 3.812, + "step": 1579500 + }, + { + "epoch": 17.38, + "learning_rate": 8.154326420595193e-08, + "loss": 3.822, + "step": 1580000 + }, + { + "epoch": 17.39, + "learning_rate": 8.152951207437152e-08, + "loss": 3.803, + "step": 1580500 + }, + { + "epoch": 17.39, + "learning_rate": 8.151575994279113e-08, + "loss": 3.8241, + "step": 1581000 + }, + { + "epoch": 17.4, + "learning_rate": 8.150200781121074e-08, + "loss": 3.822, + "step": 1581500 + }, + { + "epoch": 17.4, + "learning_rate": 8.148825567963034e-08, + "loss": 3.8209, + "step": 1582000 + }, + { + "epoch": 17.41, + "learning_rate": 8.147450354804995e-08, + "loss": 3.8123, + "step": 1582500 + }, + { + "epoch": 17.42, + "learning_rate": 8.146075141646956e-08, + "loss": 3.8178, + "step": 1583000 + }, + { + "epoch": 17.42, + "learning_rate": 8.144699928488915e-08, + "loss": 3.8181, + "step": 1583500 + }, + { + "epoch": 17.43, + "learning_rate": 8.143324715330876e-08, + "loss": 3.8058, + "step": 1584000 + }, + { + "epoch": 17.43, + "learning_rate": 8.141949502172837e-08, + "loss": 3.8149, + "step": 1584500 + }, + { + "epoch": 17.44, + "learning_rate": 8.140574289014797e-08, + "loss": 3.7947, + "step": 1585000 + }, + { + "epoch": 17.44, + "learning_rate": 8.139199075856758e-08, + "loss": 3.7961, + "step": 1585500 + }, + { + "epoch": 17.45, + "learning_rate": 8.137823862698719e-08, + "loss": 3.8096, + "step": 1586000 + }, + { + "epoch": 17.45, + "learning_rate": 8.136448649540678e-08, + "loss": 3.8186, + "step": 1586500 + }, + { + "epoch": 17.46, + "learning_rate": 8.135073436382639e-08, + "loss": 3.7836, + "step": 1587000 + }, + { + "epoch": 17.47, + "learning_rate": 8.1336982232246e-08, + "loss": 3.7944, + "step": 1587500 + }, + { + "epoch": 17.47, + "learning_rate": 8.13232301006656e-08, + "loss": 3.8094, + "step": 1588000 + }, + { + "epoch": 17.48, + "learning_rate": 8.130947796908521e-08, + "loss": 3.8001, + "step": 1588500 + }, + { + "epoch": 17.48, + "learning_rate": 8.129572583750482e-08, + "loss": 3.8086, + "step": 1589000 + }, + { + "epoch": 17.49, + "learning_rate": 8.128197370592441e-08, + "loss": 3.8057, + "step": 1589500 + }, + { + "epoch": 17.49, + "learning_rate": 8.126822157434402e-08, + "loss": 3.8218, + "step": 1590000 + }, + { + "epoch": 17.5, + "learning_rate": 8.125446944276363e-08, + "loss": 3.8295, + "step": 1590500 + }, + { + "epoch": 17.5, + "learning_rate": 8.124071731118323e-08, + "loss": 3.8025, + "step": 1591000 + }, + { + "epoch": 17.51, + "learning_rate": 8.122696517960284e-08, + "loss": 3.8178, + "step": 1591500 + }, + { + "epoch": 17.51, + "learning_rate": 8.121321304802245e-08, + "loss": 3.8006, + "step": 1592000 + }, + { + "epoch": 17.52, + "learning_rate": 8.119946091644204e-08, + "loss": 3.8002, + "step": 1592500 + }, + { + "epoch": 17.53, + "learning_rate": 8.118570878486165e-08, + "loss": 3.8318, + "step": 1593000 + }, + { + "epoch": 17.53, + "learning_rate": 8.117195665328126e-08, + "loss": 3.8271, + "step": 1593500 + }, + { + "epoch": 17.54, + "learning_rate": 8.115820452170086e-08, + "loss": 3.8159, + "step": 1594000 + }, + { + "epoch": 17.54, + "learning_rate": 8.114445239012047e-08, + "loss": 3.8124, + "step": 1594500 + }, + { + "epoch": 17.55, + "learning_rate": 8.113070025854008e-08, + "loss": 3.7887, + "step": 1595000 + }, + { + "epoch": 17.55, + "learning_rate": 8.111694812695967e-08, + "loss": 3.7857, + "step": 1595500 + }, + { + "epoch": 17.56, + "learning_rate": 8.110319599537928e-08, + "loss": 3.8085, + "step": 1596000 + }, + { + "epoch": 17.56, + "learning_rate": 8.108944386379889e-08, + "loss": 3.8354, + "step": 1596500 + }, + { + "epoch": 17.57, + "learning_rate": 8.107569173221849e-08, + "loss": 3.8247, + "step": 1597000 + }, + { + "epoch": 17.58, + "learning_rate": 8.10619396006381e-08, + "loss": 3.8248, + "step": 1597500 + }, + { + "epoch": 17.58, + "learning_rate": 8.10481874690577e-08, + "loss": 3.8029, + "step": 1598000 + }, + { + "epoch": 17.59, + "learning_rate": 8.10344353374773e-08, + "loss": 3.8099, + "step": 1598500 + }, + { + "epoch": 17.59, + "learning_rate": 8.102068320589691e-08, + "loss": 3.8066, + "step": 1599000 + }, + { + "epoch": 17.6, + "learning_rate": 8.100693107431652e-08, + "loss": 3.8289, + "step": 1599500 + }, + { + "epoch": 17.6, + "learning_rate": 8.099317894273612e-08, + "loss": 3.8304, + "step": 1600000 + }, + { + "epoch": 17.61, + "learning_rate": 8.097942681115573e-08, + "loss": 3.8041, + "step": 1600500 + }, + { + "epoch": 17.61, + "learning_rate": 8.096567467957533e-08, + "loss": 3.8128, + "step": 1601000 + }, + { + "epoch": 17.62, + "learning_rate": 8.095192254799493e-08, + "loss": 3.821, + "step": 1601500 + }, + { + "epoch": 17.62, + "learning_rate": 8.093817041641454e-08, + "loss": 3.8065, + "step": 1602000 + }, + { + "epoch": 17.63, + "learning_rate": 8.092441828483415e-08, + "loss": 3.7951, + "step": 1602500 + }, + { + "epoch": 17.64, + "learning_rate": 8.091066615325375e-08, + "loss": 3.8105, + "step": 1603000 + }, + { + "epoch": 17.64, + "learning_rate": 8.089691402167336e-08, + "loss": 3.7891, + "step": 1603500 + }, + { + "epoch": 17.65, + "learning_rate": 8.088316189009296e-08, + "loss": 3.8157, + "step": 1604000 + }, + { + "epoch": 17.65, + "learning_rate": 8.086940975851256e-08, + "loss": 3.8124, + "step": 1604500 + }, + { + "epoch": 17.66, + "learning_rate": 8.085565762693217e-08, + "loss": 3.7946, + "step": 1605000 + }, + { + "epoch": 17.66, + "learning_rate": 8.084190549535178e-08, + "loss": 3.8254, + "step": 1605500 + }, + { + "epoch": 17.67, + "learning_rate": 8.082815336377138e-08, + "loss": 3.8205, + "step": 1606000 + }, + { + "epoch": 17.67, + "learning_rate": 8.081440123219098e-08, + "loss": 3.7953, + "step": 1606500 + }, + { + "epoch": 17.68, + "learning_rate": 8.08006491006106e-08, + "loss": 3.7984, + "step": 1607000 + }, + { + "epoch": 17.69, + "learning_rate": 8.078689696903019e-08, + "loss": 3.8071, + "step": 1607500 + }, + { + "epoch": 17.69, + "learning_rate": 8.07731448374498e-08, + "loss": 3.7946, + "step": 1608000 + }, + { + "epoch": 17.7, + "learning_rate": 8.075939270586941e-08, + "loss": 3.8044, + "step": 1608500 + }, + { + "epoch": 17.7, + "learning_rate": 8.0745640574289e-08, + "loss": 3.8063, + "step": 1609000 + }, + { + "epoch": 17.71, + "learning_rate": 8.073188844270861e-08, + "loss": 3.8125, + "step": 1609500 + }, + { + "epoch": 17.71, + "learning_rate": 8.071813631112822e-08, + "loss": 3.8094, + "step": 1610000 + }, + { + "epoch": 17.72, + "learning_rate": 8.070438417954782e-08, + "loss": 3.7871, + "step": 1610500 + }, + { + "epoch": 17.72, + "learning_rate": 8.069063204796743e-08, + "loss": 3.7944, + "step": 1611000 + }, + { + "epoch": 17.73, + "learning_rate": 8.067687991638704e-08, + "loss": 3.8184, + "step": 1611500 + }, + { + "epoch": 17.73, + "learning_rate": 8.066312778480663e-08, + "loss": 3.8204, + "step": 1612000 + }, + { + "epoch": 17.74, + "learning_rate": 8.064937565322624e-08, + "loss": 3.798, + "step": 1612500 + }, + { + "epoch": 17.75, + "learning_rate": 8.063562352164585e-08, + "loss": 3.804, + "step": 1613000 + }, + { + "epoch": 17.75, + "learning_rate": 8.062187139006545e-08, + "loss": 3.8104, + "step": 1613500 + }, + { + "epoch": 17.76, + "learning_rate": 8.060811925848506e-08, + "loss": 3.831, + "step": 1614000 + }, + { + "epoch": 17.76, + "learning_rate": 8.059436712690467e-08, + "loss": 3.8131, + "step": 1614500 + }, + { + "epoch": 17.77, + "learning_rate": 8.058061499532426e-08, + "loss": 3.8057, + "step": 1615000 + }, + { + "epoch": 17.77, + "learning_rate": 8.056686286374387e-08, + "loss": 3.7959, + "step": 1615500 + }, + { + "epoch": 17.78, + "learning_rate": 8.055311073216348e-08, + "loss": 3.8168, + "step": 1616000 + }, + { + "epoch": 17.78, + "learning_rate": 8.053935860058308e-08, + "loss": 3.8002, + "step": 1616500 + }, + { + "epoch": 17.79, + "learning_rate": 8.052560646900269e-08, + "loss": 3.8248, + "step": 1617000 + }, + { + "epoch": 17.8, + "learning_rate": 8.05118543374223e-08, + "loss": 3.8025, + "step": 1617500 + }, + { + "epoch": 17.8, + "learning_rate": 8.04981022058419e-08, + "loss": 3.7936, + "step": 1618000 + }, + { + "epoch": 17.81, + "learning_rate": 8.04843500742615e-08, + "loss": 3.8057, + "step": 1618500 + }, + { + "epoch": 17.81, + "learning_rate": 8.047059794268111e-08, + "loss": 3.8193, + "step": 1619000 + }, + { + "epoch": 17.82, + "learning_rate": 8.045684581110072e-08, + "loss": 3.8163, + "step": 1619500 + }, + { + "epoch": 17.82, + "learning_rate": 8.044309367952032e-08, + "loss": 3.8261, + "step": 1620000 + }, + { + "epoch": 17.83, + "learning_rate": 8.042934154793993e-08, + "loss": 3.8164, + "step": 1620500 + }, + { + "epoch": 17.83, + "learning_rate": 8.041558941635954e-08, + "loss": 3.8159, + "step": 1621000 + }, + { + "epoch": 17.84, + "learning_rate": 8.040183728477915e-08, + "loss": 3.8192, + "step": 1621500 + }, + { + "epoch": 17.84, + "learning_rate": 8.038808515319874e-08, + "loss": 3.8154, + "step": 1622000 + }, + { + "epoch": 17.85, + "learning_rate": 8.037433302161835e-08, + "loss": 3.8084, + "step": 1622500 + }, + { + "epoch": 17.86, + "learning_rate": 8.036058089003796e-08, + "loss": 3.826, + "step": 1623000 + }, + { + "epoch": 17.86, + "learning_rate": 8.034682875845756e-08, + "loss": 3.7953, + "step": 1623500 + }, + { + "epoch": 17.87, + "learning_rate": 8.033307662687717e-08, + "loss": 3.8011, + "step": 1624000 + }, + { + "epoch": 17.87, + "learning_rate": 8.031932449529678e-08, + "loss": 3.8136, + "step": 1624500 + }, + { + "epoch": 17.88, + "learning_rate": 8.030557236371637e-08, + "loss": 3.8168, + "step": 1625000 + }, + { + "epoch": 17.88, + "learning_rate": 8.029182023213598e-08, + "loss": 3.817, + "step": 1625500 + }, + { + "epoch": 17.89, + "learning_rate": 8.027806810055559e-08, + "loss": 3.7977, + "step": 1626000 + }, + { + "epoch": 17.89, + "learning_rate": 8.026431596897519e-08, + "loss": 3.8016, + "step": 1626500 + }, + { + "epoch": 17.9, + "learning_rate": 8.02505638373948e-08, + "loss": 3.8335, + "step": 1627000 + }, + { + "epoch": 17.91, + "learning_rate": 8.02368117058144e-08, + "loss": 3.8079, + "step": 1627500 + }, + { + "epoch": 17.91, + "learning_rate": 8.0223059574234e-08, + "loss": 3.7859, + "step": 1628000 + }, + { + "epoch": 17.92, + "learning_rate": 8.020930744265361e-08, + "loss": 3.8093, + "step": 1628500 + }, + { + "epoch": 17.92, + "learning_rate": 8.019555531107322e-08, + "loss": 3.8239, + "step": 1629000 + }, + { + "epoch": 17.93, + "learning_rate": 8.018180317949282e-08, + "loss": 3.7991, + "step": 1629500 + }, + { + "epoch": 17.93, + "learning_rate": 8.016805104791243e-08, + "loss": 3.8086, + "step": 1630000 + }, + { + "epoch": 17.94, + "learning_rate": 8.015429891633203e-08, + "loss": 3.8006, + "step": 1630500 + }, + { + "epoch": 17.94, + "learning_rate": 8.014054678475163e-08, + "loss": 3.8033, + "step": 1631000 + }, + { + "epoch": 17.95, + "learning_rate": 8.012679465317124e-08, + "loss": 3.8026, + "step": 1631500 + }, + { + "epoch": 17.95, + "learning_rate": 8.011304252159085e-08, + "loss": 3.8134, + "step": 1632000 + }, + { + "epoch": 17.96, + "learning_rate": 8.009929039001045e-08, + "loss": 3.8328, + "step": 1632500 + }, + { + "epoch": 17.97, + "learning_rate": 8.008553825843005e-08, + "loss": 3.8086, + "step": 1633000 + }, + { + "epoch": 17.97, + "learning_rate": 8.007178612684966e-08, + "loss": 3.7903, + "step": 1633500 + }, + { + "epoch": 17.98, + "learning_rate": 8.005803399526926e-08, + "loss": 3.8028, + "step": 1634000 + }, + { + "epoch": 17.98, + "learning_rate": 8.004428186368887e-08, + "loss": 3.8028, + "step": 1634500 + }, + { + "epoch": 17.99, + "learning_rate": 8.003052973210848e-08, + "loss": 3.7952, + "step": 1635000 + }, + { + "epoch": 17.99, + "learning_rate": 8.001677760052808e-08, + "loss": 3.7984, + "step": 1635500 + }, + { + "epoch": 18.0, + "learning_rate": 8.000302546894768e-08, + "loss": 3.788, + "step": 1636000 + }, + { + "epoch": 18.0, + "eval_loss": 3.860872507095337, + "eval_runtime": 6.1352, + "eval_samples_per_second": 253.291, + "step": 1636110 + }, + { + "epoch": 18.0, + "learning_rate": 7.99892733373673e-08, + "loss": 3.8161, + "step": 1636500 + }, + { + "epoch": 18.01, + "learning_rate": 7.997552120578689e-08, + "loss": 3.8062, + "step": 1637000 + }, + { + "epoch": 18.02, + "learning_rate": 7.99617690742065e-08, + "loss": 3.8121, + "step": 1637500 + }, + { + "epoch": 18.02, + "learning_rate": 7.994801694262611e-08, + "loss": 3.8058, + "step": 1638000 + }, + { + "epoch": 18.03, + "learning_rate": 7.99342648110457e-08, + "loss": 3.8118, + "step": 1638500 + }, + { + "epoch": 18.03, + "learning_rate": 7.992051267946531e-08, + "loss": 3.8155, + "step": 1639000 + }, + { + "epoch": 18.04, + "learning_rate": 7.990676054788492e-08, + "loss": 3.8114, + "step": 1639500 + }, + { + "epoch": 18.04, + "learning_rate": 7.989300841630452e-08, + "loss": 3.8172, + "step": 1640000 + }, + { + "epoch": 18.05, + "learning_rate": 7.987925628472413e-08, + "loss": 3.7924, + "step": 1640500 + }, + { + "epoch": 18.05, + "learning_rate": 7.986550415314374e-08, + "loss": 3.8207, + "step": 1641000 + }, + { + "epoch": 18.06, + "learning_rate": 7.985175202156333e-08, + "loss": 3.8103, + "step": 1641500 + }, + { + "epoch": 18.06, + "learning_rate": 7.983799988998294e-08, + "loss": 3.7955, + "step": 1642000 + }, + { + "epoch": 18.07, + "learning_rate": 7.982424775840255e-08, + "loss": 3.8059, + "step": 1642500 + }, + { + "epoch": 18.08, + "learning_rate": 7.981049562682215e-08, + "loss": 3.8079, + "step": 1643000 + }, + { + "epoch": 18.08, + "learning_rate": 7.979674349524176e-08, + "loss": 3.807, + "step": 1643500 + }, + { + "epoch": 18.09, + "learning_rate": 7.978299136366137e-08, + "loss": 3.8145, + "step": 1644000 + }, + { + "epoch": 18.09, + "learning_rate": 7.976923923208096e-08, + "loss": 3.7965, + "step": 1644500 + }, + { + "epoch": 18.1, + "learning_rate": 7.975548710050057e-08, + "loss": 3.8198, + "step": 1645000 + }, + { + "epoch": 18.1, + "learning_rate": 7.974173496892018e-08, + "loss": 3.8106, + "step": 1645500 + }, + { + "epoch": 18.11, + "learning_rate": 7.972798283733978e-08, + "loss": 3.8196, + "step": 1646000 + }, + { + "epoch": 18.11, + "learning_rate": 7.971423070575939e-08, + "loss": 3.8105, + "step": 1646500 + }, + { + "epoch": 18.12, + "learning_rate": 7.9700478574179e-08, + "loss": 3.8011, + "step": 1647000 + }, + { + "epoch": 18.13, + "learning_rate": 7.96867264425986e-08, + "loss": 3.7978, + "step": 1647500 + }, + { + "epoch": 18.13, + "learning_rate": 7.96729743110182e-08, + "loss": 3.8097, + "step": 1648000 + }, + { + "epoch": 18.14, + "learning_rate": 7.965922217943781e-08, + "loss": 3.7914, + "step": 1648500 + }, + { + "epoch": 18.14, + "learning_rate": 7.964547004785741e-08, + "loss": 3.8115, + "step": 1649000 + }, + { + "epoch": 18.15, + "learning_rate": 7.963171791627702e-08, + "loss": 3.8168, + "step": 1649500 + }, + { + "epoch": 18.15, + "learning_rate": 7.961796578469663e-08, + "loss": 3.8022, + "step": 1650000 + }, + { + "epoch": 18.16, + "learning_rate": 7.960421365311622e-08, + "loss": 3.8098, + "step": 1650500 + }, + { + "epoch": 18.16, + "learning_rate": 7.959046152153583e-08, + "loss": 3.8304, + "step": 1651000 + }, + { + "epoch": 18.17, + "learning_rate": 7.957670938995544e-08, + "loss": 3.812, + "step": 1651500 + }, + { + "epoch": 18.17, + "learning_rate": 7.956295725837504e-08, + "loss": 3.8087, + "step": 1652000 + }, + { + "epoch": 18.18, + "learning_rate": 7.954920512679465e-08, + "loss": 3.8073, + "step": 1652500 + }, + { + "epoch": 18.19, + "learning_rate": 7.953545299521426e-08, + "loss": 3.8192, + "step": 1653000 + }, + { + "epoch": 18.19, + "learning_rate": 7.952170086363385e-08, + "loss": 3.7984, + "step": 1653500 + }, + { + "epoch": 18.2, + "learning_rate": 7.950794873205346e-08, + "loss": 3.8063, + "step": 1654000 + }, + { + "epoch": 18.2, + "learning_rate": 7.949419660047307e-08, + "loss": 3.8025, + "step": 1654500 + }, + { + "epoch": 18.21, + "learning_rate": 7.948044446889267e-08, + "loss": 3.8158, + "step": 1655000 + }, + { + "epoch": 18.21, + "learning_rate": 7.946669233731228e-08, + "loss": 3.8049, + "step": 1655500 + }, + { + "epoch": 18.22, + "learning_rate": 7.945294020573189e-08, + "loss": 3.8024, + "step": 1656000 + }, + { + "epoch": 18.22, + "learning_rate": 7.943918807415148e-08, + "loss": 3.8011, + "step": 1656500 + }, + { + "epoch": 18.23, + "learning_rate": 7.942543594257109e-08, + "loss": 3.802, + "step": 1657000 + }, + { + "epoch": 18.24, + "learning_rate": 7.94116838109907e-08, + "loss": 3.8148, + "step": 1657500 + }, + { + "epoch": 18.24, + "learning_rate": 7.93979316794103e-08, + "loss": 3.8231, + "step": 1658000 + }, + { + "epoch": 18.25, + "learning_rate": 7.93841795478299e-08, + "loss": 3.7882, + "step": 1658500 + }, + { + "epoch": 18.25, + "learning_rate": 7.937042741624952e-08, + "loss": 3.8075, + "step": 1659000 + }, + { + "epoch": 18.26, + "learning_rate": 7.935667528466911e-08, + "loss": 3.8046, + "step": 1659500 + }, + { + "epoch": 18.26, + "learning_rate": 7.934292315308872e-08, + "loss": 3.7936, + "step": 1660000 + }, + { + "epoch": 18.27, + "learning_rate": 7.932917102150833e-08, + "loss": 3.8076, + "step": 1660500 + }, + { + "epoch": 18.27, + "learning_rate": 7.931541888992793e-08, + "loss": 3.7951, + "step": 1661000 + }, + { + "epoch": 18.28, + "learning_rate": 7.930166675834754e-08, + "loss": 3.8149, + "step": 1661500 + }, + { + "epoch": 18.28, + "learning_rate": 7.928791462676715e-08, + "loss": 3.8178, + "step": 1662000 + }, + { + "epoch": 18.29, + "learning_rate": 7.927416249518674e-08, + "loss": 3.8022, + "step": 1662500 + }, + { + "epoch": 18.3, + "learning_rate": 7.926041036360635e-08, + "loss": 3.7946, + "step": 1663000 + }, + { + "epoch": 18.3, + "learning_rate": 7.924665823202596e-08, + "loss": 3.8025, + "step": 1663500 + }, + { + "epoch": 18.31, + "learning_rate": 7.923290610044556e-08, + "loss": 3.7899, + "step": 1664000 + }, + { + "epoch": 18.31, + "learning_rate": 7.921915396886517e-08, + "loss": 3.8124, + "step": 1664500 + }, + { + "epoch": 18.32, + "learning_rate": 7.920540183728477e-08, + "loss": 3.8166, + "step": 1665000 + }, + { + "epoch": 18.32, + "learning_rate": 7.919164970570437e-08, + "loss": 3.801, + "step": 1665500 + }, + { + "epoch": 18.33, + "learning_rate": 7.917789757412398e-08, + "loss": 3.7891, + "step": 1666000 + }, + { + "epoch": 18.33, + "learning_rate": 7.916414544254359e-08, + "loss": 3.8237, + "step": 1666500 + }, + { + "epoch": 18.34, + "learning_rate": 7.91503933109632e-08, + "loss": 3.8199, + "step": 1667000 + }, + { + "epoch": 18.35, + "learning_rate": 7.91366411793828e-08, + "loss": 3.7858, + "step": 1667500 + }, + { + "epoch": 18.35, + "learning_rate": 7.91228890478024e-08, + "loss": 3.7956, + "step": 1668000 + }, + { + "epoch": 18.36, + "learning_rate": 7.910913691622201e-08, + "loss": 3.806, + "step": 1668500 + }, + { + "epoch": 18.36, + "learning_rate": 7.909538478464162e-08, + "loss": 3.8147, + "step": 1669000 + }, + { + "epoch": 18.37, + "learning_rate": 7.908163265306122e-08, + "loss": 3.8237, + "step": 1669500 + }, + { + "epoch": 18.37, + "learning_rate": 7.906788052148083e-08, + "loss": 3.8001, + "step": 1670000 + }, + { + "epoch": 18.38, + "learning_rate": 7.905412838990044e-08, + "loss": 3.8098, + "step": 1670500 + }, + { + "epoch": 18.38, + "learning_rate": 7.904037625832003e-08, + "loss": 3.7977, + "step": 1671000 + }, + { + "epoch": 18.39, + "learning_rate": 7.902662412673964e-08, + "loss": 3.8142, + "step": 1671500 + }, + { + "epoch": 18.39, + "learning_rate": 7.901287199515925e-08, + "loss": 3.8138, + "step": 1672000 + }, + { + "epoch": 18.4, + "learning_rate": 7.899911986357886e-08, + "loss": 3.8075, + "step": 1672500 + }, + { + "epoch": 18.41, + "learning_rate": 7.898536773199846e-08, + "loss": 3.7988, + "step": 1673000 + }, + { + "epoch": 18.41, + "learning_rate": 7.897161560041807e-08, + "loss": 3.787, + "step": 1673500 + }, + { + "epoch": 18.42, + "learning_rate": 7.895786346883768e-08, + "loss": 3.8045, + "step": 1674000 + }, + { + "epoch": 18.42, + "learning_rate": 7.894411133725727e-08, + "loss": 3.8195, + "step": 1674500 + }, + { + "epoch": 18.43, + "learning_rate": 7.893035920567688e-08, + "loss": 3.8074, + "step": 1675000 + }, + { + "epoch": 18.43, + "learning_rate": 7.891660707409649e-08, + "loss": 3.7948, + "step": 1675500 + }, + { + "epoch": 18.44, + "learning_rate": 7.890285494251609e-08, + "loss": 3.7984, + "step": 1676000 + }, + { + "epoch": 18.44, + "learning_rate": 7.88891028109357e-08, + "loss": 3.81, + "step": 1676500 + }, + { + "epoch": 18.45, + "learning_rate": 7.88753506793553e-08, + "loss": 3.8049, + "step": 1677000 + }, + { + "epoch": 18.46, + "learning_rate": 7.88615985477749e-08, + "loss": 3.8051, + "step": 1677500 + }, + { + "epoch": 18.46, + "learning_rate": 7.884784641619451e-08, + "loss": 3.8276, + "step": 1678000 + }, + { + "epoch": 18.47, + "learning_rate": 7.883409428461412e-08, + "loss": 3.8282, + "step": 1678500 + }, + { + "epoch": 18.47, + "learning_rate": 7.882034215303372e-08, + "loss": 3.7945, + "step": 1679000 + }, + { + "epoch": 18.48, + "learning_rate": 7.880659002145333e-08, + "loss": 3.7994, + "step": 1679500 + }, + { + "epoch": 18.48, + "learning_rate": 7.879283788987294e-08, + "loss": 3.8124, + "step": 1680000 + }, + { + "epoch": 18.49, + "learning_rate": 7.877908575829253e-08, + "loss": 3.8033, + "step": 1680500 + }, + { + "epoch": 18.49, + "learning_rate": 7.876533362671214e-08, + "loss": 3.7845, + "step": 1681000 + }, + { + "epoch": 18.5, + "learning_rate": 7.875158149513175e-08, + "loss": 3.7968, + "step": 1681500 + }, + { + "epoch": 18.5, + "learning_rate": 7.873782936355135e-08, + "loss": 3.7885, + "step": 1682000 + }, + { + "epoch": 18.51, + "learning_rate": 7.872407723197096e-08, + "loss": 3.8202, + "step": 1682500 + }, + { + "epoch": 18.52, + "learning_rate": 7.871032510039057e-08, + "loss": 3.8037, + "step": 1683000 + }, + { + "epoch": 18.52, + "learning_rate": 7.869657296881016e-08, + "loss": 3.8026, + "step": 1683500 + }, + { + "epoch": 18.53, + "learning_rate": 7.868282083722977e-08, + "loss": 3.7969, + "step": 1684000 + }, + { + "epoch": 18.53, + "learning_rate": 7.866906870564938e-08, + "loss": 3.8033, + "step": 1684500 + }, + { + "epoch": 18.54, + "learning_rate": 7.865531657406898e-08, + "loss": 3.8002, + "step": 1685000 + }, + { + "epoch": 18.54, + "learning_rate": 7.864156444248859e-08, + "loss": 3.8247, + "step": 1685500 + }, + { + "epoch": 18.55, + "learning_rate": 7.86278123109082e-08, + "loss": 3.8131, + "step": 1686000 + }, + { + "epoch": 18.55, + "learning_rate": 7.861406017932779e-08, + "loss": 3.7968, + "step": 1686500 + }, + { + "epoch": 18.56, + "learning_rate": 7.86003080477474e-08, + "loss": 3.8057, + "step": 1687000 + }, + { + "epoch": 18.57, + "learning_rate": 7.858655591616701e-08, + "loss": 3.7945, + "step": 1687500 + }, + { + "epoch": 18.57, + "learning_rate": 7.85728037845866e-08, + "loss": 3.7944, + "step": 1688000 + }, + { + "epoch": 18.58, + "learning_rate": 7.855905165300622e-08, + "loss": 3.8053, + "step": 1688500 + }, + { + "epoch": 18.58, + "learning_rate": 7.854529952142582e-08, + "loss": 3.7992, + "step": 1689000 + }, + { + "epoch": 18.59, + "learning_rate": 7.853154738984542e-08, + "loss": 3.809, + "step": 1689500 + }, + { + "epoch": 18.59, + "learning_rate": 7.851779525826503e-08, + "loss": 3.8041, + "step": 1690000 + }, + { + "epoch": 18.6, + "learning_rate": 7.850404312668463e-08, + "loss": 3.8037, + "step": 1690500 + }, + { + "epoch": 18.6, + "learning_rate": 7.849029099510424e-08, + "loss": 3.8158, + "step": 1691000 + }, + { + "epoch": 18.61, + "learning_rate": 7.847653886352385e-08, + "loss": 3.8153, + "step": 1691500 + }, + { + "epoch": 18.61, + "learning_rate": 7.846278673194344e-08, + "loss": 3.8137, + "step": 1692000 + }, + { + "epoch": 18.62, + "learning_rate": 7.844903460036305e-08, + "loss": 3.8136, + "step": 1692500 + }, + { + "epoch": 18.63, + "learning_rate": 7.843528246878266e-08, + "loss": 3.8069, + "step": 1693000 + }, + { + "epoch": 18.63, + "learning_rate": 7.842153033720226e-08, + "loss": 3.8125, + "step": 1693500 + }, + { + "epoch": 18.64, + "learning_rate": 7.840777820562187e-08, + "loss": 3.8106, + "step": 1694000 + }, + { + "epoch": 18.64, + "learning_rate": 7.839402607404147e-08, + "loss": 3.8094, + "step": 1694500 + }, + { + "epoch": 18.65, + "learning_rate": 7.838027394246107e-08, + "loss": 3.8034, + "step": 1695000 + }, + { + "epoch": 18.65, + "learning_rate": 7.836652181088068e-08, + "loss": 3.8082, + "step": 1695500 + }, + { + "epoch": 18.66, + "learning_rate": 7.835276967930029e-08, + "loss": 3.7996, + "step": 1696000 + }, + { + "epoch": 18.66, + "learning_rate": 7.833901754771989e-08, + "loss": 3.7986, + "step": 1696500 + }, + { + "epoch": 18.67, + "learning_rate": 7.83252654161395e-08, + "loss": 3.8118, + "step": 1697000 + }, + { + "epoch": 18.68, + "learning_rate": 7.83115132845591e-08, + "loss": 3.8101, + "step": 1697500 + }, + { + "epoch": 18.68, + "learning_rate": 7.82977611529787e-08, + "loss": 3.822, + "step": 1698000 + }, + { + "epoch": 18.69, + "learning_rate": 7.828400902139831e-08, + "loss": 3.8212, + "step": 1698500 + }, + { + "epoch": 18.69, + "learning_rate": 7.827025688981792e-08, + "loss": 3.8175, + "step": 1699000 + }, + { + "epoch": 18.7, + "learning_rate": 7.825650475823752e-08, + "loss": 3.7896, + "step": 1699500 + }, + { + "epoch": 18.7, + "learning_rate": 7.824275262665712e-08, + "loss": 3.7976, + "step": 1700000 + }, + { + "epoch": 18.71, + "learning_rate": 7.822900049507673e-08, + "loss": 3.8041, + "step": 1700500 + }, + { + "epoch": 18.71, + "learning_rate": 7.821524836349633e-08, + "loss": 3.8006, + "step": 1701000 + }, + { + "epoch": 18.72, + "learning_rate": 7.820149623191594e-08, + "loss": 3.8091, + "step": 1701500 + }, + { + "epoch": 18.72, + "learning_rate": 7.818774410033555e-08, + "loss": 3.8033, + "step": 1702000 + }, + { + "epoch": 18.73, + "learning_rate": 7.817399196875514e-08, + "loss": 3.8165, + "step": 1702500 + }, + { + "epoch": 18.74, + "learning_rate": 7.816023983717475e-08, + "loss": 3.8281, + "step": 1703000 + }, + { + "epoch": 18.74, + "learning_rate": 7.814648770559436e-08, + "loss": 3.7862, + "step": 1703500 + }, + { + "epoch": 18.75, + "learning_rate": 7.813273557401396e-08, + "loss": 3.798, + "step": 1704000 + }, + { + "epoch": 18.75, + "learning_rate": 7.811898344243357e-08, + "loss": 3.7944, + "step": 1704500 + }, + { + "epoch": 18.76, + "learning_rate": 7.810523131085318e-08, + "loss": 3.7783, + "step": 1705000 + }, + { + "epoch": 18.76, + "learning_rate": 7.809147917927277e-08, + "loss": 3.8174, + "step": 1705500 + }, + { + "epoch": 18.77, + "learning_rate": 7.807772704769238e-08, + "loss": 3.8093, + "step": 1706000 + }, + { + "epoch": 18.77, + "learning_rate": 7.806397491611199e-08, + "loss": 3.812, + "step": 1706500 + }, + { + "epoch": 18.78, + "learning_rate": 7.805022278453159e-08, + "loss": 3.8109, + "step": 1707000 + }, + { + "epoch": 18.79, + "learning_rate": 7.80364706529512e-08, + "loss": 3.7995, + "step": 1707500 + }, + { + "epoch": 18.79, + "learning_rate": 7.802271852137081e-08, + "loss": 3.8057, + "step": 1708000 + }, + { + "epoch": 18.8, + "learning_rate": 7.80089663897904e-08, + "loss": 3.8048, + "step": 1708500 + }, + { + "epoch": 18.8, + "learning_rate": 7.799521425821001e-08, + "loss": 3.792, + "step": 1709000 + }, + { + "epoch": 18.81, + "learning_rate": 7.798146212662962e-08, + "loss": 3.8111, + "step": 1709500 + }, + { + "epoch": 18.81, + "learning_rate": 7.796770999504922e-08, + "loss": 3.8003, + "step": 1710000 + }, + { + "epoch": 18.82, + "learning_rate": 7.795395786346883e-08, + "loss": 3.8219, + "step": 1710500 + }, + { + "epoch": 18.82, + "learning_rate": 7.794020573188844e-08, + "loss": 3.7904, + "step": 1711000 + }, + { + "epoch": 18.83, + "learning_rate": 7.792645360030803e-08, + "loss": 3.8104, + "step": 1711500 + }, + { + "epoch": 18.83, + "learning_rate": 7.791270146872764e-08, + "loss": 3.7934, + "step": 1712000 + }, + { + "epoch": 18.84, + "learning_rate": 7.789894933714725e-08, + "loss": 3.7794, + "step": 1712500 + }, + { + "epoch": 18.85, + "learning_rate": 7.788519720556685e-08, + "loss": 3.8252, + "step": 1713000 + }, + { + "epoch": 18.85, + "learning_rate": 7.787144507398646e-08, + "loss": 3.7968, + "step": 1713500 + }, + { + "epoch": 18.86, + "learning_rate": 7.785769294240607e-08, + "loss": 3.8034, + "step": 1714000 + }, + { + "epoch": 18.86, + "learning_rate": 7.784394081082568e-08, + "loss": 3.7927, + "step": 1714500 + }, + { + "epoch": 18.87, + "learning_rate": 7.783018867924527e-08, + "loss": 3.7913, + "step": 1715000 + }, + { + "epoch": 18.87, + "learning_rate": 7.781643654766488e-08, + "loss": 3.8069, + "step": 1715500 + }, + { + "epoch": 18.88, + "learning_rate": 7.780268441608449e-08, + "loss": 3.8055, + "step": 1716000 + }, + { + "epoch": 18.88, + "learning_rate": 7.77889322845041e-08, + "loss": 3.7979, + "step": 1716500 + }, + { + "epoch": 18.89, + "learning_rate": 7.77751801529237e-08, + "loss": 3.8133, + "step": 1717000 + }, + { + "epoch": 18.9, + "learning_rate": 7.77614280213433e-08, + "loss": 3.8077, + "step": 1717500 + }, + { + "epoch": 18.9, + "learning_rate": 7.774767588976292e-08, + "loss": 3.8178, + "step": 1718000 + }, + { + "epoch": 18.91, + "learning_rate": 7.773392375818251e-08, + "loss": 3.8209, + "step": 1718500 + }, + { + "epoch": 18.91, + "learning_rate": 7.772017162660212e-08, + "loss": 3.8106, + "step": 1719000 + }, + { + "epoch": 18.92, + "learning_rate": 7.770641949502173e-08, + "loss": 3.8102, + "step": 1719500 + }, + { + "epoch": 18.92, + "learning_rate": 7.769266736344134e-08, + "loss": 3.8029, + "step": 1720000 + }, + { + "epoch": 18.93, + "learning_rate": 7.767891523186094e-08, + "loss": 3.7924, + "step": 1720500 + }, + { + "epoch": 18.93, + "learning_rate": 7.766516310028054e-08, + "loss": 3.8019, + "step": 1721000 + }, + { + "epoch": 18.94, + "learning_rate": 7.765141096870015e-08, + "loss": 3.7882, + "step": 1721500 + }, + { + "epoch": 18.94, + "learning_rate": 7.763765883711975e-08, + "loss": 3.813, + "step": 1722000 + }, + { + "epoch": 18.95, + "learning_rate": 7.762390670553936e-08, + "loss": 3.8002, + "step": 1722500 + }, + { + "epoch": 18.96, + "learning_rate": 7.761015457395897e-08, + "loss": 3.791, + "step": 1723000 + }, + { + "epoch": 18.96, + "learning_rate": 7.759640244237857e-08, + "loss": 3.8016, + "step": 1723500 + }, + { + "epoch": 18.97, + "learning_rate": 7.758265031079817e-08, + "loss": 3.7868, + "step": 1724000 + }, + { + "epoch": 18.97, + "learning_rate": 7.756889817921778e-08, + "loss": 3.786, + "step": 1724500 + }, + { + "epoch": 18.98, + "learning_rate": 7.755514604763738e-08, + "loss": 3.8076, + "step": 1725000 + }, + { + "epoch": 18.98, + "learning_rate": 7.754139391605699e-08, + "loss": 3.7932, + "step": 1725500 + }, + { + "epoch": 18.99, + "learning_rate": 7.75276417844766e-08, + "loss": 3.7891, + "step": 1726000 + }, + { + "epoch": 18.99, + "learning_rate": 7.75138896528962e-08, + "loss": 3.8037, + "step": 1726500 + }, + { + "epoch": 19.0, + "learning_rate": 7.75001375213158e-08, + "loss": 3.8056, + "step": 1727000 + }, + { + "epoch": 19.0, + "eval_loss": 3.857905387878418, + "eval_runtime": 6.1406, + "eval_samples_per_second": 253.07, + "step": 1727005 + }, + { + "epoch": 19.01, + "learning_rate": 7.748638538973541e-08, + "loss": 3.7989, + "step": 1727500 + }, + { + "epoch": 19.01, + "learning_rate": 7.747263325815501e-08, + "loss": 3.8012, + "step": 1728000 + }, + { + "epoch": 19.02, + "learning_rate": 7.745888112657462e-08, + "loss": 3.7847, + "step": 1728500 + }, + { + "epoch": 19.02, + "learning_rate": 7.744512899499423e-08, + "loss": 3.8137, + "step": 1729000 + }, + { + "epoch": 19.03, + "learning_rate": 7.743137686341382e-08, + "loss": 3.7922, + "step": 1729500 + }, + { + "epoch": 19.03, + "learning_rate": 7.741762473183343e-08, + "loss": 3.7999, + "step": 1730000 + }, + { + "epoch": 19.04, + "learning_rate": 7.740387260025304e-08, + "loss": 3.7989, + "step": 1730500 + }, + { + "epoch": 19.04, + "learning_rate": 7.739012046867264e-08, + "loss": 3.7866, + "step": 1731000 + }, + { + "epoch": 19.05, + "learning_rate": 7.737636833709225e-08, + "loss": 3.7825, + "step": 1731500 + }, + { + "epoch": 19.05, + "learning_rate": 7.736261620551186e-08, + "loss": 3.7951, + "step": 1732000 + }, + { + "epoch": 19.06, + "learning_rate": 7.734886407393145e-08, + "loss": 3.8076, + "step": 1732500 + }, + { + "epoch": 19.07, + "learning_rate": 7.733511194235106e-08, + "loss": 3.8111, + "step": 1733000 + }, + { + "epoch": 19.07, + "learning_rate": 7.732135981077067e-08, + "loss": 3.8063, + "step": 1733500 + }, + { + "epoch": 19.08, + "learning_rate": 7.730760767919027e-08, + "loss": 3.8003, + "step": 1734000 + }, + { + "epoch": 19.08, + "learning_rate": 7.729385554760988e-08, + "loss": 3.8058, + "step": 1734500 + }, + { + "epoch": 19.09, + "learning_rate": 7.728010341602949e-08, + "loss": 3.7894, + "step": 1735000 + }, + { + "epoch": 19.09, + "learning_rate": 7.726635128444908e-08, + "loss": 3.8014, + "step": 1735500 + }, + { + "epoch": 19.1, + "learning_rate": 7.725259915286869e-08, + "loss": 3.7958, + "step": 1736000 + }, + { + "epoch": 19.1, + "learning_rate": 7.72388470212883e-08, + "loss": 3.7969, + "step": 1736500 + }, + { + "epoch": 19.11, + "learning_rate": 7.72250948897079e-08, + "loss": 3.7957, + "step": 1737000 + }, + { + "epoch": 19.12, + "learning_rate": 7.721134275812751e-08, + "loss": 3.7896, + "step": 1737500 + }, + { + "epoch": 19.12, + "learning_rate": 7.719759062654712e-08, + "loss": 3.802, + "step": 1738000 + }, + { + "epoch": 19.13, + "learning_rate": 7.718383849496671e-08, + "loss": 3.8054, + "step": 1738500 + }, + { + "epoch": 19.13, + "learning_rate": 7.717008636338632e-08, + "loss": 3.817, + "step": 1739000 + }, + { + "epoch": 19.14, + "learning_rate": 7.715633423180593e-08, + "loss": 3.7997, + "step": 1739500 + }, + { + "epoch": 19.14, + "learning_rate": 7.714258210022553e-08, + "loss": 3.805, + "step": 1740000 + }, + { + "epoch": 19.15, + "learning_rate": 7.712882996864514e-08, + "loss": 3.7882, + "step": 1740500 + }, + { + "epoch": 19.15, + "learning_rate": 7.711507783706475e-08, + "loss": 3.7908, + "step": 1741000 + }, + { + "epoch": 19.16, + "learning_rate": 7.710132570548434e-08, + "loss": 3.8155, + "step": 1741500 + }, + { + "epoch": 19.16, + "learning_rate": 7.708757357390395e-08, + "loss": 3.7997, + "step": 1742000 + }, + { + "epoch": 19.17, + "learning_rate": 7.707382144232356e-08, + "loss": 3.7989, + "step": 1742500 + }, + { + "epoch": 19.18, + "learning_rate": 7.706006931074316e-08, + "loss": 3.8028, + "step": 1743000 + }, + { + "epoch": 19.18, + "learning_rate": 7.704631717916277e-08, + "loss": 3.7905, + "step": 1743500 + }, + { + "epoch": 19.19, + "learning_rate": 7.703256504758238e-08, + "loss": 3.79, + "step": 1744000 + }, + { + "epoch": 19.19, + "learning_rate": 7.701881291600197e-08, + "loss": 3.7958, + "step": 1744500 + }, + { + "epoch": 19.2, + "learning_rate": 7.700506078442158e-08, + "loss": 3.7968, + "step": 1745000 + }, + { + "epoch": 19.2, + "learning_rate": 7.699130865284119e-08, + "loss": 3.8083, + "step": 1745500 + }, + { + "epoch": 19.21, + "learning_rate": 7.697755652126079e-08, + "loss": 3.8037, + "step": 1746000 + }, + { + "epoch": 19.21, + "learning_rate": 7.69638043896804e-08, + "loss": 3.7927, + "step": 1746500 + }, + { + "epoch": 19.22, + "learning_rate": 7.69500522581e-08, + "loss": 3.8105, + "step": 1747000 + }, + { + "epoch": 19.23, + "learning_rate": 7.69363001265196e-08, + "loss": 3.8142, + "step": 1747500 + }, + { + "epoch": 19.23, + "learning_rate": 7.692254799493921e-08, + "loss": 3.7748, + "step": 1748000 + }, + { + "epoch": 19.24, + "learning_rate": 7.690879586335882e-08, + "loss": 3.8161, + "step": 1748500 + }, + { + "epoch": 19.24, + "learning_rate": 7.689504373177842e-08, + "loss": 3.8144, + "step": 1749000 + }, + { + "epoch": 19.25, + "learning_rate": 7.688129160019803e-08, + "loss": 3.7775, + "step": 1749500 + }, + { + "epoch": 19.25, + "learning_rate": 7.686753946861764e-08, + "loss": 3.8036, + "step": 1750000 + }, + { + "epoch": 19.26, + "learning_rate": 7.685378733703723e-08, + "loss": 3.8006, + "step": 1750500 + }, + { + "epoch": 19.26, + "learning_rate": 7.684003520545684e-08, + "loss": 3.7953, + "step": 1751000 + }, + { + "epoch": 19.27, + "learning_rate": 7.682628307387645e-08, + "loss": 3.8102, + "step": 1751500 + }, + { + "epoch": 19.27, + "learning_rate": 7.681253094229605e-08, + "loss": 3.7986, + "step": 1752000 + }, + { + "epoch": 19.28, + "learning_rate": 7.679877881071566e-08, + "loss": 3.7997, + "step": 1752500 + }, + { + "epoch": 19.29, + "learning_rate": 7.678502667913526e-08, + "loss": 3.8148, + "step": 1753000 + }, + { + "epoch": 19.29, + "learning_rate": 7.677127454755486e-08, + "loss": 3.7945, + "step": 1753500 + }, + { + "epoch": 19.3, + "learning_rate": 7.675752241597447e-08, + "loss": 3.7945, + "step": 1754000 + }, + { + "epoch": 19.3, + "learning_rate": 7.674377028439408e-08, + "loss": 3.7781, + "step": 1754500 + }, + { + "epoch": 19.31, + "learning_rate": 7.673001815281368e-08, + "loss": 3.7944, + "step": 1755000 + }, + { + "epoch": 19.31, + "learning_rate": 7.671626602123329e-08, + "loss": 3.8031, + "step": 1755500 + }, + { + "epoch": 19.32, + "learning_rate": 7.67025138896529e-08, + "loss": 3.7659, + "step": 1756000 + }, + { + "epoch": 19.32, + "learning_rate": 7.668876175807249e-08, + "loss": 3.8018, + "step": 1756500 + }, + { + "epoch": 19.33, + "learning_rate": 7.66750096264921e-08, + "loss": 3.8116, + "step": 1757000 + }, + { + "epoch": 19.34, + "learning_rate": 7.666125749491171e-08, + "loss": 3.7869, + "step": 1757500 + }, + { + "epoch": 19.34, + "learning_rate": 7.66475053633313e-08, + "loss": 3.8056, + "step": 1758000 + }, + { + "epoch": 19.35, + "learning_rate": 7.663375323175091e-08, + "loss": 3.8048, + "step": 1758500 + }, + { + "epoch": 19.35, + "learning_rate": 7.662000110017052e-08, + "loss": 3.8022, + "step": 1759000 + }, + { + "epoch": 19.36, + "learning_rate": 7.660624896859012e-08, + "loss": 3.8281, + "step": 1759500 + }, + { + "epoch": 19.36, + "learning_rate": 7.659249683700973e-08, + "loss": 3.8001, + "step": 1760000 + }, + { + "epoch": 19.37, + "learning_rate": 7.657874470542934e-08, + "loss": 3.8216, + "step": 1760500 + }, + { + "epoch": 19.37, + "learning_rate": 7.656499257384894e-08, + "loss": 3.7881, + "step": 1761000 + }, + { + "epoch": 19.38, + "learning_rate": 7.655124044226854e-08, + "loss": 3.7993, + "step": 1761500 + }, + { + "epoch": 19.39, + "learning_rate": 7.653748831068815e-08, + "loss": 3.8056, + "step": 1762000 + }, + { + "epoch": 19.39, + "learning_rate": 7.652373617910775e-08, + "loss": 3.8083, + "step": 1762500 + }, + { + "epoch": 19.4, + "learning_rate": 7.650998404752736e-08, + "loss": 3.7851, + "step": 1763000 + }, + { + "epoch": 19.4, + "learning_rate": 7.649623191594697e-08, + "loss": 3.783, + "step": 1763500 + }, + { + "epoch": 19.41, + "learning_rate": 7.648247978436658e-08, + "loss": 3.8046, + "step": 1764000 + }, + { + "epoch": 19.41, + "learning_rate": 7.646872765278617e-08, + "loss": 3.8168, + "step": 1764500 + }, + { + "epoch": 19.42, + "learning_rate": 7.645497552120578e-08, + "loss": 3.8018, + "step": 1765000 + }, + { + "epoch": 19.42, + "learning_rate": 7.644122338962539e-08, + "loss": 3.8224, + "step": 1765500 + }, + { + "epoch": 19.43, + "learning_rate": 7.642747125804499e-08, + "loss": 3.8096, + "step": 1766000 + }, + { + "epoch": 19.43, + "learning_rate": 7.64137191264646e-08, + "loss": 3.8257, + "step": 1766500 + }, + { + "epoch": 19.44, + "learning_rate": 7.639996699488421e-08, + "loss": 3.8101, + "step": 1767000 + }, + { + "epoch": 19.45, + "learning_rate": 7.638621486330382e-08, + "loss": 3.7982, + "step": 1767500 + }, + { + "epoch": 19.45, + "learning_rate": 7.637246273172341e-08, + "loss": 3.8054, + "step": 1768000 + }, + { + "epoch": 19.46, + "learning_rate": 7.635871060014302e-08, + "loss": 3.8039, + "step": 1768500 + }, + { + "epoch": 19.46, + "learning_rate": 7.634495846856263e-08, + "loss": 3.8004, + "step": 1769000 + }, + { + "epoch": 19.47, + "learning_rate": 7.633120633698223e-08, + "loss": 3.8, + "step": 1769500 + }, + { + "epoch": 19.47, + "learning_rate": 7.631745420540184e-08, + "loss": 3.7962, + "step": 1770000 + }, + { + "epoch": 19.48, + "learning_rate": 7.630370207382145e-08, + "loss": 3.781, + "step": 1770500 + }, + { + "epoch": 19.48, + "learning_rate": 7.628994994224104e-08, + "loss": 3.8127, + "step": 1771000 + }, + { + "epoch": 19.49, + "learning_rate": 7.627619781066065e-08, + "loss": 3.8074, + "step": 1771500 + }, + { + "epoch": 19.5, + "learning_rate": 7.626244567908026e-08, + "loss": 3.8072, + "step": 1772000 + }, + { + "epoch": 19.5, + "learning_rate": 7.624869354749986e-08, + "loss": 3.809, + "step": 1772500 + }, + { + "epoch": 19.51, + "learning_rate": 7.623494141591947e-08, + "loss": 3.8032, + "step": 1773000 + }, + { + "epoch": 19.51, + "learning_rate": 7.622118928433908e-08, + "loss": 3.8006, + "step": 1773500 + }, + { + "epoch": 19.52, + "learning_rate": 7.620743715275867e-08, + "loss": 3.7921, + "step": 1774000 + }, + { + "epoch": 19.52, + "learning_rate": 7.619368502117828e-08, + "loss": 3.8094, + "step": 1774500 + }, + { + "epoch": 19.53, + "learning_rate": 7.617993288959789e-08, + "loss": 3.8035, + "step": 1775000 + }, + { + "epoch": 19.53, + "learning_rate": 7.616618075801749e-08, + "loss": 3.8032, + "step": 1775500 + }, + { + "epoch": 19.54, + "learning_rate": 7.61524286264371e-08, + "loss": 3.8109, + "step": 1776000 + }, + { + "epoch": 19.54, + "learning_rate": 7.61386764948567e-08, + "loss": 3.8038, + "step": 1776500 + }, + { + "epoch": 19.55, + "learning_rate": 7.61249243632763e-08, + "loss": 3.809, + "step": 1777000 + }, + { + "epoch": 19.56, + "learning_rate": 7.611117223169591e-08, + "loss": 3.7974, + "step": 1777500 + }, + { + "epoch": 19.56, + "learning_rate": 7.609742010011552e-08, + "loss": 3.7956, + "step": 1778000 + }, + { + "epoch": 19.57, + "learning_rate": 7.608366796853512e-08, + "loss": 3.8194, + "step": 1778500 + }, + { + "epoch": 19.57, + "learning_rate": 7.606991583695473e-08, + "loss": 3.7878, + "step": 1779000 + }, + { + "epoch": 19.58, + "learning_rate": 7.605616370537434e-08, + "loss": 3.8054, + "step": 1779500 + }, + { + "epoch": 19.58, + "learning_rate": 7.604241157379393e-08, + "loss": 3.8027, + "step": 1780000 + }, + { + "epoch": 19.59, + "learning_rate": 7.602865944221354e-08, + "loss": 3.793, + "step": 1780500 + }, + { + "epoch": 19.59, + "learning_rate": 7.601490731063315e-08, + "loss": 3.8004, + "step": 1781000 + }, + { + "epoch": 19.6, + "learning_rate": 7.600115517905275e-08, + "loss": 3.7821, + "step": 1781500 + }, + { + "epoch": 19.61, + "learning_rate": 7.598740304747236e-08, + "loss": 3.7983, + "step": 1782000 + }, + { + "epoch": 19.61, + "learning_rate": 7.597365091589196e-08, + "loss": 3.8056, + "step": 1782500 + }, + { + "epoch": 19.62, + "learning_rate": 7.595989878431156e-08, + "loss": 3.792, + "step": 1783000 + }, + { + "epoch": 19.62, + "learning_rate": 7.594614665273117e-08, + "loss": 3.8047, + "step": 1783500 + }, + { + "epoch": 19.63, + "learning_rate": 7.593239452115078e-08, + "loss": 3.8029, + "step": 1784000 + }, + { + "epoch": 19.63, + "learning_rate": 7.591864238957038e-08, + "loss": 3.8079, + "step": 1784500 + }, + { + "epoch": 19.64, + "learning_rate": 7.590489025798999e-08, + "loss": 3.7968, + "step": 1785000 + }, + { + "epoch": 19.64, + "learning_rate": 7.58911381264096e-08, + "loss": 3.8104, + "step": 1785500 + }, + { + "epoch": 19.65, + "learning_rate": 7.587738599482919e-08, + "loss": 3.7925, + "step": 1786000 + }, + { + "epoch": 19.65, + "learning_rate": 7.58636338632488e-08, + "loss": 3.785, + "step": 1786500 + }, + { + "epoch": 19.66, + "learning_rate": 7.584988173166841e-08, + "loss": 3.7858, + "step": 1787000 + }, + { + "epoch": 19.67, + "learning_rate": 7.5836129600088e-08, + "loss": 3.8069, + "step": 1787500 + }, + { + "epoch": 19.67, + "learning_rate": 7.582237746850761e-08, + "loss": 3.782, + "step": 1788000 + }, + { + "epoch": 19.68, + "learning_rate": 7.580862533692722e-08, + "loss": 3.8182, + "step": 1788500 + }, + { + "epoch": 19.68, + "learning_rate": 7.579487320534682e-08, + "loss": 3.7936, + "step": 1789000 + }, + { + "epoch": 19.69, + "learning_rate": 7.578112107376643e-08, + "loss": 3.8008, + "step": 1789500 + }, + { + "epoch": 19.69, + "learning_rate": 7.576736894218604e-08, + "loss": 3.7869, + "step": 1790000 + }, + { + "epoch": 19.7, + "learning_rate": 7.575361681060563e-08, + "loss": 3.7896, + "step": 1790500 + }, + { + "epoch": 19.7, + "learning_rate": 7.573986467902524e-08, + "loss": 3.8021, + "step": 1791000 + }, + { + "epoch": 19.71, + "learning_rate": 7.572611254744485e-08, + "loss": 3.8002, + "step": 1791500 + }, + { + "epoch": 19.72, + "learning_rate": 7.571236041586445e-08, + "loss": 3.7978, + "step": 1792000 + }, + { + "epoch": 19.72, + "learning_rate": 7.569860828428406e-08, + "loss": 3.7956, + "step": 1792500 + }, + { + "epoch": 19.73, + "learning_rate": 7.568485615270367e-08, + "loss": 3.8255, + "step": 1793000 + }, + { + "epoch": 19.73, + "learning_rate": 7.567110402112326e-08, + "loss": 3.8056, + "step": 1793500 + }, + { + "epoch": 19.74, + "learning_rate": 7.565735188954287e-08, + "loss": 3.7751, + "step": 1794000 + }, + { + "epoch": 19.74, + "learning_rate": 7.564359975796248e-08, + "loss": 3.8257, + "step": 1794500 + }, + { + "epoch": 19.75, + "learning_rate": 7.562984762638208e-08, + "loss": 3.8093, + "step": 1795000 + }, + { + "epoch": 19.75, + "learning_rate": 7.561609549480169e-08, + "loss": 3.7838, + "step": 1795500 + }, + { + "epoch": 19.76, + "learning_rate": 7.56023433632213e-08, + "loss": 3.787, + "step": 1796000 + }, + { + "epoch": 19.76, + "learning_rate": 7.55885912316409e-08, + "loss": 3.8141, + "step": 1796500 + }, + { + "epoch": 19.77, + "learning_rate": 7.55748391000605e-08, + "loss": 3.7984, + "step": 1797000 + }, + { + "epoch": 19.78, + "learning_rate": 7.556108696848011e-08, + "loss": 3.7842, + "step": 1797500 + }, + { + "epoch": 19.78, + "learning_rate": 7.554733483689971e-08, + "loss": 3.7862, + "step": 1798000 + }, + { + "epoch": 19.79, + "learning_rate": 7.553358270531932e-08, + "loss": 3.8035, + "step": 1798500 + }, + { + "epoch": 19.79, + "learning_rate": 7.551983057373893e-08, + "loss": 3.8226, + "step": 1799000 + }, + { + "epoch": 19.8, + "learning_rate": 7.550607844215852e-08, + "loss": 3.7921, + "step": 1799500 + }, + { + "epoch": 19.8, + "learning_rate": 7.549232631057813e-08, + "loss": 3.8111, + "step": 1800000 + }, + { + "epoch": 19.81, + "learning_rate": 7.547857417899774e-08, + "loss": 3.8098, + "step": 1800500 + }, + { + "epoch": 19.81, + "learning_rate": 7.546482204741734e-08, + "loss": 3.8149, + "step": 1801000 + }, + { + "epoch": 19.82, + "learning_rate": 7.545106991583695e-08, + "loss": 3.8072, + "step": 1801500 + }, + { + "epoch": 19.83, + "learning_rate": 7.543731778425656e-08, + "loss": 3.8003, + "step": 1802000 + }, + { + "epoch": 19.83, + "learning_rate": 7.542356565267615e-08, + "loss": 3.8052, + "step": 1802500 + }, + { + "epoch": 19.84, + "learning_rate": 7.540981352109576e-08, + "loss": 3.8257, + "step": 1803000 + }, + { + "epoch": 19.84, + "learning_rate": 7.539606138951537e-08, + "loss": 3.8208, + "step": 1803500 + }, + { + "epoch": 19.85, + "learning_rate": 7.538230925793497e-08, + "loss": 3.8083, + "step": 1804000 + }, + { + "epoch": 19.85, + "learning_rate": 7.536855712635458e-08, + "loss": 3.7994, + "step": 1804500 + }, + { + "epoch": 19.86, + "learning_rate": 7.535480499477419e-08, + "loss": 3.7928, + "step": 1805000 + }, + { + "epoch": 19.86, + "learning_rate": 7.534105286319378e-08, + "loss": 3.8015, + "step": 1805500 + }, + { + "epoch": 19.87, + "learning_rate": 7.532730073161339e-08, + "loss": 3.7857, + "step": 1806000 + }, + { + "epoch": 19.87, + "learning_rate": 7.5313548600033e-08, + "loss": 3.7995, + "step": 1806500 + }, + { + "epoch": 19.88, + "learning_rate": 7.52997964684526e-08, + "loss": 3.7964, + "step": 1807000 + }, + { + "epoch": 19.89, + "learning_rate": 7.528604433687221e-08, + "loss": 3.8072, + "step": 1807500 + }, + { + "epoch": 19.89, + "learning_rate": 7.527229220529182e-08, + "loss": 3.8056, + "step": 1808000 + }, + { + "epoch": 19.9, + "learning_rate": 7.525854007371141e-08, + "loss": 3.8157, + "step": 1808500 + }, + { + "epoch": 19.9, + "learning_rate": 7.524478794213102e-08, + "loss": 3.8169, + "step": 1809000 + }, + { + "epoch": 19.91, + "learning_rate": 7.523103581055063e-08, + "loss": 3.7919, + "step": 1809500 + }, + { + "epoch": 19.91, + "learning_rate": 7.521728367897023e-08, + "loss": 3.8037, + "step": 1810000 + }, + { + "epoch": 19.92, + "learning_rate": 7.520353154738984e-08, + "loss": 3.7931, + "step": 1810500 + }, + { + "epoch": 19.92, + "learning_rate": 7.518977941580945e-08, + "loss": 3.8172, + "step": 1811000 + }, + { + "epoch": 19.93, + "learning_rate": 7.517602728422906e-08, + "loss": 3.7923, + "step": 1811500 + }, + { + "epoch": 19.94, + "learning_rate": 7.516227515264865e-08, + "loss": 3.789, + "step": 1812000 + }, + { + "epoch": 19.94, + "learning_rate": 7.514852302106826e-08, + "loss": 3.7948, + "step": 1812500 + }, + { + "epoch": 19.95, + "learning_rate": 7.513477088948787e-08, + "loss": 3.8161, + "step": 1813000 + }, + { + "epoch": 19.95, + "learning_rate": 7.512101875790748e-08, + "loss": 3.7805, + "step": 1813500 + }, + { + "epoch": 19.96, + "learning_rate": 7.510726662632708e-08, + "loss": 3.8168, + "step": 1814000 + }, + { + "epoch": 19.96, + "learning_rate": 7.509351449474668e-08, + "loss": 3.8, + "step": 1814500 + }, + { + "epoch": 19.97, + "learning_rate": 7.50797623631663e-08, + "loss": 3.7967, + "step": 1815000 + }, + { + "epoch": 19.97, + "learning_rate": 7.506601023158589e-08, + "loss": 3.7892, + "step": 1815500 + }, + { + "epoch": 19.98, + "learning_rate": 7.50522581000055e-08, + "loss": 3.8025, + "step": 1816000 + }, + { + "epoch": 19.98, + "learning_rate": 7.503850596842511e-08, + "loss": 3.7884, + "step": 1816500 + }, + { + "epoch": 19.99, + "learning_rate": 7.502475383684472e-08, + "loss": 3.7893, + "step": 1817000 + }, + { + "epoch": 20.0, + "learning_rate": 7.501100170526431e-08, + "loss": 3.8053, + "step": 1817500 + }, + { + "epoch": 20.0, + "eval_loss": 3.8551576137542725, + "eval_runtime": 6.137, + "eval_samples_per_second": 253.22, + "step": 1817900 + }, + { + "epoch": 20.0, + "learning_rate": 7.499724957368392e-08, + "loss": 3.8178, + "step": 1818000 + }, + { + "epoch": 20.01, + "learning_rate": 7.498349744210353e-08, + "loss": 3.7973, + "step": 1818500 + }, + { + "epoch": 20.01, + "learning_rate": 7.496974531052313e-08, + "loss": 3.8044, + "step": 1819000 + }, + { + "epoch": 20.02, + "learning_rate": 7.495599317894274e-08, + "loss": 3.8117, + "step": 1819500 + }, + { + "epoch": 20.02, + "learning_rate": 7.494224104736235e-08, + "loss": 3.8076, + "step": 1820000 + }, + { + "epoch": 20.03, + "learning_rate": 7.492848891578194e-08, + "loss": 3.7841, + "step": 1820500 + }, + { + "epoch": 20.03, + "learning_rate": 7.491473678420155e-08, + "loss": 3.7897, + "step": 1821000 + }, + { + "epoch": 20.04, + "learning_rate": 7.490098465262116e-08, + "loss": 3.7963, + "step": 1821500 + }, + { + "epoch": 20.05, + "learning_rate": 7.488723252104076e-08, + "loss": 3.799, + "step": 1822000 + }, + { + "epoch": 20.05, + "learning_rate": 7.487348038946037e-08, + "loss": 3.8141, + "step": 1822500 + }, + { + "epoch": 20.06, + "learning_rate": 7.485972825787998e-08, + "loss": 3.7983, + "step": 1823000 + }, + { + "epoch": 20.06, + "learning_rate": 7.484597612629957e-08, + "loss": 3.7868, + "step": 1823500 + }, + { + "epoch": 20.07, + "learning_rate": 7.483222399471918e-08, + "loss": 3.8005, + "step": 1824000 + }, + { + "epoch": 20.07, + "learning_rate": 7.481847186313879e-08, + "loss": 3.7943, + "step": 1824500 + }, + { + "epoch": 20.08, + "learning_rate": 7.480471973155839e-08, + "loss": 3.8048, + "step": 1825000 + }, + { + "epoch": 20.08, + "learning_rate": 7.4790967599978e-08, + "loss": 3.774, + "step": 1825500 + }, + { + "epoch": 20.09, + "learning_rate": 7.477721546839761e-08, + "loss": 3.7974, + "step": 1826000 + }, + { + "epoch": 20.09, + "learning_rate": 7.47634633368172e-08, + "loss": 3.8042, + "step": 1826500 + }, + { + "epoch": 20.1, + "learning_rate": 7.474971120523681e-08, + "loss": 3.7929, + "step": 1827000 + }, + { + "epoch": 20.11, + "learning_rate": 7.473595907365642e-08, + "loss": 3.7979, + "step": 1827500 + }, + { + "epoch": 20.11, + "learning_rate": 7.472220694207602e-08, + "loss": 3.7783, + "step": 1828000 + }, + { + "epoch": 20.12, + "learning_rate": 7.470845481049563e-08, + "loss": 3.8265, + "step": 1828500 + }, + { + "epoch": 20.12, + "learning_rate": 7.469470267891524e-08, + "loss": 3.7892, + "step": 1829000 + }, + { + "epoch": 20.13, + "learning_rate": 7.468095054733483e-08, + "loss": 3.8005, + "step": 1829500 + }, + { + "epoch": 20.13, + "learning_rate": 7.466719841575444e-08, + "loss": 3.8023, + "step": 1830000 + }, + { + "epoch": 20.14, + "learning_rate": 7.465344628417405e-08, + "loss": 3.8144, + "step": 1830500 + }, + { + "epoch": 20.14, + "learning_rate": 7.463969415259365e-08, + "loss": 3.7976, + "step": 1831000 + }, + { + "epoch": 20.15, + "learning_rate": 7.462594202101326e-08, + "loss": 3.8036, + "step": 1831500 + }, + { + "epoch": 20.16, + "learning_rate": 7.461218988943287e-08, + "loss": 3.794, + "step": 1832000 + }, + { + "epoch": 20.16, + "learning_rate": 7.459843775785246e-08, + "loss": 3.8083, + "step": 1832500 + }, + { + "epoch": 20.17, + "learning_rate": 7.458468562627207e-08, + "loss": 3.7875, + "step": 1833000 + }, + { + "epoch": 20.17, + "learning_rate": 7.457093349469168e-08, + "loss": 3.8107, + "step": 1833500 + }, + { + "epoch": 20.18, + "learning_rate": 7.455718136311128e-08, + "loss": 3.7959, + "step": 1834000 + }, + { + "epoch": 20.18, + "learning_rate": 7.454342923153089e-08, + "loss": 3.7881, + "step": 1834500 + }, + { + "epoch": 20.19, + "learning_rate": 7.45296770999505e-08, + "loss": 3.7743, + "step": 1835000 + }, + { + "epoch": 20.19, + "learning_rate": 7.451592496837009e-08, + "loss": 3.7963, + "step": 1835500 + }, + { + "epoch": 20.2, + "learning_rate": 7.45021728367897e-08, + "loss": 3.809, + "step": 1836000 + }, + { + "epoch": 20.2, + "learning_rate": 7.448842070520931e-08, + "loss": 3.7948, + "step": 1836500 + }, + { + "epoch": 20.21, + "learning_rate": 7.447466857362891e-08, + "loss": 3.8011, + "step": 1837000 + }, + { + "epoch": 20.22, + "learning_rate": 7.446091644204852e-08, + "loss": 3.7751, + "step": 1837500 + }, + { + "epoch": 20.22, + "learning_rate": 7.444716431046813e-08, + "loss": 3.8016, + "step": 1838000 + }, + { + "epoch": 20.23, + "learning_rate": 7.443341217888772e-08, + "loss": 3.8048, + "step": 1838500 + }, + { + "epoch": 20.23, + "learning_rate": 7.441966004730733e-08, + "loss": 3.7817, + "step": 1839000 + }, + { + "epoch": 20.24, + "learning_rate": 7.440590791572694e-08, + "loss": 3.7809, + "step": 1839500 + }, + { + "epoch": 20.24, + "learning_rate": 7.439215578414654e-08, + "loss": 3.8087, + "step": 1840000 + }, + { + "epoch": 20.25, + "learning_rate": 7.437840365256615e-08, + "loss": 3.7912, + "step": 1840500 + }, + { + "epoch": 20.25, + "learning_rate": 7.436465152098574e-08, + "loss": 3.8013, + "step": 1841000 + }, + { + "epoch": 20.26, + "learning_rate": 7.435089938940535e-08, + "loss": 3.8082, + "step": 1841500 + }, + { + "epoch": 20.27, + "learning_rate": 7.433714725782496e-08, + "loss": 3.7888, + "step": 1842000 + }, + { + "epoch": 20.27, + "learning_rate": 7.432339512624456e-08, + "loss": 3.8055, + "step": 1842500 + }, + { + "epoch": 20.28, + "learning_rate": 7.430964299466417e-08, + "loss": 3.8234, + "step": 1843000 + }, + { + "epoch": 20.28, + "learning_rate": 7.429589086308378e-08, + "loss": 3.8088, + "step": 1843500 + }, + { + "epoch": 20.29, + "learning_rate": 7.428213873150337e-08, + "loss": 3.8143, + "step": 1844000 + }, + { + "epoch": 20.29, + "learning_rate": 7.426838659992298e-08, + "loss": 3.8115, + "step": 1844500 + }, + { + "epoch": 20.3, + "learning_rate": 7.425463446834259e-08, + "loss": 3.7793, + "step": 1845000 + }, + { + "epoch": 20.3, + "learning_rate": 7.424088233676219e-08, + "loss": 3.8052, + "step": 1845500 + }, + { + "epoch": 20.31, + "learning_rate": 7.42271302051818e-08, + "loss": 3.7956, + "step": 1846000 + }, + { + "epoch": 20.31, + "learning_rate": 7.42133780736014e-08, + "loss": 3.7983, + "step": 1846500 + }, + { + "epoch": 20.32, + "learning_rate": 7.4199625942021e-08, + "loss": 3.8051, + "step": 1847000 + }, + { + "epoch": 20.33, + "learning_rate": 7.418587381044061e-08, + "loss": 3.8185, + "step": 1847500 + }, + { + "epoch": 20.33, + "learning_rate": 7.417212167886022e-08, + "loss": 3.7892, + "step": 1848000 + }, + { + "epoch": 20.34, + "learning_rate": 7.415836954727982e-08, + "loss": 3.7841, + "step": 1848500 + }, + { + "epoch": 20.34, + "learning_rate": 7.414461741569943e-08, + "loss": 3.8014, + "step": 1849000 + }, + { + "epoch": 20.35, + "learning_rate": 7.413086528411903e-08, + "loss": 3.7852, + "step": 1849500 + }, + { + "epoch": 20.35, + "learning_rate": 7.411711315253863e-08, + "loss": 3.8016, + "step": 1850000 + }, + { + "epoch": 20.36, + "learning_rate": 7.410336102095824e-08, + "loss": 3.776, + "step": 1850500 + }, + { + "epoch": 20.36, + "learning_rate": 7.408960888937785e-08, + "loss": 3.7866, + "step": 1851000 + }, + { + "epoch": 20.37, + "learning_rate": 7.407585675779745e-08, + "loss": 3.8026, + "step": 1851500 + }, + { + "epoch": 20.38, + "learning_rate": 7.406210462621705e-08, + "loss": 3.788, + "step": 1852000 + }, + { + "epoch": 20.38, + "learning_rate": 7.404835249463666e-08, + "loss": 3.8102, + "step": 1852500 + }, + { + "epoch": 20.39, + "learning_rate": 7.403460036305626e-08, + "loss": 3.8028, + "step": 1853000 + }, + { + "epoch": 20.39, + "learning_rate": 7.402084823147587e-08, + "loss": 3.8203, + "step": 1853500 + }, + { + "epoch": 20.4, + "learning_rate": 7.400709609989548e-08, + "loss": 3.7897, + "step": 1854000 + }, + { + "epoch": 20.4, + "learning_rate": 7.399334396831507e-08, + "loss": 3.8082, + "step": 1854500 + }, + { + "epoch": 20.41, + "learning_rate": 7.397959183673468e-08, + "loss": 3.8066, + "step": 1855000 + }, + { + "epoch": 20.41, + "learning_rate": 7.39658397051543e-08, + "loss": 3.797, + "step": 1855500 + }, + { + "epoch": 20.42, + "learning_rate": 7.395208757357389e-08, + "loss": 3.7882, + "step": 1856000 + }, + { + "epoch": 20.42, + "learning_rate": 7.39383354419935e-08, + "loss": 3.7947, + "step": 1856500 + }, + { + "epoch": 20.43, + "learning_rate": 7.392458331041311e-08, + "loss": 3.8023, + "step": 1857000 + }, + { + "epoch": 20.44, + "learning_rate": 7.39108311788327e-08, + "loss": 3.7946, + "step": 1857500 + }, + { + "epoch": 20.44, + "learning_rate": 7.389707904725231e-08, + "loss": 3.7923, + "step": 1858000 + }, + { + "epoch": 20.45, + "learning_rate": 7.388332691567192e-08, + "loss": 3.7877, + "step": 1858500 + }, + { + "epoch": 20.45, + "learning_rate": 7.386957478409153e-08, + "loss": 3.7872, + "step": 1859000 + }, + { + "epoch": 20.46, + "learning_rate": 7.385582265251113e-08, + "loss": 3.8052, + "step": 1859500 + }, + { + "epoch": 20.46, + "learning_rate": 7.384207052093074e-08, + "loss": 3.8038, + "step": 1860000 + }, + { + "epoch": 20.47, + "learning_rate": 7.382831838935035e-08, + "loss": 3.8155, + "step": 1860500 + }, + { + "epoch": 20.47, + "learning_rate": 7.381456625776996e-08, + "loss": 3.8019, + "step": 1861000 + }, + { + "epoch": 20.48, + "learning_rate": 7.380081412618955e-08, + "loss": 3.7902, + "step": 1861500 + }, + { + "epoch": 20.49, + "learning_rate": 7.378706199460916e-08, + "loss": 3.7872, + "step": 1862000 + }, + { + "epoch": 20.49, + "learning_rate": 7.377330986302877e-08, + "loss": 3.8149, + "step": 1862500 + }, + { + "epoch": 20.5, + "learning_rate": 7.375955773144837e-08, + "loss": 3.791, + "step": 1863000 + }, + { + "epoch": 20.5, + "learning_rate": 7.374580559986798e-08, + "loss": 3.8104, + "step": 1863500 + }, + { + "epoch": 20.51, + "learning_rate": 7.373205346828759e-08, + "loss": 3.7963, + "step": 1864000 + }, + { + "epoch": 20.51, + "learning_rate": 7.37183013367072e-08, + "loss": 3.7954, + "step": 1864500 + }, + { + "epoch": 20.52, + "learning_rate": 7.370454920512679e-08, + "loss": 3.7881, + "step": 1865000 + }, + { + "epoch": 20.52, + "learning_rate": 7.36907970735464e-08, + "loss": 3.7903, + "step": 1865500 + }, + { + "epoch": 20.53, + "learning_rate": 7.367704494196601e-08, + "loss": 3.7818, + "step": 1866000 + }, + { + "epoch": 20.53, + "learning_rate": 7.36632928103856e-08, + "loss": 3.8289, + "step": 1866500 + }, + { + "epoch": 20.54, + "learning_rate": 7.364954067880522e-08, + "loss": 3.7697, + "step": 1867000 + }, + { + "epoch": 20.55, + "learning_rate": 7.363578854722483e-08, + "loss": 3.7891, + "step": 1867500 + }, + { + "epoch": 20.55, + "learning_rate": 7.362203641564442e-08, + "loss": 3.7982, + "step": 1868000 + }, + { + "epoch": 20.56, + "learning_rate": 7.360828428406403e-08, + "loss": 3.793, + "step": 1868500 + }, + { + "epoch": 20.56, + "learning_rate": 7.359453215248364e-08, + "loss": 3.8045, + "step": 1869000 + }, + { + "epoch": 20.57, + "learning_rate": 7.358078002090324e-08, + "loss": 3.7826, + "step": 1869500 + }, + { + "epoch": 20.57, + "learning_rate": 7.356702788932285e-08, + "loss": 3.8052, + "step": 1870000 + }, + { + "epoch": 20.58, + "learning_rate": 7.355327575774245e-08, + "loss": 3.7954, + "step": 1870500 + }, + { + "epoch": 20.58, + "learning_rate": 7.353952362616205e-08, + "loss": 3.8003, + "step": 1871000 + }, + { + "epoch": 20.59, + "learning_rate": 7.352577149458166e-08, + "loss": 3.8054, + "step": 1871500 + }, + { + "epoch": 20.6, + "learning_rate": 7.351201936300127e-08, + "loss": 3.8011, + "step": 1872000 + }, + { + "epoch": 20.6, + "learning_rate": 7.349826723142087e-08, + "loss": 3.8103, + "step": 1872500 + }, + { + "epoch": 20.61, + "learning_rate": 7.348451509984048e-08, + "loss": 3.8004, + "step": 1873000 + }, + { + "epoch": 20.61, + "learning_rate": 7.347076296826008e-08, + "loss": 3.7807, + "step": 1873500 + }, + { + "epoch": 20.62, + "learning_rate": 7.345701083667968e-08, + "loss": 3.8311, + "step": 1874000 + }, + { + "epoch": 20.62, + "learning_rate": 7.344325870509929e-08, + "loss": 3.8053, + "step": 1874500 + }, + { + "epoch": 20.63, + "learning_rate": 7.34295065735189e-08, + "loss": 3.7859, + "step": 1875000 + }, + { + "epoch": 20.63, + "learning_rate": 7.34157544419385e-08, + "loss": 3.7986, + "step": 1875500 + }, + { + "epoch": 20.64, + "learning_rate": 7.34020023103581e-08, + "loss": 3.8077, + "step": 1876000 + }, + { + "epoch": 20.64, + "learning_rate": 7.338825017877771e-08, + "loss": 3.7949, + "step": 1876500 + }, + { + "epoch": 20.65, + "learning_rate": 7.337449804719731e-08, + "loss": 3.7979, + "step": 1877000 + }, + { + "epoch": 20.66, + "learning_rate": 7.336074591561692e-08, + "loss": 3.7861, + "step": 1877500 + }, + { + "epoch": 20.66, + "learning_rate": 7.334699378403653e-08, + "loss": 3.7987, + "step": 1878000 + }, + { + "epoch": 20.67, + "learning_rate": 7.333324165245612e-08, + "loss": 3.7809, + "step": 1878500 + }, + { + "epoch": 20.67, + "learning_rate": 7.331948952087573e-08, + "loss": 3.7997, + "step": 1879000 + }, + { + "epoch": 20.68, + "learning_rate": 7.330573738929534e-08, + "loss": 3.802, + "step": 1879500 + }, + { + "epoch": 20.68, + "learning_rate": 7.329198525771494e-08, + "loss": 3.8025, + "step": 1880000 + }, + { + "epoch": 20.69, + "learning_rate": 7.327823312613455e-08, + "loss": 3.7878, + "step": 1880500 + }, + { + "epoch": 20.69, + "learning_rate": 7.326448099455416e-08, + "loss": 3.7954, + "step": 1881000 + }, + { + "epoch": 20.7, + "learning_rate": 7.325072886297375e-08, + "loss": 3.7928, + "step": 1881500 + }, + { + "epoch": 20.71, + "learning_rate": 7.323697673139336e-08, + "loss": 3.7754, + "step": 1882000 + }, + { + "epoch": 20.71, + "learning_rate": 7.322322459981297e-08, + "loss": 3.7879, + "step": 1882500 + }, + { + "epoch": 20.72, + "learning_rate": 7.320947246823257e-08, + "loss": 3.8079, + "step": 1883000 + }, + { + "epoch": 20.72, + "learning_rate": 7.319572033665218e-08, + "loss": 3.8016, + "step": 1883500 + }, + { + "epoch": 20.73, + "learning_rate": 7.318196820507179e-08, + "loss": 3.8313, + "step": 1884000 + }, + { + "epoch": 20.73, + "learning_rate": 7.316821607349138e-08, + "loss": 3.7935, + "step": 1884500 + }, + { + "epoch": 20.74, + "learning_rate": 7.3154463941911e-08, + "loss": 3.8102, + "step": 1885000 + }, + { + "epoch": 20.74, + "learning_rate": 7.31407118103306e-08, + "loss": 3.7591, + "step": 1885500 + }, + { + "epoch": 20.75, + "learning_rate": 7.31269596787502e-08, + "loss": 3.795, + "step": 1886000 + }, + { + "epoch": 20.75, + "learning_rate": 7.311320754716981e-08, + "loss": 3.7998, + "step": 1886500 + }, + { + "epoch": 20.76, + "learning_rate": 7.309945541558942e-08, + "loss": 3.7898, + "step": 1887000 + }, + { + "epoch": 20.77, + "learning_rate": 7.308570328400901e-08, + "loss": 3.8012, + "step": 1887500 + }, + { + "epoch": 20.77, + "learning_rate": 7.307195115242862e-08, + "loss": 3.7926, + "step": 1888000 + }, + { + "epoch": 20.78, + "learning_rate": 7.305819902084823e-08, + "loss": 3.7982, + "step": 1888500 + }, + { + "epoch": 20.78, + "learning_rate": 7.304444688926783e-08, + "loss": 3.7926, + "step": 1889000 + }, + { + "epoch": 20.79, + "learning_rate": 7.303069475768744e-08, + "loss": 3.7974, + "step": 1889500 + }, + { + "epoch": 20.79, + "learning_rate": 7.301694262610705e-08, + "loss": 3.7811, + "step": 1890000 + }, + { + "epoch": 20.8, + "learning_rate": 7.300319049452664e-08, + "loss": 3.7749, + "step": 1890500 + }, + { + "epoch": 20.8, + "learning_rate": 7.298943836294625e-08, + "loss": 3.8088, + "step": 1891000 + }, + { + "epoch": 20.81, + "learning_rate": 7.297568623136586e-08, + "loss": 3.7975, + "step": 1891500 + }, + { + "epoch": 20.82, + "learning_rate": 7.296193409978546e-08, + "loss": 3.7964, + "step": 1892000 + }, + { + "epoch": 20.82, + "learning_rate": 7.294818196820507e-08, + "loss": 3.8075, + "step": 1892500 + }, + { + "epoch": 20.83, + "learning_rate": 7.293442983662468e-08, + "loss": 3.8135, + "step": 1893000 + }, + { + "epoch": 20.83, + "learning_rate": 7.292067770504427e-08, + "loss": 3.7881, + "step": 1893500 + }, + { + "epoch": 20.84, + "learning_rate": 7.290692557346388e-08, + "loss": 3.7959, + "step": 1894000 + }, + { + "epoch": 20.84, + "learning_rate": 7.289317344188349e-08, + "loss": 3.798, + "step": 1894500 + }, + { + "epoch": 20.85, + "learning_rate": 7.287942131030309e-08, + "loss": 3.7938, + "step": 1895000 + }, + { + "epoch": 20.85, + "learning_rate": 7.28656691787227e-08, + "loss": 3.7948, + "step": 1895500 + }, + { + "epoch": 20.86, + "learning_rate": 7.28519170471423e-08, + "loss": 3.7893, + "step": 1896000 + }, + { + "epoch": 20.86, + "learning_rate": 7.28381649155619e-08, + "loss": 3.8041, + "step": 1896500 + }, + { + "epoch": 20.87, + "learning_rate": 7.282441278398151e-08, + "loss": 3.7813, + "step": 1897000 + }, + { + "epoch": 20.88, + "learning_rate": 7.281066065240112e-08, + "loss": 3.7873, + "step": 1897500 + }, + { + "epoch": 20.88, + "learning_rate": 7.279690852082072e-08, + "loss": 3.7765, + "step": 1898000 + }, + { + "epoch": 20.89, + "learning_rate": 7.278315638924033e-08, + "loss": 3.7836, + "step": 1898500 + }, + { + "epoch": 20.89, + "learning_rate": 7.276940425765994e-08, + "loss": 3.7898, + "step": 1899000 + }, + { + "epoch": 20.9, + "learning_rate": 7.275565212607953e-08, + "loss": 3.7897, + "step": 1899500 + }, + { + "epoch": 20.9, + "learning_rate": 7.274189999449914e-08, + "loss": 3.7884, + "step": 1900000 + }, + { + "epoch": 20.91, + "learning_rate": 7.272814786291875e-08, + "loss": 3.7891, + "step": 1900500 + }, + { + "epoch": 20.91, + "learning_rate": 7.271439573133835e-08, + "loss": 3.7833, + "step": 1901000 + }, + { + "epoch": 20.92, + "learning_rate": 7.270064359975796e-08, + "loss": 3.8108, + "step": 1901500 + }, + { + "epoch": 20.93, + "learning_rate": 7.268689146817757e-08, + "loss": 3.7944, + "step": 1902000 + }, + { + "epoch": 20.93, + "learning_rate": 7.267313933659716e-08, + "loss": 3.7908, + "step": 1902500 + }, + { + "epoch": 20.94, + "learning_rate": 7.265938720501677e-08, + "loss": 3.7982, + "step": 1903000 + }, + { + "epoch": 20.94, + "learning_rate": 7.264563507343638e-08, + "loss": 3.7978, + "step": 1903500 + }, + { + "epoch": 20.95, + "learning_rate": 7.263188294185598e-08, + "loss": 3.7832, + "step": 1904000 + }, + { + "epoch": 20.95, + "learning_rate": 7.261813081027559e-08, + "loss": 3.7935, + "step": 1904500 + }, + { + "epoch": 20.96, + "learning_rate": 7.26043786786952e-08, + "loss": 3.7906, + "step": 1905000 + }, + { + "epoch": 20.96, + "learning_rate": 7.259062654711479e-08, + "loss": 3.7975, + "step": 1905500 + }, + { + "epoch": 20.97, + "learning_rate": 7.25768744155344e-08, + "loss": 3.8038, + "step": 1906000 + }, + { + "epoch": 20.97, + "learning_rate": 7.256312228395401e-08, + "loss": 3.8055, + "step": 1906500 + }, + { + "epoch": 20.98, + "learning_rate": 7.25493701523736e-08, + "loss": 3.7944, + "step": 1907000 + }, + { + "epoch": 20.99, + "learning_rate": 7.253561802079322e-08, + "loss": 3.7839, + "step": 1907500 + }, + { + "epoch": 20.99, + "learning_rate": 7.252186588921282e-08, + "loss": 3.7735, + "step": 1908000 + }, + { + "epoch": 21.0, + "learning_rate": 7.250811375763243e-08, + "loss": 3.7982, + "step": 1908500 + }, + { + "epoch": 21.0, + "eval_loss": 3.8521878719329834, + "eval_runtime": 6.1908, + "eval_samples_per_second": 251.018, + "step": 1908795 + }, + { + "epoch": 21.0, + "learning_rate": 7.249436162605203e-08, + "loss": 3.7915, + "step": 1909000 + }, + { + "epoch": 21.01, + "learning_rate": 7.248060949447164e-08, + "loss": 3.8036, + "step": 1909500 + }, + { + "epoch": 21.01, + "learning_rate": 7.246685736289125e-08, + "loss": 3.7984, + "step": 1910000 + }, + { + "epoch": 21.02, + "learning_rate": 7.245310523131084e-08, + "loss": 3.8054, + "step": 1910500 + }, + { + "epoch": 21.02, + "learning_rate": 7.243935309973045e-08, + "loss": 3.8057, + "step": 1911000 + }, + { + "epoch": 21.03, + "learning_rate": 7.242560096815006e-08, + "loss": 3.7774, + "step": 1911500 + }, + { + "epoch": 21.04, + "learning_rate": 7.241184883656967e-08, + "loss": 3.7822, + "step": 1912000 + }, + { + "epoch": 21.04, + "learning_rate": 7.239809670498927e-08, + "loss": 3.8008, + "step": 1912500 + }, + { + "epoch": 21.05, + "learning_rate": 7.238434457340888e-08, + "loss": 3.7832, + "step": 1913000 + }, + { + "epoch": 21.05, + "learning_rate": 7.237059244182849e-08, + "loss": 3.7805, + "step": 1913500 + }, + { + "epoch": 21.06, + "learning_rate": 7.23568403102481e-08, + "loss": 3.7764, + "step": 1914000 + }, + { + "epoch": 21.06, + "learning_rate": 7.234308817866769e-08, + "loss": 3.7821, + "step": 1914500 + }, + { + "epoch": 21.07, + "learning_rate": 7.23293360470873e-08, + "loss": 3.797, + "step": 1915000 + }, + { + "epoch": 21.07, + "learning_rate": 7.231558391550691e-08, + "loss": 3.802, + "step": 1915500 + }, + { + "epoch": 21.08, + "learning_rate": 7.230183178392651e-08, + "loss": 3.789, + "step": 1916000 + }, + { + "epoch": 21.08, + "learning_rate": 7.228807965234612e-08, + "loss": 3.787, + "step": 1916500 + }, + { + "epoch": 21.09, + "learning_rate": 7.227432752076571e-08, + "loss": 3.8015, + "step": 1917000 + }, + { + "epoch": 21.1, + "learning_rate": 7.226057538918532e-08, + "loss": 3.8019, + "step": 1917500 + }, + { + "epoch": 21.1, + "learning_rate": 7.224682325760493e-08, + "loss": 3.7979, + "step": 1918000 + }, + { + "epoch": 21.11, + "learning_rate": 7.223307112602453e-08, + "loss": 3.8036, + "step": 1918500 + }, + { + "epoch": 21.11, + "learning_rate": 7.221931899444414e-08, + "loss": 3.7861, + "step": 1919000 + }, + { + "epoch": 21.12, + "learning_rate": 7.220556686286375e-08, + "loss": 3.7904, + "step": 1919500 + }, + { + "epoch": 21.12, + "learning_rate": 7.219181473128334e-08, + "loss": 3.7712, + "step": 1920000 + }, + { + "epoch": 21.13, + "learning_rate": 7.217806259970295e-08, + "loss": 3.8042, + "step": 1920500 + }, + { + "epoch": 21.13, + "learning_rate": 7.216431046812256e-08, + "loss": 3.7906, + "step": 1921000 + }, + { + "epoch": 21.14, + "learning_rate": 7.215055833654216e-08, + "loss": 3.8046, + "step": 1921500 + }, + { + "epoch": 21.15, + "learning_rate": 7.213680620496177e-08, + "loss": 3.7991, + "step": 1922000 + }, + { + "epoch": 21.15, + "learning_rate": 7.212305407338138e-08, + "loss": 3.7979, + "step": 1922500 + }, + { + "epoch": 21.16, + "learning_rate": 7.210930194180097e-08, + "loss": 3.7979, + "step": 1923000 + }, + { + "epoch": 21.16, + "learning_rate": 7.209554981022058e-08, + "loss": 3.8059, + "step": 1923500 + }, + { + "epoch": 21.17, + "learning_rate": 7.208179767864019e-08, + "loss": 3.7963, + "step": 1924000 + }, + { + "epoch": 21.17, + "learning_rate": 7.206804554705979e-08, + "loss": 3.7989, + "step": 1924500 + }, + { + "epoch": 21.18, + "learning_rate": 7.20542934154794e-08, + "loss": 3.7878, + "step": 1925000 + }, + { + "epoch": 21.18, + "learning_rate": 7.2040541283899e-08, + "loss": 3.7871, + "step": 1925500 + }, + { + "epoch": 21.19, + "learning_rate": 7.20267891523186e-08, + "loss": 3.8068, + "step": 1926000 + }, + { + "epoch": 21.19, + "learning_rate": 7.201303702073821e-08, + "loss": 3.7973, + "step": 1926500 + }, + { + "epoch": 21.2, + "learning_rate": 7.199928488915782e-08, + "loss": 3.7968, + "step": 1927000 + }, + { + "epoch": 21.21, + "learning_rate": 7.198553275757742e-08, + "loss": 3.7848, + "step": 1927500 + }, + { + "epoch": 21.21, + "learning_rate": 7.197178062599703e-08, + "loss": 3.7986, + "step": 1928000 + }, + { + "epoch": 21.22, + "learning_rate": 7.195802849441664e-08, + "loss": 3.7859, + "step": 1928500 + }, + { + "epoch": 21.22, + "learning_rate": 7.194427636283623e-08, + "loss": 3.7906, + "step": 1929000 + }, + { + "epoch": 21.23, + "learning_rate": 7.193052423125584e-08, + "loss": 3.8065, + "step": 1929500 + }, + { + "epoch": 21.23, + "learning_rate": 7.191677209967545e-08, + "loss": 3.7865, + "step": 1930000 + }, + { + "epoch": 21.24, + "learning_rate": 7.190301996809505e-08, + "loss": 3.8025, + "step": 1930500 + }, + { + "epoch": 21.24, + "learning_rate": 7.188926783651466e-08, + "loss": 3.7834, + "step": 1931000 + }, + { + "epoch": 21.25, + "learning_rate": 7.187551570493427e-08, + "loss": 3.8058, + "step": 1931500 + }, + { + "epoch": 21.26, + "learning_rate": 7.186176357335386e-08, + "loss": 3.8074, + "step": 1932000 + }, + { + "epoch": 21.26, + "learning_rate": 7.184801144177347e-08, + "loss": 3.8105, + "step": 1932500 + }, + { + "epoch": 21.27, + "learning_rate": 7.183425931019308e-08, + "loss": 3.8106, + "step": 1933000 + }, + { + "epoch": 21.27, + "learning_rate": 7.182050717861268e-08, + "loss": 3.7753, + "step": 1933500 + }, + { + "epoch": 21.28, + "learning_rate": 7.180675504703229e-08, + "loss": 3.7976, + "step": 1934000 + }, + { + "epoch": 21.28, + "learning_rate": 7.17930029154519e-08, + "loss": 3.7777, + "step": 1934500 + }, + { + "epoch": 21.29, + "learning_rate": 7.177925078387149e-08, + "loss": 3.7885, + "step": 1935000 + }, + { + "epoch": 21.29, + "learning_rate": 7.17654986522911e-08, + "loss": 3.8018, + "step": 1935500 + }, + { + "epoch": 21.3, + "learning_rate": 7.175174652071071e-08, + "loss": 3.7808, + "step": 1936000 + }, + { + "epoch": 21.3, + "learning_rate": 7.17379943891303e-08, + "loss": 3.7829, + "step": 1936500 + }, + { + "epoch": 21.31, + "learning_rate": 7.172424225754992e-08, + "loss": 3.8059, + "step": 1937000 + }, + { + "epoch": 21.32, + "learning_rate": 7.171049012596952e-08, + "loss": 3.7811, + "step": 1937500 + }, + { + "epoch": 21.32, + "learning_rate": 7.169673799438912e-08, + "loss": 3.799, + "step": 1938000 + }, + { + "epoch": 21.33, + "learning_rate": 7.168298586280873e-08, + "loss": 3.7879, + "step": 1938500 + }, + { + "epoch": 21.33, + "learning_rate": 7.166923373122834e-08, + "loss": 3.7984, + "step": 1939000 + }, + { + "epoch": 21.34, + "learning_rate": 7.165548159964794e-08, + "loss": 3.7884, + "step": 1939500 + }, + { + "epoch": 21.34, + "learning_rate": 7.164172946806754e-08, + "loss": 3.7958, + "step": 1940000 + }, + { + "epoch": 21.35, + "learning_rate": 7.162797733648715e-08, + "loss": 3.7943, + "step": 1940500 + }, + { + "epoch": 21.35, + "learning_rate": 7.161422520490675e-08, + "loss": 3.7958, + "step": 1941000 + }, + { + "epoch": 21.36, + "learning_rate": 7.160047307332636e-08, + "loss": 3.7818, + "step": 1941500 + }, + { + "epoch": 21.37, + "learning_rate": 7.158672094174597e-08, + "loss": 3.7927, + "step": 1942000 + }, + { + "epoch": 21.37, + "learning_rate": 7.157296881016557e-08, + "loss": 3.8043, + "step": 1942500 + }, + { + "epoch": 21.38, + "learning_rate": 7.155921667858517e-08, + "loss": 3.7694, + "step": 1943000 + }, + { + "epoch": 21.38, + "learning_rate": 7.154546454700478e-08, + "loss": 3.7858, + "step": 1943500 + }, + { + "epoch": 21.39, + "learning_rate": 7.153171241542438e-08, + "loss": 3.7929, + "step": 1944000 + }, + { + "epoch": 21.39, + "learning_rate": 7.151796028384399e-08, + "loss": 3.7837, + "step": 1944500 + }, + { + "epoch": 21.4, + "learning_rate": 7.15042081522636e-08, + "loss": 3.809, + "step": 1945000 + }, + { + "epoch": 21.4, + "learning_rate": 7.14904560206832e-08, + "loss": 3.793, + "step": 1945500 + }, + { + "epoch": 21.41, + "learning_rate": 7.14767038891028e-08, + "loss": 3.7922, + "step": 1946000 + }, + { + "epoch": 21.41, + "learning_rate": 7.146295175752241e-08, + "loss": 3.7974, + "step": 1946500 + }, + { + "epoch": 21.42, + "learning_rate": 7.144919962594201e-08, + "loss": 3.7915, + "step": 1947000 + }, + { + "epoch": 21.43, + "learning_rate": 7.143544749436162e-08, + "loss": 3.7852, + "step": 1947500 + }, + { + "epoch": 21.43, + "learning_rate": 7.142169536278123e-08, + "loss": 3.793, + "step": 1948000 + }, + { + "epoch": 21.44, + "learning_rate": 7.140794323120082e-08, + "loss": 3.7972, + "step": 1948500 + }, + { + "epoch": 21.44, + "learning_rate": 7.139419109962043e-08, + "loss": 3.7753, + "step": 1949000 + }, + { + "epoch": 21.45, + "learning_rate": 7.138043896804004e-08, + "loss": 3.8026, + "step": 1949500 + }, + { + "epoch": 21.45, + "learning_rate": 7.136668683645964e-08, + "loss": 3.8111, + "step": 1950000 + }, + { + "epoch": 21.46, + "learning_rate": 7.135293470487925e-08, + "loss": 3.789, + "step": 1950500 + }, + { + "epoch": 21.46, + "learning_rate": 7.133918257329886e-08, + "loss": 3.8035, + "step": 1951000 + }, + { + "epoch": 21.47, + "learning_rate": 7.132543044171845e-08, + "loss": 3.8021, + "step": 1951500 + }, + { + "epoch": 21.48, + "learning_rate": 7.131167831013806e-08, + "loss": 3.792, + "step": 1952000 + }, + { + "epoch": 21.48, + "learning_rate": 7.129792617855767e-08, + "loss": 3.7927, + "step": 1952500 + }, + { + "epoch": 21.49, + "learning_rate": 7.128417404697727e-08, + "loss": 3.8073, + "step": 1953000 + }, + { + "epoch": 21.49, + "learning_rate": 7.127042191539688e-08, + "loss": 3.7969, + "step": 1953500 + }, + { + "epoch": 21.5, + "learning_rate": 7.125666978381649e-08, + "loss": 3.7974, + "step": 1954000 + }, + { + "epoch": 21.5, + "learning_rate": 7.124291765223608e-08, + "loss": 3.7906, + "step": 1954500 + }, + { + "epoch": 21.51, + "learning_rate": 7.122916552065569e-08, + "loss": 3.8093, + "step": 1955000 + }, + { + "epoch": 21.51, + "learning_rate": 7.12154133890753e-08, + "loss": 3.7916, + "step": 1955500 + }, + { + "epoch": 21.52, + "learning_rate": 7.120166125749491e-08, + "loss": 3.791, + "step": 1956000 + }, + { + "epoch": 21.52, + "learning_rate": 7.118790912591451e-08, + "loss": 3.7791, + "step": 1956500 + }, + { + "epoch": 21.53, + "learning_rate": 7.117415699433412e-08, + "loss": 3.7935, + "step": 1957000 + }, + { + "epoch": 21.54, + "learning_rate": 7.116040486275373e-08, + "loss": 3.8089, + "step": 1957500 + }, + { + "epoch": 21.54, + "learning_rate": 7.114665273117332e-08, + "loss": 3.7956, + "step": 1958000 + }, + { + "epoch": 21.55, + "learning_rate": 7.113290059959293e-08, + "loss": 3.7878, + "step": 1958500 + }, + { + "epoch": 21.55, + "learning_rate": 7.111914846801254e-08, + "loss": 3.7664, + "step": 1959000 + }, + { + "epoch": 21.56, + "learning_rate": 7.110539633643215e-08, + "loss": 3.8112, + "step": 1959500 + }, + { + "epoch": 21.56, + "learning_rate": 7.109164420485175e-08, + "loss": 3.7953, + "step": 1960000 + }, + { + "epoch": 21.57, + "learning_rate": 7.107789207327136e-08, + "loss": 3.7952, + "step": 1960500 + }, + { + "epoch": 21.57, + "learning_rate": 7.106413994169097e-08, + "loss": 3.7973, + "step": 1961000 + }, + { + "epoch": 21.58, + "learning_rate": 7.105038781011057e-08, + "loss": 3.8128, + "step": 1961500 + }, + { + "epoch": 21.59, + "learning_rate": 7.103663567853017e-08, + "loss": 3.7756, + "step": 1962000 + }, + { + "epoch": 21.59, + "learning_rate": 7.102288354694978e-08, + "loss": 3.7989, + "step": 1962500 + }, + { + "epoch": 21.6, + "learning_rate": 7.100913141536939e-08, + "loss": 3.7933, + "step": 1963000 + }, + { + "epoch": 21.6, + "learning_rate": 7.099537928378899e-08, + "loss": 3.7988, + "step": 1963500 + }, + { + "epoch": 21.61, + "learning_rate": 7.09816271522086e-08, + "loss": 3.7644, + "step": 1964000 + }, + { + "epoch": 21.61, + "learning_rate": 7.09678750206282e-08, + "loss": 3.7737, + "step": 1964500 + }, + { + "epoch": 21.62, + "learning_rate": 7.09541228890478e-08, + "loss": 3.8122, + "step": 1965000 + }, + { + "epoch": 21.62, + "learning_rate": 7.094037075746741e-08, + "loss": 3.8006, + "step": 1965500 + }, + { + "epoch": 21.63, + "learning_rate": 7.092661862588702e-08, + "loss": 3.7809, + "step": 1966000 + }, + { + "epoch": 21.63, + "learning_rate": 7.091286649430662e-08, + "loss": 3.7782, + "step": 1966500 + }, + { + "epoch": 21.64, + "learning_rate": 7.089911436272622e-08, + "loss": 3.7981, + "step": 1967000 + }, + { + "epoch": 21.65, + "learning_rate": 7.088536223114583e-08, + "loss": 3.7923, + "step": 1967500 + }, + { + "epoch": 21.65, + "learning_rate": 7.087161009956543e-08, + "loss": 3.7902, + "step": 1968000 + }, + { + "epoch": 21.66, + "learning_rate": 7.085785796798504e-08, + "loss": 3.7796, + "step": 1968500 + }, + { + "epoch": 21.66, + "learning_rate": 7.084410583640465e-08, + "loss": 3.7889, + "step": 1969000 + }, + { + "epoch": 21.67, + "learning_rate": 7.083035370482424e-08, + "loss": 3.7837, + "step": 1969500 + }, + { + "epoch": 21.67, + "learning_rate": 7.081660157324385e-08, + "loss": 3.7844, + "step": 1970000 + }, + { + "epoch": 21.68, + "learning_rate": 7.080284944166346e-08, + "loss": 3.7851, + "step": 1970500 + }, + { + "epoch": 21.68, + "learning_rate": 7.078909731008306e-08, + "loss": 3.8011, + "step": 1971000 + }, + { + "epoch": 21.69, + "learning_rate": 7.077534517850267e-08, + "loss": 3.7916, + "step": 1971500 + }, + { + "epoch": 21.7, + "learning_rate": 7.076159304692228e-08, + "loss": 3.79, + "step": 1972000 + }, + { + "epoch": 21.7, + "learning_rate": 7.074784091534187e-08, + "loss": 3.7911, + "step": 1972500 + }, + { + "epoch": 21.71, + "learning_rate": 7.073408878376148e-08, + "loss": 3.7921, + "step": 1973000 + }, + { + "epoch": 21.71, + "learning_rate": 7.072033665218109e-08, + "loss": 3.7653, + "step": 1973500 + }, + { + "epoch": 21.72, + "learning_rate": 7.070658452060069e-08, + "loss": 3.8045, + "step": 1974000 + }, + { + "epoch": 21.72, + "learning_rate": 7.06928323890203e-08, + "loss": 3.8068, + "step": 1974500 + }, + { + "epoch": 21.73, + "learning_rate": 7.067908025743991e-08, + "loss": 3.7931, + "step": 1975000 + }, + { + "epoch": 21.73, + "learning_rate": 7.06653281258595e-08, + "loss": 3.79, + "step": 1975500 + }, + { + "epoch": 21.74, + "learning_rate": 7.065157599427911e-08, + "loss": 3.8049, + "step": 1976000 + }, + { + "epoch": 21.74, + "learning_rate": 7.063782386269872e-08, + "loss": 3.7892, + "step": 1976500 + }, + { + "epoch": 21.75, + "learning_rate": 7.062407173111832e-08, + "loss": 3.7835, + "step": 1977000 + }, + { + "epoch": 21.76, + "learning_rate": 7.061031959953793e-08, + "loss": 3.7738, + "step": 1977500 + }, + { + "epoch": 21.76, + "learning_rate": 7.059656746795754e-08, + "loss": 3.7954, + "step": 1978000 + }, + { + "epoch": 21.77, + "learning_rate": 7.058281533637713e-08, + "loss": 3.8024, + "step": 1978500 + }, + { + "epoch": 21.77, + "learning_rate": 7.056906320479674e-08, + "loss": 3.786, + "step": 1979000 + }, + { + "epoch": 21.78, + "learning_rate": 7.055531107321635e-08, + "loss": 3.8177, + "step": 1979500 + }, + { + "epoch": 21.78, + "learning_rate": 7.054155894163595e-08, + "loss": 3.7887, + "step": 1980000 + }, + { + "epoch": 21.79, + "learning_rate": 7.052780681005556e-08, + "loss": 3.7662, + "step": 1980500 + }, + { + "epoch": 21.79, + "learning_rate": 7.051405467847517e-08, + "loss": 3.7833, + "step": 1981000 + }, + { + "epoch": 21.8, + "learning_rate": 7.050030254689476e-08, + "loss": 3.7735, + "step": 1981500 + }, + { + "epoch": 21.81, + "learning_rate": 7.048655041531437e-08, + "loss": 3.7807, + "step": 1982000 + }, + { + "epoch": 21.81, + "learning_rate": 7.047279828373398e-08, + "loss": 3.8062, + "step": 1982500 + }, + { + "epoch": 21.82, + "learning_rate": 7.045904615215358e-08, + "loss": 3.7878, + "step": 1983000 + }, + { + "epoch": 21.82, + "learning_rate": 7.044529402057319e-08, + "loss": 3.7903, + "step": 1983500 + }, + { + "epoch": 21.83, + "learning_rate": 7.04315418889928e-08, + "loss": 3.8164, + "step": 1984000 + }, + { + "epoch": 21.83, + "learning_rate": 7.041778975741239e-08, + "loss": 3.8074, + "step": 1984500 + }, + { + "epoch": 21.84, + "learning_rate": 7.0404037625832e-08, + "loss": 3.779, + "step": 1985000 + }, + { + "epoch": 21.84, + "learning_rate": 7.039028549425161e-08, + "loss": 3.7793, + "step": 1985500 + }, + { + "epoch": 21.85, + "learning_rate": 7.037653336267121e-08, + "loss": 3.7971, + "step": 1986000 + }, + { + "epoch": 21.85, + "learning_rate": 7.036278123109082e-08, + "loss": 3.8091, + "step": 1986500 + }, + { + "epoch": 21.86, + "learning_rate": 7.034902909951043e-08, + "loss": 3.7856, + "step": 1987000 + }, + { + "epoch": 21.87, + "learning_rate": 7.033527696793002e-08, + "loss": 3.7919, + "step": 1987500 + }, + { + "epoch": 21.87, + "learning_rate": 7.032152483634963e-08, + "loss": 3.7837, + "step": 1988000 + }, + { + "epoch": 21.88, + "learning_rate": 7.030777270476924e-08, + "loss": 3.8052, + "step": 1988500 + }, + { + "epoch": 21.88, + "learning_rate": 7.029402057318884e-08, + "loss": 3.8023, + "step": 1989000 + }, + { + "epoch": 21.89, + "learning_rate": 7.028026844160845e-08, + "loss": 3.7983, + "step": 1989500 + }, + { + "epoch": 21.89, + "learning_rate": 7.026651631002806e-08, + "loss": 3.7899, + "step": 1990000 + }, + { + "epoch": 21.9, + "learning_rate": 7.025276417844765e-08, + "loss": 3.7778, + "step": 1990500 + }, + { + "epoch": 21.9, + "learning_rate": 7.023901204686726e-08, + "loss": 3.8032, + "step": 1991000 + }, + { + "epoch": 21.91, + "learning_rate": 7.022525991528686e-08, + "loss": 3.7824, + "step": 1991500 + }, + { + "epoch": 21.92, + "learning_rate": 7.021150778370647e-08, + "loss": 3.7982, + "step": 1992000 + }, + { + "epoch": 21.92, + "learning_rate": 7.019775565212608e-08, + "loss": 3.7951, + "step": 1992500 + }, + { + "epoch": 21.93, + "learning_rate": 7.018400352054567e-08, + "loss": 3.7974, + "step": 1993000 + }, + { + "epoch": 21.93, + "learning_rate": 7.017025138896528e-08, + "loss": 3.7859, + "step": 1993500 + }, + { + "epoch": 21.94, + "learning_rate": 7.015649925738489e-08, + "loss": 3.8058, + "step": 1994000 + }, + { + "epoch": 21.94, + "learning_rate": 7.014274712580449e-08, + "loss": 3.7919, + "step": 1994500 + }, + { + "epoch": 21.95, + "learning_rate": 7.01289949942241e-08, + "loss": 3.7757, + "step": 1995000 + }, + { + "epoch": 21.95, + "learning_rate": 7.01152428626437e-08, + "loss": 3.7913, + "step": 1995500 + }, + { + "epoch": 21.96, + "learning_rate": 7.01014907310633e-08, + "loss": 3.7847, + "step": 1996000 + }, + { + "epoch": 21.96, + "learning_rate": 7.008773859948291e-08, + "loss": 3.7816, + "step": 1996500 + }, + { + "epoch": 21.97, + "learning_rate": 7.007398646790252e-08, + "loss": 3.7847, + "step": 1997000 + }, + { + "epoch": 21.98, + "learning_rate": 7.006023433632212e-08, + "loss": 3.8077, + "step": 1997500 + }, + { + "epoch": 21.98, + "learning_rate": 7.004648220474173e-08, + "loss": 3.7798, + "step": 1998000 + }, + { + "epoch": 21.99, + "learning_rate": 7.003273007316134e-08, + "loss": 3.8163, + "step": 1998500 + }, + { + "epoch": 21.99, + "learning_rate": 7.001897794158093e-08, + "loss": 3.7825, + "step": 1999000 + }, + { + "epoch": 22.0, + "learning_rate": 7.000522581000054e-08, + "loss": 3.7664, + "step": 1999500 + }, + { + "epoch": 22.0, + "eval_loss": 3.850017786026001, + "eval_runtime": 6.1404, + "eval_samples_per_second": 253.077, + "step": 1999690 + }, + { + "epoch": 22.0, + "learning_rate": 6.999147367842015e-08, + "loss": 3.8066, + "step": 2000000 + }, + { + "epoch": 22.01, + "learning_rate": 6.997772154683975e-08, + "loss": 3.7788, + "step": 2000500 + }, + { + "epoch": 22.01, + "learning_rate": 6.996396941525936e-08, + "loss": 3.7936, + "step": 2001000 + }, + { + "epoch": 22.02, + "learning_rate": 6.995021728367896e-08, + "loss": 3.7973, + "step": 2001500 + }, + { + "epoch": 22.03, + "learning_rate": 6.993646515209856e-08, + "loss": 3.7848, + "step": 2002000 + }, + { + "epoch": 22.03, + "learning_rate": 6.992271302051817e-08, + "loss": 3.7862, + "step": 2002500 + }, + { + "epoch": 22.04, + "learning_rate": 6.990896088893778e-08, + "loss": 3.796, + "step": 2003000 + }, + { + "epoch": 22.04, + "learning_rate": 6.989520875735739e-08, + "loss": 3.8068, + "step": 2003500 + }, + { + "epoch": 22.05, + "learning_rate": 6.988145662577698e-08, + "loss": 3.7887, + "step": 2004000 + }, + { + "epoch": 22.05, + "learning_rate": 6.98677044941966e-08, + "loss": 3.7964, + "step": 2004500 + }, + { + "epoch": 22.06, + "learning_rate": 6.98539523626162e-08, + "loss": 3.79, + "step": 2005000 + }, + { + "epoch": 22.06, + "learning_rate": 6.98402002310358e-08, + "loss": 3.8109, + "step": 2005500 + }, + { + "epoch": 22.07, + "learning_rate": 6.982644809945541e-08, + "loss": 3.8085, + "step": 2006000 + }, + { + "epoch": 22.07, + "learning_rate": 6.981269596787502e-08, + "loss": 3.7893, + "step": 2006500 + }, + { + "epoch": 22.08, + "learning_rate": 6.979894383629463e-08, + "loss": 3.8047, + "step": 2007000 + }, + { + "epoch": 22.09, + "learning_rate": 6.978519170471422e-08, + "loss": 3.7857, + "step": 2007500 + }, + { + "epoch": 22.09, + "learning_rate": 6.977143957313383e-08, + "loss": 3.7952, + "step": 2008000 + }, + { + "epoch": 22.1, + "learning_rate": 6.975768744155344e-08, + "loss": 3.792, + "step": 2008500 + }, + { + "epoch": 22.1, + "learning_rate": 6.974393530997305e-08, + "loss": 3.7971, + "step": 2009000 + }, + { + "epoch": 22.11, + "learning_rate": 6.973018317839265e-08, + "loss": 3.7849, + "step": 2009500 + }, + { + "epoch": 22.11, + "learning_rate": 6.971643104681226e-08, + "loss": 3.7939, + "step": 2010000 + }, + { + "epoch": 22.12, + "learning_rate": 6.970267891523187e-08, + "loss": 3.7788, + "step": 2010500 + }, + { + "epoch": 22.12, + "learning_rate": 6.968892678365146e-08, + "loss": 3.779, + "step": 2011000 + }, + { + "epoch": 22.13, + "learning_rate": 6.967517465207107e-08, + "loss": 3.8088, + "step": 2011500 + }, + { + "epoch": 22.14, + "learning_rate": 6.966142252049068e-08, + "loss": 3.7793, + "step": 2012000 + }, + { + "epoch": 22.14, + "learning_rate": 6.964767038891028e-08, + "loss": 3.8087, + "step": 2012500 + }, + { + "epoch": 22.15, + "learning_rate": 6.963391825732989e-08, + "loss": 3.7913, + "step": 2013000 + }, + { + "epoch": 22.15, + "learning_rate": 6.96201661257495e-08, + "loss": 3.7824, + "step": 2013500 + }, + { + "epoch": 22.16, + "learning_rate": 6.960641399416909e-08, + "loss": 3.7825, + "step": 2014000 + }, + { + "epoch": 22.16, + "learning_rate": 6.95926618625887e-08, + "loss": 3.7882, + "step": 2014500 + }, + { + "epoch": 22.17, + "learning_rate": 6.957890973100831e-08, + "loss": 3.8101, + "step": 2015000 + }, + { + "epoch": 22.17, + "learning_rate": 6.956515759942791e-08, + "loss": 3.8003, + "step": 2015500 + }, + { + "epoch": 22.18, + "learning_rate": 6.955140546784752e-08, + "loss": 3.8157, + "step": 2016000 + }, + { + "epoch": 22.18, + "learning_rate": 6.953765333626713e-08, + "loss": 3.7551, + "step": 2016500 + }, + { + "epoch": 22.19, + "learning_rate": 6.952390120468672e-08, + "loss": 3.7764, + "step": 2017000 + }, + { + "epoch": 22.2, + "learning_rate": 6.951014907310633e-08, + "loss": 3.7947, + "step": 2017500 + }, + { + "epoch": 22.2, + "learning_rate": 6.949639694152594e-08, + "loss": 3.778, + "step": 2018000 + }, + { + "epoch": 22.21, + "learning_rate": 6.948264480994554e-08, + "loss": 3.7788, + "step": 2018500 + }, + { + "epoch": 22.21, + "learning_rate": 6.946889267836515e-08, + "loss": 3.7832, + "step": 2019000 + }, + { + "epoch": 22.22, + "learning_rate": 6.945514054678476e-08, + "loss": 3.8007, + "step": 2019500 + }, + { + "epoch": 22.22, + "learning_rate": 6.944138841520435e-08, + "loss": 3.7839, + "step": 2020000 + }, + { + "epoch": 22.23, + "learning_rate": 6.942763628362396e-08, + "loss": 3.8046, + "step": 2020500 + }, + { + "epoch": 22.23, + "learning_rate": 6.941388415204357e-08, + "loss": 3.7683, + "step": 2021000 + }, + { + "epoch": 22.24, + "learning_rate": 6.940013202046317e-08, + "loss": 3.7778, + "step": 2021500 + }, + { + "epoch": 22.25, + "learning_rate": 6.938637988888278e-08, + "loss": 3.7763, + "step": 2022000 + }, + { + "epoch": 22.25, + "learning_rate": 6.937262775730239e-08, + "loss": 3.8052, + "step": 2022500 + }, + { + "epoch": 22.26, + "learning_rate": 6.935887562572198e-08, + "loss": 3.7951, + "step": 2023000 + }, + { + "epoch": 22.26, + "learning_rate": 6.934512349414159e-08, + "loss": 3.7712, + "step": 2023500 + }, + { + "epoch": 22.27, + "learning_rate": 6.93313713625612e-08, + "loss": 3.7888, + "step": 2024000 + }, + { + "epoch": 22.27, + "learning_rate": 6.93176192309808e-08, + "loss": 3.7814, + "step": 2024500 + }, + { + "epoch": 22.28, + "learning_rate": 6.93038670994004e-08, + "loss": 3.7712, + "step": 2025000 + }, + { + "epoch": 22.28, + "learning_rate": 6.929011496782001e-08, + "loss": 3.7829, + "step": 2025500 + }, + { + "epoch": 22.29, + "learning_rate": 6.927636283623961e-08, + "loss": 3.8138, + "step": 2026000 + }, + { + "epoch": 22.29, + "learning_rate": 6.926261070465922e-08, + "loss": 3.7753, + "step": 2026500 + }, + { + "epoch": 22.3, + "learning_rate": 6.924885857307883e-08, + "loss": 3.7798, + "step": 2027000 + }, + { + "epoch": 22.31, + "learning_rate": 6.923510644149843e-08, + "loss": 3.8074, + "step": 2027500 + }, + { + "epoch": 22.31, + "learning_rate": 6.922135430991803e-08, + "loss": 3.7888, + "step": 2028000 + }, + { + "epoch": 22.32, + "learning_rate": 6.920760217833764e-08, + "loss": 3.7851, + "step": 2028500 + }, + { + "epoch": 22.32, + "learning_rate": 6.919385004675724e-08, + "loss": 3.7909, + "step": 2029000 + }, + { + "epoch": 22.33, + "learning_rate": 6.918009791517685e-08, + "loss": 3.7922, + "step": 2029500 + }, + { + "epoch": 22.33, + "learning_rate": 6.916634578359646e-08, + "loss": 3.8047, + "step": 2030000 + }, + { + "epoch": 22.34, + "learning_rate": 6.915259365201606e-08, + "loss": 3.8004, + "step": 2030500 + }, + { + "epoch": 22.34, + "learning_rate": 6.913884152043566e-08, + "loss": 3.7928, + "step": 2031000 + }, + { + "epoch": 22.35, + "learning_rate": 6.912508938885527e-08, + "loss": 3.7868, + "step": 2031500 + }, + { + "epoch": 22.36, + "learning_rate": 6.911133725727487e-08, + "loss": 3.7918, + "step": 2032000 + }, + { + "epoch": 22.36, + "learning_rate": 6.909758512569448e-08, + "loss": 3.797, + "step": 2032500 + }, + { + "epoch": 22.37, + "learning_rate": 6.908383299411409e-08, + "loss": 3.793, + "step": 2033000 + }, + { + "epoch": 22.37, + "learning_rate": 6.907008086253368e-08, + "loss": 3.7841, + "step": 2033500 + }, + { + "epoch": 22.38, + "learning_rate": 6.90563287309533e-08, + "loss": 3.7899, + "step": 2034000 + }, + { + "epoch": 22.38, + "learning_rate": 6.90425765993729e-08, + "loss": 3.7759, + "step": 2034500 + }, + { + "epoch": 22.39, + "learning_rate": 6.90288244677925e-08, + "loss": 3.7857, + "step": 2035000 + }, + { + "epoch": 22.39, + "learning_rate": 6.901507233621211e-08, + "loss": 3.7883, + "step": 2035500 + }, + { + "epoch": 22.4, + "learning_rate": 6.900132020463172e-08, + "loss": 3.7867, + "step": 2036000 + }, + { + "epoch": 22.4, + "learning_rate": 6.898756807305131e-08, + "loss": 3.767, + "step": 2036500 + }, + { + "epoch": 22.41, + "learning_rate": 6.897381594147092e-08, + "loss": 3.7827, + "step": 2037000 + }, + { + "epoch": 22.42, + "learning_rate": 6.896006380989053e-08, + "loss": 3.7938, + "step": 2037500 + }, + { + "epoch": 22.42, + "learning_rate": 6.894631167831013e-08, + "loss": 3.7737, + "step": 2038000 + }, + { + "epoch": 22.43, + "learning_rate": 6.893255954672974e-08, + "loss": 3.8141, + "step": 2038500 + }, + { + "epoch": 22.43, + "learning_rate": 6.891880741514935e-08, + "loss": 3.7932, + "step": 2039000 + }, + { + "epoch": 22.44, + "learning_rate": 6.890505528356894e-08, + "loss": 3.809, + "step": 2039500 + }, + { + "epoch": 22.44, + "learning_rate": 6.889130315198855e-08, + "loss": 3.7865, + "step": 2040000 + }, + { + "epoch": 22.45, + "learning_rate": 6.887755102040816e-08, + "loss": 3.8069, + "step": 2040500 + }, + { + "epoch": 22.45, + "learning_rate": 6.886379888882776e-08, + "loss": 3.7876, + "step": 2041000 + }, + { + "epoch": 22.46, + "learning_rate": 6.885004675724737e-08, + "loss": 3.7873, + "step": 2041500 + }, + { + "epoch": 22.47, + "learning_rate": 6.883629462566698e-08, + "loss": 3.788, + "step": 2042000 + }, + { + "epoch": 22.47, + "learning_rate": 6.882254249408657e-08, + "loss": 3.7951, + "step": 2042500 + }, + { + "epoch": 22.48, + "learning_rate": 6.880879036250618e-08, + "loss": 3.7851, + "step": 2043000 + }, + { + "epoch": 22.48, + "learning_rate": 6.879503823092579e-08, + "loss": 3.7649, + "step": 2043500 + }, + { + "epoch": 22.49, + "learning_rate": 6.878128609934539e-08, + "loss": 3.7786, + "step": 2044000 + }, + { + "epoch": 22.49, + "learning_rate": 6.8767533967765e-08, + "loss": 3.8037, + "step": 2044500 + }, + { + "epoch": 22.5, + "learning_rate": 6.875378183618461e-08, + "loss": 3.8096, + "step": 2045000 + }, + { + "epoch": 22.5, + "learning_rate": 6.87400297046042e-08, + "loss": 3.7658, + "step": 2045500 + }, + { + "epoch": 22.51, + "learning_rate": 6.872627757302381e-08, + "loss": 3.79, + "step": 2046000 + }, + { + "epoch": 22.51, + "learning_rate": 6.871252544144342e-08, + "loss": 3.7836, + "step": 2046500 + }, + { + "epoch": 22.52, + "learning_rate": 6.869877330986302e-08, + "loss": 3.7934, + "step": 2047000 + }, + { + "epoch": 22.53, + "learning_rate": 6.868502117828263e-08, + "loss": 3.7855, + "step": 2047500 + }, + { + "epoch": 22.53, + "learning_rate": 6.867126904670224e-08, + "loss": 3.7741, + "step": 2048000 + }, + { + "epoch": 22.54, + "learning_rate": 6.865751691512183e-08, + "loss": 3.8051, + "step": 2048500 + }, + { + "epoch": 22.54, + "learning_rate": 6.864376478354144e-08, + "loss": 3.7978, + "step": 2049000 + }, + { + "epoch": 22.55, + "learning_rate": 6.863001265196105e-08, + "loss": 3.7771, + "step": 2049500 + }, + { + "epoch": 22.55, + "learning_rate": 6.861626052038065e-08, + "loss": 3.8046, + "step": 2050000 + }, + { + "epoch": 22.56, + "learning_rate": 6.860250838880026e-08, + "loss": 3.7837, + "step": 2050500 + }, + { + "epoch": 22.56, + "learning_rate": 6.858875625721987e-08, + "loss": 3.7748, + "step": 2051000 + }, + { + "epoch": 22.57, + "learning_rate": 6.857500412563946e-08, + "loss": 3.7899, + "step": 2051500 + }, + { + "epoch": 22.58, + "learning_rate": 6.856125199405907e-08, + "loss": 3.775, + "step": 2052000 + }, + { + "epoch": 22.58, + "learning_rate": 6.854749986247868e-08, + "loss": 3.7796, + "step": 2052500 + }, + { + "epoch": 22.59, + "learning_rate": 6.853374773089828e-08, + "loss": 3.8067, + "step": 2053000 + }, + { + "epoch": 22.59, + "learning_rate": 6.851999559931789e-08, + "loss": 3.8023, + "step": 2053500 + }, + { + "epoch": 22.6, + "learning_rate": 6.85062434677375e-08, + "loss": 3.7928, + "step": 2054000 + }, + { + "epoch": 22.6, + "learning_rate": 6.84924913361571e-08, + "loss": 3.7919, + "step": 2054500 + }, + { + "epoch": 22.61, + "learning_rate": 6.84787392045767e-08, + "loss": 3.7914, + "step": 2055000 + }, + { + "epoch": 22.61, + "learning_rate": 6.846498707299631e-08, + "loss": 3.7881, + "step": 2055500 + }, + { + "epoch": 22.62, + "learning_rate": 6.845123494141592e-08, + "loss": 3.771, + "step": 2056000 + }, + { + "epoch": 22.63, + "learning_rate": 6.843748280983553e-08, + "loss": 3.7893, + "step": 2056500 + }, + { + "epoch": 22.63, + "learning_rate": 6.842373067825513e-08, + "loss": 3.7695, + "step": 2057000 + }, + { + "epoch": 22.64, + "learning_rate": 6.840997854667473e-08, + "loss": 3.7846, + "step": 2057500 + }, + { + "epoch": 22.64, + "learning_rate": 6.839622641509434e-08, + "loss": 3.7742, + "step": 2058000 + }, + { + "epoch": 22.65, + "learning_rate": 6.838247428351395e-08, + "loss": 3.7913, + "step": 2058500 + }, + { + "epoch": 22.65, + "learning_rate": 6.836872215193355e-08, + "loss": 3.7827, + "step": 2059000 + }, + { + "epoch": 22.66, + "learning_rate": 6.835497002035316e-08, + "loss": 3.7851, + "step": 2059500 + }, + { + "epoch": 22.66, + "learning_rate": 6.834121788877277e-08, + "loss": 3.7711, + "step": 2060000 + }, + { + "epoch": 22.67, + "learning_rate": 6.832746575719236e-08, + "loss": 3.8043, + "step": 2060500 + }, + { + "epoch": 22.67, + "learning_rate": 6.831371362561197e-08, + "loss": 3.7923, + "step": 2061000 + }, + { + "epoch": 22.68, + "learning_rate": 6.829996149403158e-08, + "loss": 3.8001, + "step": 2061500 + }, + { + "epoch": 22.69, + "learning_rate": 6.828620936245118e-08, + "loss": 3.7818, + "step": 2062000 + }, + { + "epoch": 22.69, + "learning_rate": 6.827245723087079e-08, + "loss": 3.8019, + "step": 2062500 + }, + { + "epoch": 22.7, + "learning_rate": 6.82587050992904e-08, + "loss": 3.7833, + "step": 2063000 + }, + { + "epoch": 22.7, + "learning_rate": 6.824495296771e-08, + "loss": 3.7806, + "step": 2063500 + }, + { + "epoch": 22.71, + "learning_rate": 6.82312008361296e-08, + "loss": 3.788, + "step": 2064000 + }, + { + "epoch": 22.71, + "learning_rate": 6.821744870454921e-08, + "loss": 3.7813, + "step": 2064500 + }, + { + "epoch": 22.72, + "learning_rate": 6.820369657296881e-08, + "loss": 3.802, + "step": 2065000 + }, + { + "epoch": 22.72, + "learning_rate": 6.818994444138842e-08, + "loss": 3.8017, + "step": 2065500 + }, + { + "epoch": 22.73, + "learning_rate": 6.817619230980801e-08, + "loss": 3.7946, + "step": 2066000 + }, + { + "epoch": 22.74, + "learning_rate": 6.816244017822762e-08, + "loss": 3.7915, + "step": 2066500 + }, + { + "epoch": 22.74, + "learning_rate": 6.814868804664723e-08, + "loss": 3.7797, + "step": 2067000 + }, + { + "epoch": 22.75, + "learning_rate": 6.813493591506683e-08, + "loss": 3.7801, + "step": 2067500 + }, + { + "epoch": 22.75, + "learning_rate": 6.812118378348644e-08, + "loss": 3.7907, + "step": 2068000 + }, + { + "epoch": 22.76, + "learning_rate": 6.810743165190605e-08, + "loss": 3.7919, + "step": 2068500 + }, + { + "epoch": 22.76, + "learning_rate": 6.809367952032564e-08, + "loss": 3.7985, + "step": 2069000 + }, + { + "epoch": 22.77, + "learning_rate": 6.807992738874525e-08, + "loss": 3.7606, + "step": 2069500 + }, + { + "epoch": 22.77, + "learning_rate": 6.806617525716486e-08, + "loss": 3.7897, + "step": 2070000 + }, + { + "epoch": 22.78, + "learning_rate": 6.805242312558446e-08, + "loss": 3.7761, + "step": 2070500 + }, + { + "epoch": 22.78, + "learning_rate": 6.803867099400407e-08, + "loss": 3.7869, + "step": 2071000 + }, + { + "epoch": 22.79, + "learning_rate": 6.802491886242368e-08, + "loss": 3.787, + "step": 2071500 + }, + { + "epoch": 22.8, + "learning_rate": 6.801116673084327e-08, + "loss": 3.7958, + "step": 2072000 + }, + { + "epoch": 22.8, + "learning_rate": 6.799741459926288e-08, + "loss": 3.803, + "step": 2072500 + }, + { + "epoch": 22.81, + "learning_rate": 6.798366246768249e-08, + "loss": 3.7943, + "step": 2073000 + }, + { + "epoch": 22.81, + "learning_rate": 6.796991033610209e-08, + "loss": 3.7805, + "step": 2073500 + }, + { + "epoch": 22.82, + "learning_rate": 6.79561582045217e-08, + "loss": 3.8007, + "step": 2074000 + }, + { + "epoch": 22.82, + "learning_rate": 6.794240607294131e-08, + "loss": 3.7836, + "step": 2074500 + }, + { + "epoch": 22.83, + "learning_rate": 6.79286539413609e-08, + "loss": 3.7707, + "step": 2075000 + }, + { + "epoch": 22.83, + "learning_rate": 6.791490180978051e-08, + "loss": 3.7915, + "step": 2075500 + }, + { + "epoch": 22.84, + "learning_rate": 6.790114967820012e-08, + "loss": 3.7768, + "step": 2076000 + }, + { + "epoch": 22.85, + "learning_rate": 6.788739754661972e-08, + "loss": 3.7799, + "step": 2076500 + }, + { + "epoch": 22.85, + "learning_rate": 6.787364541503933e-08, + "loss": 3.79, + "step": 2077000 + }, + { + "epoch": 22.86, + "learning_rate": 6.785989328345894e-08, + "loss": 3.7838, + "step": 2077500 + }, + { + "epoch": 22.86, + "learning_rate": 6.784614115187853e-08, + "loss": 3.7975, + "step": 2078000 + }, + { + "epoch": 22.87, + "learning_rate": 6.783238902029814e-08, + "loss": 3.7785, + "step": 2078500 + }, + { + "epoch": 22.87, + "learning_rate": 6.781863688871775e-08, + "loss": 3.7954, + "step": 2079000 + }, + { + "epoch": 22.88, + "learning_rate": 6.780488475713735e-08, + "loss": 3.77, + "step": 2079500 + }, + { + "epoch": 22.88, + "learning_rate": 6.779113262555696e-08, + "loss": 3.7757, + "step": 2080000 + }, + { + "epoch": 22.89, + "learning_rate": 6.777738049397657e-08, + "loss": 3.806, + "step": 2080500 + }, + { + "epoch": 22.89, + "learning_rate": 6.776362836239616e-08, + "loss": 3.7922, + "step": 2081000 + }, + { + "epoch": 22.9, + "learning_rate": 6.774987623081577e-08, + "loss": 3.7956, + "step": 2081500 + }, + { + "epoch": 22.91, + "learning_rate": 6.773612409923538e-08, + "loss": 3.8087, + "step": 2082000 + }, + { + "epoch": 22.91, + "learning_rate": 6.772237196765498e-08, + "loss": 3.7701, + "step": 2082500 + }, + { + "epoch": 22.92, + "learning_rate": 6.770861983607459e-08, + "loss": 3.7702, + "step": 2083000 + }, + { + "epoch": 22.92, + "learning_rate": 6.76948677044942e-08, + "loss": 3.7899, + "step": 2083500 + }, + { + "epoch": 22.93, + "learning_rate": 6.768111557291379e-08, + "loss": 3.7991, + "step": 2084000 + }, + { + "epoch": 22.93, + "learning_rate": 6.76673634413334e-08, + "loss": 3.7842, + "step": 2084500 + }, + { + "epoch": 22.94, + "learning_rate": 6.765361130975301e-08, + "loss": 3.7864, + "step": 2085000 + }, + { + "epoch": 22.94, + "learning_rate": 6.76398591781726e-08, + "loss": 3.7759, + "step": 2085500 + }, + { + "epoch": 22.95, + "learning_rate": 6.762610704659222e-08, + "loss": 3.8131, + "step": 2086000 + }, + { + "epoch": 22.96, + "learning_rate": 6.761235491501183e-08, + "loss": 3.7874, + "step": 2086500 + }, + { + "epoch": 22.96, + "learning_rate": 6.759860278343142e-08, + "loss": 3.7895, + "step": 2087000 + }, + { + "epoch": 22.97, + "learning_rate": 6.758485065185103e-08, + "loss": 3.7987, + "step": 2087500 + }, + { + "epoch": 22.97, + "learning_rate": 6.757109852027064e-08, + "loss": 3.7952, + "step": 2088000 + }, + { + "epoch": 22.98, + "learning_rate": 6.755734638869024e-08, + "loss": 3.7918, + "step": 2088500 + }, + { + "epoch": 22.98, + "learning_rate": 6.754359425710985e-08, + "loss": 3.7925, + "step": 2089000 + }, + { + "epoch": 22.99, + "learning_rate": 6.752984212552945e-08, + "loss": 3.7983, + "step": 2089500 + }, + { + "epoch": 22.99, + "learning_rate": 6.751608999394905e-08, + "loss": 3.803, + "step": 2090000 + }, + { + "epoch": 23.0, + "learning_rate": 6.750233786236866e-08, + "loss": 3.7918, + "step": 2090500 + }, + { + "epoch": 23.0, + "eval_loss": 3.8476030826568604, + "eval_runtime": 6.1381, + "eval_samples_per_second": 253.175, + "step": 2090585 + }, + { + "epoch": 23.0, + "learning_rate": 6.748858573078827e-08, + "loss": 3.7923, + "step": 2091000 + }, + { + "epoch": 23.01, + "learning_rate": 6.747483359920787e-08, + "loss": 3.7865, + "step": 2091500 + }, + { + "epoch": 23.02, + "learning_rate": 6.746108146762747e-08, + "loss": 3.8029, + "step": 2092000 + }, + { + "epoch": 23.02, + "learning_rate": 6.744732933604708e-08, + "loss": 3.7909, + "step": 2092500 + }, + { + "epoch": 23.03, + "learning_rate": 6.743357720446668e-08, + "loss": 3.7795, + "step": 2093000 + }, + { + "epoch": 23.03, + "learning_rate": 6.741982507288629e-08, + "loss": 3.7981, + "step": 2093500 + }, + { + "epoch": 23.04, + "learning_rate": 6.74060729413059e-08, + "loss": 3.7653, + "step": 2094000 + }, + { + "epoch": 23.04, + "learning_rate": 6.73923208097255e-08, + "loss": 3.7908, + "step": 2094500 + }, + { + "epoch": 23.05, + "learning_rate": 6.73785686781451e-08, + "loss": 3.8037, + "step": 2095000 + }, + { + "epoch": 23.05, + "learning_rate": 6.736481654656471e-08, + "loss": 3.8039, + "step": 2095500 + }, + { + "epoch": 23.06, + "learning_rate": 6.735106441498431e-08, + "loss": 3.7688, + "step": 2096000 + }, + { + "epoch": 23.07, + "learning_rate": 6.733731228340392e-08, + "loss": 3.7864, + "step": 2096500 + }, + { + "epoch": 23.07, + "learning_rate": 6.732356015182353e-08, + "loss": 3.8128, + "step": 2097000 + }, + { + "epoch": 23.08, + "learning_rate": 6.730980802024312e-08, + "loss": 3.8003, + "step": 2097500 + }, + { + "epoch": 23.08, + "learning_rate": 6.729605588866273e-08, + "loss": 3.7882, + "step": 2098000 + }, + { + "epoch": 23.09, + "learning_rate": 6.728230375708234e-08, + "loss": 3.7851, + "step": 2098500 + }, + { + "epoch": 23.09, + "learning_rate": 6.726855162550194e-08, + "loss": 3.804, + "step": 2099000 + }, + { + "epoch": 23.1, + "learning_rate": 6.725479949392155e-08, + "loss": 3.7985, + "step": 2099500 + }, + { + "epoch": 23.1, + "learning_rate": 6.724104736234116e-08, + "loss": 3.792, + "step": 2100000 + }, + { + "epoch": 23.11, + "learning_rate": 6.722729523076077e-08, + "loss": 3.7817, + "step": 2100500 + }, + { + "epoch": 23.11, + "learning_rate": 6.721354309918036e-08, + "loss": 3.7726, + "step": 2101000 + }, + { + "epoch": 23.12, + "learning_rate": 6.719979096759997e-08, + "loss": 3.7833, + "step": 2101500 + }, + { + "epoch": 23.13, + "learning_rate": 6.718603883601958e-08, + "loss": 3.7814, + "step": 2102000 + }, + { + "epoch": 23.13, + "learning_rate": 6.717228670443918e-08, + "loss": 3.798, + "step": 2102500 + }, + { + "epoch": 23.14, + "learning_rate": 6.715853457285879e-08, + "loss": 3.7779, + "step": 2103000 + }, + { + "epoch": 23.14, + "learning_rate": 6.71447824412784e-08, + "loss": 3.7791, + "step": 2103500 + }, + { + "epoch": 23.15, + "learning_rate": 6.7131030309698e-08, + "loss": 3.7828, + "step": 2104000 + }, + { + "epoch": 23.15, + "learning_rate": 6.71172781781176e-08, + "loss": 3.7968, + "step": 2104500 + }, + { + "epoch": 23.16, + "learning_rate": 6.710352604653721e-08, + "loss": 3.7775, + "step": 2105000 + }, + { + "epoch": 23.16, + "learning_rate": 6.708977391495682e-08, + "loss": 3.7672, + "step": 2105500 + }, + { + "epoch": 23.17, + "learning_rate": 6.707602178337643e-08, + "loss": 3.7741, + "step": 2106000 + }, + { + "epoch": 23.18, + "learning_rate": 6.706226965179603e-08, + "loss": 3.7777, + "step": 2106500 + }, + { + "epoch": 23.18, + "learning_rate": 6.704851752021564e-08, + "loss": 3.8072, + "step": 2107000 + }, + { + "epoch": 23.19, + "learning_rate": 6.703476538863525e-08, + "loss": 3.7885, + "step": 2107500 + }, + { + "epoch": 23.19, + "learning_rate": 6.702101325705484e-08, + "loss": 3.7898, + "step": 2108000 + }, + { + "epoch": 23.2, + "learning_rate": 6.700726112547445e-08, + "loss": 3.7962, + "step": 2108500 + }, + { + "epoch": 23.2, + "learning_rate": 6.699350899389406e-08, + "loss": 3.7817, + "step": 2109000 + }, + { + "epoch": 23.21, + "learning_rate": 6.697975686231366e-08, + "loss": 3.7932, + "step": 2109500 + }, + { + "epoch": 23.21, + "learning_rate": 6.696600473073327e-08, + "loss": 3.7787, + "step": 2110000 + }, + { + "epoch": 23.22, + "learning_rate": 6.695225259915288e-08, + "loss": 3.7838, + "step": 2110500 + }, + { + "epoch": 23.22, + "learning_rate": 6.693850046757247e-08, + "loss": 3.7731, + "step": 2111000 + }, + { + "epoch": 23.23, + "learning_rate": 6.692474833599208e-08, + "loss": 3.7806, + "step": 2111500 + }, + { + "epoch": 23.24, + "learning_rate": 6.691099620441169e-08, + "loss": 3.779, + "step": 2112000 + }, + { + "epoch": 23.24, + "learning_rate": 6.689724407283129e-08, + "loss": 3.7721, + "step": 2112500 + }, + { + "epoch": 23.25, + "learning_rate": 6.68834919412509e-08, + "loss": 3.7955, + "step": 2113000 + }, + { + "epoch": 23.25, + "learning_rate": 6.68697398096705e-08, + "loss": 3.7856, + "step": 2113500 + }, + { + "epoch": 23.26, + "learning_rate": 6.68559876780901e-08, + "loss": 3.7926, + "step": 2114000 + }, + { + "epoch": 23.26, + "learning_rate": 6.684223554650971e-08, + "loss": 3.7946, + "step": 2114500 + }, + { + "epoch": 23.27, + "learning_rate": 6.682848341492932e-08, + "loss": 3.7947, + "step": 2115000 + }, + { + "epoch": 23.27, + "learning_rate": 6.681473128334892e-08, + "loss": 3.7704, + "step": 2115500 + }, + { + "epoch": 23.28, + "learning_rate": 6.680097915176852e-08, + "loss": 3.7955, + "step": 2116000 + }, + { + "epoch": 23.29, + "learning_rate": 6.678722702018813e-08, + "loss": 3.7845, + "step": 2116500 + }, + { + "epoch": 23.29, + "learning_rate": 6.677347488860773e-08, + "loss": 3.7842, + "step": 2117000 + }, + { + "epoch": 23.3, + "learning_rate": 6.675972275702734e-08, + "loss": 3.7909, + "step": 2117500 + }, + { + "epoch": 23.3, + "learning_rate": 6.674597062544695e-08, + "loss": 3.7703, + "step": 2118000 + }, + { + "epoch": 23.31, + "learning_rate": 6.673221849386655e-08, + "loss": 3.7604, + "step": 2118500 + }, + { + "epoch": 23.31, + "learning_rate": 6.671846636228615e-08, + "loss": 3.782, + "step": 2119000 + }, + { + "epoch": 23.32, + "learning_rate": 6.670471423070576e-08, + "loss": 3.7961, + "step": 2119500 + }, + { + "epoch": 23.32, + "learning_rate": 6.669096209912536e-08, + "loss": 3.7941, + "step": 2120000 + }, + { + "epoch": 23.33, + "learning_rate": 6.667720996754497e-08, + "loss": 3.7854, + "step": 2120500 + }, + { + "epoch": 23.33, + "learning_rate": 6.666345783596458e-08, + "loss": 3.8026, + "step": 2121000 + }, + { + "epoch": 23.34, + "learning_rate": 6.664970570438417e-08, + "loss": 3.7924, + "step": 2121500 + }, + { + "epoch": 23.35, + "learning_rate": 6.663595357280378e-08, + "loss": 3.7815, + "step": 2122000 + }, + { + "epoch": 23.35, + "learning_rate": 6.66222014412234e-08, + "loss": 3.7973, + "step": 2122500 + }, + { + "epoch": 23.36, + "learning_rate": 6.660844930964299e-08, + "loss": 3.7851, + "step": 2123000 + }, + { + "epoch": 23.36, + "learning_rate": 6.65946971780626e-08, + "loss": 3.7672, + "step": 2123500 + }, + { + "epoch": 23.37, + "learning_rate": 6.658094504648221e-08, + "loss": 3.792, + "step": 2124000 + }, + { + "epoch": 23.37, + "learning_rate": 6.65671929149018e-08, + "loss": 3.791, + "step": 2124500 + }, + { + "epoch": 23.38, + "learning_rate": 6.655344078332141e-08, + "loss": 3.7732, + "step": 2125000 + }, + { + "epoch": 23.38, + "learning_rate": 6.653968865174102e-08, + "loss": 3.7763, + "step": 2125500 + }, + { + "epoch": 23.39, + "learning_rate": 6.652593652016062e-08, + "loss": 3.7699, + "step": 2126000 + }, + { + "epoch": 23.4, + "learning_rate": 6.651218438858023e-08, + "loss": 3.8097, + "step": 2126500 + }, + { + "epoch": 23.4, + "learning_rate": 6.649843225699984e-08, + "loss": 3.8056, + "step": 2127000 + }, + { + "epoch": 23.41, + "learning_rate": 6.648468012541943e-08, + "loss": 3.7866, + "step": 2127500 + }, + { + "epoch": 23.41, + "learning_rate": 6.647092799383904e-08, + "loss": 3.7866, + "step": 2128000 + }, + { + "epoch": 23.42, + "learning_rate": 6.645717586225865e-08, + "loss": 3.7972, + "step": 2128500 + }, + { + "epoch": 23.42, + "learning_rate": 6.644342373067825e-08, + "loss": 3.7713, + "step": 2129000 + }, + { + "epoch": 23.43, + "learning_rate": 6.642967159909786e-08, + "loss": 3.7773, + "step": 2129500 + }, + { + "epoch": 23.43, + "learning_rate": 6.641591946751747e-08, + "loss": 3.7944, + "step": 2130000 + }, + { + "epoch": 23.44, + "learning_rate": 6.640216733593706e-08, + "loss": 3.796, + "step": 2130500 + }, + { + "epoch": 23.44, + "learning_rate": 6.638841520435667e-08, + "loss": 3.78, + "step": 2131000 + }, + { + "epoch": 23.45, + "learning_rate": 6.637466307277628e-08, + "loss": 3.7835, + "step": 2131500 + }, + { + "epoch": 23.46, + "learning_rate": 6.636091094119588e-08, + "loss": 3.8021, + "step": 2132000 + }, + { + "epoch": 23.46, + "learning_rate": 6.634715880961549e-08, + "loss": 3.7906, + "step": 2132500 + }, + { + "epoch": 23.47, + "learning_rate": 6.63334066780351e-08, + "loss": 3.77, + "step": 2133000 + }, + { + "epoch": 23.47, + "learning_rate": 6.631965454645469e-08, + "loss": 3.7904, + "step": 2133500 + }, + { + "epoch": 23.48, + "learning_rate": 6.63059024148743e-08, + "loss": 3.7869, + "step": 2134000 + }, + { + "epoch": 23.48, + "learning_rate": 6.629215028329391e-08, + "loss": 3.8014, + "step": 2134500 + }, + { + "epoch": 23.49, + "learning_rate": 6.627839815171351e-08, + "loss": 3.801, + "step": 2135000 + }, + { + "epoch": 23.49, + "learning_rate": 6.626464602013312e-08, + "loss": 3.8017, + "step": 2135500 + }, + { + "epoch": 23.5, + "learning_rate": 6.625089388855273e-08, + "loss": 3.7829, + "step": 2136000 + }, + { + "epoch": 23.51, + "learning_rate": 6.623714175697232e-08, + "loss": 3.7841, + "step": 2136500 + }, + { + "epoch": 23.51, + "learning_rate": 6.622338962539193e-08, + "loss": 3.7978, + "step": 2137000 + }, + { + "epoch": 23.52, + "learning_rate": 6.620963749381154e-08, + "loss": 3.7788, + "step": 2137500 + }, + { + "epoch": 23.52, + "learning_rate": 6.619588536223114e-08, + "loss": 3.7865, + "step": 2138000 + }, + { + "epoch": 23.53, + "learning_rate": 6.618213323065075e-08, + "loss": 3.7776, + "step": 2138500 + }, + { + "epoch": 23.53, + "learning_rate": 6.616838109907036e-08, + "loss": 3.7965, + "step": 2139000 + }, + { + "epoch": 23.54, + "learning_rate": 6.615462896748995e-08, + "loss": 3.7865, + "step": 2139500 + }, + { + "epoch": 23.54, + "learning_rate": 6.614087683590956e-08, + "loss": 3.7859, + "step": 2140000 + }, + { + "epoch": 23.55, + "learning_rate": 6.612712470432917e-08, + "loss": 3.7904, + "step": 2140500 + }, + { + "epoch": 23.55, + "learning_rate": 6.611337257274877e-08, + "loss": 3.7755, + "step": 2141000 + }, + { + "epoch": 23.56, + "learning_rate": 6.609962044116838e-08, + "loss": 3.7803, + "step": 2141500 + }, + { + "epoch": 23.57, + "learning_rate": 6.608586830958797e-08, + "loss": 3.7904, + "step": 2142000 + }, + { + "epoch": 23.57, + "learning_rate": 6.607211617800758e-08, + "loss": 3.7813, + "step": 2142500 + }, + { + "epoch": 23.58, + "learning_rate": 6.605836404642719e-08, + "loss": 3.7805, + "step": 2143000 + }, + { + "epoch": 23.58, + "learning_rate": 6.604461191484679e-08, + "loss": 3.8034, + "step": 2143500 + }, + { + "epoch": 23.59, + "learning_rate": 6.60308597832664e-08, + "loss": 3.7744, + "step": 2144000 + }, + { + "epoch": 23.59, + "learning_rate": 6.6017107651686e-08, + "loss": 3.7755, + "step": 2144500 + }, + { + "epoch": 23.6, + "learning_rate": 6.60033555201056e-08, + "loss": 3.7819, + "step": 2145000 + }, + { + "epoch": 23.6, + "learning_rate": 6.598960338852521e-08, + "loss": 3.7644, + "step": 2145500 + }, + { + "epoch": 23.61, + "learning_rate": 6.597585125694482e-08, + "loss": 3.7802, + "step": 2146000 + }, + { + "epoch": 23.62, + "learning_rate": 6.596209912536442e-08, + "loss": 3.7869, + "step": 2146500 + }, + { + "epoch": 23.62, + "learning_rate": 6.594834699378403e-08, + "loss": 3.7928, + "step": 2147000 + }, + { + "epoch": 23.63, + "learning_rate": 6.593459486220364e-08, + "loss": 3.7752, + "step": 2147500 + }, + { + "epoch": 23.63, + "learning_rate": 6.592084273062324e-08, + "loss": 3.7859, + "step": 2148000 + }, + { + "epoch": 23.64, + "learning_rate": 6.590709059904284e-08, + "loss": 3.7979, + "step": 2148500 + }, + { + "epoch": 23.64, + "learning_rate": 6.589333846746245e-08, + "loss": 3.7873, + "step": 2149000 + }, + { + "epoch": 23.65, + "learning_rate": 6.587958633588206e-08, + "loss": 3.7993, + "step": 2149500 + }, + { + "epoch": 23.65, + "learning_rate": 6.586583420430166e-08, + "loss": 3.7738, + "step": 2150000 + }, + { + "epoch": 23.66, + "learning_rate": 6.585208207272127e-08, + "loss": 3.7902, + "step": 2150500 + }, + { + "epoch": 23.66, + "learning_rate": 6.583832994114087e-08, + "loss": 3.7978, + "step": 2151000 + }, + { + "epoch": 23.67, + "learning_rate": 6.582457780956048e-08, + "loss": 3.7804, + "step": 2151500 + }, + { + "epoch": 23.68, + "learning_rate": 6.581082567798008e-08, + "loss": 3.7932, + "step": 2152000 + }, + { + "epoch": 23.68, + "learning_rate": 6.579707354639969e-08, + "loss": 3.7693, + "step": 2152500 + }, + { + "epoch": 23.69, + "learning_rate": 6.57833214148193e-08, + "loss": 3.787, + "step": 2153000 + }, + { + "epoch": 23.69, + "learning_rate": 6.576956928323891e-08, + "loss": 3.7787, + "step": 2153500 + }, + { + "epoch": 23.7, + "learning_rate": 6.57558171516585e-08, + "loss": 3.7675, + "step": 2154000 + }, + { + "epoch": 23.7, + "learning_rate": 6.574206502007811e-08, + "loss": 3.7807, + "step": 2154500 + }, + { + "epoch": 23.71, + "learning_rate": 6.572831288849772e-08, + "loss": 3.7665, + "step": 2155000 + }, + { + "epoch": 23.71, + "learning_rate": 6.571456075691732e-08, + "loss": 3.8003, + "step": 2155500 + }, + { + "epoch": 23.72, + "learning_rate": 6.570080862533693e-08, + "loss": 3.7891, + "step": 2156000 + }, + { + "epoch": 23.73, + "learning_rate": 6.568705649375654e-08, + "loss": 3.7925, + "step": 2156500 + }, + { + "epoch": 23.73, + "learning_rate": 6.567330436217613e-08, + "loss": 3.7628, + "step": 2157000 + }, + { + "epoch": 23.74, + "learning_rate": 6.565955223059574e-08, + "loss": 3.7741, + "step": 2157500 + }, + { + "epoch": 23.74, + "learning_rate": 6.564580009901535e-08, + "loss": 3.7682, + "step": 2158000 + }, + { + "epoch": 23.75, + "learning_rate": 6.563204796743495e-08, + "loss": 3.7939, + "step": 2158500 + }, + { + "epoch": 23.75, + "learning_rate": 6.561829583585456e-08, + "loss": 3.7871, + "step": 2159000 + }, + { + "epoch": 23.76, + "learning_rate": 6.560454370427417e-08, + "loss": 3.7872, + "step": 2159500 + }, + { + "epoch": 23.76, + "learning_rate": 6.559079157269376e-08, + "loss": 3.7955, + "step": 2160000 + }, + { + "epoch": 23.77, + "learning_rate": 6.557703944111337e-08, + "loss": 3.771, + "step": 2160500 + }, + { + "epoch": 23.77, + "learning_rate": 6.556328730953298e-08, + "loss": 3.7673, + "step": 2161000 + }, + { + "epoch": 23.78, + "learning_rate": 6.554953517795258e-08, + "loss": 3.7961, + "step": 2161500 + }, + { + "epoch": 23.79, + "learning_rate": 6.553578304637219e-08, + "loss": 3.7783, + "step": 2162000 + }, + { + "epoch": 23.79, + "learning_rate": 6.55220309147918e-08, + "loss": 3.7833, + "step": 2162500 + }, + { + "epoch": 23.8, + "learning_rate": 6.550827878321139e-08, + "loss": 3.7698, + "step": 2163000 + }, + { + "epoch": 23.8, + "learning_rate": 6.5494526651631e-08, + "loss": 3.7815, + "step": 2163500 + }, + { + "epoch": 23.81, + "learning_rate": 6.548077452005061e-08, + "loss": 3.7752, + "step": 2164000 + }, + { + "epoch": 23.81, + "learning_rate": 6.546702238847021e-08, + "loss": 3.778, + "step": 2164500 + }, + { + "epoch": 23.82, + "learning_rate": 6.545327025688982e-08, + "loss": 3.7862, + "step": 2165000 + }, + { + "epoch": 23.82, + "learning_rate": 6.543951812530943e-08, + "loss": 3.7898, + "step": 2165500 + }, + { + "epoch": 23.83, + "learning_rate": 6.542576599372902e-08, + "loss": 3.777, + "step": 2166000 + }, + { + "epoch": 23.84, + "learning_rate": 6.541201386214863e-08, + "loss": 3.7993, + "step": 2166500 + }, + { + "epoch": 23.84, + "learning_rate": 6.539826173056824e-08, + "loss": 3.77, + "step": 2167000 + }, + { + "epoch": 23.85, + "learning_rate": 6.538450959898784e-08, + "loss": 3.7857, + "step": 2167500 + }, + { + "epoch": 23.85, + "learning_rate": 6.537075746740745e-08, + "loss": 3.7901, + "step": 2168000 + }, + { + "epoch": 23.86, + "learning_rate": 6.535700533582706e-08, + "loss": 3.7658, + "step": 2168500 + }, + { + "epoch": 23.86, + "learning_rate": 6.534325320424665e-08, + "loss": 3.8108, + "step": 2169000 + }, + { + "epoch": 23.87, + "learning_rate": 6.532950107266626e-08, + "loss": 3.7875, + "step": 2169500 + }, + { + "epoch": 23.87, + "learning_rate": 6.531574894108587e-08, + "loss": 3.7712, + "step": 2170000 + }, + { + "epoch": 23.88, + "learning_rate": 6.530199680950547e-08, + "loss": 3.7845, + "step": 2170500 + }, + { + "epoch": 23.88, + "learning_rate": 6.528824467792508e-08, + "loss": 3.7862, + "step": 2171000 + }, + { + "epoch": 23.89, + "learning_rate": 6.527449254634469e-08, + "loss": 3.7958, + "step": 2171500 + }, + { + "epoch": 23.9, + "learning_rate": 6.526074041476428e-08, + "loss": 3.785, + "step": 2172000 + }, + { + "epoch": 23.9, + "learning_rate": 6.524698828318389e-08, + "loss": 3.784, + "step": 2172500 + }, + { + "epoch": 23.91, + "learning_rate": 6.52332361516035e-08, + "loss": 3.7767, + "step": 2173000 + }, + { + "epoch": 23.91, + "learning_rate": 6.52194840200231e-08, + "loss": 3.7828, + "step": 2173500 + }, + { + "epoch": 23.92, + "learning_rate": 6.52057318884427e-08, + "loss": 3.7802, + "step": 2174000 + }, + { + "epoch": 23.92, + "learning_rate": 6.519197975686232e-08, + "loss": 3.7831, + "step": 2174500 + }, + { + "epoch": 23.93, + "learning_rate": 6.517822762528191e-08, + "loss": 3.7759, + "step": 2175000 + }, + { + "epoch": 23.93, + "learning_rate": 6.516447549370152e-08, + "loss": 3.8091, + "step": 2175500 + }, + { + "epoch": 23.94, + "learning_rate": 6.515072336212113e-08, + "loss": 3.7912, + "step": 2176000 + }, + { + "epoch": 23.95, + "learning_rate": 6.513697123054073e-08, + "loss": 3.7834, + "step": 2176500 + }, + { + "epoch": 23.95, + "learning_rate": 6.512321909896034e-08, + "loss": 3.7651, + "step": 2177000 + }, + { + "epoch": 23.96, + "learning_rate": 6.510946696737994e-08, + "loss": 3.8108, + "step": 2177500 + }, + { + "epoch": 23.96, + "learning_rate": 6.509571483579954e-08, + "loss": 3.7605, + "step": 2178000 + }, + { + "epoch": 23.97, + "learning_rate": 6.508196270421915e-08, + "loss": 3.7836, + "step": 2178500 + }, + { + "epoch": 23.97, + "learning_rate": 6.506821057263876e-08, + "loss": 3.803, + "step": 2179000 + }, + { + "epoch": 23.98, + "learning_rate": 6.505445844105836e-08, + "loss": 3.7886, + "step": 2179500 + }, + { + "epoch": 23.98, + "learning_rate": 6.504070630947797e-08, + "loss": 3.7975, + "step": 2180000 + }, + { + "epoch": 23.99, + "learning_rate": 6.502695417789757e-08, + "loss": 3.7913, + "step": 2180500 + }, + { + "epoch": 23.99, + "learning_rate": 6.501320204631717e-08, + "loss": 3.7933, + "step": 2181000 + }, + { + "epoch": 24.0, + "eval_loss": 3.8459019660949707, + "eval_runtime": 6.1331, + "eval_samples_per_second": 253.378, + "step": 2181480 + }, + { + "epoch": 24.0, + "learning_rate": 6.499944991473678e-08, + "loss": 3.7727, + "step": 2181500 + }, + { + "epoch": 24.01, + "learning_rate": 6.498569778315639e-08, + "loss": 3.7902, + "step": 2182000 + }, + { + "epoch": 24.01, + "learning_rate": 6.497194565157599e-08, + "loss": 3.8013, + "step": 2182500 + }, + { + "epoch": 24.02, + "learning_rate": 6.49581935199956e-08, + "loss": 3.7888, + "step": 2183000 + }, + { + "epoch": 24.02, + "learning_rate": 6.49444413884152e-08, + "loss": 3.7796, + "step": 2183500 + }, + { + "epoch": 24.03, + "learning_rate": 6.49306892568348e-08, + "loss": 3.7925, + "step": 2184000 + }, + { + "epoch": 24.03, + "learning_rate": 6.491693712525441e-08, + "loss": 3.7916, + "step": 2184500 + }, + { + "epoch": 24.04, + "learning_rate": 6.490318499367402e-08, + "loss": 3.7738, + "step": 2185000 + }, + { + "epoch": 24.04, + "learning_rate": 6.488943286209361e-08, + "loss": 3.785, + "step": 2185500 + }, + { + "epoch": 24.05, + "learning_rate": 6.487568073051322e-08, + "loss": 3.7681, + "step": 2186000 + }, + { + "epoch": 24.06, + "learning_rate": 6.486192859893283e-08, + "loss": 3.7897, + "step": 2186500 + }, + { + "epoch": 24.06, + "learning_rate": 6.484817646735243e-08, + "loss": 3.7961, + "step": 2187000 + }, + { + "epoch": 24.07, + "learning_rate": 6.483442433577204e-08, + "loss": 3.7765, + "step": 2187500 + }, + { + "epoch": 24.07, + "learning_rate": 6.482067220419165e-08, + "loss": 3.7851, + "step": 2188000 + }, + { + "epoch": 24.08, + "learning_rate": 6.480692007261124e-08, + "loss": 3.7912, + "step": 2188500 + }, + { + "epoch": 24.08, + "learning_rate": 6.479316794103085e-08, + "loss": 3.8018, + "step": 2189000 + }, + { + "epoch": 24.09, + "learning_rate": 6.477941580945046e-08, + "loss": 3.7736, + "step": 2189500 + }, + { + "epoch": 24.09, + "learning_rate": 6.476566367787006e-08, + "loss": 3.8048, + "step": 2190000 + }, + { + "epoch": 24.1, + "learning_rate": 6.475191154628967e-08, + "loss": 3.7711, + "step": 2190500 + }, + { + "epoch": 24.1, + "learning_rate": 6.473815941470928e-08, + "loss": 3.7748, + "step": 2191000 + }, + { + "epoch": 24.11, + "learning_rate": 6.472440728312887e-08, + "loss": 3.7644, + "step": 2191500 + }, + { + "epoch": 24.12, + "learning_rate": 6.471065515154848e-08, + "loss": 3.7776, + "step": 2192000 + }, + { + "epoch": 24.12, + "learning_rate": 6.469690301996809e-08, + "loss": 3.7648, + "step": 2192500 + }, + { + "epoch": 24.13, + "learning_rate": 6.468315088838769e-08, + "loss": 3.7914, + "step": 2193000 + }, + { + "epoch": 24.13, + "learning_rate": 6.46693987568073e-08, + "loss": 3.7858, + "step": 2193500 + }, + { + "epoch": 24.14, + "learning_rate": 6.465564662522691e-08, + "loss": 3.7813, + "step": 2194000 + }, + { + "epoch": 24.14, + "learning_rate": 6.46418944936465e-08, + "loss": 3.7772, + "step": 2194500 + }, + { + "epoch": 24.15, + "learning_rate": 6.462814236206611e-08, + "loss": 3.7733, + "step": 2195000 + }, + { + "epoch": 24.15, + "learning_rate": 6.461439023048572e-08, + "loss": 3.7552, + "step": 2195500 + }, + { + "epoch": 24.16, + "learning_rate": 6.460063809890532e-08, + "loss": 3.7866, + "step": 2196000 + }, + { + "epoch": 24.17, + "learning_rate": 6.458688596732493e-08, + "loss": 3.7733, + "step": 2196500 + }, + { + "epoch": 24.17, + "learning_rate": 6.457313383574454e-08, + "loss": 3.7834, + "step": 2197000 + }, + { + "epoch": 24.18, + "learning_rate": 6.455938170416413e-08, + "loss": 3.7992, + "step": 2197500 + }, + { + "epoch": 24.18, + "learning_rate": 6.454562957258374e-08, + "loss": 3.7752, + "step": 2198000 + }, + { + "epoch": 24.19, + "learning_rate": 6.453187744100335e-08, + "loss": 3.7766, + "step": 2198500 + }, + { + "epoch": 24.19, + "learning_rate": 6.451812530942296e-08, + "loss": 3.7866, + "step": 2199000 + }, + { + "epoch": 24.2, + "learning_rate": 6.450437317784256e-08, + "loss": 3.7986, + "step": 2199500 + }, + { + "epoch": 24.2, + "learning_rate": 6.449062104626217e-08, + "loss": 3.8012, + "step": 2200000 + }, + { + "epoch": 24.21, + "learning_rate": 6.447686891468178e-08, + "loss": 3.7978, + "step": 2200500 + }, + { + "epoch": 24.21, + "learning_rate": 6.446311678310139e-08, + "loss": 3.7853, + "step": 2201000 + }, + { + "epoch": 24.22, + "learning_rate": 6.444936465152098e-08, + "loss": 3.7722, + "step": 2201500 + }, + { + "epoch": 24.23, + "learning_rate": 6.443561251994059e-08, + "loss": 3.7814, + "step": 2202000 + }, + { + "epoch": 24.23, + "learning_rate": 6.44218603883602e-08, + "loss": 3.8017, + "step": 2202500 + }, + { + "epoch": 24.24, + "learning_rate": 6.44081082567798e-08, + "loss": 3.7893, + "step": 2203000 + }, + { + "epoch": 24.24, + "learning_rate": 6.43943561251994e-08, + "loss": 3.7741, + "step": 2203500 + }, + { + "epoch": 24.25, + "learning_rate": 6.438060399361902e-08, + "loss": 3.7874, + "step": 2204000 + }, + { + "epoch": 24.25, + "learning_rate": 6.436685186203862e-08, + "loss": 3.7698, + "step": 2204500 + }, + { + "epoch": 24.26, + "learning_rate": 6.435309973045822e-08, + "loss": 3.7822, + "step": 2205000 + }, + { + "epoch": 24.26, + "learning_rate": 6.433934759887783e-08, + "loss": 3.7918, + "step": 2205500 + }, + { + "epoch": 24.27, + "learning_rate": 6.432559546729744e-08, + "loss": 3.7934, + "step": 2206000 + }, + { + "epoch": 24.28, + "learning_rate": 6.431184333571704e-08, + "loss": 3.8104, + "step": 2206500 + }, + { + "epoch": 24.28, + "learning_rate": 6.429809120413664e-08, + "loss": 3.782, + "step": 2207000 + }, + { + "epoch": 24.29, + "learning_rate": 6.428433907255625e-08, + "loss": 3.7724, + "step": 2207500 + }, + { + "epoch": 24.29, + "learning_rate": 6.427058694097585e-08, + "loss": 3.7681, + "step": 2208000 + }, + { + "epoch": 24.3, + "learning_rate": 6.425683480939546e-08, + "loss": 3.7791, + "step": 2208500 + }, + { + "epoch": 24.3, + "learning_rate": 6.424308267781507e-08, + "loss": 3.7833, + "step": 2209000 + }, + { + "epoch": 24.31, + "learning_rate": 6.422933054623466e-08, + "loss": 3.7919, + "step": 2209500 + }, + { + "epoch": 24.31, + "learning_rate": 6.421557841465427e-08, + "loss": 3.7678, + "step": 2210000 + }, + { + "epoch": 24.32, + "learning_rate": 6.420182628307388e-08, + "loss": 3.7824, + "step": 2210500 + }, + { + "epoch": 24.32, + "learning_rate": 6.418807415149348e-08, + "loss": 3.7914, + "step": 2211000 + }, + { + "epoch": 24.33, + "learning_rate": 6.417432201991309e-08, + "loss": 3.785, + "step": 2211500 + }, + { + "epoch": 24.34, + "learning_rate": 6.41605698883327e-08, + "loss": 3.7755, + "step": 2212000 + }, + { + "epoch": 24.34, + "learning_rate": 6.41468177567523e-08, + "loss": 3.7889, + "step": 2212500 + }, + { + "epoch": 24.35, + "learning_rate": 6.41330656251719e-08, + "loss": 3.7747, + "step": 2213000 + }, + { + "epoch": 24.35, + "learning_rate": 6.411931349359151e-08, + "loss": 3.7758, + "step": 2213500 + }, + { + "epoch": 24.36, + "learning_rate": 6.410556136201111e-08, + "loss": 3.7724, + "step": 2214000 + }, + { + "epoch": 24.36, + "learning_rate": 6.409180923043072e-08, + "loss": 3.7604, + "step": 2214500 + }, + { + "epoch": 24.37, + "learning_rate": 6.407805709885033e-08, + "loss": 3.8005, + "step": 2215000 + }, + { + "epoch": 24.37, + "learning_rate": 6.406430496726992e-08, + "loss": 3.7671, + "step": 2215500 + }, + { + "epoch": 24.38, + "learning_rate": 6.405055283568953e-08, + "loss": 3.8044, + "step": 2216000 + }, + { + "epoch": 24.39, + "learning_rate": 6.403680070410913e-08, + "loss": 3.7939, + "step": 2216500 + }, + { + "epoch": 24.39, + "learning_rate": 6.402304857252874e-08, + "loss": 3.7879, + "step": 2217000 + }, + { + "epoch": 24.4, + "learning_rate": 6.400929644094835e-08, + "loss": 3.7789, + "step": 2217500 + }, + { + "epoch": 24.4, + "learning_rate": 6.399554430936794e-08, + "loss": 3.7805, + "step": 2218000 + }, + { + "epoch": 24.41, + "learning_rate": 6.398179217778755e-08, + "loss": 3.782, + "step": 2218500 + }, + { + "epoch": 24.41, + "learning_rate": 6.396804004620716e-08, + "loss": 3.7824, + "step": 2219000 + }, + { + "epoch": 24.42, + "learning_rate": 6.395428791462676e-08, + "loss": 3.7862, + "step": 2219500 + }, + { + "epoch": 24.42, + "learning_rate": 6.394053578304637e-08, + "loss": 3.783, + "step": 2220000 + }, + { + "epoch": 24.43, + "learning_rate": 6.392678365146598e-08, + "loss": 3.7777, + "step": 2220500 + }, + { + "epoch": 24.43, + "learning_rate": 6.391303151988557e-08, + "loss": 3.793, + "step": 2221000 + }, + { + "epoch": 24.44, + "learning_rate": 6.389927938830518e-08, + "loss": 3.7864, + "step": 2221500 + }, + { + "epoch": 24.45, + "learning_rate": 6.388552725672479e-08, + "loss": 3.7589, + "step": 2222000 + }, + { + "epoch": 24.45, + "learning_rate": 6.387177512514439e-08, + "loss": 3.7835, + "step": 2222500 + }, + { + "epoch": 24.46, + "learning_rate": 6.3858022993564e-08, + "loss": 3.8005, + "step": 2223000 + }, + { + "epoch": 24.46, + "learning_rate": 6.384427086198361e-08, + "loss": 3.7688, + "step": 2223500 + }, + { + "epoch": 24.47, + "learning_rate": 6.38305187304032e-08, + "loss": 3.7897, + "step": 2224000 + }, + { + "epoch": 24.47, + "learning_rate": 6.381676659882281e-08, + "loss": 3.7793, + "step": 2224500 + }, + { + "epoch": 24.48, + "learning_rate": 6.380301446724242e-08, + "loss": 3.7966, + "step": 2225000 + }, + { + "epoch": 24.48, + "learning_rate": 6.378926233566202e-08, + "loss": 3.7577, + "step": 2225500 + }, + { + "epoch": 24.49, + "learning_rate": 6.377551020408163e-08, + "loss": 3.7956, + "step": 2226000 + }, + { + "epoch": 24.5, + "learning_rate": 6.376175807250124e-08, + "loss": 3.792, + "step": 2226500 + }, + { + "epoch": 24.5, + "learning_rate": 6.374800594092083e-08, + "loss": 3.7719, + "step": 2227000 + }, + { + "epoch": 24.51, + "learning_rate": 6.373425380934044e-08, + "loss": 3.8003, + "step": 2227500 + }, + { + "epoch": 24.51, + "learning_rate": 6.372050167776005e-08, + "loss": 3.7854, + "step": 2228000 + }, + { + "epoch": 24.52, + "learning_rate": 6.370674954617965e-08, + "loss": 3.7709, + "step": 2228500 + }, + { + "epoch": 24.52, + "learning_rate": 6.369299741459926e-08, + "loss": 3.7885, + "step": 2229000 + }, + { + "epoch": 24.53, + "learning_rate": 6.367924528301887e-08, + "loss": 3.7725, + "step": 2229500 + }, + { + "epoch": 24.53, + "learning_rate": 6.366549315143846e-08, + "loss": 3.773, + "step": 2230000 + }, + { + "epoch": 24.54, + "learning_rate": 6.365174101985807e-08, + "loss": 3.8165, + "step": 2230500 + }, + { + "epoch": 24.54, + "learning_rate": 6.363798888827768e-08, + "loss": 3.7903, + "step": 2231000 + }, + { + "epoch": 24.55, + "learning_rate": 6.362423675669728e-08, + "loss": 3.7867, + "step": 2231500 + }, + { + "epoch": 24.56, + "learning_rate": 6.361048462511689e-08, + "loss": 3.7889, + "step": 2232000 + }, + { + "epoch": 24.56, + "learning_rate": 6.35967324935365e-08, + "loss": 3.7864, + "step": 2232500 + }, + { + "epoch": 24.57, + "learning_rate": 6.358298036195609e-08, + "loss": 3.7649, + "step": 2233000 + }, + { + "epoch": 24.57, + "learning_rate": 6.35692282303757e-08, + "loss": 3.7964, + "step": 2233500 + }, + { + "epoch": 24.58, + "learning_rate": 6.355547609879531e-08, + "loss": 3.7768, + "step": 2234000 + }, + { + "epoch": 24.58, + "learning_rate": 6.354172396721491e-08, + "loss": 3.7996, + "step": 2234500 + }, + { + "epoch": 24.59, + "learning_rate": 6.352797183563452e-08, + "loss": 3.7782, + "step": 2235000 + }, + { + "epoch": 24.59, + "learning_rate": 6.351421970405413e-08, + "loss": 3.7833, + "step": 2235500 + }, + { + "epoch": 24.6, + "learning_rate": 6.350046757247372e-08, + "loss": 3.7789, + "step": 2236000 + }, + { + "epoch": 24.61, + "learning_rate": 6.348671544089333e-08, + "loss": 3.7888, + "step": 2236500 + }, + { + "epoch": 24.61, + "learning_rate": 6.347296330931294e-08, + "loss": 3.7814, + "step": 2237000 + }, + { + "epoch": 24.62, + "learning_rate": 6.345921117773254e-08, + "loss": 3.7624, + "step": 2237500 + }, + { + "epoch": 24.62, + "learning_rate": 6.344545904615215e-08, + "loss": 3.7913, + "step": 2238000 + }, + { + "epoch": 24.63, + "learning_rate": 6.343170691457176e-08, + "loss": 3.7761, + "step": 2238500 + }, + { + "epoch": 24.63, + "learning_rate": 6.341795478299135e-08, + "loss": 3.7954, + "step": 2239000 + }, + { + "epoch": 24.64, + "learning_rate": 6.340420265141096e-08, + "loss": 3.7944, + "step": 2239500 + }, + { + "epoch": 24.64, + "learning_rate": 6.339045051983057e-08, + "loss": 3.7627, + "step": 2240000 + }, + { + "epoch": 24.65, + "learning_rate": 6.337669838825017e-08, + "loss": 3.7854, + "step": 2240500 + }, + { + "epoch": 24.65, + "learning_rate": 6.336294625666978e-08, + "loss": 3.7795, + "step": 2241000 + }, + { + "epoch": 24.66, + "learning_rate": 6.334919412508938e-08, + "loss": 3.7945, + "step": 2241500 + }, + { + "epoch": 24.67, + "learning_rate": 6.333544199350898e-08, + "loss": 3.7853, + "step": 2242000 + }, + { + "epoch": 24.67, + "learning_rate": 6.332168986192859e-08, + "loss": 3.7593, + "step": 2242500 + }, + { + "epoch": 24.68, + "learning_rate": 6.33079377303482e-08, + "loss": 3.7882, + "step": 2243000 + }, + { + "epoch": 24.68, + "learning_rate": 6.32941855987678e-08, + "loss": 3.7964, + "step": 2243500 + }, + { + "epoch": 24.69, + "learning_rate": 6.32804334671874e-08, + "loss": 3.7842, + "step": 2244000 + }, + { + "epoch": 24.69, + "learning_rate": 6.326668133560701e-08, + "loss": 3.7538, + "step": 2244500 + }, + { + "epoch": 24.7, + "learning_rate": 6.325292920402661e-08, + "loss": 3.7671, + "step": 2245000 + }, + { + "epoch": 24.7, + "learning_rate": 6.323917707244622e-08, + "loss": 3.7769, + "step": 2245500 + }, + { + "epoch": 24.71, + "learning_rate": 6.322542494086583e-08, + "loss": 3.7886, + "step": 2246000 + }, + { + "epoch": 24.72, + "learning_rate": 6.321167280928544e-08, + "loss": 3.7843, + "step": 2246500 + }, + { + "epoch": 24.72, + "learning_rate": 6.319792067770503e-08, + "loss": 3.7952, + "step": 2247000 + }, + { + "epoch": 24.73, + "learning_rate": 6.318416854612464e-08, + "loss": 3.7913, + "step": 2247500 + }, + { + "epoch": 24.73, + "learning_rate": 6.317041641454425e-08, + "loss": 3.7783, + "step": 2248000 + }, + { + "epoch": 24.74, + "learning_rate": 6.315666428296386e-08, + "loss": 3.781, + "step": 2248500 + }, + { + "epoch": 24.74, + "learning_rate": 6.314291215138346e-08, + "loss": 3.7849, + "step": 2249000 + }, + { + "epoch": 24.75, + "learning_rate": 6.312916001980307e-08, + "loss": 3.7823, + "step": 2249500 + }, + { + "epoch": 24.75, + "learning_rate": 6.311540788822268e-08, + "loss": 3.7945, + "step": 2250000 + }, + { + "epoch": 24.76, + "learning_rate": 6.310165575664227e-08, + "loss": 3.772, + "step": 2250500 + }, + { + "epoch": 24.76, + "learning_rate": 6.308790362506188e-08, + "loss": 3.7838, + "step": 2251000 + }, + { + "epoch": 24.77, + "learning_rate": 6.307415149348149e-08, + "loss": 3.784, + "step": 2251500 + }, + { + "epoch": 24.78, + "learning_rate": 6.30603993619011e-08, + "loss": 3.7837, + "step": 2252000 + }, + { + "epoch": 24.78, + "learning_rate": 6.30466472303207e-08, + "loss": 3.7702, + "step": 2252500 + }, + { + "epoch": 24.79, + "learning_rate": 6.303289509874031e-08, + "loss": 3.7945, + "step": 2253000 + }, + { + "epoch": 24.79, + "learning_rate": 6.301914296715992e-08, + "loss": 3.7806, + "step": 2253500 + }, + { + "epoch": 24.8, + "learning_rate": 6.300539083557951e-08, + "loss": 3.789, + "step": 2254000 + }, + { + "epoch": 24.8, + "learning_rate": 6.299163870399912e-08, + "loss": 3.7947, + "step": 2254500 + }, + { + "epoch": 24.81, + "learning_rate": 6.297788657241873e-08, + "loss": 3.7788, + "step": 2255000 + }, + { + "epoch": 24.81, + "learning_rate": 6.296413444083833e-08, + "loss": 3.7698, + "step": 2255500 + }, + { + "epoch": 24.82, + "learning_rate": 6.295038230925794e-08, + "loss": 3.791, + "step": 2256000 + }, + { + "epoch": 24.83, + "learning_rate": 6.293663017767755e-08, + "loss": 3.7946, + "step": 2256500 + }, + { + "epoch": 24.83, + "learning_rate": 6.292287804609714e-08, + "loss": 3.7807, + "step": 2257000 + }, + { + "epoch": 24.84, + "learning_rate": 6.290912591451675e-08, + "loss": 3.7608, + "step": 2257500 + }, + { + "epoch": 24.84, + "learning_rate": 6.289537378293636e-08, + "loss": 3.7852, + "step": 2258000 + }, + { + "epoch": 24.85, + "learning_rate": 6.288162165135596e-08, + "loss": 3.7601, + "step": 2258500 + }, + { + "epoch": 24.85, + "learning_rate": 6.286786951977557e-08, + "loss": 3.7698, + "step": 2259000 + }, + { + "epoch": 24.86, + "learning_rate": 6.285411738819518e-08, + "loss": 3.7934, + "step": 2259500 + }, + { + "epoch": 24.86, + "learning_rate": 6.284036525661477e-08, + "loss": 3.7819, + "step": 2260000 + }, + { + "epoch": 24.87, + "learning_rate": 6.282661312503438e-08, + "loss": 3.7845, + "step": 2260500 + }, + { + "epoch": 24.87, + "learning_rate": 6.281286099345399e-08, + "loss": 3.7846, + "step": 2261000 + }, + { + "epoch": 24.88, + "learning_rate": 6.279910886187359e-08, + "loss": 3.7928, + "step": 2261500 + }, + { + "epoch": 24.89, + "learning_rate": 6.27853567302932e-08, + "loss": 3.788, + "step": 2262000 + }, + { + "epoch": 24.89, + "learning_rate": 6.27716045987128e-08, + "loss": 3.7788, + "step": 2262500 + }, + { + "epoch": 24.9, + "learning_rate": 6.27578524671324e-08, + "loss": 3.7765, + "step": 2263000 + }, + { + "epoch": 24.9, + "learning_rate": 6.274410033555201e-08, + "loss": 3.7985, + "step": 2263500 + }, + { + "epoch": 24.91, + "learning_rate": 6.273034820397162e-08, + "loss": 3.7623, + "step": 2264000 + }, + { + "epoch": 24.91, + "learning_rate": 6.271659607239122e-08, + "loss": 3.7797, + "step": 2264500 + }, + { + "epoch": 24.92, + "learning_rate": 6.270284394081083e-08, + "loss": 3.7569, + "step": 2265000 + }, + { + "epoch": 24.92, + "learning_rate": 6.268909180923043e-08, + "loss": 3.7794, + "step": 2265500 + }, + { + "epoch": 24.93, + "learning_rate": 6.267533967765003e-08, + "loss": 3.7702, + "step": 2266000 + }, + { + "epoch": 24.94, + "learning_rate": 6.266158754606964e-08, + "loss": 3.7864, + "step": 2266500 + }, + { + "epoch": 24.94, + "learning_rate": 6.264783541448925e-08, + "loss": 3.7948, + "step": 2267000 + }, + { + "epoch": 24.95, + "learning_rate": 6.263408328290885e-08, + "loss": 3.7727, + "step": 2267500 + }, + { + "epoch": 24.95, + "learning_rate": 6.262033115132846e-08, + "loss": 3.7818, + "step": 2268000 + }, + { + "epoch": 24.96, + "learning_rate": 6.260657901974806e-08, + "loss": 3.7972, + "step": 2268500 + }, + { + "epoch": 24.96, + "learning_rate": 6.259282688816766e-08, + "loss": 3.7806, + "step": 2269000 + }, + { + "epoch": 24.97, + "learning_rate": 6.257907475658727e-08, + "loss": 3.7722, + "step": 2269500 + }, + { + "epoch": 24.97, + "learning_rate": 6.256532262500688e-08, + "loss": 3.7786, + "step": 2270000 + }, + { + "epoch": 24.98, + "learning_rate": 6.255157049342648e-08, + "loss": 3.7767, + "step": 2270500 + }, + { + "epoch": 24.98, + "learning_rate": 6.253781836184608e-08, + "loss": 3.787, + "step": 2271000 + }, + { + "epoch": 24.99, + "learning_rate": 6.25240662302657e-08, + "loss": 3.7705, + "step": 2271500 + }, + { + "epoch": 25.0, + "learning_rate": 6.251031409868529e-08, + "loss": 3.7566, + "step": 2272000 + }, + { + "epoch": 25.0, + "eval_loss": 3.8438775539398193, + "eval_runtime": 6.1326, + "eval_samples_per_second": 253.401, + "step": 2272375 + }, + { + "epoch": 25.0, + "learning_rate": 6.24965619671049e-08, + "loss": 3.7873, + "step": 2272500 + }, + { + "epoch": 25.01, + "learning_rate": 6.248280983552451e-08, + "loss": 3.7669, + "step": 2273000 + }, + { + "epoch": 25.01, + "learning_rate": 6.24690577039441e-08, + "loss": 3.7735, + "step": 2273500 + }, + { + "epoch": 25.02, + "learning_rate": 6.245530557236371e-08, + "loss": 3.7649, + "step": 2274000 + }, + { + "epoch": 25.02, + "learning_rate": 6.244155344078332e-08, + "loss": 3.7931, + "step": 2274500 + }, + { + "epoch": 25.03, + "learning_rate": 6.242780130920292e-08, + "loss": 3.7839, + "step": 2275000 + }, + { + "epoch": 25.03, + "learning_rate": 6.241404917762253e-08, + "loss": 3.8016, + "step": 2275500 + }, + { + "epoch": 25.04, + "learning_rate": 6.240029704604214e-08, + "loss": 3.7919, + "step": 2276000 + }, + { + "epoch": 25.05, + "learning_rate": 6.238654491446173e-08, + "loss": 3.752, + "step": 2276500 + }, + { + "epoch": 25.05, + "learning_rate": 6.237279278288134e-08, + "loss": 3.7826, + "step": 2277000 + }, + { + "epoch": 25.06, + "learning_rate": 6.235904065130095e-08, + "loss": 3.7753, + "step": 2277500 + }, + { + "epoch": 25.06, + "learning_rate": 6.234528851972055e-08, + "loss": 3.7775, + "step": 2278000 + }, + { + "epoch": 25.07, + "learning_rate": 6.233153638814016e-08, + "loss": 3.7891, + "step": 2278500 + }, + { + "epoch": 25.07, + "learning_rate": 6.231778425655977e-08, + "loss": 3.7868, + "step": 2279000 + }, + { + "epoch": 25.08, + "learning_rate": 6.230403212497936e-08, + "loss": 3.7845, + "step": 2279500 + }, + { + "epoch": 25.08, + "learning_rate": 6.229027999339897e-08, + "loss": 3.7854, + "step": 2280000 + }, + { + "epoch": 25.09, + "learning_rate": 6.227652786181858e-08, + "loss": 3.7873, + "step": 2280500 + }, + { + "epoch": 25.09, + "learning_rate": 6.226277573023818e-08, + "loss": 3.788, + "step": 2281000 + }, + { + "epoch": 25.1, + "learning_rate": 6.224902359865779e-08, + "loss": 3.7844, + "step": 2281500 + }, + { + "epoch": 25.11, + "learning_rate": 6.22352714670774e-08, + "loss": 3.7674, + "step": 2282000 + }, + { + "epoch": 25.11, + "learning_rate": 6.2221519335497e-08, + "loss": 3.7884, + "step": 2282500 + }, + { + "epoch": 25.12, + "learning_rate": 6.22077672039166e-08, + "loss": 3.7587, + "step": 2283000 + }, + { + "epoch": 25.12, + "learning_rate": 6.219401507233621e-08, + "loss": 3.7868, + "step": 2283500 + }, + { + "epoch": 25.13, + "learning_rate": 6.218026294075581e-08, + "loss": 3.774, + "step": 2284000 + }, + { + "epoch": 25.13, + "learning_rate": 6.216651080917542e-08, + "loss": 3.7733, + "step": 2284500 + }, + { + "epoch": 25.14, + "learning_rate": 6.215275867759503e-08, + "loss": 3.7893, + "step": 2285000 + }, + { + "epoch": 25.14, + "learning_rate": 6.213900654601462e-08, + "loss": 3.7815, + "step": 2285500 + }, + { + "epoch": 25.15, + "learning_rate": 6.212525441443423e-08, + "loss": 3.7774, + "step": 2286000 + }, + { + "epoch": 25.16, + "learning_rate": 6.211150228285384e-08, + "loss": 3.7847, + "step": 2286500 + }, + { + "epoch": 25.16, + "learning_rate": 6.209775015127344e-08, + "loss": 3.7967, + "step": 2287000 + }, + { + "epoch": 25.17, + "learning_rate": 6.208399801969305e-08, + "loss": 3.7801, + "step": 2287500 + }, + { + "epoch": 25.17, + "learning_rate": 6.207024588811266e-08, + "loss": 3.7852, + "step": 2288000 + }, + { + "epoch": 25.18, + "learning_rate": 6.205649375653225e-08, + "loss": 3.7871, + "step": 2288500 + }, + { + "epoch": 25.18, + "learning_rate": 6.204274162495186e-08, + "loss": 3.7657, + "step": 2289000 + }, + { + "epoch": 25.19, + "learning_rate": 6.202898949337147e-08, + "loss": 3.7727, + "step": 2289500 + }, + { + "epoch": 25.19, + "learning_rate": 6.201523736179107e-08, + "loss": 3.7613, + "step": 2290000 + }, + { + "epoch": 25.2, + "learning_rate": 6.200148523021068e-08, + "loss": 3.7813, + "step": 2290500 + }, + { + "epoch": 25.2, + "learning_rate": 6.198773309863029e-08, + "loss": 3.7844, + "step": 2291000 + }, + { + "epoch": 25.21, + "learning_rate": 6.197398096704988e-08, + "loss": 3.7852, + "step": 2291500 + }, + { + "epoch": 25.22, + "learning_rate": 6.196022883546949e-08, + "loss": 3.7711, + "step": 2292000 + }, + { + "epoch": 25.22, + "learning_rate": 6.194647670388909e-08, + "loss": 3.7706, + "step": 2292500 + }, + { + "epoch": 25.23, + "learning_rate": 6.19327245723087e-08, + "loss": 3.7737, + "step": 2293000 + }, + { + "epoch": 25.23, + "learning_rate": 6.191897244072831e-08, + "loss": 3.7703, + "step": 2293500 + }, + { + "epoch": 25.24, + "learning_rate": 6.190522030914792e-08, + "loss": 3.8006, + "step": 2294000 + }, + { + "epoch": 25.24, + "learning_rate": 6.189146817756751e-08, + "loss": 3.7648, + "step": 2294500 + }, + { + "epoch": 25.25, + "learning_rate": 6.187771604598712e-08, + "loss": 3.7828, + "step": 2295000 + }, + { + "epoch": 25.25, + "learning_rate": 6.186396391440673e-08, + "loss": 3.771, + "step": 2295500 + }, + { + "epoch": 25.26, + "learning_rate": 6.185021178282634e-08, + "loss": 3.7829, + "step": 2296000 + }, + { + "epoch": 25.27, + "learning_rate": 6.183645965124594e-08, + "loss": 3.7817, + "step": 2296500 + }, + { + "epoch": 25.27, + "learning_rate": 6.182270751966555e-08, + "loss": 3.7844, + "step": 2297000 + }, + { + "epoch": 25.28, + "learning_rate": 6.180895538808515e-08, + "loss": 3.7805, + "step": 2297500 + }, + { + "epoch": 25.28, + "learning_rate": 6.179520325650475e-08, + "loss": 3.773, + "step": 2298000 + }, + { + "epoch": 25.29, + "learning_rate": 6.178145112492436e-08, + "loss": 3.7693, + "step": 2298500 + }, + { + "epoch": 25.29, + "learning_rate": 6.176769899334397e-08, + "loss": 3.7696, + "step": 2299000 + }, + { + "epoch": 25.3, + "learning_rate": 6.175394686176358e-08, + "loss": 3.7855, + "step": 2299500 + }, + { + "epoch": 25.3, + "learning_rate": 6.174019473018318e-08, + "loss": 3.7852, + "step": 2300000 + }, + { + "epoch": 25.31, + "learning_rate": 6.172644259860278e-08, + "loss": 3.7705, + "step": 2300500 + }, + { + "epoch": 25.31, + "learning_rate": 6.17126904670224e-08, + "loss": 3.7708, + "step": 2301000 + }, + { + "epoch": 25.32, + "learning_rate": 6.169893833544199e-08, + "loss": 3.7663, + "step": 2301500 + }, + { + "epoch": 25.33, + "learning_rate": 6.16851862038616e-08, + "loss": 3.7739, + "step": 2302000 + }, + { + "epoch": 25.33, + "learning_rate": 6.167143407228121e-08, + "loss": 3.7757, + "step": 2302500 + }, + { + "epoch": 25.34, + "learning_rate": 6.16576819407008e-08, + "loss": 3.7826, + "step": 2303000 + }, + { + "epoch": 25.34, + "learning_rate": 6.164392980912041e-08, + "loss": 3.8014, + "step": 2303500 + }, + { + "epoch": 25.35, + "learning_rate": 6.163017767754002e-08, + "loss": 3.7793, + "step": 2304000 + }, + { + "epoch": 25.35, + "learning_rate": 6.161642554595962e-08, + "loss": 3.7651, + "step": 2304500 + }, + { + "epoch": 25.36, + "learning_rate": 6.160267341437923e-08, + "loss": 3.7919, + "step": 2305000 + }, + { + "epoch": 25.36, + "learning_rate": 6.158892128279884e-08, + "loss": 3.7844, + "step": 2305500 + }, + { + "epoch": 25.37, + "learning_rate": 6.157516915121843e-08, + "loss": 3.7775, + "step": 2306000 + }, + { + "epoch": 25.38, + "learning_rate": 6.156141701963804e-08, + "loss": 3.7775, + "step": 2306500 + }, + { + "epoch": 25.38, + "learning_rate": 6.154766488805765e-08, + "loss": 3.782, + "step": 2307000 + }, + { + "epoch": 25.39, + "learning_rate": 6.153391275647725e-08, + "loss": 3.7555, + "step": 2307500 + }, + { + "epoch": 25.39, + "learning_rate": 6.152016062489686e-08, + "loss": 3.8002, + "step": 2308000 + }, + { + "epoch": 25.4, + "learning_rate": 6.150640849331647e-08, + "loss": 3.7858, + "step": 2308500 + }, + { + "epoch": 25.4, + "learning_rate": 6.149265636173606e-08, + "loss": 3.7874, + "step": 2309000 + }, + { + "epoch": 25.41, + "learning_rate": 6.147890423015567e-08, + "loss": 3.7941, + "step": 2309500 + }, + { + "epoch": 25.41, + "learning_rate": 6.146515209857528e-08, + "loss": 3.7707, + "step": 2310000 + }, + { + "epoch": 25.42, + "learning_rate": 6.145139996699488e-08, + "loss": 3.7573, + "step": 2310500 + }, + { + "epoch": 25.42, + "learning_rate": 6.143764783541449e-08, + "loss": 3.7707, + "step": 2311000 + }, + { + "epoch": 25.43, + "learning_rate": 6.14238957038341e-08, + "loss": 3.7959, + "step": 2311500 + }, + { + "epoch": 25.44, + "learning_rate": 6.14101435722537e-08, + "loss": 3.7978, + "step": 2312000 + }, + { + "epoch": 25.44, + "learning_rate": 6.13963914406733e-08, + "loss": 3.7869, + "step": 2312500 + }, + { + "epoch": 25.45, + "learning_rate": 6.138263930909291e-08, + "loss": 3.7771, + "step": 2313000 + }, + { + "epoch": 25.45, + "learning_rate": 6.136888717751251e-08, + "loss": 3.7888, + "step": 2313500 + }, + { + "epoch": 25.46, + "learning_rate": 6.135513504593212e-08, + "loss": 3.7739, + "step": 2314000 + }, + { + "epoch": 25.46, + "learning_rate": 6.134138291435173e-08, + "loss": 3.7694, + "step": 2314500 + }, + { + "epoch": 25.47, + "learning_rate": 6.132763078277132e-08, + "loss": 3.7725, + "step": 2315000 + }, + { + "epoch": 25.47, + "learning_rate": 6.131387865119093e-08, + "loss": 3.7655, + "step": 2315500 + }, + { + "epoch": 25.48, + "learning_rate": 6.130012651961054e-08, + "loss": 3.7822, + "step": 2316000 + }, + { + "epoch": 25.49, + "learning_rate": 6.128637438803014e-08, + "loss": 3.7942, + "step": 2316500 + }, + { + "epoch": 25.49, + "learning_rate": 6.127262225644975e-08, + "loss": 3.7736, + "step": 2317000 + }, + { + "epoch": 25.5, + "learning_rate": 6.125887012486936e-08, + "loss": 3.795, + "step": 2317500 + }, + { + "epoch": 25.5, + "learning_rate": 6.124511799328895e-08, + "loss": 3.7659, + "step": 2318000 + }, + { + "epoch": 25.51, + "learning_rate": 6.123136586170856e-08, + "loss": 3.7899, + "step": 2318500 + }, + { + "epoch": 25.51, + "learning_rate": 6.121761373012817e-08, + "loss": 3.7795, + "step": 2319000 + }, + { + "epoch": 25.52, + "learning_rate": 6.120386159854777e-08, + "loss": 3.7932, + "step": 2319500 + }, + { + "epoch": 25.52, + "learning_rate": 6.119010946696738e-08, + "loss": 3.7747, + "step": 2320000 + }, + { + "epoch": 25.53, + "learning_rate": 6.117635733538699e-08, + "loss": 3.7987, + "step": 2320500 + }, + { + "epoch": 25.53, + "learning_rate": 6.116260520380658e-08, + "loss": 3.7687, + "step": 2321000 + }, + { + "epoch": 25.54, + "learning_rate": 6.114885307222619e-08, + "loss": 3.7621, + "step": 2321500 + }, + { + "epoch": 25.55, + "learning_rate": 6.11351009406458e-08, + "loss": 3.7836, + "step": 2322000 + }, + { + "epoch": 25.55, + "learning_rate": 6.11213488090654e-08, + "loss": 3.7598, + "step": 2322500 + }, + { + "epoch": 25.56, + "learning_rate": 6.1107596677485e-08, + "loss": 3.7756, + "step": 2323000 + }, + { + "epoch": 25.56, + "learning_rate": 6.109384454590462e-08, + "loss": 3.7616, + "step": 2323500 + }, + { + "epoch": 25.57, + "learning_rate": 6.108009241432421e-08, + "loss": 3.7796, + "step": 2324000 + }, + { + "epoch": 25.57, + "learning_rate": 6.106634028274382e-08, + "loss": 3.7985, + "step": 2324500 + }, + { + "epoch": 25.58, + "learning_rate": 6.105258815116343e-08, + "loss": 3.7815, + "step": 2325000 + }, + { + "epoch": 25.58, + "learning_rate": 6.103883601958303e-08, + "loss": 3.7842, + "step": 2325500 + }, + { + "epoch": 25.59, + "learning_rate": 6.102508388800264e-08, + "loss": 3.7893, + "step": 2326000 + }, + { + "epoch": 25.6, + "learning_rate": 6.101133175642225e-08, + "loss": 3.7794, + "step": 2326500 + }, + { + "epoch": 25.6, + "learning_rate": 6.099757962484184e-08, + "loss": 3.7716, + "step": 2327000 + }, + { + "epoch": 25.61, + "learning_rate": 6.098382749326145e-08, + "loss": 3.7854, + "step": 2327500 + }, + { + "epoch": 25.61, + "learning_rate": 6.097007536168106e-08, + "loss": 3.7813, + "step": 2328000 + }, + { + "epoch": 25.62, + "learning_rate": 6.095632323010066e-08, + "loss": 3.7766, + "step": 2328500 + }, + { + "epoch": 25.62, + "learning_rate": 6.094257109852027e-08, + "loss": 3.7656, + "step": 2329000 + }, + { + "epoch": 25.63, + "learning_rate": 6.092881896693987e-08, + "loss": 3.7788, + "step": 2329500 + }, + { + "epoch": 25.63, + "learning_rate": 6.091506683535947e-08, + "loss": 3.7611, + "step": 2330000 + }, + { + "epoch": 25.64, + "learning_rate": 6.090131470377908e-08, + "loss": 3.7729, + "step": 2330500 + }, + { + "epoch": 25.64, + "learning_rate": 6.088756257219869e-08, + "loss": 3.7794, + "step": 2331000 + }, + { + "epoch": 25.65, + "learning_rate": 6.087381044061829e-08, + "loss": 3.8079, + "step": 2331500 + }, + { + "epoch": 25.66, + "learning_rate": 6.08600583090379e-08, + "loss": 3.7762, + "step": 2332000 + }, + { + "epoch": 25.66, + "learning_rate": 6.08463061774575e-08, + "loss": 3.7987, + "step": 2332500 + }, + { + "epoch": 25.67, + "learning_rate": 6.08325540458771e-08, + "loss": 3.7761, + "step": 2333000 + }, + { + "epoch": 25.67, + "learning_rate": 6.081880191429671e-08, + "loss": 3.7683, + "step": 2333500 + }, + { + "epoch": 25.68, + "learning_rate": 6.080504978271632e-08, + "loss": 3.777, + "step": 2334000 + }, + { + "epoch": 25.68, + "learning_rate": 6.079129765113592e-08, + "loss": 3.7876, + "step": 2334500 + }, + { + "epoch": 25.69, + "learning_rate": 6.077754551955552e-08, + "loss": 3.7628, + "step": 2335000 + }, + { + "epoch": 25.69, + "learning_rate": 6.076379338797513e-08, + "loss": 3.7726, + "step": 2335500 + }, + { + "epoch": 25.7, + "learning_rate": 6.075004125639473e-08, + "loss": 3.7645, + "step": 2336000 + }, + { + "epoch": 25.71, + "learning_rate": 6.073628912481434e-08, + "loss": 3.7705, + "step": 2336500 + }, + { + "epoch": 25.71, + "learning_rate": 6.072253699323395e-08, + "loss": 3.7557, + "step": 2337000 + }, + { + "epoch": 25.72, + "learning_rate": 6.070878486165355e-08, + "loss": 3.788, + "step": 2337500 + }, + { + "epoch": 25.72, + "learning_rate": 6.069503273007315e-08, + "loss": 3.7836, + "step": 2338000 + }, + { + "epoch": 25.73, + "learning_rate": 6.068128059849276e-08, + "loss": 3.7669, + "step": 2338500 + }, + { + "epoch": 25.73, + "learning_rate": 6.066752846691236e-08, + "loss": 3.7823, + "step": 2339000 + }, + { + "epoch": 25.74, + "learning_rate": 6.065377633533197e-08, + "loss": 3.7789, + "step": 2339500 + }, + { + "epoch": 25.74, + "learning_rate": 6.064002420375158e-08, + "loss": 3.7757, + "step": 2340000 + }, + { + "epoch": 25.75, + "learning_rate": 6.062627207217117e-08, + "loss": 3.7749, + "step": 2340500 + }, + { + "epoch": 25.75, + "learning_rate": 6.061251994059078e-08, + "loss": 3.7677, + "step": 2341000 + }, + { + "epoch": 25.76, + "learning_rate": 6.059876780901039e-08, + "loss": 3.7828, + "step": 2341500 + }, + { + "epoch": 25.77, + "learning_rate": 6.058501567742999e-08, + "loss": 3.7845, + "step": 2342000 + }, + { + "epoch": 25.77, + "learning_rate": 6.05712635458496e-08, + "loss": 3.7801, + "step": 2342500 + }, + { + "epoch": 25.78, + "learning_rate": 6.055751141426921e-08, + "loss": 3.7764, + "step": 2343000 + }, + { + "epoch": 25.78, + "learning_rate": 6.054375928268882e-08, + "loss": 3.7942, + "step": 2343500 + }, + { + "epoch": 25.79, + "learning_rate": 6.053000715110841e-08, + "loss": 3.7978, + "step": 2344000 + }, + { + "epoch": 25.79, + "learning_rate": 6.051625501952802e-08, + "loss": 3.7915, + "step": 2344500 + }, + { + "epoch": 25.8, + "learning_rate": 6.050250288794763e-08, + "loss": 3.7939, + "step": 2345000 + }, + { + "epoch": 25.8, + "learning_rate": 6.048875075636724e-08, + "loss": 3.7668, + "step": 2345500 + }, + { + "epoch": 25.81, + "learning_rate": 6.047499862478684e-08, + "loss": 3.7792, + "step": 2346000 + }, + { + "epoch": 25.82, + "learning_rate": 6.046124649320645e-08, + "loss": 3.779, + "step": 2346500 + }, + { + "epoch": 25.82, + "learning_rate": 6.044749436162606e-08, + "loss": 3.7851, + "step": 2347000 + }, + { + "epoch": 25.83, + "learning_rate": 6.043374223004565e-08, + "loss": 3.7814, + "step": 2347500 + }, + { + "epoch": 25.83, + "learning_rate": 6.041999009846526e-08, + "loss": 3.7667, + "step": 2348000 + }, + { + "epoch": 25.84, + "learning_rate": 6.040623796688487e-08, + "loss": 3.7815, + "step": 2348500 + }, + { + "epoch": 25.84, + "learning_rate": 6.039248583530448e-08, + "loss": 3.8019, + "step": 2349000 + }, + { + "epoch": 25.85, + "learning_rate": 6.037873370372408e-08, + "loss": 3.7686, + "step": 2349500 + }, + { + "epoch": 25.85, + "learning_rate": 6.036498157214369e-08, + "loss": 3.7932, + "step": 2350000 + }, + { + "epoch": 25.86, + "learning_rate": 6.03512294405633e-08, + "loss": 3.8067, + "step": 2350500 + }, + { + "epoch": 25.87, + "learning_rate": 6.033747730898289e-08, + "loss": 3.7819, + "step": 2351000 + }, + { + "epoch": 25.87, + "learning_rate": 6.03237251774025e-08, + "loss": 3.7826, + "step": 2351500 + }, + { + "epoch": 25.88, + "learning_rate": 6.030997304582211e-08, + "loss": 3.7856, + "step": 2352000 + }, + { + "epoch": 25.88, + "learning_rate": 6.02962209142417e-08, + "loss": 3.784, + "step": 2352500 + }, + { + "epoch": 25.89, + "learning_rate": 6.028246878266132e-08, + "loss": 3.7675, + "step": 2353000 + }, + { + "epoch": 25.89, + "learning_rate": 6.026871665108092e-08, + "loss": 3.7782, + "step": 2353500 + }, + { + "epoch": 25.9, + "learning_rate": 6.025496451950052e-08, + "loss": 3.7778, + "step": 2354000 + }, + { + "epoch": 25.9, + "learning_rate": 6.024121238792013e-08, + "loss": 3.7635, + "step": 2354500 + }, + { + "epoch": 25.91, + "learning_rate": 6.022746025633974e-08, + "loss": 3.7648, + "step": 2355000 + }, + { + "epoch": 25.91, + "learning_rate": 6.021370812475934e-08, + "loss": 3.7992, + "step": 2355500 + }, + { + "epoch": 25.92, + "learning_rate": 6.019995599317895e-08, + "loss": 3.7981, + "step": 2356000 + }, + { + "epoch": 25.93, + "learning_rate": 6.018620386159855e-08, + "loss": 3.7838, + "step": 2356500 + }, + { + "epoch": 25.93, + "learning_rate": 6.017245173001815e-08, + "loss": 3.7791, + "step": 2357000 + }, + { + "epoch": 25.94, + "learning_rate": 6.015869959843776e-08, + "loss": 3.7912, + "step": 2357500 + }, + { + "epoch": 25.94, + "learning_rate": 6.014494746685737e-08, + "loss": 3.7875, + "step": 2358000 + }, + { + "epoch": 25.95, + "learning_rate": 6.013119533527697e-08, + "loss": 3.7618, + "step": 2358500 + }, + { + "epoch": 25.95, + "learning_rate": 6.011744320369657e-08, + "loss": 3.7864, + "step": 2359000 + }, + { + "epoch": 25.96, + "learning_rate": 6.010369107211618e-08, + "loss": 3.7703, + "step": 2359500 + }, + { + "epoch": 25.96, + "learning_rate": 6.008993894053578e-08, + "loss": 3.762, + "step": 2360000 + }, + { + "epoch": 25.97, + "learning_rate": 6.007618680895539e-08, + "loss": 3.7799, + "step": 2360500 + }, + { + "epoch": 25.98, + "learning_rate": 6.0062434677375e-08, + "loss": 3.7896, + "step": 2361000 + }, + { + "epoch": 25.98, + "learning_rate": 6.00486825457946e-08, + "loss": 3.7912, + "step": 2361500 + }, + { + "epoch": 25.99, + "learning_rate": 6.00349304142142e-08, + "loss": 3.7781, + "step": 2362000 + }, + { + "epoch": 25.99, + "learning_rate": 6.002117828263381e-08, + "loss": 3.7658, + "step": 2362500 + }, + { + "epoch": 26.0, + "learning_rate": 6.000742615105341e-08, + "loss": 3.7737, + "step": 2363000 + }, + { + "epoch": 26.0, + "eval_loss": 3.842207193374634, + "eval_runtime": 6.1338, + "eval_samples_per_second": 253.352, + "step": 2363270 + }, + { + "epoch": 26.0, + "learning_rate": 5.999367401947302e-08, + "loss": 3.7872, + "step": 2363500 + }, + { + "epoch": 26.01, + "learning_rate": 5.997992188789263e-08, + "loss": 3.7856, + "step": 2364000 + }, + { + "epoch": 26.01, + "learning_rate": 5.996616975631222e-08, + "loss": 3.7764, + "step": 2364500 + }, + { + "epoch": 26.02, + "learning_rate": 5.995241762473183e-08, + "loss": 3.7921, + "step": 2365000 + }, + { + "epoch": 26.02, + "learning_rate": 5.993866549315144e-08, + "loss": 3.7861, + "step": 2365500 + }, + { + "epoch": 26.03, + "learning_rate": 5.992491336157104e-08, + "loss": 3.7851, + "step": 2366000 + }, + { + "epoch": 26.04, + "learning_rate": 5.991116122999065e-08, + "loss": 3.7833, + "step": 2366500 + }, + { + "epoch": 26.04, + "learning_rate": 5.989740909841024e-08, + "loss": 3.7879, + "step": 2367000 + }, + { + "epoch": 26.05, + "learning_rate": 5.988365696682985e-08, + "loss": 3.7698, + "step": 2367500 + }, + { + "epoch": 26.05, + "learning_rate": 5.986990483524946e-08, + "loss": 3.7789, + "step": 2368000 + }, + { + "epoch": 26.06, + "learning_rate": 5.985615270366906e-08, + "loss": 3.7655, + "step": 2368500 + }, + { + "epoch": 26.06, + "learning_rate": 5.984240057208867e-08, + "loss": 3.7814, + "step": 2369000 + }, + { + "epoch": 26.07, + "learning_rate": 5.982864844050828e-08, + "loss": 3.7753, + "step": 2369500 + }, + { + "epoch": 26.07, + "learning_rate": 5.981489630892787e-08, + "loss": 3.777, + "step": 2370000 + }, + { + "epoch": 26.08, + "learning_rate": 5.980114417734748e-08, + "loss": 3.7726, + "step": 2370500 + }, + { + "epoch": 26.09, + "learning_rate": 5.978739204576709e-08, + "loss": 3.7943, + "step": 2371000 + }, + { + "epoch": 26.09, + "learning_rate": 5.977363991418669e-08, + "loss": 3.7709, + "step": 2371500 + }, + { + "epoch": 26.1, + "learning_rate": 5.97598877826063e-08, + "loss": 3.7716, + "step": 2372000 + }, + { + "epoch": 26.1, + "learning_rate": 5.974613565102591e-08, + "loss": 3.7818, + "step": 2372500 + }, + { + "epoch": 26.11, + "learning_rate": 5.97323835194455e-08, + "loss": 3.794, + "step": 2373000 + }, + { + "epoch": 26.11, + "learning_rate": 5.971863138786511e-08, + "loss": 3.7818, + "step": 2373500 + }, + { + "epoch": 26.12, + "learning_rate": 5.970487925628472e-08, + "loss": 3.7946, + "step": 2374000 + }, + { + "epoch": 26.12, + "learning_rate": 5.969112712470432e-08, + "loss": 3.7899, + "step": 2374500 + }, + { + "epoch": 26.13, + "learning_rate": 5.967737499312393e-08, + "loss": 3.7919, + "step": 2375000 + }, + { + "epoch": 26.13, + "learning_rate": 5.966362286154354e-08, + "loss": 3.7795, + "step": 2375500 + }, + { + "epoch": 26.14, + "learning_rate": 5.964987072996313e-08, + "loss": 3.7831, + "step": 2376000 + }, + { + "epoch": 26.15, + "learning_rate": 5.963611859838274e-08, + "loss": 3.7806, + "step": 2376500 + }, + { + "epoch": 26.15, + "learning_rate": 5.962236646680235e-08, + "loss": 3.7736, + "step": 2377000 + }, + { + "epoch": 26.16, + "learning_rate": 5.960861433522195e-08, + "loss": 3.763, + "step": 2377500 + }, + { + "epoch": 26.16, + "learning_rate": 5.959486220364156e-08, + "loss": 3.7864, + "step": 2378000 + }, + { + "epoch": 26.17, + "learning_rate": 5.958111007206117e-08, + "loss": 3.7685, + "step": 2378500 + }, + { + "epoch": 26.17, + "learning_rate": 5.956735794048077e-08, + "loss": 3.7795, + "step": 2379000 + }, + { + "epoch": 26.18, + "learning_rate": 5.955360580890037e-08, + "loss": 3.7827, + "step": 2379500 + }, + { + "epoch": 26.18, + "learning_rate": 5.953985367731998e-08, + "loss": 3.7858, + "step": 2380000 + }, + { + "epoch": 26.19, + "learning_rate": 5.9526101545739585e-08, + "loss": 3.7505, + "step": 2380500 + }, + { + "epoch": 26.2, + "learning_rate": 5.9512349414159194e-08, + "loss": 3.7931, + "step": 2381000 + }, + { + "epoch": 26.2, + "learning_rate": 5.94985972825788e-08, + "loss": 3.7812, + "step": 2381500 + }, + { + "epoch": 26.21, + "learning_rate": 5.94848451509984e-08, + "loss": 3.7708, + "step": 2382000 + }, + { + "epoch": 26.21, + "learning_rate": 5.947109301941801e-08, + "loss": 3.7741, + "step": 2382500 + }, + { + "epoch": 26.22, + "learning_rate": 5.945734088783761e-08, + "loss": 3.7581, + "step": 2383000 + }, + { + "epoch": 26.22, + "learning_rate": 5.9443588756257214e-08, + "loss": 3.7857, + "step": 2383500 + }, + { + "epoch": 26.23, + "learning_rate": 5.9429836624676824e-08, + "loss": 3.7701, + "step": 2384000 + }, + { + "epoch": 26.23, + "learning_rate": 5.941608449309643e-08, + "loss": 3.7712, + "step": 2384500 + }, + { + "epoch": 26.24, + "learning_rate": 5.940233236151603e-08, + "loss": 3.7747, + "step": 2385000 + }, + { + "epoch": 26.24, + "learning_rate": 5.938858022993564e-08, + "loss": 3.7815, + "step": 2385500 + }, + { + "epoch": 26.25, + "learning_rate": 5.937482809835525e-08, + "loss": 3.7773, + "step": 2386000 + }, + { + "epoch": 26.26, + "learning_rate": 5.9361075966774844e-08, + "loss": 3.7711, + "step": 2386500 + }, + { + "epoch": 26.26, + "learning_rate": 5.934732383519445e-08, + "loss": 3.7903, + "step": 2387000 + }, + { + "epoch": 26.27, + "learning_rate": 5.933357170361406e-08, + "loss": 3.7735, + "step": 2387500 + }, + { + "epoch": 26.27, + "learning_rate": 5.931981957203366e-08, + "loss": 3.7697, + "step": 2388000 + }, + { + "epoch": 26.28, + "learning_rate": 5.930606744045327e-08, + "loss": 3.7602, + "step": 2388500 + }, + { + "epoch": 26.28, + "learning_rate": 5.929231530887288e-08, + "loss": 3.7904, + "step": 2389000 + }, + { + "epoch": 26.29, + "learning_rate": 5.9278563177292473e-08, + "loss": 3.7805, + "step": 2389500 + }, + { + "epoch": 26.29, + "learning_rate": 5.926481104571208e-08, + "loss": 3.7785, + "step": 2390000 + }, + { + "epoch": 26.3, + "learning_rate": 5.925105891413169e-08, + "loss": 3.7777, + "step": 2390500 + }, + { + "epoch": 26.31, + "learning_rate": 5.923730678255129e-08, + "loss": 3.7741, + "step": 2391000 + }, + { + "epoch": 26.31, + "learning_rate": 5.92235546509709e-08, + "loss": 3.7773, + "step": 2391500 + }, + { + "epoch": 26.32, + "learning_rate": 5.920980251939051e-08, + "loss": 3.7667, + "step": 2392000 + }, + { + "epoch": 26.32, + "learning_rate": 5.91960503878101e-08, + "loss": 3.7742, + "step": 2392500 + }, + { + "epoch": 26.33, + "learning_rate": 5.918229825622971e-08, + "loss": 3.7687, + "step": 2393000 + }, + { + "epoch": 26.33, + "learning_rate": 5.916854612464932e-08, + "loss": 3.7527, + "step": 2393500 + }, + { + "epoch": 26.34, + "learning_rate": 5.915479399306892e-08, + "loss": 3.774, + "step": 2394000 + }, + { + "epoch": 26.34, + "learning_rate": 5.914104186148853e-08, + "loss": 3.7849, + "step": 2394500 + }, + { + "epoch": 26.35, + "learning_rate": 5.9127289729908136e-08, + "loss": 3.7754, + "step": 2395000 + }, + { + "epoch": 26.35, + "learning_rate": 5.911353759832773e-08, + "loss": 3.7695, + "step": 2395500 + }, + { + "epoch": 26.36, + "learning_rate": 5.909978546674734e-08, + "loss": 3.7717, + "step": 2396000 + }, + { + "epoch": 26.37, + "learning_rate": 5.908603333516695e-08, + "loss": 3.7749, + "step": 2396500 + }, + { + "epoch": 26.37, + "learning_rate": 5.907228120358655e-08, + "loss": 3.7851, + "step": 2397000 + }, + { + "epoch": 26.38, + "learning_rate": 5.905852907200616e-08, + "loss": 3.7749, + "step": 2397500 + }, + { + "epoch": 26.38, + "learning_rate": 5.9044776940425766e-08, + "loss": 3.8007, + "step": 2398000 + }, + { + "epoch": 26.39, + "learning_rate": 5.903102480884536e-08, + "loss": 3.7722, + "step": 2398500 + }, + { + "epoch": 26.39, + "learning_rate": 5.901727267726497e-08, + "loss": 3.7644, + "step": 2399000 + }, + { + "epoch": 26.4, + "learning_rate": 5.900352054568458e-08, + "loss": 3.7884, + "step": 2399500 + }, + { + "epoch": 26.4, + "learning_rate": 5.898976841410418e-08, + "loss": 3.7847, + "step": 2400000 + }, + { + "epoch": 26.41, + "learning_rate": 5.8976016282523786e-08, + "loss": 3.7774, + "step": 2400500 + }, + { + "epoch": 26.42, + "learning_rate": 5.8962264150943396e-08, + "loss": 3.7701, + "step": 2401000 + }, + { + "epoch": 26.42, + "learning_rate": 5.894851201936299e-08, + "loss": 3.7751, + "step": 2401500 + }, + { + "epoch": 26.43, + "learning_rate": 5.89347598877826e-08, + "loss": 3.7587, + "step": 2402000 + }, + { + "epoch": 26.43, + "learning_rate": 5.892100775620221e-08, + "loss": 3.7736, + "step": 2402500 + }, + { + "epoch": 26.44, + "learning_rate": 5.890725562462181e-08, + "loss": 3.7764, + "step": 2403000 + }, + { + "epoch": 26.44, + "learning_rate": 5.8893503493041416e-08, + "loss": 3.7746, + "step": 2403500 + }, + { + "epoch": 26.45, + "learning_rate": 5.8879751361461025e-08, + "loss": 3.7611, + "step": 2404000 + }, + { + "epoch": 26.45, + "learning_rate": 5.886599922988063e-08, + "loss": 3.7949, + "step": 2404500 + }, + { + "epoch": 26.46, + "learning_rate": 5.885224709830023e-08, + "loss": 3.7623, + "step": 2405000 + }, + { + "epoch": 26.46, + "learning_rate": 5.883849496671984e-08, + "loss": 3.7845, + "step": 2405500 + }, + { + "epoch": 26.47, + "learning_rate": 5.882474283513944e-08, + "loss": 3.7728, + "step": 2406000 + }, + { + "epoch": 26.48, + "learning_rate": 5.881099070355905e-08, + "loss": 3.7767, + "step": 2406500 + }, + { + "epoch": 26.48, + "learning_rate": 5.8797238571978655e-08, + "loss": 3.7894, + "step": 2407000 + }, + { + "epoch": 26.49, + "learning_rate": 5.878348644039826e-08, + "loss": 3.7906, + "step": 2407500 + }, + { + "epoch": 26.49, + "learning_rate": 5.876973430881787e-08, + "loss": 3.7745, + "step": 2408000 + }, + { + "epoch": 26.5, + "learning_rate": 5.8755982177237476e-08, + "loss": 3.7815, + "step": 2408500 + }, + { + "epoch": 26.5, + "learning_rate": 5.874223004565707e-08, + "loss": 3.7855, + "step": 2409000 + }, + { + "epoch": 26.51, + "learning_rate": 5.872847791407668e-08, + "loss": 3.7589, + "step": 2409500 + }, + { + "epoch": 26.51, + "learning_rate": 5.871472578249629e-08, + "loss": 3.7803, + "step": 2410000 + }, + { + "epoch": 26.52, + "learning_rate": 5.870097365091589e-08, + "loss": 3.7816, + "step": 2410500 + }, + { + "epoch": 26.53, + "learning_rate": 5.8687221519335497e-08, + "loss": 3.7804, + "step": 2411000 + }, + { + "epoch": 26.53, + "learning_rate": 5.8673469387755106e-08, + "loss": 3.7652, + "step": 2411500 + }, + { + "epoch": 26.54, + "learning_rate": 5.86597172561747e-08, + "loss": 3.7813, + "step": 2412000 + }, + { + "epoch": 26.54, + "learning_rate": 5.864596512459431e-08, + "loss": 3.7654, + "step": 2412500 + }, + { + "epoch": 26.55, + "learning_rate": 5.863221299301392e-08, + "loss": 3.7867, + "step": 2413000 + }, + { + "epoch": 26.55, + "learning_rate": 5.861846086143352e-08, + "loss": 3.7677, + "step": 2413500 + }, + { + "epoch": 26.56, + "learning_rate": 5.8604708729853126e-08, + "loss": 3.761, + "step": 2414000 + }, + { + "epoch": 26.56, + "learning_rate": 5.8590956598272735e-08, + "loss": 3.77, + "step": 2414500 + }, + { + "epoch": 26.57, + "learning_rate": 5.857720446669233e-08, + "loss": 3.7598, + "step": 2415000 + }, + { + "epoch": 26.57, + "learning_rate": 5.856345233511194e-08, + "loss": 3.7889, + "step": 2415500 + }, + { + "epoch": 26.58, + "learning_rate": 5.854970020353155e-08, + "loss": 3.7803, + "step": 2416000 + }, + { + "epoch": 26.59, + "learning_rate": 5.8535948071951146e-08, + "loss": 3.7704, + "step": 2416500 + }, + { + "epoch": 26.59, + "learning_rate": 5.8522195940370756e-08, + "loss": 3.7908, + "step": 2417000 + }, + { + "epoch": 26.6, + "learning_rate": 5.8508443808790365e-08, + "loss": 3.7793, + "step": 2417500 + }, + { + "epoch": 26.6, + "learning_rate": 5.849469167720996e-08, + "loss": 3.7613, + "step": 2418000 + }, + { + "epoch": 26.61, + "learning_rate": 5.848093954562957e-08, + "loss": 3.759, + "step": 2418500 + }, + { + "epoch": 26.61, + "learning_rate": 5.846718741404918e-08, + "loss": 3.7916, + "step": 2419000 + }, + { + "epoch": 26.62, + "learning_rate": 5.8453435282468776e-08, + "loss": 3.7645, + "step": 2419500 + }, + { + "epoch": 26.62, + "learning_rate": 5.8439683150888385e-08, + "loss": 3.7631, + "step": 2420000 + }, + { + "epoch": 26.63, + "learning_rate": 5.8425931019307995e-08, + "loss": 3.7749, + "step": 2420500 + }, + { + "epoch": 26.64, + "learning_rate": 5.841217888772759e-08, + "loss": 3.7805, + "step": 2421000 + }, + { + "epoch": 26.64, + "learning_rate": 5.83984267561472e-08, + "loss": 3.7797, + "step": 2421500 + }, + { + "epoch": 26.65, + "learning_rate": 5.838467462456681e-08, + "loss": 3.7695, + "step": 2422000 + }, + { + "epoch": 26.65, + "learning_rate": 5.8370922492986406e-08, + "loss": 3.7667, + "step": 2422500 + }, + { + "epoch": 26.66, + "learning_rate": 5.8357170361406015e-08, + "loss": 3.7953, + "step": 2423000 + }, + { + "epoch": 26.66, + "learning_rate": 5.8343418229825624e-08, + "loss": 3.7707, + "step": 2423500 + }, + { + "epoch": 26.67, + "learning_rate": 5.832966609824522e-08, + "loss": 3.7805, + "step": 2424000 + }, + { + "epoch": 26.67, + "learning_rate": 5.831591396666483e-08, + "loss": 3.7897, + "step": 2424500 + }, + { + "epoch": 26.68, + "learning_rate": 5.830216183508444e-08, + "loss": 3.7988, + "step": 2425000 + }, + { + "epoch": 26.68, + "learning_rate": 5.8288409703504035e-08, + "loss": 3.7814, + "step": 2425500 + }, + { + "epoch": 26.69, + "learning_rate": 5.8274657571923644e-08, + "loss": 3.7742, + "step": 2426000 + }, + { + "epoch": 26.7, + "learning_rate": 5.8260905440343254e-08, + "loss": 3.7692, + "step": 2426500 + }, + { + "epoch": 26.7, + "learning_rate": 5.824715330876285e-08, + "loss": 3.786, + "step": 2427000 + }, + { + "epoch": 26.71, + "learning_rate": 5.823340117718246e-08, + "loss": 3.7545, + "step": 2427500 + }, + { + "epoch": 26.71, + "learning_rate": 5.821964904560207e-08, + "loss": 3.7692, + "step": 2428000 + }, + { + "epoch": 26.72, + "learning_rate": 5.820589691402167e-08, + "loss": 3.7482, + "step": 2428500 + }, + { + "epoch": 26.72, + "learning_rate": 5.8192144782441274e-08, + "loss": 3.7687, + "step": 2429000 + }, + { + "epoch": 26.73, + "learning_rate": 5.8178392650860883e-08, + "loss": 3.7766, + "step": 2429500 + }, + { + "epoch": 26.73, + "learning_rate": 5.8164640519280486e-08, + "loss": 3.7693, + "step": 2430000 + }, + { + "epoch": 26.74, + "learning_rate": 5.8150888387700095e-08, + "loss": 3.7813, + "step": 2430500 + }, + { + "epoch": 26.75, + "learning_rate": 5.81371362561197e-08, + "loss": 3.7796, + "step": 2431000 + }, + { + "epoch": 26.75, + "learning_rate": 5.81233841245393e-08, + "loss": 3.7788, + "step": 2431500 + }, + { + "epoch": 26.76, + "learning_rate": 5.810963199295891e-08, + "loss": 3.7897, + "step": 2432000 + }, + { + "epoch": 26.76, + "learning_rate": 5.809587986137851e-08, + "loss": 3.7686, + "step": 2432500 + }, + { + "epoch": 26.77, + "learning_rate": 5.8082127729798116e-08, + "loss": 3.7768, + "step": 2433000 + }, + { + "epoch": 26.77, + "learning_rate": 5.8068375598217725e-08, + "loss": 3.7731, + "step": 2433500 + }, + { + "epoch": 26.78, + "learning_rate": 5.8054623466637334e-08, + "loss": 3.7948, + "step": 2434000 + }, + { + "epoch": 26.78, + "learning_rate": 5.804087133505693e-08, + "loss": 3.7717, + "step": 2434500 + }, + { + "epoch": 26.79, + "learning_rate": 5.802711920347654e-08, + "loss": 3.7593, + "step": 2435000 + }, + { + "epoch": 26.79, + "learning_rate": 5.801336707189615e-08, + "loss": 3.7777, + "step": 2435500 + }, + { + "epoch": 26.8, + "learning_rate": 5.7999614940315745e-08, + "loss": 3.772, + "step": 2436000 + }, + { + "epoch": 26.81, + "learning_rate": 5.7985862808735355e-08, + "loss": 3.7756, + "step": 2436500 + }, + { + "epoch": 26.81, + "learning_rate": 5.7972110677154964e-08, + "loss": 3.7812, + "step": 2437000 + }, + { + "epoch": 26.82, + "learning_rate": 5.795835854557456e-08, + "loss": 3.7801, + "step": 2437500 + }, + { + "epoch": 26.82, + "learning_rate": 5.794460641399417e-08, + "loss": 3.7796, + "step": 2438000 + }, + { + "epoch": 26.83, + "learning_rate": 5.793085428241378e-08, + "loss": 3.7765, + "step": 2438500 + }, + { + "epoch": 26.83, + "learning_rate": 5.7917102150833375e-08, + "loss": 3.7594, + "step": 2439000 + }, + { + "epoch": 26.84, + "learning_rate": 5.7903350019252984e-08, + "loss": 3.7695, + "step": 2439500 + }, + { + "epoch": 26.84, + "learning_rate": 5.7889597887672594e-08, + "loss": 3.7611, + "step": 2440000 + }, + { + "epoch": 26.85, + "learning_rate": 5.787584575609219e-08, + "loss": 3.7929, + "step": 2440500 + }, + { + "epoch": 26.86, + "learning_rate": 5.78620936245118e-08, + "loss": 3.7723, + "step": 2441000 + }, + { + "epoch": 26.86, + "learning_rate": 5.7848341492931395e-08, + "loss": 3.7771, + "step": 2441500 + }, + { + "epoch": 26.87, + "learning_rate": 5.7834589361351005e-08, + "loss": 3.7813, + "step": 2442000 + }, + { + "epoch": 26.87, + "learning_rate": 5.7820837229770614e-08, + "loss": 3.7879, + "step": 2442500 + }, + { + "epoch": 26.88, + "learning_rate": 5.780708509819021e-08, + "loss": 3.7668, + "step": 2443000 + }, + { + "epoch": 26.88, + "learning_rate": 5.779333296660982e-08, + "loss": 3.7743, + "step": 2443500 + }, + { + "epoch": 26.89, + "learning_rate": 5.777958083502943e-08, + "loss": 3.7716, + "step": 2444000 + }, + { + "epoch": 26.89, + "learning_rate": 5.7765828703449025e-08, + "loss": 3.7499, + "step": 2444500 + }, + { + "epoch": 26.9, + "learning_rate": 5.7752076571868634e-08, + "loss": 3.785, + "step": 2445000 + }, + { + "epoch": 26.9, + "learning_rate": 5.7738324440288243e-08, + "loss": 3.7754, + "step": 2445500 + }, + { + "epoch": 26.91, + "learning_rate": 5.772457230870784e-08, + "loss": 3.7717, + "step": 2446000 + }, + { + "epoch": 26.92, + "learning_rate": 5.771082017712745e-08, + "loss": 3.7622, + "step": 2446500 + }, + { + "epoch": 26.92, + "learning_rate": 5.769706804554706e-08, + "loss": 3.7753, + "step": 2447000 + }, + { + "epoch": 26.93, + "learning_rate": 5.7683315913966654e-08, + "loss": 3.7747, + "step": 2447500 + }, + { + "epoch": 26.93, + "learning_rate": 5.7669563782386264e-08, + "loss": 3.7903, + "step": 2448000 + }, + { + "epoch": 26.94, + "learning_rate": 5.765581165080587e-08, + "loss": 3.7934, + "step": 2448500 + }, + { + "epoch": 26.94, + "learning_rate": 5.764205951922547e-08, + "loss": 3.7794, + "step": 2449000 + }, + { + "epoch": 26.95, + "learning_rate": 5.762830738764508e-08, + "loss": 3.7702, + "step": 2449500 + }, + { + "epoch": 26.95, + "learning_rate": 5.761455525606469e-08, + "loss": 3.7847, + "step": 2450000 + }, + { + "epoch": 26.96, + "learning_rate": 5.760080312448429e-08, + "loss": 3.7946, + "step": 2450500 + }, + { + "epoch": 26.97, + "learning_rate": 5.758705099290389e-08, + "loss": 3.7673, + "step": 2451000 + }, + { + "epoch": 26.97, + "learning_rate": 5.75732988613235e-08, + "loss": 3.7819, + "step": 2451500 + }, + { + "epoch": 26.98, + "learning_rate": 5.7559546729743105e-08, + "loss": 3.7858, + "step": 2452000 + }, + { + "epoch": 26.98, + "learning_rate": 5.754579459816271e-08, + "loss": 3.7811, + "step": 2452500 + }, + { + "epoch": 26.99, + "learning_rate": 5.753204246658232e-08, + "loss": 3.7859, + "step": 2453000 + }, + { + "epoch": 26.99, + "learning_rate": 5.751829033500192e-08, + "loss": 3.7775, + "step": 2453500 + }, + { + "epoch": 27.0, + "learning_rate": 5.750453820342153e-08, + "loss": 3.7925, + "step": 2454000 + }, + { + "epoch": 27.0, + "eval_loss": 3.840773105621338, + "eval_runtime": 6.1339, + "eval_samples_per_second": 253.348, + "step": 2454165 + }, + { + "epoch": 27.0, + "learning_rate": 5.749078607184113e-08, + "loss": 3.7784, + "step": 2454500 + }, + { + "epoch": 27.01, + "learning_rate": 5.7477033940260735e-08, + "loss": 3.7795, + "step": 2455000 + }, + { + "epoch": 27.01, + "learning_rate": 5.7463281808680344e-08, + "loss": 3.7811, + "step": 2455500 + }, + { + "epoch": 27.02, + "learning_rate": 5.7449529677099954e-08, + "loss": 3.7829, + "step": 2456000 + }, + { + "epoch": 27.03, + "learning_rate": 5.743577754551955e-08, + "loss": 3.779, + "step": 2456500 + }, + { + "epoch": 27.03, + "learning_rate": 5.742202541393916e-08, + "loss": 3.7681, + "step": 2457000 + }, + { + "epoch": 27.04, + "learning_rate": 5.740827328235877e-08, + "loss": 3.782, + "step": 2457500 + }, + { + "epoch": 27.04, + "learning_rate": 5.7394521150778365e-08, + "loss": 3.7715, + "step": 2458000 + }, + { + "epoch": 27.05, + "learning_rate": 5.7380769019197974e-08, + "loss": 3.7683, + "step": 2458500 + }, + { + "epoch": 27.05, + "learning_rate": 5.736701688761758e-08, + "loss": 3.7647, + "step": 2459000 + }, + { + "epoch": 27.06, + "learning_rate": 5.735326475603718e-08, + "loss": 3.7747, + "step": 2459500 + }, + { + "epoch": 27.06, + "learning_rate": 5.733951262445679e-08, + "loss": 3.7878, + "step": 2460000 + }, + { + "epoch": 27.07, + "learning_rate": 5.73257604928764e-08, + "loss": 3.7735, + "step": 2460500 + }, + { + "epoch": 27.08, + "learning_rate": 5.7312008361295994e-08, + "loss": 3.7622, + "step": 2461000 + }, + { + "epoch": 27.08, + "learning_rate": 5.7298256229715603e-08, + "loss": 3.773, + "step": 2461500 + }, + { + "epoch": 27.09, + "learning_rate": 5.728450409813521e-08, + "loss": 3.7763, + "step": 2462000 + }, + { + "epoch": 27.09, + "learning_rate": 5.727075196655481e-08, + "loss": 3.7688, + "step": 2462500 + }, + { + "epoch": 27.1, + "learning_rate": 5.725699983497442e-08, + "loss": 3.7821, + "step": 2463000 + }, + { + "epoch": 27.1, + "learning_rate": 5.724324770339403e-08, + "loss": 3.7752, + "step": 2463500 + }, + { + "epoch": 27.11, + "learning_rate": 5.7229495571813624e-08, + "loss": 3.7674, + "step": 2464000 + }, + { + "epoch": 27.11, + "learning_rate": 5.721574344023323e-08, + "loss": 3.7745, + "step": 2464500 + }, + { + "epoch": 27.12, + "learning_rate": 5.720199130865284e-08, + "loss": 3.7602, + "step": 2465000 + }, + { + "epoch": 27.12, + "learning_rate": 5.718823917707244e-08, + "loss": 3.7678, + "step": 2465500 + }, + { + "epoch": 27.13, + "learning_rate": 5.717448704549205e-08, + "loss": 3.7643, + "step": 2466000 + }, + { + "epoch": 27.14, + "learning_rate": 5.716073491391166e-08, + "loss": 3.7778, + "step": 2466500 + }, + { + "epoch": 27.14, + "learning_rate": 5.7146982782331253e-08, + "loss": 3.7777, + "step": 2467000 + }, + { + "epoch": 27.15, + "learning_rate": 5.713323065075086e-08, + "loss": 3.7687, + "step": 2467500 + }, + { + "epoch": 27.15, + "learning_rate": 5.711947851917047e-08, + "loss": 3.7464, + "step": 2468000 + }, + { + "epoch": 27.16, + "learning_rate": 5.710572638759007e-08, + "loss": 3.77, + "step": 2468500 + }, + { + "epoch": 27.16, + "learning_rate": 5.709197425600968e-08, + "loss": 3.7767, + "step": 2469000 + }, + { + "epoch": 27.17, + "learning_rate": 5.707822212442929e-08, + "loss": 3.7757, + "step": 2469500 + }, + { + "epoch": 27.17, + "learning_rate": 5.706446999284888e-08, + "loss": 3.7763, + "step": 2470000 + }, + { + "epoch": 27.18, + "learning_rate": 5.705071786126849e-08, + "loss": 3.7761, + "step": 2470500 + }, + { + "epoch": 27.19, + "learning_rate": 5.70369657296881e-08, + "loss": 3.7845, + "step": 2471000 + }, + { + "epoch": 27.19, + "learning_rate": 5.70232135981077e-08, + "loss": 3.7746, + "step": 2471500 + }, + { + "epoch": 27.2, + "learning_rate": 5.700946146652731e-08, + "loss": 3.788, + "step": 2472000 + }, + { + "epoch": 27.2, + "learning_rate": 5.6995709334946916e-08, + "loss": 3.7648, + "step": 2472500 + }, + { + "epoch": 27.21, + "learning_rate": 5.698195720336651e-08, + "loss": 3.784, + "step": 2473000 + }, + { + "epoch": 27.21, + "learning_rate": 5.696820507178612e-08, + "loss": 3.7723, + "step": 2473500 + }, + { + "epoch": 27.22, + "learning_rate": 5.695445294020573e-08, + "loss": 3.7671, + "step": 2474000 + }, + { + "epoch": 27.22, + "learning_rate": 5.694070080862533e-08, + "loss": 3.7665, + "step": 2474500 + }, + { + "epoch": 27.23, + "learning_rate": 5.6926948677044937e-08, + "loss": 3.7761, + "step": 2475000 + }, + { + "epoch": 27.23, + "learning_rate": 5.6913196545464546e-08, + "loss": 3.7808, + "step": 2475500 + }, + { + "epoch": 27.24, + "learning_rate": 5.689944441388415e-08, + "loss": 3.7728, + "step": 2476000 + }, + { + "epoch": 27.25, + "learning_rate": 5.688569228230375e-08, + "loss": 3.7791, + "step": 2476500 + }, + { + "epoch": 27.25, + "learning_rate": 5.687194015072336e-08, + "loss": 3.7714, + "step": 2477000 + }, + { + "epoch": 27.26, + "learning_rate": 5.6858188019142963e-08, + "loss": 3.7593, + "step": 2477500 + }, + { + "epoch": 27.26, + "learning_rate": 5.684443588756257e-08, + "loss": 3.7739, + "step": 2478000 + }, + { + "epoch": 27.27, + "learning_rate": 5.6830683755982176e-08, + "loss": 3.7801, + "step": 2478500 + }, + { + "epoch": 27.27, + "learning_rate": 5.681693162440178e-08, + "loss": 3.7791, + "step": 2479000 + }, + { + "epoch": 27.28, + "learning_rate": 5.680317949282139e-08, + "loss": 3.7957, + "step": 2479500 + }, + { + "epoch": 27.28, + "learning_rate": 5.678942736124099e-08, + "loss": 3.8024, + "step": 2480000 + }, + { + "epoch": 27.29, + "learning_rate": 5.677567522966059e-08, + "loss": 3.7799, + "step": 2480500 + }, + { + "epoch": 27.3, + "learning_rate": 5.67619230980802e-08, + "loss": 3.7696, + "step": 2481000 + }, + { + "epoch": 27.3, + "learning_rate": 5.674817096649981e-08, + "loss": 3.7921, + "step": 2481500 + }, + { + "epoch": 27.31, + "learning_rate": 5.673441883491941e-08, + "loss": 3.7616, + "step": 2482000 + }, + { + "epoch": 27.31, + "learning_rate": 5.672066670333902e-08, + "loss": 3.7644, + "step": 2482500 + }, + { + "epoch": 27.32, + "learning_rate": 5.6706914571758627e-08, + "loss": 3.7694, + "step": 2483000 + }, + { + "epoch": 27.32, + "learning_rate": 5.669316244017822e-08, + "loss": 3.8009, + "step": 2483500 + }, + { + "epoch": 27.33, + "learning_rate": 5.667941030859783e-08, + "loss": 3.7742, + "step": 2484000 + }, + { + "epoch": 27.33, + "learning_rate": 5.666565817701744e-08, + "loss": 3.7777, + "step": 2484500 + }, + { + "epoch": 27.34, + "learning_rate": 5.665190604543704e-08, + "loss": 3.7744, + "step": 2485000 + }, + { + "epoch": 27.34, + "learning_rate": 5.663815391385665e-08, + "loss": 3.7674, + "step": 2485500 + }, + { + "epoch": 27.35, + "learning_rate": 5.6624401782276256e-08, + "loss": 3.76, + "step": 2486000 + }, + { + "epoch": 27.36, + "learning_rate": 5.661064965069585e-08, + "loss": 3.7629, + "step": 2486500 + }, + { + "epoch": 27.36, + "learning_rate": 5.659689751911546e-08, + "loss": 3.7733, + "step": 2487000 + }, + { + "epoch": 27.37, + "learning_rate": 5.658314538753507e-08, + "loss": 3.7698, + "step": 2487500 + }, + { + "epoch": 27.37, + "learning_rate": 5.656939325595467e-08, + "loss": 3.7703, + "step": 2488000 + }, + { + "epoch": 27.38, + "learning_rate": 5.6555641124374276e-08, + "loss": 3.7668, + "step": 2488500 + }, + { + "epoch": 27.38, + "learning_rate": 5.6541888992793886e-08, + "loss": 3.7885, + "step": 2489000 + }, + { + "epoch": 27.39, + "learning_rate": 5.652813686121348e-08, + "loss": 3.745, + "step": 2489500 + }, + { + "epoch": 27.39, + "learning_rate": 5.651438472963309e-08, + "loss": 3.7769, + "step": 2490000 + }, + { + "epoch": 27.4, + "learning_rate": 5.65006325980527e-08, + "loss": 3.7878, + "step": 2490500 + }, + { + "epoch": 27.41, + "learning_rate": 5.6486880466472297e-08, + "loss": 3.773, + "step": 2491000 + }, + { + "epoch": 27.41, + "learning_rate": 5.6473128334891906e-08, + "loss": 3.7826, + "step": 2491500 + }, + { + "epoch": 27.42, + "learning_rate": 5.6459376203311515e-08, + "loss": 3.7878, + "step": 2492000 + }, + { + "epoch": 27.42, + "learning_rate": 5.644562407173111e-08, + "loss": 3.7658, + "step": 2492500 + }, + { + "epoch": 27.43, + "learning_rate": 5.643187194015072e-08, + "loss": 3.7805, + "step": 2493000 + }, + { + "epoch": 27.43, + "learning_rate": 5.641811980857033e-08, + "loss": 3.7743, + "step": 2493500 + }, + { + "epoch": 27.44, + "learning_rate": 5.6404367676989926e-08, + "loss": 3.7648, + "step": 2494000 + }, + { + "epoch": 27.44, + "learning_rate": 5.6390615545409536e-08, + "loss": 3.7586, + "step": 2494500 + }, + { + "epoch": 27.45, + "learning_rate": 5.6376863413829145e-08, + "loss": 3.7699, + "step": 2495000 + }, + { + "epoch": 27.45, + "learning_rate": 5.636311128224874e-08, + "loss": 3.7685, + "step": 2495500 + }, + { + "epoch": 27.46, + "learning_rate": 5.634935915066835e-08, + "loss": 3.7542, + "step": 2496000 + }, + { + "epoch": 27.47, + "learning_rate": 5.633560701908796e-08, + "loss": 3.7903, + "step": 2496500 + }, + { + "epoch": 27.47, + "learning_rate": 5.6321854887507556e-08, + "loss": 3.7841, + "step": 2497000 + }, + { + "epoch": 27.48, + "learning_rate": 5.6308102755927165e-08, + "loss": 3.7718, + "step": 2497500 + }, + { + "epoch": 27.48, + "learning_rate": 5.6294350624346775e-08, + "loss": 3.7784, + "step": 2498000 + }, + { + "epoch": 27.49, + "learning_rate": 5.628059849276637e-08, + "loss": 3.7739, + "step": 2498500 + }, + { + "epoch": 27.49, + "learning_rate": 5.626684636118598e-08, + "loss": 3.7789, + "step": 2499000 + }, + { + "epoch": 27.5, + "learning_rate": 5.625309422960559e-08, + "loss": 3.7729, + "step": 2499500 + }, + { + "epoch": 27.5, + "learning_rate": 5.6239342098025185e-08, + "loss": 3.776, + "step": 2500000 + }, + { + "epoch": 27.51, + "learning_rate": 5.6225589966444795e-08, + "loss": 3.7615, + "step": 2500500 + }, + { + "epoch": 27.52, + "learning_rate": 5.6211837834864404e-08, + "loss": 3.7569, + "step": 2501000 + }, + { + "epoch": 27.52, + "learning_rate": 5.619808570328401e-08, + "loss": 3.7833, + "step": 2501500 + }, + { + "epoch": 27.53, + "learning_rate": 5.618433357170361e-08, + "loss": 3.7894, + "step": 2502000 + }, + { + "epoch": 27.53, + "learning_rate": 5.617058144012322e-08, + "loss": 3.7727, + "step": 2502500 + }, + { + "epoch": 27.54, + "learning_rate": 5.615682930854282e-08, + "loss": 3.7861, + "step": 2503000 + }, + { + "epoch": 27.54, + "learning_rate": 5.614307717696243e-08, + "loss": 3.7824, + "step": 2503500 + }, + { + "epoch": 27.55, + "learning_rate": 5.6129325045382034e-08, + "loss": 3.7856, + "step": 2504000 + }, + { + "epoch": 27.55, + "learning_rate": 5.6115572913801636e-08, + "loss": 3.7811, + "step": 2504500 + }, + { + "epoch": 27.56, + "learning_rate": 5.6101820782221246e-08, + "loss": 3.7596, + "step": 2505000 + }, + { + "epoch": 27.56, + "learning_rate": 5.608806865064085e-08, + "loss": 3.768, + "step": 2505500 + }, + { + "epoch": 27.57, + "learning_rate": 5.607431651906045e-08, + "loss": 3.7423, + "step": 2506000 + }, + { + "epoch": 27.58, + "learning_rate": 5.606056438748006e-08, + "loss": 3.749, + "step": 2506500 + }, + { + "epoch": 27.58, + "learning_rate": 5.604681225589967e-08, + "loss": 3.7694, + "step": 2507000 + }, + { + "epoch": 27.59, + "learning_rate": 5.6033060124319266e-08, + "loss": 3.7716, + "step": 2507500 + }, + { + "epoch": 27.59, + "learning_rate": 5.6019307992738875e-08, + "loss": 3.7752, + "step": 2508000 + }, + { + "epoch": 27.6, + "learning_rate": 5.6005555861158485e-08, + "loss": 3.7713, + "step": 2508500 + }, + { + "epoch": 27.6, + "learning_rate": 5.599180372957808e-08, + "loss": 3.7807, + "step": 2509000 + }, + { + "epoch": 27.61, + "learning_rate": 5.597805159799769e-08, + "loss": 3.7974, + "step": 2509500 + }, + { + "epoch": 27.61, + "learning_rate": 5.59642994664173e-08, + "loss": 3.7773, + "step": 2510000 + }, + { + "epoch": 27.62, + "learning_rate": 5.5950547334836896e-08, + "loss": 3.7589, + "step": 2510500 + }, + { + "epoch": 27.63, + "learning_rate": 5.5936795203256505e-08, + "loss": 3.7781, + "step": 2511000 + }, + { + "epoch": 27.63, + "learning_rate": 5.5923043071676114e-08, + "loss": 3.7828, + "step": 2511500 + }, + { + "epoch": 27.64, + "learning_rate": 5.590929094009571e-08, + "loss": 3.7844, + "step": 2512000 + }, + { + "epoch": 27.64, + "learning_rate": 5.589553880851532e-08, + "loss": 3.7958, + "step": 2512500 + }, + { + "epoch": 27.65, + "learning_rate": 5.588178667693493e-08, + "loss": 3.7846, + "step": 2513000 + }, + { + "epoch": 27.65, + "learning_rate": 5.5868034545354525e-08, + "loss": 3.7839, + "step": 2513500 + }, + { + "epoch": 27.66, + "learning_rate": 5.5854282413774135e-08, + "loss": 3.7775, + "step": 2514000 + }, + { + "epoch": 27.66, + "learning_rate": 5.5840530282193744e-08, + "loss": 3.7874, + "step": 2514500 + }, + { + "epoch": 27.67, + "learning_rate": 5.582677815061334e-08, + "loss": 3.7782, + "step": 2515000 + }, + { + "epoch": 27.67, + "learning_rate": 5.581302601903295e-08, + "loss": 3.7515, + "step": 2515500 + }, + { + "epoch": 27.68, + "learning_rate": 5.579927388745256e-08, + "loss": 3.7642, + "step": 2516000 + }, + { + "epoch": 27.69, + "learning_rate": 5.5785521755872155e-08, + "loss": 3.7799, + "step": 2516500 + }, + { + "epoch": 27.69, + "learning_rate": 5.5771769624291764e-08, + "loss": 3.7762, + "step": 2517000 + }, + { + "epoch": 27.7, + "learning_rate": 5.575801749271136e-08, + "loss": 3.7714, + "step": 2517500 + }, + { + "epoch": 27.7, + "learning_rate": 5.574426536113097e-08, + "loss": 3.7664, + "step": 2518000 + }, + { + "epoch": 27.71, + "learning_rate": 5.573051322955058e-08, + "loss": 3.7528, + "step": 2518500 + }, + { + "epoch": 27.71, + "learning_rate": 5.5716761097970175e-08, + "loss": 3.7635, + "step": 2519000 + }, + { + "epoch": 27.72, + "learning_rate": 5.5703008966389784e-08, + "loss": 3.7855, + "step": 2519500 + }, + { + "epoch": 27.72, + "learning_rate": 5.5689256834809394e-08, + "loss": 3.7817, + "step": 2520000 + }, + { + "epoch": 27.73, + "learning_rate": 5.567550470322899e-08, + "loss": 3.7862, + "step": 2520500 + }, + { + "epoch": 27.74, + "learning_rate": 5.56617525716486e-08, + "loss": 3.7665, + "step": 2521000 + }, + { + "epoch": 27.74, + "learning_rate": 5.564800044006821e-08, + "loss": 3.7795, + "step": 2521500 + }, + { + "epoch": 27.75, + "learning_rate": 5.5634248308487805e-08, + "loss": 3.7701, + "step": 2522000 + }, + { + "epoch": 27.75, + "learning_rate": 5.5620496176907414e-08, + "loss": 3.773, + "step": 2522500 + }, + { + "epoch": 27.76, + "learning_rate": 5.5606744045327023e-08, + "loss": 3.7687, + "step": 2523000 + }, + { + "epoch": 27.76, + "learning_rate": 5.5592991913746626e-08, + "loss": 3.7872, + "step": 2523500 + }, + { + "epoch": 27.77, + "learning_rate": 5.557923978216623e-08, + "loss": 3.7766, + "step": 2524000 + }, + { + "epoch": 27.77, + "learning_rate": 5.556548765058584e-08, + "loss": 3.7713, + "step": 2524500 + }, + { + "epoch": 27.78, + "learning_rate": 5.555173551900544e-08, + "loss": 3.7771, + "step": 2525000 + }, + { + "epoch": 27.78, + "learning_rate": 5.553798338742505e-08, + "loss": 3.7745, + "step": 2525500 + }, + { + "epoch": 27.79, + "learning_rate": 5.552423125584465e-08, + "loss": 3.7837, + "step": 2526000 + }, + { + "epoch": 27.8, + "learning_rate": 5.5510479124264256e-08, + "loss": 3.765, + "step": 2526500 + }, + { + "epoch": 27.8, + "learning_rate": 5.5496726992683865e-08, + "loss": 3.7627, + "step": 2527000 + }, + { + "epoch": 27.81, + "learning_rate": 5.548297486110347e-08, + "loss": 3.7865, + "step": 2527500 + }, + { + "epoch": 27.81, + "learning_rate": 5.546922272952307e-08, + "loss": 3.7823, + "step": 2528000 + }, + { + "epoch": 27.82, + "learning_rate": 5.545547059794268e-08, + "loss": 3.7855, + "step": 2528500 + }, + { + "epoch": 27.82, + "learning_rate": 5.544171846636229e-08, + "loss": 3.7676, + "step": 2529000 + }, + { + "epoch": 27.83, + "learning_rate": 5.5427966334781885e-08, + "loss": 3.7727, + "step": 2529500 + }, + { + "epoch": 27.83, + "learning_rate": 5.5414214203201495e-08, + "loss": 3.7777, + "step": 2530000 + }, + { + "epoch": 27.84, + "learning_rate": 5.5400462071621104e-08, + "loss": 3.7815, + "step": 2530500 + }, + { + "epoch": 27.85, + "learning_rate": 5.53867099400407e-08, + "loss": 3.7724, + "step": 2531000 + }, + { + "epoch": 27.85, + "learning_rate": 5.537295780846031e-08, + "loss": 3.7604, + "step": 2531500 + }, + { + "epoch": 27.86, + "learning_rate": 5.535920567687992e-08, + "loss": 3.7824, + "step": 2532000 + }, + { + "epoch": 27.86, + "learning_rate": 5.5345453545299515e-08, + "loss": 3.7874, + "step": 2532500 + }, + { + "epoch": 27.87, + "learning_rate": 5.5331701413719124e-08, + "loss": 3.7708, + "step": 2533000 + }, + { + "epoch": 27.87, + "learning_rate": 5.5317949282138734e-08, + "loss": 3.784, + "step": 2533500 + }, + { + "epoch": 27.88, + "learning_rate": 5.530419715055833e-08, + "loss": 3.7727, + "step": 2534000 + }, + { + "epoch": 27.88, + "learning_rate": 5.529044501897794e-08, + "loss": 3.7758, + "step": 2534500 + }, + { + "epoch": 27.89, + "learning_rate": 5.527669288739755e-08, + "loss": 3.7757, + "step": 2535000 + }, + { + "epoch": 27.89, + "learning_rate": 5.5262940755817144e-08, + "loss": 3.7779, + "step": 2535500 + }, + { + "epoch": 27.9, + "learning_rate": 5.5249188624236754e-08, + "loss": 3.7977, + "step": 2536000 + }, + { + "epoch": 27.91, + "learning_rate": 5.523543649265636e-08, + "loss": 3.7672, + "step": 2536500 + }, + { + "epoch": 27.91, + "learning_rate": 5.522168436107596e-08, + "loss": 3.7609, + "step": 2537000 + }, + { + "epoch": 27.92, + "learning_rate": 5.520793222949557e-08, + "loss": 3.7765, + "step": 2537500 + }, + { + "epoch": 27.92, + "learning_rate": 5.519418009791518e-08, + "loss": 3.772, + "step": 2538000 + }, + { + "epoch": 27.93, + "learning_rate": 5.5180427966334774e-08, + "loss": 3.7711, + "step": 2538500 + }, + { + "epoch": 27.93, + "learning_rate": 5.5166675834754383e-08, + "loss": 3.773, + "step": 2539000 + }, + { + "epoch": 27.94, + "learning_rate": 5.515292370317399e-08, + "loss": 3.7819, + "step": 2539500 + }, + { + "epoch": 27.94, + "learning_rate": 5.513917157159359e-08, + "loss": 3.7896, + "step": 2540000 + }, + { + "epoch": 27.95, + "learning_rate": 5.51254194400132e-08, + "loss": 3.7858, + "step": 2540500 + }, + { + "epoch": 27.96, + "learning_rate": 5.511166730843281e-08, + "loss": 3.7686, + "step": 2541000 + }, + { + "epoch": 27.96, + "learning_rate": 5.5097915176852404e-08, + "loss": 3.7729, + "step": 2541500 + }, + { + "epoch": 27.97, + "learning_rate": 5.508416304527201e-08, + "loss": 3.748, + "step": 2542000 + }, + { + "epoch": 27.97, + "learning_rate": 5.507041091369162e-08, + "loss": 3.7825, + "step": 2542500 + }, + { + "epoch": 27.98, + "learning_rate": 5.505665878211122e-08, + "loss": 3.7678, + "step": 2543000 + }, + { + "epoch": 27.98, + "learning_rate": 5.504290665053083e-08, + "loss": 3.7622, + "step": 2543500 + }, + { + "epoch": 27.99, + "learning_rate": 5.502915451895044e-08, + "loss": 3.7713, + "step": 2544000 + }, + { + "epoch": 27.99, + "learning_rate": 5.501540238737003e-08, + "loss": 3.7515, + "step": 2544500 + }, + { + "epoch": 28.0, + "learning_rate": 5.500165025578964e-08, + "loss": 3.7844, + "step": 2545000 + }, + { + "epoch": 28.0, + "eval_loss": 3.839246988296509, + "eval_runtime": 6.1365, + "eval_samples_per_second": 253.24, + "step": 2545060 + }, + { + "epoch": 28.0, + "learning_rate": 5.498789812420925e-08, + "loss": 3.7648, + "step": 2545500 + }, + { + "epoch": 28.01, + "learning_rate": 5.497414599262885e-08, + "loss": 3.7538, + "step": 2546000 + }, + { + "epoch": 28.02, + "learning_rate": 5.496039386104846e-08, + "loss": 3.7758, + "step": 2546500 + }, + { + "epoch": 28.02, + "learning_rate": 5.4946641729468067e-08, + "loss": 3.7803, + "step": 2547000 + }, + { + "epoch": 28.03, + "learning_rate": 5.493288959788766e-08, + "loss": 3.7638, + "step": 2547500 + }, + { + "epoch": 28.03, + "learning_rate": 5.491913746630727e-08, + "loss": 3.7699, + "step": 2548000 + }, + { + "epoch": 28.04, + "learning_rate": 5.490538533472688e-08, + "loss": 3.7678, + "step": 2548500 + }, + { + "epoch": 28.04, + "learning_rate": 5.4891633203146484e-08, + "loss": 3.777, + "step": 2549000 + }, + { + "epoch": 28.05, + "learning_rate": 5.487788107156609e-08, + "loss": 3.7615, + "step": 2549500 + }, + { + "epoch": 28.05, + "learning_rate": 5.4864128939985696e-08, + "loss": 3.7693, + "step": 2550000 + }, + { + "epoch": 28.06, + "learning_rate": 5.48503768084053e-08, + "loss": 3.7768, + "step": 2550500 + }, + { + "epoch": 28.07, + "learning_rate": 5.483662467682491e-08, + "loss": 3.7737, + "step": 2551000 + }, + { + "epoch": 28.07, + "learning_rate": 5.482287254524451e-08, + "loss": 3.7877, + "step": 2551500 + }, + { + "epoch": 28.08, + "learning_rate": 5.4809120413664114e-08, + "loss": 3.7772, + "step": 2552000 + }, + { + "epoch": 28.08, + "learning_rate": 5.479536828208372e-08, + "loss": 3.7896, + "step": 2552500 + }, + { + "epoch": 28.09, + "learning_rate": 5.478161615050333e-08, + "loss": 3.7897, + "step": 2553000 + }, + { + "epoch": 28.09, + "learning_rate": 5.476786401892293e-08, + "loss": 3.781, + "step": 2553500 + }, + { + "epoch": 28.1, + "learning_rate": 5.475411188734254e-08, + "loss": 3.7779, + "step": 2554000 + }, + { + "epoch": 28.1, + "learning_rate": 5.474035975576215e-08, + "loss": 3.7693, + "step": 2554500 + }, + { + "epoch": 28.11, + "learning_rate": 5.4726607624181743e-08, + "loss": 3.7703, + "step": 2555000 + }, + { + "epoch": 28.11, + "learning_rate": 5.471285549260135e-08, + "loss": 3.7683, + "step": 2555500 + }, + { + "epoch": 28.12, + "learning_rate": 5.469910336102096e-08, + "loss": 3.7797, + "step": 2556000 + }, + { + "epoch": 28.13, + "learning_rate": 5.468535122944056e-08, + "loss": 3.7733, + "step": 2556500 + }, + { + "epoch": 28.13, + "learning_rate": 5.467159909786017e-08, + "loss": 3.793, + "step": 2557000 + }, + { + "epoch": 28.14, + "learning_rate": 5.465784696627978e-08, + "loss": 3.7693, + "step": 2557500 + }, + { + "epoch": 28.14, + "learning_rate": 5.464409483469937e-08, + "loss": 3.7774, + "step": 2558000 + }, + { + "epoch": 28.15, + "learning_rate": 5.463034270311898e-08, + "loss": 3.7578, + "step": 2558500 + }, + { + "epoch": 28.15, + "learning_rate": 5.461659057153859e-08, + "loss": 3.7819, + "step": 2559000 + }, + { + "epoch": 28.16, + "learning_rate": 5.460283843995819e-08, + "loss": 3.7834, + "step": 2559500 + }, + { + "epoch": 28.16, + "learning_rate": 5.45890863083778e-08, + "loss": 3.7725, + "step": 2560000 + }, + { + "epoch": 28.17, + "learning_rate": 5.4575334176797406e-08, + "loss": 3.7732, + "step": 2560500 + }, + { + "epoch": 28.18, + "learning_rate": 5.4561582045217e-08, + "loss": 3.7672, + "step": 2561000 + }, + { + "epoch": 28.18, + "learning_rate": 5.454782991363661e-08, + "loss": 3.7612, + "step": 2561500 + }, + { + "epoch": 28.19, + "learning_rate": 5.453407778205622e-08, + "loss": 3.7845, + "step": 2562000 + }, + { + "epoch": 28.19, + "learning_rate": 5.452032565047582e-08, + "loss": 3.7869, + "step": 2562500 + }, + { + "epoch": 28.2, + "learning_rate": 5.450657351889543e-08, + "loss": 3.7658, + "step": 2563000 + }, + { + "epoch": 28.2, + "learning_rate": 5.4492821387315036e-08, + "loss": 3.7645, + "step": 2563500 + }, + { + "epoch": 28.21, + "learning_rate": 5.447906925573463e-08, + "loss": 3.7735, + "step": 2564000 + }, + { + "epoch": 28.21, + "learning_rate": 5.446531712415424e-08, + "loss": 3.7891, + "step": 2564500 + }, + { + "epoch": 28.22, + "learning_rate": 5.445156499257385e-08, + "loss": 3.7481, + "step": 2565000 + }, + { + "epoch": 28.22, + "learning_rate": 5.443781286099345e-08, + "loss": 3.7873, + "step": 2565500 + }, + { + "epoch": 28.23, + "learning_rate": 5.4424060729413056e-08, + "loss": 3.7717, + "step": 2566000 + }, + { + "epoch": 28.24, + "learning_rate": 5.4410308597832666e-08, + "loss": 3.7897, + "step": 2566500 + }, + { + "epoch": 28.24, + "learning_rate": 5.439655646625226e-08, + "loss": 3.7744, + "step": 2567000 + }, + { + "epoch": 28.25, + "learning_rate": 5.438280433467187e-08, + "loss": 3.7865, + "step": 2567500 + }, + { + "epoch": 28.25, + "learning_rate": 5.436905220309148e-08, + "loss": 3.7623, + "step": 2568000 + }, + { + "epoch": 28.26, + "learning_rate": 5.4355300071511077e-08, + "loss": 3.7768, + "step": 2568500 + }, + { + "epoch": 28.26, + "learning_rate": 5.4341547939930686e-08, + "loss": 3.741, + "step": 2569000 + }, + { + "epoch": 28.27, + "learning_rate": 5.4327795808350295e-08, + "loss": 3.772, + "step": 2569500 + }, + { + "epoch": 28.27, + "learning_rate": 5.431404367676989e-08, + "loss": 3.7886, + "step": 2570000 + }, + { + "epoch": 28.28, + "learning_rate": 5.43002915451895e-08, + "loss": 3.7689, + "step": 2570500 + }, + { + "epoch": 28.29, + "learning_rate": 5.428653941360911e-08, + "loss": 3.751, + "step": 2571000 + }, + { + "epoch": 28.29, + "learning_rate": 5.4272787282028706e-08, + "loss": 3.7591, + "step": 2571500 + }, + { + "epoch": 28.3, + "learning_rate": 5.4259035150448315e-08, + "loss": 3.7691, + "step": 2572000 + }, + { + "epoch": 28.3, + "learning_rate": 5.4245283018867925e-08, + "loss": 3.7827, + "step": 2572500 + }, + { + "epoch": 28.31, + "learning_rate": 5.423153088728753e-08, + "loss": 3.7593, + "step": 2573000 + }, + { + "epoch": 28.31, + "learning_rate": 5.421777875570713e-08, + "loss": 3.7846, + "step": 2573500 + }, + { + "epoch": 28.32, + "learning_rate": 5.420402662412674e-08, + "loss": 3.7724, + "step": 2574000 + }, + { + "epoch": 28.32, + "learning_rate": 5.419027449254634e-08, + "loss": 3.776, + "step": 2574500 + }, + { + "epoch": 28.33, + "learning_rate": 5.4176522360965945e-08, + "loss": 3.7606, + "step": 2575000 + }, + { + "epoch": 28.33, + "learning_rate": 5.4162770229385554e-08, + "loss": 3.7724, + "step": 2575500 + }, + { + "epoch": 28.34, + "learning_rate": 5.414901809780516e-08, + "loss": 3.7714, + "step": 2576000 + }, + { + "epoch": 28.35, + "learning_rate": 5.4135265966224766e-08, + "loss": 3.7638, + "step": 2576500 + }, + { + "epoch": 28.35, + "learning_rate": 5.412151383464437e-08, + "loss": 3.7734, + "step": 2577000 + }, + { + "epoch": 28.36, + "learning_rate": 5.410776170306397e-08, + "loss": 3.7924, + "step": 2577500 + }, + { + "epoch": 28.36, + "learning_rate": 5.409400957148358e-08, + "loss": 3.7568, + "step": 2578000 + }, + { + "epoch": 28.37, + "learning_rate": 5.408025743990319e-08, + "loss": 3.7593, + "step": 2578500 + }, + { + "epoch": 28.37, + "learning_rate": 5.406650530832279e-08, + "loss": 3.7787, + "step": 2579000 + }, + { + "epoch": 28.38, + "learning_rate": 5.4052753176742396e-08, + "loss": 3.7753, + "step": 2579500 + }, + { + "epoch": 28.38, + "learning_rate": 5.4039001045162005e-08, + "loss": 3.7882, + "step": 2580000 + }, + { + "epoch": 28.39, + "learning_rate": 5.40252489135816e-08, + "loss": 3.7676, + "step": 2580500 + }, + { + "epoch": 28.4, + "learning_rate": 5.401149678200121e-08, + "loss": 3.7744, + "step": 2581000 + }, + { + "epoch": 28.4, + "learning_rate": 5.399774465042082e-08, + "loss": 3.7636, + "step": 2581500 + }, + { + "epoch": 28.41, + "learning_rate": 5.3983992518840416e-08, + "loss": 3.7886, + "step": 2582000 + }, + { + "epoch": 28.41, + "learning_rate": 5.3970240387260026e-08, + "loss": 3.7681, + "step": 2582500 + }, + { + "epoch": 28.42, + "learning_rate": 5.3956488255679635e-08, + "loss": 3.766, + "step": 2583000 + }, + { + "epoch": 28.42, + "learning_rate": 5.394273612409923e-08, + "loss": 3.7915, + "step": 2583500 + }, + { + "epoch": 28.43, + "learning_rate": 5.392898399251884e-08, + "loss": 3.7581, + "step": 2584000 + }, + { + "epoch": 28.43, + "learning_rate": 5.391523186093845e-08, + "loss": 3.7746, + "step": 2584500 + }, + { + "epoch": 28.44, + "learning_rate": 5.3901479729358046e-08, + "loss": 3.7623, + "step": 2585000 + }, + { + "epoch": 28.44, + "learning_rate": 5.3887727597777655e-08, + "loss": 3.75, + "step": 2585500 + }, + { + "epoch": 28.45, + "learning_rate": 5.3873975466197265e-08, + "loss": 3.7702, + "step": 2586000 + }, + { + "epoch": 28.46, + "learning_rate": 5.386022333461686e-08, + "loss": 3.7589, + "step": 2586500 + }, + { + "epoch": 28.46, + "learning_rate": 5.384647120303647e-08, + "loss": 3.772, + "step": 2587000 + }, + { + "epoch": 28.47, + "learning_rate": 5.383271907145608e-08, + "loss": 3.7585, + "step": 2587500 + }, + { + "epoch": 28.47, + "learning_rate": 5.3818966939875676e-08, + "loss": 3.7907, + "step": 2588000 + }, + { + "epoch": 28.48, + "learning_rate": 5.3805214808295285e-08, + "loss": 3.7732, + "step": 2588500 + }, + { + "epoch": 28.48, + "learning_rate": 5.3791462676714894e-08, + "loss": 3.7656, + "step": 2589000 + }, + { + "epoch": 28.49, + "learning_rate": 5.377771054513449e-08, + "loss": 3.7644, + "step": 2589500 + }, + { + "epoch": 28.49, + "learning_rate": 5.37639584135541e-08, + "loss": 3.77, + "step": 2590000 + }, + { + "epoch": 28.5, + "learning_rate": 5.375020628197371e-08, + "loss": 3.7773, + "step": 2590500 + }, + { + "epoch": 28.51, + "learning_rate": 5.3736454150393305e-08, + "loss": 3.786, + "step": 2591000 + }, + { + "epoch": 28.51, + "learning_rate": 5.3722702018812914e-08, + "loss": 3.7799, + "step": 2591500 + }, + { + "epoch": 28.52, + "learning_rate": 5.370894988723251e-08, + "loss": 3.7574, + "step": 2592000 + }, + { + "epoch": 28.52, + "learning_rate": 5.369519775565212e-08, + "loss": 3.7796, + "step": 2592500 + }, + { + "epoch": 28.53, + "learning_rate": 5.368144562407173e-08, + "loss": 3.7768, + "step": 2593000 + }, + { + "epoch": 28.53, + "learning_rate": 5.3667693492491325e-08, + "loss": 3.7606, + "step": 2593500 + }, + { + "epoch": 28.54, + "learning_rate": 5.3653941360910935e-08, + "loss": 3.7666, + "step": 2594000 + }, + { + "epoch": 28.54, + "learning_rate": 5.3640189229330544e-08, + "loss": 3.7677, + "step": 2594500 + }, + { + "epoch": 28.55, + "learning_rate": 5.362643709775015e-08, + "loss": 3.7744, + "step": 2595000 + }, + { + "epoch": 28.55, + "learning_rate": 5.361268496616975e-08, + "loss": 3.7769, + "step": 2595500 + }, + { + "epoch": 28.56, + "learning_rate": 5.359893283458936e-08, + "loss": 3.7697, + "step": 2596000 + }, + { + "epoch": 28.57, + "learning_rate": 5.358518070300896e-08, + "loss": 3.7767, + "step": 2596500 + }, + { + "epoch": 28.57, + "learning_rate": 5.3571428571428564e-08, + "loss": 3.7853, + "step": 2597000 + }, + { + "epoch": 28.58, + "learning_rate": 5.3557676439848174e-08, + "loss": 3.7758, + "step": 2597500 + }, + { + "epoch": 28.58, + "learning_rate": 5.3543924308267776e-08, + "loss": 3.7597, + "step": 2598000 + }, + { + "epoch": 28.59, + "learning_rate": 5.3530172176687386e-08, + "loss": 3.7688, + "step": 2598500 + }, + { + "epoch": 28.59, + "learning_rate": 5.351642004510699e-08, + "loss": 3.7669, + "step": 2599000 + }, + { + "epoch": 28.6, + "learning_rate": 5.350266791352659e-08, + "loss": 3.7656, + "step": 2599500 + }, + { + "epoch": 28.6, + "learning_rate": 5.34889157819462e-08, + "loss": 3.7675, + "step": 2600000 + }, + { + "epoch": 28.61, + "learning_rate": 5.347516365036581e-08, + "loss": 3.7869, + "step": 2600500 + }, + { + "epoch": 28.62, + "learning_rate": 5.3461411518785406e-08, + "loss": 3.779, + "step": 2601000 + }, + { + "epoch": 28.62, + "learning_rate": 5.3447659387205015e-08, + "loss": 3.7399, + "step": 2601500 + }, + { + "epoch": 28.63, + "learning_rate": 5.3433907255624625e-08, + "loss": 3.7663, + "step": 2602000 + }, + { + "epoch": 28.63, + "learning_rate": 5.342015512404422e-08, + "loss": 3.7642, + "step": 2602500 + }, + { + "epoch": 28.64, + "learning_rate": 5.340640299246383e-08, + "loss": 3.7799, + "step": 2603000 + }, + { + "epoch": 28.64, + "learning_rate": 5.339265086088344e-08, + "loss": 3.7614, + "step": 2603500 + }, + { + "epoch": 28.65, + "learning_rate": 5.3378898729303036e-08, + "loss": 3.7695, + "step": 2604000 + }, + { + "epoch": 28.65, + "learning_rate": 5.3365146597722645e-08, + "loss": 3.7504, + "step": 2604500 + }, + { + "epoch": 28.66, + "learning_rate": 5.3351394466142254e-08, + "loss": 3.7806, + "step": 2605000 + }, + { + "epoch": 28.66, + "learning_rate": 5.333764233456185e-08, + "loss": 3.7669, + "step": 2605500 + }, + { + "epoch": 28.67, + "learning_rate": 5.332389020298146e-08, + "loss": 3.7608, + "step": 2606000 + }, + { + "epoch": 28.68, + "learning_rate": 5.331013807140107e-08, + "loss": 3.7731, + "step": 2606500 + }, + { + "epoch": 28.68, + "learning_rate": 5.3296385939820665e-08, + "loss": 3.7712, + "step": 2607000 + }, + { + "epoch": 28.69, + "learning_rate": 5.3282633808240274e-08, + "loss": 3.7637, + "step": 2607500 + }, + { + "epoch": 28.69, + "learning_rate": 5.3268881676659884e-08, + "loss": 3.777, + "step": 2608000 + }, + { + "epoch": 28.7, + "learning_rate": 5.325512954507948e-08, + "loss": 3.8021, + "step": 2608500 + }, + { + "epoch": 28.7, + "learning_rate": 5.324137741349909e-08, + "loss": 3.7743, + "step": 2609000 + }, + { + "epoch": 28.71, + "learning_rate": 5.32276252819187e-08, + "loss": 3.7644, + "step": 2609500 + }, + { + "epoch": 28.71, + "learning_rate": 5.3213873150338295e-08, + "loss": 3.7773, + "step": 2610000 + }, + { + "epoch": 28.72, + "learning_rate": 5.3200121018757904e-08, + "loss": 3.7637, + "step": 2610500 + }, + { + "epoch": 28.73, + "learning_rate": 5.3186368887177513e-08, + "loss": 3.7642, + "step": 2611000 + }, + { + "epoch": 28.73, + "learning_rate": 5.317261675559711e-08, + "loss": 3.7669, + "step": 2611500 + }, + { + "epoch": 28.74, + "learning_rate": 5.315886462401672e-08, + "loss": 3.754, + "step": 2612000 + }, + { + "epoch": 28.74, + "learning_rate": 5.314511249243633e-08, + "loss": 3.7749, + "step": 2612500 + }, + { + "epoch": 28.75, + "learning_rate": 5.3131360360855924e-08, + "loss": 3.7772, + "step": 2613000 + }, + { + "epoch": 28.75, + "learning_rate": 5.3117608229275534e-08, + "loss": 3.7566, + "step": 2613500 + }, + { + "epoch": 28.76, + "learning_rate": 5.310385609769514e-08, + "loss": 3.7741, + "step": 2614000 + }, + { + "epoch": 28.76, + "learning_rate": 5.309010396611474e-08, + "loss": 3.7913, + "step": 2614500 + }, + { + "epoch": 28.77, + "learning_rate": 5.307635183453435e-08, + "loss": 3.7543, + "step": 2615000 + }, + { + "epoch": 28.77, + "learning_rate": 5.306259970295396e-08, + "loss": 3.7629, + "step": 2615500 + }, + { + "epoch": 28.78, + "learning_rate": 5.3048847571373554e-08, + "loss": 3.7702, + "step": 2616000 + }, + { + "epoch": 28.79, + "learning_rate": 5.303509543979316e-08, + "loss": 3.7668, + "step": 2616500 + }, + { + "epoch": 28.79, + "learning_rate": 5.302134330821277e-08, + "loss": 3.7776, + "step": 2617000 + }, + { + "epoch": 28.8, + "learning_rate": 5.300759117663237e-08, + "loss": 3.7817, + "step": 2617500 + }, + { + "epoch": 28.8, + "learning_rate": 5.299383904505198e-08, + "loss": 3.7509, + "step": 2618000 + }, + { + "epoch": 28.81, + "learning_rate": 5.298008691347159e-08, + "loss": 3.7949, + "step": 2618500 + }, + { + "epoch": 28.81, + "learning_rate": 5.2966334781891183e-08, + "loss": 3.7508, + "step": 2619000 + }, + { + "epoch": 28.82, + "learning_rate": 5.295258265031079e-08, + "loss": 3.7715, + "step": 2619500 + }, + { + "epoch": 28.82, + "learning_rate": 5.29388305187304e-08, + "loss": 3.7736, + "step": 2620000 + }, + { + "epoch": 28.83, + "learning_rate": 5.2925078387150005e-08, + "loss": 3.8018, + "step": 2620500 + }, + { + "epoch": 28.84, + "learning_rate": 5.291132625556961e-08, + "loss": 3.746, + "step": 2621000 + }, + { + "epoch": 28.84, + "learning_rate": 5.289757412398922e-08, + "loss": 3.7793, + "step": 2621500 + }, + { + "epoch": 28.85, + "learning_rate": 5.288382199240882e-08, + "loss": 3.7789, + "step": 2622000 + }, + { + "epoch": 28.85, + "learning_rate": 5.287006986082842e-08, + "loss": 3.7764, + "step": 2622500 + }, + { + "epoch": 28.86, + "learning_rate": 5.285631772924803e-08, + "loss": 3.7818, + "step": 2623000 + }, + { + "epoch": 28.86, + "learning_rate": 5.2842565597667634e-08, + "loss": 3.7871, + "step": 2623500 + }, + { + "epoch": 28.87, + "learning_rate": 5.2828813466087244e-08, + "loss": 3.7518, + "step": 2624000 + }, + { + "epoch": 28.87, + "learning_rate": 5.2815061334506847e-08, + "loss": 3.7617, + "step": 2624500 + }, + { + "epoch": 28.88, + "learning_rate": 5.280130920292645e-08, + "loss": 3.7624, + "step": 2625000 + }, + { + "epoch": 28.88, + "learning_rate": 5.278755707134606e-08, + "loss": 3.7674, + "step": 2625500 + }, + { + "epoch": 28.89, + "learning_rate": 5.277380493976567e-08, + "loss": 3.7542, + "step": 2626000 + }, + { + "epoch": 28.9, + "learning_rate": 5.2760052808185264e-08, + "loss": 3.7845, + "step": 2626500 + }, + { + "epoch": 28.9, + "learning_rate": 5.2746300676604873e-08, + "loss": 3.7622, + "step": 2627000 + }, + { + "epoch": 28.91, + "learning_rate": 5.273254854502448e-08, + "loss": 3.7746, + "step": 2627500 + }, + { + "epoch": 28.91, + "learning_rate": 5.271879641344408e-08, + "loss": 3.7668, + "step": 2628000 + }, + { + "epoch": 28.92, + "learning_rate": 5.270504428186369e-08, + "loss": 3.7733, + "step": 2628500 + }, + { + "epoch": 28.92, + "learning_rate": 5.26912921502833e-08, + "loss": 3.7719, + "step": 2629000 + }, + { + "epoch": 28.93, + "learning_rate": 5.2677540018702894e-08, + "loss": 3.7859, + "step": 2629500 + }, + { + "epoch": 28.93, + "learning_rate": 5.26637878871225e-08, + "loss": 3.7451, + "step": 2630000 + }, + { + "epoch": 28.94, + "learning_rate": 5.265003575554211e-08, + "loss": 3.7672, + "step": 2630500 + }, + { + "epoch": 28.95, + "learning_rate": 5.263628362396171e-08, + "loss": 3.7799, + "step": 2631000 + }, + { + "epoch": 28.95, + "learning_rate": 5.262253149238132e-08, + "loss": 3.7727, + "step": 2631500 + }, + { + "epoch": 28.96, + "learning_rate": 5.260877936080093e-08, + "loss": 3.7583, + "step": 2632000 + }, + { + "epoch": 28.96, + "learning_rate": 5.259502722922052e-08, + "loss": 3.7717, + "step": 2632500 + }, + { + "epoch": 28.97, + "learning_rate": 5.258127509764013e-08, + "loss": 3.7811, + "step": 2633000 + }, + { + "epoch": 28.97, + "learning_rate": 5.256752296605974e-08, + "loss": 3.7761, + "step": 2633500 + }, + { + "epoch": 28.98, + "learning_rate": 5.255377083447934e-08, + "loss": 3.7703, + "step": 2634000 + }, + { + "epoch": 28.98, + "learning_rate": 5.254001870289895e-08, + "loss": 3.7956, + "step": 2634500 + }, + { + "epoch": 28.99, + "learning_rate": 5.252626657131856e-08, + "loss": 3.7707, + "step": 2635000 + }, + { + "epoch": 28.99, + "learning_rate": 5.251251443973815e-08, + "loss": 3.7806, + "step": 2635500 + }, + { + "epoch": 29.0, + "eval_loss": 3.8371315002441406, + "eval_runtime": 6.1315, + "eval_samples_per_second": 253.443, + "step": 2635955 + }, + { + "epoch": 29.0, + "learning_rate": 5.249876230815776e-08, + "loss": 3.7788, + "step": 2636000 + }, + { + "epoch": 29.01, + "learning_rate": 5.248501017657737e-08, + "loss": 3.7582, + "step": 2636500 + }, + { + "epoch": 29.01, + "learning_rate": 5.247125804499697e-08, + "loss": 3.7732, + "step": 2637000 + }, + { + "epoch": 29.02, + "learning_rate": 5.245750591341658e-08, + "loss": 3.7512, + "step": 2637500 + }, + { + "epoch": 29.02, + "learning_rate": 5.2443753781836186e-08, + "loss": 3.7929, + "step": 2638000 + }, + { + "epoch": 29.03, + "learning_rate": 5.243000165025578e-08, + "loss": 3.7656, + "step": 2638500 + }, + { + "epoch": 29.03, + "learning_rate": 5.241624951867539e-08, + "loss": 3.7692, + "step": 2639000 + }, + { + "epoch": 29.04, + "learning_rate": 5.2402497387095e-08, + "loss": 3.777, + "step": 2639500 + }, + { + "epoch": 29.04, + "learning_rate": 5.23887452555146e-08, + "loss": 3.7839, + "step": 2640000 + }, + { + "epoch": 29.05, + "learning_rate": 5.2374993123934207e-08, + "loss": 3.7703, + "step": 2640500 + }, + { + "epoch": 29.06, + "learning_rate": 5.2361240992353816e-08, + "loss": 3.7759, + "step": 2641000 + }, + { + "epoch": 29.06, + "learning_rate": 5.234748886077341e-08, + "loss": 3.7743, + "step": 2641500 + }, + { + "epoch": 29.07, + "learning_rate": 5.233373672919302e-08, + "loss": 3.7739, + "step": 2642000 + }, + { + "epoch": 29.07, + "learning_rate": 5.231998459761263e-08, + "loss": 3.766, + "step": 2642500 + }, + { + "epoch": 29.08, + "learning_rate": 5.230623246603223e-08, + "loss": 3.78, + "step": 2643000 + }, + { + "epoch": 29.08, + "learning_rate": 5.2292480334451836e-08, + "loss": 3.7674, + "step": 2643500 + }, + { + "epoch": 29.09, + "learning_rate": 5.2278728202871446e-08, + "loss": 3.7697, + "step": 2644000 + }, + { + "epoch": 29.09, + "learning_rate": 5.226497607129104e-08, + "loss": 3.7716, + "step": 2644500 + }, + { + "epoch": 29.1, + "learning_rate": 5.225122393971065e-08, + "loss": 3.7889, + "step": 2645000 + }, + { + "epoch": 29.11, + "learning_rate": 5.223747180813026e-08, + "loss": 3.7806, + "step": 2645500 + }, + { + "epoch": 29.11, + "learning_rate": 5.222371967654986e-08, + "loss": 3.7795, + "step": 2646000 + }, + { + "epoch": 29.12, + "learning_rate": 5.2209967544969466e-08, + "loss": 3.7646, + "step": 2646500 + }, + { + "epoch": 29.12, + "learning_rate": 5.2196215413389075e-08, + "loss": 3.7739, + "step": 2647000 + }, + { + "epoch": 29.13, + "learning_rate": 5.218246328180868e-08, + "loss": 3.7712, + "step": 2647500 + }, + { + "epoch": 29.13, + "learning_rate": 5.216871115022829e-08, + "loss": 3.7654, + "step": 2648000 + }, + { + "epoch": 29.14, + "learning_rate": 5.215495901864789e-08, + "loss": 3.7617, + "step": 2648500 + }, + { + "epoch": 29.14, + "learning_rate": 5.214120688706749e-08, + "loss": 3.7845, + "step": 2649000 + }, + { + "epoch": 29.15, + "learning_rate": 5.21274547554871e-08, + "loss": 3.7715, + "step": 2649500 + }, + { + "epoch": 29.15, + "learning_rate": 5.2113702623906705e-08, + "loss": 3.7715, + "step": 2650000 + }, + { + "epoch": 29.16, + "learning_rate": 5.209995049232631e-08, + "loss": 3.7799, + "step": 2650500 + }, + { + "epoch": 29.17, + "learning_rate": 5.208619836074592e-08, + "loss": 3.7781, + "step": 2651000 + }, + { + "epoch": 29.17, + "learning_rate": 5.2072446229165526e-08, + "loss": 3.7449, + "step": 2651500 + }, + { + "epoch": 29.18, + "learning_rate": 5.205869409758512e-08, + "loss": 3.7581, + "step": 2652000 + }, + { + "epoch": 29.18, + "learning_rate": 5.204494196600473e-08, + "loss": 3.7688, + "step": 2652500 + }, + { + "epoch": 29.19, + "learning_rate": 5.203118983442434e-08, + "loss": 3.7602, + "step": 2653000 + }, + { + "epoch": 29.19, + "learning_rate": 5.201743770284394e-08, + "loss": 3.7786, + "step": 2653500 + }, + { + "epoch": 29.2, + "learning_rate": 5.2003685571263546e-08, + "loss": 3.7608, + "step": 2654000 + }, + { + "epoch": 29.2, + "learning_rate": 5.1989933439683156e-08, + "loss": 3.7783, + "step": 2654500 + }, + { + "epoch": 29.21, + "learning_rate": 5.197618130810275e-08, + "loss": 3.7864, + "step": 2655000 + }, + { + "epoch": 29.22, + "learning_rate": 5.196242917652236e-08, + "loss": 3.7672, + "step": 2655500 + }, + { + "epoch": 29.22, + "learning_rate": 5.194867704494197e-08, + "loss": 3.7579, + "step": 2656000 + }, + { + "epoch": 29.23, + "learning_rate": 5.1934924913361567e-08, + "loss": 3.7875, + "step": 2656500 + }, + { + "epoch": 29.23, + "learning_rate": 5.1921172781781176e-08, + "loss": 3.7724, + "step": 2657000 + }, + { + "epoch": 29.24, + "learning_rate": 5.1907420650200785e-08, + "loss": 3.7789, + "step": 2657500 + }, + { + "epoch": 29.24, + "learning_rate": 5.189366851862038e-08, + "loss": 3.7829, + "step": 2658000 + }, + { + "epoch": 29.25, + "learning_rate": 5.187991638703999e-08, + "loss": 3.7595, + "step": 2658500 + }, + { + "epoch": 29.25, + "learning_rate": 5.18661642554596e-08, + "loss": 3.7664, + "step": 2659000 + }, + { + "epoch": 29.26, + "learning_rate": 5.1852412123879196e-08, + "loss": 3.7671, + "step": 2659500 + }, + { + "epoch": 29.26, + "learning_rate": 5.1838659992298806e-08, + "loss": 3.7832, + "step": 2660000 + }, + { + "epoch": 29.27, + "learning_rate": 5.1824907860718415e-08, + "loss": 3.767, + "step": 2660500 + }, + { + "epoch": 29.28, + "learning_rate": 5.181115572913801e-08, + "loss": 3.7562, + "step": 2661000 + }, + { + "epoch": 29.28, + "learning_rate": 5.179740359755762e-08, + "loss": 3.7839, + "step": 2661500 + }, + { + "epoch": 29.29, + "learning_rate": 5.178365146597723e-08, + "loss": 3.7679, + "step": 2662000 + }, + { + "epoch": 29.29, + "learning_rate": 5.1769899334396826e-08, + "loss": 3.7655, + "step": 2662500 + }, + { + "epoch": 29.3, + "learning_rate": 5.1756147202816435e-08, + "loss": 3.7888, + "step": 2663000 + }, + { + "epoch": 29.3, + "learning_rate": 5.1742395071236044e-08, + "loss": 3.7766, + "step": 2663500 + }, + { + "epoch": 29.31, + "learning_rate": 5.172864293965564e-08, + "loss": 3.7862, + "step": 2664000 + }, + { + "epoch": 29.31, + "learning_rate": 5.171489080807525e-08, + "loss": 3.7609, + "step": 2664500 + }, + { + "epoch": 29.32, + "learning_rate": 5.170113867649486e-08, + "loss": 3.7497, + "step": 2665000 + }, + { + "epoch": 29.33, + "learning_rate": 5.1687386544914455e-08, + "loss": 3.766, + "step": 2665500 + }, + { + "epoch": 29.33, + "learning_rate": 5.1673634413334065e-08, + "loss": 3.7607, + "step": 2666000 + }, + { + "epoch": 29.34, + "learning_rate": 5.1659882281753674e-08, + "loss": 3.7743, + "step": 2666500 + }, + { + "epoch": 29.34, + "learning_rate": 5.164613015017327e-08, + "loss": 3.7687, + "step": 2667000 + }, + { + "epoch": 29.35, + "learning_rate": 5.163237801859288e-08, + "loss": 3.7682, + "step": 2667500 + }, + { + "epoch": 29.35, + "learning_rate": 5.161862588701248e-08, + "loss": 3.7585, + "step": 2668000 + }, + { + "epoch": 29.36, + "learning_rate": 5.1604873755432085e-08, + "loss": 3.7678, + "step": 2668500 + }, + { + "epoch": 29.36, + "learning_rate": 5.1591121623851694e-08, + "loss": 3.7719, + "step": 2669000 + }, + { + "epoch": 29.37, + "learning_rate": 5.15773694922713e-08, + "loss": 3.7735, + "step": 2669500 + }, + { + "epoch": 29.37, + "learning_rate": 5.15636173606909e-08, + "loss": 3.7626, + "step": 2670000 + }, + { + "epoch": 29.38, + "learning_rate": 5.154986522911051e-08, + "loss": 3.7917, + "step": 2670500 + }, + { + "epoch": 29.39, + "learning_rate": 5.153611309753011e-08, + "loss": 3.7661, + "step": 2671000 + }, + { + "epoch": 29.39, + "learning_rate": 5.152236096594972e-08, + "loss": 3.7685, + "step": 2671500 + }, + { + "epoch": 29.4, + "learning_rate": 5.1508608834369324e-08, + "loss": 3.7689, + "step": 2672000 + }, + { + "epoch": 29.4, + "learning_rate": 5.1494856702788927e-08, + "loss": 3.7658, + "step": 2672500 + }, + { + "epoch": 29.41, + "learning_rate": 5.1481104571208536e-08, + "loss": 3.7561, + "step": 2673000 + }, + { + "epoch": 29.41, + "learning_rate": 5.1467352439628145e-08, + "loss": 3.7797, + "step": 2673500 + }, + { + "epoch": 29.42, + "learning_rate": 5.145360030804774e-08, + "loss": 3.75, + "step": 2674000 + }, + { + "epoch": 29.42, + "learning_rate": 5.143984817646735e-08, + "loss": 3.784, + "step": 2674500 + }, + { + "epoch": 29.43, + "learning_rate": 5.142609604488696e-08, + "loss": 3.753, + "step": 2675000 + }, + { + "epoch": 29.44, + "learning_rate": 5.1412343913306556e-08, + "loss": 3.7834, + "step": 2675500 + }, + { + "epoch": 29.44, + "learning_rate": 5.1398591781726166e-08, + "loss": 3.7866, + "step": 2676000 + }, + { + "epoch": 29.45, + "learning_rate": 5.1384839650145775e-08, + "loss": 3.7743, + "step": 2676500 + }, + { + "epoch": 29.45, + "learning_rate": 5.137108751856537e-08, + "loss": 3.7687, + "step": 2677000 + }, + { + "epoch": 29.46, + "learning_rate": 5.135733538698498e-08, + "loss": 3.7778, + "step": 2677500 + }, + { + "epoch": 29.46, + "learning_rate": 5.134358325540459e-08, + "loss": 3.7665, + "step": 2678000 + }, + { + "epoch": 29.47, + "learning_rate": 5.1329831123824186e-08, + "loss": 3.76, + "step": 2678500 + }, + { + "epoch": 29.47, + "learning_rate": 5.1316078992243795e-08, + "loss": 3.7785, + "step": 2679000 + }, + { + "epoch": 29.48, + "learning_rate": 5.1302326860663405e-08, + "loss": 3.7741, + "step": 2679500 + }, + { + "epoch": 29.48, + "learning_rate": 5.1288574729083e-08, + "loss": 3.7606, + "step": 2680000 + }, + { + "epoch": 29.49, + "learning_rate": 5.127482259750261e-08, + "loss": 3.7899, + "step": 2680500 + }, + { + "epoch": 29.5, + "learning_rate": 5.126107046592222e-08, + "loss": 3.7911, + "step": 2681000 + }, + { + "epoch": 29.5, + "learning_rate": 5.1247318334341815e-08, + "loss": 3.7766, + "step": 2681500 + }, + { + "epoch": 29.51, + "learning_rate": 5.1233566202761425e-08, + "loss": 3.7699, + "step": 2682000 + }, + { + "epoch": 29.51, + "learning_rate": 5.1219814071181034e-08, + "loss": 3.7947, + "step": 2682500 + }, + { + "epoch": 29.52, + "learning_rate": 5.120606193960063e-08, + "loss": 3.7589, + "step": 2683000 + }, + { + "epoch": 29.52, + "learning_rate": 5.119230980802024e-08, + "loss": 3.7703, + "step": 2683500 + }, + { + "epoch": 29.53, + "learning_rate": 5.117855767643985e-08, + "loss": 3.7753, + "step": 2684000 + }, + { + "epoch": 29.53, + "learning_rate": 5.1164805544859445e-08, + "loss": 3.7753, + "step": 2684500 + }, + { + "epoch": 29.54, + "learning_rate": 5.1151053413279054e-08, + "loss": 3.7778, + "step": 2685000 + }, + { + "epoch": 29.55, + "learning_rate": 5.1137301281698664e-08, + "loss": 3.7739, + "step": 2685500 + }, + { + "epoch": 29.55, + "learning_rate": 5.112354915011826e-08, + "loss": 3.7665, + "step": 2686000 + }, + { + "epoch": 29.56, + "learning_rate": 5.110979701853787e-08, + "loss": 3.7768, + "step": 2686500 + }, + { + "epoch": 29.56, + "learning_rate": 5.109604488695748e-08, + "loss": 3.7747, + "step": 2687000 + }, + { + "epoch": 29.57, + "learning_rate": 5.1082292755377075e-08, + "loss": 3.7648, + "step": 2687500 + }, + { + "epoch": 29.57, + "learning_rate": 5.1068540623796684e-08, + "loss": 3.777, + "step": 2688000 + }, + { + "epoch": 29.58, + "learning_rate": 5.105478849221629e-08, + "loss": 3.7719, + "step": 2688500 + }, + { + "epoch": 29.58, + "learning_rate": 5.104103636063589e-08, + "loss": 3.7652, + "step": 2689000 + }, + { + "epoch": 29.59, + "learning_rate": 5.10272842290555e-08, + "loss": 3.7532, + "step": 2689500 + }, + { + "epoch": 29.59, + "learning_rate": 5.101353209747511e-08, + "loss": 3.7796, + "step": 2690000 + }, + { + "epoch": 29.6, + "learning_rate": 5.0999779965894704e-08, + "loss": 3.7788, + "step": 2690500 + }, + { + "epoch": 29.61, + "learning_rate": 5.0986027834314314e-08, + "loss": 3.7661, + "step": 2691000 + }, + { + "epoch": 29.61, + "learning_rate": 5.097227570273392e-08, + "loss": 3.779, + "step": 2691500 + }, + { + "epoch": 29.62, + "learning_rate": 5.095852357115352e-08, + "loss": 3.7811, + "step": 2692000 + }, + { + "epoch": 29.62, + "learning_rate": 5.094477143957313e-08, + "loss": 3.7689, + "step": 2692500 + }, + { + "epoch": 29.63, + "learning_rate": 5.093101930799274e-08, + "loss": 3.7608, + "step": 2693000 + }, + { + "epoch": 29.63, + "learning_rate": 5.091726717641234e-08, + "loss": 3.7392, + "step": 2693500 + }, + { + "epoch": 29.64, + "learning_rate": 5.090351504483194e-08, + "loss": 3.7649, + "step": 2694000 + }, + { + "epoch": 29.64, + "learning_rate": 5.088976291325155e-08, + "loss": 3.7671, + "step": 2694500 + }, + { + "epoch": 29.65, + "learning_rate": 5.0876010781671155e-08, + "loss": 3.7487, + "step": 2695000 + }, + { + "epoch": 29.66, + "learning_rate": 5.0862258650090765e-08, + "loss": 3.786, + "step": 2695500 + }, + { + "epoch": 29.66, + "learning_rate": 5.084850651851037e-08, + "loss": 3.765, + "step": 2696000 + }, + { + "epoch": 29.67, + "learning_rate": 5.083475438692997e-08, + "loss": 3.7641, + "step": 2696500 + }, + { + "epoch": 29.67, + "learning_rate": 5.082100225534958e-08, + "loss": 3.7796, + "step": 2697000 + }, + { + "epoch": 29.68, + "learning_rate": 5.080725012376918e-08, + "loss": 3.7616, + "step": 2697500 + }, + { + "epoch": 29.68, + "learning_rate": 5.0793497992188785e-08, + "loss": 3.7701, + "step": 2698000 + }, + { + "epoch": 29.69, + "learning_rate": 5.0779745860608394e-08, + "loss": 3.7539, + "step": 2698500 + }, + { + "epoch": 29.69, + "learning_rate": 5.0765993729028003e-08, + "loss": 3.7698, + "step": 2699000 + }, + { + "epoch": 29.7, + "learning_rate": 5.07522415974476e-08, + "loss": 3.7831, + "step": 2699500 + }, + { + "epoch": 29.7, + "learning_rate": 5.073848946586721e-08, + "loss": 3.7817, + "step": 2700000 + }, + { + "epoch": 29.71, + "learning_rate": 5.072473733428682e-08, + "loss": 3.7802, + "step": 2700500 + }, + { + "epoch": 29.72, + "learning_rate": 5.0710985202706414e-08, + "loss": 3.7495, + "step": 2701000 + }, + { + "epoch": 29.72, + "learning_rate": 5.0697233071126024e-08, + "loss": 3.7709, + "step": 2701500 + }, + { + "epoch": 29.73, + "learning_rate": 5.068348093954563e-08, + "loss": 3.7522, + "step": 2702000 + }, + { + "epoch": 29.73, + "learning_rate": 5.066972880796523e-08, + "loss": 3.7773, + "step": 2702500 + }, + { + "epoch": 29.74, + "learning_rate": 5.065597667638484e-08, + "loss": 3.7634, + "step": 2703000 + }, + { + "epoch": 29.74, + "learning_rate": 5.064222454480445e-08, + "loss": 3.7454, + "step": 2703500 + }, + { + "epoch": 29.75, + "learning_rate": 5.0628472413224044e-08, + "loss": 3.7574, + "step": 2704000 + }, + { + "epoch": 29.75, + "learning_rate": 5.0614720281643653e-08, + "loss": 3.7686, + "step": 2704500 + }, + { + "epoch": 29.76, + "learning_rate": 5.060096815006326e-08, + "loss": 3.7488, + "step": 2705000 + }, + { + "epoch": 29.77, + "learning_rate": 5.058721601848286e-08, + "loss": 3.7639, + "step": 2705500 + }, + { + "epoch": 29.77, + "learning_rate": 5.057346388690247e-08, + "loss": 3.7683, + "step": 2706000 + }, + { + "epoch": 29.78, + "learning_rate": 5.055971175532208e-08, + "loss": 3.7728, + "step": 2706500 + }, + { + "epoch": 29.78, + "learning_rate": 5.0545959623741674e-08, + "loss": 3.7648, + "step": 2707000 + }, + { + "epoch": 29.79, + "learning_rate": 5.053220749216128e-08, + "loss": 3.773, + "step": 2707500 + }, + { + "epoch": 29.79, + "learning_rate": 5.051845536058089e-08, + "loss": 3.7837, + "step": 2708000 + }, + { + "epoch": 29.8, + "learning_rate": 5.050470322900049e-08, + "loss": 3.784, + "step": 2708500 + }, + { + "epoch": 29.8, + "learning_rate": 5.04909510974201e-08, + "loss": 3.7826, + "step": 2709000 + }, + { + "epoch": 29.81, + "learning_rate": 5.047719896583971e-08, + "loss": 3.769, + "step": 2709500 + }, + { + "epoch": 29.81, + "learning_rate": 5.04634468342593e-08, + "loss": 3.7715, + "step": 2710000 + }, + { + "epoch": 29.82, + "learning_rate": 5.044969470267891e-08, + "loss": 3.7552, + "step": 2710500 + }, + { + "epoch": 29.83, + "learning_rate": 5.043594257109852e-08, + "loss": 3.7617, + "step": 2711000 + }, + { + "epoch": 29.83, + "learning_rate": 5.042219043951812e-08, + "loss": 3.7364, + "step": 2711500 + }, + { + "epoch": 29.84, + "learning_rate": 5.040843830793773e-08, + "loss": 3.7823, + "step": 2712000 + }, + { + "epoch": 29.84, + "learning_rate": 5.0394686176357337e-08, + "loss": 3.7426, + "step": 2712500 + }, + { + "epoch": 29.85, + "learning_rate": 5.038093404477693e-08, + "loss": 3.7587, + "step": 2713000 + }, + { + "epoch": 29.85, + "learning_rate": 5.036718191319654e-08, + "loss": 3.7369, + "step": 2713500 + }, + { + "epoch": 29.86, + "learning_rate": 5.035342978161615e-08, + "loss": 3.778, + "step": 2714000 + }, + { + "epoch": 29.86, + "learning_rate": 5.033967765003575e-08, + "loss": 3.7513, + "step": 2714500 + }, + { + "epoch": 29.87, + "learning_rate": 5.032592551845536e-08, + "loss": 3.7663, + "step": 2715000 + }, + { + "epoch": 29.88, + "learning_rate": 5.0312173386874966e-08, + "loss": 3.7903, + "step": 2715500 + }, + { + "epoch": 29.88, + "learning_rate": 5.029842125529456e-08, + "loss": 3.7809, + "step": 2716000 + }, + { + "epoch": 29.89, + "learning_rate": 5.028466912371417e-08, + "loss": 3.7518, + "step": 2716500 + }, + { + "epoch": 29.89, + "learning_rate": 5.027091699213378e-08, + "loss": 3.7621, + "step": 2717000 + }, + { + "epoch": 29.9, + "learning_rate": 5.0257164860553384e-08, + "loss": 3.7608, + "step": 2717500 + }, + { + "epoch": 29.9, + "learning_rate": 5.0243412728972986e-08, + "loss": 3.7797, + "step": 2718000 + }, + { + "epoch": 29.91, + "learning_rate": 5.0229660597392596e-08, + "loss": 3.7561, + "step": 2718500 + }, + { + "epoch": 29.91, + "learning_rate": 5.02159084658122e-08, + "loss": 3.7748, + "step": 2719000 + }, + { + "epoch": 29.92, + "learning_rate": 5.02021563342318e-08, + "loss": 3.7772, + "step": 2719500 + }, + { + "epoch": 29.92, + "learning_rate": 5.018840420265141e-08, + "loss": 3.7611, + "step": 2720000 + }, + { + "epoch": 29.93, + "learning_rate": 5.0174652071071013e-08, + "loss": 3.766, + "step": 2720500 + }, + { + "epoch": 29.94, + "learning_rate": 5.016089993949062e-08, + "loss": 3.7769, + "step": 2721000 + }, + { + "epoch": 29.94, + "learning_rate": 5.0147147807910225e-08, + "loss": 3.7593, + "step": 2721500 + }, + { + "epoch": 29.95, + "learning_rate": 5.013339567632983e-08, + "loss": 3.761, + "step": 2722000 + }, + { + "epoch": 29.95, + "learning_rate": 5.011964354474944e-08, + "loss": 3.7647, + "step": 2722500 + }, + { + "epoch": 29.96, + "learning_rate": 5.010589141316905e-08, + "loss": 3.7685, + "step": 2723000 + }, + { + "epoch": 29.96, + "learning_rate": 5.009213928158864e-08, + "loss": 3.764, + "step": 2723500 + }, + { + "epoch": 29.97, + "learning_rate": 5.007838715000825e-08, + "loss": 3.7785, + "step": 2724000 + }, + { + "epoch": 29.97, + "learning_rate": 5.006463501842786e-08, + "loss": 3.7616, + "step": 2724500 + }, + { + "epoch": 29.98, + "learning_rate": 5.005088288684746e-08, + "loss": 3.7695, + "step": 2725000 + }, + { + "epoch": 29.99, + "learning_rate": 5.003713075526707e-08, + "loss": 3.7728, + "step": 2725500 + }, + { + "epoch": 29.99, + "learning_rate": 5.0023378623686676e-08, + "loss": 3.7582, + "step": 2726000 + }, + { + "epoch": 30.0, + "learning_rate": 5.000962649210627e-08, + "loss": 3.7561, + "step": 2726500 + }, + { + "epoch": 30.0, + "eval_loss": 3.83630633354187, + "eval_runtime": 6.1313, + "eval_samples_per_second": 253.455, + "step": 2726850 + }, + { + "epoch": 30.0, + "learning_rate": 4.999587436052588e-08, + "loss": 3.7688, + "step": 2727000 + }, + { + "epoch": 30.01, + "learning_rate": 4.9982122228945485e-08, + "loss": 3.7779, + "step": 2727500 + }, + { + "epoch": 30.01, + "learning_rate": 4.996837009736509e-08, + "loss": 3.7676, + "step": 2728000 + }, + { + "epoch": 30.02, + "learning_rate": 4.9954617965784697e-08, + "loss": 3.7718, + "step": 2728500 + }, + { + "epoch": 30.02, + "learning_rate": 4.99408658342043e-08, + "loss": 3.7617, + "step": 2729000 + }, + { + "epoch": 30.03, + "learning_rate": 4.99271137026239e-08, + "loss": 3.7666, + "step": 2729500 + }, + { + "epoch": 30.03, + "learning_rate": 4.991336157104351e-08, + "loss": 3.7736, + "step": 2730000 + }, + { + "epoch": 30.04, + "learning_rate": 4.9899609439463114e-08, + "loss": 3.7874, + "step": 2730500 + }, + { + "epoch": 30.05, + "learning_rate": 4.988585730788272e-08, + "loss": 3.7805, + "step": 2731000 + }, + { + "epoch": 30.05, + "learning_rate": 4.9872105176302326e-08, + "loss": 3.7761, + "step": 2731500 + }, + { + "epoch": 30.06, + "learning_rate": 4.985835304472193e-08, + "loss": 3.7637, + "step": 2732000 + }, + { + "epoch": 30.06, + "learning_rate": 4.984460091314153e-08, + "loss": 3.7604, + "step": 2732500 + }, + { + "epoch": 30.07, + "learning_rate": 4.983084878156114e-08, + "loss": 3.7796, + "step": 2733000 + }, + { + "epoch": 30.07, + "learning_rate": 4.9817096649980744e-08, + "loss": 3.7736, + "step": 2733500 + }, + { + "epoch": 30.08, + "learning_rate": 4.9803344518400346e-08, + "loss": 3.7527, + "step": 2734000 + }, + { + "epoch": 30.08, + "learning_rate": 4.9789592386819956e-08, + "loss": 3.7637, + "step": 2734500 + }, + { + "epoch": 30.09, + "learning_rate": 4.977584025523956e-08, + "loss": 3.751, + "step": 2735000 + }, + { + "epoch": 30.1, + "learning_rate": 4.976208812365916e-08, + "loss": 3.7808, + "step": 2735500 + }, + { + "epoch": 30.1, + "learning_rate": 4.974833599207877e-08, + "loss": 3.7896, + "step": 2736000 + }, + { + "epoch": 30.11, + "learning_rate": 4.9734583860498373e-08, + "loss": 3.7621, + "step": 2736500 + }, + { + "epoch": 30.11, + "learning_rate": 4.9720831728917976e-08, + "loss": 3.7667, + "step": 2737000 + }, + { + "epoch": 30.12, + "learning_rate": 4.9707079597337585e-08, + "loss": 3.7652, + "step": 2737500 + }, + { + "epoch": 30.12, + "learning_rate": 4.969332746575719e-08, + "loss": 3.7661, + "step": 2738000 + }, + { + "epoch": 30.13, + "learning_rate": 4.967957533417679e-08, + "loss": 3.7616, + "step": 2738500 + }, + { + "epoch": 30.13, + "learning_rate": 4.96658232025964e-08, + "loss": 3.7662, + "step": 2739000 + }, + { + "epoch": 30.14, + "learning_rate": 4.9652071071016e-08, + "loss": 3.7703, + "step": 2739500 + }, + { + "epoch": 30.14, + "learning_rate": 4.9638318939435606e-08, + "loss": 3.7515, + "step": 2740000 + }, + { + "epoch": 30.15, + "learning_rate": 4.9624566807855215e-08, + "loss": 3.7783, + "step": 2740500 + }, + { + "epoch": 30.16, + "learning_rate": 4.961081467627482e-08, + "loss": 3.764, + "step": 2741000 + }, + { + "epoch": 30.16, + "learning_rate": 4.959706254469442e-08, + "loss": 3.7763, + "step": 2741500 + }, + { + "epoch": 30.17, + "learning_rate": 4.958331041311403e-08, + "loss": 3.7594, + "step": 2742000 + }, + { + "epoch": 30.17, + "learning_rate": 4.956955828153363e-08, + "loss": 3.7763, + "step": 2742500 + }, + { + "epoch": 30.18, + "learning_rate": 4.955580614995324e-08, + "loss": 3.7589, + "step": 2743000 + }, + { + "epoch": 30.18, + "learning_rate": 4.9542054018372845e-08, + "loss": 3.7605, + "step": 2743500 + }, + { + "epoch": 30.19, + "learning_rate": 4.9528301886792454e-08, + "loss": 3.7564, + "step": 2744000 + }, + { + "epoch": 30.19, + "learning_rate": 4.9514549755212057e-08, + "loss": 3.741, + "step": 2744500 + }, + { + "epoch": 30.2, + "learning_rate": 4.950079762363166e-08, + "loss": 3.7658, + "step": 2745000 + }, + { + "epoch": 30.21, + "learning_rate": 4.948704549205127e-08, + "loss": 3.7663, + "step": 2745500 + }, + { + "epoch": 30.21, + "learning_rate": 4.947329336047087e-08, + "loss": 3.7596, + "step": 2746000 + }, + { + "epoch": 30.22, + "learning_rate": 4.945954122889048e-08, + "loss": 3.7453, + "step": 2746500 + }, + { + "epoch": 30.22, + "learning_rate": 4.9445789097310084e-08, + "loss": 3.7729, + "step": 2747000 + }, + { + "epoch": 30.23, + "learning_rate": 4.9432036965729686e-08, + "loss": 3.764, + "step": 2747500 + }, + { + "epoch": 30.23, + "learning_rate": 4.9418284834149296e-08, + "loss": 3.7804, + "step": 2748000 + }, + { + "epoch": 30.24, + "learning_rate": 4.94045327025689e-08, + "loss": 3.7645, + "step": 2748500 + }, + { + "epoch": 30.24, + "learning_rate": 4.93907805709885e-08, + "loss": 3.7773, + "step": 2749000 + }, + { + "epoch": 30.25, + "learning_rate": 4.937702843940811e-08, + "loss": 3.7722, + "step": 2749500 + }, + { + "epoch": 30.25, + "learning_rate": 4.936327630782771e-08, + "loss": 3.7499, + "step": 2750000 + }, + { + "epoch": 30.26, + "learning_rate": 4.9349524176247316e-08, + "loss": 3.7661, + "step": 2750500 + }, + { + "epoch": 30.27, + "learning_rate": 4.9335772044666925e-08, + "loss": 3.7661, + "step": 2751000 + }, + { + "epoch": 30.27, + "learning_rate": 4.932201991308653e-08, + "loss": 3.7737, + "step": 2751500 + }, + { + "epoch": 30.28, + "learning_rate": 4.930826778150613e-08, + "loss": 3.7708, + "step": 2752000 + }, + { + "epoch": 30.28, + "learning_rate": 4.929451564992574e-08, + "loss": 3.7807, + "step": 2752500 + }, + { + "epoch": 30.29, + "learning_rate": 4.928076351834534e-08, + "loss": 3.7713, + "step": 2753000 + }, + { + "epoch": 30.29, + "learning_rate": 4.9267011386764945e-08, + "loss": 3.7693, + "step": 2753500 + }, + { + "epoch": 30.3, + "learning_rate": 4.9253259255184555e-08, + "loss": 3.7757, + "step": 2754000 + }, + { + "epoch": 30.3, + "learning_rate": 4.923950712360416e-08, + "loss": 3.7532, + "step": 2754500 + }, + { + "epoch": 30.31, + "learning_rate": 4.922575499202376e-08, + "loss": 3.7784, + "step": 2755000 + }, + { + "epoch": 30.32, + "learning_rate": 4.921200286044337e-08, + "loss": 3.7774, + "step": 2755500 + }, + { + "epoch": 30.32, + "learning_rate": 4.919825072886297e-08, + "loss": 3.7813, + "step": 2756000 + }, + { + "epoch": 30.33, + "learning_rate": 4.9184498597282575e-08, + "loss": 3.7772, + "step": 2756500 + }, + { + "epoch": 30.33, + "learning_rate": 4.9170746465702184e-08, + "loss": 3.7765, + "step": 2757000 + }, + { + "epoch": 30.34, + "learning_rate": 4.915699433412179e-08, + "loss": 3.7641, + "step": 2757500 + }, + { + "epoch": 30.34, + "learning_rate": 4.914324220254139e-08, + "loss": 3.7798, + "step": 2758000 + }, + { + "epoch": 30.35, + "learning_rate": 4.9129490070961e-08, + "loss": 3.773, + "step": 2758500 + }, + { + "epoch": 30.35, + "learning_rate": 4.91157379393806e-08, + "loss": 3.7698, + "step": 2759000 + }, + { + "epoch": 30.36, + "learning_rate": 4.9101985807800205e-08, + "loss": 3.7586, + "step": 2759500 + }, + { + "epoch": 30.36, + "learning_rate": 4.9088233676219814e-08, + "loss": 3.7472, + "step": 2760000 + }, + { + "epoch": 30.37, + "learning_rate": 4.907448154463942e-08, + "loss": 3.7723, + "step": 2760500 + }, + { + "epoch": 30.38, + "learning_rate": 4.906072941305902e-08, + "loss": 3.7598, + "step": 2761000 + }, + { + "epoch": 30.38, + "learning_rate": 4.904697728147862e-08, + "loss": 3.7702, + "step": 2761500 + }, + { + "epoch": 30.39, + "learning_rate": 4.903322514989823e-08, + "loss": 3.7761, + "step": 2762000 + }, + { + "epoch": 30.39, + "learning_rate": 4.9019473018317834e-08, + "loss": 3.79, + "step": 2762500 + }, + { + "epoch": 30.4, + "learning_rate": 4.900572088673744e-08, + "loss": 3.7617, + "step": 2763000 + }, + { + "epoch": 30.4, + "learning_rate": 4.8991968755157046e-08, + "loss": 3.7543, + "step": 2763500 + }, + { + "epoch": 30.41, + "learning_rate": 4.897821662357665e-08, + "loss": 3.7747, + "step": 2764000 + }, + { + "epoch": 30.41, + "learning_rate": 4.896446449199625e-08, + "loss": 3.7671, + "step": 2764500 + }, + { + "epoch": 30.42, + "learning_rate": 4.895071236041586e-08, + "loss": 3.7713, + "step": 2765000 + }, + { + "epoch": 30.43, + "learning_rate": 4.8936960228835464e-08, + "loss": 3.7651, + "step": 2765500 + }, + { + "epoch": 30.43, + "learning_rate": 4.8923208097255067e-08, + "loss": 3.7741, + "step": 2766000 + }, + { + "epoch": 30.44, + "learning_rate": 4.8909455965674676e-08, + "loss": 3.7969, + "step": 2766500 + }, + { + "epoch": 30.44, + "learning_rate": 4.889570383409428e-08, + "loss": 3.7808, + "step": 2767000 + }, + { + "epoch": 30.45, + "learning_rate": 4.888195170251389e-08, + "loss": 3.765, + "step": 2767500 + }, + { + "epoch": 30.45, + "learning_rate": 4.886819957093349e-08, + "loss": 3.7633, + "step": 2768000 + }, + { + "epoch": 30.46, + "learning_rate": 4.88544474393531e-08, + "loss": 3.7839, + "step": 2768500 + }, + { + "epoch": 30.46, + "learning_rate": 4.88406953077727e-08, + "loss": 3.7686, + "step": 2769000 + }, + { + "epoch": 30.47, + "learning_rate": 4.882694317619231e-08, + "loss": 3.7721, + "step": 2769500 + }, + { + "epoch": 30.47, + "learning_rate": 4.8813191044611915e-08, + "loss": 3.7667, + "step": 2770000 + }, + { + "epoch": 30.48, + "learning_rate": 4.879943891303152e-08, + "loss": 3.764, + "step": 2770500 + }, + { + "epoch": 30.49, + "learning_rate": 4.878568678145113e-08, + "loss": 3.7623, + "step": 2771000 + }, + { + "epoch": 30.49, + "learning_rate": 4.877193464987073e-08, + "loss": 3.7597, + "step": 2771500 + }, + { + "epoch": 30.5, + "learning_rate": 4.875818251829033e-08, + "loss": 3.7763, + "step": 2772000 + }, + { + "epoch": 30.5, + "learning_rate": 4.874443038670994e-08, + "loss": 3.7755, + "step": 2772500 + }, + { + "epoch": 30.51, + "learning_rate": 4.8730678255129544e-08, + "loss": 3.7711, + "step": 2773000 + }, + { + "epoch": 30.51, + "learning_rate": 4.871692612354915e-08, + "loss": 3.7698, + "step": 2773500 + }, + { + "epoch": 30.52, + "learning_rate": 4.8703173991968756e-08, + "loss": 3.7626, + "step": 2774000 + }, + { + "epoch": 30.52, + "learning_rate": 4.868942186038836e-08, + "loss": 3.783, + "step": 2774500 + }, + { + "epoch": 30.53, + "learning_rate": 4.867566972880796e-08, + "loss": 3.768, + "step": 2775000 + }, + { + "epoch": 30.54, + "learning_rate": 4.866191759722757e-08, + "loss": 3.7432, + "step": 2775500 + }, + { + "epoch": 30.54, + "learning_rate": 4.8648165465647174e-08, + "loss": 3.7664, + "step": 2776000 + }, + { + "epoch": 30.55, + "learning_rate": 4.863441333406678e-08, + "loss": 3.7585, + "step": 2776500 + }, + { + "epoch": 30.55, + "learning_rate": 4.8620661202486386e-08, + "loss": 3.7453, + "step": 2777000 + }, + { + "epoch": 30.56, + "learning_rate": 4.860690907090599e-08, + "loss": 3.7638, + "step": 2777500 + }, + { + "epoch": 30.56, + "learning_rate": 4.859315693932559e-08, + "loss": 3.774, + "step": 2778000 + }, + { + "epoch": 30.57, + "learning_rate": 4.85794048077452e-08, + "loss": 3.7559, + "step": 2778500 + }, + { + "epoch": 30.57, + "learning_rate": 4.8565652676164804e-08, + "loss": 3.7843, + "step": 2779000 + }, + { + "epoch": 30.58, + "learning_rate": 4.8551900544584406e-08, + "loss": 3.7573, + "step": 2779500 + }, + { + "epoch": 30.58, + "learning_rate": 4.8538148413004016e-08, + "loss": 3.7607, + "step": 2780000 + }, + { + "epoch": 30.59, + "learning_rate": 4.852439628142362e-08, + "loss": 3.7865, + "step": 2780500 + }, + { + "epoch": 30.6, + "learning_rate": 4.851064414984322e-08, + "loss": 3.7575, + "step": 2781000 + }, + { + "epoch": 30.6, + "learning_rate": 4.849689201826283e-08, + "loss": 3.7812, + "step": 2781500 + }, + { + "epoch": 30.61, + "learning_rate": 4.848313988668243e-08, + "loss": 3.7777, + "step": 2782000 + }, + { + "epoch": 30.61, + "learning_rate": 4.8469387755102036e-08, + "loss": 3.779, + "step": 2782500 + }, + { + "epoch": 30.62, + "learning_rate": 4.8455635623521645e-08, + "loss": 3.7601, + "step": 2783000 + }, + { + "epoch": 30.62, + "learning_rate": 4.844188349194125e-08, + "loss": 3.7718, + "step": 2783500 + }, + { + "epoch": 30.63, + "learning_rate": 4.842813136036085e-08, + "loss": 3.7692, + "step": 2784000 + }, + { + "epoch": 30.63, + "learning_rate": 4.841437922878046e-08, + "loss": 3.762, + "step": 2784500 + }, + { + "epoch": 30.64, + "learning_rate": 4.840062709720006e-08, + "loss": 3.7785, + "step": 2785000 + }, + { + "epoch": 30.65, + "learning_rate": 4.8386874965619666e-08, + "loss": 3.7583, + "step": 2785500 + }, + { + "epoch": 30.65, + "learning_rate": 4.8373122834039275e-08, + "loss": 3.7628, + "step": 2786000 + }, + { + "epoch": 30.66, + "learning_rate": 4.835937070245888e-08, + "loss": 3.7599, + "step": 2786500 + }, + { + "epoch": 30.66, + "learning_rate": 4.834561857087848e-08, + "loss": 3.7768, + "step": 2787000 + }, + { + "epoch": 30.67, + "learning_rate": 4.833186643929809e-08, + "loss": 3.7559, + "step": 2787500 + }, + { + "epoch": 30.67, + "learning_rate": 4.831811430771769e-08, + "loss": 3.7761, + "step": 2788000 + }, + { + "epoch": 30.68, + "learning_rate": 4.8304362176137295e-08, + "loss": 3.7617, + "step": 2788500 + }, + { + "epoch": 30.68, + "learning_rate": 4.8290610044556904e-08, + "loss": 3.7389, + "step": 2789000 + }, + { + "epoch": 30.69, + "learning_rate": 4.827685791297651e-08, + "loss": 3.7612, + "step": 2789500 + }, + { + "epoch": 30.69, + "learning_rate": 4.826310578139611e-08, + "loss": 3.7523, + "step": 2790000 + }, + { + "epoch": 30.7, + "learning_rate": 4.824935364981572e-08, + "loss": 3.7705, + "step": 2790500 + }, + { + "epoch": 30.71, + "learning_rate": 4.823560151823532e-08, + "loss": 3.7817, + "step": 2791000 + }, + { + "epoch": 30.71, + "learning_rate": 4.822184938665493e-08, + "loss": 3.781, + "step": 2791500 + }, + { + "epoch": 30.72, + "learning_rate": 4.8208097255074534e-08, + "loss": 3.772, + "step": 2792000 + }, + { + "epoch": 30.72, + "learning_rate": 4.819434512349414e-08, + "loss": 3.7727, + "step": 2792500 + }, + { + "epoch": 30.73, + "learning_rate": 4.8180592991913746e-08, + "loss": 3.758, + "step": 2793000 + }, + { + "epoch": 30.73, + "learning_rate": 4.816684086033335e-08, + "loss": 3.759, + "step": 2793500 + }, + { + "epoch": 30.74, + "learning_rate": 4.815308872875296e-08, + "loss": 3.7775, + "step": 2794000 + }, + { + "epoch": 30.74, + "learning_rate": 4.813933659717256e-08, + "loss": 3.7637, + "step": 2794500 + }, + { + "epoch": 30.75, + "learning_rate": 4.812558446559217e-08, + "loss": 3.7672, + "step": 2795000 + }, + { + "epoch": 30.76, + "learning_rate": 4.811183233401177e-08, + "loss": 3.7612, + "step": 2795500 + }, + { + "epoch": 30.76, + "learning_rate": 4.8098080202431376e-08, + "loss": 3.7714, + "step": 2796000 + }, + { + "epoch": 30.77, + "learning_rate": 4.8084328070850985e-08, + "loss": 3.7668, + "step": 2796500 + }, + { + "epoch": 30.77, + "learning_rate": 4.807057593927059e-08, + "loss": 3.7779, + "step": 2797000 + }, + { + "epoch": 30.78, + "learning_rate": 4.805682380769019e-08, + "loss": 3.7697, + "step": 2797500 + }, + { + "epoch": 30.78, + "learning_rate": 4.80430716761098e-08, + "loss": 3.7632, + "step": 2798000 + }, + { + "epoch": 30.79, + "learning_rate": 4.80293195445294e-08, + "loss": 3.7711, + "step": 2798500 + }, + { + "epoch": 30.79, + "learning_rate": 4.8015567412949005e-08, + "loss": 3.7685, + "step": 2799000 + }, + { + "epoch": 30.8, + "learning_rate": 4.800181528136861e-08, + "loss": 3.75, + "step": 2799500 + }, + { + "epoch": 30.8, + "learning_rate": 4.798806314978822e-08, + "loss": 3.7665, + "step": 2800000 + }, + { + "epoch": 30.81, + "learning_rate": 4.797431101820782e-08, + "loss": 3.7597, + "step": 2800500 + }, + { + "epoch": 30.82, + "learning_rate": 4.796055888662742e-08, + "loss": 3.7627, + "step": 2801000 + }, + { + "epoch": 30.82, + "learning_rate": 4.794680675504703e-08, + "loss": 3.7578, + "step": 2801500 + }, + { + "epoch": 30.83, + "learning_rate": 4.7933054623466635e-08, + "loss": 3.7702, + "step": 2802000 + }, + { + "epoch": 30.83, + "learning_rate": 4.791930249188624e-08, + "loss": 3.7588, + "step": 2802500 + }, + { + "epoch": 30.84, + "learning_rate": 4.790555036030585e-08, + "loss": 3.7708, + "step": 2803000 + }, + { + "epoch": 30.84, + "learning_rate": 4.789179822872545e-08, + "loss": 3.7717, + "step": 2803500 + }, + { + "epoch": 30.85, + "learning_rate": 4.787804609714505e-08, + "loss": 3.7545, + "step": 2804000 + }, + { + "epoch": 30.85, + "learning_rate": 4.786429396556466e-08, + "loss": 3.7612, + "step": 2804500 + }, + { + "epoch": 30.86, + "learning_rate": 4.7850541833984264e-08, + "loss": 3.7512, + "step": 2805000 + }, + { + "epoch": 30.87, + "learning_rate": 4.783678970240387e-08, + "loss": 3.7577, + "step": 2805500 + }, + { + "epoch": 30.87, + "learning_rate": 4.7823037570823477e-08, + "loss": 3.7646, + "step": 2806000 + }, + { + "epoch": 30.88, + "learning_rate": 4.780928543924308e-08, + "loss": 3.755, + "step": 2806500 + }, + { + "epoch": 30.88, + "learning_rate": 4.779553330766268e-08, + "loss": 3.769, + "step": 2807000 + }, + { + "epoch": 30.89, + "learning_rate": 4.778178117608229e-08, + "loss": 3.767, + "step": 2807500 + }, + { + "epoch": 30.89, + "learning_rate": 4.7768029044501894e-08, + "loss": 3.7876, + "step": 2808000 + }, + { + "epoch": 30.9, + "learning_rate": 4.77542769129215e-08, + "loss": 3.7541, + "step": 2808500 + }, + { + "epoch": 30.9, + "learning_rate": 4.7740524781341106e-08, + "loss": 3.7731, + "step": 2809000 + }, + { + "epoch": 30.91, + "learning_rate": 4.772677264976071e-08, + "loss": 3.7782, + "step": 2809500 + }, + { + "epoch": 30.91, + "learning_rate": 4.771302051818031e-08, + "loss": 3.7528, + "step": 2810000 + }, + { + "epoch": 30.92, + "learning_rate": 4.769926838659992e-08, + "loss": 3.7666, + "step": 2810500 + }, + { + "epoch": 30.93, + "learning_rate": 4.7685516255019524e-08, + "loss": 3.7498, + "step": 2811000 + }, + { + "epoch": 30.93, + "learning_rate": 4.7671764123439126e-08, + "loss": 3.7747, + "step": 2811500 + }, + { + "epoch": 30.94, + "learning_rate": 4.7658011991858736e-08, + "loss": 3.7664, + "step": 2812000 + }, + { + "epoch": 30.94, + "learning_rate": 4.764425986027834e-08, + "loss": 3.7761, + "step": 2812500 + }, + { + "epoch": 30.95, + "learning_rate": 4.763050772869794e-08, + "loss": 3.7742, + "step": 2813000 + }, + { + "epoch": 30.95, + "learning_rate": 4.761675559711755e-08, + "loss": 3.7603, + "step": 2813500 + }, + { + "epoch": 30.96, + "learning_rate": 4.760300346553715e-08, + "loss": 3.7617, + "step": 2814000 + }, + { + "epoch": 30.96, + "learning_rate": 4.7589251333956756e-08, + "loss": 3.7783, + "step": 2814500 + }, + { + "epoch": 30.97, + "learning_rate": 4.7575499202376365e-08, + "loss": 3.7608, + "step": 2815000 + }, + { + "epoch": 30.98, + "learning_rate": 4.756174707079597e-08, + "loss": 3.7677, + "step": 2815500 + }, + { + "epoch": 30.98, + "learning_rate": 4.754799493921558e-08, + "loss": 3.7615, + "step": 2816000 + }, + { + "epoch": 30.99, + "learning_rate": 4.753424280763518e-08, + "loss": 3.7602, + "step": 2816500 + }, + { + "epoch": 30.99, + "learning_rate": 4.752049067605479e-08, + "loss": 3.7753, + "step": 2817000 + }, + { + "epoch": 31.0, + "learning_rate": 4.750673854447439e-08, + "loss": 3.7547, + "step": 2817500 + }, + { + "epoch": 31.0, + "eval_loss": 3.8350541591644287, + "eval_runtime": 6.1317, + "eval_samples_per_second": 253.436, + "step": 2817745 + }, + { + "epoch": 31.0, + "learning_rate": 4.7492986412894e-08, + "loss": 3.7804, + "step": 2818000 + }, + { + "epoch": 31.01, + "learning_rate": 4.7479234281313604e-08, + "loss": 3.7624, + "step": 2818500 + }, + { + "epoch": 31.01, + "learning_rate": 4.746548214973321e-08, + "loss": 3.7589, + "step": 2819000 + }, + { + "epoch": 31.02, + "learning_rate": 4.7451730018152816e-08, + "loss": 3.7668, + "step": 2819500 + }, + { + "epoch": 31.02, + "learning_rate": 4.743797788657242e-08, + "loss": 3.7825, + "step": 2820000 + }, + { + "epoch": 31.03, + "learning_rate": 4.742422575499202e-08, + "loss": 3.7547, + "step": 2820500 + }, + { + "epoch": 31.04, + "learning_rate": 4.741047362341163e-08, + "loss": 3.7509, + "step": 2821000 + }, + { + "epoch": 31.04, + "learning_rate": 4.7396721491831234e-08, + "loss": 3.7609, + "step": 2821500 + }, + { + "epoch": 31.05, + "learning_rate": 4.7382969360250837e-08, + "loss": 3.7754, + "step": 2822000 + }, + { + "epoch": 31.05, + "learning_rate": 4.7369217228670446e-08, + "loss": 3.7622, + "step": 2822500 + }, + { + "epoch": 31.06, + "learning_rate": 4.735546509709005e-08, + "loss": 3.7794, + "step": 2823000 + }, + { + "epoch": 31.06, + "learning_rate": 4.734171296550965e-08, + "loss": 3.7724, + "step": 2823500 + }, + { + "epoch": 31.07, + "learning_rate": 4.732796083392926e-08, + "loss": 3.7799, + "step": 2824000 + }, + { + "epoch": 31.07, + "learning_rate": 4.7314208702348863e-08, + "loss": 3.7618, + "step": 2824500 + }, + { + "epoch": 31.08, + "learning_rate": 4.7300456570768466e-08, + "loss": 3.7584, + "step": 2825000 + }, + { + "epoch": 31.09, + "learning_rate": 4.7286704439188076e-08, + "loss": 3.7682, + "step": 2825500 + }, + { + "epoch": 31.09, + "learning_rate": 4.727295230760768e-08, + "loss": 3.7581, + "step": 2826000 + }, + { + "epoch": 31.1, + "learning_rate": 4.725920017602728e-08, + "loss": 3.7488, + "step": 2826500 + }, + { + "epoch": 31.1, + "learning_rate": 4.724544804444689e-08, + "loss": 3.7473, + "step": 2827000 + }, + { + "epoch": 31.11, + "learning_rate": 4.723169591286649e-08, + "loss": 3.7451, + "step": 2827500 + }, + { + "epoch": 31.11, + "learning_rate": 4.7217943781286096e-08, + "loss": 3.7653, + "step": 2828000 + }, + { + "epoch": 31.12, + "learning_rate": 4.7204191649705705e-08, + "loss": 3.7823, + "step": 2828500 + }, + { + "epoch": 31.12, + "learning_rate": 4.719043951812531e-08, + "loss": 3.7602, + "step": 2829000 + }, + { + "epoch": 31.13, + "learning_rate": 4.717668738654491e-08, + "loss": 3.7746, + "step": 2829500 + }, + { + "epoch": 31.13, + "learning_rate": 4.716293525496452e-08, + "loss": 3.7704, + "step": 2830000 + }, + { + "epoch": 31.14, + "learning_rate": 4.714918312338412e-08, + "loss": 3.7468, + "step": 2830500 + }, + { + "epoch": 31.15, + "learning_rate": 4.7135430991803725e-08, + "loss": 3.7529, + "step": 2831000 + }, + { + "epoch": 31.15, + "learning_rate": 4.7121678860223335e-08, + "loss": 3.7644, + "step": 2831500 + }, + { + "epoch": 31.16, + "learning_rate": 4.710792672864294e-08, + "loss": 3.7472, + "step": 2832000 + }, + { + "epoch": 31.16, + "learning_rate": 4.709417459706254e-08, + "loss": 3.762, + "step": 2832500 + }, + { + "epoch": 31.17, + "learning_rate": 4.708042246548215e-08, + "loss": 3.7639, + "step": 2833000 + }, + { + "epoch": 31.17, + "learning_rate": 4.706667033390175e-08, + "loss": 3.7704, + "step": 2833500 + }, + { + "epoch": 31.18, + "learning_rate": 4.7052918202321355e-08, + "loss": 3.781, + "step": 2834000 + }, + { + "epoch": 31.18, + "learning_rate": 4.7039166070740964e-08, + "loss": 3.754, + "step": 2834500 + }, + { + "epoch": 31.19, + "learning_rate": 4.702541393916057e-08, + "loss": 3.7404, + "step": 2835000 + }, + { + "epoch": 31.2, + "learning_rate": 4.701166180758017e-08, + "loss": 3.7774, + "step": 2835500 + }, + { + "epoch": 31.2, + "learning_rate": 4.699790967599977e-08, + "loss": 3.7722, + "step": 2836000 + }, + { + "epoch": 31.21, + "learning_rate": 4.698415754441938e-08, + "loss": 3.7581, + "step": 2836500 + }, + { + "epoch": 31.21, + "learning_rate": 4.6970405412838985e-08, + "loss": 3.7647, + "step": 2837000 + }, + { + "epoch": 31.22, + "learning_rate": 4.695665328125859e-08, + "loss": 3.7561, + "step": 2837500 + }, + { + "epoch": 31.22, + "learning_rate": 4.6942901149678197e-08, + "loss": 3.7666, + "step": 2838000 + }, + { + "epoch": 31.23, + "learning_rate": 4.69291490180978e-08, + "loss": 3.7781, + "step": 2838500 + }, + { + "epoch": 31.23, + "learning_rate": 4.691539688651741e-08, + "loss": 3.7711, + "step": 2839000 + }, + { + "epoch": 31.24, + "learning_rate": 4.690164475493701e-08, + "loss": 3.7762, + "step": 2839500 + }, + { + "epoch": 31.24, + "learning_rate": 4.688789262335662e-08, + "loss": 3.7832, + "step": 2840000 + }, + { + "epoch": 31.25, + "learning_rate": 4.6874140491776223e-08, + "loss": 3.7576, + "step": 2840500 + }, + { + "epoch": 31.26, + "learning_rate": 4.6860388360195826e-08, + "loss": 3.7571, + "step": 2841000 + }, + { + "epoch": 31.26, + "learning_rate": 4.6846636228615436e-08, + "loss": 3.7655, + "step": 2841500 + }, + { + "epoch": 31.27, + "learning_rate": 4.683288409703504e-08, + "loss": 3.7752, + "step": 2842000 + }, + { + "epoch": 31.27, + "learning_rate": 4.681913196545465e-08, + "loss": 3.7684, + "step": 2842500 + }, + { + "epoch": 31.28, + "learning_rate": 4.680537983387425e-08, + "loss": 3.7697, + "step": 2843000 + }, + { + "epoch": 31.28, + "learning_rate": 4.679162770229385e-08, + "loss": 3.7572, + "step": 2843500 + }, + { + "epoch": 31.29, + "learning_rate": 4.677787557071346e-08, + "loss": 3.7696, + "step": 2844000 + }, + { + "epoch": 31.29, + "learning_rate": 4.6764123439133065e-08, + "loss": 3.7776, + "step": 2844500 + }, + { + "epoch": 31.3, + "learning_rate": 4.675037130755267e-08, + "loss": 3.7747, + "step": 2845000 + }, + { + "epoch": 31.31, + "learning_rate": 4.673661917597228e-08, + "loss": 3.7542, + "step": 2845500 + }, + { + "epoch": 31.31, + "learning_rate": 4.672286704439188e-08, + "loss": 3.7617, + "step": 2846000 + }, + { + "epoch": 31.32, + "learning_rate": 4.670911491281148e-08, + "loss": 3.7723, + "step": 2846500 + }, + { + "epoch": 31.32, + "learning_rate": 4.669536278123109e-08, + "loss": 3.7529, + "step": 2847000 + }, + { + "epoch": 31.33, + "learning_rate": 4.6681610649650695e-08, + "loss": 3.7716, + "step": 2847500 + }, + { + "epoch": 31.33, + "learning_rate": 4.66678585180703e-08, + "loss": 3.7513, + "step": 2848000 + }, + { + "epoch": 31.34, + "learning_rate": 4.665410638648991e-08, + "loss": 3.7451, + "step": 2848500 + }, + { + "epoch": 31.34, + "learning_rate": 4.664035425490951e-08, + "loss": 3.7455, + "step": 2849000 + }, + { + "epoch": 31.35, + "learning_rate": 4.662660212332911e-08, + "loss": 3.7479, + "step": 2849500 + }, + { + "epoch": 31.35, + "learning_rate": 4.661284999174872e-08, + "loss": 3.7753, + "step": 2850000 + }, + { + "epoch": 31.36, + "learning_rate": 4.6599097860168324e-08, + "loss": 3.7624, + "step": 2850500 + }, + { + "epoch": 31.37, + "learning_rate": 4.658534572858793e-08, + "loss": 3.7497, + "step": 2851000 + }, + { + "epoch": 31.37, + "learning_rate": 4.6571593597007536e-08, + "loss": 3.7774, + "step": 2851500 + }, + { + "epoch": 31.38, + "learning_rate": 4.655784146542714e-08, + "loss": 3.7462, + "step": 2852000 + }, + { + "epoch": 31.38, + "learning_rate": 4.654408933384674e-08, + "loss": 3.756, + "step": 2852500 + }, + { + "epoch": 31.39, + "learning_rate": 4.653033720226635e-08, + "loss": 3.7646, + "step": 2853000 + }, + { + "epoch": 31.39, + "learning_rate": 4.6516585070685954e-08, + "loss": 3.7675, + "step": 2853500 + }, + { + "epoch": 31.4, + "learning_rate": 4.6502832939105557e-08, + "loss": 3.7844, + "step": 2854000 + }, + { + "epoch": 31.4, + "learning_rate": 4.6489080807525166e-08, + "loss": 3.7741, + "step": 2854500 + }, + { + "epoch": 31.41, + "learning_rate": 4.647532867594477e-08, + "loss": 3.7726, + "step": 2855000 + }, + { + "epoch": 31.42, + "learning_rate": 4.646157654436437e-08, + "loss": 3.7899, + "step": 2855500 + }, + { + "epoch": 31.42, + "learning_rate": 4.644782441278398e-08, + "loss": 3.7748, + "step": 2856000 + }, + { + "epoch": 31.43, + "learning_rate": 4.6434072281203583e-08, + "loss": 3.7596, + "step": 2856500 + }, + { + "epoch": 31.43, + "learning_rate": 4.6420320149623186e-08, + "loss": 3.7571, + "step": 2857000 + }, + { + "epoch": 31.44, + "learning_rate": 4.6406568018042796e-08, + "loss": 3.773, + "step": 2857500 + }, + { + "epoch": 31.44, + "learning_rate": 4.63928158864624e-08, + "loss": 3.7514, + "step": 2858000 + }, + { + "epoch": 31.45, + "learning_rate": 4.6379063754882e-08, + "loss": 3.7691, + "step": 2858500 + }, + { + "epoch": 31.45, + "learning_rate": 4.636531162330161e-08, + "loss": 3.7671, + "step": 2859000 + }, + { + "epoch": 31.46, + "learning_rate": 4.635155949172121e-08, + "loss": 3.7645, + "step": 2859500 + }, + { + "epoch": 31.46, + "learning_rate": 4.6337807360140816e-08, + "loss": 3.7678, + "step": 2860000 + }, + { + "epoch": 31.47, + "learning_rate": 4.6324055228560425e-08, + "loss": 3.7796, + "step": 2860500 + }, + { + "epoch": 31.48, + "learning_rate": 4.631030309698003e-08, + "loss": 3.7344, + "step": 2861000 + }, + { + "epoch": 31.48, + "learning_rate": 4.629655096539963e-08, + "loss": 3.773, + "step": 2861500 + }, + { + "epoch": 31.49, + "learning_rate": 4.628279883381924e-08, + "loss": 3.7784, + "step": 2862000 + }, + { + "epoch": 31.49, + "learning_rate": 4.626904670223884e-08, + "loss": 3.7664, + "step": 2862500 + }, + { + "epoch": 31.5, + "learning_rate": 4.6255294570658445e-08, + "loss": 3.7637, + "step": 2863000 + }, + { + "epoch": 31.5, + "learning_rate": 4.6241542439078055e-08, + "loss": 3.76, + "step": 2863500 + }, + { + "epoch": 31.51, + "learning_rate": 4.622779030749766e-08, + "loss": 3.7663, + "step": 2864000 + }, + { + "epoch": 31.51, + "learning_rate": 4.621403817591727e-08, + "loss": 3.7632, + "step": 2864500 + }, + { + "epoch": 31.52, + "learning_rate": 4.620028604433687e-08, + "loss": 3.7738, + "step": 2865000 + }, + { + "epoch": 31.53, + "learning_rate": 4.618653391275648e-08, + "loss": 3.7734, + "step": 2865500 + }, + { + "epoch": 31.53, + "learning_rate": 4.617278178117608e-08, + "loss": 3.7733, + "step": 2866000 + }, + { + "epoch": 31.54, + "learning_rate": 4.615902964959569e-08, + "loss": 3.7602, + "step": 2866500 + }, + { + "epoch": 31.54, + "learning_rate": 4.6145277518015294e-08, + "loss": 3.759, + "step": 2867000 + }, + { + "epoch": 31.55, + "learning_rate": 4.6131525386434896e-08, + "loss": 3.7606, + "step": 2867500 + }, + { + "epoch": 31.55, + "learning_rate": 4.6117773254854506e-08, + "loss": 3.7613, + "step": 2868000 + }, + { + "epoch": 31.56, + "learning_rate": 4.610402112327411e-08, + "loss": 3.7669, + "step": 2868500 + }, + { + "epoch": 31.56, + "learning_rate": 4.609026899169371e-08, + "loss": 3.7778, + "step": 2869000 + }, + { + "epoch": 31.57, + "learning_rate": 4.607651686011332e-08, + "loss": 3.7637, + "step": 2869500 + }, + { + "epoch": 31.57, + "learning_rate": 4.606276472853292e-08, + "loss": 3.7706, + "step": 2870000 + }, + { + "epoch": 31.58, + "learning_rate": 4.6049012596952526e-08, + "loss": 3.7623, + "step": 2870500 + }, + { + "epoch": 31.59, + "learning_rate": 4.6035260465372135e-08, + "loss": 3.7506, + "step": 2871000 + }, + { + "epoch": 31.59, + "learning_rate": 4.602150833379174e-08, + "loss": 3.7803, + "step": 2871500 + }, + { + "epoch": 31.6, + "learning_rate": 4.600775620221134e-08, + "loss": 3.763, + "step": 2872000 + }, + { + "epoch": 31.6, + "learning_rate": 4.599400407063095e-08, + "loss": 3.7775, + "step": 2872500 + }, + { + "epoch": 31.61, + "learning_rate": 4.598025193905055e-08, + "loss": 3.7881, + "step": 2873000 + }, + { + "epoch": 31.61, + "learning_rate": 4.5966499807470156e-08, + "loss": 3.7619, + "step": 2873500 + }, + { + "epoch": 31.62, + "learning_rate": 4.595274767588976e-08, + "loss": 3.7801, + "step": 2874000 + }, + { + "epoch": 31.62, + "learning_rate": 4.593899554430937e-08, + "loss": 3.7487, + "step": 2874500 + }, + { + "epoch": 31.63, + "learning_rate": 4.592524341272897e-08, + "loss": 3.7724, + "step": 2875000 + }, + { + "epoch": 31.64, + "learning_rate": 4.591149128114857e-08, + "loss": 3.7762, + "step": 2875500 + }, + { + "epoch": 31.64, + "learning_rate": 4.589773914956818e-08, + "loss": 3.7645, + "step": 2876000 + }, + { + "epoch": 31.65, + "learning_rate": 4.5883987017987785e-08, + "loss": 3.7904, + "step": 2876500 + }, + { + "epoch": 31.65, + "learning_rate": 4.587023488640739e-08, + "loss": 3.7704, + "step": 2877000 + }, + { + "epoch": 31.66, + "learning_rate": 4.5856482754827e-08, + "loss": 3.7736, + "step": 2877500 + }, + { + "epoch": 31.66, + "learning_rate": 4.58427306232466e-08, + "loss": 3.7675, + "step": 2878000 + }, + { + "epoch": 31.67, + "learning_rate": 4.58289784916662e-08, + "loss": 3.7622, + "step": 2878500 + }, + { + "epoch": 31.67, + "learning_rate": 4.581522636008581e-08, + "loss": 3.7754, + "step": 2879000 + }, + { + "epoch": 31.68, + "learning_rate": 4.5801474228505415e-08, + "loss": 3.7542, + "step": 2879500 + }, + { + "epoch": 31.68, + "learning_rate": 4.578772209692502e-08, + "loss": 3.7504, + "step": 2880000 + }, + { + "epoch": 31.69, + "learning_rate": 4.577396996534463e-08, + "loss": 3.7874, + "step": 2880500 + }, + { + "epoch": 31.7, + "learning_rate": 4.576021783376423e-08, + "loss": 3.7741, + "step": 2881000 + }, + { + "epoch": 31.7, + "learning_rate": 4.574646570218383e-08, + "loss": 3.7626, + "step": 2881500 + }, + { + "epoch": 31.71, + "learning_rate": 4.573271357060344e-08, + "loss": 3.7653, + "step": 2882000 + }, + { + "epoch": 31.71, + "learning_rate": 4.5718961439023044e-08, + "loss": 3.7624, + "step": 2882500 + }, + { + "epoch": 31.72, + "learning_rate": 4.570520930744265e-08, + "loss": 3.7706, + "step": 2883000 + }, + { + "epoch": 31.72, + "learning_rate": 4.5691457175862256e-08, + "loss": 3.7713, + "step": 2883500 + }, + { + "epoch": 31.73, + "learning_rate": 4.567770504428186e-08, + "loss": 3.7734, + "step": 2884000 + }, + { + "epoch": 31.73, + "learning_rate": 4.566395291270146e-08, + "loss": 3.7543, + "step": 2884500 + }, + { + "epoch": 31.74, + "learning_rate": 4.565020078112107e-08, + "loss": 3.7646, + "step": 2885000 + }, + { + "epoch": 31.75, + "learning_rate": 4.5636448649540674e-08, + "loss": 3.7717, + "step": 2885500 + }, + { + "epoch": 31.75, + "learning_rate": 4.5622696517960277e-08, + "loss": 3.7728, + "step": 2886000 + }, + { + "epoch": 31.76, + "learning_rate": 4.5608944386379886e-08, + "loss": 3.7767, + "step": 2886500 + }, + { + "epoch": 31.76, + "learning_rate": 4.559519225479949e-08, + "loss": 3.7712, + "step": 2887000 + }, + { + "epoch": 31.77, + "learning_rate": 4.55814401232191e-08, + "loss": 3.7717, + "step": 2887500 + }, + { + "epoch": 31.77, + "learning_rate": 4.55676879916387e-08, + "loss": 3.7609, + "step": 2888000 + }, + { + "epoch": 31.78, + "learning_rate": 4.5553935860058304e-08, + "loss": 3.7534, + "step": 2888500 + }, + { + "epoch": 31.78, + "learning_rate": 4.554018372847791e-08, + "loss": 3.772, + "step": 2889000 + }, + { + "epoch": 31.79, + "learning_rate": 4.5526431596897516e-08, + "loss": 3.7745, + "step": 2889500 + }, + { + "epoch": 31.79, + "learning_rate": 4.5512679465317125e-08, + "loss": 3.7666, + "step": 2890000 + }, + { + "epoch": 31.8, + "learning_rate": 4.549892733373673e-08, + "loss": 3.7742, + "step": 2890500 + }, + { + "epoch": 31.81, + "learning_rate": 4.548517520215634e-08, + "loss": 3.766, + "step": 2891000 + }, + { + "epoch": 31.81, + "learning_rate": 4.547142307057594e-08, + "loss": 3.7757, + "step": 2891500 + }, + { + "epoch": 31.82, + "learning_rate": 4.545767093899554e-08, + "loss": 3.757, + "step": 2892000 + }, + { + "epoch": 31.82, + "learning_rate": 4.544391880741515e-08, + "loss": 3.7643, + "step": 2892500 + }, + { + "epoch": 31.83, + "learning_rate": 4.5430166675834755e-08, + "loss": 3.7695, + "step": 2893000 + }, + { + "epoch": 31.83, + "learning_rate": 4.541641454425436e-08, + "loss": 3.7485, + "step": 2893500 + }, + { + "epoch": 31.84, + "learning_rate": 4.5402662412673967e-08, + "loss": 3.7661, + "step": 2894000 + }, + { + "epoch": 31.84, + "learning_rate": 4.538891028109357e-08, + "loss": 3.7542, + "step": 2894500 + }, + { + "epoch": 31.85, + "learning_rate": 4.537515814951317e-08, + "loss": 3.7698, + "step": 2895000 + }, + { + "epoch": 31.86, + "learning_rate": 4.536140601793278e-08, + "loss": 3.7569, + "step": 2895500 + }, + { + "epoch": 31.86, + "learning_rate": 4.5347653886352384e-08, + "loss": 3.7834, + "step": 2896000 + }, + { + "epoch": 31.87, + "learning_rate": 4.533390175477199e-08, + "loss": 3.7492, + "step": 2896500 + }, + { + "epoch": 31.87, + "learning_rate": 4.5320149623191596e-08, + "loss": 3.7479, + "step": 2897000 + }, + { + "epoch": 31.88, + "learning_rate": 4.53063974916112e-08, + "loss": 3.7585, + "step": 2897500 + }, + { + "epoch": 31.88, + "learning_rate": 4.52926453600308e-08, + "loss": 3.7707, + "step": 2898000 + }, + { + "epoch": 31.89, + "learning_rate": 4.527889322845041e-08, + "loss": 3.7628, + "step": 2898500 + }, + { + "epoch": 31.89, + "learning_rate": 4.5265141096870014e-08, + "loss": 3.7557, + "step": 2899000 + }, + { + "epoch": 31.9, + "learning_rate": 4.5251388965289616e-08, + "loss": 3.7756, + "step": 2899500 + }, + { + "epoch": 31.9, + "learning_rate": 4.5237636833709226e-08, + "loss": 3.7599, + "step": 2900000 + }, + { + "epoch": 31.91, + "learning_rate": 4.522388470212883e-08, + "loss": 3.7688, + "step": 2900500 + }, + { + "epoch": 31.92, + "learning_rate": 4.521013257054843e-08, + "loss": 3.7723, + "step": 2901000 + }, + { + "epoch": 31.92, + "learning_rate": 4.519638043896804e-08, + "loss": 3.7697, + "step": 2901500 + }, + { + "epoch": 31.93, + "learning_rate": 4.5182628307387643e-08, + "loss": 3.7732, + "step": 2902000 + }, + { + "epoch": 31.93, + "learning_rate": 4.5168876175807246e-08, + "loss": 3.7892, + "step": 2902500 + }, + { + "epoch": 31.94, + "learning_rate": 4.5155124044226855e-08, + "loss": 3.7608, + "step": 2903000 + }, + { + "epoch": 31.94, + "learning_rate": 4.514137191264646e-08, + "loss": 3.7634, + "step": 2903500 + }, + { + "epoch": 31.95, + "learning_rate": 4.512761978106606e-08, + "loss": 3.7704, + "step": 2904000 + }, + { + "epoch": 31.95, + "learning_rate": 4.511386764948567e-08, + "loss": 3.7704, + "step": 2904500 + }, + { + "epoch": 31.96, + "learning_rate": 4.510011551790527e-08, + "loss": 3.7595, + "step": 2905000 + }, + { + "epoch": 31.97, + "learning_rate": 4.5086363386324876e-08, + "loss": 3.7566, + "step": 2905500 + }, + { + "epoch": 31.97, + "learning_rate": 4.5072611254744485e-08, + "loss": 3.75, + "step": 2906000 + }, + { + "epoch": 31.98, + "learning_rate": 4.505885912316409e-08, + "loss": 3.7582, + "step": 2906500 + }, + { + "epoch": 31.98, + "learning_rate": 4.504510699158369e-08, + "loss": 3.7454, + "step": 2907000 + }, + { + "epoch": 31.99, + "learning_rate": 4.50313548600033e-08, + "loss": 3.7708, + "step": 2907500 + }, + { + "epoch": 31.99, + "learning_rate": 4.50176027284229e-08, + "loss": 3.7597, + "step": 2908000 + }, + { + "epoch": 32.0, + "learning_rate": 4.5003850596842505e-08, + "loss": 3.7627, + "step": 2908500 + }, + { + "epoch": 32.0, + "eval_loss": 3.833939552307129, + "eval_runtime": 6.1377, + "eval_samples_per_second": 253.189, + "step": 2908640 + }, + { + "epoch": 32.0, + "learning_rate": 4.4990098465262115e-08, + "loss": 3.7739, + "step": 2909000 + }, + { + "epoch": 32.01, + "learning_rate": 4.497634633368172e-08, + "loss": 3.771, + "step": 2909500 + }, + { + "epoch": 32.01, + "learning_rate": 4.496259420210132e-08, + "loss": 3.7809, + "step": 2910000 + }, + { + "epoch": 32.02, + "learning_rate": 4.494884207052093e-08, + "loss": 3.7688, + "step": 2910500 + }, + { + "epoch": 32.03, + "learning_rate": 4.493508993894053e-08, + "loss": 3.7808, + "step": 2911000 + }, + { + "epoch": 32.03, + "learning_rate": 4.4921337807360135e-08, + "loss": 3.7662, + "step": 2911500 + }, + { + "epoch": 32.04, + "learning_rate": 4.4907585675779744e-08, + "loss": 3.7659, + "step": 2912000 + }, + { + "epoch": 32.04, + "learning_rate": 4.489383354419935e-08, + "loss": 3.7515, + "step": 2912500 + }, + { + "epoch": 32.05, + "learning_rate": 4.4880081412618956e-08, + "loss": 3.7561, + "step": 2913000 + }, + { + "epoch": 32.05, + "learning_rate": 4.486632928103856e-08, + "loss": 3.7819, + "step": 2913500 + }, + { + "epoch": 32.06, + "learning_rate": 4.485257714945817e-08, + "loss": 3.753, + "step": 2914000 + }, + { + "epoch": 32.06, + "learning_rate": 4.483882501787777e-08, + "loss": 3.7538, + "step": 2914500 + }, + { + "epoch": 32.07, + "learning_rate": 4.4825072886297374e-08, + "loss": 3.763, + "step": 2915000 + }, + { + "epoch": 32.08, + "learning_rate": 4.481132075471698e-08, + "loss": 3.7705, + "step": 2915500 + }, + { + "epoch": 32.08, + "learning_rate": 4.4797568623136586e-08, + "loss": 3.7716, + "step": 2916000 + }, + { + "epoch": 32.09, + "learning_rate": 4.478381649155619e-08, + "loss": 3.7653, + "step": 2916500 + }, + { + "epoch": 32.09, + "learning_rate": 4.47700643599758e-08, + "loss": 3.7478, + "step": 2917000 + }, + { + "epoch": 32.1, + "learning_rate": 4.47563122283954e-08, + "loss": 3.7677, + "step": 2917500 + }, + { + "epoch": 32.1, + "learning_rate": 4.4742560096815003e-08, + "loss": 3.7625, + "step": 2918000 + }, + { + "epoch": 32.11, + "learning_rate": 4.472880796523461e-08, + "loss": 3.7589, + "step": 2918500 + }, + { + "epoch": 32.11, + "learning_rate": 4.4715055833654215e-08, + "loss": 3.7732, + "step": 2919000 + }, + { + "epoch": 32.12, + "learning_rate": 4.470130370207382e-08, + "loss": 3.7632, + "step": 2919500 + }, + { + "epoch": 32.12, + "learning_rate": 4.468755157049343e-08, + "loss": 3.7839, + "step": 2920000 + }, + { + "epoch": 32.13, + "learning_rate": 4.467379943891303e-08, + "loss": 3.7659, + "step": 2920500 + }, + { + "epoch": 32.14, + "learning_rate": 4.466004730733263e-08, + "loss": 3.7841, + "step": 2921000 + }, + { + "epoch": 32.14, + "learning_rate": 4.464629517575224e-08, + "loss": 3.7682, + "step": 2921500 + }, + { + "epoch": 32.15, + "learning_rate": 4.4632543044171845e-08, + "loss": 3.7596, + "step": 2922000 + }, + { + "epoch": 32.15, + "learning_rate": 4.461879091259145e-08, + "loss": 3.766, + "step": 2922500 + }, + { + "epoch": 32.16, + "learning_rate": 4.460503878101106e-08, + "loss": 3.7626, + "step": 2923000 + }, + { + "epoch": 32.16, + "learning_rate": 4.459128664943066e-08, + "loss": 3.7563, + "step": 2923500 + }, + { + "epoch": 32.17, + "learning_rate": 4.457753451785026e-08, + "loss": 3.7722, + "step": 2924000 + }, + { + "epoch": 32.17, + "learning_rate": 4.456378238626987e-08, + "loss": 3.7595, + "step": 2924500 + }, + { + "epoch": 32.18, + "learning_rate": 4.4550030254689475e-08, + "loss": 3.7716, + "step": 2925000 + }, + { + "epoch": 32.19, + "learning_rate": 4.453627812310908e-08, + "loss": 3.7866, + "step": 2925500 + }, + { + "epoch": 32.19, + "learning_rate": 4.4522525991528687e-08, + "loss": 3.7672, + "step": 2926000 + }, + { + "epoch": 32.2, + "learning_rate": 4.450877385994829e-08, + "loss": 3.7701, + "step": 2926500 + }, + { + "epoch": 32.2, + "learning_rate": 4.449502172836789e-08, + "loss": 3.7589, + "step": 2927000 + }, + { + "epoch": 32.21, + "learning_rate": 4.44812695967875e-08, + "loss": 3.7759, + "step": 2927500 + }, + { + "epoch": 32.21, + "learning_rate": 4.4467517465207104e-08, + "loss": 3.7759, + "step": 2928000 + }, + { + "epoch": 32.22, + "learning_rate": 4.445376533362671e-08, + "loss": 3.7632, + "step": 2928500 + }, + { + "epoch": 32.22, + "learning_rate": 4.4440013202046316e-08, + "loss": 3.7689, + "step": 2929000 + }, + { + "epoch": 32.23, + "learning_rate": 4.442626107046592e-08, + "loss": 3.768, + "step": 2929500 + }, + { + "epoch": 32.23, + "learning_rate": 4.441250893888552e-08, + "loss": 3.7713, + "step": 2930000 + }, + { + "epoch": 32.24, + "learning_rate": 4.439875680730513e-08, + "loss": 3.759, + "step": 2930500 + }, + { + "epoch": 32.25, + "learning_rate": 4.4385004675724734e-08, + "loss": 3.7702, + "step": 2931000 + }, + { + "epoch": 32.25, + "learning_rate": 4.4371252544144337e-08, + "loss": 3.7557, + "step": 2931500 + }, + { + "epoch": 32.26, + "learning_rate": 4.4357500412563946e-08, + "loss": 3.755, + "step": 2932000 + }, + { + "epoch": 32.26, + "learning_rate": 4.434374828098355e-08, + "loss": 3.7709, + "step": 2932500 + }, + { + "epoch": 32.27, + "learning_rate": 4.432999614940315e-08, + "loss": 3.769, + "step": 2933000 + }, + { + "epoch": 32.27, + "learning_rate": 4.431624401782276e-08, + "loss": 3.7544, + "step": 2933500 + }, + { + "epoch": 32.28, + "learning_rate": 4.4302491886242363e-08, + "loss": 3.7651, + "step": 2934000 + }, + { + "epoch": 32.28, + "learning_rate": 4.4288739754661966e-08, + "loss": 3.7577, + "step": 2934500 + }, + { + "epoch": 32.29, + "learning_rate": 4.4274987623081575e-08, + "loss": 3.7505, + "step": 2935000 + }, + { + "epoch": 32.3, + "learning_rate": 4.426123549150118e-08, + "loss": 3.7566, + "step": 2935500 + }, + { + "epoch": 32.3, + "learning_rate": 4.424748335992078e-08, + "loss": 3.778, + "step": 2936000 + }, + { + "epoch": 32.31, + "learning_rate": 4.423373122834039e-08, + "loss": 3.7646, + "step": 2936500 + }, + { + "epoch": 32.31, + "learning_rate": 4.421997909675999e-08, + "loss": 3.7626, + "step": 2937000 + }, + { + "epoch": 32.32, + "learning_rate": 4.42062269651796e-08, + "loss": 3.7619, + "step": 2937500 + }, + { + "epoch": 32.32, + "learning_rate": 4.4192474833599205e-08, + "loss": 3.7657, + "step": 2938000 + }, + { + "epoch": 32.33, + "learning_rate": 4.4178722702018814e-08, + "loss": 3.7646, + "step": 2938500 + }, + { + "epoch": 32.33, + "learning_rate": 4.416497057043842e-08, + "loss": 3.762, + "step": 2939000 + }, + { + "epoch": 32.34, + "learning_rate": 4.4151218438858026e-08, + "loss": 3.7669, + "step": 2939500 + }, + { + "epoch": 32.35, + "learning_rate": 4.413746630727763e-08, + "loss": 3.759, + "step": 2940000 + }, + { + "epoch": 32.35, + "learning_rate": 4.412371417569723e-08, + "loss": 3.7604, + "step": 2940500 + }, + { + "epoch": 32.36, + "learning_rate": 4.410996204411684e-08, + "loss": 3.7646, + "step": 2941000 + }, + { + "epoch": 32.36, + "learning_rate": 4.4096209912536444e-08, + "loss": 3.7715, + "step": 2941500 + }, + { + "epoch": 32.37, + "learning_rate": 4.408245778095605e-08, + "loss": 3.7595, + "step": 2942000 + }, + { + "epoch": 32.37, + "learning_rate": 4.4068705649375656e-08, + "loss": 3.7614, + "step": 2942500 + }, + { + "epoch": 32.38, + "learning_rate": 4.405495351779526e-08, + "loss": 3.7506, + "step": 2943000 + }, + { + "epoch": 32.38, + "learning_rate": 4.404120138621486e-08, + "loss": 3.7703, + "step": 2943500 + }, + { + "epoch": 32.39, + "learning_rate": 4.402744925463447e-08, + "loss": 3.7626, + "step": 2944000 + }, + { + "epoch": 32.39, + "learning_rate": 4.4013697123054074e-08, + "loss": 3.7741, + "step": 2944500 + }, + { + "epoch": 32.4, + "learning_rate": 4.3999944991473676e-08, + "loss": 3.7726, + "step": 2945000 + }, + { + "epoch": 32.41, + "learning_rate": 4.3986192859893286e-08, + "loss": 3.7592, + "step": 2945500 + }, + { + "epoch": 32.41, + "learning_rate": 4.397244072831289e-08, + "loss": 3.7762, + "step": 2946000 + }, + { + "epoch": 32.42, + "learning_rate": 4.395868859673249e-08, + "loss": 3.7656, + "step": 2946500 + }, + { + "epoch": 32.42, + "learning_rate": 4.39449364651521e-08, + "loss": 3.7611, + "step": 2947000 + }, + { + "epoch": 32.43, + "learning_rate": 4.39311843335717e-08, + "loss": 3.7454, + "step": 2947500 + }, + { + "epoch": 32.43, + "learning_rate": 4.3917432201991306e-08, + "loss": 3.7511, + "step": 2948000 + }, + { + "epoch": 32.44, + "learning_rate": 4.390368007041091e-08, + "loss": 3.7713, + "step": 2948500 + }, + { + "epoch": 32.44, + "learning_rate": 4.388992793883052e-08, + "loss": 3.7681, + "step": 2949000 + }, + { + "epoch": 32.45, + "learning_rate": 4.387617580725012e-08, + "loss": 3.7506, + "step": 2949500 + }, + { + "epoch": 32.46, + "learning_rate": 4.3862423675669723e-08, + "loss": 3.7634, + "step": 2950000 + }, + { + "epoch": 32.46, + "learning_rate": 4.384867154408933e-08, + "loss": 3.7474, + "step": 2950500 + }, + { + "epoch": 32.47, + "learning_rate": 4.3834919412508935e-08, + "loss": 3.7637, + "step": 2951000 + }, + { + "epoch": 32.47, + "learning_rate": 4.382116728092854e-08, + "loss": 3.7749, + "step": 2951500 + }, + { + "epoch": 32.48, + "learning_rate": 4.380741514934815e-08, + "loss": 3.7541, + "step": 2952000 + }, + { + "epoch": 32.48, + "learning_rate": 4.379366301776775e-08, + "loss": 3.7772, + "step": 2952500 + }, + { + "epoch": 32.49, + "learning_rate": 4.377991088618735e-08, + "loss": 3.7436, + "step": 2953000 + }, + { + "epoch": 32.49, + "learning_rate": 4.376615875460696e-08, + "loss": 3.763, + "step": 2953500 + }, + { + "epoch": 32.5, + "learning_rate": 4.3752406623026565e-08, + "loss": 3.7643, + "step": 2954000 + }, + { + "epoch": 32.5, + "learning_rate": 4.373865449144617e-08, + "loss": 3.7901, + "step": 2954500 + }, + { + "epoch": 32.51, + "learning_rate": 4.372490235986578e-08, + "loss": 3.7566, + "step": 2955000 + }, + { + "epoch": 32.52, + "learning_rate": 4.371115022828538e-08, + "loss": 3.772, + "step": 2955500 + }, + { + "epoch": 32.52, + "learning_rate": 4.369739809670498e-08, + "loss": 3.7485, + "step": 2956000 + }, + { + "epoch": 32.53, + "learning_rate": 4.368364596512459e-08, + "loss": 3.7632, + "step": 2956500 + }, + { + "epoch": 32.53, + "learning_rate": 4.3669893833544195e-08, + "loss": 3.7468, + "step": 2957000 + }, + { + "epoch": 32.54, + "learning_rate": 4.36561417019638e-08, + "loss": 3.7573, + "step": 2957500 + }, + { + "epoch": 32.54, + "learning_rate": 4.364238957038341e-08, + "loss": 3.7436, + "step": 2958000 + }, + { + "epoch": 32.55, + "learning_rate": 4.362863743880301e-08, + "loss": 3.7799, + "step": 2958500 + }, + { + "epoch": 32.55, + "learning_rate": 4.361488530722261e-08, + "loss": 3.766, + "step": 2959000 + }, + { + "epoch": 32.56, + "learning_rate": 4.360113317564222e-08, + "loss": 3.7647, + "step": 2959500 + }, + { + "epoch": 32.57, + "learning_rate": 4.3587381044061824e-08, + "loss": 3.7733, + "step": 2960000 + }, + { + "epoch": 32.57, + "learning_rate": 4.3573628912481434e-08, + "loss": 3.7449, + "step": 2960500 + }, + { + "epoch": 32.58, + "learning_rate": 4.3559876780901036e-08, + "loss": 3.783, + "step": 2961000 + }, + { + "epoch": 32.58, + "learning_rate": 4.3546124649320646e-08, + "loss": 3.7437, + "step": 2961500 + }, + { + "epoch": 32.59, + "learning_rate": 4.353237251774025e-08, + "loss": 3.7606, + "step": 2962000 + }, + { + "epoch": 32.59, + "learning_rate": 4.351862038615986e-08, + "loss": 3.7609, + "step": 2962500 + }, + { + "epoch": 32.6, + "learning_rate": 4.350486825457946e-08, + "loss": 3.7618, + "step": 2963000 + }, + { + "epoch": 32.6, + "learning_rate": 4.349111612299906e-08, + "loss": 3.765, + "step": 2963500 + }, + { + "epoch": 32.61, + "learning_rate": 4.347736399141867e-08, + "loss": 3.7543, + "step": 2964000 + }, + { + "epoch": 32.61, + "learning_rate": 4.3463611859838275e-08, + "loss": 3.774, + "step": 2964500 + }, + { + "epoch": 32.62, + "learning_rate": 4.344985972825788e-08, + "loss": 3.7458, + "step": 2965000 + }, + { + "epoch": 32.63, + "learning_rate": 4.343610759667749e-08, + "loss": 3.7467, + "step": 2965500 + }, + { + "epoch": 32.63, + "learning_rate": 4.342235546509709e-08, + "loss": 3.7703, + "step": 2966000 + }, + { + "epoch": 32.64, + "learning_rate": 4.340860333351669e-08, + "loss": 3.7566, + "step": 2966500 + }, + { + "epoch": 32.64, + "learning_rate": 4.33948512019363e-08, + "loss": 3.7616, + "step": 2967000 + }, + { + "epoch": 32.65, + "learning_rate": 4.3381099070355905e-08, + "loss": 3.7825, + "step": 2967500 + }, + { + "epoch": 32.65, + "learning_rate": 4.336734693877551e-08, + "loss": 3.7414, + "step": 2968000 + }, + { + "epoch": 32.66, + "learning_rate": 4.335359480719512e-08, + "loss": 3.7699, + "step": 2968500 + }, + { + "epoch": 32.66, + "learning_rate": 4.333984267561472e-08, + "loss": 3.751, + "step": 2969000 + }, + { + "epoch": 32.67, + "learning_rate": 4.332609054403432e-08, + "loss": 3.7714, + "step": 2969500 + }, + { + "epoch": 32.68, + "learning_rate": 4.331233841245393e-08, + "loss": 3.7849, + "step": 2970000 + }, + { + "epoch": 32.68, + "learning_rate": 4.3298586280873534e-08, + "loss": 3.7592, + "step": 2970500 + }, + { + "epoch": 32.69, + "learning_rate": 4.328483414929314e-08, + "loss": 3.7627, + "step": 2971000 + }, + { + "epoch": 32.69, + "learning_rate": 4.3271082017712747e-08, + "loss": 3.7586, + "step": 2971500 + }, + { + "epoch": 32.7, + "learning_rate": 4.325732988613235e-08, + "loss": 3.7579, + "step": 2972000 + }, + { + "epoch": 32.7, + "learning_rate": 4.324357775455195e-08, + "loss": 3.7703, + "step": 2972500 + }, + { + "epoch": 32.71, + "learning_rate": 4.322982562297156e-08, + "loss": 3.7526, + "step": 2973000 + }, + { + "epoch": 32.71, + "learning_rate": 4.3216073491391164e-08, + "loss": 3.7641, + "step": 2973500 + }, + { + "epoch": 32.72, + "learning_rate": 4.320232135981077e-08, + "loss": 3.7771, + "step": 2974000 + }, + { + "epoch": 32.72, + "learning_rate": 4.3188569228230376e-08, + "loss": 3.7728, + "step": 2974500 + }, + { + "epoch": 32.73, + "learning_rate": 4.317481709664998e-08, + "loss": 3.755, + "step": 2975000 + }, + { + "epoch": 32.74, + "learning_rate": 4.316106496506958e-08, + "loss": 3.7741, + "step": 2975500 + }, + { + "epoch": 32.74, + "learning_rate": 4.314731283348919e-08, + "loss": 3.757, + "step": 2976000 + }, + { + "epoch": 32.75, + "learning_rate": 4.3133560701908794e-08, + "loss": 3.7664, + "step": 2976500 + }, + { + "epoch": 32.75, + "learning_rate": 4.3119808570328396e-08, + "loss": 3.7677, + "step": 2977000 + }, + { + "epoch": 32.76, + "learning_rate": 4.3106056438748006e-08, + "loss": 3.7583, + "step": 2977500 + }, + { + "epoch": 32.76, + "learning_rate": 4.309230430716761e-08, + "loss": 3.7527, + "step": 2978000 + }, + { + "epoch": 32.77, + "learning_rate": 4.307855217558721e-08, + "loss": 3.7548, + "step": 2978500 + }, + { + "epoch": 32.77, + "learning_rate": 4.306480004400682e-08, + "loss": 3.7546, + "step": 2979000 + }, + { + "epoch": 32.78, + "learning_rate": 4.305104791242642e-08, + "loss": 3.7429, + "step": 2979500 + }, + { + "epoch": 32.79, + "learning_rate": 4.3037295780846026e-08, + "loss": 3.7678, + "step": 2980000 + }, + { + "epoch": 32.79, + "learning_rate": 4.3023543649265635e-08, + "loss": 3.7551, + "step": 2980500 + }, + { + "epoch": 32.8, + "learning_rate": 4.300979151768524e-08, + "loss": 3.7555, + "step": 2981000 + }, + { + "epoch": 32.8, + "learning_rate": 4.299603938610484e-08, + "loss": 3.7727, + "step": 2981500 + }, + { + "epoch": 32.81, + "learning_rate": 4.298228725452445e-08, + "loss": 3.7689, + "step": 2982000 + }, + { + "epoch": 32.81, + "learning_rate": 4.296853512294405e-08, + "loss": 3.7536, + "step": 2982500 + }, + { + "epoch": 32.82, + "learning_rate": 4.2954782991363656e-08, + "loss": 3.7687, + "step": 2983000 + }, + { + "epoch": 32.82, + "learning_rate": 4.2941030859783265e-08, + "loss": 3.7444, + "step": 2983500 + }, + { + "epoch": 32.83, + "learning_rate": 4.292727872820287e-08, + "loss": 3.7678, + "step": 2984000 + }, + { + "epoch": 32.83, + "learning_rate": 4.291352659662247e-08, + "loss": 3.7579, + "step": 2984500 + }, + { + "epoch": 32.84, + "learning_rate": 4.289977446504208e-08, + "loss": 3.7618, + "step": 2985000 + }, + { + "epoch": 32.85, + "learning_rate": 4.288602233346168e-08, + "loss": 3.7664, + "step": 2985500 + }, + { + "epoch": 32.85, + "learning_rate": 4.287227020188129e-08, + "loss": 3.7642, + "step": 2986000 + }, + { + "epoch": 32.86, + "learning_rate": 4.2858518070300894e-08, + "loss": 3.757, + "step": 2986500 + }, + { + "epoch": 32.86, + "learning_rate": 4.2844765938720504e-08, + "loss": 3.7786, + "step": 2987000 + }, + { + "epoch": 32.87, + "learning_rate": 4.2831013807140107e-08, + "loss": 3.7681, + "step": 2987500 + }, + { + "epoch": 32.87, + "learning_rate": 4.281726167555971e-08, + "loss": 3.7579, + "step": 2988000 + }, + { + "epoch": 32.88, + "learning_rate": 4.280350954397932e-08, + "loss": 3.7778, + "step": 2988500 + }, + { + "epoch": 32.88, + "learning_rate": 4.278975741239892e-08, + "loss": 3.7516, + "step": 2989000 + }, + { + "epoch": 32.89, + "learning_rate": 4.2776005280818524e-08, + "loss": 3.7847, + "step": 2989500 + }, + { + "epoch": 32.9, + "learning_rate": 4.2762253149238133e-08, + "loss": 3.7717, + "step": 2990000 + }, + { + "epoch": 32.9, + "learning_rate": 4.2748501017657736e-08, + "loss": 3.7692, + "step": 2990500 + }, + { + "epoch": 32.91, + "learning_rate": 4.273474888607734e-08, + "loss": 3.7644, + "step": 2991000 + }, + { + "epoch": 32.91, + "learning_rate": 4.272099675449695e-08, + "loss": 3.7618, + "step": 2991500 + }, + { + "epoch": 32.92, + "learning_rate": 4.270724462291655e-08, + "loss": 3.7547, + "step": 2992000 + }, + { + "epoch": 32.92, + "learning_rate": 4.2693492491336154e-08, + "loss": 3.7741, + "step": 2992500 + }, + { + "epoch": 32.93, + "learning_rate": 4.267974035975576e-08, + "loss": 3.7592, + "step": 2993000 + }, + { + "epoch": 32.93, + "learning_rate": 4.2665988228175366e-08, + "loss": 3.7573, + "step": 2993500 + }, + { + "epoch": 32.94, + "learning_rate": 4.265223609659497e-08, + "loss": 3.7622, + "step": 2994000 + }, + { + "epoch": 32.94, + "learning_rate": 4.263848396501458e-08, + "loss": 3.7624, + "step": 2994500 + }, + { + "epoch": 32.95, + "learning_rate": 4.262473183343418e-08, + "loss": 3.7706, + "step": 2995000 + }, + { + "epoch": 32.96, + "learning_rate": 4.261097970185378e-08, + "loss": 3.7734, + "step": 2995500 + }, + { + "epoch": 32.96, + "learning_rate": 4.259722757027339e-08, + "loss": 3.749, + "step": 2996000 + }, + { + "epoch": 32.97, + "learning_rate": 4.2583475438692995e-08, + "loss": 3.7742, + "step": 2996500 + }, + { + "epoch": 32.97, + "learning_rate": 4.25697233071126e-08, + "loss": 3.7586, + "step": 2997000 + }, + { + "epoch": 32.98, + "learning_rate": 4.255597117553221e-08, + "loss": 3.76, + "step": 2997500 + }, + { + "epoch": 32.98, + "learning_rate": 4.254221904395181e-08, + "loss": 3.762, + "step": 2998000 + }, + { + "epoch": 32.99, + "learning_rate": 4.252846691237141e-08, + "loss": 3.7522, + "step": 2998500 + }, + { + "epoch": 32.99, + "learning_rate": 4.251471478079102e-08, + "loss": 3.7554, + "step": 2999000 + }, + { + "epoch": 33.0, + "learning_rate": 4.2500962649210625e-08, + "loss": 3.747, + "step": 2999500 + }, + { + "epoch": 33.0, + "eval_loss": 3.8329977989196777, + "eval_runtime": 6.1462, + "eval_samples_per_second": 252.838, + "step": 2999535 + }, + { + "epoch": 33.01, + "learning_rate": 4.248721051763023e-08, + "loss": 3.7509, + "step": 3000000 + }, + { + "epoch": 33.01, + "learning_rate": 4.247345838604984e-08, + "loss": 3.772, + "step": 3000500 + }, + { + "epoch": 33.02, + "learning_rate": 4.245970625446944e-08, + "loss": 3.7605, + "step": 3001000 + }, + { + "epoch": 33.02, + "learning_rate": 4.244595412288904e-08, + "loss": 3.7658, + "step": 3001500 + }, + { + "epoch": 33.03, + "learning_rate": 4.243220199130865e-08, + "loss": 3.7546, + "step": 3002000 + }, + { + "epoch": 33.03, + "learning_rate": 4.2418449859728254e-08, + "loss": 3.7638, + "step": 3002500 + }, + { + "epoch": 33.04, + "learning_rate": 4.240469772814786e-08, + "loss": 3.7481, + "step": 3003000 + }, + { + "epoch": 33.04, + "learning_rate": 4.2390945596567467e-08, + "loss": 3.7547, + "step": 3003500 + }, + { + "epoch": 33.05, + "learning_rate": 4.237719346498707e-08, + "loss": 3.7651, + "step": 3004000 + }, + { + "epoch": 33.05, + "learning_rate": 4.236344133340667e-08, + "loss": 3.7707, + "step": 3004500 + }, + { + "epoch": 33.06, + "learning_rate": 4.234968920182628e-08, + "loss": 3.7629, + "step": 3005000 + }, + { + "epoch": 33.07, + "learning_rate": 4.2335937070245884e-08, + "loss": 3.7581, + "step": 3005500 + }, + { + "epoch": 33.07, + "learning_rate": 4.232218493866549e-08, + "loss": 3.7644, + "step": 3006000 + }, + { + "epoch": 33.08, + "learning_rate": 4.2308432807085096e-08, + "loss": 3.7705, + "step": 3006500 + }, + { + "epoch": 33.08, + "learning_rate": 4.22946806755047e-08, + "loss": 3.7539, + "step": 3007000 + }, + { + "epoch": 33.09, + "learning_rate": 4.22809285439243e-08, + "loss": 3.7572, + "step": 3007500 + }, + { + "epoch": 33.09, + "learning_rate": 4.226717641234391e-08, + "loss": 3.7639, + "step": 3008000 + }, + { + "epoch": 33.1, + "learning_rate": 4.2253424280763514e-08, + "loss": 3.7854, + "step": 3008500 + }, + { + "epoch": 33.1, + "learning_rate": 4.223967214918312e-08, + "loss": 3.7894, + "step": 3009000 + }, + { + "epoch": 33.11, + "learning_rate": 4.2225920017602726e-08, + "loss": 3.749, + "step": 3009500 + }, + { + "epoch": 33.12, + "learning_rate": 4.2212167886022335e-08, + "loss": 3.7561, + "step": 3010000 + }, + { + "epoch": 33.12, + "learning_rate": 4.219841575444194e-08, + "loss": 3.7404, + "step": 3010500 + }, + { + "epoch": 33.13, + "learning_rate": 4.218466362286154e-08, + "loss": 3.7568, + "step": 3011000 + }, + { + "epoch": 33.13, + "learning_rate": 4.217091149128115e-08, + "loss": 3.7517, + "step": 3011500 + }, + { + "epoch": 33.14, + "learning_rate": 4.215715935970075e-08, + "loss": 3.7675, + "step": 3012000 + }, + { + "epoch": 33.14, + "learning_rate": 4.214340722812036e-08, + "loss": 3.7567, + "step": 3012500 + }, + { + "epoch": 33.15, + "learning_rate": 4.2129655096539965e-08, + "loss": 3.7464, + "step": 3013000 + }, + { + "epoch": 33.15, + "learning_rate": 4.211590296495957e-08, + "loss": 3.7698, + "step": 3013500 + }, + { + "epoch": 33.16, + "learning_rate": 4.210215083337918e-08, + "loss": 3.7666, + "step": 3014000 + }, + { + "epoch": 33.16, + "learning_rate": 4.208839870179878e-08, + "loss": 3.7744, + "step": 3014500 + }, + { + "epoch": 33.17, + "learning_rate": 4.207464657021838e-08, + "loss": 3.7653, + "step": 3015000 + }, + { + "epoch": 33.18, + "learning_rate": 4.206089443863799e-08, + "loss": 3.7471, + "step": 3015500 + }, + { + "epoch": 33.18, + "learning_rate": 4.2047142307057594e-08, + "loss": 3.7603, + "step": 3016000 + }, + { + "epoch": 33.19, + "learning_rate": 4.20333901754772e-08, + "loss": 3.7521, + "step": 3016500 + }, + { + "epoch": 33.19, + "learning_rate": 4.2019638043896806e-08, + "loss": 3.7689, + "step": 3017000 + }, + { + "epoch": 33.2, + "learning_rate": 4.200588591231641e-08, + "loss": 3.7625, + "step": 3017500 + }, + { + "epoch": 33.2, + "learning_rate": 4.199213378073601e-08, + "loss": 3.7758, + "step": 3018000 + }, + { + "epoch": 33.21, + "learning_rate": 4.197838164915562e-08, + "loss": 3.7647, + "step": 3018500 + }, + { + "epoch": 33.21, + "learning_rate": 4.1964629517575224e-08, + "loss": 3.7661, + "step": 3019000 + }, + { + "epoch": 33.22, + "learning_rate": 4.1950877385994827e-08, + "loss": 3.7566, + "step": 3019500 + }, + { + "epoch": 33.23, + "learning_rate": 4.1937125254414436e-08, + "loss": 3.7669, + "step": 3020000 + }, + { + "epoch": 33.23, + "learning_rate": 4.192337312283404e-08, + "loss": 3.7672, + "step": 3020500 + }, + { + "epoch": 33.24, + "learning_rate": 4.190962099125364e-08, + "loss": 3.7771, + "step": 3021000 + }, + { + "epoch": 33.24, + "learning_rate": 4.189586885967325e-08, + "loss": 3.753, + "step": 3021500 + }, + { + "epoch": 33.25, + "learning_rate": 4.1882116728092853e-08, + "loss": 3.7635, + "step": 3022000 + }, + { + "epoch": 33.25, + "learning_rate": 4.1868364596512456e-08, + "loss": 3.7706, + "step": 3022500 + }, + { + "epoch": 33.26, + "learning_rate": 4.1854612464932066e-08, + "loss": 3.7654, + "step": 3023000 + }, + { + "epoch": 33.26, + "learning_rate": 4.184086033335167e-08, + "loss": 3.7604, + "step": 3023500 + }, + { + "epoch": 33.27, + "learning_rate": 4.182710820177127e-08, + "loss": 3.7589, + "step": 3024000 + }, + { + "epoch": 33.27, + "learning_rate": 4.1813356070190874e-08, + "loss": 3.7553, + "step": 3024500 + }, + { + "epoch": 33.28, + "learning_rate": 4.179960393861048e-08, + "loss": 3.743, + "step": 3025000 + }, + { + "epoch": 33.29, + "learning_rate": 4.1785851807030086e-08, + "loss": 3.7732, + "step": 3025500 + }, + { + "epoch": 33.29, + "learning_rate": 4.177209967544969e-08, + "loss": 3.7586, + "step": 3026000 + }, + { + "epoch": 33.3, + "learning_rate": 4.17583475438693e-08, + "loss": 3.7716, + "step": 3026500 + }, + { + "epoch": 33.3, + "learning_rate": 4.17445954122889e-08, + "loss": 3.7426, + "step": 3027000 + }, + { + "epoch": 33.31, + "learning_rate": 4.17308432807085e-08, + "loss": 3.7566, + "step": 3027500 + }, + { + "epoch": 33.31, + "learning_rate": 4.171709114912811e-08, + "loss": 3.7571, + "step": 3028000 + }, + { + "epoch": 33.32, + "learning_rate": 4.1703339017547715e-08, + "loss": 3.7617, + "step": 3028500 + }, + { + "epoch": 33.32, + "learning_rate": 4.168958688596732e-08, + "loss": 3.7525, + "step": 3029000 + }, + { + "epoch": 33.33, + "learning_rate": 4.167583475438693e-08, + "loss": 3.7476, + "step": 3029500 + }, + { + "epoch": 33.34, + "learning_rate": 4.166208262280653e-08, + "loss": 3.7594, + "step": 3030000 + }, + { + "epoch": 33.34, + "learning_rate": 4.164833049122613e-08, + "loss": 3.7607, + "step": 3030500 + }, + { + "epoch": 33.35, + "learning_rate": 4.163457835964574e-08, + "loss": 3.7721, + "step": 3031000 + }, + { + "epoch": 33.35, + "learning_rate": 4.1620826228065345e-08, + "loss": 3.7623, + "step": 3031500 + }, + { + "epoch": 33.36, + "learning_rate": 4.160707409648495e-08, + "loss": 3.777, + "step": 3032000 + }, + { + "epoch": 33.36, + "learning_rate": 4.159332196490456e-08, + "loss": 3.7642, + "step": 3032500 + }, + { + "epoch": 33.37, + "learning_rate": 4.157956983332416e-08, + "loss": 3.7705, + "step": 3033000 + }, + { + "epoch": 33.37, + "learning_rate": 4.156581770174377e-08, + "loss": 3.7486, + "step": 3033500 + }, + { + "epoch": 33.38, + "learning_rate": 4.155206557016337e-08, + "loss": 3.763, + "step": 3034000 + }, + { + "epoch": 33.38, + "learning_rate": 4.153831343858298e-08, + "loss": 3.7491, + "step": 3034500 + }, + { + "epoch": 33.39, + "learning_rate": 4.1524561307002584e-08, + "loss": 3.7708, + "step": 3035000 + }, + { + "epoch": 33.4, + "learning_rate": 4.151080917542219e-08, + "loss": 3.7563, + "step": 3035500 + }, + { + "epoch": 33.4, + "learning_rate": 4.1497057043841796e-08, + "loss": 3.7398, + "step": 3036000 + }, + { + "epoch": 33.41, + "learning_rate": 4.14833049122614e-08, + "loss": 3.7767, + "step": 3036500 + }, + { + "epoch": 33.41, + "learning_rate": 4.146955278068101e-08, + "loss": 3.7629, + "step": 3037000 + }, + { + "epoch": 33.42, + "learning_rate": 4.145580064910061e-08, + "loss": 3.7446, + "step": 3037500 + }, + { + "epoch": 33.42, + "learning_rate": 4.1442048517520213e-08, + "loss": 3.7554, + "step": 3038000 + }, + { + "epoch": 33.43, + "learning_rate": 4.142829638593982e-08, + "loss": 3.7561, + "step": 3038500 + }, + { + "epoch": 33.43, + "learning_rate": 4.1414544254359426e-08, + "loss": 3.7707, + "step": 3039000 + }, + { + "epoch": 33.44, + "learning_rate": 4.140079212277903e-08, + "loss": 3.7551, + "step": 3039500 + }, + { + "epoch": 33.45, + "learning_rate": 4.138703999119864e-08, + "loss": 3.7714, + "step": 3040000 + }, + { + "epoch": 33.45, + "learning_rate": 4.137328785961824e-08, + "loss": 3.7602, + "step": 3040500 + }, + { + "epoch": 33.46, + "learning_rate": 4.135953572803784e-08, + "loss": 3.7592, + "step": 3041000 + }, + { + "epoch": 33.46, + "learning_rate": 4.134578359645745e-08, + "loss": 3.7626, + "step": 3041500 + }, + { + "epoch": 33.47, + "learning_rate": 4.1332031464877055e-08, + "loss": 3.7747, + "step": 3042000 + }, + { + "epoch": 33.47, + "learning_rate": 4.131827933329666e-08, + "loss": 3.7721, + "step": 3042500 + }, + { + "epoch": 33.48, + "learning_rate": 4.130452720171627e-08, + "loss": 3.7797, + "step": 3043000 + }, + { + "epoch": 33.48, + "learning_rate": 4.129077507013587e-08, + "loss": 3.784, + "step": 3043500 + }, + { + "epoch": 33.49, + "learning_rate": 4.127702293855547e-08, + "loss": 3.7671, + "step": 3044000 + }, + { + "epoch": 33.49, + "learning_rate": 4.126327080697508e-08, + "loss": 3.7624, + "step": 3044500 + }, + { + "epoch": 33.5, + "learning_rate": 4.1249518675394685e-08, + "loss": 3.7744, + "step": 3045000 + }, + { + "epoch": 33.51, + "learning_rate": 4.123576654381429e-08, + "loss": 3.7607, + "step": 3045500 + }, + { + "epoch": 33.51, + "learning_rate": 4.12220144122339e-08, + "loss": 3.757, + "step": 3046000 + }, + { + "epoch": 33.52, + "learning_rate": 4.12082622806535e-08, + "loss": 3.7394, + "step": 3046500 + }, + { + "epoch": 33.52, + "learning_rate": 4.11945101490731e-08, + "loss": 3.771, + "step": 3047000 + }, + { + "epoch": 33.53, + "learning_rate": 4.118075801749271e-08, + "loss": 3.7761, + "step": 3047500 + }, + { + "epoch": 33.53, + "learning_rate": 4.1167005885912314e-08, + "loss": 3.758, + "step": 3048000 + }, + { + "epoch": 33.54, + "learning_rate": 4.115325375433192e-08, + "loss": 3.751, + "step": 3048500 + }, + { + "epoch": 33.54, + "learning_rate": 4.1139501622751526e-08, + "loss": 3.7572, + "step": 3049000 + }, + { + "epoch": 33.55, + "learning_rate": 4.112574949117113e-08, + "loss": 3.7566, + "step": 3049500 + }, + { + "epoch": 33.56, + "learning_rate": 4.111199735959073e-08, + "loss": 3.7676, + "step": 3050000 + }, + { + "epoch": 33.56, + "learning_rate": 4.109824522801034e-08, + "loss": 3.7521, + "step": 3050500 + }, + { + "epoch": 33.57, + "learning_rate": 4.1084493096429944e-08, + "loss": 3.7559, + "step": 3051000 + }, + { + "epoch": 33.57, + "learning_rate": 4.1070740964849547e-08, + "loss": 3.772, + "step": 3051500 + }, + { + "epoch": 33.58, + "learning_rate": 4.1056988833269156e-08, + "loss": 3.7746, + "step": 3052000 + }, + { + "epoch": 33.58, + "learning_rate": 4.104323670168876e-08, + "loss": 3.7613, + "step": 3052500 + }, + { + "epoch": 33.59, + "learning_rate": 4.102948457010836e-08, + "loss": 3.7554, + "step": 3053000 + }, + { + "epoch": 33.59, + "learning_rate": 4.101573243852797e-08, + "loss": 3.7688, + "step": 3053500 + }, + { + "epoch": 33.6, + "learning_rate": 4.1001980306947573e-08, + "loss": 3.7644, + "step": 3054000 + }, + { + "epoch": 33.6, + "learning_rate": 4.0988228175367176e-08, + "loss": 3.7535, + "step": 3054500 + }, + { + "epoch": 33.61, + "learning_rate": 4.0974476043786786e-08, + "loss": 3.7637, + "step": 3055000 + }, + { + "epoch": 33.62, + "learning_rate": 4.096072391220639e-08, + "loss": 3.7706, + "step": 3055500 + }, + { + "epoch": 33.62, + "learning_rate": 4.094697178062599e-08, + "loss": 3.7621, + "step": 3056000 + }, + { + "epoch": 33.63, + "learning_rate": 4.09332196490456e-08, + "loss": 3.7837, + "step": 3056500 + }, + { + "epoch": 33.63, + "learning_rate": 4.09194675174652e-08, + "loss": 3.7603, + "step": 3057000 + }, + { + "epoch": 33.64, + "learning_rate": 4.090571538588481e-08, + "loss": 3.7445, + "step": 3057500 + }, + { + "epoch": 33.64, + "learning_rate": 4.0891963254304415e-08, + "loss": 3.7676, + "step": 3058000 + }, + { + "epoch": 33.65, + "learning_rate": 4.087821112272402e-08, + "loss": 3.764, + "step": 3058500 + }, + { + "epoch": 33.65, + "learning_rate": 4.086445899114363e-08, + "loss": 3.7582, + "step": 3059000 + }, + { + "epoch": 33.66, + "learning_rate": 4.085070685956323e-08, + "loss": 3.7507, + "step": 3059500 + }, + { + "epoch": 33.67, + "learning_rate": 4.083695472798284e-08, + "loss": 3.7639, + "step": 3060000 + }, + { + "epoch": 33.67, + "learning_rate": 4.082320259640244e-08, + "loss": 3.7751, + "step": 3060500 + }, + { + "epoch": 33.68, + "learning_rate": 4.080945046482205e-08, + "loss": 3.7752, + "step": 3061000 + }, + { + "epoch": 33.68, + "learning_rate": 4.0795698333241654e-08, + "loss": 3.7751, + "step": 3061500 + }, + { + "epoch": 33.69, + "learning_rate": 4.078194620166126e-08, + "loss": 3.7732, + "step": 3062000 + }, + { + "epoch": 33.69, + "learning_rate": 4.076819407008086e-08, + "loss": 3.7648, + "step": 3062500 + }, + { + "epoch": 33.7, + "learning_rate": 4.075444193850047e-08, + "loss": 3.7605, + "step": 3063000 + }, + { + "epoch": 33.7, + "learning_rate": 4.074068980692007e-08, + "loss": 3.7618, + "step": 3063500 + }, + { + "epoch": 33.71, + "learning_rate": 4.0726937675339674e-08, + "loss": 3.7506, + "step": 3064000 + }, + { + "epoch": 33.71, + "learning_rate": 4.0713185543759284e-08, + "loss": 3.7538, + "step": 3064500 + }, + { + "epoch": 33.72, + "learning_rate": 4.0699433412178886e-08, + "loss": 3.7593, + "step": 3065000 + }, + { + "epoch": 33.73, + "learning_rate": 4.068568128059849e-08, + "loss": 3.7742, + "step": 3065500 + }, + { + "epoch": 33.73, + "learning_rate": 4.06719291490181e-08, + "loss": 3.7644, + "step": 3066000 + }, + { + "epoch": 33.74, + "learning_rate": 4.06581770174377e-08, + "loss": 3.7353, + "step": 3066500 + }, + { + "epoch": 33.74, + "learning_rate": 4.0644424885857304e-08, + "loss": 3.7488, + "step": 3067000 + }, + { + "epoch": 33.75, + "learning_rate": 4.063067275427691e-08, + "loss": 3.7609, + "step": 3067500 + }, + { + "epoch": 33.75, + "learning_rate": 4.0616920622696516e-08, + "loss": 3.7697, + "step": 3068000 + }, + { + "epoch": 33.76, + "learning_rate": 4.060316849111612e-08, + "loss": 3.7595, + "step": 3068500 + }, + { + "epoch": 33.76, + "learning_rate": 4.058941635953573e-08, + "loss": 3.7318, + "step": 3069000 + }, + { + "epoch": 33.77, + "learning_rate": 4.057566422795533e-08, + "loss": 3.7651, + "step": 3069500 + }, + { + "epoch": 33.78, + "learning_rate": 4.0561912096374934e-08, + "loss": 3.7637, + "step": 3070000 + }, + { + "epoch": 33.78, + "learning_rate": 4.054815996479454e-08, + "loss": 3.7531, + "step": 3070500 + }, + { + "epoch": 33.79, + "learning_rate": 4.0534407833214146e-08, + "loss": 3.7617, + "step": 3071000 + }, + { + "epoch": 33.79, + "learning_rate": 4.052065570163375e-08, + "loss": 3.7627, + "step": 3071500 + }, + { + "epoch": 33.8, + "learning_rate": 4.050690357005336e-08, + "loss": 3.7748, + "step": 3072000 + }, + { + "epoch": 33.8, + "learning_rate": 4.049315143847296e-08, + "loss": 3.7788, + "step": 3072500 + }, + { + "epoch": 33.81, + "learning_rate": 4.047939930689256e-08, + "loss": 3.7751, + "step": 3073000 + }, + { + "epoch": 33.81, + "learning_rate": 4.046564717531217e-08, + "loss": 3.7731, + "step": 3073500 + }, + { + "epoch": 33.82, + "learning_rate": 4.0451895043731775e-08, + "loss": 3.7528, + "step": 3074000 + }, + { + "epoch": 33.82, + "learning_rate": 4.043814291215138e-08, + "loss": 3.7635, + "step": 3074500 + }, + { + "epoch": 33.83, + "learning_rate": 4.042439078057099e-08, + "loss": 3.771, + "step": 3075000 + }, + { + "epoch": 33.84, + "learning_rate": 4.041063864899059e-08, + "loss": 3.7479, + "step": 3075500 + }, + { + "epoch": 33.84, + "learning_rate": 4.039688651741019e-08, + "loss": 3.7559, + "step": 3076000 + }, + { + "epoch": 33.85, + "learning_rate": 4.03831343858298e-08, + "loss": 3.77, + "step": 3076500 + }, + { + "epoch": 33.85, + "learning_rate": 4.0369382254249405e-08, + "loss": 3.7472, + "step": 3077000 + }, + { + "epoch": 33.86, + "learning_rate": 4.035563012266901e-08, + "loss": 3.7628, + "step": 3077500 + }, + { + "epoch": 33.86, + "learning_rate": 4.034187799108862e-08, + "loss": 3.7652, + "step": 3078000 + }, + { + "epoch": 33.87, + "learning_rate": 4.032812585950822e-08, + "loss": 3.7565, + "step": 3078500 + }, + { + "epoch": 33.87, + "learning_rate": 4.031437372792782e-08, + "loss": 3.739, + "step": 3079000 + }, + { + "epoch": 33.88, + "learning_rate": 4.030062159634743e-08, + "loss": 3.742, + "step": 3079500 + }, + { + "epoch": 33.89, + "learning_rate": 4.0286869464767034e-08, + "loss": 3.7808, + "step": 3080000 + }, + { + "epoch": 33.89, + "learning_rate": 4.027311733318664e-08, + "loss": 3.7563, + "step": 3080500 + }, + { + "epoch": 33.9, + "learning_rate": 4.0259365201606246e-08, + "loss": 3.7559, + "step": 3081000 + }, + { + "epoch": 33.9, + "learning_rate": 4.024561307002585e-08, + "loss": 3.7584, + "step": 3081500 + }, + { + "epoch": 33.91, + "learning_rate": 4.023186093844546e-08, + "loss": 3.7686, + "step": 3082000 + }, + { + "epoch": 33.91, + "learning_rate": 4.021810880686506e-08, + "loss": 3.7599, + "step": 3082500 + }, + { + "epoch": 33.92, + "learning_rate": 4.020435667528467e-08, + "loss": 3.7561, + "step": 3083000 + }, + { + "epoch": 33.92, + "learning_rate": 4.019060454370427e-08, + "loss": 3.7647, + "step": 3083500 + }, + { + "epoch": 33.93, + "learning_rate": 4.017685241212388e-08, + "loss": 3.7669, + "step": 3084000 + }, + { + "epoch": 33.93, + "learning_rate": 4.0163100280543485e-08, + "loss": 3.7577, + "step": 3084500 + }, + { + "epoch": 33.94, + "learning_rate": 4.014934814896309e-08, + "loss": 3.7742, + "step": 3085000 + }, + { + "epoch": 33.95, + "learning_rate": 4.01355960173827e-08, + "loss": 3.7626, + "step": 3085500 + }, + { + "epoch": 33.95, + "learning_rate": 4.01218438858023e-08, + "loss": 3.7714, + "step": 3086000 + }, + { + "epoch": 33.96, + "learning_rate": 4.01080917542219e-08, + "loss": 3.7616, + "step": 3086500 + }, + { + "epoch": 33.96, + "learning_rate": 4.009433962264151e-08, + "loss": 3.7667, + "step": 3087000 + }, + { + "epoch": 33.97, + "learning_rate": 4.0080587491061115e-08, + "loss": 3.7657, + "step": 3087500 + }, + { + "epoch": 33.97, + "learning_rate": 4.006683535948072e-08, + "loss": 3.7608, + "step": 3088000 + }, + { + "epoch": 33.98, + "learning_rate": 4.005308322790033e-08, + "loss": 3.7442, + "step": 3088500 + }, + { + "epoch": 33.98, + "learning_rate": 4.003933109631993e-08, + "loss": 3.7704, + "step": 3089000 + }, + { + "epoch": 33.99, + "learning_rate": 4.002557896473953e-08, + "loss": 3.7562, + "step": 3089500 + }, + { + "epoch": 34.0, + "learning_rate": 4.001182683315914e-08, + "loss": 3.783, + "step": 3090000 + }, + { + "epoch": 34.0, + "eval_loss": 3.8324673175811768, + "eval_runtime": 6.1509, + "eval_samples_per_second": 252.646, + "step": 3090430 + }, + { + "epoch": 34.0, + "learning_rate": 3.9998074701578745e-08, + "loss": 3.7363, + "step": 3090500 + }, + { + "epoch": 34.01, + "learning_rate": 3.998432256999835e-08, + "loss": 3.7691, + "step": 3091000 + }, + { + "epoch": 34.01, + "learning_rate": 3.9970570438417957e-08, + "loss": 3.7711, + "step": 3091500 + }, + { + "epoch": 34.02, + "learning_rate": 3.995681830683756e-08, + "loss": 3.7681, + "step": 3092000 + }, + { + "epoch": 34.02, + "learning_rate": 3.994306617525716e-08, + "loss": 3.7686, + "step": 3092500 + }, + { + "epoch": 34.03, + "learning_rate": 3.992931404367677e-08, + "loss": 3.7628, + "step": 3093000 + }, + { + "epoch": 34.03, + "learning_rate": 3.9915561912096374e-08, + "loss": 3.7731, + "step": 3093500 + }, + { + "epoch": 34.04, + "learning_rate": 3.990180978051598e-08, + "loss": 3.7735, + "step": 3094000 + }, + { + "epoch": 34.04, + "learning_rate": 3.9888057648935586e-08, + "loss": 3.7526, + "step": 3094500 + }, + { + "epoch": 34.05, + "learning_rate": 3.987430551735519e-08, + "loss": 3.7488, + "step": 3095000 + }, + { + "epoch": 34.06, + "learning_rate": 3.986055338577479e-08, + "loss": 3.7633, + "step": 3095500 + }, + { + "epoch": 34.06, + "learning_rate": 3.98468012541944e-08, + "loss": 3.7538, + "step": 3096000 + }, + { + "epoch": 34.07, + "learning_rate": 3.9833049122614004e-08, + "loss": 3.7591, + "step": 3096500 + }, + { + "epoch": 34.07, + "learning_rate": 3.9819296991033606e-08, + "loss": 3.7755, + "step": 3097000 + }, + { + "epoch": 34.08, + "learning_rate": 3.9805544859453216e-08, + "loss": 3.7724, + "step": 3097500 + }, + { + "epoch": 34.08, + "learning_rate": 3.979179272787282e-08, + "loss": 3.7637, + "step": 3098000 + }, + { + "epoch": 34.09, + "learning_rate": 3.977804059629242e-08, + "loss": 3.7667, + "step": 3098500 + }, + { + "epoch": 34.09, + "learning_rate": 3.9764288464712024e-08, + "loss": 3.7622, + "step": 3099000 + }, + { + "epoch": 34.1, + "learning_rate": 3.9750536333131633e-08, + "loss": 3.7746, + "step": 3099500 + }, + { + "epoch": 34.11, + "learning_rate": 3.9736784201551236e-08, + "loss": 3.7509, + "step": 3100000 + }, + { + "epoch": 34.11, + "learning_rate": 3.972303206997084e-08, + "loss": 3.7528, + "step": 3100500 + }, + { + "epoch": 34.12, + "learning_rate": 3.970927993839045e-08, + "loss": 3.7828, + "step": 3101000 + }, + { + "epoch": 34.12, + "learning_rate": 3.969552780681005e-08, + "loss": 3.7774, + "step": 3101500 + }, + { + "epoch": 34.13, + "learning_rate": 3.9681775675229654e-08, + "loss": 3.7708, + "step": 3102000 + }, + { + "epoch": 34.13, + "learning_rate": 3.966802354364926e-08, + "loss": 3.7526, + "step": 3102500 + }, + { + "epoch": 34.14, + "learning_rate": 3.9654271412068866e-08, + "loss": 3.7436, + "step": 3103000 + }, + { + "epoch": 34.14, + "learning_rate": 3.964051928048847e-08, + "loss": 3.7613, + "step": 3103500 + }, + { + "epoch": 34.15, + "learning_rate": 3.962676714890808e-08, + "loss": 3.765, + "step": 3104000 + }, + { + "epoch": 34.15, + "learning_rate": 3.961301501732768e-08, + "loss": 3.7545, + "step": 3104500 + }, + { + "epoch": 34.16, + "learning_rate": 3.959926288574729e-08, + "loss": 3.7454, + "step": 3105000 + }, + { + "epoch": 34.17, + "learning_rate": 3.958551075416689e-08, + "loss": 3.7654, + "step": 3105500 + }, + { + "epoch": 34.17, + "learning_rate": 3.95717586225865e-08, + "loss": 3.7695, + "step": 3106000 + }, + { + "epoch": 34.18, + "learning_rate": 3.9558006491006105e-08, + "loss": 3.7572, + "step": 3106500 + }, + { + "epoch": 34.18, + "learning_rate": 3.954425435942571e-08, + "loss": 3.7565, + "step": 3107000 + }, + { + "epoch": 34.19, + "learning_rate": 3.9530502227845317e-08, + "loss": 3.7727, + "step": 3107500 + }, + { + "epoch": 34.19, + "learning_rate": 3.951675009626492e-08, + "loss": 3.7678, + "step": 3108000 + }, + { + "epoch": 34.2, + "learning_rate": 3.950299796468453e-08, + "loss": 3.756, + "step": 3108500 + }, + { + "epoch": 34.2, + "learning_rate": 3.948924583310413e-08, + "loss": 3.7523, + "step": 3109000 + }, + { + "epoch": 34.21, + "learning_rate": 3.9475493701523734e-08, + "loss": 3.7604, + "step": 3109500 + }, + { + "epoch": 34.22, + "learning_rate": 3.9461741569943344e-08, + "loss": 3.762, + "step": 3110000 + }, + { + "epoch": 34.22, + "learning_rate": 3.9447989438362946e-08, + "loss": 3.7738, + "step": 3110500 + }, + { + "epoch": 34.23, + "learning_rate": 3.943423730678255e-08, + "loss": 3.7746, + "step": 3111000 + }, + { + "epoch": 34.23, + "learning_rate": 3.942048517520216e-08, + "loss": 3.769, + "step": 3111500 + }, + { + "epoch": 34.24, + "learning_rate": 3.940673304362176e-08, + "loss": 3.7635, + "step": 3112000 + }, + { + "epoch": 34.24, + "learning_rate": 3.9392980912041364e-08, + "loss": 3.7458, + "step": 3112500 + }, + { + "epoch": 34.25, + "learning_rate": 3.937922878046097e-08, + "loss": 3.7535, + "step": 3113000 + }, + { + "epoch": 34.25, + "learning_rate": 3.9365476648880576e-08, + "loss": 3.7603, + "step": 3113500 + }, + { + "epoch": 34.26, + "learning_rate": 3.935172451730018e-08, + "loss": 3.7512, + "step": 3114000 + }, + { + "epoch": 34.26, + "learning_rate": 3.933797238571979e-08, + "loss": 3.7659, + "step": 3114500 + }, + { + "epoch": 34.27, + "learning_rate": 3.932422025413939e-08, + "loss": 3.7545, + "step": 3115000 + }, + { + "epoch": 34.28, + "learning_rate": 3.9310468122558993e-08, + "loss": 3.741, + "step": 3115500 + }, + { + "epoch": 34.28, + "learning_rate": 3.92967159909786e-08, + "loss": 3.7497, + "step": 3116000 + }, + { + "epoch": 34.29, + "learning_rate": 3.9282963859398205e-08, + "loss": 3.7511, + "step": 3116500 + }, + { + "epoch": 34.29, + "learning_rate": 3.926921172781781e-08, + "loss": 3.7459, + "step": 3117000 + }, + { + "epoch": 34.3, + "learning_rate": 3.925545959623742e-08, + "loss": 3.7645, + "step": 3117500 + }, + { + "epoch": 34.3, + "learning_rate": 3.924170746465702e-08, + "loss": 3.7506, + "step": 3118000 + }, + { + "epoch": 34.31, + "learning_rate": 3.922795533307662e-08, + "loss": 3.7576, + "step": 3118500 + }, + { + "epoch": 34.31, + "learning_rate": 3.921420320149623e-08, + "loss": 3.7676, + "step": 3119000 + }, + { + "epoch": 34.32, + "learning_rate": 3.9200451069915835e-08, + "loss": 3.7631, + "step": 3119500 + }, + { + "epoch": 34.33, + "learning_rate": 3.918669893833544e-08, + "loss": 3.7492, + "step": 3120000 + }, + { + "epoch": 34.33, + "learning_rate": 3.917294680675505e-08, + "loss": 3.747, + "step": 3120500 + }, + { + "epoch": 34.34, + "learning_rate": 3.915919467517465e-08, + "loss": 3.756, + "step": 3121000 + }, + { + "epoch": 34.34, + "learning_rate": 3.914544254359425e-08, + "loss": 3.759, + "step": 3121500 + }, + { + "epoch": 34.35, + "learning_rate": 3.913169041201386e-08, + "loss": 3.761, + "step": 3122000 + }, + { + "epoch": 34.35, + "learning_rate": 3.9117938280433465e-08, + "loss": 3.749, + "step": 3122500 + }, + { + "epoch": 34.36, + "learning_rate": 3.910418614885307e-08, + "loss": 3.7814, + "step": 3123000 + }, + { + "epoch": 34.36, + "learning_rate": 3.9090434017272677e-08, + "loss": 3.7494, + "step": 3123500 + }, + { + "epoch": 34.37, + "learning_rate": 3.907668188569228e-08, + "loss": 3.773, + "step": 3124000 + }, + { + "epoch": 34.37, + "learning_rate": 3.906292975411188e-08, + "loss": 3.7532, + "step": 3124500 + }, + { + "epoch": 34.38, + "learning_rate": 3.904917762253149e-08, + "loss": 3.7647, + "step": 3125000 + }, + { + "epoch": 34.39, + "learning_rate": 3.9035425490951094e-08, + "loss": 3.7446, + "step": 3125500 + }, + { + "epoch": 34.39, + "learning_rate": 3.90216733593707e-08, + "loss": 3.76, + "step": 3126000 + }, + { + "epoch": 34.4, + "learning_rate": 3.9007921227790306e-08, + "loss": 3.7679, + "step": 3126500 + }, + { + "epoch": 34.4, + "learning_rate": 3.899416909620991e-08, + "loss": 3.784, + "step": 3127000 + }, + { + "epoch": 34.41, + "learning_rate": 3.898041696462951e-08, + "loss": 3.7548, + "step": 3127500 + }, + { + "epoch": 34.41, + "learning_rate": 3.896666483304912e-08, + "loss": 3.7381, + "step": 3128000 + }, + { + "epoch": 34.42, + "learning_rate": 3.8952912701468724e-08, + "loss": 3.7516, + "step": 3128500 + }, + { + "epoch": 34.42, + "learning_rate": 3.8939160569888327e-08, + "loss": 3.7591, + "step": 3129000 + }, + { + "epoch": 34.43, + "learning_rate": 3.8925408438307936e-08, + "loss": 3.7594, + "step": 3129500 + }, + { + "epoch": 34.44, + "learning_rate": 3.891165630672754e-08, + "loss": 3.7691, + "step": 3130000 + }, + { + "epoch": 34.44, + "learning_rate": 3.889790417514715e-08, + "loss": 3.7688, + "step": 3130500 + }, + { + "epoch": 34.45, + "learning_rate": 3.888415204356675e-08, + "loss": 3.7578, + "step": 3131000 + }, + { + "epoch": 34.45, + "learning_rate": 3.887039991198636e-08, + "loss": 3.7741, + "step": 3131500 + }, + { + "epoch": 34.46, + "learning_rate": 3.885664778040596e-08, + "loss": 3.7459, + "step": 3132000 + }, + { + "epoch": 34.46, + "learning_rate": 3.884289564882557e-08, + "loss": 3.7577, + "step": 3132500 + }, + { + "epoch": 34.47, + "learning_rate": 3.8829143517245175e-08, + "loss": 3.7659, + "step": 3133000 + }, + { + "epoch": 34.47, + "learning_rate": 3.881539138566478e-08, + "loss": 3.7616, + "step": 3133500 + }, + { + "epoch": 34.48, + "learning_rate": 3.880163925408439e-08, + "loss": 3.745, + "step": 3134000 + }, + { + "epoch": 34.48, + "learning_rate": 3.878788712250399e-08, + "loss": 3.7574, + "step": 3134500 + }, + { + "epoch": 34.49, + "learning_rate": 3.877413499092359e-08, + "loss": 3.7498, + "step": 3135000 + }, + { + "epoch": 34.5, + "learning_rate": 3.87603828593432e-08, + "loss": 3.757, + "step": 3135500 + }, + { + "epoch": 34.5, + "learning_rate": 3.8746630727762804e-08, + "loss": 3.7834, + "step": 3136000 + }, + { + "epoch": 34.51, + "learning_rate": 3.873287859618241e-08, + "loss": 3.7823, + "step": 3136500 + }, + { + "epoch": 34.51, + "learning_rate": 3.871912646460201e-08, + "loss": 3.7526, + "step": 3137000 + }, + { + "epoch": 34.52, + "learning_rate": 3.870537433302162e-08, + "loss": 3.7573, + "step": 3137500 + }, + { + "epoch": 34.52, + "learning_rate": 3.869162220144122e-08, + "loss": 3.7536, + "step": 3138000 + }, + { + "epoch": 34.53, + "learning_rate": 3.8677870069860825e-08, + "loss": 3.7845, + "step": 3138500 + }, + { + "epoch": 34.53, + "learning_rate": 3.8664117938280434e-08, + "loss": 3.7693, + "step": 3139000 + }, + { + "epoch": 34.54, + "learning_rate": 3.865036580670004e-08, + "loss": 3.7439, + "step": 3139500 + }, + { + "epoch": 34.55, + "learning_rate": 3.863661367511964e-08, + "loss": 3.7533, + "step": 3140000 + }, + { + "epoch": 34.55, + "learning_rate": 3.862286154353925e-08, + "loss": 3.7679, + "step": 3140500 + }, + { + "epoch": 34.56, + "learning_rate": 3.860910941195885e-08, + "loss": 3.7505, + "step": 3141000 + }, + { + "epoch": 34.56, + "learning_rate": 3.8595357280378454e-08, + "loss": 3.768, + "step": 3141500 + }, + { + "epoch": 34.57, + "learning_rate": 3.8581605148798064e-08, + "loss": 3.754, + "step": 3142000 + }, + { + "epoch": 34.57, + "learning_rate": 3.8567853017217666e-08, + "loss": 3.7658, + "step": 3142500 + }, + { + "epoch": 34.58, + "learning_rate": 3.855410088563727e-08, + "loss": 3.7819, + "step": 3143000 + }, + { + "epoch": 34.58, + "learning_rate": 3.854034875405688e-08, + "loss": 3.7366, + "step": 3143500 + }, + { + "epoch": 34.59, + "learning_rate": 3.852659662247648e-08, + "loss": 3.7628, + "step": 3144000 + }, + { + "epoch": 34.59, + "learning_rate": 3.8512844490896084e-08, + "loss": 3.7597, + "step": 3144500 + }, + { + "epoch": 34.6, + "learning_rate": 3.849909235931569e-08, + "loss": 3.7584, + "step": 3145000 + }, + { + "epoch": 34.61, + "learning_rate": 3.8485340227735296e-08, + "loss": 3.7631, + "step": 3145500 + }, + { + "epoch": 34.61, + "learning_rate": 3.84715880961549e-08, + "loss": 3.7451, + "step": 3146000 + }, + { + "epoch": 34.62, + "learning_rate": 3.845783596457451e-08, + "loss": 3.7569, + "step": 3146500 + }, + { + "epoch": 34.62, + "learning_rate": 3.844408383299411e-08, + "loss": 3.7817, + "step": 3147000 + }, + { + "epoch": 34.63, + "learning_rate": 3.8430331701413713e-08, + "loss": 3.7546, + "step": 3147500 + }, + { + "epoch": 34.63, + "learning_rate": 3.841657956983332e-08, + "loss": 3.7719, + "step": 3148000 + }, + { + "epoch": 34.64, + "learning_rate": 3.8402827438252925e-08, + "loss": 3.7693, + "step": 3148500 + }, + { + "epoch": 34.64, + "learning_rate": 3.838907530667253e-08, + "loss": 3.7647, + "step": 3149000 + }, + { + "epoch": 34.65, + "learning_rate": 3.837532317509214e-08, + "loss": 3.7741, + "step": 3149500 + }, + { + "epoch": 34.66, + "learning_rate": 3.836157104351174e-08, + "loss": 3.7681, + "step": 3150000 + }, + { + "epoch": 34.66, + "learning_rate": 3.834781891193134e-08, + "loss": 3.7631, + "step": 3150500 + }, + { + "epoch": 34.67, + "learning_rate": 3.833406678035095e-08, + "loss": 3.7656, + "step": 3151000 + }, + { + "epoch": 34.67, + "learning_rate": 3.8320314648770555e-08, + "loss": 3.7562, + "step": 3151500 + }, + { + "epoch": 34.68, + "learning_rate": 3.830656251719016e-08, + "loss": 3.7427, + "step": 3152000 + }, + { + "epoch": 34.68, + "learning_rate": 3.829281038560977e-08, + "loss": 3.7609, + "step": 3152500 + }, + { + "epoch": 34.69, + "learning_rate": 3.827905825402937e-08, + "loss": 3.7662, + "step": 3153000 + }, + { + "epoch": 34.69, + "learning_rate": 3.826530612244898e-08, + "loss": 3.7525, + "step": 3153500 + }, + { + "epoch": 34.7, + "learning_rate": 3.825155399086858e-08, + "loss": 3.7613, + "step": 3154000 + }, + { + "epoch": 34.7, + "learning_rate": 3.8237801859288185e-08, + "loss": 3.7575, + "step": 3154500 + }, + { + "epoch": 34.71, + "learning_rate": 3.8224049727707794e-08, + "loss": 3.7577, + "step": 3155000 + }, + { + "epoch": 34.72, + "learning_rate": 3.82102975961274e-08, + "loss": 3.7785, + "step": 3155500 + }, + { + "epoch": 34.72, + "learning_rate": 3.8196545464547006e-08, + "loss": 3.7529, + "step": 3156000 + }, + { + "epoch": 34.73, + "learning_rate": 3.818279333296661e-08, + "loss": 3.7678, + "step": 3156500 + }, + { + "epoch": 34.73, + "learning_rate": 3.816904120138622e-08, + "loss": 3.7747, + "step": 3157000 + }, + { + "epoch": 34.74, + "learning_rate": 3.815528906980582e-08, + "loss": 3.7659, + "step": 3157500 + }, + { + "epoch": 34.74, + "learning_rate": 3.8141536938225424e-08, + "loss": 3.7625, + "step": 3158000 + }, + { + "epoch": 34.75, + "learning_rate": 3.812778480664503e-08, + "loss": 3.7625, + "step": 3158500 + }, + { + "epoch": 34.75, + "learning_rate": 3.8114032675064636e-08, + "loss": 3.7487, + "step": 3159000 + }, + { + "epoch": 34.76, + "learning_rate": 3.810028054348424e-08, + "loss": 3.7496, + "step": 3159500 + }, + { + "epoch": 34.77, + "learning_rate": 3.808652841190385e-08, + "loss": 3.7513, + "step": 3160000 + }, + { + "epoch": 34.77, + "learning_rate": 3.807277628032345e-08, + "loss": 3.7657, + "step": 3160500 + }, + { + "epoch": 34.78, + "learning_rate": 3.805902414874305e-08, + "loss": 3.7621, + "step": 3161000 + }, + { + "epoch": 34.78, + "learning_rate": 3.804527201716266e-08, + "loss": 3.7579, + "step": 3161500 + }, + { + "epoch": 34.79, + "learning_rate": 3.8031519885582265e-08, + "loss": 3.749, + "step": 3162000 + }, + { + "epoch": 34.79, + "learning_rate": 3.801776775400187e-08, + "loss": 3.7435, + "step": 3162500 + }, + { + "epoch": 34.8, + "learning_rate": 3.800401562242148e-08, + "loss": 3.7611, + "step": 3163000 + }, + { + "epoch": 34.8, + "learning_rate": 3.799026349084108e-08, + "loss": 3.7477, + "step": 3163500 + }, + { + "epoch": 34.81, + "learning_rate": 3.797651135926068e-08, + "loss": 3.7631, + "step": 3164000 + }, + { + "epoch": 34.81, + "learning_rate": 3.796275922768029e-08, + "loss": 3.7659, + "step": 3164500 + }, + { + "epoch": 34.82, + "learning_rate": 3.7949007096099895e-08, + "loss": 3.7622, + "step": 3165000 + }, + { + "epoch": 34.83, + "learning_rate": 3.79352549645195e-08, + "loss": 3.7454, + "step": 3165500 + }, + { + "epoch": 34.83, + "learning_rate": 3.792150283293911e-08, + "loss": 3.7585, + "step": 3166000 + }, + { + "epoch": 34.84, + "learning_rate": 3.790775070135871e-08, + "loss": 3.7771, + "step": 3166500 + }, + { + "epoch": 34.84, + "learning_rate": 3.789399856977831e-08, + "loss": 3.746, + "step": 3167000 + }, + { + "epoch": 34.85, + "learning_rate": 3.788024643819792e-08, + "loss": 3.7558, + "step": 3167500 + }, + { + "epoch": 34.85, + "learning_rate": 3.7866494306617524e-08, + "loss": 3.7619, + "step": 3168000 + }, + { + "epoch": 34.86, + "learning_rate": 3.785274217503713e-08, + "loss": 3.7612, + "step": 3168500 + }, + { + "epoch": 34.86, + "learning_rate": 3.7838990043456737e-08, + "loss": 3.7685, + "step": 3169000 + }, + { + "epoch": 34.87, + "learning_rate": 3.782523791187634e-08, + "loss": 3.746, + "step": 3169500 + }, + { + "epoch": 34.88, + "learning_rate": 3.781148578029594e-08, + "loss": 3.7522, + "step": 3170000 + }, + { + "epoch": 34.88, + "learning_rate": 3.779773364871555e-08, + "loss": 3.7454, + "step": 3170500 + }, + { + "epoch": 34.89, + "learning_rate": 3.7783981517135154e-08, + "loss": 3.7675, + "step": 3171000 + }, + { + "epoch": 34.89, + "learning_rate": 3.777022938555476e-08, + "loss": 3.7658, + "step": 3171500 + }, + { + "epoch": 34.9, + "learning_rate": 3.7756477253974366e-08, + "loss": 3.763, + "step": 3172000 + }, + { + "epoch": 34.9, + "learning_rate": 3.774272512239397e-08, + "loss": 3.7854, + "step": 3172500 + }, + { + "epoch": 34.91, + "learning_rate": 3.772897299081357e-08, + "loss": 3.7661, + "step": 3173000 + }, + { + "epoch": 34.91, + "learning_rate": 3.771522085923318e-08, + "loss": 3.7764, + "step": 3173500 + }, + { + "epoch": 34.92, + "learning_rate": 3.7701468727652784e-08, + "loss": 3.7697, + "step": 3174000 + }, + { + "epoch": 34.92, + "learning_rate": 3.7687716596072386e-08, + "loss": 3.7649, + "step": 3174500 + }, + { + "epoch": 34.93, + "learning_rate": 3.767396446449199e-08, + "loss": 3.7716, + "step": 3175000 + }, + { + "epoch": 34.94, + "learning_rate": 3.76602123329116e-08, + "loss": 3.7511, + "step": 3175500 + }, + { + "epoch": 34.94, + "learning_rate": 3.76464602013312e-08, + "loss": 3.7532, + "step": 3176000 + }, + { + "epoch": 34.95, + "learning_rate": 3.7632708069750804e-08, + "loss": 3.7483, + "step": 3176500 + }, + { + "epoch": 34.95, + "learning_rate": 3.761895593817041e-08, + "loss": 3.7787, + "step": 3177000 + }, + { + "epoch": 34.96, + "learning_rate": 3.7605203806590016e-08, + "loss": 3.7528, + "step": 3177500 + }, + { + "epoch": 34.96, + "learning_rate": 3.7591451675009625e-08, + "loss": 3.7322, + "step": 3178000 + }, + { + "epoch": 34.97, + "learning_rate": 3.757769954342923e-08, + "loss": 3.7456, + "step": 3178500 + }, + { + "epoch": 34.97, + "learning_rate": 3.756394741184884e-08, + "loss": 3.7534, + "step": 3179000 + }, + { + "epoch": 34.98, + "learning_rate": 3.755019528026844e-08, + "loss": 3.75, + "step": 3179500 + }, + { + "epoch": 34.99, + "learning_rate": 3.753644314868805e-08, + "loss": 3.7583, + "step": 3180000 + }, + { + "epoch": 34.99, + "learning_rate": 3.752269101710765e-08, + "loss": 3.7479, + "step": 3180500 + }, + { + "epoch": 35.0, + "learning_rate": 3.7508938885527255e-08, + "loss": 3.7683, + "step": 3181000 + }, + { + "epoch": 35.0, + "eval_loss": 3.830986261367798, + "eval_runtime": 6.1438, + "eval_samples_per_second": 252.938, + "step": 3181325 + }, + { + "epoch": 35.0, + "learning_rate": 3.7495186753946864e-08, + "loss": 3.763, + "step": 3181500 + }, + { + "epoch": 35.01, + "learning_rate": 3.748143462236647e-08, + "loss": 3.7628, + "step": 3182000 + }, + { + "epoch": 35.01, + "learning_rate": 3.746768249078607e-08, + "loss": 3.7443, + "step": 3182500 + }, + { + "epoch": 35.02, + "learning_rate": 3.745393035920568e-08, + "loss": 3.7576, + "step": 3183000 + }, + { + "epoch": 35.02, + "learning_rate": 3.744017822762528e-08, + "loss": 3.7796, + "step": 3183500 + }, + { + "epoch": 35.03, + "learning_rate": 3.7426426096044884e-08, + "loss": 3.7635, + "step": 3184000 + }, + { + "epoch": 35.03, + "learning_rate": 3.7412673964464494e-08, + "loss": 3.7604, + "step": 3184500 + }, + { + "epoch": 35.04, + "learning_rate": 3.7398921832884097e-08, + "loss": 3.7572, + "step": 3185000 + }, + { + "epoch": 35.05, + "learning_rate": 3.73851697013037e-08, + "loss": 3.7542, + "step": 3185500 + }, + { + "epoch": 35.05, + "learning_rate": 3.737141756972331e-08, + "loss": 3.772, + "step": 3186000 + }, + { + "epoch": 35.06, + "learning_rate": 3.735766543814291e-08, + "loss": 3.7616, + "step": 3186500 + }, + { + "epoch": 35.06, + "learning_rate": 3.7343913306562514e-08, + "loss": 3.7499, + "step": 3187000 + }, + { + "epoch": 35.07, + "learning_rate": 3.7330161174982123e-08, + "loss": 3.7592, + "step": 3187500 + }, + { + "epoch": 35.07, + "learning_rate": 3.7316409043401726e-08, + "loss": 3.7566, + "step": 3188000 + }, + { + "epoch": 35.08, + "learning_rate": 3.730265691182133e-08, + "loss": 3.7733, + "step": 3188500 + }, + { + "epoch": 35.08, + "learning_rate": 3.728890478024094e-08, + "loss": 3.7569, + "step": 3189000 + }, + { + "epoch": 35.09, + "learning_rate": 3.727515264866054e-08, + "loss": 3.7502, + "step": 3189500 + }, + { + "epoch": 35.1, + "learning_rate": 3.7261400517080144e-08, + "loss": 3.7651, + "step": 3190000 + }, + { + "epoch": 35.1, + "learning_rate": 3.724764838549975e-08, + "loss": 3.7607, + "step": 3190500 + }, + { + "epoch": 35.11, + "learning_rate": 3.7233896253919356e-08, + "loss": 3.7418, + "step": 3191000 + }, + { + "epoch": 35.11, + "learning_rate": 3.722014412233896e-08, + "loss": 3.7648, + "step": 3191500 + }, + { + "epoch": 35.12, + "learning_rate": 3.720639199075857e-08, + "loss": 3.7653, + "step": 3192000 + }, + { + "epoch": 35.12, + "learning_rate": 3.719263985917817e-08, + "loss": 3.7658, + "step": 3192500 + }, + { + "epoch": 35.13, + "learning_rate": 3.717888772759777e-08, + "loss": 3.7577, + "step": 3193000 + }, + { + "epoch": 35.13, + "learning_rate": 3.716513559601738e-08, + "loss": 3.7496, + "step": 3193500 + }, + { + "epoch": 35.14, + "learning_rate": 3.7151383464436985e-08, + "loss": 3.7812, + "step": 3194000 + }, + { + "epoch": 35.14, + "learning_rate": 3.713763133285659e-08, + "loss": 3.7793, + "step": 3194500 + }, + { + "epoch": 35.15, + "learning_rate": 3.71238792012762e-08, + "loss": 3.7575, + "step": 3195000 + }, + { + "epoch": 35.16, + "learning_rate": 3.71101270696958e-08, + "loss": 3.7444, + "step": 3195500 + }, + { + "epoch": 35.16, + "learning_rate": 3.70963749381154e-08, + "loss": 3.7654, + "step": 3196000 + }, + { + "epoch": 35.17, + "learning_rate": 3.708262280653501e-08, + "loss": 3.7499, + "step": 3196500 + }, + { + "epoch": 35.17, + "learning_rate": 3.7068870674954615e-08, + "loss": 3.7668, + "step": 3197000 + }, + { + "epoch": 35.18, + "learning_rate": 3.705511854337422e-08, + "loss": 3.7744, + "step": 3197500 + }, + { + "epoch": 35.18, + "learning_rate": 3.704136641179383e-08, + "loss": 3.765, + "step": 3198000 + }, + { + "epoch": 35.19, + "learning_rate": 3.702761428021343e-08, + "loss": 3.7595, + "step": 3198500 + }, + { + "epoch": 35.19, + "learning_rate": 3.701386214863303e-08, + "loss": 3.7532, + "step": 3199000 + }, + { + "epoch": 35.2, + "learning_rate": 3.700011001705264e-08, + "loss": 3.7549, + "step": 3199500 + }, + { + "epoch": 35.21, + "learning_rate": 3.6986357885472244e-08, + "loss": 3.7565, + "step": 3200000 + }, + { + "epoch": 35.21, + "learning_rate": 3.697260575389185e-08, + "loss": 3.7727, + "step": 3200500 + }, + { + "epoch": 35.22, + "learning_rate": 3.6958853622311457e-08, + "loss": 3.7534, + "step": 3201000 + }, + { + "epoch": 35.22, + "learning_rate": 3.694510149073106e-08, + "loss": 3.7669, + "step": 3201500 + }, + { + "epoch": 35.23, + "learning_rate": 3.693134935915066e-08, + "loss": 3.7552, + "step": 3202000 + }, + { + "epoch": 35.23, + "learning_rate": 3.691759722757027e-08, + "loss": 3.7583, + "step": 3202500 + }, + { + "epoch": 35.24, + "learning_rate": 3.6903845095989874e-08, + "loss": 3.7589, + "step": 3203000 + }, + { + "epoch": 35.24, + "learning_rate": 3.6890092964409483e-08, + "loss": 3.7549, + "step": 3203500 + }, + { + "epoch": 35.25, + "learning_rate": 3.6876340832829086e-08, + "loss": 3.7456, + "step": 3204000 + }, + { + "epoch": 35.25, + "learning_rate": 3.6862588701248695e-08, + "loss": 3.7603, + "step": 3204500 + }, + { + "epoch": 35.26, + "learning_rate": 3.68488365696683e-08, + "loss": 3.7455, + "step": 3205000 + }, + { + "epoch": 35.27, + "learning_rate": 3.683508443808791e-08, + "loss": 3.7548, + "step": 3205500 + }, + { + "epoch": 35.27, + "learning_rate": 3.682133230650751e-08, + "loss": 3.7794, + "step": 3206000 + }, + { + "epoch": 35.28, + "learning_rate": 3.680758017492711e-08, + "loss": 3.7802, + "step": 3206500 + }, + { + "epoch": 35.28, + "learning_rate": 3.679382804334672e-08, + "loss": 3.7461, + "step": 3207000 + }, + { + "epoch": 35.29, + "learning_rate": 3.6780075911766325e-08, + "loss": 3.7675, + "step": 3207500 + }, + { + "epoch": 35.29, + "learning_rate": 3.676632378018593e-08, + "loss": 3.7518, + "step": 3208000 + }, + { + "epoch": 35.3, + "learning_rate": 3.675257164860554e-08, + "loss": 3.7436, + "step": 3208500 + }, + { + "epoch": 35.3, + "learning_rate": 3.673881951702514e-08, + "loss": 3.7601, + "step": 3209000 + }, + { + "epoch": 35.31, + "learning_rate": 3.672506738544474e-08, + "loss": 3.758, + "step": 3209500 + }, + { + "epoch": 35.32, + "learning_rate": 3.671131525386435e-08, + "loss": 3.75, + "step": 3210000 + }, + { + "epoch": 35.32, + "learning_rate": 3.6697563122283955e-08, + "loss": 3.7634, + "step": 3210500 + }, + { + "epoch": 35.33, + "learning_rate": 3.668381099070356e-08, + "loss": 3.7651, + "step": 3211000 + }, + { + "epoch": 35.33, + "learning_rate": 3.667005885912316e-08, + "loss": 3.7413, + "step": 3211500 + }, + { + "epoch": 35.34, + "learning_rate": 3.665630672754277e-08, + "loss": 3.7687, + "step": 3212000 + }, + { + "epoch": 35.34, + "learning_rate": 3.664255459596237e-08, + "loss": 3.7693, + "step": 3212500 + }, + { + "epoch": 35.35, + "learning_rate": 3.6628802464381975e-08, + "loss": 3.7416, + "step": 3213000 + }, + { + "epoch": 35.35, + "learning_rate": 3.6615050332801584e-08, + "loss": 3.7533, + "step": 3213500 + }, + { + "epoch": 35.36, + "learning_rate": 3.660129820122119e-08, + "loss": 3.7421, + "step": 3214000 + }, + { + "epoch": 35.36, + "learning_rate": 3.658754606964079e-08, + "loss": 3.7698, + "step": 3214500 + }, + { + "epoch": 35.37, + "learning_rate": 3.65737939380604e-08, + "loss": 3.7658, + "step": 3215000 + }, + { + "epoch": 35.38, + "learning_rate": 3.656004180648e-08, + "loss": 3.755, + "step": 3215500 + }, + { + "epoch": 35.38, + "learning_rate": 3.6546289674899605e-08, + "loss": 3.7737, + "step": 3216000 + }, + { + "epoch": 35.39, + "learning_rate": 3.6532537543319214e-08, + "loss": 3.7678, + "step": 3216500 + }, + { + "epoch": 35.39, + "learning_rate": 3.6518785411738817e-08, + "loss": 3.7605, + "step": 3217000 + }, + { + "epoch": 35.4, + "learning_rate": 3.650503328015842e-08, + "loss": 3.748, + "step": 3217500 + }, + { + "epoch": 35.4, + "learning_rate": 3.649128114857803e-08, + "loss": 3.7462, + "step": 3218000 + }, + { + "epoch": 35.41, + "learning_rate": 3.647752901699763e-08, + "loss": 3.7651, + "step": 3218500 + }, + { + "epoch": 35.41, + "learning_rate": 3.6463776885417234e-08, + "loss": 3.7655, + "step": 3219000 + }, + { + "epoch": 35.42, + "learning_rate": 3.6450024753836843e-08, + "loss": 3.759, + "step": 3219500 + }, + { + "epoch": 35.43, + "learning_rate": 3.6436272622256446e-08, + "loss": 3.7539, + "step": 3220000 + }, + { + "epoch": 35.43, + "learning_rate": 3.642252049067605e-08, + "loss": 3.7606, + "step": 3220500 + }, + { + "epoch": 35.44, + "learning_rate": 3.640876835909566e-08, + "loss": 3.759, + "step": 3221000 + }, + { + "epoch": 35.44, + "learning_rate": 3.639501622751526e-08, + "loss": 3.7476, + "step": 3221500 + }, + { + "epoch": 35.45, + "learning_rate": 3.6381264095934864e-08, + "loss": 3.7626, + "step": 3222000 + }, + { + "epoch": 35.45, + "learning_rate": 3.636751196435447e-08, + "loss": 3.7408, + "step": 3222500 + }, + { + "epoch": 35.46, + "learning_rate": 3.6353759832774076e-08, + "loss": 3.7681, + "step": 3223000 + }, + { + "epoch": 35.46, + "learning_rate": 3.634000770119368e-08, + "loss": 3.7667, + "step": 3223500 + }, + { + "epoch": 35.47, + "learning_rate": 3.632625556961329e-08, + "loss": 3.7483, + "step": 3224000 + }, + { + "epoch": 35.47, + "learning_rate": 3.631250343803289e-08, + "loss": 3.7658, + "step": 3224500 + }, + { + "epoch": 35.48, + "learning_rate": 3.629875130645249e-08, + "loss": 3.7547, + "step": 3225000 + }, + { + "epoch": 35.49, + "learning_rate": 3.62849991748721e-08, + "loss": 3.7614, + "step": 3225500 + }, + { + "epoch": 35.49, + "learning_rate": 3.6271247043291705e-08, + "loss": 3.7749, + "step": 3226000 + }, + { + "epoch": 35.5, + "learning_rate": 3.6257494911711315e-08, + "loss": 3.7602, + "step": 3226500 + }, + { + "epoch": 35.5, + "learning_rate": 3.624374278013092e-08, + "loss": 3.7532, + "step": 3227000 + }, + { + "epoch": 35.51, + "learning_rate": 3.622999064855053e-08, + "loss": 3.7808, + "step": 3227500 + }, + { + "epoch": 35.51, + "learning_rate": 3.621623851697013e-08, + "loss": 3.7655, + "step": 3228000 + }, + { + "epoch": 35.52, + "learning_rate": 3.620248638538974e-08, + "loss": 3.7646, + "step": 3228500 + }, + { + "epoch": 35.52, + "learning_rate": 3.618873425380934e-08, + "loss": 3.7631, + "step": 3229000 + }, + { + "epoch": 35.53, + "learning_rate": 3.6174982122228944e-08, + "loss": 3.7667, + "step": 3229500 + }, + { + "epoch": 35.54, + "learning_rate": 3.6161229990648554e-08, + "loss": 3.7501, + "step": 3230000 + }, + { + "epoch": 35.54, + "learning_rate": 3.6147477859068156e-08, + "loss": 3.7474, + "step": 3230500 + }, + { + "epoch": 35.55, + "learning_rate": 3.613372572748776e-08, + "loss": 3.7584, + "step": 3231000 + }, + { + "epoch": 35.55, + "learning_rate": 3.611997359590737e-08, + "loss": 3.7638, + "step": 3231500 + }, + { + "epoch": 35.56, + "learning_rate": 3.610622146432697e-08, + "loss": 3.7413, + "step": 3232000 + }, + { + "epoch": 35.56, + "learning_rate": 3.6092469332746574e-08, + "loss": 3.7489, + "step": 3232500 + }, + { + "epoch": 35.57, + "learning_rate": 3.607871720116618e-08, + "loss": 3.785, + "step": 3233000 + }, + { + "epoch": 35.57, + "learning_rate": 3.6064965069585786e-08, + "loss": 3.7486, + "step": 3233500 + }, + { + "epoch": 35.58, + "learning_rate": 3.605121293800539e-08, + "loss": 3.7537, + "step": 3234000 + }, + { + "epoch": 35.59, + "learning_rate": 3.6037460806425e-08, + "loss": 3.7735, + "step": 3234500 + }, + { + "epoch": 35.59, + "learning_rate": 3.60237086748446e-08, + "loss": 3.7669, + "step": 3235000 + }, + { + "epoch": 35.6, + "learning_rate": 3.6009956543264203e-08, + "loss": 3.7732, + "step": 3235500 + }, + { + "epoch": 35.6, + "learning_rate": 3.599620441168381e-08, + "loss": 3.7557, + "step": 3236000 + }, + { + "epoch": 35.61, + "learning_rate": 3.5982452280103416e-08, + "loss": 3.7497, + "step": 3236500 + }, + { + "epoch": 35.61, + "learning_rate": 3.596870014852302e-08, + "loss": 3.7714, + "step": 3237000 + }, + { + "epoch": 35.62, + "learning_rate": 3.595494801694263e-08, + "loss": 3.7532, + "step": 3237500 + }, + { + "epoch": 35.62, + "learning_rate": 3.594119588536223e-08, + "loss": 3.748, + "step": 3238000 + }, + { + "epoch": 35.63, + "learning_rate": 3.592744375378183e-08, + "loss": 3.7625, + "step": 3238500 + }, + { + "epoch": 35.63, + "learning_rate": 3.591369162220144e-08, + "loss": 3.7747, + "step": 3239000 + }, + { + "epoch": 35.64, + "learning_rate": 3.5899939490621045e-08, + "loss": 3.7654, + "step": 3239500 + }, + { + "epoch": 35.65, + "learning_rate": 3.588618735904065e-08, + "loss": 3.7394, + "step": 3240000 + }, + { + "epoch": 35.65, + "learning_rate": 3.587243522746026e-08, + "loss": 3.7778, + "step": 3240500 + }, + { + "epoch": 35.66, + "learning_rate": 3.585868309587986e-08, + "loss": 3.7784, + "step": 3241000 + }, + { + "epoch": 35.66, + "learning_rate": 3.584493096429946e-08, + "loss": 3.7612, + "step": 3241500 + }, + { + "epoch": 35.67, + "learning_rate": 3.583117883271907e-08, + "loss": 3.7558, + "step": 3242000 + }, + { + "epoch": 35.67, + "learning_rate": 3.5817426701138675e-08, + "loss": 3.7675, + "step": 3242500 + }, + { + "epoch": 35.68, + "learning_rate": 3.580367456955828e-08, + "loss": 3.7489, + "step": 3243000 + }, + { + "epoch": 35.68, + "learning_rate": 3.578992243797789e-08, + "loss": 3.7632, + "step": 3243500 + }, + { + "epoch": 35.69, + "learning_rate": 3.577617030639749e-08, + "loss": 3.7603, + "step": 3244000 + }, + { + "epoch": 35.7, + "learning_rate": 3.576241817481709e-08, + "loss": 3.7582, + "step": 3244500 + }, + { + "epoch": 35.7, + "learning_rate": 3.57486660432367e-08, + "loss": 3.7405, + "step": 3245000 + }, + { + "epoch": 35.71, + "learning_rate": 3.5734913911656304e-08, + "loss": 3.7599, + "step": 3245500 + }, + { + "epoch": 35.71, + "learning_rate": 3.572116178007591e-08, + "loss": 3.77, + "step": 3246000 + }, + { + "epoch": 35.72, + "learning_rate": 3.5707409648495516e-08, + "loss": 3.7767, + "step": 3246500 + }, + { + "epoch": 35.72, + "learning_rate": 3.569365751691512e-08, + "loss": 3.7612, + "step": 3247000 + }, + { + "epoch": 35.73, + "learning_rate": 3.567990538533472e-08, + "loss": 3.7631, + "step": 3247500 + }, + { + "epoch": 35.73, + "learning_rate": 3.566615325375433e-08, + "loss": 3.7756, + "step": 3248000 + }, + { + "epoch": 35.74, + "learning_rate": 3.5652401122173934e-08, + "loss": 3.7478, + "step": 3248500 + }, + { + "epoch": 35.74, + "learning_rate": 3.5638648990593537e-08, + "loss": 3.7675, + "step": 3249000 + }, + { + "epoch": 35.75, + "learning_rate": 3.562489685901314e-08, + "loss": 3.7627, + "step": 3249500 + }, + { + "epoch": 35.76, + "learning_rate": 3.561114472743275e-08, + "loss": 3.7517, + "step": 3250000 + }, + { + "epoch": 35.76, + "learning_rate": 3.559739259585235e-08, + "loss": 3.7617, + "step": 3250500 + }, + { + "epoch": 35.77, + "learning_rate": 3.558364046427196e-08, + "loss": 3.758, + "step": 3251000 + }, + { + "epoch": 35.77, + "learning_rate": 3.5569888332691564e-08, + "loss": 3.7893, + "step": 3251500 + }, + { + "epoch": 35.78, + "learning_rate": 3.555613620111117e-08, + "loss": 3.7586, + "step": 3252000 + }, + { + "epoch": 35.78, + "learning_rate": 3.5542384069530776e-08, + "loss": 3.7676, + "step": 3252500 + }, + { + "epoch": 35.79, + "learning_rate": 3.5528631937950385e-08, + "loss": 3.7649, + "step": 3253000 + }, + { + "epoch": 35.79, + "learning_rate": 3.551487980636999e-08, + "loss": 3.7311, + "step": 3253500 + }, + { + "epoch": 35.8, + "learning_rate": 3.550112767478959e-08, + "loss": 3.7621, + "step": 3254000 + }, + { + "epoch": 35.81, + "learning_rate": 3.54873755432092e-08, + "loss": 3.7611, + "step": 3254500 + }, + { + "epoch": 35.81, + "learning_rate": 3.54736234116288e-08, + "loss": 3.7769, + "step": 3255000 + }, + { + "epoch": 35.82, + "learning_rate": 3.5459871280048405e-08, + "loss": 3.7518, + "step": 3255500 + }, + { + "epoch": 35.82, + "learning_rate": 3.5446119148468015e-08, + "loss": 3.7648, + "step": 3256000 + }, + { + "epoch": 35.83, + "learning_rate": 3.543236701688762e-08, + "loss": 3.7633, + "step": 3256500 + }, + { + "epoch": 35.83, + "learning_rate": 3.541861488530722e-08, + "loss": 3.7729, + "step": 3257000 + }, + { + "epoch": 35.84, + "learning_rate": 3.540486275372683e-08, + "loss": 3.7648, + "step": 3257500 + }, + { + "epoch": 35.84, + "learning_rate": 3.539111062214643e-08, + "loss": 3.7593, + "step": 3258000 + }, + { + "epoch": 35.85, + "learning_rate": 3.5377358490566035e-08, + "loss": 3.7592, + "step": 3258500 + }, + { + "epoch": 35.85, + "learning_rate": 3.5363606358985644e-08, + "loss": 3.7465, + "step": 3259000 + }, + { + "epoch": 35.86, + "learning_rate": 3.534985422740525e-08, + "loss": 3.7534, + "step": 3259500 + }, + { + "epoch": 35.87, + "learning_rate": 3.533610209582485e-08, + "loss": 3.7618, + "step": 3260000 + }, + { + "epoch": 35.87, + "learning_rate": 3.532234996424446e-08, + "loss": 3.7535, + "step": 3260500 + }, + { + "epoch": 35.88, + "learning_rate": 3.530859783266406e-08, + "loss": 3.7715, + "step": 3261000 + }, + { + "epoch": 35.88, + "learning_rate": 3.5294845701083664e-08, + "loss": 3.7349, + "step": 3261500 + }, + { + "epoch": 35.89, + "learning_rate": 3.5281093569503274e-08, + "loss": 3.7472, + "step": 3262000 + }, + { + "epoch": 35.89, + "learning_rate": 3.5267341437922876e-08, + "loss": 3.7644, + "step": 3262500 + }, + { + "epoch": 35.9, + "learning_rate": 3.525358930634248e-08, + "loss": 3.7508, + "step": 3263000 + }, + { + "epoch": 35.9, + "learning_rate": 3.523983717476209e-08, + "loss": 3.7655, + "step": 3263500 + }, + { + "epoch": 35.91, + "learning_rate": 3.522608504318169e-08, + "loss": 3.7504, + "step": 3264000 + }, + { + "epoch": 35.92, + "learning_rate": 3.5212332911601294e-08, + "loss": 3.7377, + "step": 3264500 + }, + { + "epoch": 35.92, + "learning_rate": 3.51985807800209e-08, + "loss": 3.7442, + "step": 3265000 + }, + { + "epoch": 35.93, + "learning_rate": 3.5184828648440506e-08, + "loss": 3.7406, + "step": 3265500 + }, + { + "epoch": 35.93, + "learning_rate": 3.517107651686011e-08, + "loss": 3.7411, + "step": 3266000 + }, + { + "epoch": 35.94, + "learning_rate": 3.515732438527972e-08, + "loss": 3.7462, + "step": 3266500 + }, + { + "epoch": 35.94, + "learning_rate": 3.514357225369932e-08, + "loss": 3.7395, + "step": 3267000 + }, + { + "epoch": 35.95, + "learning_rate": 3.5129820122118924e-08, + "loss": 3.7508, + "step": 3267500 + }, + { + "epoch": 35.95, + "learning_rate": 3.511606799053853e-08, + "loss": 3.7407, + "step": 3268000 + }, + { + "epoch": 35.96, + "learning_rate": 3.5102315858958136e-08, + "loss": 3.7679, + "step": 3268500 + }, + { + "epoch": 35.96, + "learning_rate": 3.508856372737774e-08, + "loss": 3.7356, + "step": 3269000 + }, + { + "epoch": 35.97, + "learning_rate": 3.507481159579735e-08, + "loss": 3.7711, + "step": 3269500 + }, + { + "epoch": 35.98, + "learning_rate": 3.506105946421695e-08, + "loss": 3.7418, + "step": 3270000 + }, + { + "epoch": 35.98, + "learning_rate": 3.504730733263655e-08, + "loss": 3.7441, + "step": 3270500 + }, + { + "epoch": 35.99, + "learning_rate": 3.503355520105616e-08, + "loss": 3.7542, + "step": 3271000 + }, + { + "epoch": 35.99, + "learning_rate": 3.5019803069475765e-08, + "loss": 3.7456, + "step": 3271500 + }, + { + "epoch": 36.0, + "learning_rate": 3.500605093789537e-08, + "loss": 3.7628, + "step": 3272000 + }, + { + "epoch": 36.0, + "eval_loss": 3.830293655395508, + "eval_runtime": 6.1452, + "eval_samples_per_second": 252.88, + "step": 3272220 + }, + { + "epoch": 36.0, + "learning_rate": 3.499229880631498e-08, + "loss": 3.7572, + "step": 3272500 + }, + { + "epoch": 36.01, + "learning_rate": 3.497854667473458e-08, + "loss": 3.7541, + "step": 3273000 + }, + { + "epoch": 36.01, + "learning_rate": 3.496479454315418e-08, + "loss": 3.7611, + "step": 3273500 + }, + { + "epoch": 36.02, + "learning_rate": 3.495104241157379e-08, + "loss": 3.7494, + "step": 3274000 + }, + { + "epoch": 36.03, + "learning_rate": 3.4937290279993395e-08, + "loss": 3.7695, + "step": 3274500 + }, + { + "epoch": 36.03, + "learning_rate": 3.4923538148413004e-08, + "loss": 3.7574, + "step": 3275000 + }, + { + "epoch": 36.04, + "learning_rate": 3.490978601683261e-08, + "loss": 3.7651, + "step": 3275500 + }, + { + "epoch": 36.04, + "learning_rate": 3.4896033885252216e-08, + "loss": 3.7412, + "step": 3276000 + }, + { + "epoch": 36.05, + "learning_rate": 3.488228175367182e-08, + "loss": 3.76, + "step": 3276500 + }, + { + "epoch": 36.05, + "learning_rate": 3.486852962209142e-08, + "loss": 3.7484, + "step": 3277000 + }, + { + "epoch": 36.06, + "learning_rate": 3.485477749051103e-08, + "loss": 3.7741, + "step": 3277500 + }, + { + "epoch": 36.06, + "learning_rate": 3.4841025358930634e-08, + "loss": 3.7568, + "step": 3278000 + }, + { + "epoch": 36.07, + "learning_rate": 3.482727322735024e-08, + "loss": 3.7402, + "step": 3278500 + }, + { + "epoch": 36.07, + "learning_rate": 3.4813521095769846e-08, + "loss": 3.777, + "step": 3279000 + }, + { + "epoch": 36.08, + "learning_rate": 3.479976896418945e-08, + "loss": 3.7681, + "step": 3279500 + }, + { + "epoch": 36.09, + "learning_rate": 3.478601683260906e-08, + "loss": 3.7249, + "step": 3280000 + }, + { + "epoch": 36.09, + "learning_rate": 3.477226470102866e-08, + "loss": 3.7468, + "step": 3280500 + }, + { + "epoch": 36.1, + "learning_rate": 3.4758512569448263e-08, + "loss": 3.7586, + "step": 3281000 + }, + { + "epoch": 36.1, + "learning_rate": 3.474476043786787e-08, + "loss": 3.754, + "step": 3281500 + }, + { + "epoch": 36.11, + "learning_rate": 3.4731008306287475e-08, + "loss": 3.7811, + "step": 3282000 + }, + { + "epoch": 36.11, + "learning_rate": 3.471725617470708e-08, + "loss": 3.7615, + "step": 3282500 + }, + { + "epoch": 36.12, + "learning_rate": 3.470350404312669e-08, + "loss": 3.7866, + "step": 3283000 + }, + { + "epoch": 36.12, + "learning_rate": 3.468975191154629e-08, + "loss": 3.7567, + "step": 3283500 + }, + { + "epoch": 36.13, + "learning_rate": 3.467599977996589e-08, + "loss": 3.7655, + "step": 3284000 + }, + { + "epoch": 36.14, + "learning_rate": 3.46622476483855e-08, + "loss": 3.7372, + "step": 3284500 + }, + { + "epoch": 36.14, + "learning_rate": 3.4648495516805105e-08, + "loss": 3.749, + "step": 3285000 + }, + { + "epoch": 36.15, + "learning_rate": 3.463474338522471e-08, + "loss": 3.7512, + "step": 3285500 + }, + { + "epoch": 36.15, + "learning_rate": 3.462099125364432e-08, + "loss": 3.766, + "step": 3286000 + }, + { + "epoch": 36.16, + "learning_rate": 3.460723912206392e-08, + "loss": 3.7551, + "step": 3286500 + }, + { + "epoch": 36.16, + "learning_rate": 3.459348699048352e-08, + "loss": 3.7657, + "step": 3287000 + }, + { + "epoch": 36.17, + "learning_rate": 3.4579734858903125e-08, + "loss": 3.7636, + "step": 3287500 + }, + { + "epoch": 36.17, + "learning_rate": 3.4565982727322735e-08, + "loss": 3.7603, + "step": 3288000 + }, + { + "epoch": 36.18, + "learning_rate": 3.455223059574234e-08, + "loss": 3.7543, + "step": 3288500 + }, + { + "epoch": 36.18, + "learning_rate": 3.453847846416194e-08, + "loss": 3.756, + "step": 3289000 + }, + { + "epoch": 36.19, + "learning_rate": 3.452472633258155e-08, + "loss": 3.767, + "step": 3289500 + }, + { + "epoch": 36.2, + "learning_rate": 3.451097420100115e-08, + "loss": 3.7715, + "step": 3290000 + }, + { + "epoch": 36.2, + "learning_rate": 3.4497222069420755e-08, + "loss": 3.7453, + "step": 3290500 + }, + { + "epoch": 36.21, + "learning_rate": 3.4483469937840364e-08, + "loss": 3.7627, + "step": 3291000 + }, + { + "epoch": 36.21, + "learning_rate": 3.446971780625997e-08, + "loss": 3.7471, + "step": 3291500 + }, + { + "epoch": 36.22, + "learning_rate": 3.445596567467957e-08, + "loss": 3.7506, + "step": 3292000 + }, + { + "epoch": 36.22, + "learning_rate": 3.444221354309918e-08, + "loss": 3.7506, + "step": 3292500 + }, + { + "epoch": 36.23, + "learning_rate": 3.442846141151878e-08, + "loss": 3.7422, + "step": 3293000 + }, + { + "epoch": 36.23, + "learning_rate": 3.4414709279938384e-08, + "loss": 3.7496, + "step": 3293500 + }, + { + "epoch": 36.24, + "learning_rate": 3.4400957148357994e-08, + "loss": 3.7396, + "step": 3294000 + }, + { + "epoch": 36.25, + "learning_rate": 3.4387205016777596e-08, + "loss": 3.7716, + "step": 3294500 + }, + { + "epoch": 36.25, + "learning_rate": 3.43734528851972e-08, + "loss": 3.7551, + "step": 3295000 + }, + { + "epoch": 36.26, + "learning_rate": 3.435970075361681e-08, + "loss": 3.7586, + "step": 3295500 + }, + { + "epoch": 36.26, + "learning_rate": 3.434594862203641e-08, + "loss": 3.7584, + "step": 3296000 + }, + { + "epoch": 36.27, + "learning_rate": 3.4332196490456014e-08, + "loss": 3.7485, + "step": 3296500 + }, + { + "epoch": 36.27, + "learning_rate": 3.4318444358875623e-08, + "loss": 3.7678, + "step": 3297000 + }, + { + "epoch": 36.28, + "learning_rate": 3.4304692227295226e-08, + "loss": 3.7618, + "step": 3297500 + }, + { + "epoch": 36.28, + "learning_rate": 3.429094009571483e-08, + "loss": 3.7575, + "step": 3298000 + }, + { + "epoch": 36.29, + "learning_rate": 3.427718796413444e-08, + "loss": 3.773, + "step": 3298500 + }, + { + "epoch": 36.29, + "learning_rate": 3.426343583255404e-08, + "loss": 3.7468, + "step": 3299000 + }, + { + "epoch": 36.3, + "learning_rate": 3.424968370097365e-08, + "loss": 3.7639, + "step": 3299500 + }, + { + "epoch": 36.31, + "learning_rate": 3.423593156939325e-08, + "loss": 3.7435, + "step": 3300000 + }, + { + "epoch": 36.31, + "learning_rate": 3.422217943781286e-08, + "loss": 3.7592, + "step": 3300500 + }, + { + "epoch": 36.32, + "learning_rate": 3.4208427306232465e-08, + "loss": 3.7639, + "step": 3301000 + }, + { + "epoch": 36.32, + "learning_rate": 3.4194675174652074e-08, + "loss": 3.7449, + "step": 3301500 + }, + { + "epoch": 36.33, + "learning_rate": 3.418092304307168e-08, + "loss": 3.7646, + "step": 3302000 + }, + { + "epoch": 36.33, + "learning_rate": 3.416717091149128e-08, + "loss": 3.7578, + "step": 3302500 + }, + { + "epoch": 36.34, + "learning_rate": 3.415341877991089e-08, + "loss": 3.7541, + "step": 3303000 + }, + { + "epoch": 36.34, + "learning_rate": 3.413966664833049e-08, + "loss": 3.7837, + "step": 3303500 + }, + { + "epoch": 36.35, + "learning_rate": 3.4125914516750095e-08, + "loss": 3.7294, + "step": 3304000 + }, + { + "epoch": 36.36, + "learning_rate": 3.4112162385169704e-08, + "loss": 3.7509, + "step": 3304500 + }, + { + "epoch": 36.36, + "learning_rate": 3.4098410253589307e-08, + "loss": 3.7379, + "step": 3305000 + }, + { + "epoch": 36.37, + "learning_rate": 3.408465812200891e-08, + "loss": 3.761, + "step": 3305500 + }, + { + "epoch": 36.37, + "learning_rate": 3.407090599042852e-08, + "loss": 3.7543, + "step": 3306000 + }, + { + "epoch": 36.38, + "learning_rate": 3.405715385884812e-08, + "loss": 3.7695, + "step": 3306500 + }, + { + "epoch": 36.38, + "learning_rate": 3.4043401727267724e-08, + "loss": 3.7918, + "step": 3307000 + }, + { + "epoch": 36.39, + "learning_rate": 3.4029649595687334e-08, + "loss": 3.7618, + "step": 3307500 + }, + { + "epoch": 36.39, + "learning_rate": 3.4015897464106936e-08, + "loss": 3.7684, + "step": 3308000 + }, + { + "epoch": 36.4, + "learning_rate": 3.400214533252654e-08, + "loss": 3.7653, + "step": 3308500 + }, + { + "epoch": 36.4, + "learning_rate": 3.398839320094615e-08, + "loss": 3.759, + "step": 3309000 + }, + { + "epoch": 36.41, + "learning_rate": 3.397464106936575e-08, + "loss": 3.7535, + "step": 3309500 + }, + { + "epoch": 36.42, + "learning_rate": 3.3960888937785354e-08, + "loss": 3.7288, + "step": 3310000 + }, + { + "epoch": 36.42, + "learning_rate": 3.394713680620496e-08, + "loss": 3.7677, + "step": 3310500 + }, + { + "epoch": 36.43, + "learning_rate": 3.3933384674624566e-08, + "loss": 3.7574, + "step": 3311000 + }, + { + "epoch": 36.43, + "learning_rate": 3.391963254304417e-08, + "loss": 3.7572, + "step": 3311500 + }, + { + "epoch": 36.44, + "learning_rate": 3.390588041146378e-08, + "loss": 3.7484, + "step": 3312000 + }, + { + "epoch": 36.44, + "learning_rate": 3.389212827988338e-08, + "loss": 3.7417, + "step": 3312500 + }, + { + "epoch": 36.45, + "learning_rate": 3.3878376148302983e-08, + "loss": 3.751, + "step": 3313000 + }, + { + "epoch": 36.45, + "learning_rate": 3.386462401672259e-08, + "loss": 3.7607, + "step": 3313500 + }, + { + "epoch": 36.46, + "learning_rate": 3.3850871885142195e-08, + "loss": 3.7404, + "step": 3314000 + }, + { + "epoch": 36.47, + "learning_rate": 3.38371197535618e-08, + "loss": 3.7475, + "step": 3314500 + }, + { + "epoch": 36.47, + "learning_rate": 3.382336762198141e-08, + "loss": 3.7747, + "step": 3315000 + }, + { + "epoch": 36.48, + "learning_rate": 3.380961549040101e-08, + "loss": 3.7519, + "step": 3315500 + }, + { + "epoch": 36.48, + "learning_rate": 3.379586335882061e-08, + "loss": 3.7613, + "step": 3316000 + }, + { + "epoch": 36.49, + "learning_rate": 3.378211122724022e-08, + "loss": 3.7553, + "step": 3316500 + }, + { + "epoch": 36.49, + "learning_rate": 3.3768359095659825e-08, + "loss": 3.7538, + "step": 3317000 + }, + { + "epoch": 36.5, + "learning_rate": 3.375460696407943e-08, + "loss": 3.7644, + "step": 3317500 + }, + { + "epoch": 36.5, + "learning_rate": 3.374085483249904e-08, + "loss": 3.7701, + "step": 3318000 + }, + { + "epoch": 36.51, + "learning_rate": 3.372710270091864e-08, + "loss": 3.7645, + "step": 3318500 + }, + { + "epoch": 36.51, + "learning_rate": 3.371335056933824e-08, + "loss": 3.7694, + "step": 3319000 + }, + { + "epoch": 36.52, + "learning_rate": 3.369959843775785e-08, + "loss": 3.7549, + "step": 3319500 + }, + { + "epoch": 36.53, + "learning_rate": 3.3685846306177455e-08, + "loss": 3.7638, + "step": 3320000 + }, + { + "epoch": 36.53, + "learning_rate": 3.367209417459706e-08, + "loss": 3.74, + "step": 3320500 + }, + { + "epoch": 36.54, + "learning_rate": 3.3658342043016667e-08, + "loss": 3.7404, + "step": 3321000 + }, + { + "epoch": 36.54, + "learning_rate": 3.364458991143627e-08, + "loss": 3.7488, + "step": 3321500 + }, + { + "epoch": 36.55, + "learning_rate": 3.363083777985587e-08, + "loss": 3.7579, + "step": 3322000 + }, + { + "epoch": 36.55, + "learning_rate": 3.361708564827548e-08, + "loss": 3.7404, + "step": 3322500 + }, + { + "epoch": 36.56, + "learning_rate": 3.3603333516695084e-08, + "loss": 3.7581, + "step": 3323000 + }, + { + "epoch": 36.56, + "learning_rate": 3.3589581385114694e-08, + "loss": 3.7702, + "step": 3323500 + }, + { + "epoch": 36.57, + "learning_rate": 3.3575829253534296e-08, + "loss": 3.7504, + "step": 3324000 + }, + { + "epoch": 36.58, + "learning_rate": 3.35620771219539e-08, + "loss": 3.7651, + "step": 3324500 + }, + { + "epoch": 36.58, + "learning_rate": 3.354832499037351e-08, + "loss": 3.7689, + "step": 3325000 + }, + { + "epoch": 36.59, + "learning_rate": 3.353457285879311e-08, + "loss": 3.7499, + "step": 3325500 + }, + { + "epoch": 36.59, + "learning_rate": 3.352082072721272e-08, + "loss": 3.7553, + "step": 3326000 + }, + { + "epoch": 36.6, + "learning_rate": 3.350706859563232e-08, + "loss": 3.7384, + "step": 3326500 + }, + { + "epoch": 36.6, + "learning_rate": 3.3493316464051926e-08, + "loss": 3.7559, + "step": 3327000 + }, + { + "epoch": 36.61, + "learning_rate": 3.3479564332471535e-08, + "loss": 3.7568, + "step": 3327500 + }, + { + "epoch": 36.61, + "learning_rate": 3.346581220089114e-08, + "loss": 3.7664, + "step": 3328000 + }, + { + "epoch": 36.62, + "learning_rate": 3.345206006931074e-08, + "loss": 3.7485, + "step": 3328500 + }, + { + "epoch": 36.62, + "learning_rate": 3.343830793773035e-08, + "loss": 3.7721, + "step": 3329000 + }, + { + "epoch": 36.63, + "learning_rate": 3.342455580614995e-08, + "loss": 3.767, + "step": 3329500 + }, + { + "epoch": 36.64, + "learning_rate": 3.3410803674569555e-08, + "loss": 3.7522, + "step": 3330000 + }, + { + "epoch": 36.64, + "learning_rate": 3.3397051542989165e-08, + "loss": 3.7549, + "step": 3330500 + }, + { + "epoch": 36.65, + "learning_rate": 3.338329941140877e-08, + "loss": 3.7631, + "step": 3331000 + }, + { + "epoch": 36.65, + "learning_rate": 3.336954727982837e-08, + "loss": 3.7691, + "step": 3331500 + }, + { + "epoch": 36.66, + "learning_rate": 3.335579514824798e-08, + "loss": 3.7474, + "step": 3332000 + }, + { + "epoch": 36.66, + "learning_rate": 3.334204301666758e-08, + "loss": 3.7603, + "step": 3332500 + }, + { + "epoch": 36.67, + "learning_rate": 3.3328290885087185e-08, + "loss": 3.76, + "step": 3333000 + }, + { + "epoch": 36.67, + "learning_rate": 3.3314538753506794e-08, + "loss": 3.757, + "step": 3333500 + }, + { + "epoch": 36.68, + "learning_rate": 3.33007866219264e-08, + "loss": 3.754, + "step": 3334000 + }, + { + "epoch": 36.69, + "learning_rate": 3.3287034490346e-08, + "loss": 3.7662, + "step": 3334500 + }, + { + "epoch": 36.69, + "learning_rate": 3.327328235876561e-08, + "loss": 3.7687, + "step": 3335000 + }, + { + "epoch": 36.7, + "learning_rate": 3.325953022718521e-08, + "loss": 3.7468, + "step": 3335500 + }, + { + "epoch": 36.7, + "learning_rate": 3.3245778095604815e-08, + "loss": 3.7632, + "step": 3336000 + }, + { + "epoch": 36.71, + "learning_rate": 3.3232025964024424e-08, + "loss": 3.7748, + "step": 3336500 + }, + { + "epoch": 36.71, + "learning_rate": 3.321827383244403e-08, + "loss": 3.7645, + "step": 3337000 + }, + { + "epoch": 36.72, + "learning_rate": 3.320452170086363e-08, + "loss": 3.7481, + "step": 3337500 + }, + { + "epoch": 36.72, + "learning_rate": 3.319076956928324e-08, + "loss": 3.7526, + "step": 3338000 + }, + { + "epoch": 36.73, + "learning_rate": 3.317701743770284e-08, + "loss": 3.7492, + "step": 3338500 + }, + { + "epoch": 36.73, + "learning_rate": 3.3163265306122444e-08, + "loss": 3.7478, + "step": 3339000 + }, + { + "epoch": 36.74, + "learning_rate": 3.3149513174542054e-08, + "loss": 3.753, + "step": 3339500 + }, + { + "epoch": 36.75, + "learning_rate": 3.3135761042961656e-08, + "loss": 3.7598, + "step": 3340000 + }, + { + "epoch": 36.75, + "learning_rate": 3.312200891138126e-08, + "loss": 3.7573, + "step": 3340500 + }, + { + "epoch": 36.76, + "learning_rate": 3.310825677980087e-08, + "loss": 3.7614, + "step": 3341000 + }, + { + "epoch": 36.76, + "learning_rate": 3.309450464822047e-08, + "loss": 3.7612, + "step": 3341500 + }, + { + "epoch": 36.77, + "learning_rate": 3.3080752516640074e-08, + "loss": 3.7601, + "step": 3342000 + }, + { + "epoch": 36.77, + "learning_rate": 3.306700038505968e-08, + "loss": 3.733, + "step": 3342500 + }, + { + "epoch": 36.78, + "learning_rate": 3.3053248253479286e-08, + "loss": 3.7733, + "step": 3343000 + }, + { + "epoch": 36.78, + "learning_rate": 3.303949612189889e-08, + "loss": 3.7624, + "step": 3343500 + }, + { + "epoch": 36.79, + "learning_rate": 3.30257439903185e-08, + "loss": 3.7838, + "step": 3344000 + }, + { + "epoch": 36.8, + "learning_rate": 3.30119918587381e-08, + "loss": 3.7641, + "step": 3344500 + }, + { + "epoch": 36.8, + "learning_rate": 3.2998239727157703e-08, + "loss": 3.7617, + "step": 3345000 + }, + { + "epoch": 36.81, + "learning_rate": 3.298448759557731e-08, + "loss": 3.7623, + "step": 3345500 + }, + { + "epoch": 36.81, + "learning_rate": 3.2970735463996915e-08, + "loss": 3.7417, + "step": 3346000 + }, + { + "epoch": 36.82, + "learning_rate": 3.295698333241652e-08, + "loss": 3.7438, + "step": 3346500 + }, + { + "epoch": 36.82, + "learning_rate": 3.294323120083613e-08, + "loss": 3.7599, + "step": 3347000 + }, + { + "epoch": 36.83, + "learning_rate": 3.292947906925573e-08, + "loss": 3.764, + "step": 3347500 + }, + { + "epoch": 36.83, + "learning_rate": 3.291572693767534e-08, + "loss": 3.74, + "step": 3348000 + }, + { + "epoch": 36.84, + "learning_rate": 3.290197480609494e-08, + "loss": 3.7732, + "step": 3348500 + }, + { + "epoch": 36.84, + "learning_rate": 3.288822267451455e-08, + "loss": 3.765, + "step": 3349000 + }, + { + "epoch": 36.85, + "learning_rate": 3.2874470542934154e-08, + "loss": 3.7756, + "step": 3349500 + }, + { + "epoch": 36.86, + "learning_rate": 3.2860718411353764e-08, + "loss": 3.7496, + "step": 3350000 + }, + { + "epoch": 36.86, + "learning_rate": 3.2846966279773366e-08, + "loss": 3.7548, + "step": 3350500 + }, + { + "epoch": 36.87, + "learning_rate": 3.283321414819297e-08, + "loss": 3.7471, + "step": 3351000 + }, + { + "epoch": 36.87, + "learning_rate": 3.281946201661258e-08, + "loss": 3.769, + "step": 3351500 + }, + { + "epoch": 36.88, + "learning_rate": 3.280570988503218e-08, + "loss": 3.7452, + "step": 3352000 + }, + { + "epoch": 36.88, + "learning_rate": 3.2791957753451784e-08, + "loss": 3.755, + "step": 3352500 + }, + { + "epoch": 36.89, + "learning_rate": 3.2778205621871393e-08, + "loss": 3.7629, + "step": 3353000 + }, + { + "epoch": 36.89, + "learning_rate": 3.2764453490290996e-08, + "loss": 3.754, + "step": 3353500 + }, + { + "epoch": 36.9, + "learning_rate": 3.27507013587106e-08, + "loss": 3.7626, + "step": 3354000 + }, + { + "epoch": 36.91, + "learning_rate": 3.273694922713021e-08, + "loss": 3.7692, + "step": 3354500 + }, + { + "epoch": 36.91, + "learning_rate": 3.272319709554981e-08, + "loss": 3.7536, + "step": 3355000 + }, + { + "epoch": 36.92, + "learning_rate": 3.2709444963969414e-08, + "loss": 3.7732, + "step": 3355500 + }, + { + "epoch": 36.92, + "learning_rate": 3.269569283238902e-08, + "loss": 3.7312, + "step": 3356000 + }, + { + "epoch": 36.93, + "learning_rate": 3.2681940700808626e-08, + "loss": 3.7441, + "step": 3356500 + }, + { + "epoch": 36.93, + "learning_rate": 3.266818856922823e-08, + "loss": 3.7718, + "step": 3357000 + }, + { + "epoch": 36.94, + "learning_rate": 3.265443643764784e-08, + "loss": 3.7594, + "step": 3357500 + }, + { + "epoch": 36.94, + "learning_rate": 3.264068430606744e-08, + "loss": 3.7664, + "step": 3358000 + }, + { + "epoch": 36.95, + "learning_rate": 3.262693217448704e-08, + "loss": 3.7581, + "step": 3358500 + }, + { + "epoch": 36.95, + "learning_rate": 3.261318004290665e-08, + "loss": 3.7538, + "step": 3359000 + }, + { + "epoch": 36.96, + "learning_rate": 3.2599427911326255e-08, + "loss": 3.7542, + "step": 3359500 + }, + { + "epoch": 36.97, + "learning_rate": 3.258567577974586e-08, + "loss": 3.753, + "step": 3360000 + }, + { + "epoch": 36.97, + "learning_rate": 3.257192364816547e-08, + "loss": 3.7576, + "step": 3360500 + }, + { + "epoch": 36.98, + "learning_rate": 3.255817151658507e-08, + "loss": 3.757, + "step": 3361000 + }, + { + "epoch": 36.98, + "learning_rate": 3.254441938500467e-08, + "loss": 3.749, + "step": 3361500 + }, + { + "epoch": 36.99, + "learning_rate": 3.2530667253424276e-08, + "loss": 3.7588, + "step": 3362000 + }, + { + "epoch": 36.99, + "learning_rate": 3.2516915121843885e-08, + "loss": 3.759, + "step": 3362500 + }, + { + "epoch": 37.0, + "learning_rate": 3.250316299026349e-08, + "loss": 3.7614, + "step": 3363000 + }, + { + "epoch": 37.0, + "eval_loss": 3.8296761512756348, + "eval_runtime": 6.1458, + "eval_samples_per_second": 252.856, + "step": 3363115 + }, + { + "epoch": 37.0, + "learning_rate": 3.248941085868309e-08, + "loss": 3.7603, + "step": 3363500 + }, + { + "epoch": 37.01, + "learning_rate": 3.24756587271027e-08, + "loss": 3.7573, + "step": 3364000 + }, + { + "epoch": 37.02, + "learning_rate": 3.24619065955223e-08, + "loss": 3.7588, + "step": 3364500 + }, + { + "epoch": 37.02, + "learning_rate": 3.2448154463941905e-08, + "loss": 3.755, + "step": 3365000 + }, + { + "epoch": 37.03, + "learning_rate": 3.2434402332361514e-08, + "loss": 3.7495, + "step": 3365500 + }, + { + "epoch": 37.03, + "learning_rate": 3.242065020078112e-08, + "loss": 3.769, + "step": 3366000 + }, + { + "epoch": 37.04, + "learning_rate": 3.240689806920072e-08, + "loss": 3.7485, + "step": 3366500 + }, + { + "epoch": 37.04, + "learning_rate": 3.239314593762033e-08, + "loss": 3.7702, + "step": 3367000 + }, + { + "epoch": 37.05, + "learning_rate": 3.237939380603993e-08, + "loss": 3.763, + "step": 3367500 + }, + { + "epoch": 37.05, + "learning_rate": 3.2365641674459535e-08, + "loss": 3.743, + "step": 3368000 + }, + { + "epoch": 37.06, + "learning_rate": 3.2351889542879144e-08, + "loss": 3.752, + "step": 3368500 + }, + { + "epoch": 37.06, + "learning_rate": 3.233813741129875e-08, + "loss": 3.7524, + "step": 3369000 + }, + { + "epoch": 37.07, + "learning_rate": 3.232438527971835e-08, + "loss": 3.7663, + "step": 3369500 + }, + { + "epoch": 37.08, + "learning_rate": 3.231063314813796e-08, + "loss": 3.77, + "step": 3370000 + }, + { + "epoch": 37.08, + "learning_rate": 3.229688101655756e-08, + "loss": 3.7668, + "step": 3370500 + }, + { + "epoch": 37.09, + "learning_rate": 3.228312888497717e-08, + "loss": 3.749, + "step": 3371000 + }, + { + "epoch": 37.09, + "learning_rate": 3.2269376753396774e-08, + "loss": 3.7427, + "step": 3371500 + }, + { + "epoch": 37.1, + "learning_rate": 3.2255624621816376e-08, + "loss": 3.75, + "step": 3372000 + }, + { + "epoch": 37.1, + "learning_rate": 3.2241872490235986e-08, + "loss": 3.7652, + "step": 3372500 + }, + { + "epoch": 37.11, + "learning_rate": 3.222812035865559e-08, + "loss": 3.7411, + "step": 3373000 + }, + { + "epoch": 37.11, + "learning_rate": 3.22143682270752e-08, + "loss": 3.7549, + "step": 3373500 + }, + { + "epoch": 37.12, + "learning_rate": 3.22006160954948e-08, + "loss": 3.7574, + "step": 3374000 + }, + { + "epoch": 37.13, + "learning_rate": 3.218686396391441e-08, + "loss": 3.7368, + "step": 3374500 + }, + { + "epoch": 37.13, + "learning_rate": 3.217311183233401e-08, + "loss": 3.7368, + "step": 3375000 + }, + { + "epoch": 37.14, + "learning_rate": 3.2159359700753615e-08, + "loss": 3.7486, + "step": 3375500 + }, + { + "epoch": 37.14, + "learning_rate": 3.2145607569173225e-08, + "loss": 3.7401, + "step": 3376000 + }, + { + "epoch": 37.15, + "learning_rate": 3.213185543759283e-08, + "loss": 3.7667, + "step": 3376500 + }, + { + "epoch": 37.15, + "learning_rate": 3.211810330601243e-08, + "loss": 3.7564, + "step": 3377000 + }, + { + "epoch": 37.16, + "learning_rate": 3.210435117443204e-08, + "loss": 3.7523, + "step": 3377500 + }, + { + "epoch": 37.16, + "learning_rate": 3.209059904285164e-08, + "loss": 3.7384, + "step": 3378000 + }, + { + "epoch": 37.17, + "learning_rate": 3.2076846911271245e-08, + "loss": 3.7569, + "step": 3378500 + }, + { + "epoch": 37.17, + "learning_rate": 3.2063094779690854e-08, + "loss": 3.7702, + "step": 3379000 + }, + { + "epoch": 37.18, + "learning_rate": 3.204934264811046e-08, + "loss": 3.7692, + "step": 3379500 + }, + { + "epoch": 37.19, + "learning_rate": 3.203559051653006e-08, + "loss": 3.7377, + "step": 3380000 + }, + { + "epoch": 37.19, + "learning_rate": 3.202183838494967e-08, + "loss": 3.7749, + "step": 3380500 + }, + { + "epoch": 37.2, + "learning_rate": 3.200808625336927e-08, + "loss": 3.7511, + "step": 3381000 + }, + { + "epoch": 37.2, + "learning_rate": 3.1994334121788874e-08, + "loss": 3.754, + "step": 3381500 + }, + { + "epoch": 37.21, + "learning_rate": 3.1980581990208484e-08, + "loss": 3.7647, + "step": 3382000 + }, + { + "epoch": 37.21, + "learning_rate": 3.1966829858628087e-08, + "loss": 3.7611, + "step": 3382500 + }, + { + "epoch": 37.22, + "learning_rate": 3.195307772704769e-08, + "loss": 3.7646, + "step": 3383000 + }, + { + "epoch": 37.22, + "learning_rate": 3.19393255954673e-08, + "loss": 3.76, + "step": 3383500 + }, + { + "epoch": 37.23, + "learning_rate": 3.19255734638869e-08, + "loss": 3.7569, + "step": 3384000 + }, + { + "epoch": 37.24, + "learning_rate": 3.1911821332306504e-08, + "loss": 3.7556, + "step": 3384500 + }, + { + "epoch": 37.24, + "learning_rate": 3.1898069200726113e-08, + "loss": 3.7725, + "step": 3385000 + }, + { + "epoch": 37.25, + "learning_rate": 3.1884317069145716e-08, + "loss": 3.7548, + "step": 3385500 + }, + { + "epoch": 37.25, + "learning_rate": 3.187056493756532e-08, + "loss": 3.7667, + "step": 3386000 + }, + { + "epoch": 37.26, + "learning_rate": 3.185681280598493e-08, + "loss": 3.7431, + "step": 3386500 + }, + { + "epoch": 37.26, + "learning_rate": 3.184306067440453e-08, + "loss": 3.768, + "step": 3387000 + }, + { + "epoch": 37.27, + "learning_rate": 3.1829308542824134e-08, + "loss": 3.7647, + "step": 3387500 + }, + { + "epoch": 37.27, + "learning_rate": 3.181555641124374e-08, + "loss": 3.7518, + "step": 3388000 + }, + { + "epoch": 37.28, + "learning_rate": 3.1801804279663346e-08, + "loss": 3.7596, + "step": 3388500 + }, + { + "epoch": 37.28, + "learning_rate": 3.178805214808295e-08, + "loss": 3.7602, + "step": 3389000 + }, + { + "epoch": 37.29, + "learning_rate": 3.177430001650256e-08, + "loss": 3.786, + "step": 3389500 + }, + { + "epoch": 37.3, + "learning_rate": 3.176054788492216e-08, + "loss": 3.7581, + "step": 3390000 + }, + { + "epoch": 37.3, + "learning_rate": 3.174679575334176e-08, + "loss": 3.7483, + "step": 3390500 + }, + { + "epoch": 37.31, + "learning_rate": 3.173304362176137e-08, + "loss": 3.7434, + "step": 3391000 + }, + { + "epoch": 37.31, + "learning_rate": 3.1719291490180975e-08, + "loss": 3.7517, + "step": 3391500 + }, + { + "epoch": 37.32, + "learning_rate": 3.170553935860058e-08, + "loss": 3.7596, + "step": 3392000 + }, + { + "epoch": 37.32, + "learning_rate": 3.169178722702019e-08, + "loss": 3.7699, + "step": 3392500 + }, + { + "epoch": 37.33, + "learning_rate": 3.167803509543979e-08, + "loss": 3.7546, + "step": 3393000 + }, + { + "epoch": 37.33, + "learning_rate": 3.166428296385939e-08, + "loss": 3.7551, + "step": 3393500 + }, + { + "epoch": 37.34, + "learning_rate": 3.1650530832279e-08, + "loss": 3.7466, + "step": 3394000 + }, + { + "epoch": 37.35, + "learning_rate": 3.1636778700698605e-08, + "loss": 3.7481, + "step": 3394500 + }, + { + "epoch": 37.35, + "learning_rate": 3.162302656911821e-08, + "loss": 3.7817, + "step": 3395000 + }, + { + "epoch": 37.36, + "learning_rate": 3.160927443753782e-08, + "loss": 3.768, + "step": 3395500 + }, + { + "epoch": 37.36, + "learning_rate": 3.159552230595742e-08, + "loss": 3.7507, + "step": 3396000 + }, + { + "epoch": 37.37, + "learning_rate": 3.158177017437703e-08, + "loss": 3.7723, + "step": 3396500 + }, + { + "epoch": 37.37, + "learning_rate": 3.156801804279663e-08, + "loss": 3.7692, + "step": 3397000 + }, + { + "epoch": 37.38, + "learning_rate": 3.155426591121624e-08, + "loss": 3.7483, + "step": 3397500 + }, + { + "epoch": 37.38, + "learning_rate": 3.1540513779635844e-08, + "loss": 3.7496, + "step": 3398000 + }, + { + "epoch": 37.39, + "learning_rate": 3.152676164805545e-08, + "loss": 3.7613, + "step": 3398500 + }, + { + "epoch": 37.39, + "learning_rate": 3.1513009516475056e-08, + "loss": 3.7695, + "step": 3399000 + }, + { + "epoch": 37.4, + "learning_rate": 3.149925738489466e-08, + "loss": 3.7552, + "step": 3399500 + }, + { + "epoch": 37.41, + "learning_rate": 3.148550525331426e-08, + "loss": 3.7689, + "step": 3400000 + }, + { + "epoch": 37.41, + "learning_rate": 3.147175312173387e-08, + "loss": 3.7375, + "step": 3400500 + }, + { + "epoch": 37.42, + "learning_rate": 3.1458000990153473e-08, + "loss": 3.7754, + "step": 3401000 + }, + { + "epoch": 37.42, + "learning_rate": 3.1444248858573076e-08, + "loss": 3.7627, + "step": 3401500 + }, + { + "epoch": 37.43, + "learning_rate": 3.1430496726992686e-08, + "loss": 3.7679, + "step": 3402000 + }, + { + "epoch": 37.43, + "learning_rate": 3.141674459541229e-08, + "loss": 3.7565, + "step": 3402500 + }, + { + "epoch": 37.44, + "learning_rate": 3.140299246383189e-08, + "loss": 3.7455, + "step": 3403000 + }, + { + "epoch": 37.44, + "learning_rate": 3.13892403322515e-08, + "loss": 3.7591, + "step": 3403500 + }, + { + "epoch": 37.45, + "learning_rate": 3.13754882006711e-08, + "loss": 3.7735, + "step": 3404000 + }, + { + "epoch": 37.46, + "learning_rate": 3.1361736069090706e-08, + "loss": 3.7531, + "step": 3404500 + }, + { + "epoch": 37.46, + "learning_rate": 3.1347983937510315e-08, + "loss": 3.7574, + "step": 3405000 + }, + { + "epoch": 37.47, + "learning_rate": 3.133423180592992e-08, + "loss": 3.7464, + "step": 3405500 + }, + { + "epoch": 37.47, + "learning_rate": 3.132047967434952e-08, + "loss": 3.766, + "step": 3406000 + }, + { + "epoch": 37.48, + "learning_rate": 3.130672754276913e-08, + "loss": 3.7378, + "step": 3406500 + }, + { + "epoch": 37.48, + "learning_rate": 3.129297541118873e-08, + "loss": 3.7488, + "step": 3407000 + }, + { + "epoch": 37.49, + "learning_rate": 3.1279223279608335e-08, + "loss": 3.777, + "step": 3407500 + }, + { + "epoch": 37.49, + "learning_rate": 3.1265471148027945e-08, + "loss": 3.7627, + "step": 3408000 + }, + { + "epoch": 37.5, + "learning_rate": 3.125171901644755e-08, + "loss": 3.7484, + "step": 3408500 + }, + { + "epoch": 37.5, + "learning_rate": 3.123796688486715e-08, + "loss": 3.7706, + "step": 3409000 + }, + { + "epoch": 37.51, + "learning_rate": 3.122421475328676e-08, + "loss": 3.7523, + "step": 3409500 + }, + { + "epoch": 37.52, + "learning_rate": 3.121046262170636e-08, + "loss": 3.756, + "step": 3410000 + }, + { + "epoch": 37.52, + "learning_rate": 3.1196710490125965e-08, + "loss": 3.7488, + "step": 3410500 + }, + { + "epoch": 37.53, + "learning_rate": 3.1182958358545574e-08, + "loss": 3.7655, + "step": 3411000 + }, + { + "epoch": 37.53, + "learning_rate": 3.116920622696518e-08, + "loss": 3.7671, + "step": 3411500 + }, + { + "epoch": 37.54, + "learning_rate": 3.115545409538478e-08, + "loss": 3.7543, + "step": 3412000 + }, + { + "epoch": 37.54, + "learning_rate": 3.114170196380439e-08, + "loss": 3.754, + "step": 3412500 + }, + { + "epoch": 37.55, + "learning_rate": 3.112794983222399e-08, + "loss": 3.7609, + "step": 3413000 + }, + { + "epoch": 37.55, + "learning_rate": 3.1114197700643595e-08, + "loss": 3.7599, + "step": 3413500 + }, + { + "epoch": 37.56, + "learning_rate": 3.1100445569063204e-08, + "loss": 3.7916, + "step": 3414000 + }, + { + "epoch": 37.57, + "learning_rate": 3.1086693437482807e-08, + "loss": 3.7638, + "step": 3414500 + }, + { + "epoch": 37.57, + "learning_rate": 3.107294130590241e-08, + "loss": 3.7652, + "step": 3415000 + }, + { + "epoch": 37.58, + "learning_rate": 3.105918917432202e-08, + "loss": 3.7718, + "step": 3415500 + }, + { + "epoch": 37.58, + "learning_rate": 3.104543704274162e-08, + "loss": 3.7686, + "step": 3416000 + }, + { + "epoch": 37.59, + "learning_rate": 3.1031684911161224e-08, + "loss": 3.7525, + "step": 3416500 + }, + { + "epoch": 37.59, + "learning_rate": 3.1017932779580833e-08, + "loss": 3.7379, + "step": 3417000 + }, + { + "epoch": 37.6, + "learning_rate": 3.1004180648000436e-08, + "loss": 3.7551, + "step": 3417500 + }, + { + "epoch": 37.6, + "learning_rate": 3.099042851642004e-08, + "loss": 3.7739, + "step": 3418000 + }, + { + "epoch": 37.61, + "learning_rate": 3.097667638483965e-08, + "loss": 3.738, + "step": 3418500 + }, + { + "epoch": 37.61, + "learning_rate": 3.096292425325925e-08, + "loss": 3.7457, + "step": 3419000 + }, + { + "epoch": 37.62, + "learning_rate": 3.094917212167886e-08, + "loss": 3.7526, + "step": 3419500 + }, + { + "epoch": 37.63, + "learning_rate": 3.093541999009846e-08, + "loss": 3.7446, + "step": 3420000 + }, + { + "epoch": 37.63, + "learning_rate": 3.0921667858518066e-08, + "loss": 3.7633, + "step": 3420500 + }, + { + "epoch": 37.64, + "learning_rate": 3.0907915726937675e-08, + "loss": 3.7568, + "step": 3421000 + }, + { + "epoch": 37.64, + "learning_rate": 3.089416359535728e-08, + "loss": 3.7442, + "step": 3421500 + }, + { + "epoch": 37.65, + "learning_rate": 3.088041146377689e-08, + "loss": 3.74, + "step": 3422000 + }, + { + "epoch": 37.65, + "learning_rate": 3.086665933219649e-08, + "loss": 3.7407, + "step": 3422500 + }, + { + "epoch": 37.66, + "learning_rate": 3.08529072006161e-08, + "loss": 3.7679, + "step": 3423000 + }, + { + "epoch": 37.66, + "learning_rate": 3.08391550690357e-08, + "loss": 3.7464, + "step": 3423500 + }, + { + "epoch": 37.67, + "learning_rate": 3.0825402937455305e-08, + "loss": 3.747, + "step": 3424000 + }, + { + "epoch": 37.68, + "learning_rate": 3.0811650805874914e-08, + "loss": 3.7591, + "step": 3424500 + }, + { + "epoch": 37.68, + "learning_rate": 3.079789867429452e-08, + "loss": 3.7494, + "step": 3425000 + }, + { + "epoch": 37.69, + "learning_rate": 3.078414654271412e-08, + "loss": 3.7703, + "step": 3425500 + }, + { + "epoch": 37.69, + "learning_rate": 3.077039441113373e-08, + "loss": 3.7678, + "step": 3426000 + }, + { + "epoch": 37.7, + "learning_rate": 3.075664227955333e-08, + "loss": 3.7448, + "step": 3426500 + }, + { + "epoch": 37.7, + "learning_rate": 3.0742890147972934e-08, + "loss": 3.7676, + "step": 3427000 + }, + { + "epoch": 37.71, + "learning_rate": 3.0729138016392544e-08, + "loss": 3.7554, + "step": 3427500 + }, + { + "epoch": 37.71, + "learning_rate": 3.0715385884812146e-08, + "loss": 3.7644, + "step": 3428000 + }, + { + "epoch": 37.72, + "learning_rate": 3.070163375323175e-08, + "loss": 3.741, + "step": 3428500 + }, + { + "epoch": 37.72, + "learning_rate": 3.068788162165136e-08, + "loss": 3.7481, + "step": 3429000 + }, + { + "epoch": 37.73, + "learning_rate": 3.067412949007096e-08, + "loss": 3.7594, + "step": 3429500 + }, + { + "epoch": 37.74, + "learning_rate": 3.0660377358490564e-08, + "loss": 3.7575, + "step": 3430000 + }, + { + "epoch": 37.74, + "learning_rate": 3.064662522691017e-08, + "loss": 3.7572, + "step": 3430500 + }, + { + "epoch": 37.75, + "learning_rate": 3.0632873095329776e-08, + "loss": 3.773, + "step": 3431000 + }, + { + "epoch": 37.75, + "learning_rate": 3.061912096374938e-08, + "loss": 3.76, + "step": 3431500 + }, + { + "epoch": 37.76, + "learning_rate": 3.060536883216899e-08, + "loss": 3.7595, + "step": 3432000 + }, + { + "epoch": 37.76, + "learning_rate": 3.059161670058859e-08, + "loss": 3.7229, + "step": 3432500 + }, + { + "epoch": 37.77, + "learning_rate": 3.0577864569008193e-08, + "loss": 3.7395, + "step": 3433000 + }, + { + "epoch": 37.77, + "learning_rate": 3.05641124374278e-08, + "loss": 3.7567, + "step": 3433500 + }, + { + "epoch": 37.78, + "learning_rate": 3.0550360305847406e-08, + "loss": 3.7415, + "step": 3434000 + }, + { + "epoch": 37.79, + "learning_rate": 3.053660817426701e-08, + "loss": 3.7541, + "step": 3434500 + }, + { + "epoch": 37.79, + "learning_rate": 3.052285604268662e-08, + "loss": 3.7471, + "step": 3435000 + }, + { + "epoch": 37.8, + "learning_rate": 3.050910391110622e-08, + "loss": 3.76, + "step": 3435500 + }, + { + "epoch": 37.8, + "learning_rate": 3.049535177952582e-08, + "loss": 3.751, + "step": 3436000 + }, + { + "epoch": 37.81, + "learning_rate": 3.048159964794543e-08, + "loss": 3.7786, + "step": 3436500 + }, + { + "epoch": 37.81, + "learning_rate": 3.0467847516365035e-08, + "loss": 3.7555, + "step": 3437000 + }, + { + "epoch": 37.82, + "learning_rate": 3.045409538478464e-08, + "loss": 3.7446, + "step": 3437500 + }, + { + "epoch": 37.82, + "learning_rate": 3.044034325320424e-08, + "loss": 3.7888, + "step": 3438000 + }, + { + "epoch": 37.83, + "learning_rate": 3.042659112162385e-08, + "loss": 3.7453, + "step": 3438500 + }, + { + "epoch": 37.83, + "learning_rate": 3.041283899004345e-08, + "loss": 3.7538, + "step": 3439000 + }, + { + "epoch": 37.84, + "learning_rate": 3.0399086858463055e-08, + "loss": 3.7609, + "step": 3439500 + }, + { + "epoch": 37.85, + "learning_rate": 3.0385334726882665e-08, + "loss": 3.7641, + "step": 3440000 + }, + { + "epoch": 37.85, + "learning_rate": 3.037158259530227e-08, + "loss": 3.7627, + "step": 3440500 + }, + { + "epoch": 37.86, + "learning_rate": 3.035783046372187e-08, + "loss": 3.7684, + "step": 3441000 + }, + { + "epoch": 37.86, + "learning_rate": 3.034407833214148e-08, + "loss": 3.7567, + "step": 3441500 + }, + { + "epoch": 37.87, + "learning_rate": 3.033032620056108e-08, + "loss": 3.7295, + "step": 3442000 + }, + { + "epoch": 37.87, + "learning_rate": 3.0316574068980685e-08, + "loss": 3.7563, + "step": 3442500 + }, + { + "epoch": 37.88, + "learning_rate": 3.0302821937400294e-08, + "loss": 3.7524, + "step": 3443000 + }, + { + "epoch": 37.88, + "learning_rate": 3.02890698058199e-08, + "loss": 3.7432, + "step": 3443500 + }, + { + "epoch": 37.89, + "learning_rate": 3.0275317674239506e-08, + "loss": 3.7419, + "step": 3444000 + }, + { + "epoch": 37.9, + "learning_rate": 3.026156554265911e-08, + "loss": 3.7552, + "step": 3444500 + }, + { + "epoch": 37.9, + "learning_rate": 3.024781341107872e-08, + "loss": 3.7488, + "step": 3445000 + }, + { + "epoch": 37.91, + "learning_rate": 3.023406127949832e-08, + "loss": 3.7537, + "step": 3445500 + }, + { + "epoch": 37.91, + "learning_rate": 3.022030914791793e-08, + "loss": 3.7605, + "step": 3446000 + }, + { + "epoch": 37.92, + "learning_rate": 3.020655701633753e-08, + "loss": 3.754, + "step": 3446500 + }, + { + "epoch": 37.92, + "learning_rate": 3.0192804884757136e-08, + "loss": 3.7469, + "step": 3447000 + }, + { + "epoch": 37.93, + "learning_rate": 3.0179052753176745e-08, + "loss": 3.7449, + "step": 3447500 + }, + { + "epoch": 37.93, + "learning_rate": 3.016530062159635e-08, + "loss": 3.7657, + "step": 3448000 + }, + { + "epoch": 37.94, + "learning_rate": 3.015154849001595e-08, + "loss": 3.7562, + "step": 3448500 + }, + { + "epoch": 37.94, + "learning_rate": 3.013779635843556e-08, + "loss": 3.7506, + "step": 3449000 + }, + { + "epoch": 37.95, + "learning_rate": 3.012404422685516e-08, + "loss": 3.7683, + "step": 3449500 + }, + { + "epoch": 37.96, + "learning_rate": 3.0110292095274766e-08, + "loss": 3.7657, + "step": 3450000 + }, + { + "epoch": 37.96, + "learning_rate": 3.0096539963694375e-08, + "loss": 3.734, + "step": 3450500 + }, + { + "epoch": 37.97, + "learning_rate": 3.008278783211398e-08, + "loss": 3.7426, + "step": 3451000 + }, + { + "epoch": 37.97, + "learning_rate": 3.006903570053358e-08, + "loss": 3.772, + "step": 3451500 + }, + { + "epoch": 37.98, + "learning_rate": 3.005528356895319e-08, + "loss": 3.743, + "step": 3452000 + }, + { + "epoch": 37.98, + "learning_rate": 3.004153143737279e-08, + "loss": 3.7583, + "step": 3452500 + }, + { + "epoch": 37.99, + "learning_rate": 3.0027779305792395e-08, + "loss": 3.7463, + "step": 3453000 + }, + { + "epoch": 37.99, + "learning_rate": 3.0014027174212005e-08, + "loss": 3.7617, + "step": 3453500 + }, + { + "epoch": 38.0, + "learning_rate": 3.000027504263161e-08, + "loss": 3.7398, + "step": 3454000 + }, + { + "epoch": 38.0, + "eval_loss": 3.8288087844848633, + "eval_runtime": 6.1441, + "eval_samples_per_second": 252.924, + "step": 3454010 + }, + { + "epoch": 38.01, + "learning_rate": 2.998652291105121e-08, + "loss": 3.747, + "step": 3454500 + }, + { + "epoch": 38.01, + "learning_rate": 2.997277077947082e-08, + "loss": 3.7693, + "step": 3455000 + }, + { + "epoch": 38.02, + "learning_rate": 2.995901864789042e-08, + "loss": 3.7615, + "step": 3455500 + }, + { + "epoch": 38.02, + "learning_rate": 2.9945266516310025e-08, + "loss": 3.7497, + "step": 3456000 + }, + { + "epoch": 38.03, + "learning_rate": 2.9931514384729634e-08, + "loss": 3.7631, + "step": 3456500 + }, + { + "epoch": 38.03, + "learning_rate": 2.991776225314924e-08, + "loss": 3.7566, + "step": 3457000 + }, + { + "epoch": 38.04, + "learning_rate": 2.990401012156884e-08, + "loss": 3.7484, + "step": 3457500 + }, + { + "epoch": 38.04, + "learning_rate": 2.989025798998845e-08, + "loss": 3.7608, + "step": 3458000 + }, + { + "epoch": 38.05, + "learning_rate": 2.987650585840805e-08, + "loss": 3.7656, + "step": 3458500 + }, + { + "epoch": 38.05, + "learning_rate": 2.9862753726827654e-08, + "loss": 3.7521, + "step": 3459000 + }, + { + "epoch": 38.06, + "learning_rate": 2.9849001595247264e-08, + "loss": 3.755, + "step": 3459500 + }, + { + "epoch": 38.07, + "learning_rate": 2.9835249463666866e-08, + "loss": 3.731, + "step": 3460000 + }, + { + "epoch": 38.07, + "learning_rate": 2.982149733208647e-08, + "loss": 3.7729, + "step": 3460500 + }, + { + "epoch": 38.08, + "learning_rate": 2.980774520050608e-08, + "loss": 3.7281, + "step": 3461000 + }, + { + "epoch": 38.08, + "learning_rate": 2.979399306892568e-08, + "loss": 3.7644, + "step": 3461500 + }, + { + "epoch": 38.09, + "learning_rate": 2.9780240937345287e-08, + "loss": 3.7385, + "step": 3462000 + }, + { + "epoch": 38.09, + "learning_rate": 2.9766488805764893e-08, + "loss": 3.7639, + "step": 3462500 + }, + { + "epoch": 38.1, + "learning_rate": 2.9752736674184496e-08, + "loss": 3.7688, + "step": 3463000 + }, + { + "epoch": 38.1, + "learning_rate": 2.9738984542604102e-08, + "loss": 3.758, + "step": 3463500 + }, + { + "epoch": 38.11, + "learning_rate": 2.9725232411023708e-08, + "loss": 3.7447, + "step": 3464000 + }, + { + "epoch": 38.12, + "learning_rate": 2.9711480279443314e-08, + "loss": 3.7391, + "step": 3464500 + }, + { + "epoch": 38.12, + "learning_rate": 2.9697728147862917e-08, + "loss": 3.7558, + "step": 3465000 + }, + { + "epoch": 38.13, + "learning_rate": 2.9683976016282526e-08, + "loss": 3.7415, + "step": 3465500 + }, + { + "epoch": 38.13, + "learning_rate": 2.967022388470213e-08, + "loss": 3.742, + "step": 3466000 + }, + { + "epoch": 38.14, + "learning_rate": 2.965647175312173e-08, + "loss": 3.7526, + "step": 3466500 + }, + { + "epoch": 38.14, + "learning_rate": 2.964271962154134e-08, + "loss": 3.7474, + "step": 3467000 + }, + { + "epoch": 38.15, + "learning_rate": 2.9628967489960944e-08, + "loss": 3.7646, + "step": 3467500 + }, + { + "epoch": 38.15, + "learning_rate": 2.9615215358380546e-08, + "loss": 3.7553, + "step": 3468000 + }, + { + "epoch": 38.16, + "learning_rate": 2.9601463226800156e-08, + "loss": 3.7563, + "step": 3468500 + }, + { + "epoch": 38.16, + "learning_rate": 2.958771109521976e-08, + "loss": 3.7656, + "step": 3469000 + }, + { + "epoch": 38.17, + "learning_rate": 2.957395896363936e-08, + "loss": 3.7524, + "step": 3469500 + }, + { + "epoch": 38.18, + "learning_rate": 2.956020683205897e-08, + "loss": 3.7736, + "step": 3470000 + }, + { + "epoch": 38.18, + "learning_rate": 2.9546454700478573e-08, + "loss": 3.7517, + "step": 3470500 + }, + { + "epoch": 38.19, + "learning_rate": 2.9532702568898176e-08, + "loss": 3.7344, + "step": 3471000 + }, + { + "epoch": 38.19, + "learning_rate": 2.9518950437317785e-08, + "loss": 3.7378, + "step": 3471500 + }, + { + "epoch": 38.2, + "learning_rate": 2.9505198305737388e-08, + "loss": 3.7528, + "step": 3472000 + }, + { + "epoch": 38.2, + "learning_rate": 2.949144617415699e-08, + "loss": 3.7721, + "step": 3472500 + }, + { + "epoch": 38.21, + "learning_rate": 2.94776940425766e-08, + "loss": 3.7645, + "step": 3473000 + }, + { + "epoch": 38.21, + "learning_rate": 2.9463941910996203e-08, + "loss": 3.7421, + "step": 3473500 + }, + { + "epoch": 38.22, + "learning_rate": 2.9450189779415806e-08, + "loss": 3.7563, + "step": 3474000 + }, + { + "epoch": 38.23, + "learning_rate": 2.9436437647835412e-08, + "loss": 3.7472, + "step": 3474500 + }, + { + "epoch": 38.23, + "learning_rate": 2.9422685516255018e-08, + "loss": 3.7597, + "step": 3475000 + }, + { + "epoch": 38.24, + "learning_rate": 2.9408933384674624e-08, + "loss": 3.7593, + "step": 3475500 + }, + { + "epoch": 38.24, + "learning_rate": 2.9395181253094226e-08, + "loss": 3.7685, + "step": 3476000 + }, + { + "epoch": 38.25, + "learning_rate": 2.9381429121513836e-08, + "loss": 3.7487, + "step": 3476500 + }, + { + "epoch": 38.25, + "learning_rate": 2.936767698993344e-08, + "loss": 3.7574, + "step": 3477000 + }, + { + "epoch": 38.26, + "learning_rate": 2.935392485835304e-08, + "loss": 3.7373, + "step": 3477500 + }, + { + "epoch": 38.26, + "learning_rate": 2.934017272677265e-08, + "loss": 3.7508, + "step": 3478000 + }, + { + "epoch": 38.27, + "learning_rate": 2.9326420595192253e-08, + "loss": 3.7464, + "step": 3478500 + }, + { + "epoch": 38.27, + "learning_rate": 2.9312668463611856e-08, + "loss": 3.7557, + "step": 3479000 + }, + { + "epoch": 38.28, + "learning_rate": 2.9298916332031465e-08, + "loss": 3.7528, + "step": 3479500 + }, + { + "epoch": 38.29, + "learning_rate": 2.9285164200451068e-08, + "loss": 3.7653, + "step": 3480000 + }, + { + "epoch": 38.29, + "learning_rate": 2.927141206887067e-08, + "loss": 3.763, + "step": 3480500 + }, + { + "epoch": 38.3, + "learning_rate": 2.925765993729028e-08, + "loss": 3.7287, + "step": 3481000 + }, + { + "epoch": 38.3, + "learning_rate": 2.9243907805709883e-08, + "loss": 3.7664, + "step": 3481500 + }, + { + "epoch": 38.31, + "learning_rate": 2.9230155674129486e-08, + "loss": 3.7501, + "step": 3482000 + }, + { + "epoch": 38.31, + "learning_rate": 2.9216403542549095e-08, + "loss": 3.7539, + "step": 3482500 + }, + { + "epoch": 38.32, + "learning_rate": 2.9202651410968698e-08, + "loss": 3.7468, + "step": 3483000 + }, + { + "epoch": 38.32, + "learning_rate": 2.91888992793883e-08, + "loss": 3.7583, + "step": 3483500 + }, + { + "epoch": 38.33, + "learning_rate": 2.917514714780791e-08, + "loss": 3.7505, + "step": 3484000 + }, + { + "epoch": 38.34, + "learning_rate": 2.9161395016227513e-08, + "loss": 3.7501, + "step": 3484500 + }, + { + "epoch": 38.34, + "learning_rate": 2.9147642884647115e-08, + "loss": 3.7699, + "step": 3485000 + }, + { + "epoch": 38.35, + "learning_rate": 2.9133890753066725e-08, + "loss": 3.7592, + "step": 3485500 + }, + { + "epoch": 38.35, + "learning_rate": 2.9120138621486327e-08, + "loss": 3.7565, + "step": 3486000 + }, + { + "epoch": 38.36, + "learning_rate": 2.9106386489905933e-08, + "loss": 3.7387, + "step": 3486500 + }, + { + "epoch": 38.36, + "learning_rate": 2.909263435832554e-08, + "loss": 3.7679, + "step": 3487000 + }, + { + "epoch": 38.37, + "learning_rate": 2.9078882226745145e-08, + "loss": 3.7425, + "step": 3487500 + }, + { + "epoch": 38.37, + "learning_rate": 2.9065130095164748e-08, + "loss": 3.7636, + "step": 3488000 + }, + { + "epoch": 38.38, + "learning_rate": 2.9051377963584357e-08, + "loss": 3.7664, + "step": 3488500 + }, + { + "epoch": 38.38, + "learning_rate": 2.903762583200396e-08, + "loss": 3.7403, + "step": 3489000 + }, + { + "epoch": 38.39, + "learning_rate": 2.9023873700423563e-08, + "loss": 3.7367, + "step": 3489500 + }, + { + "epoch": 38.4, + "learning_rate": 2.9010121568843172e-08, + "loss": 3.7402, + "step": 3490000 + }, + { + "epoch": 38.4, + "learning_rate": 2.8996369437262775e-08, + "loss": 3.7482, + "step": 3490500 + }, + { + "epoch": 38.41, + "learning_rate": 2.8982617305682378e-08, + "loss": 3.7431, + "step": 3491000 + }, + { + "epoch": 38.41, + "learning_rate": 2.8968865174101987e-08, + "loss": 3.7447, + "step": 3491500 + }, + { + "epoch": 38.42, + "learning_rate": 2.895511304252159e-08, + "loss": 3.7605, + "step": 3492000 + }, + { + "epoch": 38.42, + "learning_rate": 2.8941360910941193e-08, + "loss": 3.7714, + "step": 3492500 + }, + { + "epoch": 38.43, + "learning_rate": 2.8927608779360802e-08, + "loss": 3.7476, + "step": 3493000 + }, + { + "epoch": 38.43, + "learning_rate": 2.8913856647780405e-08, + "loss": 3.7413, + "step": 3493500 + }, + { + "epoch": 38.44, + "learning_rate": 2.8900104516200007e-08, + "loss": 3.7629, + "step": 3494000 + }, + { + "epoch": 38.45, + "learning_rate": 2.8886352384619617e-08, + "loss": 3.7504, + "step": 3494500 + }, + { + "epoch": 38.45, + "learning_rate": 2.887260025303922e-08, + "loss": 3.7838, + "step": 3495000 + }, + { + "epoch": 38.46, + "learning_rate": 2.8858848121458822e-08, + "loss": 3.7529, + "step": 3495500 + }, + { + "epoch": 38.46, + "learning_rate": 2.884509598987843e-08, + "loss": 3.7282, + "step": 3496000 + }, + { + "epoch": 38.47, + "learning_rate": 2.8831343858298034e-08, + "loss": 3.7472, + "step": 3496500 + }, + { + "epoch": 38.47, + "learning_rate": 2.8817591726717637e-08, + "loss": 3.7521, + "step": 3497000 + }, + { + "epoch": 38.48, + "learning_rate": 2.8803839595137246e-08, + "loss": 3.7688, + "step": 3497500 + }, + { + "epoch": 38.48, + "learning_rate": 2.879008746355685e-08, + "loss": 3.7477, + "step": 3498000 + }, + { + "epoch": 38.49, + "learning_rate": 2.8776335331976455e-08, + "loss": 3.7729, + "step": 3498500 + }, + { + "epoch": 38.49, + "learning_rate": 2.876258320039606e-08, + "loss": 3.7531, + "step": 3499000 + }, + { + "epoch": 38.5, + "learning_rate": 2.8748831068815667e-08, + "loss": 3.7701, + "step": 3499500 + }, + { + "epoch": 38.51, + "learning_rate": 2.873507893723527e-08, + "loss": 3.7546, + "step": 3500000 + }, + { + "epoch": 38.51, + "learning_rate": 2.8721326805654876e-08, + "loss": 3.7481, + "step": 3500500 + }, + { + "epoch": 38.52, + "learning_rate": 2.8707574674074482e-08, + "loss": 3.7578, + "step": 3501000 + }, + { + "epoch": 38.52, + "learning_rate": 2.8693822542494085e-08, + "loss": 3.7348, + "step": 3501500 + }, + { + "epoch": 38.53, + "learning_rate": 2.8680070410913694e-08, + "loss": 3.7407, + "step": 3502000 + }, + { + "epoch": 38.53, + "learning_rate": 2.8666318279333297e-08, + "loss": 3.7657, + "step": 3502500 + }, + { + "epoch": 38.54, + "learning_rate": 2.86525661477529e-08, + "loss": 3.7645, + "step": 3503000 + }, + { + "epoch": 38.54, + "learning_rate": 2.863881401617251e-08, + "loss": 3.757, + "step": 3503500 + }, + { + "epoch": 38.55, + "learning_rate": 2.862506188459211e-08, + "loss": 3.748, + "step": 3504000 + }, + { + "epoch": 38.56, + "learning_rate": 2.8611309753011714e-08, + "loss": 3.7746, + "step": 3504500 + }, + { + "epoch": 38.56, + "learning_rate": 2.8597557621431324e-08, + "loss": 3.7413, + "step": 3505000 + }, + { + "epoch": 38.57, + "learning_rate": 2.8583805489850926e-08, + "loss": 3.7268, + "step": 3505500 + }, + { + "epoch": 38.57, + "learning_rate": 2.857005335827053e-08, + "loss": 3.7501, + "step": 3506000 + }, + { + "epoch": 38.58, + "learning_rate": 2.8556301226690138e-08, + "loss": 3.7468, + "step": 3506500 + }, + { + "epoch": 38.58, + "learning_rate": 2.854254909510974e-08, + "loss": 3.7567, + "step": 3507000 + }, + { + "epoch": 38.59, + "learning_rate": 2.8528796963529344e-08, + "loss": 3.7683, + "step": 3507500 + }, + { + "epoch": 38.59, + "learning_rate": 2.8515044831948953e-08, + "loss": 3.771, + "step": 3508000 + }, + { + "epoch": 38.6, + "learning_rate": 2.8501292700368556e-08, + "loss": 3.7619, + "step": 3508500 + }, + { + "epoch": 38.6, + "learning_rate": 2.848754056878816e-08, + "loss": 3.7504, + "step": 3509000 + }, + { + "epoch": 38.61, + "learning_rate": 2.8473788437207768e-08, + "loss": 3.7455, + "step": 3509500 + }, + { + "epoch": 38.62, + "learning_rate": 2.846003630562737e-08, + "loss": 3.757, + "step": 3510000 + }, + { + "epoch": 38.62, + "learning_rate": 2.8446284174046973e-08, + "loss": 3.7603, + "step": 3510500 + }, + { + "epoch": 38.63, + "learning_rate": 2.8432532042466583e-08, + "loss": 3.7521, + "step": 3511000 + }, + { + "epoch": 38.63, + "learning_rate": 2.8418779910886185e-08, + "loss": 3.783, + "step": 3511500 + }, + { + "epoch": 38.64, + "learning_rate": 2.840502777930579e-08, + "loss": 3.7568, + "step": 3512000 + }, + { + "epoch": 38.64, + "learning_rate": 2.8391275647725394e-08, + "loss": 3.7694, + "step": 3512500 + }, + { + "epoch": 38.65, + "learning_rate": 2.8377523516145004e-08, + "loss": 3.7544, + "step": 3513000 + }, + { + "epoch": 38.65, + "learning_rate": 2.8363771384564606e-08, + "loss": 3.747, + "step": 3513500 + }, + { + "epoch": 38.66, + "learning_rate": 2.835001925298421e-08, + "loss": 3.749, + "step": 3514000 + }, + { + "epoch": 38.67, + "learning_rate": 2.833626712140382e-08, + "loss": 3.7521, + "step": 3514500 + }, + { + "epoch": 38.67, + "learning_rate": 2.832251498982342e-08, + "loss": 3.7781, + "step": 3515000 + }, + { + "epoch": 38.68, + "learning_rate": 2.8308762858243024e-08, + "loss": 3.74, + "step": 3515500 + }, + { + "epoch": 38.68, + "learning_rate": 2.8295010726662633e-08, + "loss": 3.7428, + "step": 3516000 + }, + { + "epoch": 38.69, + "learning_rate": 2.8281258595082236e-08, + "loss": 3.7529, + "step": 3516500 + }, + { + "epoch": 38.69, + "learning_rate": 2.826750646350184e-08, + "loss": 3.7672, + "step": 3517000 + }, + { + "epoch": 38.7, + "learning_rate": 2.8253754331921448e-08, + "loss": 3.7565, + "step": 3517500 + }, + { + "epoch": 38.7, + "learning_rate": 2.824000220034105e-08, + "loss": 3.7635, + "step": 3518000 + }, + { + "epoch": 38.71, + "learning_rate": 2.8226250068760653e-08, + "loss": 3.7433, + "step": 3518500 + }, + { + "epoch": 38.72, + "learning_rate": 2.8212497937180263e-08, + "loss": 3.7469, + "step": 3519000 + }, + { + "epoch": 38.72, + "learning_rate": 2.8198745805599865e-08, + "loss": 3.7532, + "step": 3519500 + }, + { + "epoch": 38.73, + "learning_rate": 2.8184993674019468e-08, + "loss": 3.7597, + "step": 3520000 + }, + { + "epoch": 38.73, + "learning_rate": 2.8171241542439078e-08, + "loss": 3.7533, + "step": 3520500 + }, + { + "epoch": 38.74, + "learning_rate": 2.815748941085868e-08, + "loss": 3.7697, + "step": 3521000 + }, + { + "epoch": 38.74, + "learning_rate": 2.8143737279278283e-08, + "loss": 3.7405, + "step": 3521500 + }, + { + "epoch": 38.75, + "learning_rate": 2.8129985147697892e-08, + "loss": 3.7465, + "step": 3522000 + }, + { + "epoch": 38.75, + "learning_rate": 2.8116233016117495e-08, + "loss": 3.7584, + "step": 3522500 + }, + { + "epoch": 38.76, + "learning_rate": 2.81024808845371e-08, + "loss": 3.7655, + "step": 3523000 + }, + { + "epoch": 38.76, + "learning_rate": 2.8088728752956707e-08, + "loss": 3.7634, + "step": 3523500 + }, + { + "epoch": 38.77, + "learning_rate": 2.8074976621376313e-08, + "loss": 3.7449, + "step": 3524000 + }, + { + "epoch": 38.78, + "learning_rate": 2.8061224489795916e-08, + "loss": 3.7725, + "step": 3524500 + }, + { + "epoch": 38.78, + "learning_rate": 2.8047472358215525e-08, + "loss": 3.7667, + "step": 3525000 + }, + { + "epoch": 38.79, + "learning_rate": 2.8033720226635128e-08, + "loss": 3.7567, + "step": 3525500 + }, + { + "epoch": 38.79, + "learning_rate": 2.801996809505473e-08, + "loss": 3.7658, + "step": 3526000 + }, + { + "epoch": 38.8, + "learning_rate": 2.800621596347434e-08, + "loss": 3.7498, + "step": 3526500 + }, + { + "epoch": 38.8, + "learning_rate": 2.7992463831893943e-08, + "loss": 3.7465, + "step": 3527000 + }, + { + "epoch": 38.81, + "learning_rate": 2.7978711700313545e-08, + "loss": 3.7611, + "step": 3527500 + }, + { + "epoch": 38.81, + "learning_rate": 2.7964959568733155e-08, + "loss": 3.7632, + "step": 3528000 + }, + { + "epoch": 38.82, + "learning_rate": 2.7951207437152758e-08, + "loss": 3.7851, + "step": 3528500 + }, + { + "epoch": 38.83, + "learning_rate": 2.793745530557236e-08, + "loss": 3.7624, + "step": 3529000 + }, + { + "epoch": 38.83, + "learning_rate": 2.792370317399197e-08, + "loss": 3.7496, + "step": 3529500 + }, + { + "epoch": 38.84, + "learning_rate": 2.7909951042411572e-08, + "loss": 3.7649, + "step": 3530000 + }, + { + "epoch": 38.84, + "learning_rate": 2.7896198910831175e-08, + "loss": 3.759, + "step": 3530500 + }, + { + "epoch": 38.85, + "learning_rate": 2.7882446779250784e-08, + "loss": 3.7518, + "step": 3531000 + }, + { + "epoch": 38.85, + "learning_rate": 2.7868694647670387e-08, + "loss": 3.7602, + "step": 3531500 + }, + { + "epoch": 38.86, + "learning_rate": 2.785494251608999e-08, + "loss": 3.7501, + "step": 3532000 + }, + { + "epoch": 38.86, + "learning_rate": 2.78411903845096e-08, + "loss": 3.7557, + "step": 3532500 + }, + { + "epoch": 38.87, + "learning_rate": 2.7827438252929202e-08, + "loss": 3.7326, + "step": 3533000 + }, + { + "epoch": 38.87, + "learning_rate": 2.7813686121348805e-08, + "loss": 3.7613, + "step": 3533500 + }, + { + "epoch": 38.88, + "learning_rate": 2.7799933989768414e-08, + "loss": 3.7563, + "step": 3534000 + }, + { + "epoch": 38.89, + "learning_rate": 2.7786181858188017e-08, + "loss": 3.7545, + "step": 3534500 + }, + { + "epoch": 38.89, + "learning_rate": 2.7772429726607623e-08, + "loss": 3.7558, + "step": 3535000 + }, + { + "epoch": 38.9, + "learning_rate": 2.775867759502723e-08, + "loss": 3.7496, + "step": 3535500 + }, + { + "epoch": 38.9, + "learning_rate": 2.7744925463446835e-08, + "loss": 3.7532, + "step": 3536000 + }, + { + "epoch": 38.91, + "learning_rate": 2.7731173331866438e-08, + "loss": 3.7688, + "step": 3536500 + }, + { + "epoch": 38.91, + "learning_rate": 2.7717421200286047e-08, + "loss": 3.7702, + "step": 3537000 + }, + { + "epoch": 38.92, + "learning_rate": 2.770366906870565e-08, + "loss": 3.7472, + "step": 3537500 + }, + { + "epoch": 38.92, + "learning_rate": 2.7689916937125252e-08, + "loss": 3.7546, + "step": 3538000 + }, + { + "epoch": 38.93, + "learning_rate": 2.7676164805544862e-08, + "loss": 3.7416, + "step": 3538500 + }, + { + "epoch": 38.94, + "learning_rate": 2.7662412673964464e-08, + "loss": 3.7591, + "step": 3539000 + }, + { + "epoch": 38.94, + "learning_rate": 2.7648660542384067e-08, + "loss": 3.7804, + "step": 3539500 + }, + { + "epoch": 38.95, + "learning_rate": 2.7634908410803676e-08, + "loss": 3.7571, + "step": 3540000 + }, + { + "epoch": 38.95, + "learning_rate": 2.762115627922328e-08, + "loss": 3.7568, + "step": 3540500 + }, + { + "epoch": 38.96, + "learning_rate": 2.7607404147642882e-08, + "loss": 3.7703, + "step": 3541000 + }, + { + "epoch": 38.96, + "learning_rate": 2.759365201606249e-08, + "loss": 3.7581, + "step": 3541500 + }, + { + "epoch": 38.97, + "learning_rate": 2.7579899884482094e-08, + "loss": 3.7577, + "step": 3542000 + }, + { + "epoch": 38.97, + "learning_rate": 2.7566147752901697e-08, + "loss": 3.7753, + "step": 3542500 + }, + { + "epoch": 38.98, + "learning_rate": 2.7552395621321306e-08, + "loss": 3.7399, + "step": 3543000 + }, + { + "epoch": 38.98, + "learning_rate": 2.753864348974091e-08, + "loss": 3.7544, + "step": 3543500 + }, + { + "epoch": 38.99, + "learning_rate": 2.752489135816051e-08, + "loss": 3.7492, + "step": 3544000 + }, + { + "epoch": 39.0, + "learning_rate": 2.751113922658012e-08, + "loss": 3.7431, + "step": 3544500 + }, + { + "epoch": 39.0, + "eval_loss": 3.8281142711639404, + "eval_runtime": 6.1436, + "eval_samples_per_second": 252.947, + "step": 3544905 + }, + { + "epoch": 39.0, + "learning_rate": 2.7497387094999724e-08, + "loss": 3.7527, + "step": 3545000 + }, + { + "epoch": 39.01, + "learning_rate": 2.7483634963419326e-08, + "loss": 3.7626, + "step": 3545500 + }, + { + "epoch": 39.01, + "learning_rate": 2.7469882831838936e-08, + "loss": 3.7582, + "step": 3546000 + }, + { + "epoch": 39.02, + "learning_rate": 2.745613070025854e-08, + "loss": 3.7599, + "step": 3546500 + }, + { + "epoch": 39.02, + "learning_rate": 2.7442378568678144e-08, + "loss": 3.7629, + "step": 3547000 + }, + { + "epoch": 39.03, + "learning_rate": 2.742862643709775e-08, + "loss": 3.7516, + "step": 3547500 + }, + { + "epoch": 39.03, + "learning_rate": 2.7414874305517353e-08, + "loss": 3.7613, + "step": 3548000 + }, + { + "epoch": 39.04, + "learning_rate": 2.740112217393696e-08, + "loss": 3.7516, + "step": 3548500 + }, + { + "epoch": 39.05, + "learning_rate": 2.7387370042356565e-08, + "loss": 3.7697, + "step": 3549000 + }, + { + "epoch": 39.05, + "learning_rate": 2.737361791077617e-08, + "loss": 3.7474, + "step": 3549500 + }, + { + "epoch": 39.06, + "learning_rate": 2.7359865779195774e-08, + "loss": 3.7391, + "step": 3550000 + }, + { + "epoch": 39.06, + "learning_rate": 2.7346113647615377e-08, + "loss": 3.738, + "step": 3550500 + }, + { + "epoch": 39.07, + "learning_rate": 2.7332361516034986e-08, + "loss": 3.7482, + "step": 3551000 + }, + { + "epoch": 39.07, + "learning_rate": 2.731860938445459e-08, + "loss": 3.7533, + "step": 3551500 + }, + { + "epoch": 39.08, + "learning_rate": 2.730485725287419e-08, + "loss": 3.7452, + "step": 3552000 + }, + { + "epoch": 39.08, + "learning_rate": 2.72911051212938e-08, + "loss": 3.7709, + "step": 3552500 + }, + { + "epoch": 39.09, + "learning_rate": 2.7277352989713404e-08, + "loss": 3.7591, + "step": 3553000 + }, + { + "epoch": 39.09, + "learning_rate": 2.7263600858133006e-08, + "loss": 3.7553, + "step": 3553500 + }, + { + "epoch": 39.1, + "learning_rate": 2.7249848726552616e-08, + "loss": 3.7516, + "step": 3554000 + }, + { + "epoch": 39.11, + "learning_rate": 2.723609659497222e-08, + "loss": 3.7508, + "step": 3554500 + }, + { + "epoch": 39.11, + "learning_rate": 2.722234446339182e-08, + "loss": 3.7729, + "step": 3555000 + }, + { + "epoch": 39.12, + "learning_rate": 2.720859233181143e-08, + "loss": 3.7414, + "step": 3555500 + }, + { + "epoch": 39.12, + "learning_rate": 2.7194840200231033e-08, + "loss": 3.7659, + "step": 3556000 + }, + { + "epoch": 39.13, + "learning_rate": 2.7181088068650636e-08, + "loss": 3.7573, + "step": 3556500 + }, + { + "epoch": 39.13, + "learning_rate": 2.7167335937070245e-08, + "loss": 3.7726, + "step": 3557000 + }, + { + "epoch": 39.14, + "learning_rate": 2.7153583805489848e-08, + "loss": 3.7529, + "step": 3557500 + }, + { + "epoch": 39.14, + "learning_rate": 2.713983167390945e-08, + "loss": 3.7511, + "step": 3558000 + }, + { + "epoch": 39.15, + "learning_rate": 2.712607954232906e-08, + "loss": 3.7553, + "step": 3558500 + }, + { + "epoch": 39.16, + "learning_rate": 2.7112327410748663e-08, + "loss": 3.7635, + "step": 3559000 + }, + { + "epoch": 39.16, + "learning_rate": 2.709857527916827e-08, + "loss": 3.7514, + "step": 3559500 + }, + { + "epoch": 39.17, + "learning_rate": 2.7084823147587875e-08, + "loss": 3.7555, + "step": 3560000 + }, + { + "epoch": 39.17, + "learning_rate": 2.707107101600748e-08, + "loss": 3.7559, + "step": 3560500 + }, + { + "epoch": 39.18, + "learning_rate": 2.7057318884427084e-08, + "loss": 3.7769, + "step": 3561000 + }, + { + "epoch": 39.18, + "learning_rate": 2.7043566752846693e-08, + "loss": 3.7502, + "step": 3561500 + }, + { + "epoch": 39.19, + "learning_rate": 2.7029814621266296e-08, + "loss": 3.7651, + "step": 3562000 + }, + { + "epoch": 39.19, + "learning_rate": 2.70160624896859e-08, + "loss": 3.7491, + "step": 3562500 + }, + { + "epoch": 39.2, + "learning_rate": 2.7002310358105508e-08, + "loss": 3.7614, + "step": 3563000 + }, + { + "epoch": 39.2, + "learning_rate": 2.698855822652511e-08, + "loss": 3.7349, + "step": 3563500 + }, + { + "epoch": 39.21, + "learning_rate": 2.6974806094944713e-08, + "loss": 3.764, + "step": 3564000 + }, + { + "epoch": 39.22, + "learning_rate": 2.6961053963364323e-08, + "loss": 3.7249, + "step": 3564500 + }, + { + "epoch": 39.22, + "learning_rate": 2.6947301831783925e-08, + "loss": 3.7416, + "step": 3565000 + }, + { + "epoch": 39.23, + "learning_rate": 2.6933549700203528e-08, + "loss": 3.7664, + "step": 3565500 + }, + { + "epoch": 39.23, + "learning_rate": 2.6919797568623137e-08, + "loss": 3.7663, + "step": 3566000 + }, + { + "epoch": 39.24, + "learning_rate": 2.690604543704274e-08, + "loss": 3.7502, + "step": 3566500 + }, + { + "epoch": 39.24, + "learning_rate": 2.6892293305462343e-08, + "loss": 3.758, + "step": 3567000 + }, + { + "epoch": 39.25, + "learning_rate": 2.6878541173881952e-08, + "loss": 3.7765, + "step": 3567500 + }, + { + "epoch": 39.25, + "learning_rate": 2.6864789042301555e-08, + "loss": 3.7612, + "step": 3568000 + }, + { + "epoch": 39.26, + "learning_rate": 2.6851036910721158e-08, + "loss": 3.7405, + "step": 3568500 + }, + { + "epoch": 39.27, + "learning_rate": 2.6837284779140767e-08, + "loss": 3.7569, + "step": 3569000 + }, + { + "epoch": 39.27, + "learning_rate": 2.682353264756037e-08, + "loss": 3.7444, + "step": 3569500 + }, + { + "epoch": 39.28, + "learning_rate": 2.6809780515979972e-08, + "loss": 3.7473, + "step": 3570000 + }, + { + "epoch": 39.28, + "learning_rate": 2.6796028384399582e-08, + "loss": 3.7597, + "step": 3570500 + }, + { + "epoch": 39.29, + "learning_rate": 2.6782276252819184e-08, + "loss": 3.7522, + "step": 3571000 + }, + { + "epoch": 39.29, + "learning_rate": 2.676852412123879e-08, + "loss": 3.7539, + "step": 3571500 + }, + { + "epoch": 39.3, + "learning_rate": 2.6754771989658397e-08, + "loss": 3.7756, + "step": 3572000 + }, + { + "epoch": 39.3, + "learning_rate": 2.6741019858078003e-08, + "loss": 3.7507, + "step": 3572500 + }, + { + "epoch": 39.31, + "learning_rate": 2.6727267726497605e-08, + "loss": 3.7501, + "step": 3573000 + }, + { + "epoch": 39.31, + "learning_rate": 2.6713515594917215e-08, + "loss": 3.7598, + "step": 3573500 + }, + { + "epoch": 39.32, + "learning_rate": 2.6699763463336817e-08, + "loss": 3.735, + "step": 3574000 + }, + { + "epoch": 39.33, + "learning_rate": 2.668601133175642e-08, + "loss": 3.7333, + "step": 3574500 + }, + { + "epoch": 39.33, + "learning_rate": 2.667225920017603e-08, + "loss": 3.7587, + "step": 3575000 + }, + { + "epoch": 39.34, + "learning_rate": 2.6658507068595632e-08, + "loss": 3.7494, + "step": 3575500 + }, + { + "epoch": 39.34, + "learning_rate": 2.6644754937015235e-08, + "loss": 3.7504, + "step": 3576000 + }, + { + "epoch": 39.35, + "learning_rate": 2.6631002805434844e-08, + "loss": 3.7508, + "step": 3576500 + }, + { + "epoch": 39.35, + "learning_rate": 2.6617250673854447e-08, + "loss": 3.7532, + "step": 3577000 + }, + { + "epoch": 39.36, + "learning_rate": 2.660349854227405e-08, + "loss": 3.7461, + "step": 3577500 + }, + { + "epoch": 39.36, + "learning_rate": 2.658974641069366e-08, + "loss": 3.7665, + "step": 3578000 + }, + { + "epoch": 39.37, + "learning_rate": 2.6575994279113262e-08, + "loss": 3.7678, + "step": 3578500 + }, + { + "epoch": 39.38, + "learning_rate": 2.6562242147532864e-08, + "loss": 3.7574, + "step": 3579000 + }, + { + "epoch": 39.38, + "learning_rate": 2.6548490015952474e-08, + "loss": 3.7469, + "step": 3579500 + }, + { + "epoch": 39.39, + "learning_rate": 2.6534737884372077e-08, + "loss": 3.7684, + "step": 3580000 + }, + { + "epoch": 39.39, + "learning_rate": 2.652098575279168e-08, + "loss": 3.753, + "step": 3580500 + }, + { + "epoch": 39.4, + "learning_rate": 2.650723362121129e-08, + "loss": 3.7608, + "step": 3581000 + }, + { + "epoch": 39.4, + "learning_rate": 2.649348148963089e-08, + "loss": 3.7634, + "step": 3581500 + }, + { + "epoch": 39.41, + "learning_rate": 2.6479729358050494e-08, + "loss": 3.7401, + "step": 3582000 + }, + { + "epoch": 39.41, + "learning_rate": 2.6465977226470103e-08, + "loss": 3.7589, + "step": 3582500 + }, + { + "epoch": 39.42, + "learning_rate": 2.6452225094889706e-08, + "loss": 3.7508, + "step": 3583000 + }, + { + "epoch": 39.42, + "learning_rate": 2.6438472963309312e-08, + "loss": 3.7686, + "step": 3583500 + }, + { + "epoch": 39.43, + "learning_rate": 2.6424720831728918e-08, + "loss": 3.7509, + "step": 3584000 + }, + { + "epoch": 39.44, + "learning_rate": 2.6410968700148524e-08, + "loss": 3.7568, + "step": 3584500 + }, + { + "epoch": 39.44, + "learning_rate": 2.6397216568568127e-08, + "loss": 3.7508, + "step": 3585000 + }, + { + "epoch": 39.45, + "learning_rate": 2.6383464436987733e-08, + "loss": 3.7476, + "step": 3585500 + }, + { + "epoch": 39.45, + "learning_rate": 2.636971230540734e-08, + "loss": 3.7497, + "step": 3586000 + }, + { + "epoch": 39.46, + "learning_rate": 2.6355960173826942e-08, + "loss": 3.756, + "step": 3586500 + }, + { + "epoch": 39.46, + "learning_rate": 2.6342208042246545e-08, + "loss": 3.7475, + "step": 3587000 + }, + { + "epoch": 39.47, + "learning_rate": 2.6328455910666154e-08, + "loss": 3.7439, + "step": 3587500 + }, + { + "epoch": 39.47, + "learning_rate": 2.6314703779085757e-08, + "loss": 3.7544, + "step": 3588000 + }, + { + "epoch": 39.48, + "learning_rate": 2.630095164750536e-08, + "loss": 3.7585, + "step": 3588500 + }, + { + "epoch": 39.49, + "learning_rate": 2.628719951592497e-08, + "loss": 3.7376, + "step": 3589000 + }, + { + "epoch": 39.49, + "learning_rate": 2.627344738434457e-08, + "loss": 3.739, + "step": 3589500 + }, + { + "epoch": 39.5, + "learning_rate": 2.6259695252764174e-08, + "loss": 3.7554, + "step": 3590000 + }, + { + "epoch": 39.5, + "learning_rate": 2.6245943121183783e-08, + "loss": 3.7475, + "step": 3590500 + }, + { + "epoch": 39.51, + "learning_rate": 2.6232190989603386e-08, + "loss": 3.7488, + "step": 3591000 + }, + { + "epoch": 39.51, + "learning_rate": 2.621843885802299e-08, + "loss": 3.7381, + "step": 3591500 + }, + { + "epoch": 39.52, + "learning_rate": 2.6204686726442598e-08, + "loss": 3.7652, + "step": 3592000 + }, + { + "epoch": 39.52, + "learning_rate": 2.61909345948622e-08, + "loss": 3.7506, + "step": 3592500 + }, + { + "epoch": 39.53, + "learning_rate": 2.6177182463281804e-08, + "loss": 3.7288, + "step": 3593000 + }, + { + "epoch": 39.53, + "learning_rate": 2.6163430331701413e-08, + "loss": 3.7374, + "step": 3593500 + }, + { + "epoch": 39.54, + "learning_rate": 2.6149678200121016e-08, + "loss": 3.7555, + "step": 3594000 + }, + { + "epoch": 39.55, + "learning_rate": 2.6135926068540622e-08, + "loss": 3.7623, + "step": 3594500 + }, + { + "epoch": 39.55, + "learning_rate": 2.6122173936960228e-08, + "loss": 3.7537, + "step": 3595000 + }, + { + "epoch": 39.56, + "learning_rate": 2.610842180537983e-08, + "loss": 3.7435, + "step": 3595500 + }, + { + "epoch": 39.56, + "learning_rate": 2.6094669673799437e-08, + "loss": 3.765, + "step": 3596000 + }, + { + "epoch": 39.57, + "learning_rate": 2.6080917542219043e-08, + "loss": 3.7544, + "step": 3596500 + }, + { + "epoch": 39.57, + "learning_rate": 2.606716541063865e-08, + "loss": 3.773, + "step": 3597000 + }, + { + "epoch": 39.58, + "learning_rate": 2.605341327905825e-08, + "loss": 3.7369, + "step": 3597500 + }, + { + "epoch": 39.58, + "learning_rate": 2.603966114747786e-08, + "loss": 3.7827, + "step": 3598000 + }, + { + "epoch": 39.59, + "learning_rate": 2.6025909015897463e-08, + "loss": 3.733, + "step": 3598500 + }, + { + "epoch": 39.6, + "learning_rate": 2.6012156884317066e-08, + "loss": 3.7613, + "step": 3599000 + }, + { + "epoch": 39.6, + "learning_rate": 2.5998404752736676e-08, + "loss": 3.7647, + "step": 3599500 + }, + { + "epoch": 39.61, + "learning_rate": 2.5984652621156278e-08, + "loss": 3.7525, + "step": 3600000 + }, + { + "epoch": 39.61, + "learning_rate": 2.597090048957588e-08, + "loss": 3.7465, + "step": 3600500 + }, + { + "epoch": 39.62, + "learning_rate": 2.595714835799549e-08, + "loss": 3.7643, + "step": 3601000 + }, + { + "epoch": 39.62, + "learning_rate": 2.5943396226415093e-08, + "loss": 3.7448, + "step": 3601500 + }, + { + "epoch": 39.63, + "learning_rate": 2.5929644094834696e-08, + "loss": 3.7479, + "step": 3602000 + }, + { + "epoch": 39.63, + "learning_rate": 2.5915891963254305e-08, + "loss": 3.7394, + "step": 3602500 + }, + { + "epoch": 39.64, + "learning_rate": 2.5902139831673908e-08, + "loss": 3.7712, + "step": 3603000 + }, + { + "epoch": 39.64, + "learning_rate": 2.588838770009351e-08, + "loss": 3.7561, + "step": 3603500 + }, + { + "epoch": 39.65, + "learning_rate": 2.587463556851312e-08, + "loss": 3.7628, + "step": 3604000 + }, + { + "epoch": 39.66, + "learning_rate": 2.5860883436932723e-08, + "loss": 3.7543, + "step": 3604500 + }, + { + "epoch": 39.66, + "learning_rate": 2.5847131305352325e-08, + "loss": 3.7771, + "step": 3605000 + }, + { + "epoch": 39.67, + "learning_rate": 2.5833379173771935e-08, + "loss": 3.7358, + "step": 3605500 + }, + { + "epoch": 39.67, + "learning_rate": 2.5819627042191537e-08, + "loss": 3.7361, + "step": 3606000 + }, + { + "epoch": 39.68, + "learning_rate": 2.580587491061114e-08, + "loss": 3.7409, + "step": 3606500 + }, + { + "epoch": 39.68, + "learning_rate": 2.579212277903075e-08, + "loss": 3.7485, + "step": 3607000 + }, + { + "epoch": 39.69, + "learning_rate": 2.5778370647450352e-08, + "loss": 3.7512, + "step": 3607500 + }, + { + "epoch": 39.69, + "learning_rate": 2.5764618515869958e-08, + "loss": 3.7575, + "step": 3608000 + }, + { + "epoch": 39.7, + "learning_rate": 2.5750866384289564e-08, + "loss": 3.7575, + "step": 3608500 + }, + { + "epoch": 39.71, + "learning_rate": 2.573711425270917e-08, + "loss": 3.7486, + "step": 3609000 + }, + { + "epoch": 39.71, + "learning_rate": 2.5723362121128773e-08, + "loss": 3.7514, + "step": 3609500 + }, + { + "epoch": 39.72, + "learning_rate": 2.5709609989548382e-08, + "loss": 3.7499, + "step": 3610000 + }, + { + "epoch": 39.72, + "learning_rate": 2.5695857857967985e-08, + "loss": 3.7663, + "step": 3610500 + }, + { + "epoch": 39.73, + "learning_rate": 2.5682105726387588e-08, + "loss": 3.7386, + "step": 3611000 + }, + { + "epoch": 39.73, + "learning_rate": 2.5668353594807197e-08, + "loss": 3.7534, + "step": 3611500 + }, + { + "epoch": 39.74, + "learning_rate": 2.56546014632268e-08, + "loss": 3.7624, + "step": 3612000 + }, + { + "epoch": 39.74, + "learning_rate": 2.5640849331646403e-08, + "loss": 3.753, + "step": 3612500 + }, + { + "epoch": 39.75, + "learning_rate": 2.5627097200066012e-08, + "loss": 3.7432, + "step": 3613000 + }, + { + "epoch": 39.75, + "learning_rate": 2.5613345068485615e-08, + "loss": 3.7619, + "step": 3613500 + }, + { + "epoch": 39.76, + "learning_rate": 2.5599592936905217e-08, + "loss": 3.7607, + "step": 3614000 + }, + { + "epoch": 39.77, + "learning_rate": 2.5585840805324827e-08, + "loss": 3.7643, + "step": 3614500 + }, + { + "epoch": 39.77, + "learning_rate": 2.557208867374443e-08, + "loss": 3.7582, + "step": 3615000 + }, + { + "epoch": 39.78, + "learning_rate": 2.5558336542164032e-08, + "loss": 3.7385, + "step": 3615500 + }, + { + "epoch": 39.78, + "learning_rate": 2.554458441058364e-08, + "loss": 3.774, + "step": 3616000 + }, + { + "epoch": 39.79, + "learning_rate": 2.5530832279003244e-08, + "loss": 3.7446, + "step": 3616500 + }, + { + "epoch": 39.79, + "learning_rate": 2.5517080147422847e-08, + "loss": 3.7795, + "step": 3617000 + }, + { + "epoch": 39.8, + "learning_rate": 2.5503328015842456e-08, + "loss": 3.7636, + "step": 3617500 + }, + { + "epoch": 39.8, + "learning_rate": 2.548957588426206e-08, + "loss": 3.7638, + "step": 3618000 + }, + { + "epoch": 39.81, + "learning_rate": 2.5475823752681662e-08, + "loss": 3.7551, + "step": 3618500 + }, + { + "epoch": 39.82, + "learning_rate": 2.546207162110127e-08, + "loss": 3.747, + "step": 3619000 + }, + { + "epoch": 39.82, + "learning_rate": 2.5448319489520874e-08, + "loss": 3.756, + "step": 3619500 + }, + { + "epoch": 39.83, + "learning_rate": 2.543456735794048e-08, + "loss": 3.7599, + "step": 3620000 + }, + { + "epoch": 39.83, + "learning_rate": 2.5420815226360086e-08, + "loss": 3.7624, + "step": 3620500 + }, + { + "epoch": 39.84, + "learning_rate": 2.5407063094779692e-08, + "loss": 3.7513, + "step": 3621000 + }, + { + "epoch": 39.84, + "learning_rate": 2.5393310963199295e-08, + "loss": 3.7631, + "step": 3621500 + }, + { + "epoch": 39.85, + "learning_rate": 2.5379558831618904e-08, + "loss": 3.7609, + "step": 3622000 + }, + { + "epoch": 39.85, + "learning_rate": 2.5365806700038507e-08, + "loss": 3.7623, + "step": 3622500 + }, + { + "epoch": 39.86, + "learning_rate": 2.535205456845811e-08, + "loss": 3.7492, + "step": 3623000 + }, + { + "epoch": 39.86, + "learning_rate": 2.533830243687772e-08, + "loss": 3.7356, + "step": 3623500 + }, + { + "epoch": 39.87, + "learning_rate": 2.532455030529732e-08, + "loss": 3.7639, + "step": 3624000 + }, + { + "epoch": 39.88, + "learning_rate": 2.5310798173716924e-08, + "loss": 3.7451, + "step": 3624500 + }, + { + "epoch": 39.88, + "learning_rate": 2.5297046042136527e-08, + "loss": 3.7455, + "step": 3625000 + }, + { + "epoch": 39.89, + "learning_rate": 2.5283293910556136e-08, + "loss": 3.7552, + "step": 3625500 + }, + { + "epoch": 39.89, + "learning_rate": 2.526954177897574e-08, + "loss": 3.742, + "step": 3626000 + }, + { + "epoch": 39.9, + "learning_rate": 2.5255789647395342e-08, + "loss": 3.7565, + "step": 3626500 + }, + { + "epoch": 39.9, + "learning_rate": 2.524203751581495e-08, + "loss": 3.7544, + "step": 3627000 + }, + { + "epoch": 39.91, + "learning_rate": 2.5228285384234554e-08, + "loss": 3.7759, + "step": 3627500 + }, + { + "epoch": 39.91, + "learning_rate": 2.5214533252654157e-08, + "loss": 3.7453, + "step": 3628000 + }, + { + "epoch": 39.92, + "learning_rate": 2.5200781121073766e-08, + "loss": 3.7566, + "step": 3628500 + }, + { + "epoch": 39.93, + "learning_rate": 2.518702898949337e-08, + "loss": 3.7467, + "step": 3629000 + }, + { + "epoch": 39.93, + "learning_rate": 2.517327685791297e-08, + "loss": 3.7466, + "step": 3629500 + }, + { + "epoch": 39.94, + "learning_rate": 2.515952472633258e-08, + "loss": 3.7533, + "step": 3630000 + }, + { + "epoch": 39.94, + "learning_rate": 2.5145772594752184e-08, + "loss": 3.7639, + "step": 3630500 + }, + { + "epoch": 39.95, + "learning_rate": 2.513202046317179e-08, + "loss": 3.7613, + "step": 3631000 + }, + { + "epoch": 39.95, + "learning_rate": 2.5118268331591396e-08, + "loss": 3.7544, + "step": 3631500 + }, + { + "epoch": 39.96, + "learning_rate": 2.5104516200011e-08, + "loss": 3.7549, + "step": 3632000 + }, + { + "epoch": 39.96, + "learning_rate": 2.5090764068430604e-08, + "loss": 3.7352, + "step": 3632500 + }, + { + "epoch": 39.97, + "learning_rate": 2.507701193685021e-08, + "loss": 3.746, + "step": 3633000 + }, + { + "epoch": 39.97, + "learning_rate": 2.5063259805269816e-08, + "loss": 3.7455, + "step": 3633500 + }, + { + "epoch": 39.98, + "learning_rate": 2.504950767368942e-08, + "loss": 3.7472, + "step": 3634000 + }, + { + "epoch": 39.99, + "learning_rate": 2.503575554210903e-08, + "loss": 3.764, + "step": 3634500 + }, + { + "epoch": 39.99, + "learning_rate": 2.502200341052863e-08, + "loss": 3.7572, + "step": 3635000 + }, + { + "epoch": 40.0, + "learning_rate": 2.5008251278948234e-08, + "loss": 3.7413, + "step": 3635500 + }, + { + "epoch": 40.0, + "eval_loss": 3.8280036449432373, + "eval_runtime": 6.1422, + "eval_samples_per_second": 253.003, + "step": 3635800 + }, + { + "epoch": 40.0, + "learning_rate": 2.499449914736784e-08, + "loss": 3.7442, + "step": 3636000 + }, + { + "epoch": 40.01, + "learning_rate": 2.4980747015787446e-08, + "loss": 3.7811, + "step": 3636500 + }, + { + "epoch": 40.01, + "learning_rate": 2.4966994884207052e-08, + "loss": 3.7505, + "step": 3637000 + }, + { + "epoch": 40.02, + "learning_rate": 2.4953242752626655e-08, + "loss": 3.7792, + "step": 3637500 + }, + { + "epoch": 40.02, + "learning_rate": 2.493949062104626e-08, + "loss": 3.758, + "step": 3638000 + }, + { + "epoch": 40.03, + "learning_rate": 2.4925738489465867e-08, + "loss": 3.7368, + "step": 3638500 + }, + { + "epoch": 40.04, + "learning_rate": 2.491198635788547e-08, + "loss": 3.7549, + "step": 3639000 + }, + { + "epoch": 40.04, + "learning_rate": 2.4898234226305076e-08, + "loss": 3.7389, + "step": 3639500 + }, + { + "epoch": 40.05, + "learning_rate": 2.488448209472468e-08, + "loss": 3.7411, + "step": 3640000 + }, + { + "epoch": 40.05, + "learning_rate": 2.4870729963144284e-08, + "loss": 3.7476, + "step": 3640500 + }, + { + "epoch": 40.06, + "learning_rate": 2.485697783156389e-08, + "loss": 3.7505, + "step": 3641000 + }, + { + "epoch": 40.06, + "learning_rate": 2.4843225699983496e-08, + "loss": 3.751, + "step": 3641500 + }, + { + "epoch": 40.07, + "learning_rate": 2.48294735684031e-08, + "loss": 3.7524, + "step": 3642000 + }, + { + "epoch": 40.07, + "learning_rate": 2.4815721436822705e-08, + "loss": 3.7497, + "step": 3642500 + }, + { + "epoch": 40.08, + "learning_rate": 2.480196930524231e-08, + "loss": 3.7489, + "step": 3643000 + }, + { + "epoch": 40.08, + "learning_rate": 2.4788217173661914e-08, + "loss": 3.7505, + "step": 3643500 + }, + { + "epoch": 40.09, + "learning_rate": 2.477446504208152e-08, + "loss": 3.751, + "step": 3644000 + }, + { + "epoch": 40.1, + "learning_rate": 2.4760712910501126e-08, + "loss": 3.7529, + "step": 3644500 + }, + { + "epoch": 40.1, + "learning_rate": 2.4746960778920732e-08, + "loss": 3.7683, + "step": 3645000 + }, + { + "epoch": 40.11, + "learning_rate": 2.4733208647340338e-08, + "loss": 3.7568, + "step": 3645500 + }, + { + "epoch": 40.11, + "learning_rate": 2.4719456515759944e-08, + "loss": 3.7498, + "step": 3646000 + }, + { + "epoch": 40.12, + "learning_rate": 2.4705704384179547e-08, + "loss": 3.7549, + "step": 3646500 + }, + { + "epoch": 40.12, + "learning_rate": 2.4691952252599153e-08, + "loss": 3.7663, + "step": 3647000 + }, + { + "epoch": 40.13, + "learning_rate": 2.467820012101876e-08, + "loss": 3.7708, + "step": 3647500 + }, + { + "epoch": 40.13, + "learning_rate": 2.466444798943836e-08, + "loss": 3.7673, + "step": 3648000 + }, + { + "epoch": 40.14, + "learning_rate": 2.4650695857857968e-08, + "loss": 3.7371, + "step": 3648500 + }, + { + "epoch": 40.15, + "learning_rate": 2.4636943726277574e-08, + "loss": 3.7526, + "step": 3649000 + }, + { + "epoch": 40.15, + "learning_rate": 2.4623191594697176e-08, + "loss": 3.7514, + "step": 3649500 + }, + { + "epoch": 40.16, + "learning_rate": 2.4609439463116782e-08, + "loss": 3.7619, + "step": 3650000 + }, + { + "epoch": 40.16, + "learning_rate": 2.459568733153639e-08, + "loss": 3.7652, + "step": 3650500 + }, + { + "epoch": 40.17, + "learning_rate": 2.458193519995599e-08, + "loss": 3.77, + "step": 3651000 + }, + { + "epoch": 40.17, + "learning_rate": 2.4568183068375597e-08, + "loss": 3.7597, + "step": 3651500 + }, + { + "epoch": 40.18, + "learning_rate": 2.4554430936795203e-08, + "loss": 3.7499, + "step": 3652000 + }, + { + "epoch": 40.18, + "learning_rate": 2.4540678805214806e-08, + "loss": 3.7295, + "step": 3652500 + }, + { + "epoch": 40.19, + "learning_rate": 2.4526926673634412e-08, + "loss": 3.7581, + "step": 3653000 + }, + { + "epoch": 40.19, + "learning_rate": 2.4513174542054015e-08, + "loss": 3.7627, + "step": 3653500 + }, + { + "epoch": 40.2, + "learning_rate": 2.449942241047362e-08, + "loss": 3.762, + "step": 3654000 + }, + { + "epoch": 40.21, + "learning_rate": 2.4485670278893227e-08, + "loss": 3.756, + "step": 3654500 + }, + { + "epoch": 40.21, + "learning_rate": 2.447191814731283e-08, + "loss": 3.7854, + "step": 3655000 + }, + { + "epoch": 40.22, + "learning_rate": 2.4458166015732436e-08, + "loss": 3.7549, + "step": 3655500 + }, + { + "epoch": 40.22, + "learning_rate": 2.444441388415204e-08, + "loss": 3.7408, + "step": 3656000 + }, + { + "epoch": 40.23, + "learning_rate": 2.4430661752571648e-08, + "loss": 3.7669, + "step": 3656500 + }, + { + "epoch": 40.23, + "learning_rate": 2.4416909620991254e-08, + "loss": 3.7574, + "step": 3657000 + }, + { + "epoch": 40.24, + "learning_rate": 2.440315748941086e-08, + "loss": 3.7767, + "step": 3657500 + }, + { + "epoch": 40.24, + "learning_rate": 2.4389405357830462e-08, + "loss": 3.7453, + "step": 3658000 + }, + { + "epoch": 40.25, + "learning_rate": 2.437565322625007e-08, + "loss": 3.731, + "step": 3658500 + }, + { + "epoch": 40.26, + "learning_rate": 2.4361901094669675e-08, + "loss": 3.7741, + "step": 3659000 + }, + { + "epoch": 40.26, + "learning_rate": 2.4348148963089277e-08, + "loss": 3.768, + "step": 3659500 + }, + { + "epoch": 40.27, + "learning_rate": 2.4334396831508883e-08, + "loss": 3.7396, + "step": 3660000 + }, + { + "epoch": 40.27, + "learning_rate": 2.432064469992849e-08, + "loss": 3.7497, + "step": 3660500 + }, + { + "epoch": 40.28, + "learning_rate": 2.4306892568348092e-08, + "loss": 3.7385, + "step": 3661000 + }, + { + "epoch": 40.28, + "learning_rate": 2.4293140436767698e-08, + "loss": 3.7418, + "step": 3661500 + }, + { + "epoch": 40.29, + "learning_rate": 2.4279388305187304e-08, + "loss": 3.7454, + "step": 3662000 + }, + { + "epoch": 40.29, + "learning_rate": 2.4265636173606907e-08, + "loss": 3.7345, + "step": 3662500 + }, + { + "epoch": 40.3, + "learning_rate": 2.4251884042026513e-08, + "loss": 3.7431, + "step": 3663000 + }, + { + "epoch": 40.3, + "learning_rate": 2.423813191044612e-08, + "loss": 3.7608, + "step": 3663500 + }, + { + "epoch": 40.31, + "learning_rate": 2.4224379778865722e-08, + "loss": 3.7438, + "step": 3664000 + }, + { + "epoch": 40.32, + "learning_rate": 2.4210627647285328e-08, + "loss": 3.7593, + "step": 3664500 + }, + { + "epoch": 40.32, + "learning_rate": 2.4196875515704934e-08, + "loss": 3.762, + "step": 3665000 + }, + { + "epoch": 40.33, + "learning_rate": 2.4183123384124536e-08, + "loss": 3.7565, + "step": 3665500 + }, + { + "epoch": 40.33, + "learning_rate": 2.4169371252544142e-08, + "loss": 3.7698, + "step": 3666000 + }, + { + "epoch": 40.34, + "learning_rate": 2.415561912096375e-08, + "loss": 3.7306, + "step": 3666500 + }, + { + "epoch": 40.34, + "learning_rate": 2.414186698938335e-08, + "loss": 3.7387, + "step": 3667000 + }, + { + "epoch": 40.35, + "learning_rate": 2.4128114857802957e-08, + "loss": 3.7603, + "step": 3667500 + }, + { + "epoch": 40.35, + "learning_rate": 2.4114362726222563e-08, + "loss": 3.7615, + "step": 3668000 + }, + { + "epoch": 40.36, + "learning_rate": 2.410061059464217e-08, + "loss": 3.7574, + "step": 3668500 + }, + { + "epoch": 40.37, + "learning_rate": 2.4086858463061775e-08, + "loss": 3.7618, + "step": 3669000 + }, + { + "epoch": 40.37, + "learning_rate": 2.407310633148138e-08, + "loss": 3.7468, + "step": 3669500 + }, + { + "epoch": 40.38, + "learning_rate": 2.4059354199900984e-08, + "loss": 3.7649, + "step": 3670000 + }, + { + "epoch": 40.38, + "learning_rate": 2.404560206832059e-08, + "loss": 3.7842, + "step": 3670500 + }, + { + "epoch": 40.39, + "learning_rate": 2.4031849936740196e-08, + "loss": 3.7433, + "step": 3671000 + }, + { + "epoch": 40.39, + "learning_rate": 2.40180978051598e-08, + "loss": 3.7461, + "step": 3671500 + }, + { + "epoch": 40.4, + "learning_rate": 2.4004345673579405e-08, + "loss": 3.748, + "step": 3672000 + }, + { + "epoch": 40.4, + "learning_rate": 2.3990593541999008e-08, + "loss": 3.7319, + "step": 3672500 + }, + { + "epoch": 40.41, + "learning_rate": 2.3976841410418614e-08, + "loss": 3.7704, + "step": 3673000 + }, + { + "epoch": 40.41, + "learning_rate": 2.396308927883822e-08, + "loss": 3.7613, + "step": 3673500 + }, + { + "epoch": 40.42, + "learning_rate": 2.3949337147257823e-08, + "loss": 3.7466, + "step": 3674000 + }, + { + "epoch": 40.43, + "learning_rate": 2.393558501567743e-08, + "loss": 3.7571, + "step": 3674500 + }, + { + "epoch": 40.43, + "learning_rate": 2.3921832884097035e-08, + "loss": 3.7626, + "step": 3675000 + }, + { + "epoch": 40.44, + "learning_rate": 2.3908080752516637e-08, + "loss": 3.7379, + "step": 3675500 + }, + { + "epoch": 40.44, + "learning_rate": 2.3894328620936243e-08, + "loss": 3.7528, + "step": 3676000 + }, + { + "epoch": 40.45, + "learning_rate": 2.388057648935585e-08, + "loss": 3.7584, + "step": 3676500 + }, + { + "epoch": 40.45, + "learning_rate": 2.3866824357775452e-08, + "loss": 3.7486, + "step": 3677000 + }, + { + "epoch": 40.46, + "learning_rate": 2.3853072226195058e-08, + "loss": 3.7545, + "step": 3677500 + }, + { + "epoch": 40.46, + "learning_rate": 2.3839320094614664e-08, + "loss": 3.7479, + "step": 3678000 + }, + { + "epoch": 40.47, + "learning_rate": 2.3825567963034267e-08, + "loss": 3.7772, + "step": 3678500 + }, + { + "epoch": 40.48, + "learning_rate": 2.3811815831453873e-08, + "loss": 3.7687, + "step": 3679000 + }, + { + "epoch": 40.48, + "learning_rate": 2.379806369987348e-08, + "loss": 3.7486, + "step": 3679500 + }, + { + "epoch": 40.49, + "learning_rate": 2.3784311568293085e-08, + "loss": 3.745, + "step": 3680000 + }, + { + "epoch": 40.49, + "learning_rate": 2.3770559436712688e-08, + "loss": 3.7553, + "step": 3680500 + }, + { + "epoch": 40.5, + "learning_rate": 2.3756807305132294e-08, + "loss": 3.7608, + "step": 3681000 + }, + { + "epoch": 40.5, + "learning_rate": 2.37430551735519e-08, + "loss": 3.7382, + "step": 3681500 + }, + { + "epoch": 40.51, + "learning_rate": 2.3729303041971506e-08, + "loss": 3.748, + "step": 3682000 + }, + { + "epoch": 40.51, + "learning_rate": 2.3715550910391112e-08, + "loss": 3.7452, + "step": 3682500 + }, + { + "epoch": 40.52, + "learning_rate": 2.3701798778810715e-08, + "loss": 3.7484, + "step": 3683000 + }, + { + "epoch": 40.52, + "learning_rate": 2.368804664723032e-08, + "loss": 3.7618, + "step": 3683500 + }, + { + "epoch": 40.53, + "learning_rate": 2.3674294515649927e-08, + "loss": 3.7637, + "step": 3684000 + }, + { + "epoch": 40.54, + "learning_rate": 2.366054238406953e-08, + "loss": 3.7464, + "step": 3684500 + }, + { + "epoch": 40.54, + "learning_rate": 2.3646790252489135e-08, + "loss": 3.751, + "step": 3685000 + }, + { + "epoch": 40.55, + "learning_rate": 2.363303812090874e-08, + "loss": 3.7315, + "step": 3685500 + }, + { + "epoch": 40.55, + "learning_rate": 2.3619285989328344e-08, + "loss": 3.7446, + "step": 3686000 + }, + { + "epoch": 40.56, + "learning_rate": 2.360553385774795e-08, + "loss": 3.7501, + "step": 3686500 + }, + { + "epoch": 40.56, + "learning_rate": 2.3591781726167556e-08, + "loss": 3.7567, + "step": 3687000 + }, + { + "epoch": 40.57, + "learning_rate": 2.357802959458716e-08, + "loss": 3.7624, + "step": 3687500 + }, + { + "epoch": 40.57, + "learning_rate": 2.3564277463006765e-08, + "loss": 3.7453, + "step": 3688000 + }, + { + "epoch": 40.58, + "learning_rate": 2.355052533142637e-08, + "loss": 3.7343, + "step": 3688500 + }, + { + "epoch": 40.59, + "learning_rate": 2.3536773199845974e-08, + "loss": 3.7285, + "step": 3689000 + }, + { + "epoch": 40.59, + "learning_rate": 2.352302106826558e-08, + "loss": 3.7435, + "step": 3689500 + }, + { + "epoch": 40.6, + "learning_rate": 2.3509268936685186e-08, + "loss": 3.7478, + "step": 3690000 + }, + { + "epoch": 40.6, + "learning_rate": 2.349551680510479e-08, + "loss": 3.7543, + "step": 3690500 + }, + { + "epoch": 40.61, + "learning_rate": 2.3481764673524395e-08, + "loss": 3.7568, + "step": 3691000 + }, + { + "epoch": 40.61, + "learning_rate": 2.3468012541943997e-08, + "loss": 3.7517, + "step": 3691500 + }, + { + "epoch": 40.62, + "learning_rate": 2.3454260410363603e-08, + "loss": 3.7589, + "step": 3692000 + }, + { + "epoch": 40.62, + "learning_rate": 2.344050827878321e-08, + "loss": 3.751, + "step": 3692500 + }, + { + "epoch": 40.63, + "learning_rate": 2.3426756147202815e-08, + "loss": 3.7353, + "step": 3693000 + }, + { + "epoch": 40.63, + "learning_rate": 2.341300401562242e-08, + "loss": 3.7469, + "step": 3693500 + }, + { + "epoch": 40.64, + "learning_rate": 2.3399251884042027e-08, + "loss": 3.7485, + "step": 3694000 + }, + { + "epoch": 40.65, + "learning_rate": 2.338549975246163e-08, + "loss": 3.7652, + "step": 3694500 + }, + { + "epoch": 40.65, + "learning_rate": 2.3371747620881236e-08, + "loss": 3.747, + "step": 3695000 + }, + { + "epoch": 40.66, + "learning_rate": 2.3357995489300842e-08, + "loss": 3.7773, + "step": 3695500 + }, + { + "epoch": 40.66, + "learning_rate": 2.3344243357720445e-08, + "loss": 3.7505, + "step": 3696000 + }, + { + "epoch": 40.67, + "learning_rate": 2.333049122614005e-08, + "loss": 3.7555, + "step": 3696500 + }, + { + "epoch": 40.67, + "learning_rate": 2.3316739094559657e-08, + "loss": 3.7529, + "step": 3697000 + }, + { + "epoch": 40.68, + "learning_rate": 2.330298696297926e-08, + "loss": 3.7597, + "step": 3697500 + }, + { + "epoch": 40.68, + "learning_rate": 2.3289234831398866e-08, + "loss": 3.7524, + "step": 3698000 + }, + { + "epoch": 40.69, + "learning_rate": 2.3275482699818472e-08, + "loss": 3.7679, + "step": 3698500 + }, + { + "epoch": 40.7, + "learning_rate": 2.3261730568238075e-08, + "loss": 3.7462, + "step": 3699000 + }, + { + "epoch": 40.7, + "learning_rate": 2.324797843665768e-08, + "loss": 3.7436, + "step": 3699500 + }, + { + "epoch": 40.71, + "learning_rate": 2.3234226305077287e-08, + "loss": 3.7497, + "step": 3700000 + }, + { + "epoch": 40.71, + "learning_rate": 2.322047417349689e-08, + "loss": 3.7518, + "step": 3700500 + }, + { + "epoch": 40.72, + "learning_rate": 2.3206722041916495e-08, + "loss": 3.7489, + "step": 3701000 + }, + { + "epoch": 40.72, + "learning_rate": 2.31929699103361e-08, + "loss": 3.7569, + "step": 3701500 + }, + { + "epoch": 40.73, + "learning_rate": 2.3179217778755704e-08, + "loss": 3.7521, + "step": 3702000 + }, + { + "epoch": 40.73, + "learning_rate": 2.316546564717531e-08, + "loss": 3.7539, + "step": 3702500 + }, + { + "epoch": 40.74, + "learning_rate": 2.3151713515594916e-08, + "loss": 3.7507, + "step": 3703000 + }, + { + "epoch": 40.74, + "learning_rate": 2.313796138401452e-08, + "loss": 3.7574, + "step": 3703500 + }, + { + "epoch": 40.75, + "learning_rate": 2.3124209252434125e-08, + "loss": 3.7616, + "step": 3704000 + }, + { + "epoch": 40.76, + "learning_rate": 2.311045712085373e-08, + "loss": 3.7539, + "step": 3704500 + }, + { + "epoch": 40.76, + "learning_rate": 2.3096704989273337e-08, + "loss": 3.7629, + "step": 3705000 + }, + { + "epoch": 40.77, + "learning_rate": 2.3082952857692943e-08, + "loss": 3.7405, + "step": 3705500 + }, + { + "epoch": 40.77, + "learning_rate": 2.306920072611255e-08, + "loss": 3.7474, + "step": 3706000 + }, + { + "epoch": 40.78, + "learning_rate": 2.3055448594532152e-08, + "loss": 3.7701, + "step": 3706500 + }, + { + "epoch": 40.78, + "learning_rate": 2.3041696462951758e-08, + "loss": 3.744, + "step": 3707000 + }, + { + "epoch": 40.79, + "learning_rate": 2.3027944331371364e-08, + "loss": 3.746, + "step": 3707500 + }, + { + "epoch": 40.79, + "learning_rate": 2.3014192199790967e-08, + "loss": 3.7631, + "step": 3708000 + }, + { + "epoch": 40.8, + "learning_rate": 2.3000440068210573e-08, + "loss": 3.7351, + "step": 3708500 + }, + { + "epoch": 40.81, + "learning_rate": 2.298668793663018e-08, + "loss": 3.7712, + "step": 3709000 + }, + { + "epoch": 40.81, + "learning_rate": 2.297293580504978e-08, + "loss": 3.7618, + "step": 3709500 + }, + { + "epoch": 40.82, + "learning_rate": 2.2959183673469388e-08, + "loss": 3.7501, + "step": 3710000 + }, + { + "epoch": 40.82, + "learning_rate": 2.294543154188899e-08, + "loss": 3.7593, + "step": 3710500 + }, + { + "epoch": 40.83, + "learning_rate": 2.2931679410308596e-08, + "loss": 3.7435, + "step": 3711000 + }, + { + "epoch": 40.83, + "learning_rate": 2.2917927278728202e-08, + "loss": 3.7384, + "step": 3711500 + }, + { + "epoch": 40.84, + "learning_rate": 2.2904175147147805e-08, + "loss": 3.7597, + "step": 3712000 + }, + { + "epoch": 40.84, + "learning_rate": 2.289042301556741e-08, + "loss": 3.7686, + "step": 3712500 + }, + { + "epoch": 40.85, + "learning_rate": 2.2876670883987017e-08, + "loss": 3.768, + "step": 3713000 + }, + { + "epoch": 40.85, + "learning_rate": 2.286291875240662e-08, + "loss": 3.77, + "step": 3713500 + }, + { + "epoch": 40.86, + "learning_rate": 2.2849166620826226e-08, + "loss": 3.7626, + "step": 3714000 + }, + { + "epoch": 40.87, + "learning_rate": 2.2835414489245832e-08, + "loss": 3.7454, + "step": 3714500 + }, + { + "epoch": 40.87, + "learning_rate": 2.2821662357665435e-08, + "loss": 3.7454, + "step": 3715000 + }, + { + "epoch": 40.88, + "learning_rate": 2.280791022608504e-08, + "loss": 3.7522, + "step": 3715500 + }, + { + "epoch": 40.88, + "learning_rate": 2.2794158094504647e-08, + "loss": 3.7403, + "step": 3716000 + }, + { + "epoch": 40.89, + "learning_rate": 2.2780405962924253e-08, + "loss": 3.7291, + "step": 3716500 + }, + { + "epoch": 40.89, + "learning_rate": 2.276665383134386e-08, + "loss": 3.762, + "step": 3717000 + }, + { + "epoch": 40.9, + "learning_rate": 2.2752901699763465e-08, + "loss": 3.7236, + "step": 3717500 + }, + { + "epoch": 40.9, + "learning_rate": 2.2739149568183068e-08, + "loss": 3.7745, + "step": 3718000 + }, + { + "epoch": 40.91, + "learning_rate": 2.2725397436602674e-08, + "loss": 3.7601, + "step": 3718500 + }, + { + "epoch": 40.92, + "learning_rate": 2.271164530502228e-08, + "loss": 3.7492, + "step": 3719000 + }, + { + "epoch": 40.92, + "learning_rate": 2.2697893173441882e-08, + "loss": 3.7721, + "step": 3719500 + }, + { + "epoch": 40.93, + "learning_rate": 2.268414104186149e-08, + "loss": 3.7546, + "step": 3720000 + }, + { + "epoch": 40.93, + "learning_rate": 2.2670388910281094e-08, + "loss": 3.7526, + "step": 3720500 + }, + { + "epoch": 40.94, + "learning_rate": 2.2656636778700697e-08, + "loss": 3.7511, + "step": 3721000 + }, + { + "epoch": 40.94, + "learning_rate": 2.2642884647120303e-08, + "loss": 3.7598, + "step": 3721500 + }, + { + "epoch": 40.95, + "learning_rate": 2.262913251553991e-08, + "loss": 3.7696, + "step": 3722000 + }, + { + "epoch": 40.95, + "learning_rate": 2.2615380383959512e-08, + "loss": 3.7263, + "step": 3722500 + }, + { + "epoch": 40.96, + "learning_rate": 2.2601628252379118e-08, + "loss": 3.738, + "step": 3723000 + }, + { + "epoch": 40.96, + "learning_rate": 2.2587876120798724e-08, + "loss": 3.7484, + "step": 3723500 + }, + { + "epoch": 40.97, + "learning_rate": 2.2574123989218327e-08, + "loss": 3.7621, + "step": 3724000 + }, + { + "epoch": 40.98, + "learning_rate": 2.2560371857637933e-08, + "loss": 3.7515, + "step": 3724500 + }, + { + "epoch": 40.98, + "learning_rate": 2.254661972605754e-08, + "loss": 3.7382, + "step": 3725000 + }, + { + "epoch": 40.99, + "learning_rate": 2.253286759447714e-08, + "loss": 3.7509, + "step": 3725500 + }, + { + "epoch": 40.99, + "learning_rate": 2.2519115462896748e-08, + "loss": 3.7562, + "step": 3726000 + }, + { + "epoch": 41.0, + "learning_rate": 2.2505363331316354e-08, + "loss": 3.7366, + "step": 3726500 + }, + { + "epoch": 41.0, + "eval_loss": 3.8274526596069336, + "eval_runtime": 6.1402, + "eval_samples_per_second": 253.087, + "step": 3726695 + }, + { + "epoch": 41.0, + "learning_rate": 2.2491611199735956e-08, + "loss": 3.7448, + "step": 3727000 + }, + { + "epoch": 41.01, + "learning_rate": 2.2477859068155562e-08, + "loss": 3.755, + "step": 3727500 + }, + { + "epoch": 41.01, + "learning_rate": 2.246410693657517e-08, + "loss": 3.7514, + "step": 3728000 + }, + { + "epoch": 41.02, + "learning_rate": 2.245035480499477e-08, + "loss": 3.7666, + "step": 3728500 + }, + { + "epoch": 41.03, + "learning_rate": 2.2436602673414377e-08, + "loss": 3.7526, + "step": 3729000 + }, + { + "epoch": 41.03, + "learning_rate": 2.2422850541833983e-08, + "loss": 3.7238, + "step": 3729500 + }, + { + "epoch": 41.04, + "learning_rate": 2.240909841025359e-08, + "loss": 3.7593, + "step": 3730000 + }, + { + "epoch": 41.04, + "learning_rate": 2.2395346278673195e-08, + "loss": 3.773, + "step": 3730500 + }, + { + "epoch": 41.05, + "learning_rate": 2.2381594147092798e-08, + "loss": 3.754, + "step": 3731000 + }, + { + "epoch": 41.05, + "learning_rate": 2.2367842015512404e-08, + "loss": 3.7496, + "step": 3731500 + }, + { + "epoch": 41.06, + "learning_rate": 2.235408988393201e-08, + "loss": 3.7484, + "step": 3732000 + }, + { + "epoch": 41.06, + "learning_rate": 2.2340337752351613e-08, + "loss": 3.7336, + "step": 3732500 + }, + { + "epoch": 41.07, + "learning_rate": 2.232658562077122e-08, + "loss": 3.7499, + "step": 3733000 + }, + { + "epoch": 41.07, + "learning_rate": 2.2312833489190825e-08, + "loss": 3.7529, + "step": 3733500 + }, + { + "epoch": 41.08, + "learning_rate": 2.2299081357610428e-08, + "loss": 3.7528, + "step": 3734000 + }, + { + "epoch": 41.09, + "learning_rate": 2.2285329226030034e-08, + "loss": 3.7357, + "step": 3734500 + }, + { + "epoch": 41.09, + "learning_rate": 2.227157709444964e-08, + "loss": 3.7483, + "step": 3735000 + }, + { + "epoch": 41.1, + "learning_rate": 2.2257824962869242e-08, + "loss": 3.7598, + "step": 3735500 + }, + { + "epoch": 41.1, + "learning_rate": 2.224407283128885e-08, + "loss": 3.7456, + "step": 3736000 + }, + { + "epoch": 41.11, + "learning_rate": 2.2230320699708454e-08, + "loss": 3.7542, + "step": 3736500 + }, + { + "epoch": 41.11, + "learning_rate": 2.2216568568128057e-08, + "loss": 3.7699, + "step": 3737000 + }, + { + "epoch": 41.12, + "learning_rate": 2.2202816436547663e-08, + "loss": 3.7522, + "step": 3737500 + }, + { + "epoch": 41.12, + "learning_rate": 2.218906430496727e-08, + "loss": 3.7586, + "step": 3738000 + }, + { + "epoch": 41.13, + "learning_rate": 2.2175312173386872e-08, + "loss": 3.7271, + "step": 3738500 + }, + { + "epoch": 41.14, + "learning_rate": 2.2161560041806478e-08, + "loss": 3.7561, + "step": 3739000 + }, + { + "epoch": 41.14, + "learning_rate": 2.2147807910226084e-08, + "loss": 3.7639, + "step": 3739500 + }, + { + "epoch": 41.15, + "learning_rate": 2.2134055778645687e-08, + "loss": 3.7653, + "step": 3740000 + }, + { + "epoch": 41.15, + "learning_rate": 2.2120303647065293e-08, + "loss": 3.7379, + "step": 3740500 + }, + { + "epoch": 41.16, + "learning_rate": 2.21065515154849e-08, + "loss": 3.7567, + "step": 3741000 + }, + { + "epoch": 41.16, + "learning_rate": 2.2092799383904505e-08, + "loss": 3.7376, + "step": 3741500 + }, + { + "epoch": 41.17, + "learning_rate": 2.207904725232411e-08, + "loss": 3.7474, + "step": 3742000 + }, + { + "epoch": 41.17, + "learning_rate": 2.2065295120743717e-08, + "loss": 3.745, + "step": 3742500 + }, + { + "epoch": 41.18, + "learning_rate": 2.205154298916332e-08, + "loss": 3.75, + "step": 3743000 + }, + { + "epoch": 41.18, + "learning_rate": 2.2037790857582926e-08, + "loss": 3.7449, + "step": 3743500 + }, + { + "epoch": 41.19, + "learning_rate": 2.2024038726002532e-08, + "loss": 3.7348, + "step": 3744000 + }, + { + "epoch": 41.2, + "learning_rate": 2.2010286594422134e-08, + "loss": 3.7571, + "step": 3744500 + }, + { + "epoch": 41.2, + "learning_rate": 2.199653446284174e-08, + "loss": 3.7488, + "step": 3745000 + }, + { + "epoch": 41.21, + "learning_rate": 2.1982782331261347e-08, + "loss": 3.7689, + "step": 3745500 + }, + { + "epoch": 41.21, + "learning_rate": 2.196903019968095e-08, + "loss": 3.7565, + "step": 3746000 + }, + { + "epoch": 41.22, + "learning_rate": 2.1955278068100555e-08, + "loss": 3.7637, + "step": 3746500 + }, + { + "epoch": 41.22, + "learning_rate": 2.1941525936520158e-08, + "loss": 3.7408, + "step": 3747000 + }, + { + "epoch": 41.23, + "learning_rate": 2.1927773804939764e-08, + "loss": 3.7432, + "step": 3747500 + }, + { + "epoch": 41.23, + "learning_rate": 2.191402167335937e-08, + "loss": 3.7498, + "step": 3748000 + }, + { + "epoch": 41.24, + "learning_rate": 2.1900269541778973e-08, + "loss": 3.7614, + "step": 3748500 + }, + { + "epoch": 41.25, + "learning_rate": 2.188651741019858e-08, + "loss": 3.7449, + "step": 3749000 + }, + { + "epoch": 41.25, + "learning_rate": 2.1872765278618185e-08, + "loss": 3.7419, + "step": 3749500 + }, + { + "epoch": 41.26, + "learning_rate": 2.1859013147037788e-08, + "loss": 3.7489, + "step": 3750000 + }, + { + "epoch": 41.26, + "learning_rate": 2.1845261015457394e-08, + "loss": 3.7471, + "step": 3750500 + }, + { + "epoch": 41.27, + "learning_rate": 2.1831508883877e-08, + "loss": 3.7709, + "step": 3751000 + }, + { + "epoch": 41.27, + "learning_rate": 2.1817756752296602e-08, + "loss": 3.7717, + "step": 3751500 + }, + { + "epoch": 41.28, + "learning_rate": 2.180400462071621e-08, + "loss": 3.7559, + "step": 3752000 + }, + { + "epoch": 41.28, + "learning_rate": 2.1790252489135814e-08, + "loss": 3.7555, + "step": 3752500 + }, + { + "epoch": 41.29, + "learning_rate": 2.177650035755542e-08, + "loss": 3.767, + "step": 3753000 + }, + { + "epoch": 41.29, + "learning_rate": 2.1762748225975027e-08, + "loss": 3.7548, + "step": 3753500 + }, + { + "epoch": 41.3, + "learning_rate": 2.1748996094394633e-08, + "loss": 3.7506, + "step": 3754000 + }, + { + "epoch": 41.31, + "learning_rate": 2.1735243962814235e-08, + "loss": 3.7483, + "step": 3754500 + }, + { + "epoch": 41.31, + "learning_rate": 2.172149183123384e-08, + "loss": 3.7509, + "step": 3755000 + }, + { + "epoch": 41.32, + "learning_rate": 2.1707739699653447e-08, + "loss": 3.7527, + "step": 3755500 + }, + { + "epoch": 41.32, + "learning_rate": 2.169398756807305e-08, + "loss": 3.7408, + "step": 3756000 + }, + { + "epoch": 41.33, + "learning_rate": 2.1680235436492656e-08, + "loss": 3.7695, + "step": 3756500 + }, + { + "epoch": 41.33, + "learning_rate": 2.1666483304912262e-08, + "loss": 3.752, + "step": 3757000 + }, + { + "epoch": 41.34, + "learning_rate": 2.1652731173331865e-08, + "loss": 3.7689, + "step": 3757500 + }, + { + "epoch": 41.34, + "learning_rate": 2.163897904175147e-08, + "loss": 3.7489, + "step": 3758000 + }, + { + "epoch": 41.35, + "learning_rate": 2.1625226910171077e-08, + "loss": 3.7518, + "step": 3758500 + }, + { + "epoch": 41.36, + "learning_rate": 2.161147477859068e-08, + "loss": 3.7576, + "step": 3759000 + }, + { + "epoch": 41.36, + "learning_rate": 2.1597722647010286e-08, + "loss": 3.759, + "step": 3759500 + }, + { + "epoch": 41.37, + "learning_rate": 2.1583970515429892e-08, + "loss": 3.7495, + "step": 3760000 + }, + { + "epoch": 41.37, + "learning_rate": 2.1570218383849494e-08, + "loss": 3.7522, + "step": 3760500 + }, + { + "epoch": 41.38, + "learning_rate": 2.15564662522691e-08, + "loss": 3.7453, + "step": 3761000 + }, + { + "epoch": 41.38, + "learning_rate": 2.1542714120688707e-08, + "loss": 3.7581, + "step": 3761500 + }, + { + "epoch": 41.39, + "learning_rate": 2.152896198910831e-08, + "loss": 3.733, + "step": 3762000 + }, + { + "epoch": 41.39, + "learning_rate": 2.1515209857527915e-08, + "loss": 3.7393, + "step": 3762500 + }, + { + "epoch": 41.4, + "learning_rate": 2.150145772594752e-08, + "loss": 3.7254, + "step": 3763000 + }, + { + "epoch": 41.4, + "learning_rate": 2.1487705594367124e-08, + "loss": 3.7494, + "step": 3763500 + }, + { + "epoch": 41.41, + "learning_rate": 2.147395346278673e-08, + "loss": 3.752, + "step": 3764000 + }, + { + "epoch": 41.42, + "learning_rate": 2.1460201331206336e-08, + "loss": 3.761, + "step": 3764500 + }, + { + "epoch": 41.42, + "learning_rate": 2.1446449199625942e-08, + "loss": 3.7394, + "step": 3765000 + }, + { + "epoch": 41.43, + "learning_rate": 2.1432697068045548e-08, + "loss": 3.7618, + "step": 3765500 + }, + { + "epoch": 41.43, + "learning_rate": 2.141894493646515e-08, + "loss": 3.735, + "step": 3766000 + }, + { + "epoch": 41.44, + "learning_rate": 2.1405192804884757e-08, + "loss": 3.7423, + "step": 3766500 + }, + { + "epoch": 41.44, + "learning_rate": 2.1391440673304363e-08, + "loss": 3.7767, + "step": 3767000 + }, + { + "epoch": 41.45, + "learning_rate": 2.1377688541723966e-08, + "loss": 3.7685, + "step": 3767500 + }, + { + "epoch": 41.45, + "learning_rate": 2.1363936410143572e-08, + "loss": 3.7453, + "step": 3768000 + }, + { + "epoch": 41.46, + "learning_rate": 2.1350184278563178e-08, + "loss": 3.7634, + "step": 3768500 + }, + { + "epoch": 41.47, + "learning_rate": 2.133643214698278e-08, + "loss": 3.7128, + "step": 3769000 + }, + { + "epoch": 41.47, + "learning_rate": 2.1322680015402387e-08, + "loss": 3.7746, + "step": 3769500 + }, + { + "epoch": 41.48, + "learning_rate": 2.1308927883821993e-08, + "loss": 3.7423, + "step": 3770000 + }, + { + "epoch": 41.48, + "learning_rate": 2.1295175752241595e-08, + "loss": 3.7456, + "step": 3770500 + }, + { + "epoch": 41.49, + "learning_rate": 2.12814236206612e-08, + "loss": 3.757, + "step": 3771000 + }, + { + "epoch": 41.49, + "learning_rate": 2.1267671489080807e-08, + "loss": 3.7594, + "step": 3771500 + }, + { + "epoch": 41.5, + "learning_rate": 2.125391935750041e-08, + "loss": 3.7421, + "step": 3772000 + }, + { + "epoch": 41.5, + "learning_rate": 2.1240167225920016e-08, + "loss": 3.759, + "step": 3772500 + }, + { + "epoch": 41.51, + "learning_rate": 2.1226415094339622e-08, + "loss": 3.7402, + "step": 3773000 + }, + { + "epoch": 41.51, + "learning_rate": 2.1212662962759225e-08, + "loss": 3.7571, + "step": 3773500 + }, + { + "epoch": 41.52, + "learning_rate": 2.119891083117883e-08, + "loss": 3.7696, + "step": 3774000 + }, + { + "epoch": 41.53, + "learning_rate": 2.1185158699598437e-08, + "loss": 3.745, + "step": 3774500 + }, + { + "epoch": 41.53, + "learning_rate": 2.117140656801804e-08, + "loss": 3.7412, + "step": 3775000 + }, + { + "epoch": 41.54, + "learning_rate": 2.1157654436437646e-08, + "loss": 3.7447, + "step": 3775500 + }, + { + "epoch": 41.54, + "learning_rate": 2.1143902304857252e-08, + "loss": 3.7666, + "step": 3776000 + }, + { + "epoch": 41.55, + "learning_rate": 2.1130150173276854e-08, + "loss": 3.75, + "step": 3776500 + }, + { + "epoch": 41.55, + "learning_rate": 2.111639804169646e-08, + "loss": 3.7505, + "step": 3777000 + }, + { + "epoch": 41.56, + "learning_rate": 2.1102645910116067e-08, + "loss": 3.7305, + "step": 3777500 + }, + { + "epoch": 41.56, + "learning_rate": 2.1088893778535673e-08, + "loss": 3.7659, + "step": 3778000 + }, + { + "epoch": 41.57, + "learning_rate": 2.107514164695528e-08, + "loss": 3.751, + "step": 3778500 + }, + { + "epoch": 41.58, + "learning_rate": 2.1061389515374885e-08, + "loss": 3.7478, + "step": 3779000 + }, + { + "epoch": 41.58, + "learning_rate": 2.1047637383794487e-08, + "loss": 3.7563, + "step": 3779500 + }, + { + "epoch": 41.59, + "learning_rate": 2.1033885252214093e-08, + "loss": 3.7604, + "step": 3780000 + }, + { + "epoch": 41.59, + "learning_rate": 2.10201331206337e-08, + "loss": 3.7799, + "step": 3780500 + }, + { + "epoch": 41.6, + "learning_rate": 2.1006380989053302e-08, + "loss": 3.7469, + "step": 3781000 + }, + { + "epoch": 41.6, + "learning_rate": 2.0992628857472908e-08, + "loss": 3.7506, + "step": 3781500 + }, + { + "epoch": 41.61, + "learning_rate": 2.0978876725892514e-08, + "loss": 3.7445, + "step": 3782000 + }, + { + "epoch": 41.61, + "learning_rate": 2.0965124594312117e-08, + "loss": 3.7519, + "step": 3782500 + }, + { + "epoch": 41.62, + "learning_rate": 2.0951372462731723e-08, + "loss": 3.761, + "step": 3783000 + }, + { + "epoch": 41.62, + "learning_rate": 2.093762033115133e-08, + "loss": 3.7359, + "step": 3783500 + }, + { + "epoch": 41.63, + "learning_rate": 2.0923868199570932e-08, + "loss": 3.7705, + "step": 3784000 + }, + { + "epoch": 41.64, + "learning_rate": 2.0910116067990538e-08, + "loss": 3.7724, + "step": 3784500 + }, + { + "epoch": 41.64, + "learning_rate": 2.089636393641014e-08, + "loss": 3.7709, + "step": 3785000 + }, + { + "epoch": 41.65, + "learning_rate": 2.0882611804829747e-08, + "loss": 3.7446, + "step": 3785500 + }, + { + "epoch": 41.65, + "learning_rate": 2.0868859673249353e-08, + "loss": 3.7496, + "step": 3786000 + }, + { + "epoch": 41.66, + "learning_rate": 2.0855107541668955e-08, + "loss": 3.7615, + "step": 3786500 + }, + { + "epoch": 41.66, + "learning_rate": 2.084135541008856e-08, + "loss": 3.7493, + "step": 3787000 + }, + { + "epoch": 41.67, + "learning_rate": 2.0827603278508167e-08, + "loss": 3.7472, + "step": 3787500 + }, + { + "epoch": 41.67, + "learning_rate": 2.081385114692777e-08, + "loss": 3.7575, + "step": 3788000 + }, + { + "epoch": 41.68, + "learning_rate": 2.0800099015347376e-08, + "loss": 3.7393, + "step": 3788500 + }, + { + "epoch": 41.69, + "learning_rate": 2.0786346883766982e-08, + "loss": 3.7592, + "step": 3789000 + }, + { + "epoch": 41.69, + "learning_rate": 2.0772594752186588e-08, + "loss": 3.7522, + "step": 3789500 + }, + { + "epoch": 41.7, + "learning_rate": 2.0758842620606194e-08, + "loss": 3.7446, + "step": 3790000 + }, + { + "epoch": 41.7, + "learning_rate": 2.07450904890258e-08, + "loss": 3.756, + "step": 3790500 + }, + { + "epoch": 41.71, + "learning_rate": 2.0731338357445403e-08, + "loss": 3.7355, + "step": 3791000 + }, + { + "epoch": 41.71, + "learning_rate": 2.071758622586501e-08, + "loss": 3.7511, + "step": 3791500 + }, + { + "epoch": 41.72, + "learning_rate": 2.0703834094284615e-08, + "loss": 3.7528, + "step": 3792000 + }, + { + "epoch": 41.72, + "learning_rate": 2.0690081962704218e-08, + "loss": 3.7364, + "step": 3792500 + }, + { + "epoch": 41.73, + "learning_rate": 2.0676329831123824e-08, + "loss": 3.763, + "step": 3793000 + }, + { + "epoch": 41.73, + "learning_rate": 2.066257769954343e-08, + "loss": 3.7456, + "step": 3793500 + }, + { + "epoch": 41.74, + "learning_rate": 2.0648825567963033e-08, + "loss": 3.7592, + "step": 3794000 + }, + { + "epoch": 41.75, + "learning_rate": 2.063507343638264e-08, + "loss": 3.7419, + "step": 3794500 + }, + { + "epoch": 41.75, + "learning_rate": 2.0621321304802245e-08, + "loss": 3.7618, + "step": 3795000 + }, + { + "epoch": 41.76, + "learning_rate": 2.0607569173221847e-08, + "loss": 3.7476, + "step": 3795500 + }, + { + "epoch": 41.76, + "learning_rate": 2.0593817041641453e-08, + "loss": 3.7528, + "step": 3796000 + }, + { + "epoch": 41.77, + "learning_rate": 2.058006491006106e-08, + "loss": 3.7727, + "step": 3796500 + }, + { + "epoch": 41.77, + "learning_rate": 2.0566312778480662e-08, + "loss": 3.7414, + "step": 3797000 + }, + { + "epoch": 41.78, + "learning_rate": 2.0552560646900268e-08, + "loss": 3.7542, + "step": 3797500 + }, + { + "epoch": 41.78, + "learning_rate": 2.0538808515319874e-08, + "loss": 3.7541, + "step": 3798000 + }, + { + "epoch": 41.79, + "learning_rate": 2.0525056383739477e-08, + "loss": 3.7533, + "step": 3798500 + }, + { + "epoch": 41.8, + "learning_rate": 2.0511304252159083e-08, + "loss": 3.7418, + "step": 3799000 + }, + { + "epoch": 41.8, + "learning_rate": 2.049755212057869e-08, + "loss": 3.7685, + "step": 3799500 + }, + { + "epoch": 41.81, + "learning_rate": 2.0483799988998292e-08, + "loss": 3.7469, + "step": 3800000 + }, + { + "epoch": 41.81, + "learning_rate": 2.0470047857417898e-08, + "loss": 3.7529, + "step": 3800500 + }, + { + "epoch": 41.82, + "learning_rate": 2.0456295725837504e-08, + "loss": 3.7584, + "step": 3801000 + }, + { + "epoch": 41.82, + "learning_rate": 2.044254359425711e-08, + "loss": 3.7538, + "step": 3801500 + }, + { + "epoch": 41.83, + "learning_rate": 2.0428791462676716e-08, + "loss": 3.7639, + "step": 3802000 + }, + { + "epoch": 41.83, + "learning_rate": 2.0415039331096322e-08, + "loss": 3.7583, + "step": 3802500 + }, + { + "epoch": 41.84, + "learning_rate": 2.0401287199515925e-08, + "loss": 3.7581, + "step": 3803000 + }, + { + "epoch": 41.84, + "learning_rate": 2.038753506793553e-08, + "loss": 3.7666, + "step": 3803500 + }, + { + "epoch": 41.85, + "learning_rate": 2.0373782936355133e-08, + "loss": 3.7365, + "step": 3804000 + }, + { + "epoch": 41.86, + "learning_rate": 2.036003080477474e-08, + "loss": 3.756, + "step": 3804500 + }, + { + "epoch": 41.86, + "learning_rate": 2.0346278673194346e-08, + "loss": 3.7603, + "step": 3805000 + }, + { + "epoch": 41.87, + "learning_rate": 2.0332526541613948e-08, + "loss": 3.7555, + "step": 3805500 + }, + { + "epoch": 41.87, + "learning_rate": 2.0318774410033554e-08, + "loss": 3.7519, + "step": 3806000 + }, + { + "epoch": 41.88, + "learning_rate": 2.030502227845316e-08, + "loss": 3.7308, + "step": 3806500 + }, + { + "epoch": 41.88, + "learning_rate": 2.0291270146872763e-08, + "loss": 3.7556, + "step": 3807000 + }, + { + "epoch": 41.89, + "learning_rate": 2.027751801529237e-08, + "loss": 3.7614, + "step": 3807500 + }, + { + "epoch": 41.89, + "learning_rate": 2.0263765883711975e-08, + "loss": 3.7378, + "step": 3808000 + }, + { + "epoch": 41.9, + "learning_rate": 2.0250013752131578e-08, + "loss": 3.7663, + "step": 3808500 + }, + { + "epoch": 41.91, + "learning_rate": 2.0236261620551184e-08, + "loss": 3.7499, + "step": 3809000 + }, + { + "epoch": 41.91, + "learning_rate": 2.022250948897079e-08, + "loss": 3.7587, + "step": 3809500 + }, + { + "epoch": 41.92, + "learning_rate": 2.0208757357390393e-08, + "loss": 3.757, + "step": 3810000 + }, + { + "epoch": 41.92, + "learning_rate": 2.019500522581e-08, + "loss": 3.7581, + "step": 3810500 + }, + { + "epoch": 41.93, + "learning_rate": 2.0181253094229605e-08, + "loss": 3.7555, + "step": 3811000 + }, + { + "epoch": 41.93, + "learning_rate": 2.0167500962649207e-08, + "loss": 3.7295, + "step": 3811500 + }, + { + "epoch": 41.94, + "learning_rate": 2.0153748831068813e-08, + "loss": 3.7546, + "step": 3812000 + }, + { + "epoch": 41.94, + "learning_rate": 2.013999669948842e-08, + "loss": 3.7637, + "step": 3812500 + }, + { + "epoch": 41.95, + "learning_rate": 2.0126244567908026e-08, + "loss": 3.756, + "step": 3813000 + }, + { + "epoch": 41.96, + "learning_rate": 2.0112492436327628e-08, + "loss": 3.7671, + "step": 3813500 + }, + { + "epoch": 41.96, + "learning_rate": 2.0098740304747234e-08, + "loss": 3.7389, + "step": 3814000 + }, + { + "epoch": 41.97, + "learning_rate": 2.008498817316684e-08, + "loss": 3.7422, + "step": 3814500 + }, + { + "epoch": 41.97, + "learning_rate": 2.0071236041586446e-08, + "loss": 3.7757, + "step": 3815000 + }, + { + "epoch": 41.98, + "learning_rate": 2.0057483910006052e-08, + "loss": 3.7522, + "step": 3815500 + }, + { + "epoch": 41.98, + "learning_rate": 2.0043731778425655e-08, + "loss": 3.7477, + "step": 3816000 + }, + { + "epoch": 41.99, + "learning_rate": 2.002997964684526e-08, + "loss": 3.753, + "step": 3816500 + }, + { + "epoch": 41.99, + "learning_rate": 2.0016227515264867e-08, + "loss": 3.7464, + "step": 3817000 + }, + { + "epoch": 42.0, + "learning_rate": 2.000247538368447e-08, + "loss": 3.7457, + "step": 3817500 + }, + { + "epoch": 42.0, + "eval_loss": 3.826972723007202, + "eval_runtime": 6.1413, + "eval_samples_per_second": 253.042, + "step": 3817590 + }, + { + "epoch": 42.0, + "learning_rate": 1.9988723252104076e-08, + "loss": 3.7521, + "step": 3818000 + }, + { + "epoch": 42.01, + "learning_rate": 1.9974971120523682e-08, + "loss": 3.7688, + "step": 3818500 + }, + { + "epoch": 42.02, + "learning_rate": 1.9961218988943285e-08, + "loss": 3.7495, + "step": 3819000 + }, + { + "epoch": 42.02, + "learning_rate": 1.994746685736289e-08, + "loss": 3.7746, + "step": 3819500 + }, + { + "epoch": 42.03, + "learning_rate": 1.9933714725782497e-08, + "loss": 3.7643, + "step": 3820000 + }, + { + "epoch": 42.03, + "learning_rate": 1.99199625942021e-08, + "loss": 3.769, + "step": 3820500 + }, + { + "epoch": 42.04, + "learning_rate": 1.9906210462621706e-08, + "loss": 3.7365, + "step": 3821000 + }, + { + "epoch": 42.04, + "learning_rate": 1.989245833104131e-08, + "loss": 3.7571, + "step": 3821500 + }, + { + "epoch": 42.05, + "learning_rate": 1.9878706199460914e-08, + "loss": 3.7632, + "step": 3822000 + }, + { + "epoch": 42.05, + "learning_rate": 1.986495406788052e-08, + "loss": 3.7467, + "step": 3822500 + }, + { + "epoch": 42.06, + "learning_rate": 1.9851201936300123e-08, + "loss": 3.7787, + "step": 3823000 + }, + { + "epoch": 42.07, + "learning_rate": 1.983744980471973e-08, + "loss": 3.7472, + "step": 3823500 + }, + { + "epoch": 42.07, + "learning_rate": 1.9823697673139335e-08, + "loss": 3.7496, + "step": 3824000 + }, + { + "epoch": 42.08, + "learning_rate": 1.9809945541558938e-08, + "loss": 3.785, + "step": 3824500 + }, + { + "epoch": 42.08, + "learning_rate": 1.9796193409978544e-08, + "loss": 3.7285, + "step": 3825000 + }, + { + "epoch": 42.09, + "learning_rate": 1.978244127839815e-08, + "loss": 3.7687, + "step": 3825500 + }, + { + "epoch": 42.09, + "learning_rate": 1.9768689146817756e-08, + "loss": 3.7518, + "step": 3826000 + }, + { + "epoch": 42.1, + "learning_rate": 1.9754937015237362e-08, + "loss": 3.7477, + "step": 3826500 + }, + { + "epoch": 42.1, + "learning_rate": 1.9741184883656968e-08, + "loss": 3.7549, + "step": 3827000 + }, + { + "epoch": 42.11, + "learning_rate": 1.972743275207657e-08, + "loss": 3.7455, + "step": 3827500 + }, + { + "epoch": 42.11, + "learning_rate": 1.9713680620496177e-08, + "loss": 3.7529, + "step": 3828000 + }, + { + "epoch": 42.12, + "learning_rate": 1.9699928488915783e-08, + "loss": 3.7349, + "step": 3828500 + }, + { + "epoch": 42.13, + "learning_rate": 1.9686176357335386e-08, + "loss": 3.754, + "step": 3829000 + }, + { + "epoch": 42.13, + "learning_rate": 1.967242422575499e-08, + "loss": 3.7459, + "step": 3829500 + }, + { + "epoch": 42.14, + "learning_rate": 1.9658672094174598e-08, + "loss": 3.7437, + "step": 3830000 + }, + { + "epoch": 42.14, + "learning_rate": 1.96449199625942e-08, + "loss": 3.7373, + "step": 3830500 + }, + { + "epoch": 42.15, + "learning_rate": 1.9631167831013806e-08, + "loss": 3.7288, + "step": 3831000 + }, + { + "epoch": 42.15, + "learning_rate": 1.9617415699433412e-08, + "loss": 3.7549, + "step": 3831500 + }, + { + "epoch": 42.16, + "learning_rate": 1.9603663567853015e-08, + "loss": 3.7618, + "step": 3832000 + }, + { + "epoch": 42.16, + "learning_rate": 1.958991143627262e-08, + "loss": 3.7543, + "step": 3832500 + }, + { + "epoch": 42.17, + "learning_rate": 1.9576159304692227e-08, + "loss": 3.7412, + "step": 3833000 + }, + { + "epoch": 42.18, + "learning_rate": 1.956240717311183e-08, + "loss": 3.7465, + "step": 3833500 + }, + { + "epoch": 42.18, + "learning_rate": 1.9548655041531436e-08, + "loss": 3.7546, + "step": 3834000 + }, + { + "epoch": 42.19, + "learning_rate": 1.9534902909951042e-08, + "loss": 3.7321, + "step": 3834500 + }, + { + "epoch": 42.19, + "learning_rate": 1.9521150778370645e-08, + "loss": 3.7521, + "step": 3835000 + }, + { + "epoch": 42.2, + "learning_rate": 1.950739864679025e-08, + "loss": 3.7353, + "step": 3835500 + }, + { + "epoch": 42.2, + "learning_rate": 1.9493646515209857e-08, + "loss": 3.7449, + "step": 3836000 + }, + { + "epoch": 42.21, + "learning_rate": 1.947989438362946e-08, + "loss": 3.7547, + "step": 3836500 + }, + { + "epoch": 42.21, + "learning_rate": 1.9466142252049066e-08, + "loss": 3.7788, + "step": 3837000 + }, + { + "epoch": 42.22, + "learning_rate": 1.945239012046867e-08, + "loss": 3.7498, + "step": 3837500 + }, + { + "epoch": 42.22, + "learning_rate": 1.9438637988888278e-08, + "loss": 3.7606, + "step": 3838000 + }, + { + "epoch": 42.23, + "learning_rate": 1.9424885857307884e-08, + "loss": 3.7276, + "step": 3838500 + }, + { + "epoch": 42.24, + "learning_rate": 1.941113372572749e-08, + "loss": 3.76, + "step": 3839000 + }, + { + "epoch": 42.24, + "learning_rate": 1.9397381594147092e-08, + "loss": 3.741, + "step": 3839500 + }, + { + "epoch": 42.25, + "learning_rate": 1.93836294625667e-08, + "loss": 3.7512, + "step": 3840000 + }, + { + "epoch": 42.25, + "learning_rate": 1.9369877330986305e-08, + "loss": 3.7561, + "step": 3840500 + }, + { + "epoch": 42.26, + "learning_rate": 1.9356125199405907e-08, + "loss": 3.7355, + "step": 3841000 + }, + { + "epoch": 42.26, + "learning_rate": 1.9342373067825513e-08, + "loss": 3.771, + "step": 3841500 + }, + { + "epoch": 42.27, + "learning_rate": 1.9328620936245116e-08, + "loss": 3.7644, + "step": 3842000 + }, + { + "epoch": 42.27, + "learning_rate": 1.9314868804664722e-08, + "loss": 3.736, + "step": 3842500 + }, + { + "epoch": 42.28, + "learning_rate": 1.9301116673084328e-08, + "loss": 3.7491, + "step": 3843000 + }, + { + "epoch": 42.29, + "learning_rate": 1.928736454150393e-08, + "loss": 3.7657, + "step": 3843500 + }, + { + "epoch": 42.29, + "learning_rate": 1.9273612409923537e-08, + "loss": 3.7498, + "step": 3844000 + }, + { + "epoch": 42.3, + "learning_rate": 1.9259860278343143e-08, + "loss": 3.7614, + "step": 3844500 + }, + { + "epoch": 42.3, + "learning_rate": 1.9246108146762746e-08, + "loss": 3.7458, + "step": 3845000 + }, + { + "epoch": 42.31, + "learning_rate": 1.923235601518235e-08, + "loss": 3.7592, + "step": 3845500 + }, + { + "epoch": 42.31, + "learning_rate": 1.9218603883601958e-08, + "loss": 3.7364, + "step": 3846000 + }, + { + "epoch": 42.32, + "learning_rate": 1.920485175202156e-08, + "loss": 3.7457, + "step": 3846500 + }, + { + "epoch": 42.32, + "learning_rate": 1.9191099620441166e-08, + "loss": 3.7437, + "step": 3847000 + }, + { + "epoch": 42.33, + "learning_rate": 1.9177347488860772e-08, + "loss": 3.7565, + "step": 3847500 + }, + { + "epoch": 42.33, + "learning_rate": 1.9163595357280375e-08, + "loss": 3.7336, + "step": 3848000 + }, + { + "epoch": 42.34, + "learning_rate": 1.914984322569998e-08, + "loss": 3.7506, + "step": 3848500 + }, + { + "epoch": 42.35, + "learning_rate": 1.9136091094119587e-08, + "loss": 3.7573, + "step": 3849000 + }, + { + "epoch": 42.35, + "learning_rate": 1.9122338962539193e-08, + "loss": 3.7469, + "step": 3849500 + }, + { + "epoch": 42.36, + "learning_rate": 1.91085868309588e-08, + "loss": 3.7616, + "step": 3850000 + }, + { + "epoch": 42.36, + "learning_rate": 1.9094834699378405e-08, + "loss": 3.7588, + "step": 3850500 + }, + { + "epoch": 42.37, + "learning_rate": 1.9081082567798008e-08, + "loss": 3.7503, + "step": 3851000 + }, + { + "epoch": 42.37, + "learning_rate": 1.9067330436217614e-08, + "loss": 3.7383, + "step": 3851500 + }, + { + "epoch": 42.38, + "learning_rate": 1.905357830463722e-08, + "loss": 3.7413, + "step": 3852000 + }, + { + "epoch": 42.38, + "learning_rate": 1.9039826173056823e-08, + "loss": 3.7434, + "step": 3852500 + }, + { + "epoch": 42.39, + "learning_rate": 1.902607404147643e-08, + "loss": 3.7522, + "step": 3853000 + }, + { + "epoch": 42.4, + "learning_rate": 1.9012321909896035e-08, + "loss": 3.7583, + "step": 3853500 + }, + { + "epoch": 42.4, + "learning_rate": 1.8998569778315638e-08, + "loss": 3.7465, + "step": 3854000 + }, + { + "epoch": 42.41, + "learning_rate": 1.8984817646735244e-08, + "loss": 3.746, + "step": 3854500 + }, + { + "epoch": 42.41, + "learning_rate": 1.897106551515485e-08, + "loss": 3.7559, + "step": 3855000 + }, + { + "epoch": 42.42, + "learning_rate": 1.8957313383574452e-08, + "loss": 3.7499, + "step": 3855500 + }, + { + "epoch": 42.42, + "learning_rate": 1.894356125199406e-08, + "loss": 3.762, + "step": 3856000 + }, + { + "epoch": 42.43, + "learning_rate": 1.8929809120413665e-08, + "loss": 3.7389, + "step": 3856500 + }, + { + "epoch": 42.43, + "learning_rate": 1.8916056988833267e-08, + "loss": 3.7631, + "step": 3857000 + }, + { + "epoch": 42.44, + "learning_rate": 1.8902304857252873e-08, + "loss": 3.7355, + "step": 3857500 + }, + { + "epoch": 42.44, + "learning_rate": 1.888855272567248e-08, + "loss": 3.7424, + "step": 3858000 + }, + { + "epoch": 42.45, + "learning_rate": 1.8874800594092082e-08, + "loss": 3.7557, + "step": 3858500 + }, + { + "epoch": 42.46, + "learning_rate": 1.8861048462511688e-08, + "loss": 3.7594, + "step": 3859000 + }, + { + "epoch": 42.46, + "learning_rate": 1.884729633093129e-08, + "loss": 3.7544, + "step": 3859500 + }, + { + "epoch": 42.47, + "learning_rate": 1.8833544199350897e-08, + "loss": 3.7495, + "step": 3860000 + }, + { + "epoch": 42.47, + "learning_rate": 1.8819792067770503e-08, + "loss": 3.7673, + "step": 3860500 + }, + { + "epoch": 42.48, + "learning_rate": 1.880603993619011e-08, + "loss": 3.7409, + "step": 3861000 + }, + { + "epoch": 42.48, + "learning_rate": 1.8792287804609712e-08, + "loss": 3.7525, + "step": 3861500 + }, + { + "epoch": 42.49, + "learning_rate": 1.8778535673029318e-08, + "loss": 3.737, + "step": 3862000 + }, + { + "epoch": 42.49, + "learning_rate": 1.8764783541448924e-08, + "loss": 3.7432, + "step": 3862500 + }, + { + "epoch": 42.5, + "learning_rate": 1.875103140986853e-08, + "loss": 3.7518, + "step": 3863000 + }, + { + "epoch": 42.51, + "learning_rate": 1.8737279278288136e-08, + "loss": 3.7368, + "step": 3863500 + }, + { + "epoch": 42.51, + "learning_rate": 1.872352714670774e-08, + "loss": 3.7556, + "step": 3864000 + }, + { + "epoch": 42.52, + "learning_rate": 1.8709775015127345e-08, + "loss": 3.7591, + "step": 3864500 + }, + { + "epoch": 42.52, + "learning_rate": 1.869602288354695e-08, + "loss": 3.7552, + "step": 3865000 + }, + { + "epoch": 42.53, + "learning_rate": 1.8682270751966553e-08, + "loss": 3.7646, + "step": 3865500 + }, + { + "epoch": 42.53, + "learning_rate": 1.866851862038616e-08, + "loss": 3.7467, + "step": 3866000 + }, + { + "epoch": 42.54, + "learning_rate": 1.8654766488805765e-08, + "loss": 3.7644, + "step": 3866500 + }, + { + "epoch": 42.54, + "learning_rate": 1.8641014357225368e-08, + "loss": 3.7586, + "step": 3867000 + }, + { + "epoch": 42.55, + "learning_rate": 1.8627262225644974e-08, + "loss": 3.7498, + "step": 3867500 + }, + { + "epoch": 42.55, + "learning_rate": 1.861351009406458e-08, + "loss": 3.7552, + "step": 3868000 + }, + { + "epoch": 42.56, + "learning_rate": 1.8599757962484183e-08, + "loss": 3.7534, + "step": 3868500 + }, + { + "epoch": 42.57, + "learning_rate": 1.858600583090379e-08, + "loss": 3.7433, + "step": 3869000 + }, + { + "epoch": 42.57, + "learning_rate": 1.8572253699323395e-08, + "loss": 3.7508, + "step": 3869500 + }, + { + "epoch": 42.58, + "learning_rate": 1.8558501567742998e-08, + "loss": 3.7425, + "step": 3870000 + }, + { + "epoch": 42.58, + "learning_rate": 1.8544749436162604e-08, + "loss": 3.7523, + "step": 3870500 + }, + { + "epoch": 42.59, + "learning_rate": 1.853099730458221e-08, + "loss": 3.7589, + "step": 3871000 + }, + { + "epoch": 42.59, + "learning_rate": 1.8517245173001813e-08, + "loss": 3.7587, + "step": 3871500 + }, + { + "epoch": 42.6, + "learning_rate": 1.850349304142142e-08, + "loss": 3.756, + "step": 3872000 + }, + { + "epoch": 42.6, + "learning_rate": 1.8489740909841025e-08, + "loss": 3.7512, + "step": 3872500 + }, + { + "epoch": 42.61, + "learning_rate": 1.8475988778260627e-08, + "loss": 3.7579, + "step": 3873000 + }, + { + "epoch": 42.62, + "learning_rate": 1.8462236646680233e-08, + "loss": 3.7517, + "step": 3873500 + }, + { + "epoch": 42.62, + "learning_rate": 1.844848451509984e-08, + "loss": 3.7676, + "step": 3874000 + }, + { + "epoch": 42.63, + "learning_rate": 1.8434732383519445e-08, + "loss": 3.7597, + "step": 3874500 + }, + { + "epoch": 42.63, + "learning_rate": 1.842098025193905e-08, + "loss": 3.7431, + "step": 3875000 + }, + { + "epoch": 42.64, + "learning_rate": 1.8407228120358657e-08, + "loss": 3.7578, + "step": 3875500 + }, + { + "epoch": 42.64, + "learning_rate": 1.839347598877826e-08, + "loss": 3.7335, + "step": 3876000 + }, + { + "epoch": 42.65, + "learning_rate": 1.8379723857197866e-08, + "loss": 3.7423, + "step": 3876500 + }, + { + "epoch": 42.65, + "learning_rate": 1.8365971725617472e-08, + "loss": 3.7464, + "step": 3877000 + }, + { + "epoch": 42.66, + "learning_rate": 1.8352219594037075e-08, + "loss": 3.7518, + "step": 3877500 + }, + { + "epoch": 42.66, + "learning_rate": 1.833846746245668e-08, + "loss": 3.7478, + "step": 3878000 + }, + { + "epoch": 42.67, + "learning_rate": 1.8324715330876284e-08, + "loss": 3.7574, + "step": 3878500 + }, + { + "epoch": 42.68, + "learning_rate": 1.831096319929589e-08, + "loss": 3.7431, + "step": 3879000 + }, + { + "epoch": 42.68, + "learning_rate": 1.8297211067715496e-08, + "loss": 3.7416, + "step": 3879500 + }, + { + "epoch": 42.69, + "learning_rate": 1.82834589361351e-08, + "loss": 3.7526, + "step": 3880000 + }, + { + "epoch": 42.69, + "learning_rate": 1.8269706804554705e-08, + "loss": 3.7593, + "step": 3880500 + }, + { + "epoch": 42.7, + "learning_rate": 1.825595467297431e-08, + "loss": 3.7447, + "step": 3881000 + }, + { + "epoch": 42.7, + "learning_rate": 1.8242202541393913e-08, + "loss": 3.7542, + "step": 3881500 + }, + { + "epoch": 42.71, + "learning_rate": 1.822845040981352e-08, + "loss": 3.7555, + "step": 3882000 + }, + { + "epoch": 42.71, + "learning_rate": 1.8214698278233125e-08, + "loss": 3.7429, + "step": 3882500 + }, + { + "epoch": 42.72, + "learning_rate": 1.8200946146652728e-08, + "loss": 3.757, + "step": 3883000 + }, + { + "epoch": 42.73, + "learning_rate": 1.8187194015072334e-08, + "loss": 3.7564, + "step": 3883500 + }, + { + "epoch": 42.73, + "learning_rate": 1.817344188349194e-08, + "loss": 3.7726, + "step": 3884000 + }, + { + "epoch": 42.74, + "learning_rate": 1.8159689751911543e-08, + "loss": 3.7474, + "step": 3884500 + }, + { + "epoch": 42.74, + "learning_rate": 1.814593762033115e-08, + "loss": 3.7452, + "step": 3885000 + }, + { + "epoch": 42.75, + "learning_rate": 1.8132185488750755e-08, + "loss": 3.7672, + "step": 3885500 + }, + { + "epoch": 42.75, + "learning_rate": 1.811843335717036e-08, + "loss": 3.7476, + "step": 3886000 + }, + { + "epoch": 42.76, + "learning_rate": 1.8104681225589967e-08, + "loss": 3.7544, + "step": 3886500 + }, + { + "epoch": 42.76, + "learning_rate": 1.8090929094009573e-08, + "loss": 3.7442, + "step": 3887000 + }, + { + "epoch": 42.77, + "learning_rate": 1.8077176962429176e-08, + "loss": 3.7461, + "step": 3887500 + }, + { + "epoch": 42.77, + "learning_rate": 1.8063424830848782e-08, + "loss": 3.7622, + "step": 3888000 + }, + { + "epoch": 42.78, + "learning_rate": 1.8049672699268388e-08, + "loss": 3.7524, + "step": 3888500 + }, + { + "epoch": 42.79, + "learning_rate": 1.803592056768799e-08, + "loss": 3.7667, + "step": 3889000 + }, + { + "epoch": 42.79, + "learning_rate": 1.8022168436107597e-08, + "loss": 3.7439, + "step": 3889500 + }, + { + "epoch": 42.8, + "learning_rate": 1.8008416304527203e-08, + "loss": 3.759, + "step": 3890000 + }, + { + "epoch": 42.8, + "learning_rate": 1.7994664172946805e-08, + "loss": 3.7305, + "step": 3890500 + }, + { + "epoch": 42.81, + "learning_rate": 1.798091204136641e-08, + "loss": 3.7383, + "step": 3891000 + }, + { + "epoch": 42.81, + "learning_rate": 1.7967159909786018e-08, + "loss": 3.7449, + "step": 3891500 + }, + { + "epoch": 42.82, + "learning_rate": 1.795340777820562e-08, + "loss": 3.7588, + "step": 3892000 + }, + { + "epoch": 42.82, + "learning_rate": 1.7939655646625226e-08, + "loss": 3.7305, + "step": 3892500 + }, + { + "epoch": 42.83, + "learning_rate": 1.7925903515044832e-08, + "loss": 3.7582, + "step": 3893000 + }, + { + "epoch": 42.84, + "learning_rate": 1.7912151383464435e-08, + "loss": 3.7535, + "step": 3893500 + }, + { + "epoch": 42.84, + "learning_rate": 1.789839925188404e-08, + "loss": 3.7383, + "step": 3894000 + }, + { + "epoch": 42.85, + "learning_rate": 1.7884647120303647e-08, + "loss": 3.7578, + "step": 3894500 + }, + { + "epoch": 42.85, + "learning_rate": 1.787089498872325e-08, + "loss": 3.7597, + "step": 3895000 + }, + { + "epoch": 42.86, + "learning_rate": 1.7857142857142856e-08, + "loss": 3.7586, + "step": 3895500 + }, + { + "epoch": 42.86, + "learning_rate": 1.7843390725562462e-08, + "loss": 3.7546, + "step": 3896000 + }, + { + "epoch": 42.87, + "learning_rate": 1.7829638593982065e-08, + "loss": 3.7642, + "step": 3896500 + }, + { + "epoch": 42.87, + "learning_rate": 1.781588646240167e-08, + "loss": 3.7711, + "step": 3897000 + }, + { + "epoch": 42.88, + "learning_rate": 1.7802134330821277e-08, + "loss": 3.7659, + "step": 3897500 + }, + { + "epoch": 42.88, + "learning_rate": 1.7788382199240883e-08, + "loss": 3.7463, + "step": 3898000 + }, + { + "epoch": 42.89, + "learning_rate": 1.777463006766049e-08, + "loss": 3.7531, + "step": 3898500 + }, + { + "epoch": 42.9, + "learning_rate": 1.776087793608009e-08, + "loss": 3.7459, + "step": 3899000 + }, + { + "epoch": 42.9, + "learning_rate": 1.7747125804499698e-08, + "loss": 3.7472, + "step": 3899500 + }, + { + "epoch": 42.91, + "learning_rate": 1.7733373672919304e-08, + "loss": 3.754, + "step": 3900000 + }, + { + "epoch": 42.91, + "learning_rate": 1.7719621541338906e-08, + "loss": 3.7539, + "step": 3900500 + }, + { + "epoch": 42.92, + "learning_rate": 1.7705869409758512e-08, + "loss": 3.7487, + "step": 3901000 + }, + { + "epoch": 42.92, + "learning_rate": 1.769211727817812e-08, + "loss": 3.7393, + "step": 3901500 + }, + { + "epoch": 42.93, + "learning_rate": 1.767836514659772e-08, + "loss": 3.7347, + "step": 3902000 + }, + { + "epoch": 42.93, + "learning_rate": 1.7664613015017327e-08, + "loss": 3.7589, + "step": 3902500 + }, + { + "epoch": 42.94, + "learning_rate": 1.7650860883436933e-08, + "loss": 3.7485, + "step": 3903000 + }, + { + "epoch": 42.95, + "learning_rate": 1.7637108751856536e-08, + "loss": 3.7417, + "step": 3903500 + }, + { + "epoch": 42.95, + "learning_rate": 1.7623356620276142e-08, + "loss": 3.7644, + "step": 3904000 + }, + { + "epoch": 42.96, + "learning_rate": 1.7609604488695748e-08, + "loss": 3.7489, + "step": 3904500 + }, + { + "epoch": 42.96, + "learning_rate": 1.759585235711535e-08, + "loss": 3.7368, + "step": 3905000 + }, + { + "epoch": 42.97, + "learning_rate": 1.7582100225534957e-08, + "loss": 3.7603, + "step": 3905500 + }, + { + "epoch": 42.97, + "learning_rate": 1.7568348093954563e-08, + "loss": 3.774, + "step": 3906000 + }, + { + "epoch": 42.98, + "learning_rate": 1.7554595962374165e-08, + "loss": 3.7453, + "step": 3906500 + }, + { + "epoch": 42.98, + "learning_rate": 1.754084383079377e-08, + "loss": 3.7497, + "step": 3907000 + }, + { + "epoch": 42.99, + "learning_rate": 1.7527091699213378e-08, + "loss": 3.7558, + "step": 3907500 + }, + { + "epoch": 42.99, + "learning_rate": 1.751333956763298e-08, + "loss": 3.7521, + "step": 3908000 + }, + { + "epoch": 43.0, + "eval_loss": 3.8265368938446045, + "eval_runtime": 6.1446, + "eval_samples_per_second": 252.907, + "step": 3908485 + }, + { + "epoch": 43.0, + "learning_rate": 1.7499587436052586e-08, + "loss": 3.7414, + "step": 3908500 + }, + { + "epoch": 43.01, + "learning_rate": 1.7485835304472192e-08, + "loss": 3.7451, + "step": 3909000 + }, + { + "epoch": 43.01, + "learning_rate": 1.7472083172891795e-08, + "loss": 3.7656, + "step": 3909500 + }, + { + "epoch": 43.02, + "learning_rate": 1.74583310413114e-08, + "loss": 3.7551, + "step": 3910000 + }, + { + "epoch": 43.02, + "learning_rate": 1.7444578909731007e-08, + "loss": 3.7473, + "step": 3910500 + }, + { + "epoch": 43.03, + "learning_rate": 1.7430826778150613e-08, + "loss": 3.7501, + "step": 3911000 + }, + { + "epoch": 43.03, + "learning_rate": 1.741707464657022e-08, + "loss": 3.7466, + "step": 3911500 + }, + { + "epoch": 43.04, + "learning_rate": 1.7403322514989825e-08, + "loss": 3.7332, + "step": 3912000 + }, + { + "epoch": 43.04, + "learning_rate": 1.7389570383409428e-08, + "loss": 3.7329, + "step": 3912500 + }, + { + "epoch": 43.05, + "learning_rate": 1.7375818251829034e-08, + "loss": 3.7494, + "step": 3913000 + }, + { + "epoch": 43.06, + "learning_rate": 1.736206612024864e-08, + "loss": 3.7487, + "step": 3913500 + }, + { + "epoch": 43.06, + "learning_rate": 1.7348313988668243e-08, + "loss": 3.7518, + "step": 3914000 + }, + { + "epoch": 43.07, + "learning_rate": 1.733456185708785e-08, + "loss": 3.7354, + "step": 3914500 + }, + { + "epoch": 43.07, + "learning_rate": 1.7320809725507455e-08, + "loss": 3.7605, + "step": 3915000 + }, + { + "epoch": 43.08, + "learning_rate": 1.7307057593927058e-08, + "loss": 3.7419, + "step": 3915500 + }, + { + "epoch": 43.08, + "learning_rate": 1.7293305462346664e-08, + "loss": 3.7551, + "step": 3916000 + }, + { + "epoch": 43.09, + "learning_rate": 1.7279553330766266e-08, + "loss": 3.7628, + "step": 3916500 + }, + { + "epoch": 43.09, + "learning_rate": 1.7265801199185872e-08, + "loss": 3.7572, + "step": 3917000 + }, + { + "epoch": 43.1, + "learning_rate": 1.725204906760548e-08, + "loss": 3.7606, + "step": 3917500 + }, + { + "epoch": 43.1, + "learning_rate": 1.723829693602508e-08, + "loss": 3.7328, + "step": 3918000 + }, + { + "epoch": 43.11, + "learning_rate": 1.7224544804444687e-08, + "loss": 3.7582, + "step": 3918500 + }, + { + "epoch": 43.12, + "learning_rate": 1.7210792672864293e-08, + "loss": 3.7275, + "step": 3919000 + }, + { + "epoch": 43.12, + "learning_rate": 1.7197040541283896e-08, + "loss": 3.7586, + "step": 3919500 + }, + { + "epoch": 43.13, + "learning_rate": 1.7183288409703502e-08, + "loss": 3.7784, + "step": 3920000 + }, + { + "epoch": 43.13, + "learning_rate": 1.7169536278123108e-08, + "loss": 3.7543, + "step": 3920500 + }, + { + "epoch": 43.14, + "learning_rate": 1.715578414654271e-08, + "loss": 3.7445, + "step": 3921000 + }, + { + "epoch": 43.14, + "learning_rate": 1.7142032014962317e-08, + "loss": 3.752, + "step": 3921500 + }, + { + "epoch": 43.15, + "learning_rate": 1.7128279883381923e-08, + "loss": 3.7542, + "step": 3922000 + }, + { + "epoch": 43.15, + "learning_rate": 1.711452775180153e-08, + "loss": 3.7523, + "step": 3922500 + }, + { + "epoch": 43.16, + "learning_rate": 1.7100775620221135e-08, + "loss": 3.7364, + "step": 3923000 + }, + { + "epoch": 43.17, + "learning_rate": 1.708702348864074e-08, + "loss": 3.7724, + "step": 3923500 + }, + { + "epoch": 43.17, + "learning_rate": 1.7073271357060344e-08, + "loss": 3.757, + "step": 3924000 + }, + { + "epoch": 43.18, + "learning_rate": 1.705951922547995e-08, + "loss": 3.7511, + "step": 3924500 + }, + { + "epoch": 43.18, + "learning_rate": 1.7045767093899556e-08, + "loss": 3.7596, + "step": 3925000 + }, + { + "epoch": 43.19, + "learning_rate": 1.703201496231916e-08, + "loss": 3.7677, + "step": 3925500 + }, + { + "epoch": 43.19, + "learning_rate": 1.7018262830738764e-08, + "loss": 3.7455, + "step": 3926000 + }, + { + "epoch": 43.2, + "learning_rate": 1.700451069915837e-08, + "loss": 3.7329, + "step": 3926500 + }, + { + "epoch": 43.2, + "learning_rate": 1.6990758567577973e-08, + "loss": 3.7666, + "step": 3927000 + }, + { + "epoch": 43.21, + "learning_rate": 1.697700643599758e-08, + "loss": 3.7627, + "step": 3927500 + }, + { + "epoch": 43.21, + "learning_rate": 1.6963254304417185e-08, + "loss": 3.7295, + "step": 3928000 + }, + { + "epoch": 43.22, + "learning_rate": 1.6949502172836788e-08, + "loss": 3.7377, + "step": 3928500 + }, + { + "epoch": 43.23, + "learning_rate": 1.6935750041256394e-08, + "loss": 3.7628, + "step": 3929000 + }, + { + "epoch": 43.23, + "learning_rate": 1.6921997909676e-08, + "loss": 3.7564, + "step": 3929500 + }, + { + "epoch": 43.24, + "learning_rate": 1.6908245778095603e-08, + "loss": 3.759, + "step": 3930000 + }, + { + "epoch": 43.24, + "learning_rate": 1.689449364651521e-08, + "loss": 3.7647, + "step": 3930500 + }, + { + "epoch": 43.25, + "learning_rate": 1.6880741514934815e-08, + "loss": 3.747, + "step": 3931000 + }, + { + "epoch": 43.25, + "learning_rate": 1.6866989383354418e-08, + "loss": 3.7366, + "step": 3931500 + }, + { + "epoch": 43.26, + "learning_rate": 1.6853237251774024e-08, + "loss": 3.7561, + "step": 3932000 + }, + { + "epoch": 43.26, + "learning_rate": 1.683948512019363e-08, + "loss": 3.7585, + "step": 3932500 + }, + { + "epoch": 43.27, + "learning_rate": 1.6825732988613232e-08, + "loss": 3.7406, + "step": 3933000 + }, + { + "epoch": 43.28, + "learning_rate": 1.681198085703284e-08, + "loss": 3.7513, + "step": 3933500 + }, + { + "epoch": 43.28, + "learning_rate": 1.6798228725452444e-08, + "loss": 3.7603, + "step": 3934000 + }, + { + "epoch": 43.29, + "learning_rate": 1.678447659387205e-08, + "loss": 3.7519, + "step": 3934500 + }, + { + "epoch": 43.29, + "learning_rate": 1.6770724462291657e-08, + "loss": 3.7539, + "step": 3935000 + }, + { + "epoch": 43.3, + "learning_rate": 1.675697233071126e-08, + "loss": 3.7532, + "step": 3935500 + }, + { + "epoch": 43.3, + "learning_rate": 1.6743220199130865e-08, + "loss": 3.744, + "step": 3936000 + }, + { + "epoch": 43.31, + "learning_rate": 1.672946806755047e-08, + "loss": 3.7453, + "step": 3936500 + }, + { + "epoch": 43.31, + "learning_rate": 1.6715715935970074e-08, + "loss": 3.7478, + "step": 3937000 + }, + { + "epoch": 43.32, + "learning_rate": 1.670196380438968e-08, + "loss": 3.7449, + "step": 3937500 + }, + { + "epoch": 43.32, + "learning_rate": 1.6688211672809286e-08, + "loss": 3.7743, + "step": 3938000 + }, + { + "epoch": 43.33, + "learning_rate": 1.667445954122889e-08, + "loss": 3.7489, + "step": 3938500 + }, + { + "epoch": 43.34, + "learning_rate": 1.6660707409648495e-08, + "loss": 3.7545, + "step": 3939000 + }, + { + "epoch": 43.34, + "learning_rate": 1.66469552780681e-08, + "loss": 3.7516, + "step": 3939500 + }, + { + "epoch": 43.35, + "learning_rate": 1.6633203146487704e-08, + "loss": 3.7603, + "step": 3940000 + }, + { + "epoch": 43.35, + "learning_rate": 1.661945101490731e-08, + "loss": 3.7183, + "step": 3940500 + }, + { + "epoch": 43.36, + "learning_rate": 1.6605698883326916e-08, + "loss": 3.7403, + "step": 3941000 + }, + { + "epoch": 43.36, + "learning_rate": 1.659194675174652e-08, + "loss": 3.7494, + "step": 3941500 + }, + { + "epoch": 43.37, + "learning_rate": 1.6578194620166124e-08, + "loss": 3.7616, + "step": 3942000 + }, + { + "epoch": 43.37, + "learning_rate": 1.656444248858573e-08, + "loss": 3.7481, + "step": 3942500 + }, + { + "epoch": 43.38, + "learning_rate": 1.6550690357005333e-08, + "loss": 3.7688, + "step": 3943000 + }, + { + "epoch": 43.39, + "learning_rate": 1.653693822542494e-08, + "loss": 3.7439, + "step": 3943500 + }, + { + "epoch": 43.39, + "learning_rate": 1.6523186093844545e-08, + "loss": 3.763, + "step": 3944000 + }, + { + "epoch": 43.4, + "learning_rate": 1.6509433962264148e-08, + "loss": 3.7527, + "step": 3944500 + }, + { + "epoch": 43.4, + "learning_rate": 1.6495681830683754e-08, + "loss": 3.7271, + "step": 3945000 + }, + { + "epoch": 43.41, + "learning_rate": 1.648192969910336e-08, + "loss": 3.7486, + "step": 3945500 + }, + { + "epoch": 43.41, + "learning_rate": 1.6468177567522966e-08, + "loss": 3.743, + "step": 3946000 + }, + { + "epoch": 43.42, + "learning_rate": 1.645442543594257e-08, + "loss": 3.7578, + "step": 3946500 + }, + { + "epoch": 43.42, + "learning_rate": 1.6440673304362175e-08, + "loss": 3.7658, + "step": 3947000 + }, + { + "epoch": 43.43, + "learning_rate": 1.642692117278178e-08, + "loss": 3.7486, + "step": 3947500 + }, + { + "epoch": 43.43, + "learning_rate": 1.6413169041201387e-08, + "loss": 3.7691, + "step": 3948000 + }, + { + "epoch": 43.44, + "learning_rate": 1.6399416909620993e-08, + "loss": 3.7441, + "step": 3948500 + }, + { + "epoch": 43.45, + "learning_rate": 1.6385664778040596e-08, + "loss": 3.7427, + "step": 3949000 + }, + { + "epoch": 43.45, + "learning_rate": 1.6371912646460202e-08, + "loss": 3.7479, + "step": 3949500 + }, + { + "epoch": 43.46, + "learning_rate": 1.6358160514879808e-08, + "loss": 3.7587, + "step": 3950000 + }, + { + "epoch": 43.46, + "learning_rate": 1.634440838329941e-08, + "loss": 3.7497, + "step": 3950500 + }, + { + "epoch": 43.47, + "learning_rate": 1.6330656251719017e-08, + "loss": 3.7542, + "step": 3951000 + }, + { + "epoch": 43.47, + "learning_rate": 1.6316904120138623e-08, + "loss": 3.7543, + "step": 3951500 + }, + { + "epoch": 43.48, + "learning_rate": 1.6303151988558225e-08, + "loss": 3.7663, + "step": 3952000 + }, + { + "epoch": 43.48, + "learning_rate": 1.628939985697783e-08, + "loss": 3.746, + "step": 3952500 + }, + { + "epoch": 43.49, + "learning_rate": 1.6275647725397437e-08, + "loss": 3.7442, + "step": 3953000 + }, + { + "epoch": 43.5, + "learning_rate": 1.626189559381704e-08, + "loss": 3.7619, + "step": 3953500 + }, + { + "epoch": 43.5, + "learning_rate": 1.6248143462236646e-08, + "loss": 3.7618, + "step": 3954000 + }, + { + "epoch": 43.51, + "learning_rate": 1.623439133065625e-08, + "loss": 3.7458, + "step": 3954500 + }, + { + "epoch": 43.51, + "learning_rate": 1.6220639199075855e-08, + "loss": 3.754, + "step": 3955000 + }, + { + "epoch": 43.52, + "learning_rate": 1.620688706749546e-08, + "loss": 3.7589, + "step": 3955500 + }, + { + "epoch": 43.52, + "learning_rate": 1.6193134935915064e-08, + "loss": 3.7628, + "step": 3956000 + }, + { + "epoch": 43.53, + "learning_rate": 1.617938280433467e-08, + "loss": 3.7402, + "step": 3956500 + }, + { + "epoch": 43.53, + "learning_rate": 1.6165630672754276e-08, + "loss": 3.758, + "step": 3957000 + }, + { + "epoch": 43.54, + "learning_rate": 1.615187854117388e-08, + "loss": 3.7622, + "step": 3957500 + }, + { + "epoch": 43.54, + "learning_rate": 1.6138126409593484e-08, + "loss": 3.7414, + "step": 3958000 + }, + { + "epoch": 43.55, + "learning_rate": 1.612437427801309e-08, + "loss": 3.7597, + "step": 3958500 + }, + { + "epoch": 43.56, + "learning_rate": 1.6110622146432697e-08, + "loss": 3.748, + "step": 3959000 + }, + { + "epoch": 43.56, + "learning_rate": 1.6096870014852303e-08, + "loss": 3.7345, + "step": 3959500 + }, + { + "epoch": 43.57, + "learning_rate": 1.608311788327191e-08, + "loss": 3.7342, + "step": 3960000 + }, + { + "epoch": 43.57, + "learning_rate": 1.606936575169151e-08, + "loss": 3.7366, + "step": 3960500 + }, + { + "epoch": 43.58, + "learning_rate": 1.6055613620111117e-08, + "loss": 3.7442, + "step": 3961000 + }, + { + "epoch": 43.58, + "learning_rate": 1.6041861488530723e-08, + "loss": 3.7564, + "step": 3961500 + }, + { + "epoch": 43.59, + "learning_rate": 1.6028109356950326e-08, + "loss": 3.7583, + "step": 3962000 + }, + { + "epoch": 43.59, + "learning_rate": 1.6014357225369932e-08, + "loss": 3.7644, + "step": 3962500 + }, + { + "epoch": 43.6, + "learning_rate": 1.6000605093789538e-08, + "loss": 3.7518, + "step": 3963000 + }, + { + "epoch": 43.61, + "learning_rate": 1.598685296220914e-08, + "loss": 3.7247, + "step": 3963500 + }, + { + "epoch": 43.61, + "learning_rate": 1.5973100830628747e-08, + "loss": 3.7486, + "step": 3964000 + }, + { + "epoch": 43.62, + "learning_rate": 1.5959348699048353e-08, + "loss": 3.7722, + "step": 3964500 + }, + { + "epoch": 43.62, + "learning_rate": 1.5945596567467956e-08, + "loss": 3.7524, + "step": 3965000 + }, + { + "epoch": 43.63, + "learning_rate": 1.5931844435887562e-08, + "loss": 3.7537, + "step": 3965500 + }, + { + "epoch": 43.63, + "learning_rate": 1.5918092304307168e-08, + "loss": 3.7557, + "step": 3966000 + }, + { + "epoch": 43.64, + "learning_rate": 1.590434017272677e-08, + "loss": 3.739, + "step": 3966500 + }, + { + "epoch": 43.64, + "learning_rate": 1.5890588041146377e-08, + "loss": 3.7516, + "step": 3967000 + }, + { + "epoch": 43.65, + "learning_rate": 1.5876835909565983e-08, + "loss": 3.7658, + "step": 3967500 + }, + { + "epoch": 43.65, + "learning_rate": 1.5863083777985585e-08, + "loss": 3.7461, + "step": 3968000 + }, + { + "epoch": 43.66, + "learning_rate": 1.584933164640519e-08, + "loss": 3.7646, + "step": 3968500 + }, + { + "epoch": 43.67, + "learning_rate": 1.5835579514824797e-08, + "loss": 3.7654, + "step": 3969000 + }, + { + "epoch": 43.67, + "learning_rate": 1.58218273832444e-08, + "loss": 3.742, + "step": 3969500 + }, + { + "epoch": 43.68, + "learning_rate": 1.5808075251664006e-08, + "loss": 3.7434, + "step": 3970000 + }, + { + "epoch": 43.68, + "learning_rate": 1.5794323120083612e-08, + "loss": 3.7326, + "step": 3970500 + }, + { + "epoch": 43.69, + "learning_rate": 1.5780570988503218e-08, + "loss": 3.7485, + "step": 3971000 + }, + { + "epoch": 43.69, + "learning_rate": 1.5766818856922824e-08, + "loss": 3.7528, + "step": 3971500 + }, + { + "epoch": 43.7, + "learning_rate": 1.5753066725342427e-08, + "loss": 3.7413, + "step": 3972000 + }, + { + "epoch": 43.7, + "learning_rate": 1.5739314593762033e-08, + "loss": 3.7466, + "step": 3972500 + }, + { + "epoch": 43.71, + "learning_rate": 1.572556246218164e-08, + "loss": 3.7327, + "step": 3973000 + }, + { + "epoch": 43.72, + "learning_rate": 1.5711810330601242e-08, + "loss": 3.7435, + "step": 3973500 + }, + { + "epoch": 43.72, + "learning_rate": 1.5698058199020848e-08, + "loss": 3.7557, + "step": 3974000 + }, + { + "epoch": 43.73, + "learning_rate": 1.5684306067440454e-08, + "loss": 3.7623, + "step": 3974500 + }, + { + "epoch": 43.73, + "learning_rate": 1.5670553935860057e-08, + "loss": 3.7651, + "step": 3975000 + }, + { + "epoch": 43.74, + "learning_rate": 1.5656801804279663e-08, + "loss": 3.7621, + "step": 3975500 + }, + { + "epoch": 43.74, + "learning_rate": 1.564304967269927e-08, + "loss": 3.752, + "step": 3976000 + }, + { + "epoch": 43.75, + "learning_rate": 1.562929754111887e-08, + "loss": 3.7638, + "step": 3976500 + }, + { + "epoch": 43.75, + "learning_rate": 1.5615545409538477e-08, + "loss": 3.7707, + "step": 3977000 + }, + { + "epoch": 43.76, + "learning_rate": 1.5601793277958083e-08, + "loss": 3.7641, + "step": 3977500 + }, + { + "epoch": 43.76, + "learning_rate": 1.5588041146377686e-08, + "loss": 3.7709, + "step": 3978000 + }, + { + "epoch": 43.77, + "learning_rate": 1.5574289014797292e-08, + "loss": 3.7609, + "step": 3978500 + }, + { + "epoch": 43.78, + "learning_rate": 1.5560536883216898e-08, + "loss": 3.7335, + "step": 3979000 + }, + { + "epoch": 43.78, + "learning_rate": 1.55467847516365e-08, + "loss": 3.7616, + "step": 3979500 + }, + { + "epoch": 43.79, + "learning_rate": 1.5533032620056107e-08, + "loss": 3.7657, + "step": 3980000 + }, + { + "epoch": 43.79, + "learning_rate": 1.5519280488475713e-08, + "loss": 3.7445, + "step": 3980500 + }, + { + "epoch": 43.8, + "learning_rate": 1.5505528356895316e-08, + "loss": 3.7625, + "step": 3981000 + }, + { + "epoch": 43.8, + "learning_rate": 1.5491776225314922e-08, + "loss": 3.748, + "step": 3981500 + }, + { + "epoch": 43.81, + "learning_rate": 1.5478024093734528e-08, + "loss": 3.738, + "step": 3982000 + }, + { + "epoch": 43.81, + "learning_rate": 1.5464271962154134e-08, + "loss": 3.7519, + "step": 3982500 + }, + { + "epoch": 43.82, + "learning_rate": 1.545051983057374e-08, + "loss": 3.7531, + "step": 3983000 + }, + { + "epoch": 43.83, + "learning_rate": 1.5436767698993346e-08, + "loss": 3.7287, + "step": 3983500 + }, + { + "epoch": 43.83, + "learning_rate": 1.542301556741295e-08, + "loss": 3.7541, + "step": 3984000 + }, + { + "epoch": 43.84, + "learning_rate": 1.5409263435832555e-08, + "loss": 3.7561, + "step": 3984500 + }, + { + "epoch": 43.84, + "learning_rate": 1.539551130425216e-08, + "loss": 3.7396, + "step": 3985000 + }, + { + "epoch": 43.85, + "learning_rate": 1.5381759172671763e-08, + "loss": 3.7395, + "step": 3985500 + }, + { + "epoch": 43.85, + "learning_rate": 1.536800704109137e-08, + "loss": 3.755, + "step": 3986000 + }, + { + "epoch": 43.86, + "learning_rate": 1.5354254909510976e-08, + "loss": 3.7354, + "step": 3986500 + }, + { + "epoch": 43.86, + "learning_rate": 1.5340502777930578e-08, + "loss": 3.7432, + "step": 3987000 + }, + { + "epoch": 43.87, + "learning_rate": 1.5326750646350184e-08, + "loss": 3.7292, + "step": 3987500 + }, + { + "epoch": 43.87, + "learning_rate": 1.531299851476979e-08, + "loss": 3.7362, + "step": 3988000 + }, + { + "epoch": 43.88, + "learning_rate": 1.5299246383189393e-08, + "loss": 3.7508, + "step": 3988500 + }, + { + "epoch": 43.89, + "learning_rate": 1.5285494251609e-08, + "loss": 3.7473, + "step": 3989000 + }, + { + "epoch": 43.89, + "learning_rate": 1.5271742120028605e-08, + "loss": 3.7439, + "step": 3989500 + }, + { + "epoch": 43.9, + "learning_rate": 1.5257989988448208e-08, + "loss": 3.7426, + "step": 3990000 + }, + { + "epoch": 43.9, + "learning_rate": 1.5244237856867814e-08, + "loss": 3.7471, + "step": 3990500 + }, + { + "epoch": 43.91, + "learning_rate": 1.5230485725287417e-08, + "loss": 3.7552, + "step": 3991000 + }, + { + "epoch": 43.91, + "learning_rate": 1.5216733593707023e-08, + "loss": 3.761, + "step": 3991500 + }, + { + "epoch": 43.92, + "learning_rate": 1.520298146212663e-08, + "loss": 3.7363, + "step": 3992000 + }, + { + "epoch": 43.92, + "learning_rate": 1.518922933054623e-08, + "loss": 3.7388, + "step": 3992500 + }, + { + "epoch": 43.93, + "learning_rate": 1.5175477198965837e-08, + "loss": 3.7538, + "step": 3993000 + }, + { + "epoch": 43.94, + "learning_rate": 1.5161725067385443e-08, + "loss": 3.7506, + "step": 3993500 + }, + { + "epoch": 43.94, + "learning_rate": 1.514797293580505e-08, + "loss": 3.7394, + "step": 3994000 + }, + { + "epoch": 43.95, + "learning_rate": 1.5134220804224652e-08, + "loss": 3.7439, + "step": 3994500 + }, + { + "epoch": 43.95, + "learning_rate": 1.5120468672644258e-08, + "loss": 3.76, + "step": 3995000 + }, + { + "epoch": 43.96, + "learning_rate": 1.5106716541063864e-08, + "loss": 3.7396, + "step": 3995500 + }, + { + "epoch": 43.96, + "learning_rate": 1.509296440948347e-08, + "loss": 3.7401, + "step": 3996000 + }, + { + "epoch": 43.97, + "learning_rate": 1.5079212277903076e-08, + "loss": 3.761, + "step": 3996500 + }, + { + "epoch": 43.97, + "learning_rate": 1.506546014632268e-08, + "loss": 3.7527, + "step": 3997000 + }, + { + "epoch": 43.98, + "learning_rate": 1.5051708014742285e-08, + "loss": 3.7523, + "step": 3997500 + }, + { + "epoch": 43.98, + "learning_rate": 1.503795588316189e-08, + "loss": 3.7409, + "step": 3998000 + }, + { + "epoch": 43.99, + "learning_rate": 1.5024203751581494e-08, + "loss": 3.7826, + "step": 3998500 + }, + { + "epoch": 44.0, + "learning_rate": 1.50104516200011e-08, + "loss": 3.7284, + "step": 3999000 + }, + { + "epoch": 44.0, + "eval_loss": 3.8262314796447754, + "eval_runtime": 6.1442, + "eval_samples_per_second": 252.92, + "step": 3999380 + }, + { + "epoch": 44.0, + "learning_rate": 1.4996699488420706e-08, + "loss": 3.7537, + "step": 3999500 + }, + { + "epoch": 44.01, + "learning_rate": 1.498294735684031e-08, + "loss": 3.7579, + "step": 4000000 + }, + { + "epoch": 44.01, + "learning_rate": 1.4969195225259915e-08, + "loss": 3.7557, + "step": 4000500 + }, + { + "epoch": 44.02, + "learning_rate": 1.495544309367952e-08, + "loss": 3.764, + "step": 4001000 + }, + { + "epoch": 44.02, + "learning_rate": 1.4941690962099123e-08, + "loss": 3.7386, + "step": 4001500 + }, + { + "epoch": 44.03, + "learning_rate": 1.492793883051873e-08, + "loss": 3.7663, + "step": 4002000 + }, + { + "epoch": 44.03, + "learning_rate": 1.4914186698938336e-08, + "loss": 3.7542, + "step": 4002500 + }, + { + "epoch": 44.04, + "learning_rate": 1.4900434567357938e-08, + "loss": 3.7655, + "step": 4003000 + }, + { + "epoch": 44.05, + "learning_rate": 1.4886682435777544e-08, + "loss": 3.7417, + "step": 4003500 + }, + { + "epoch": 44.05, + "learning_rate": 1.487293030419715e-08, + "loss": 3.7389, + "step": 4004000 + }, + { + "epoch": 44.06, + "learning_rate": 1.4859178172616755e-08, + "loss": 3.748, + "step": 4004500 + }, + { + "epoch": 44.06, + "learning_rate": 1.484542604103636e-08, + "loss": 3.7346, + "step": 4005000 + }, + { + "epoch": 44.07, + "learning_rate": 1.4831673909455967e-08, + "loss": 3.761, + "step": 4005500 + }, + { + "epoch": 44.07, + "learning_rate": 1.481792177787557e-08, + "loss": 3.7599, + "step": 4006000 + }, + { + "epoch": 44.08, + "learning_rate": 1.4804169646295176e-08, + "loss": 3.7409, + "step": 4006500 + }, + { + "epoch": 44.08, + "learning_rate": 1.4790417514714782e-08, + "loss": 3.7606, + "step": 4007000 + }, + { + "epoch": 44.09, + "learning_rate": 1.4776665383134384e-08, + "loss": 3.7527, + "step": 4007500 + }, + { + "epoch": 44.09, + "learning_rate": 1.476291325155399e-08, + "loss": 3.7391, + "step": 4008000 + }, + { + "epoch": 44.1, + "learning_rate": 1.4749161119973596e-08, + "loss": 3.7454, + "step": 4008500 + }, + { + "epoch": 44.11, + "learning_rate": 1.4735408988393199e-08, + "loss": 3.7531, + "step": 4009000 + }, + { + "epoch": 44.11, + "learning_rate": 1.4721656856812805e-08, + "loss": 3.7613, + "step": 4009500 + }, + { + "epoch": 44.12, + "learning_rate": 1.470790472523241e-08, + "loss": 3.7616, + "step": 4010000 + }, + { + "epoch": 44.12, + "learning_rate": 1.4694152593652016e-08, + "loss": 3.7303, + "step": 4010500 + }, + { + "epoch": 44.13, + "learning_rate": 1.4680400462071622e-08, + "loss": 3.7826, + "step": 4011000 + }, + { + "epoch": 44.13, + "learning_rate": 1.4666648330491224e-08, + "loss": 3.7579, + "step": 4011500 + }, + { + "epoch": 44.14, + "learning_rate": 1.465289619891083e-08, + "loss": 3.7583, + "step": 4012000 + }, + { + "epoch": 44.14, + "learning_rate": 1.4639144067330436e-08, + "loss": 3.7686, + "step": 4012500 + }, + { + "epoch": 44.15, + "learning_rate": 1.4625391935750039e-08, + "loss": 3.7417, + "step": 4013000 + }, + { + "epoch": 44.16, + "learning_rate": 1.4611639804169645e-08, + "loss": 3.755, + "step": 4013500 + }, + { + "epoch": 44.16, + "learning_rate": 1.4597887672589251e-08, + "loss": 3.7374, + "step": 4014000 + }, + { + "epoch": 44.17, + "learning_rate": 1.4584135541008854e-08, + "loss": 3.7483, + "step": 4014500 + }, + { + "epoch": 44.17, + "learning_rate": 1.457038340942846e-08, + "loss": 3.7526, + "step": 4015000 + }, + { + "epoch": 44.18, + "learning_rate": 1.4556631277848066e-08, + "loss": 3.7413, + "step": 4015500 + }, + { + "epoch": 44.18, + "learning_rate": 1.454287914626767e-08, + "loss": 3.7603, + "step": 4016000 + }, + { + "epoch": 44.19, + "learning_rate": 1.4529127014687276e-08, + "loss": 3.7574, + "step": 4016500 + }, + { + "epoch": 44.19, + "learning_rate": 1.4515374883106882e-08, + "loss": 3.7513, + "step": 4017000 + }, + { + "epoch": 44.2, + "learning_rate": 1.4501622751526485e-08, + "loss": 3.7525, + "step": 4017500 + }, + { + "epoch": 44.2, + "learning_rate": 1.4487870619946091e-08, + "loss": 3.7578, + "step": 4018000 + }, + { + "epoch": 44.21, + "learning_rate": 1.4474118488365697e-08, + "loss": 3.7422, + "step": 4018500 + }, + { + "epoch": 44.22, + "learning_rate": 1.44603663567853e-08, + "loss": 3.7666, + "step": 4019000 + }, + { + "epoch": 44.22, + "learning_rate": 1.4446614225204906e-08, + "loss": 3.7305, + "step": 4019500 + }, + { + "epoch": 44.23, + "learning_rate": 1.4432862093624512e-08, + "loss": 3.7384, + "step": 4020000 + }, + { + "epoch": 44.23, + "learning_rate": 1.4419109962044115e-08, + "loss": 3.759, + "step": 4020500 + }, + { + "epoch": 44.24, + "learning_rate": 1.440535783046372e-08, + "loss": 3.7613, + "step": 4021000 + }, + { + "epoch": 44.24, + "learning_rate": 1.4391605698883327e-08, + "loss": 3.7539, + "step": 4021500 + }, + { + "epoch": 44.25, + "learning_rate": 1.4377853567302931e-08, + "loss": 3.741, + "step": 4022000 + }, + { + "epoch": 44.25, + "learning_rate": 1.4364101435722537e-08, + "loss": 3.7586, + "step": 4022500 + }, + { + "epoch": 44.26, + "learning_rate": 1.4350349304142143e-08, + "loss": 3.7493, + "step": 4023000 + }, + { + "epoch": 44.27, + "learning_rate": 1.4336597172561746e-08, + "loss": 3.7632, + "step": 4023500 + }, + { + "epoch": 44.27, + "learning_rate": 1.4322845040981352e-08, + "loss": 3.739, + "step": 4024000 + }, + { + "epoch": 44.28, + "learning_rate": 1.4309092909400958e-08, + "loss": 3.7355, + "step": 4024500 + }, + { + "epoch": 44.28, + "learning_rate": 1.429534077782056e-08, + "loss": 3.7567, + "step": 4025000 + }, + { + "epoch": 44.29, + "learning_rate": 1.4281588646240167e-08, + "loss": 3.7484, + "step": 4025500 + }, + { + "epoch": 44.29, + "learning_rate": 1.4267836514659773e-08, + "loss": 3.753, + "step": 4026000 + }, + { + "epoch": 44.3, + "learning_rate": 1.4254084383079376e-08, + "loss": 3.73, + "step": 4026500 + }, + { + "epoch": 44.3, + "learning_rate": 1.4240332251498982e-08, + "loss": 3.7349, + "step": 4027000 + }, + { + "epoch": 44.31, + "learning_rate": 1.4226580119918588e-08, + "loss": 3.7177, + "step": 4027500 + }, + { + "epoch": 44.31, + "learning_rate": 1.4212827988338192e-08, + "loss": 3.7518, + "step": 4028000 + }, + { + "epoch": 44.32, + "learning_rate": 1.4199075856757798e-08, + "loss": 3.7563, + "step": 4028500 + }, + { + "epoch": 44.33, + "learning_rate": 1.41853237251774e-08, + "loss": 3.7317, + "step": 4029000 + }, + { + "epoch": 44.33, + "learning_rate": 1.4171571593597007e-08, + "loss": 3.7533, + "step": 4029500 + }, + { + "epoch": 44.34, + "learning_rate": 1.4157819462016613e-08, + "loss": 3.7477, + "step": 4030000 + }, + { + "epoch": 44.34, + "learning_rate": 1.4144067330436216e-08, + "loss": 3.7545, + "step": 4030500 + }, + { + "epoch": 44.35, + "learning_rate": 1.4130315198855822e-08, + "loss": 3.7682, + "step": 4031000 + }, + { + "epoch": 44.35, + "learning_rate": 1.4116563067275428e-08, + "loss": 3.7334, + "step": 4031500 + }, + { + "epoch": 44.36, + "learning_rate": 1.410281093569503e-08, + "loss": 3.7458, + "step": 4032000 + }, + { + "epoch": 44.36, + "learning_rate": 1.4089058804114636e-08, + "loss": 3.7319, + "step": 4032500 + }, + { + "epoch": 44.37, + "learning_rate": 1.4075306672534242e-08, + "loss": 3.7357, + "step": 4033000 + }, + { + "epoch": 44.38, + "learning_rate": 1.4061554540953847e-08, + "loss": 3.7435, + "step": 4033500 + }, + { + "epoch": 44.38, + "learning_rate": 1.4047802409373453e-08, + "loss": 3.7371, + "step": 4034000 + }, + { + "epoch": 44.39, + "learning_rate": 1.4034050277793057e-08, + "loss": 3.7445, + "step": 4034500 + }, + { + "epoch": 44.39, + "learning_rate": 1.4020298146212662e-08, + "loss": 3.7634, + "step": 4035000 + }, + { + "epoch": 44.4, + "learning_rate": 1.4006546014632268e-08, + "loss": 3.7366, + "step": 4035500 + }, + { + "epoch": 44.4, + "learning_rate": 1.3992793883051874e-08, + "loss": 3.7398, + "step": 4036000 + }, + { + "epoch": 44.41, + "learning_rate": 1.3979041751471476e-08, + "loss": 3.7416, + "step": 4036500 + }, + { + "epoch": 44.41, + "learning_rate": 1.3965289619891082e-08, + "loss": 3.7362, + "step": 4037000 + }, + { + "epoch": 44.42, + "learning_rate": 1.3951537488310689e-08, + "loss": 3.74, + "step": 4037500 + }, + { + "epoch": 44.42, + "learning_rate": 1.3937785356730291e-08, + "loss": 3.7598, + "step": 4038000 + }, + { + "epoch": 44.43, + "learning_rate": 1.3924033225149897e-08, + "loss": 3.7672, + "step": 4038500 + }, + { + "epoch": 44.44, + "learning_rate": 1.3910281093569503e-08, + "loss": 3.7646, + "step": 4039000 + }, + { + "epoch": 44.44, + "learning_rate": 1.3896528961989106e-08, + "loss": 3.7472, + "step": 4039500 + }, + { + "epoch": 44.45, + "learning_rate": 1.3882776830408712e-08, + "loss": 3.7386, + "step": 4040000 + }, + { + "epoch": 44.45, + "learning_rate": 1.3869024698828318e-08, + "loss": 3.7505, + "step": 4040500 + }, + { + "epoch": 44.46, + "learning_rate": 1.3855272567247922e-08, + "loss": 3.7538, + "step": 4041000 + }, + { + "epoch": 44.46, + "learning_rate": 1.3841520435667529e-08, + "loss": 3.7566, + "step": 4041500 + }, + { + "epoch": 44.47, + "learning_rate": 1.3827768304087135e-08, + "loss": 3.7437, + "step": 4042000 + }, + { + "epoch": 44.47, + "learning_rate": 1.3814016172506737e-08, + "loss": 3.7429, + "step": 4042500 + }, + { + "epoch": 44.48, + "learning_rate": 1.3800264040926343e-08, + "loss": 3.7456, + "step": 4043000 + }, + { + "epoch": 44.49, + "learning_rate": 1.378651190934595e-08, + "loss": 3.7457, + "step": 4043500 + }, + { + "epoch": 44.49, + "learning_rate": 1.3772759777765552e-08, + "loss": 3.7341, + "step": 4044000 + }, + { + "epoch": 44.5, + "learning_rate": 1.3759007646185158e-08, + "loss": 3.7386, + "step": 4044500 + }, + { + "epoch": 44.5, + "learning_rate": 1.3745255514604764e-08, + "loss": 3.7658, + "step": 4045000 + }, + { + "epoch": 44.51, + "learning_rate": 1.3731503383024367e-08, + "loss": 3.7536, + "step": 4045500 + }, + { + "epoch": 44.51, + "learning_rate": 1.3717751251443973e-08, + "loss": 3.7437, + "step": 4046000 + }, + { + "epoch": 44.52, + "learning_rate": 1.3703999119863579e-08, + "loss": 3.7592, + "step": 4046500 + }, + { + "epoch": 44.52, + "learning_rate": 1.3690246988283183e-08, + "loss": 3.7724, + "step": 4047000 + }, + { + "epoch": 44.53, + "learning_rate": 1.367649485670279e-08, + "loss": 3.7576, + "step": 4047500 + }, + { + "epoch": 44.53, + "learning_rate": 1.3662742725122392e-08, + "loss": 3.7467, + "step": 4048000 + }, + { + "epoch": 44.54, + "learning_rate": 1.3648990593541998e-08, + "loss": 3.757, + "step": 4048500 + }, + { + "epoch": 44.55, + "learning_rate": 1.3635238461961604e-08, + "loss": 3.7465, + "step": 4049000 + }, + { + "epoch": 44.55, + "learning_rate": 1.3621486330381207e-08, + "loss": 3.7563, + "step": 4049500 + }, + { + "epoch": 44.56, + "learning_rate": 1.3607734198800813e-08, + "loss": 3.7482, + "step": 4050000 + }, + { + "epoch": 44.56, + "learning_rate": 1.3593982067220419e-08, + "loss": 3.7639, + "step": 4050500 + }, + { + "epoch": 44.57, + "learning_rate": 1.3580229935640022e-08, + "loss": 3.7585, + "step": 4051000 + }, + { + "epoch": 44.57, + "learning_rate": 1.3566477804059628e-08, + "loss": 3.7556, + "step": 4051500 + }, + { + "epoch": 44.58, + "learning_rate": 1.3552725672479234e-08, + "loss": 3.7473, + "step": 4052000 + }, + { + "epoch": 44.58, + "learning_rate": 1.3538973540898838e-08, + "loss": 3.7473, + "step": 4052500 + }, + { + "epoch": 44.59, + "learning_rate": 1.3525221409318444e-08, + "loss": 3.77, + "step": 4053000 + }, + { + "epoch": 44.6, + "learning_rate": 1.351146927773805e-08, + "loss": 3.772, + "step": 4053500 + }, + { + "epoch": 44.6, + "learning_rate": 1.3497717146157653e-08, + "loss": 3.7494, + "step": 4054000 + }, + { + "epoch": 44.61, + "learning_rate": 1.3483965014577259e-08, + "loss": 3.7489, + "step": 4054500 + }, + { + "epoch": 44.61, + "learning_rate": 1.3470212882996865e-08, + "loss": 3.747, + "step": 4055000 + }, + { + "epoch": 44.62, + "learning_rate": 1.3456460751416468e-08, + "loss": 3.7518, + "step": 4055500 + }, + { + "epoch": 44.62, + "learning_rate": 1.3442708619836074e-08, + "loss": 3.734, + "step": 4056000 + }, + { + "epoch": 44.63, + "learning_rate": 1.342895648825568e-08, + "loss": 3.7331, + "step": 4056500 + }, + { + "epoch": 44.63, + "learning_rate": 1.3415204356675283e-08, + "loss": 3.7727, + "step": 4057000 + }, + { + "epoch": 44.64, + "learning_rate": 1.3401452225094889e-08, + "loss": 3.7437, + "step": 4057500 + }, + { + "epoch": 44.64, + "learning_rate": 1.3387700093514495e-08, + "loss": 3.7533, + "step": 4058000 + }, + { + "epoch": 44.65, + "learning_rate": 1.3373947961934099e-08, + "loss": 3.763, + "step": 4058500 + }, + { + "epoch": 44.66, + "learning_rate": 1.3360195830353705e-08, + "loss": 3.7385, + "step": 4059000 + }, + { + "epoch": 44.66, + "learning_rate": 1.3346443698773311e-08, + "loss": 3.7564, + "step": 4059500 + }, + { + "epoch": 44.67, + "learning_rate": 1.3332691567192914e-08, + "loss": 3.7568, + "step": 4060000 + }, + { + "epoch": 44.67, + "learning_rate": 1.331893943561252e-08, + "loss": 3.7612, + "step": 4060500 + }, + { + "epoch": 44.68, + "learning_rate": 1.3305187304032126e-08, + "loss": 3.7619, + "step": 4061000 + }, + { + "epoch": 44.68, + "learning_rate": 1.3291435172451729e-08, + "loss": 3.7384, + "step": 4061500 + }, + { + "epoch": 44.69, + "learning_rate": 1.3277683040871335e-08, + "loss": 3.7491, + "step": 4062000 + }, + { + "epoch": 44.69, + "learning_rate": 1.326393090929094e-08, + "loss": 3.7497, + "step": 4062500 + }, + { + "epoch": 44.7, + "learning_rate": 1.3250178777710543e-08, + "loss": 3.7484, + "step": 4063000 + }, + { + "epoch": 44.71, + "learning_rate": 1.323642664613015e-08, + "loss": 3.756, + "step": 4063500 + }, + { + "epoch": 44.71, + "learning_rate": 1.3222674514549755e-08, + "loss": 3.7705, + "step": 4064000 + }, + { + "epoch": 44.72, + "learning_rate": 1.320892238296936e-08, + "loss": 3.7411, + "step": 4064500 + }, + { + "epoch": 44.72, + "learning_rate": 1.3195170251388966e-08, + "loss": 3.7412, + "step": 4065000 + }, + { + "epoch": 44.73, + "learning_rate": 1.3181418119808572e-08, + "loss": 3.7668, + "step": 4065500 + }, + { + "epoch": 44.73, + "learning_rate": 1.3167665988228175e-08, + "loss": 3.7734, + "step": 4066000 + }, + { + "epoch": 44.74, + "learning_rate": 1.315391385664778e-08, + "loss": 3.7539, + "step": 4066500 + }, + { + "epoch": 44.74, + "learning_rate": 1.3140161725067383e-08, + "loss": 3.7549, + "step": 4067000 + }, + { + "epoch": 44.75, + "learning_rate": 1.312640959348699e-08, + "loss": 3.7589, + "step": 4067500 + }, + { + "epoch": 44.75, + "learning_rate": 1.3112657461906595e-08, + "loss": 3.738, + "step": 4068000 + }, + { + "epoch": 44.76, + "learning_rate": 1.3098905330326198e-08, + "loss": 3.7565, + "step": 4068500 + }, + { + "epoch": 44.77, + "learning_rate": 1.3085153198745804e-08, + "loss": 3.7483, + "step": 4069000 + }, + { + "epoch": 44.77, + "learning_rate": 1.307140106716541e-08, + "loss": 3.7539, + "step": 4069500 + }, + { + "epoch": 44.78, + "learning_rate": 1.3057648935585015e-08, + "loss": 3.7416, + "step": 4070000 + }, + { + "epoch": 44.78, + "learning_rate": 1.304389680400462e-08, + "loss": 3.7318, + "step": 4070500 + }, + { + "epoch": 44.79, + "learning_rate": 1.3030144672424227e-08, + "loss": 3.7419, + "step": 4071000 + }, + { + "epoch": 44.79, + "learning_rate": 1.301639254084383e-08, + "loss": 3.7648, + "step": 4071500 + }, + { + "epoch": 44.8, + "learning_rate": 1.3002640409263435e-08, + "loss": 3.7393, + "step": 4072000 + }, + { + "epoch": 44.8, + "learning_rate": 1.2988888277683041e-08, + "loss": 3.747, + "step": 4072500 + }, + { + "epoch": 44.81, + "learning_rate": 1.2975136146102644e-08, + "loss": 3.7453, + "step": 4073000 + }, + { + "epoch": 44.82, + "learning_rate": 1.296138401452225e-08, + "loss": 3.7538, + "step": 4073500 + }, + { + "epoch": 44.82, + "learning_rate": 1.2947631882941856e-08, + "loss": 3.7526, + "step": 4074000 + }, + { + "epoch": 44.83, + "learning_rate": 1.2933879751361459e-08, + "loss": 3.7563, + "step": 4074500 + }, + { + "epoch": 44.83, + "learning_rate": 1.2920127619781065e-08, + "loss": 3.769, + "step": 4075000 + }, + { + "epoch": 44.84, + "learning_rate": 1.2906375488200671e-08, + "loss": 3.764, + "step": 4075500 + }, + { + "epoch": 44.84, + "learning_rate": 1.2892623356620275e-08, + "loss": 3.7602, + "step": 4076000 + }, + { + "epoch": 44.85, + "learning_rate": 1.2878871225039881e-08, + "loss": 3.756, + "step": 4076500 + }, + { + "epoch": 44.85, + "learning_rate": 1.2865119093459486e-08, + "loss": 3.7634, + "step": 4077000 + }, + { + "epoch": 44.86, + "learning_rate": 1.285136696187909e-08, + "loss": 3.7428, + "step": 4077500 + }, + { + "epoch": 44.86, + "learning_rate": 1.2837614830298696e-08, + "loss": 3.7492, + "step": 4078000 + }, + { + "epoch": 44.87, + "learning_rate": 1.2823862698718302e-08, + "loss": 3.7425, + "step": 4078500 + }, + { + "epoch": 44.88, + "learning_rate": 1.2810110567137905e-08, + "loss": 3.7259, + "step": 4079000 + }, + { + "epoch": 44.88, + "learning_rate": 1.2796358435557511e-08, + "loss": 3.7515, + "step": 4079500 + }, + { + "epoch": 44.89, + "learning_rate": 1.2782606303977117e-08, + "loss": 3.7533, + "step": 4080000 + }, + { + "epoch": 44.89, + "learning_rate": 1.276885417239672e-08, + "loss": 3.7605, + "step": 4080500 + }, + { + "epoch": 44.9, + "learning_rate": 1.2755102040816326e-08, + "loss": 3.735, + "step": 4081000 + }, + { + "epoch": 44.9, + "learning_rate": 1.2741349909235932e-08, + "loss": 3.7485, + "step": 4081500 + }, + { + "epoch": 44.91, + "learning_rate": 1.2727597777655535e-08, + "loss": 3.768, + "step": 4082000 + }, + { + "epoch": 44.91, + "learning_rate": 1.271384564607514e-08, + "loss": 3.7322, + "step": 4082500 + }, + { + "epoch": 44.92, + "learning_rate": 1.2700093514494747e-08, + "loss": 3.7676, + "step": 4083000 + }, + { + "epoch": 44.93, + "learning_rate": 1.2686341382914351e-08, + "loss": 3.7629, + "step": 4083500 + }, + { + "epoch": 44.93, + "learning_rate": 1.2672589251333957e-08, + "loss": 3.7592, + "step": 4084000 + }, + { + "epoch": 44.94, + "learning_rate": 1.2658837119753563e-08, + "loss": 3.7399, + "step": 4084500 + }, + { + "epoch": 44.94, + "learning_rate": 1.2645084988173166e-08, + "loss": 3.7557, + "step": 4085000 + }, + { + "epoch": 44.95, + "learning_rate": 1.2631332856592772e-08, + "loss": 3.7324, + "step": 4085500 + }, + { + "epoch": 44.95, + "learning_rate": 1.2617580725012375e-08, + "loss": 3.7374, + "step": 4086000 + }, + { + "epoch": 44.96, + "learning_rate": 1.260382859343198e-08, + "loss": 3.7437, + "step": 4086500 + }, + { + "epoch": 44.96, + "learning_rate": 1.2590076461851587e-08, + "loss": 3.7497, + "step": 4087000 + }, + { + "epoch": 44.97, + "learning_rate": 1.257632433027119e-08, + "loss": 3.7312, + "step": 4087500 + }, + { + "epoch": 44.97, + "learning_rate": 1.2562572198690795e-08, + "loss": 3.7563, + "step": 4088000 + }, + { + "epoch": 44.98, + "learning_rate": 1.2548820067110401e-08, + "loss": 3.739, + "step": 4088500 + }, + { + "epoch": 44.99, + "learning_rate": 1.2535067935530006e-08, + "loss": 3.7433, + "step": 4089000 + }, + { + "epoch": 44.99, + "learning_rate": 1.2521315803949612e-08, + "loss": 3.7447, + "step": 4089500 + }, + { + "epoch": 45.0, + "learning_rate": 1.2507563672369218e-08, + "loss": 3.7595, + "step": 4090000 + }, + { + "epoch": 45.0, + "eval_loss": 3.8258707523345947, + "eval_runtime": 6.1512, + "eval_samples_per_second": 252.635, + "step": 4090275 + }, + { + "epoch": 45.0, + "learning_rate": 1.2493811540788822e-08, + "loss": 3.7496, + "step": 4090500 + }, + { + "epoch": 45.01, + "learning_rate": 1.2480059409208427e-08, + "loss": 3.7437, + "step": 4091000 + }, + { + "epoch": 45.01, + "learning_rate": 1.2466307277628031e-08, + "loss": 3.764, + "step": 4091500 + }, + { + "epoch": 45.02, + "learning_rate": 1.2452555146047637e-08, + "loss": 3.7545, + "step": 4092000 + }, + { + "epoch": 45.02, + "learning_rate": 1.2438803014467241e-08, + "loss": 3.7465, + "step": 4092500 + }, + { + "epoch": 45.03, + "learning_rate": 1.2425050882886846e-08, + "loss": 3.7462, + "step": 4093000 + }, + { + "epoch": 45.04, + "learning_rate": 1.2411298751306452e-08, + "loss": 3.7541, + "step": 4093500 + }, + { + "epoch": 45.04, + "learning_rate": 1.2397546619726056e-08, + "loss": 3.7485, + "step": 4094000 + }, + { + "epoch": 45.05, + "learning_rate": 1.2383794488145662e-08, + "loss": 3.7406, + "step": 4094500 + }, + { + "epoch": 45.05, + "learning_rate": 1.2370042356565268e-08, + "loss": 3.7352, + "step": 4095000 + }, + { + "epoch": 45.06, + "learning_rate": 1.2356290224984873e-08, + "loss": 3.7515, + "step": 4095500 + }, + { + "epoch": 45.06, + "learning_rate": 1.2342538093404477e-08, + "loss": 3.7522, + "step": 4096000 + }, + { + "epoch": 45.07, + "learning_rate": 1.2328785961824083e-08, + "loss": 3.7386, + "step": 4096500 + }, + { + "epoch": 45.07, + "learning_rate": 1.2315033830243688e-08, + "loss": 3.7199, + "step": 4097000 + }, + { + "epoch": 45.08, + "learning_rate": 1.2301281698663292e-08, + "loss": 3.7305, + "step": 4097500 + }, + { + "epoch": 45.08, + "learning_rate": 1.2287529567082898e-08, + "loss": 3.7574, + "step": 4098000 + }, + { + "epoch": 45.09, + "learning_rate": 1.2273777435502502e-08, + "loss": 3.7462, + "step": 4098500 + }, + { + "epoch": 45.1, + "learning_rate": 1.2260025303922107e-08, + "loss": 3.7577, + "step": 4099000 + }, + { + "epoch": 45.1, + "learning_rate": 1.2246273172341711e-08, + "loss": 3.7398, + "step": 4099500 + }, + { + "epoch": 45.11, + "learning_rate": 1.2232521040761317e-08, + "loss": 3.7538, + "step": 4100000 + }, + { + "epoch": 45.11, + "learning_rate": 1.2218768909180923e-08, + "loss": 3.7667, + "step": 4100500 + }, + { + "epoch": 45.12, + "learning_rate": 1.2205016777600528e-08, + "loss": 3.7487, + "step": 4101000 + }, + { + "epoch": 45.12, + "learning_rate": 1.2191264646020134e-08, + "loss": 3.7554, + "step": 4101500 + }, + { + "epoch": 45.13, + "learning_rate": 1.2177512514439738e-08, + "loss": 3.7424, + "step": 4102000 + }, + { + "epoch": 45.13, + "learning_rate": 1.2163760382859342e-08, + "loss": 3.7491, + "step": 4102500 + }, + { + "epoch": 45.14, + "learning_rate": 1.2150008251278948e-08, + "loss": 3.7392, + "step": 4103000 + }, + { + "epoch": 45.15, + "learning_rate": 1.2136256119698553e-08, + "loss": 3.7595, + "step": 4103500 + }, + { + "epoch": 45.15, + "learning_rate": 1.2122503988118157e-08, + "loss": 3.7527, + "step": 4104000 + }, + { + "epoch": 45.16, + "learning_rate": 1.2108751856537763e-08, + "loss": 3.7458, + "step": 4104500 + }, + { + "epoch": 45.16, + "learning_rate": 1.2094999724957368e-08, + "loss": 3.7662, + "step": 4105000 + }, + { + "epoch": 45.17, + "learning_rate": 1.2081247593376972e-08, + "loss": 3.7586, + "step": 4105500 + }, + { + "epoch": 45.17, + "learning_rate": 1.2067495461796578e-08, + "loss": 3.7464, + "step": 4106000 + }, + { + "epoch": 45.18, + "learning_rate": 1.2053743330216182e-08, + "loss": 3.7525, + "step": 4106500 + }, + { + "epoch": 45.18, + "learning_rate": 1.2039991198635788e-08, + "loss": 3.7604, + "step": 4107000 + }, + { + "epoch": 45.19, + "learning_rate": 1.2026239067055394e-08, + "loss": 3.7511, + "step": 4107500 + }, + { + "epoch": 45.2, + "learning_rate": 1.2012486935474999e-08, + "loss": 3.7492, + "step": 4108000 + }, + { + "epoch": 45.2, + "learning_rate": 1.1998734803894603e-08, + "loss": 3.7479, + "step": 4108500 + }, + { + "epoch": 45.21, + "learning_rate": 1.1984982672314208e-08, + "loss": 3.7428, + "step": 4109000 + }, + { + "epoch": 45.21, + "learning_rate": 1.1971230540733814e-08, + "loss": 3.75, + "step": 4109500 + }, + { + "epoch": 45.22, + "learning_rate": 1.1957478409153418e-08, + "loss": 3.7434, + "step": 4110000 + }, + { + "epoch": 45.22, + "learning_rate": 1.1943726277573022e-08, + "loss": 3.7396, + "step": 4110500 + }, + { + "epoch": 45.23, + "learning_rate": 1.1929974145992628e-08, + "loss": 3.7595, + "step": 4111000 + }, + { + "epoch": 45.23, + "learning_rate": 1.1916222014412233e-08, + "loss": 3.74, + "step": 4111500 + }, + { + "epoch": 45.24, + "learning_rate": 1.1902469882831837e-08, + "loss": 3.7367, + "step": 4112000 + }, + { + "epoch": 45.24, + "learning_rate": 1.1888717751251443e-08, + "loss": 3.7594, + "step": 4112500 + }, + { + "epoch": 45.25, + "learning_rate": 1.187496561967105e-08, + "loss": 3.7456, + "step": 4113000 + }, + { + "epoch": 45.26, + "learning_rate": 1.1861213488090654e-08, + "loss": 3.7516, + "step": 4113500 + }, + { + "epoch": 45.26, + "learning_rate": 1.184746135651026e-08, + "loss": 3.7299, + "step": 4114000 + }, + { + "epoch": 45.27, + "learning_rate": 1.1833709224929864e-08, + "loss": 3.7423, + "step": 4114500 + }, + { + "epoch": 45.27, + "learning_rate": 1.1819957093349468e-08, + "loss": 3.7479, + "step": 4115000 + }, + { + "epoch": 45.28, + "learning_rate": 1.1806204961769074e-08, + "loss": 3.7647, + "step": 4115500 + }, + { + "epoch": 45.28, + "learning_rate": 1.1792452830188679e-08, + "loss": 3.748, + "step": 4116000 + }, + { + "epoch": 45.29, + "learning_rate": 1.1778700698608283e-08, + "loss": 3.762, + "step": 4116500 + }, + { + "epoch": 45.29, + "learning_rate": 1.176494856702789e-08, + "loss": 3.745, + "step": 4117000 + }, + { + "epoch": 45.3, + "learning_rate": 1.1751196435447494e-08, + "loss": 3.7647, + "step": 4117500 + }, + { + "epoch": 45.31, + "learning_rate": 1.1737444303867098e-08, + "loss": 3.7378, + "step": 4118000 + }, + { + "epoch": 45.31, + "learning_rate": 1.1723692172286704e-08, + "loss": 3.7709, + "step": 4118500 + }, + { + "epoch": 45.32, + "learning_rate": 1.170994004070631e-08, + "loss": 3.7667, + "step": 4119000 + }, + { + "epoch": 45.32, + "learning_rate": 1.1696187909125914e-08, + "loss": 3.7512, + "step": 4119500 + }, + { + "epoch": 45.33, + "learning_rate": 1.1682435777545519e-08, + "loss": 3.7629, + "step": 4120000 + }, + { + "epoch": 45.33, + "learning_rate": 1.1668683645965125e-08, + "loss": 3.7562, + "step": 4120500 + }, + { + "epoch": 45.34, + "learning_rate": 1.165493151438473e-08, + "loss": 3.754, + "step": 4121000 + }, + { + "epoch": 45.34, + "learning_rate": 1.1641179382804334e-08, + "loss": 3.745, + "step": 4121500 + }, + { + "epoch": 45.35, + "learning_rate": 1.162742725122394e-08, + "loss": 3.7402, + "step": 4122000 + }, + { + "epoch": 45.35, + "learning_rate": 1.1613675119643544e-08, + "loss": 3.7519, + "step": 4122500 + }, + { + "epoch": 45.36, + "learning_rate": 1.1599922988063148e-08, + "loss": 3.7237, + "step": 4123000 + }, + { + "epoch": 45.37, + "learning_rate": 1.1586170856482754e-08, + "loss": 3.7442, + "step": 4123500 + }, + { + "epoch": 45.37, + "learning_rate": 1.1572418724902359e-08, + "loss": 3.7638, + "step": 4124000 + }, + { + "epoch": 45.38, + "learning_rate": 1.1558666593321965e-08, + "loss": 3.7561, + "step": 4124500 + }, + { + "epoch": 45.38, + "learning_rate": 1.154491446174157e-08, + "loss": 3.7455, + "step": 4125000 + }, + { + "epoch": 45.39, + "learning_rate": 1.1531162330161175e-08, + "loss": 3.7481, + "step": 4125500 + }, + { + "epoch": 45.39, + "learning_rate": 1.151741019858078e-08, + "loss": 3.7645, + "step": 4126000 + }, + { + "epoch": 45.4, + "learning_rate": 1.1503658067000386e-08, + "loss": 3.7539, + "step": 4126500 + }, + { + "epoch": 45.4, + "learning_rate": 1.148990593541999e-08, + "loss": 3.7369, + "step": 4127000 + }, + { + "epoch": 45.41, + "learning_rate": 1.1476153803839594e-08, + "loss": 3.7523, + "step": 4127500 + }, + { + "epoch": 45.42, + "learning_rate": 1.1462401672259199e-08, + "loss": 3.7414, + "step": 4128000 + }, + { + "epoch": 45.42, + "learning_rate": 1.1448649540678805e-08, + "loss": 3.7732, + "step": 4128500 + }, + { + "epoch": 45.43, + "learning_rate": 1.143489740909841e-08, + "loss": 3.7408, + "step": 4129000 + }, + { + "epoch": 45.43, + "learning_rate": 1.1421145277518014e-08, + "loss": 3.7439, + "step": 4129500 + }, + { + "epoch": 45.44, + "learning_rate": 1.140739314593762e-08, + "loss": 3.7622, + "step": 4130000 + }, + { + "epoch": 45.44, + "learning_rate": 1.1393641014357224e-08, + "loss": 3.7326, + "step": 4130500 + }, + { + "epoch": 45.45, + "learning_rate": 1.137988888277683e-08, + "loss": 3.7452, + "step": 4131000 + }, + { + "epoch": 45.45, + "learning_rate": 1.1366136751196436e-08, + "loss": 3.7491, + "step": 4131500 + }, + { + "epoch": 45.46, + "learning_rate": 1.135238461961604e-08, + "loss": 3.742, + "step": 4132000 + }, + { + "epoch": 45.46, + "learning_rate": 1.1338632488035645e-08, + "loss": 3.7452, + "step": 4132500 + }, + { + "epoch": 45.47, + "learning_rate": 1.1324880356455251e-08, + "loss": 3.7576, + "step": 4133000 + }, + { + "epoch": 45.48, + "learning_rate": 1.1311128224874855e-08, + "loss": 3.7506, + "step": 4133500 + }, + { + "epoch": 45.48, + "learning_rate": 1.129737609329446e-08, + "loss": 3.7465, + "step": 4134000 + }, + { + "epoch": 45.49, + "learning_rate": 1.1283623961714066e-08, + "loss": 3.7637, + "step": 4134500 + }, + { + "epoch": 45.49, + "learning_rate": 1.126987183013367e-08, + "loss": 3.7578, + "step": 4135000 + }, + { + "epoch": 45.5, + "learning_rate": 1.1256119698553274e-08, + "loss": 3.7442, + "step": 4135500 + }, + { + "epoch": 45.5, + "learning_rate": 1.124236756697288e-08, + "loss": 3.7477, + "step": 4136000 + }, + { + "epoch": 45.51, + "learning_rate": 1.1228615435392485e-08, + "loss": 3.7518, + "step": 4136500 + }, + { + "epoch": 45.51, + "learning_rate": 1.1214863303812091e-08, + "loss": 3.7661, + "step": 4137000 + }, + { + "epoch": 45.52, + "learning_rate": 1.1201111172231695e-08, + "loss": 3.7731, + "step": 4137500 + }, + { + "epoch": 45.53, + "learning_rate": 1.1187359040651301e-08, + "loss": 3.7595, + "step": 4138000 + }, + { + "epoch": 45.53, + "learning_rate": 1.1173606909070906e-08, + "loss": 3.7497, + "step": 4138500 + }, + { + "epoch": 45.54, + "learning_rate": 1.115985477749051e-08, + "loss": 3.7492, + "step": 4139000 + }, + { + "epoch": 45.54, + "learning_rate": 1.1146102645910116e-08, + "loss": 3.743, + "step": 4139500 + }, + { + "epoch": 45.55, + "learning_rate": 1.113235051432972e-08, + "loss": 3.7207, + "step": 4140000 + }, + { + "epoch": 45.55, + "learning_rate": 1.1118598382749325e-08, + "loss": 3.7361, + "step": 4140500 + }, + { + "epoch": 45.56, + "learning_rate": 1.1104846251168931e-08, + "loss": 3.7357, + "step": 4141000 + }, + { + "epoch": 45.56, + "learning_rate": 1.1091094119588535e-08, + "loss": 3.7283, + "step": 4141500 + }, + { + "epoch": 45.57, + "learning_rate": 1.107734198800814e-08, + "loss": 3.7411, + "step": 4142000 + }, + { + "epoch": 45.57, + "learning_rate": 1.1063589856427746e-08, + "loss": 3.7527, + "step": 4142500 + }, + { + "epoch": 45.58, + "learning_rate": 1.1049837724847352e-08, + "loss": 3.742, + "step": 4143000 + }, + { + "epoch": 45.59, + "learning_rate": 1.1036085593266956e-08, + "loss": 3.7526, + "step": 4143500 + }, + { + "epoch": 45.59, + "learning_rate": 1.1022333461686562e-08, + "loss": 3.7642, + "step": 4144000 + }, + { + "epoch": 45.6, + "learning_rate": 1.1008581330106167e-08, + "loss": 3.7469, + "step": 4144500 + }, + { + "epoch": 45.6, + "learning_rate": 1.0994829198525771e-08, + "loss": 3.7305, + "step": 4145000 + }, + { + "epoch": 45.61, + "learning_rate": 1.0981077066945377e-08, + "loss": 3.752, + "step": 4145500 + }, + { + "epoch": 45.61, + "learning_rate": 1.0967324935364981e-08, + "loss": 3.7516, + "step": 4146000 + }, + { + "epoch": 45.62, + "learning_rate": 1.0953572803784586e-08, + "loss": 3.7748, + "step": 4146500 + }, + { + "epoch": 45.62, + "learning_rate": 1.093982067220419e-08, + "loss": 3.7414, + "step": 4147000 + }, + { + "epoch": 45.63, + "learning_rate": 1.0926068540623796e-08, + "loss": 3.7317, + "step": 4147500 + }, + { + "epoch": 45.64, + "learning_rate": 1.09123164090434e-08, + "loss": 3.7484, + "step": 4148000 + }, + { + "epoch": 45.64, + "learning_rate": 1.0898564277463005e-08, + "loss": 3.7328, + "step": 4148500 + }, + { + "epoch": 45.65, + "learning_rate": 1.0884812145882611e-08, + "loss": 3.7496, + "step": 4149000 + }, + { + "epoch": 45.65, + "learning_rate": 1.0871060014302217e-08, + "loss": 3.7617, + "step": 4149500 + }, + { + "epoch": 45.66, + "learning_rate": 1.0857307882721821e-08, + "loss": 3.7435, + "step": 4150000 + }, + { + "epoch": 45.66, + "learning_rate": 1.0843555751141427e-08, + "loss": 3.7438, + "step": 4150500 + }, + { + "epoch": 45.67, + "learning_rate": 1.0829803619561032e-08, + "loss": 3.7632, + "step": 4151000 + }, + { + "epoch": 45.67, + "learning_rate": 1.0816051487980636e-08, + "loss": 3.7296, + "step": 4151500 + }, + { + "epoch": 45.68, + "learning_rate": 1.0802299356400242e-08, + "loss": 3.7491, + "step": 4152000 + }, + { + "epoch": 45.68, + "learning_rate": 1.0788547224819847e-08, + "loss": 3.7551, + "step": 4152500 + }, + { + "epoch": 45.69, + "learning_rate": 1.0774795093239451e-08, + "loss": 3.7589, + "step": 4153000 + }, + { + "epoch": 45.7, + "learning_rate": 1.0761042961659057e-08, + "loss": 3.7543, + "step": 4153500 + }, + { + "epoch": 45.7, + "learning_rate": 1.0747290830078661e-08, + "loss": 3.7544, + "step": 4154000 + }, + { + "epoch": 45.71, + "learning_rate": 1.0733538698498266e-08, + "loss": 3.7683, + "step": 4154500 + }, + { + "epoch": 45.71, + "learning_rate": 1.0719786566917872e-08, + "loss": 3.769, + "step": 4155000 + }, + { + "epoch": 45.72, + "learning_rate": 1.0706034435337478e-08, + "loss": 3.7353, + "step": 4155500 + }, + { + "epoch": 45.72, + "learning_rate": 1.0692282303757082e-08, + "loss": 3.7388, + "step": 4156000 + }, + { + "epoch": 45.73, + "learning_rate": 1.0678530172176687e-08, + "loss": 3.7373, + "step": 4156500 + }, + { + "epoch": 45.73, + "learning_rate": 1.0664778040596293e-08, + "loss": 3.744, + "step": 4157000 + }, + { + "epoch": 45.74, + "learning_rate": 1.0651025909015897e-08, + "loss": 3.7501, + "step": 4157500 + }, + { + "epoch": 45.75, + "learning_rate": 1.0637273777435501e-08, + "loss": 3.7413, + "step": 4158000 + }, + { + "epoch": 45.75, + "learning_rate": 1.0623521645855107e-08, + "loss": 3.7434, + "step": 4158500 + }, + { + "epoch": 45.76, + "learning_rate": 1.0609769514274712e-08, + "loss": 3.7508, + "step": 4159000 + }, + { + "epoch": 45.76, + "learning_rate": 1.0596017382694316e-08, + "loss": 3.7676, + "step": 4159500 + }, + { + "epoch": 45.77, + "learning_rate": 1.0582265251113922e-08, + "loss": 3.7535, + "step": 4160000 + }, + { + "epoch": 45.77, + "learning_rate": 1.0568513119533527e-08, + "loss": 3.7582, + "step": 4160500 + }, + { + "epoch": 45.78, + "learning_rate": 1.0554760987953133e-08, + "loss": 3.7277, + "step": 4161000 + }, + { + "epoch": 45.78, + "learning_rate": 1.0541008856372739e-08, + "loss": 3.7672, + "step": 4161500 + }, + { + "epoch": 45.79, + "learning_rate": 1.0527256724792343e-08, + "loss": 3.7419, + "step": 4162000 + }, + { + "epoch": 45.79, + "learning_rate": 1.0513504593211947e-08, + "loss": 3.7788, + "step": 4162500 + }, + { + "epoch": 45.8, + "learning_rate": 1.0499752461631553e-08, + "loss": 3.7495, + "step": 4163000 + }, + { + "epoch": 45.81, + "learning_rate": 1.0486000330051158e-08, + "loss": 3.7481, + "step": 4163500 + }, + { + "epoch": 45.81, + "learning_rate": 1.0472248198470762e-08, + "loss": 3.7394, + "step": 4164000 + }, + { + "epoch": 45.82, + "learning_rate": 1.0458496066890367e-08, + "loss": 3.7333, + "step": 4164500 + }, + { + "epoch": 45.82, + "learning_rate": 1.0444743935309973e-08, + "loss": 3.7548, + "step": 4165000 + }, + { + "epoch": 45.83, + "learning_rate": 1.0430991803729577e-08, + "loss": 3.7429, + "step": 4165500 + }, + { + "epoch": 45.83, + "learning_rate": 1.0417239672149181e-08, + "loss": 3.7766, + "step": 4166000 + }, + { + "epoch": 45.84, + "learning_rate": 1.0403487540568787e-08, + "loss": 3.7629, + "step": 4166500 + }, + { + "epoch": 45.84, + "learning_rate": 1.0389735408988393e-08, + "loss": 3.7372, + "step": 4167000 + }, + { + "epoch": 45.85, + "learning_rate": 1.0375983277407998e-08, + "loss": 3.7685, + "step": 4167500 + }, + { + "epoch": 45.86, + "learning_rate": 1.0362231145827604e-08, + "loss": 3.7408, + "step": 4168000 + }, + { + "epoch": 45.86, + "learning_rate": 1.0348479014247208e-08, + "loss": 3.756, + "step": 4168500 + }, + { + "epoch": 45.87, + "learning_rate": 1.0334726882666813e-08, + "loss": 3.758, + "step": 4169000 + }, + { + "epoch": 45.87, + "learning_rate": 1.0320974751086419e-08, + "loss": 3.7398, + "step": 4169500 + }, + { + "epoch": 45.88, + "learning_rate": 1.0307222619506023e-08, + "loss": 3.7536, + "step": 4170000 + }, + { + "epoch": 45.88, + "learning_rate": 1.0293470487925627e-08, + "loss": 3.7517, + "step": 4170500 + }, + { + "epoch": 45.89, + "learning_rate": 1.0279718356345233e-08, + "loss": 3.7419, + "step": 4171000 + }, + { + "epoch": 45.89, + "learning_rate": 1.0265966224764838e-08, + "loss": 3.7461, + "step": 4171500 + }, + { + "epoch": 45.9, + "learning_rate": 1.0252214093184442e-08, + "loss": 3.7427, + "step": 4172000 + }, + { + "epoch": 45.9, + "learning_rate": 1.0238461961604048e-08, + "loss": 3.7483, + "step": 4172500 + }, + { + "epoch": 45.91, + "learning_rate": 1.0224709830023653e-08, + "loss": 3.768, + "step": 4173000 + }, + { + "epoch": 45.92, + "learning_rate": 1.0210957698443259e-08, + "loss": 3.7678, + "step": 4173500 + }, + { + "epoch": 45.92, + "learning_rate": 1.0197205566862863e-08, + "loss": 3.768, + "step": 4174000 + }, + { + "epoch": 45.93, + "learning_rate": 1.0183453435282469e-08, + "loss": 3.7499, + "step": 4174500 + }, + { + "epoch": 45.93, + "learning_rate": 1.0169701303702073e-08, + "loss": 3.7557, + "step": 4175000 + }, + { + "epoch": 45.94, + "learning_rate": 1.0155949172121678e-08, + "loss": 3.7549, + "step": 4175500 + }, + { + "epoch": 45.94, + "learning_rate": 1.0142197040541284e-08, + "loss": 3.7557, + "step": 4176000 + }, + { + "epoch": 45.95, + "learning_rate": 1.0128444908960888e-08, + "loss": 3.7415, + "step": 4176500 + }, + { + "epoch": 45.95, + "learning_rate": 1.0114692777380493e-08, + "loss": 3.7557, + "step": 4177000 + }, + { + "epoch": 45.96, + "learning_rate": 1.0100940645800099e-08, + "loss": 3.7376, + "step": 4177500 + }, + { + "epoch": 45.97, + "learning_rate": 1.0087188514219703e-08, + "loss": 3.7696, + "step": 4178000 + }, + { + "epoch": 45.97, + "learning_rate": 1.0073436382639307e-08, + "loss": 3.7413, + "step": 4178500 + }, + { + "epoch": 45.98, + "learning_rate": 1.0059684251058913e-08, + "loss": 3.7494, + "step": 4179000 + }, + { + "epoch": 45.98, + "learning_rate": 1.004593211947852e-08, + "loss": 3.7677, + "step": 4179500 + }, + { + "epoch": 45.99, + "learning_rate": 1.0032179987898124e-08, + "loss": 3.7453, + "step": 4180000 + }, + { + "epoch": 45.99, + "learning_rate": 1.001842785631773e-08, + "loss": 3.7573, + "step": 4180500 + }, + { + "epoch": 46.0, + "learning_rate": 1.0004675724737334e-08, + "loss": 3.7583, + "step": 4181000 + }, + { + "epoch": 46.0, + "eval_loss": 3.825601577758789, + "eval_runtime": 6.1432, + "eval_samples_per_second": 252.961, + "step": 4181170 + }, + { + "epoch": 46.0, + "learning_rate": 9.990923593156939e-09, + "loss": 3.7583, + "step": 4181500 + }, + { + "epoch": 46.01, + "learning_rate": 9.977171461576545e-09, + "loss": 3.739, + "step": 4182000 + }, + { + "epoch": 46.01, + "learning_rate": 9.963419329996149e-09, + "loss": 3.7312, + "step": 4182500 + }, + { + "epoch": 46.02, + "learning_rate": 9.949667198415753e-09, + "loss": 3.7493, + "step": 4183000 + }, + { + "epoch": 46.03, + "learning_rate": 9.935915066835358e-09, + "loss": 3.7652, + "step": 4183500 + }, + { + "epoch": 46.03, + "learning_rate": 9.922162935254964e-09, + "loss": 3.7633, + "step": 4184000 + }, + { + "epoch": 46.04, + "learning_rate": 9.908410803674568e-09, + "loss": 3.7392, + "step": 4184500 + }, + { + "epoch": 46.04, + "learning_rate": 9.894658672094174e-09, + "loss": 3.7682, + "step": 4185000 + }, + { + "epoch": 46.05, + "learning_rate": 9.88090654051378e-09, + "loss": 3.7548, + "step": 4185500 + }, + { + "epoch": 46.05, + "learning_rate": 9.867154408933385e-09, + "loss": 3.7413, + "step": 4186000 + }, + { + "epoch": 46.06, + "learning_rate": 9.853402277352989e-09, + "loss": 3.75, + "step": 4186500 + }, + { + "epoch": 46.06, + "learning_rate": 9.839650145772595e-09, + "loss": 3.7331, + "step": 4187000 + }, + { + "epoch": 46.07, + "learning_rate": 9.8258980141922e-09, + "loss": 3.742, + "step": 4187500 + }, + { + "epoch": 46.08, + "learning_rate": 9.812145882611804e-09, + "loss": 3.7521, + "step": 4188000 + }, + { + "epoch": 46.08, + "learning_rate": 9.79839375103141e-09, + "loss": 3.7447, + "step": 4188500 + }, + { + "epoch": 46.09, + "learning_rate": 9.784641619451014e-09, + "loss": 3.7471, + "step": 4189000 + }, + { + "epoch": 46.09, + "learning_rate": 9.770889487870619e-09, + "loss": 3.7498, + "step": 4189500 + }, + { + "epoch": 46.1, + "learning_rate": 9.757137356290225e-09, + "loss": 3.7647, + "step": 4190000 + }, + { + "epoch": 46.1, + "learning_rate": 9.743385224709829e-09, + "loss": 3.7574, + "step": 4190500 + }, + { + "epoch": 46.11, + "learning_rate": 9.729633093129435e-09, + "loss": 3.7517, + "step": 4191000 + }, + { + "epoch": 46.11, + "learning_rate": 9.71588096154904e-09, + "loss": 3.7473, + "step": 4191500 + }, + { + "epoch": 46.12, + "learning_rate": 9.702128829968646e-09, + "loss": 3.7429, + "step": 4192000 + }, + { + "epoch": 46.12, + "learning_rate": 9.68837669838825e-09, + "loss": 3.7611, + "step": 4192500 + }, + { + "epoch": 46.13, + "learning_rate": 9.674624566807854e-09, + "loss": 3.7541, + "step": 4193000 + }, + { + "epoch": 46.14, + "learning_rate": 9.66087243522746e-09, + "loss": 3.7592, + "step": 4193500 + }, + { + "epoch": 46.14, + "learning_rate": 9.647120303647065e-09, + "loss": 3.739, + "step": 4194000 + }, + { + "epoch": 46.15, + "learning_rate": 9.633368172066669e-09, + "loss": 3.7411, + "step": 4194500 + }, + { + "epoch": 46.15, + "learning_rate": 9.619616040486275e-09, + "loss": 3.7699, + "step": 4195000 + }, + { + "epoch": 46.16, + "learning_rate": 9.60586390890588e-09, + "loss": 3.76, + "step": 4195500 + }, + { + "epoch": 46.16, + "learning_rate": 9.592111777325484e-09, + "loss": 3.7401, + "step": 4196000 + }, + { + "epoch": 46.17, + "learning_rate": 9.57835964574509e-09, + "loss": 3.7612, + "step": 4196500 + }, + { + "epoch": 46.17, + "learning_rate": 9.564607514164694e-09, + "loss": 3.7595, + "step": 4197000 + }, + { + "epoch": 46.18, + "learning_rate": 9.5508553825843e-09, + "loss": 3.733, + "step": 4197500 + }, + { + "epoch": 46.19, + "learning_rate": 9.537103251003906e-09, + "loss": 3.7472, + "step": 4198000 + }, + { + "epoch": 46.19, + "learning_rate": 9.52335111942351e-09, + "loss": 3.7534, + "step": 4198500 + }, + { + "epoch": 46.2, + "learning_rate": 9.509598987843115e-09, + "loss": 3.7588, + "step": 4199000 + }, + { + "epoch": 46.2, + "learning_rate": 9.495846856262721e-09, + "loss": 3.7485, + "step": 4199500 + }, + { + "epoch": 46.21, + "learning_rate": 9.482094724682326e-09, + "loss": 3.7636, + "step": 4200000 + }, + { + "epoch": 46.21, + "learning_rate": 9.46834259310193e-09, + "loss": 3.7609, + "step": 4200500 + }, + { + "epoch": 46.22, + "learning_rate": 9.454590461521536e-09, + "loss": 3.7327, + "step": 4201000 + }, + { + "epoch": 46.22, + "learning_rate": 9.44083832994114e-09, + "loss": 3.7614, + "step": 4201500 + }, + { + "epoch": 46.23, + "learning_rate": 9.427086198360745e-09, + "loss": 3.7491, + "step": 4202000 + }, + { + "epoch": 46.23, + "learning_rate": 9.413334066780349e-09, + "loss": 3.7485, + "step": 4202500 + }, + { + "epoch": 46.24, + "learning_rate": 9.399581935199955e-09, + "loss": 3.7531, + "step": 4203000 + }, + { + "epoch": 46.25, + "learning_rate": 9.385829803619561e-09, + "loss": 3.7483, + "step": 4203500 + }, + { + "epoch": 46.25, + "learning_rate": 9.372077672039166e-09, + "loss": 3.7455, + "step": 4204000 + }, + { + "epoch": 46.26, + "learning_rate": 9.358325540458772e-09, + "loss": 3.7611, + "step": 4204500 + }, + { + "epoch": 46.26, + "learning_rate": 9.344573408878376e-09, + "loss": 3.7396, + "step": 4205000 + }, + { + "epoch": 46.27, + "learning_rate": 9.33082127729798e-09, + "loss": 3.7714, + "step": 4205500 + }, + { + "epoch": 46.27, + "learning_rate": 9.317069145717586e-09, + "loss": 3.747, + "step": 4206000 + }, + { + "epoch": 46.28, + "learning_rate": 9.30331701413719e-09, + "loss": 3.7326, + "step": 4206500 + }, + { + "epoch": 46.28, + "learning_rate": 9.289564882556795e-09, + "loss": 3.7457, + "step": 4207000 + }, + { + "epoch": 46.29, + "learning_rate": 9.275812750976401e-09, + "loss": 3.7599, + "step": 4207500 + }, + { + "epoch": 46.3, + "learning_rate": 9.262060619396006e-09, + "loss": 3.7569, + "step": 4208000 + }, + { + "epoch": 46.3, + "learning_rate": 9.24830848781561e-09, + "loss": 3.7482, + "step": 4208500 + }, + { + "epoch": 46.31, + "learning_rate": 9.234556356235216e-09, + "loss": 3.7334, + "step": 4209000 + }, + { + "epoch": 46.31, + "learning_rate": 9.220804224654822e-09, + "loss": 3.7508, + "step": 4209500 + }, + { + "epoch": 46.32, + "learning_rate": 9.207052093074426e-09, + "loss": 3.7505, + "step": 4210000 + }, + { + "epoch": 46.32, + "learning_rate": 9.193299961494032e-09, + "loss": 3.7411, + "step": 4210500 + }, + { + "epoch": 46.33, + "learning_rate": 9.179547829913637e-09, + "loss": 3.756, + "step": 4211000 + }, + { + "epoch": 46.33, + "learning_rate": 9.165795698333241e-09, + "loss": 3.7494, + "step": 4211500 + }, + { + "epoch": 46.34, + "learning_rate": 9.152043566752846e-09, + "loss": 3.7256, + "step": 4212000 + }, + { + "epoch": 46.34, + "learning_rate": 9.138291435172452e-09, + "loss": 3.7388, + "step": 4212500 + }, + { + "epoch": 46.35, + "learning_rate": 9.124539303592056e-09, + "loss": 3.7531, + "step": 4213000 + }, + { + "epoch": 46.36, + "learning_rate": 9.11078717201166e-09, + "loss": 3.7181, + "step": 4213500 + }, + { + "epoch": 46.36, + "learning_rate": 9.097035040431266e-09, + "loss": 3.7524, + "step": 4214000 + }, + { + "epoch": 46.37, + "learning_rate": 9.08328290885087e-09, + "loss": 3.7454, + "step": 4214500 + }, + { + "epoch": 46.37, + "learning_rate": 9.069530777270475e-09, + "loss": 3.7549, + "step": 4215000 + }, + { + "epoch": 46.38, + "learning_rate": 9.055778645690081e-09, + "loss": 3.7719, + "step": 4215500 + }, + { + "epoch": 46.38, + "learning_rate": 9.042026514109687e-09, + "loss": 3.762, + "step": 4216000 + }, + { + "epoch": 46.39, + "learning_rate": 9.028274382529292e-09, + "loss": 3.7523, + "step": 4216500 + }, + { + "epoch": 46.39, + "learning_rate": 9.014522250948898e-09, + "loss": 3.7605, + "step": 4217000 + }, + { + "epoch": 46.4, + "learning_rate": 9.000770119368502e-09, + "loss": 3.7542, + "step": 4217500 + }, + { + "epoch": 46.41, + "learning_rate": 8.987017987788106e-09, + "loss": 3.7569, + "step": 4218000 + }, + { + "epoch": 46.41, + "learning_rate": 8.973265856207712e-09, + "loss": 3.746, + "step": 4218500 + }, + { + "epoch": 46.42, + "learning_rate": 8.959513724627317e-09, + "loss": 3.7509, + "step": 4219000 + }, + { + "epoch": 46.42, + "learning_rate": 8.945761593046921e-09, + "loss": 3.759, + "step": 4219500 + }, + { + "epoch": 46.43, + "learning_rate": 8.932009461466527e-09, + "loss": 3.7529, + "step": 4220000 + }, + { + "epoch": 46.43, + "learning_rate": 8.918257329886132e-09, + "loss": 3.7752, + "step": 4220500 + }, + { + "epoch": 46.44, + "learning_rate": 8.904505198305736e-09, + "loss": 3.771, + "step": 4221000 + }, + { + "epoch": 46.44, + "learning_rate": 8.890753066725342e-09, + "loss": 3.7535, + "step": 4221500 + }, + { + "epoch": 46.45, + "learning_rate": 8.877000935144948e-09, + "loss": 3.7512, + "step": 4222000 + }, + { + "epoch": 46.45, + "learning_rate": 8.863248803564552e-09, + "loss": 3.7353, + "step": 4222500 + }, + { + "epoch": 46.46, + "learning_rate": 8.849496671984157e-09, + "loss": 3.7402, + "step": 4223000 + }, + { + "epoch": 46.47, + "learning_rate": 8.835744540403763e-09, + "loss": 3.7613, + "step": 4223500 + }, + { + "epoch": 46.47, + "learning_rate": 8.821992408823367e-09, + "loss": 3.7522, + "step": 4224000 + }, + { + "epoch": 46.48, + "learning_rate": 8.808240277242972e-09, + "loss": 3.7339, + "step": 4224500 + }, + { + "epoch": 46.48, + "learning_rate": 8.794488145662578e-09, + "loss": 3.7224, + "step": 4225000 + }, + { + "epoch": 46.49, + "learning_rate": 8.780736014082182e-09, + "loss": 3.7657, + "step": 4225500 + }, + { + "epoch": 46.49, + "learning_rate": 8.766983882501786e-09, + "loss": 3.7689, + "step": 4226000 + }, + { + "epoch": 46.5, + "learning_rate": 8.753231750921392e-09, + "loss": 3.7515, + "step": 4226500 + }, + { + "epoch": 46.5, + "learning_rate": 8.739479619340997e-09, + "loss": 3.7606, + "step": 4227000 + }, + { + "epoch": 46.51, + "learning_rate": 8.725727487760603e-09, + "loss": 3.7585, + "step": 4227500 + }, + { + "epoch": 46.52, + "learning_rate": 8.711975356180209e-09, + "loss": 3.7496, + "step": 4228000 + }, + { + "epoch": 46.52, + "learning_rate": 8.698223224599813e-09, + "loss": 3.7538, + "step": 4228500 + }, + { + "epoch": 46.53, + "learning_rate": 8.684471093019418e-09, + "loss": 3.7432, + "step": 4229000 + }, + { + "epoch": 46.53, + "learning_rate": 8.670718961439024e-09, + "loss": 3.7559, + "step": 4229500 + }, + { + "epoch": 46.54, + "learning_rate": 8.656966829858628e-09, + "loss": 3.7457, + "step": 4230000 + }, + { + "epoch": 46.54, + "learning_rate": 8.643214698278232e-09, + "loss": 3.7507, + "step": 4230500 + }, + { + "epoch": 46.55, + "learning_rate": 8.629462566697837e-09, + "loss": 3.7326, + "step": 4231000 + }, + { + "epoch": 46.55, + "learning_rate": 8.615710435117443e-09, + "loss": 3.7548, + "step": 4231500 + }, + { + "epoch": 46.56, + "learning_rate": 8.601958303537047e-09, + "loss": 3.7467, + "step": 4232000 + }, + { + "epoch": 46.56, + "learning_rate": 8.588206171956652e-09, + "loss": 3.7529, + "step": 4232500 + }, + { + "epoch": 46.57, + "learning_rate": 8.574454040376258e-09, + "loss": 3.7327, + "step": 4233000 + }, + { + "epoch": 46.58, + "learning_rate": 8.560701908795864e-09, + "loss": 3.75, + "step": 4233500 + }, + { + "epoch": 46.58, + "learning_rate": 8.546949777215468e-09, + "loss": 3.7522, + "step": 4234000 + }, + { + "epoch": 46.59, + "learning_rate": 8.533197645635074e-09, + "loss": 3.7521, + "step": 4234500 + }, + { + "epoch": 46.59, + "learning_rate": 8.519445514054679e-09, + "loss": 3.7557, + "step": 4235000 + }, + { + "epoch": 46.6, + "learning_rate": 8.505693382474283e-09, + "loss": 3.7453, + "step": 4235500 + }, + { + "epoch": 46.6, + "learning_rate": 8.491941250893889e-09, + "loss": 3.7609, + "step": 4236000 + }, + { + "epoch": 46.61, + "learning_rate": 8.478189119313493e-09, + "loss": 3.7459, + "step": 4236500 + }, + { + "epoch": 46.61, + "learning_rate": 8.464436987733098e-09, + "loss": 3.7573, + "step": 4237000 + }, + { + "epoch": 46.62, + "learning_rate": 8.450684856152704e-09, + "loss": 3.7448, + "step": 4237500 + }, + { + "epoch": 46.63, + "learning_rate": 8.436932724572308e-09, + "loss": 3.7427, + "step": 4238000 + }, + { + "epoch": 46.63, + "learning_rate": 8.423180592991912e-09, + "loss": 3.7449, + "step": 4238500 + }, + { + "epoch": 46.64, + "learning_rate": 8.409428461411519e-09, + "loss": 3.7645, + "step": 4239000 + }, + { + "epoch": 46.64, + "learning_rate": 8.395676329831123e-09, + "loss": 3.7327, + "step": 4239500 + }, + { + "epoch": 46.65, + "learning_rate": 8.381924198250729e-09, + "loss": 3.7288, + "step": 4240000 + }, + { + "epoch": 46.65, + "learning_rate": 8.368172066670333e-09, + "loss": 3.7416, + "step": 4240500 + }, + { + "epoch": 46.66, + "learning_rate": 8.35441993508994e-09, + "loss": 3.7467, + "step": 4241000 + }, + { + "epoch": 46.66, + "learning_rate": 8.340667803509544e-09, + "loss": 3.7332, + "step": 4241500 + }, + { + "epoch": 46.67, + "learning_rate": 8.326915671929148e-09, + "loss": 3.7471, + "step": 4242000 + }, + { + "epoch": 46.67, + "learning_rate": 8.313163540348754e-09, + "loss": 3.7525, + "step": 4242500 + }, + { + "epoch": 46.68, + "learning_rate": 8.299411408768359e-09, + "loss": 3.7472, + "step": 4243000 + }, + { + "epoch": 46.69, + "learning_rate": 8.285659277187963e-09, + "loss": 3.7564, + "step": 4243500 + }, + { + "epoch": 46.69, + "learning_rate": 8.271907145607569e-09, + "loss": 3.7347, + "step": 4244000 + }, + { + "epoch": 46.7, + "learning_rate": 8.258155014027173e-09, + "loss": 3.7725, + "step": 4244500 + }, + { + "epoch": 46.7, + "learning_rate": 8.244402882446778e-09, + "loss": 3.7367, + "step": 4245000 + }, + { + "epoch": 46.71, + "learning_rate": 8.230650750866384e-09, + "loss": 3.7711, + "step": 4245500 + }, + { + "epoch": 46.71, + "learning_rate": 8.21689861928599e-09, + "loss": 3.7377, + "step": 4246000 + }, + { + "epoch": 46.72, + "learning_rate": 8.203146487705594e-09, + "loss": 3.7571, + "step": 4246500 + }, + { + "epoch": 46.72, + "learning_rate": 8.1893943561252e-09, + "loss": 3.7292, + "step": 4247000 + }, + { + "epoch": 46.73, + "learning_rate": 8.175642224544805e-09, + "loss": 3.7541, + "step": 4247500 + }, + { + "epoch": 46.74, + "learning_rate": 8.161890092964409e-09, + "loss": 3.7395, + "step": 4248000 + }, + { + "epoch": 46.74, + "learning_rate": 8.148137961384015e-09, + "loss": 3.7485, + "step": 4248500 + }, + { + "epoch": 46.75, + "learning_rate": 8.13438582980362e-09, + "loss": 3.756, + "step": 4249000 + }, + { + "epoch": 46.75, + "learning_rate": 8.120633698223224e-09, + "loss": 3.7621, + "step": 4249500 + }, + { + "epoch": 46.76, + "learning_rate": 8.106881566642828e-09, + "loss": 3.7479, + "step": 4250000 + }, + { + "epoch": 46.76, + "learning_rate": 8.093129435062434e-09, + "loss": 3.7602, + "step": 4250500 + }, + { + "epoch": 46.77, + "learning_rate": 8.079377303482039e-09, + "loss": 3.7634, + "step": 4251000 + }, + { + "epoch": 46.77, + "learning_rate": 8.065625171901645e-09, + "loss": 3.7443, + "step": 4251500 + }, + { + "epoch": 46.78, + "learning_rate": 8.05187304032125e-09, + "loss": 3.7559, + "step": 4252000 + }, + { + "epoch": 46.78, + "learning_rate": 8.038120908740855e-09, + "loss": 3.7171, + "step": 4252500 + }, + { + "epoch": 46.79, + "learning_rate": 8.02436877716046e-09, + "loss": 3.75, + "step": 4253000 + }, + { + "epoch": 46.8, + "learning_rate": 8.010616645580065e-09, + "loss": 3.7479, + "step": 4253500 + }, + { + "epoch": 46.8, + "learning_rate": 7.99686451399967e-09, + "loss": 3.7599, + "step": 4254000 + }, + { + "epoch": 46.81, + "learning_rate": 7.983112382419274e-09, + "loss": 3.7456, + "step": 4254500 + }, + { + "epoch": 46.81, + "learning_rate": 7.96936025083888e-09, + "loss": 3.7565, + "step": 4255000 + }, + { + "epoch": 46.82, + "learning_rate": 7.955608119258485e-09, + "loss": 3.7389, + "step": 4255500 + }, + { + "epoch": 46.82, + "learning_rate": 7.941855987678089e-09, + "loss": 3.7445, + "step": 4256000 + }, + { + "epoch": 46.83, + "learning_rate": 7.928103856097695e-09, + "loss": 3.7421, + "step": 4256500 + }, + { + "epoch": 46.83, + "learning_rate": 7.9143517245173e-09, + "loss": 3.7476, + "step": 4257000 + }, + { + "epoch": 46.84, + "learning_rate": 7.900599592936905e-09, + "loss": 3.7461, + "step": 4257500 + }, + { + "epoch": 46.85, + "learning_rate": 7.88684746135651e-09, + "loss": 3.7429, + "step": 4258000 + }, + { + "epoch": 46.85, + "learning_rate": 7.873095329776116e-09, + "loss": 3.7518, + "step": 4258500 + }, + { + "epoch": 46.86, + "learning_rate": 7.85934319819572e-09, + "loss": 3.755, + "step": 4259000 + }, + { + "epoch": 46.86, + "learning_rate": 7.845591066615325e-09, + "loss": 3.7429, + "step": 4259500 + }, + { + "epoch": 46.87, + "learning_rate": 7.83183893503493e-09, + "loss": 3.7543, + "step": 4260000 + }, + { + "epoch": 46.87, + "learning_rate": 7.818086803454535e-09, + "loss": 3.7563, + "step": 4260500 + }, + { + "epoch": 46.88, + "learning_rate": 7.80433467187414e-09, + "loss": 3.7608, + "step": 4261000 + }, + { + "epoch": 46.88, + "learning_rate": 7.790582540293745e-09, + "loss": 3.7422, + "step": 4261500 + }, + { + "epoch": 46.89, + "learning_rate": 7.77683040871335e-09, + "loss": 3.7625, + "step": 4262000 + }, + { + "epoch": 46.89, + "learning_rate": 7.763078277132954e-09, + "loss": 3.7396, + "step": 4262500 + }, + { + "epoch": 46.9, + "learning_rate": 7.74932614555256e-09, + "loss": 3.767, + "step": 4263000 + }, + { + "epoch": 46.91, + "learning_rate": 7.735574013972165e-09, + "loss": 3.7305, + "step": 4263500 + }, + { + "epoch": 46.91, + "learning_rate": 7.72182188239177e-09, + "loss": 3.7202, + "step": 4264000 + }, + { + "epoch": 46.92, + "learning_rate": 7.708069750811377e-09, + "loss": 3.7485, + "step": 4264500 + }, + { + "epoch": 46.92, + "learning_rate": 7.694317619230981e-09, + "loss": 3.7391, + "step": 4265000 + }, + { + "epoch": 46.93, + "learning_rate": 7.680565487650585e-09, + "loss": 3.747, + "step": 4265500 + }, + { + "epoch": 46.93, + "learning_rate": 7.666813356070191e-09, + "loss": 3.7578, + "step": 4266000 + }, + { + "epoch": 46.94, + "learning_rate": 7.653061224489796e-09, + "loss": 3.7516, + "step": 4266500 + }, + { + "epoch": 46.94, + "learning_rate": 7.6393090929094e-09, + "loss": 3.738, + "step": 4267000 + }, + { + "epoch": 46.95, + "learning_rate": 7.625556961329006e-09, + "loss": 3.7409, + "step": 4267500 + }, + { + "epoch": 46.96, + "learning_rate": 7.61180482974861e-09, + "loss": 3.7632, + "step": 4268000 + }, + { + "epoch": 46.96, + "learning_rate": 7.598052698168215e-09, + "loss": 3.7623, + "step": 4268500 + }, + { + "epoch": 46.97, + "learning_rate": 7.58430056658782e-09, + "loss": 3.736, + "step": 4269000 + }, + { + "epoch": 46.97, + "learning_rate": 7.570548435007425e-09, + "loss": 3.7423, + "step": 4269500 + }, + { + "epoch": 46.98, + "learning_rate": 7.556796303427031e-09, + "loss": 3.7567, + "step": 4270000 + }, + { + "epoch": 46.98, + "learning_rate": 7.543044171846636e-09, + "loss": 3.7573, + "step": 4270500 + }, + { + "epoch": 46.99, + "learning_rate": 7.529292040266242e-09, + "loss": 3.7576, + "step": 4271000 + }, + { + "epoch": 46.99, + "learning_rate": 7.515539908685846e-09, + "loss": 3.7436, + "step": 4271500 + }, + { + "epoch": 47.0, + "learning_rate": 7.50178777710545e-09, + "loss": 3.7506, + "step": 4272000 + }, + { + "epoch": 47.0, + "eval_loss": 3.82542085647583, + "eval_runtime": 6.1493, + "eval_samples_per_second": 252.713, + "step": 4272065 + }, + { + "epoch": 47.0, + "learning_rate": 7.488035645525057e-09, + "loss": 3.7477, + "step": 4272500 + }, + { + "epoch": 47.01, + "learning_rate": 7.474283513944661e-09, + "loss": 3.7517, + "step": 4273000 + }, + { + "epoch": 47.02, + "learning_rate": 7.460531382364265e-09, + "loss": 3.7637, + "step": 4273500 + }, + { + "epoch": 47.02, + "learning_rate": 7.4467792507838715e-09, + "loss": 3.7417, + "step": 4274000 + }, + { + "epoch": 47.03, + "learning_rate": 7.433027119203477e-09, + "loss": 3.7648, + "step": 4274500 + }, + { + "epoch": 47.03, + "learning_rate": 7.419274987623081e-09, + "loss": 3.7351, + "step": 4275000 + }, + { + "epoch": 47.04, + "learning_rate": 7.405522856042687e-09, + "loss": 3.7618, + "step": 4275500 + }, + { + "epoch": 47.04, + "learning_rate": 7.3917707244622915e-09, + "loss": 3.7424, + "step": 4276000 + }, + { + "epoch": 47.05, + "learning_rate": 7.378018592881896e-09, + "loss": 3.7594, + "step": 4276500 + }, + { + "epoch": 47.05, + "learning_rate": 7.364266461301502e-09, + "loss": 3.75, + "step": 4277000 + }, + { + "epoch": 47.06, + "learning_rate": 7.350514329721107e-09, + "loss": 3.7568, + "step": 4277500 + }, + { + "epoch": 47.07, + "learning_rate": 7.3367621981407115e-09, + "loss": 3.7514, + "step": 4278000 + }, + { + "epoch": 47.07, + "learning_rate": 7.323010066560316e-09, + "loss": 3.7499, + "step": 4278500 + }, + { + "epoch": 47.08, + "learning_rate": 7.309257934979922e-09, + "loss": 3.7594, + "step": 4279000 + }, + { + "epoch": 47.08, + "learning_rate": 7.295505803399526e-09, + "loss": 3.7484, + "step": 4279500 + }, + { + "epoch": 47.09, + "learning_rate": 7.2817536718191315e-09, + "loss": 3.7633, + "step": 4280000 + }, + { + "epoch": 47.09, + "learning_rate": 7.268001540238737e-09, + "loss": 3.7397, + "step": 4280500 + }, + { + "epoch": 47.1, + "learning_rate": 7.254249408658342e-09, + "loss": 3.7449, + "step": 4281000 + }, + { + "epoch": 47.1, + "learning_rate": 7.240497277077946e-09, + "loss": 3.7485, + "step": 4281500 + }, + { + "epoch": 47.11, + "learning_rate": 7.226745145497552e-09, + "loss": 3.7519, + "step": 4282000 + }, + { + "epoch": 47.11, + "learning_rate": 7.212993013917157e-09, + "loss": 3.7414, + "step": 4282500 + }, + { + "epoch": 47.12, + "learning_rate": 7.199240882336761e-09, + "loss": 3.7679, + "step": 4283000 + }, + { + "epoch": 47.13, + "learning_rate": 7.185488750756367e-09, + "loss": 3.7401, + "step": 4283500 + }, + { + "epoch": 47.13, + "learning_rate": 7.171736619175972e-09, + "loss": 3.7319, + "step": 4284000 + }, + { + "epoch": 47.14, + "learning_rate": 7.157984487595577e-09, + "loss": 3.7643, + "step": 4284500 + }, + { + "epoch": 47.14, + "learning_rate": 7.144232356015183e-09, + "loss": 3.7513, + "step": 4285000 + }, + { + "epoch": 47.15, + "learning_rate": 7.130480224434787e-09, + "loss": 3.7522, + "step": 4285500 + }, + { + "epoch": 47.15, + "learning_rate": 7.1167280928543915e-09, + "loss": 3.7463, + "step": 4286000 + }, + { + "epoch": 47.16, + "learning_rate": 7.1029759612739975e-09, + "loss": 3.7363, + "step": 4286500 + }, + { + "epoch": 47.16, + "learning_rate": 7.089223829693603e-09, + "loss": 3.7428, + "step": 4287000 + }, + { + "epoch": 47.17, + "learning_rate": 7.075471698113207e-09, + "loss": 3.76, + "step": 4287500 + }, + { + "epoch": 47.18, + "learning_rate": 7.0617195665328115e-09, + "loss": 3.7648, + "step": 4288000 + }, + { + "epoch": 47.18, + "learning_rate": 7.0479674349524175e-09, + "loss": 3.7425, + "step": 4288500 + }, + { + "epoch": 47.19, + "learning_rate": 7.034215303372022e-09, + "loss": 3.7701, + "step": 4289000 + }, + { + "epoch": 47.19, + "learning_rate": 7.020463171791627e-09, + "loss": 3.7554, + "step": 4289500 + }, + { + "epoch": 47.2, + "learning_rate": 7.006711040211233e-09, + "loss": 3.7403, + "step": 4290000 + }, + { + "epoch": 47.2, + "learning_rate": 6.9929589086308375e-09, + "loss": 3.7454, + "step": 4290500 + }, + { + "epoch": 47.21, + "learning_rate": 6.979206777050442e-09, + "loss": 3.7688, + "step": 4291000 + }, + { + "epoch": 47.21, + "learning_rate": 6.965454645470048e-09, + "loss": 3.7511, + "step": 4291500 + }, + { + "epoch": 47.22, + "learning_rate": 6.951702513889652e-09, + "loss": 3.7574, + "step": 4292000 + }, + { + "epoch": 47.22, + "learning_rate": 6.9379503823092575e-09, + "loss": 3.7542, + "step": 4292500 + }, + { + "epoch": 47.23, + "learning_rate": 6.9241982507288636e-09, + "loss": 3.7353, + "step": 4293000 + }, + { + "epoch": 47.24, + "learning_rate": 6.910446119148468e-09, + "loss": 3.7622, + "step": 4293500 + }, + { + "epoch": 47.24, + "learning_rate": 6.896693987568072e-09, + "loss": 3.7537, + "step": 4294000 + }, + { + "epoch": 47.25, + "learning_rate": 6.882941855987678e-09, + "loss": 3.751, + "step": 4294500 + }, + { + "epoch": 47.25, + "learning_rate": 6.869189724407283e-09, + "loss": 3.7428, + "step": 4295000 + }, + { + "epoch": 47.26, + "learning_rate": 6.855437592826888e-09, + "loss": 3.7447, + "step": 4295500 + }, + { + "epoch": 47.26, + "learning_rate": 6.841685461246492e-09, + "loss": 3.7409, + "step": 4296000 + }, + { + "epoch": 47.27, + "learning_rate": 6.827933329666098e-09, + "loss": 3.7461, + "step": 4296500 + }, + { + "epoch": 47.27, + "learning_rate": 6.814181198085703e-09, + "loss": 3.7604, + "step": 4297000 + }, + { + "epoch": 47.28, + "learning_rate": 6.800429066505307e-09, + "loss": 3.7721, + "step": 4297500 + }, + { + "epoch": 47.29, + "learning_rate": 6.786676934924913e-09, + "loss": 3.7664, + "step": 4298000 + }, + { + "epoch": 47.29, + "learning_rate": 6.772924803344518e-09, + "loss": 3.7565, + "step": 4298500 + }, + { + "epoch": 47.3, + "learning_rate": 6.759172671764123e-09, + "loss": 3.7489, + "step": 4299000 + }, + { + "epoch": 47.3, + "learning_rate": 6.745420540183729e-09, + "loss": 3.7602, + "step": 4299500 + }, + { + "epoch": 47.31, + "learning_rate": 6.731668408603333e-09, + "loss": 3.751, + "step": 4300000 + }, + { + "epoch": 47.31, + "learning_rate": 6.7179162770229376e-09, + "loss": 3.7731, + "step": 4300500 + }, + { + "epoch": 47.32, + "learning_rate": 6.704164145442544e-09, + "loss": 3.7556, + "step": 4301000 + }, + { + "epoch": 47.32, + "learning_rate": 6.690412013862148e-09, + "loss": 3.7607, + "step": 4301500 + }, + { + "epoch": 47.33, + "learning_rate": 6.676659882281753e-09, + "loss": 3.7475, + "step": 4302000 + }, + { + "epoch": 47.33, + "learning_rate": 6.662907750701359e-09, + "loss": 3.7663, + "step": 4302500 + }, + { + "epoch": 47.34, + "learning_rate": 6.649155619120964e-09, + "loss": 3.7458, + "step": 4303000 + }, + { + "epoch": 47.35, + "learning_rate": 6.635403487540568e-09, + "loss": 3.7558, + "step": 4303500 + }, + { + "epoch": 47.35, + "learning_rate": 6.621651355960174e-09, + "loss": 3.7328, + "step": 4304000 + }, + { + "epoch": 47.36, + "learning_rate": 6.607899224379778e-09, + "loss": 3.728, + "step": 4304500 + }, + { + "epoch": 47.36, + "learning_rate": 6.594147092799384e-09, + "loss": 3.7344, + "step": 4305000 + }, + { + "epoch": 47.37, + "learning_rate": 6.580394961218988e-09, + "loss": 3.742, + "step": 4305500 + }, + { + "epoch": 47.37, + "learning_rate": 6.566642829638594e-09, + "loss": 3.7406, + "step": 4306000 + }, + { + "epoch": 47.38, + "learning_rate": 6.552890698058198e-09, + "loss": 3.7428, + "step": 4306500 + }, + { + "epoch": 47.38, + "learning_rate": 6.539138566477803e-09, + "loss": 3.7373, + "step": 4307000 + }, + { + "epoch": 47.39, + "learning_rate": 6.525386434897409e-09, + "loss": 3.7524, + "step": 4307500 + }, + { + "epoch": 47.4, + "learning_rate": 6.511634303317014e-09, + "loss": 3.7355, + "step": 4308000 + }, + { + "epoch": 47.4, + "learning_rate": 6.497882171736618e-09, + "loss": 3.7486, + "step": 4308500 + }, + { + "epoch": 47.41, + "learning_rate": 6.4841300401562244e-09, + "loss": 3.7519, + "step": 4309000 + }, + { + "epoch": 47.41, + "learning_rate": 6.470377908575829e-09, + "loss": 3.7372, + "step": 4309500 + }, + { + "epoch": 47.42, + "learning_rate": 6.456625776995433e-09, + "loss": 3.7493, + "step": 4310000 + }, + { + "epoch": 47.42, + "learning_rate": 6.442873645415039e-09, + "loss": 3.7614, + "step": 4310500 + }, + { + "epoch": 47.43, + "learning_rate": 6.4291215138346444e-09, + "loss": 3.7632, + "step": 4311000 + }, + { + "epoch": 47.43, + "learning_rate": 6.415369382254249e-09, + "loss": 3.736, + "step": 4311500 + }, + { + "epoch": 47.44, + "learning_rate": 6.401617250673855e-09, + "loss": 3.7527, + "step": 4312000 + }, + { + "epoch": 47.44, + "learning_rate": 6.387865119093459e-09, + "loss": 3.765, + "step": 4312500 + }, + { + "epoch": 47.45, + "learning_rate": 6.374112987513064e-09, + "loss": 3.7258, + "step": 4313000 + }, + { + "epoch": 47.46, + "learning_rate": 6.36036085593267e-09, + "loss": 3.7956, + "step": 4313500 + }, + { + "epoch": 47.46, + "learning_rate": 6.346608724352275e-09, + "loss": 3.7462, + "step": 4314000 + }, + { + "epoch": 47.47, + "learning_rate": 6.332856592771879e-09, + "loss": 3.7437, + "step": 4314500 + }, + { + "epoch": 47.47, + "learning_rate": 6.319104461191484e-09, + "loss": 3.7519, + "step": 4315000 + }, + { + "epoch": 47.48, + "learning_rate": 6.30535232961109e-09, + "loss": 3.7607, + "step": 4315500 + }, + { + "epoch": 47.48, + "learning_rate": 6.291600198030694e-09, + "loss": 3.7666, + "step": 4316000 + }, + { + "epoch": 47.49, + "learning_rate": 6.277848066450299e-09, + "loss": 3.7237, + "step": 4316500 + }, + { + "epoch": 47.49, + "learning_rate": 6.264095934869905e-09, + "loss": 3.7331, + "step": 4317000 + }, + { + "epoch": 47.5, + "learning_rate": 6.25034380328951e-09, + "loss": 3.7588, + "step": 4317500 + }, + { + "epoch": 47.51, + "learning_rate": 6.236591671709115e-09, + "loss": 3.744, + "step": 4318000 + }, + { + "epoch": 47.51, + "learning_rate": 6.222839540128719e-09, + "loss": 3.7605, + "step": 4318500 + }, + { + "epoch": 47.52, + "learning_rate": 6.2090874085483244e-09, + "loss": 3.7405, + "step": 4319000 + }, + { + "epoch": 47.52, + "learning_rate": 6.19533527696793e-09, + "loss": 3.7559, + "step": 4319500 + }, + { + "epoch": 47.53, + "learning_rate": 6.181583145387535e-09, + "loss": 3.7391, + "step": 4320000 + }, + { + "epoch": 47.53, + "learning_rate": 6.16783101380714e-09, + "loss": 3.7581, + "step": 4320500 + }, + { + "epoch": 47.54, + "learning_rate": 6.154078882226745e-09, + "loss": 3.7441, + "step": 4321000 + }, + { + "epoch": 47.54, + "learning_rate": 6.14032675064635e-09, + "loss": 3.74, + "step": 4321500 + }, + { + "epoch": 47.55, + "learning_rate": 6.126574619065955e-09, + "loss": 3.7476, + "step": 4322000 + }, + { + "epoch": 47.55, + "learning_rate": 6.11282248748556e-09, + "loss": 3.7406, + "step": 4322500 + }, + { + "epoch": 47.56, + "learning_rate": 6.099070355905165e-09, + "loss": 3.7379, + "step": 4323000 + }, + { + "epoch": 47.57, + "learning_rate": 6.0853182243247705e-09, + "loss": 3.7345, + "step": 4323500 + }, + { + "epoch": 47.57, + "learning_rate": 6.071566092744375e-09, + "loss": 3.743, + "step": 4324000 + }, + { + "epoch": 47.58, + "learning_rate": 6.05781396116398e-09, + "loss": 3.7558, + "step": 4324500 + }, + { + "epoch": 47.58, + "learning_rate": 6.044061829583585e-09, + "loss": 3.7497, + "step": 4325000 + }, + { + "epoch": 47.59, + "learning_rate": 6.03030969800319e-09, + "loss": 3.7557, + "step": 4325500 + }, + { + "epoch": 47.59, + "learning_rate": 6.016557566422796e-09, + "loss": 3.752, + "step": 4326000 + }, + { + "epoch": 47.6, + "learning_rate": 6.0028054348424e-09, + "loss": 3.7354, + "step": 4326500 + }, + { + "epoch": 47.6, + "learning_rate": 5.989053303262005e-09, + "loss": 3.7471, + "step": 4327000 + }, + { + "epoch": 47.61, + "learning_rate": 5.9753011716816105e-09, + "loss": 3.7491, + "step": 4327500 + }, + { + "epoch": 47.62, + "learning_rate": 5.961549040101215e-09, + "loss": 3.745, + "step": 4328000 + }, + { + "epoch": 47.62, + "learning_rate": 5.94779690852082e-09, + "loss": 3.7313, + "step": 4328500 + }, + { + "epoch": 47.63, + "learning_rate": 5.934044776940426e-09, + "loss": 3.7358, + "step": 4329000 + }, + { + "epoch": 47.63, + "learning_rate": 5.9202926453600305e-09, + "loss": 3.7578, + "step": 4329500 + }, + { + "epoch": 47.64, + "learning_rate": 5.906540513779636e-09, + "loss": 3.7633, + "step": 4330000 + }, + { + "epoch": 47.64, + "learning_rate": 5.892788382199241e-09, + "loss": 3.7476, + "step": 4330500 + }, + { + "epoch": 47.65, + "learning_rate": 5.879036250618845e-09, + "loss": 3.7415, + "step": 4331000 + }, + { + "epoch": 47.65, + "learning_rate": 5.8652841190384505e-09, + "loss": 3.738, + "step": 4331500 + }, + { + "epoch": 47.66, + "learning_rate": 5.851531987458056e-09, + "loss": 3.7562, + "step": 4332000 + }, + { + "epoch": 47.66, + "learning_rate": 5.837779855877661e-09, + "loss": 3.748, + "step": 4332500 + }, + { + "epoch": 47.67, + "learning_rate": 5.824027724297266e-09, + "loss": 3.7452, + "step": 4333000 + }, + { + "epoch": 47.68, + "learning_rate": 5.8102755927168705e-09, + "loss": 3.7453, + "step": 4333500 + }, + { + "epoch": 47.68, + "learning_rate": 5.796523461136476e-09, + "loss": 3.7358, + "step": 4334000 + }, + { + "epoch": 47.69, + "learning_rate": 5.782771329556081e-09, + "loss": 3.7481, + "step": 4334500 + }, + { + "epoch": 47.69, + "learning_rate": 5.769019197975686e-09, + "loss": 3.7374, + "step": 4335000 + }, + { + "epoch": 47.7, + "learning_rate": 5.755267066395291e-09, + "loss": 3.7508, + "step": 4335500 + }, + { + "epoch": 47.7, + "learning_rate": 5.741514934814896e-09, + "loss": 3.7381, + "step": 4336000 + }, + { + "epoch": 47.71, + "learning_rate": 5.727762803234501e-09, + "loss": 3.7378, + "step": 4336500 + }, + { + "epoch": 47.71, + "learning_rate": 5.714010671654106e-09, + "loss": 3.7532, + "step": 4337000 + }, + { + "epoch": 47.72, + "learning_rate": 5.7002585400737105e-09, + "loss": 3.7556, + "step": 4337500 + }, + { + "epoch": 47.73, + "learning_rate": 5.6865064084933165e-09, + "loss": 3.7525, + "step": 4338000 + }, + { + "epoch": 47.73, + "learning_rate": 5.672754276912922e-09, + "loss": 3.7514, + "step": 4338500 + }, + { + "epoch": 47.74, + "learning_rate": 5.659002145332526e-09, + "loss": 3.7528, + "step": 4339000 + }, + { + "epoch": 47.74, + "learning_rate": 5.645250013752131e-09, + "loss": 3.7392, + "step": 4339500 + }, + { + "epoch": 47.75, + "learning_rate": 5.6314978821717365e-09, + "loss": 3.7407, + "step": 4340000 + }, + { + "epoch": 47.75, + "learning_rate": 5.617745750591341e-09, + "loss": 3.7397, + "step": 4340500 + }, + { + "epoch": 47.76, + "learning_rate": 5.603993619010947e-09, + "loss": 3.7502, + "step": 4341000 + }, + { + "epoch": 47.76, + "learning_rate": 5.590241487430551e-09, + "loss": 3.7521, + "step": 4341500 + }, + { + "epoch": 47.77, + "learning_rate": 5.5764893558501566e-09, + "loss": 3.7812, + "step": 4342000 + }, + { + "epoch": 47.77, + "learning_rate": 5.562737224269762e-09, + "loss": 3.7593, + "step": 4342500 + }, + { + "epoch": 47.78, + "learning_rate": 5.548985092689366e-09, + "loss": 3.7577, + "step": 4343000 + }, + { + "epoch": 47.79, + "learning_rate": 5.535232961108971e-09, + "loss": 3.736, + "step": 4343500 + }, + { + "epoch": 47.79, + "learning_rate": 5.5214808295285766e-09, + "loss": 3.751, + "step": 4344000 + }, + { + "epoch": 47.8, + "learning_rate": 5.507728697948182e-09, + "loss": 3.7477, + "step": 4344500 + }, + { + "epoch": 47.8, + "learning_rate": 5.493976566367787e-09, + "loss": 3.7708, + "step": 4345000 + }, + { + "epoch": 47.81, + "learning_rate": 5.480224434787391e-09, + "loss": 3.7372, + "step": 4345500 + }, + { + "epoch": 47.81, + "learning_rate": 5.4664723032069966e-09, + "loss": 3.744, + "step": 4346000 + }, + { + "epoch": 47.82, + "learning_rate": 5.452720171626602e-09, + "loss": 3.7472, + "step": 4346500 + }, + { + "epoch": 47.82, + "learning_rate": 5.438968040046207e-09, + "loss": 3.7483, + "step": 4347000 + }, + { + "epoch": 47.83, + "learning_rate": 5.425215908465812e-09, + "loss": 3.752, + "step": 4347500 + }, + { + "epoch": 47.84, + "learning_rate": 5.411463776885417e-09, + "loss": 3.7489, + "step": 4348000 + }, + { + "epoch": 47.84, + "learning_rate": 5.397711645305022e-09, + "loss": 3.7664, + "step": 4348500 + }, + { + "epoch": 47.85, + "learning_rate": 5.383959513724627e-09, + "loss": 3.757, + "step": 4349000 + }, + { + "epoch": 47.85, + "learning_rate": 5.370207382144232e-09, + "loss": 3.7552, + "step": 4349500 + }, + { + "epoch": 47.86, + "learning_rate": 5.356455250563837e-09, + "loss": 3.7389, + "step": 4350000 + }, + { + "epoch": 47.86, + "learning_rate": 5.342703118983443e-09, + "loss": 3.7323, + "step": 4350500 + }, + { + "epoch": 47.87, + "learning_rate": 5.328950987403047e-09, + "loss": 3.76, + "step": 4351000 + }, + { + "epoch": 47.87, + "learning_rate": 5.315198855822652e-09, + "loss": 3.7661, + "step": 4351500 + }, + { + "epoch": 47.88, + "learning_rate": 5.301446724242257e-09, + "loss": 3.7539, + "step": 4352000 + }, + { + "epoch": 47.88, + "learning_rate": 5.287694592661862e-09, + "loss": 3.7369, + "step": 4352500 + }, + { + "epoch": 47.89, + "learning_rate": 5.273942461081468e-09, + "loss": 3.7596, + "step": 4353000 + }, + { + "epoch": 47.9, + "learning_rate": 5.260190329501073e-09, + "loss": 3.7501, + "step": 4353500 + }, + { + "epoch": 47.9, + "learning_rate": 5.246438197920677e-09, + "loss": 3.731, + "step": 4354000 + }, + { + "epoch": 47.91, + "learning_rate": 5.232686066340283e-09, + "loss": 3.7344, + "step": 4354500 + }, + { + "epoch": 47.91, + "learning_rate": 5.218933934759887e-09, + "loss": 3.7615, + "step": 4355000 + }, + { + "epoch": 47.92, + "learning_rate": 5.205181803179492e-09, + "loss": 3.7467, + "step": 4355500 + }, + { + "epoch": 47.92, + "learning_rate": 5.191429671599097e-09, + "loss": 3.7315, + "step": 4356000 + }, + { + "epoch": 47.93, + "learning_rate": 5.177677540018703e-09, + "loss": 3.7494, + "step": 4356500 + }, + { + "epoch": 47.93, + "learning_rate": 5.163925408438308e-09, + "loss": 3.7478, + "step": 4357000 + }, + { + "epoch": 47.94, + "learning_rate": 5.150173276857913e-09, + "loss": 3.7507, + "step": 4357500 + }, + { + "epoch": 47.95, + "learning_rate": 5.136421145277517e-09, + "loss": 3.7425, + "step": 4358000 + }, + { + "epoch": 47.95, + "learning_rate": 5.122669013697123e-09, + "loss": 3.7673, + "step": 4358500 + }, + { + "epoch": 47.96, + "learning_rate": 5.108916882116728e-09, + "loss": 3.7266, + "step": 4359000 + }, + { + "epoch": 47.96, + "learning_rate": 5.095164750536333e-09, + "loss": 3.7449, + "step": 4359500 + }, + { + "epoch": 47.97, + "learning_rate": 5.081412618955938e-09, + "loss": 3.7528, + "step": 4360000 + }, + { + "epoch": 47.97, + "learning_rate": 5.067660487375543e-09, + "loss": 3.7714, + "step": 4360500 + }, + { + "epoch": 47.98, + "learning_rate": 5.053908355795148e-09, + "loss": 3.75, + "step": 4361000 + }, + { + "epoch": 47.98, + "learning_rate": 5.040156224214753e-09, + "loss": 3.7531, + "step": 4361500 + }, + { + "epoch": 47.99, + "learning_rate": 5.026404092634358e-09, + "loss": 3.7609, + "step": 4362000 + }, + { + "epoch": 47.99, + "learning_rate": 5.0126519610539634e-09, + "loss": 3.7544, + "step": 4362500 + }, + { + "epoch": 48.0, + "eval_loss": 3.8254196643829346, + "eval_runtime": 6.1443, + "eval_samples_per_second": 252.916, + "step": 4362960 + }, + { + "epoch": 48.0, + "learning_rate": 4.998899829473569e-09, + "loss": 3.7166, + "step": 4363000 + }, + { + "epoch": 48.01, + "learning_rate": 4.985147697893173e-09, + "loss": 3.7402, + "step": 4363500 + }, + { + "epoch": 48.01, + "learning_rate": 4.971395566312778e-09, + "loss": 3.7379, + "step": 4364000 + }, + { + "epoch": 48.02, + "learning_rate": 4.957643434732383e-09, + "loss": 3.7461, + "step": 4364500 + }, + { + "epoch": 48.02, + "learning_rate": 4.943891303151989e-09, + "loss": 3.7438, + "step": 4365000 + }, + { + "epoch": 48.03, + "learning_rate": 4.930139171571594e-09, + "loss": 3.7506, + "step": 4365500 + }, + { + "epoch": 48.03, + "learning_rate": 4.916387039991198e-09, + "loss": 3.7528, + "step": 4366000 + }, + { + "epoch": 48.04, + "learning_rate": 4.9026349084108035e-09, + "loss": 3.7547, + "step": 4366500 + }, + { + "epoch": 48.04, + "learning_rate": 4.888882776830409e-09, + "loss": 3.7486, + "step": 4367000 + }, + { + "epoch": 48.05, + "learning_rate": 4.875130645250013e-09, + "loss": 3.7267, + "step": 4367500 + }, + { + "epoch": 48.06, + "learning_rate": 4.861378513669618e-09, + "loss": 3.7328, + "step": 4368000 + }, + { + "epoch": 48.06, + "learning_rate": 4.847626382089224e-09, + "loss": 3.7647, + "step": 4368500 + }, + { + "epoch": 48.07, + "learning_rate": 4.833874250508829e-09, + "loss": 3.7322, + "step": 4369000 + }, + { + "epoch": 48.07, + "learning_rate": 4.820122118928434e-09, + "loss": 3.7548, + "step": 4369500 + }, + { + "epoch": 48.08, + "learning_rate": 4.806369987348038e-09, + "loss": 3.7432, + "step": 4370000 + }, + { + "epoch": 48.08, + "learning_rate": 4.7926178557676435e-09, + "loss": 3.7728, + "step": 4370500 + }, + { + "epoch": 48.09, + "learning_rate": 4.778865724187249e-09, + "loss": 3.7384, + "step": 4371000 + }, + { + "epoch": 48.09, + "learning_rate": 4.765113592606854e-09, + "loss": 3.7466, + "step": 4371500 + }, + { + "epoch": 48.1, + "learning_rate": 4.751361461026459e-09, + "loss": 3.739, + "step": 4372000 + }, + { + "epoch": 48.1, + "learning_rate": 4.737609329446064e-09, + "loss": 3.7598, + "step": 4372500 + }, + { + "epoch": 48.11, + "learning_rate": 4.723857197865669e-09, + "loss": 3.7602, + "step": 4373000 + }, + { + "epoch": 48.12, + "learning_rate": 4.710105066285274e-09, + "loss": 3.7342, + "step": 4373500 + }, + { + "epoch": 48.12, + "learning_rate": 4.696352934704879e-09, + "loss": 3.7764, + "step": 4374000 + }, + { + "epoch": 48.13, + "learning_rate": 4.682600803124484e-09, + "loss": 3.7475, + "step": 4374500 + }, + { + "epoch": 48.13, + "learning_rate": 4.6688486715440895e-09, + "loss": 3.7529, + "step": 4375000 + }, + { + "epoch": 48.14, + "learning_rate": 4.655096539963694e-09, + "loss": 3.7386, + "step": 4375500 + }, + { + "epoch": 48.14, + "learning_rate": 4.641344408383299e-09, + "loss": 3.7499, + "step": 4376000 + }, + { + "epoch": 48.15, + "learning_rate": 4.627592276802904e-09, + "loss": 3.7413, + "step": 4376500 + }, + { + "epoch": 48.15, + "learning_rate": 4.6138401452225095e-09, + "loss": 3.7414, + "step": 4377000 + }, + { + "epoch": 48.16, + "learning_rate": 4.600088013642115e-09, + "loss": 3.7251, + "step": 4377500 + }, + { + "epoch": 48.17, + "learning_rate": 4.58633588206172e-09, + "loss": 3.7455, + "step": 4378000 + }, + { + "epoch": 48.17, + "learning_rate": 4.572583750481324e-09, + "loss": 3.7652, + "step": 4378500 + }, + { + "epoch": 48.18, + "learning_rate": 4.5588316189009295e-09, + "loss": 3.7359, + "step": 4379000 + }, + { + "epoch": 48.18, + "learning_rate": 4.545079487320534e-09, + "loss": 3.7437, + "step": 4379500 + }, + { + "epoch": 48.19, + "learning_rate": 4.531327355740139e-09, + "loss": 3.7568, + "step": 4380000 + }, + { + "epoch": 48.19, + "learning_rate": 4.517575224159745e-09, + "loss": 3.7435, + "step": 4380500 + }, + { + "epoch": 48.2, + "learning_rate": 4.5038230925793495e-09, + "loss": 3.7381, + "step": 4381000 + }, + { + "epoch": 48.2, + "learning_rate": 4.490070960998955e-09, + "loss": 3.7525, + "step": 4381500 + }, + { + "epoch": 48.21, + "learning_rate": 4.47631882941856e-09, + "loss": 3.7538, + "step": 4382000 + }, + { + "epoch": 48.21, + "learning_rate": 4.462566697838164e-09, + "loss": 3.7301, + "step": 4382500 + }, + { + "epoch": 48.22, + "learning_rate": 4.4488145662577695e-09, + "loss": 3.749, + "step": 4383000 + }, + { + "epoch": 48.23, + "learning_rate": 4.435062434677375e-09, + "loss": 3.7547, + "step": 4383500 + }, + { + "epoch": 48.23, + "learning_rate": 4.42131030309698e-09, + "loss": 3.7409, + "step": 4384000 + }, + { + "epoch": 48.24, + "learning_rate": 4.407558171516585e-09, + "loss": 3.7486, + "step": 4384500 + }, + { + "epoch": 48.24, + "learning_rate": 4.3938060399361895e-09, + "loss": 3.7376, + "step": 4385000 + }, + { + "epoch": 48.25, + "learning_rate": 4.380053908355795e-09, + "loss": 3.7496, + "step": 4385500 + }, + { + "epoch": 48.25, + "learning_rate": 4.3663017767754e-09, + "loss": 3.733, + "step": 4386000 + }, + { + "epoch": 48.26, + "learning_rate": 4.352549645195005e-09, + "loss": 3.721, + "step": 4386500 + }, + { + "epoch": 48.26, + "learning_rate": 4.33879751361461e-09, + "loss": 3.7447, + "step": 4387000 + }, + { + "epoch": 48.27, + "learning_rate": 4.325045382034215e-09, + "loss": 3.7583, + "step": 4387500 + }, + { + "epoch": 48.28, + "learning_rate": 4.31129325045382e-09, + "loss": 3.7467, + "step": 4388000 + }, + { + "epoch": 48.28, + "learning_rate": 4.297541118873425e-09, + "loss": 3.7578, + "step": 4388500 + }, + { + "epoch": 48.29, + "learning_rate": 4.28378898729303e-09, + "loss": 3.7593, + "step": 4389000 + }, + { + "epoch": 48.29, + "learning_rate": 4.2700368557126356e-09, + "loss": 3.7424, + "step": 4389500 + }, + { + "epoch": 48.3, + "learning_rate": 4.256284724132241e-09, + "loss": 3.7456, + "step": 4390000 + }, + { + "epoch": 48.3, + "learning_rate": 4.242532592551845e-09, + "loss": 3.757, + "step": 4390500 + }, + { + "epoch": 48.31, + "learning_rate": 4.22878046097145e-09, + "loss": 3.7493, + "step": 4391000 + }, + { + "epoch": 48.31, + "learning_rate": 4.2150283293910556e-09, + "loss": 3.7349, + "step": 4391500 + }, + { + "epoch": 48.32, + "learning_rate": 4.20127619781066e-09, + "loss": 3.7551, + "step": 4392000 + }, + { + "epoch": 48.32, + "learning_rate": 4.187524066230266e-09, + "loss": 3.7541, + "step": 4392500 + }, + { + "epoch": 48.33, + "learning_rate": 4.17377193464987e-09, + "loss": 3.7533, + "step": 4393000 + }, + { + "epoch": 48.34, + "learning_rate": 4.1600198030694756e-09, + "loss": 3.7539, + "step": 4393500 + }, + { + "epoch": 48.34, + "learning_rate": 4.146267671489081e-09, + "loss": 3.7386, + "step": 4394000 + }, + { + "epoch": 48.35, + "learning_rate": 4.132515539908685e-09, + "loss": 3.7399, + "step": 4394500 + }, + { + "epoch": 48.35, + "learning_rate": 4.11876340832829e-09, + "loss": 3.7529, + "step": 4395000 + }, + { + "epoch": 48.36, + "learning_rate": 4.105011276747896e-09, + "loss": 3.7637, + "step": 4395500 + }, + { + "epoch": 48.36, + "learning_rate": 4.091259145167501e-09, + "loss": 3.7651, + "step": 4396000 + }, + { + "epoch": 48.37, + "learning_rate": 4.077507013587106e-09, + "loss": 3.7618, + "step": 4396500 + }, + { + "epoch": 48.37, + "learning_rate": 4.06375488200671e-09, + "loss": 3.7608, + "step": 4397000 + }, + { + "epoch": 48.38, + "learning_rate": 4.0500027504263156e-09, + "loss": 3.7565, + "step": 4397500 + }, + { + "epoch": 48.39, + "learning_rate": 4.036250618845921e-09, + "loss": 3.7553, + "step": 4398000 + }, + { + "epoch": 48.39, + "learning_rate": 4.022498487265526e-09, + "loss": 3.7378, + "step": 4398500 + }, + { + "epoch": 48.4, + "learning_rate": 4.008746355685131e-09, + "loss": 3.7637, + "step": 4399000 + }, + { + "epoch": 48.4, + "learning_rate": 3.994994224104736e-09, + "loss": 3.7549, + "step": 4399500 + }, + { + "epoch": 48.41, + "learning_rate": 3.981242092524341e-09, + "loss": 3.7407, + "step": 4400000 + }, + { + "epoch": 48.41, + "learning_rate": 3.967489960943946e-09, + "loss": 3.7699, + "step": 4400500 + }, + { + "epoch": 48.42, + "learning_rate": 3.953737829363551e-09, + "loss": 3.7527, + "step": 4401000 + }, + { + "epoch": 48.42, + "learning_rate": 3.939985697783156e-09, + "loss": 3.7621, + "step": 4401500 + }, + { + "epoch": 48.43, + "learning_rate": 3.926233566202762e-09, + "loss": 3.7389, + "step": 4402000 + }, + { + "epoch": 48.44, + "learning_rate": 3.912481434622366e-09, + "loss": 3.7628, + "step": 4402500 + }, + { + "epoch": 48.44, + "learning_rate": 3.898729303041971e-09, + "loss": 3.7428, + "step": 4403000 + }, + { + "epoch": 48.45, + "learning_rate": 3.884977171461576e-09, + "loss": 3.7589, + "step": 4403500 + }, + { + "epoch": 48.45, + "learning_rate": 3.871225039881181e-09, + "loss": 3.749, + "step": 4404000 + }, + { + "epoch": 48.46, + "learning_rate": 3.857472908300787e-09, + "loss": 3.7487, + "step": 4404500 + }, + { + "epoch": 48.46, + "learning_rate": 3.843720776720392e-09, + "loss": 3.738, + "step": 4405000 + }, + { + "epoch": 48.47, + "learning_rate": 3.829968645139996e-09, + "loss": 3.7464, + "step": 4405500 + }, + { + "epoch": 48.47, + "learning_rate": 3.816216513559602e-09, + "loss": 3.7512, + "step": 4406000 + }, + { + "epoch": 48.48, + "learning_rate": 3.802464381979206e-09, + "loss": 3.759, + "step": 4406500 + }, + { + "epoch": 48.48, + "learning_rate": 3.788712250398811e-09, + "loss": 3.7587, + "step": 4407000 + }, + { + "epoch": 48.49, + "learning_rate": 3.774960118818417e-09, + "loss": 3.7469, + "step": 4407500 + }, + { + "epoch": 48.5, + "learning_rate": 3.761207987238022e-09, + "loss": 3.7476, + "step": 4408000 + }, + { + "epoch": 48.5, + "learning_rate": 3.747455855657627e-09, + "loss": 3.7413, + "step": 4408500 + }, + { + "epoch": 48.51, + "learning_rate": 3.733703724077232e-09, + "loss": 3.7479, + "step": 4409000 + }, + { + "epoch": 48.51, + "learning_rate": 3.719951592496837e-09, + "loss": 3.7538, + "step": 4409500 + }, + { + "epoch": 48.52, + "learning_rate": 3.706199460916442e-09, + "loss": 3.7385, + "step": 4410000 + }, + { + "epoch": 48.52, + "learning_rate": 3.6924473293360472e-09, + "loss": 3.7471, + "step": 4410500 + }, + { + "epoch": 48.53, + "learning_rate": 3.678695197755652e-09, + "loss": 3.7521, + "step": 4411000 + }, + { + "epoch": 48.53, + "learning_rate": 3.6649430661752572e-09, + "loss": 3.7512, + "step": 4411500 + }, + { + "epoch": 48.54, + "learning_rate": 3.6511909345948616e-09, + "loss": 3.7526, + "step": 4412000 + }, + { + "epoch": 48.55, + "learning_rate": 3.637438803014467e-09, + "loss": 3.7538, + "step": 4412500 + }, + { + "epoch": 48.55, + "learning_rate": 3.6236866714340725e-09, + "loss": 3.7581, + "step": 4413000 + }, + { + "epoch": 48.56, + "learning_rate": 3.609934539853677e-09, + "loss": 3.7472, + "step": 4413500 + }, + { + "epoch": 48.56, + "learning_rate": 3.596182408273282e-09, + "loss": 3.7459, + "step": 4414000 + }, + { + "epoch": 48.57, + "learning_rate": 3.5824302766928877e-09, + "loss": 3.7428, + "step": 4414500 + }, + { + "epoch": 48.57, + "learning_rate": 3.568678145112492e-09, + "loss": 3.7592, + "step": 4415000 + }, + { + "epoch": 48.58, + "learning_rate": 3.5549260135320973e-09, + "loss": 3.7543, + "step": 4415500 + }, + { + "epoch": 48.58, + "learning_rate": 3.541173881951702e-09, + "loss": 3.7522, + "step": 4416000 + }, + { + "epoch": 48.59, + "learning_rate": 3.5274217503713073e-09, + "loss": 3.735, + "step": 4416500 + }, + { + "epoch": 48.59, + "learning_rate": 3.5136696187909125e-09, + "loss": 3.7583, + "step": 4417000 + }, + { + "epoch": 48.6, + "learning_rate": 3.4999174872105173e-09, + "loss": 3.753, + "step": 4417500 + }, + { + "epoch": 48.61, + "learning_rate": 3.4861653556301225e-09, + "loss": 3.7427, + "step": 4418000 + }, + { + "epoch": 48.61, + "learning_rate": 3.4724132240497277e-09, + "loss": 3.7341, + "step": 4418500 + }, + { + "epoch": 48.62, + "learning_rate": 3.4586610924693325e-09, + "loss": 3.7573, + "step": 4419000 + }, + { + "epoch": 48.62, + "learning_rate": 3.4449089608889377e-09, + "loss": 3.7539, + "step": 4419500 + }, + { + "epoch": 48.63, + "learning_rate": 3.431156829308543e-09, + "loss": 3.7537, + "step": 4420000 + }, + { + "epoch": 48.63, + "learning_rate": 3.4174046977281477e-09, + "loss": 3.7311, + "step": 4420500 + }, + { + "epoch": 48.64, + "learning_rate": 3.403652566147753e-09, + "loss": 3.7608, + "step": 4421000 + }, + { + "epoch": 48.64, + "learning_rate": 3.3899004345673577e-09, + "loss": 3.7485, + "step": 4421500 + }, + { + "epoch": 48.65, + "learning_rate": 3.376148302986963e-09, + "loss": 3.7436, + "step": 4422000 + }, + { + "epoch": 48.66, + "learning_rate": 3.362396171406568e-09, + "loss": 3.7622, + "step": 4422500 + }, + { + "epoch": 48.66, + "learning_rate": 3.348644039826173e-09, + "loss": 3.757, + "step": 4423000 + }, + { + "epoch": 48.67, + "learning_rate": 3.334891908245778e-09, + "loss": 3.7577, + "step": 4423500 + }, + { + "epoch": 48.67, + "learning_rate": 3.3211397766653833e-09, + "loss": 3.7613, + "step": 4424000 + }, + { + "epoch": 48.68, + "learning_rate": 3.3073876450849877e-09, + "loss": 3.759, + "step": 4424500 + }, + { + "epoch": 48.68, + "learning_rate": 3.2936355135045933e-09, + "loss": 3.7421, + "step": 4425000 + }, + { + "epoch": 48.69, + "learning_rate": 3.2798833819241977e-09, + "loss": 3.7544, + "step": 4425500 + }, + { + "epoch": 48.69, + "learning_rate": 3.266131250343803e-09, + "loss": 3.7591, + "step": 4426000 + }, + { + "epoch": 48.7, + "learning_rate": 3.2523791187634085e-09, + "loss": 3.7473, + "step": 4426500 + }, + { + "epoch": 48.7, + "learning_rate": 3.238626987183013e-09, + "loss": 3.7574, + "step": 4427000 + }, + { + "epoch": 48.71, + "learning_rate": 3.224874855602618e-09, + "loss": 3.7379, + "step": 4427500 + }, + { + "epoch": 48.72, + "learning_rate": 3.2111227240222237e-09, + "loss": 3.7372, + "step": 4428000 + }, + { + "epoch": 48.72, + "learning_rate": 3.197370592441828e-09, + "loss": 3.7591, + "step": 4428500 + }, + { + "epoch": 48.73, + "learning_rate": 3.1836184608614333e-09, + "loss": 3.7525, + "step": 4429000 + }, + { + "epoch": 48.73, + "learning_rate": 3.169866329281039e-09, + "loss": 3.7368, + "step": 4429500 + }, + { + "epoch": 48.74, + "learning_rate": 3.1561141977006433e-09, + "loss": 3.7535, + "step": 4430000 + }, + { + "epoch": 48.74, + "learning_rate": 3.1423620661202485e-09, + "loss": 3.7208, + "step": 4430500 + }, + { + "epoch": 48.75, + "learning_rate": 3.1286099345398533e-09, + "loss": 3.7483, + "step": 4431000 + }, + { + "epoch": 48.75, + "learning_rate": 3.1148578029594585e-09, + "loss": 3.7433, + "step": 4431500 + }, + { + "epoch": 48.76, + "learning_rate": 3.1011056713790633e-09, + "loss": 3.7545, + "step": 4432000 + }, + { + "epoch": 48.77, + "learning_rate": 3.087353539798669e-09, + "loss": 3.7469, + "step": 4432500 + }, + { + "epoch": 48.77, + "learning_rate": 3.0736014082182737e-09, + "loss": 3.7419, + "step": 4433000 + }, + { + "epoch": 48.78, + "learning_rate": 3.0598492766378785e-09, + "loss": 3.7348, + "step": 4433500 + }, + { + "epoch": 48.78, + "learning_rate": 3.0460971450574837e-09, + "loss": 3.7509, + "step": 4434000 + }, + { + "epoch": 48.79, + "learning_rate": 3.032345013477089e-09, + "loss": 3.7584, + "step": 4434500 + }, + { + "epoch": 48.79, + "learning_rate": 3.0185928818966937e-09, + "loss": 3.7479, + "step": 4435000 + }, + { + "epoch": 48.8, + "learning_rate": 3.004840750316299e-09, + "loss": 3.766, + "step": 4435500 + }, + { + "epoch": 48.8, + "learning_rate": 2.9910886187359037e-09, + "loss": 3.7488, + "step": 4436000 + }, + { + "epoch": 48.81, + "learning_rate": 2.977336487155509e-09, + "loss": 3.7323, + "step": 4436500 + }, + { + "epoch": 48.81, + "learning_rate": 2.963584355575114e-09, + "loss": 3.7388, + "step": 4437000 + }, + { + "epoch": 48.82, + "learning_rate": 2.949832223994719e-09, + "loss": 3.7433, + "step": 4437500 + }, + { + "epoch": 48.83, + "learning_rate": 2.9360800924143237e-09, + "loss": 3.7477, + "step": 4438000 + }, + { + "epoch": 48.83, + "learning_rate": 2.9223279608339294e-09, + "loss": 3.7518, + "step": 4438500 + }, + { + "epoch": 48.84, + "learning_rate": 2.908575829253534e-09, + "loss": 3.7563, + "step": 4439000 + }, + { + "epoch": 48.84, + "learning_rate": 2.894823697673139e-09, + "loss": 3.7631, + "step": 4439500 + }, + { + "epoch": 48.85, + "learning_rate": 2.8810715660927446e-09, + "loss": 3.7484, + "step": 4440000 + }, + { + "epoch": 48.85, + "learning_rate": 2.8673194345123494e-09, + "loss": 3.7616, + "step": 4440500 + }, + { + "epoch": 48.86, + "learning_rate": 2.853567302931954e-09, + "loss": 3.7521, + "step": 4441000 + }, + { + "epoch": 48.86, + "learning_rate": 2.8398151713515594e-09, + "loss": 3.7335, + "step": 4441500 + }, + { + "epoch": 48.87, + "learning_rate": 2.8260630397711646e-09, + "loss": 3.7624, + "step": 4442000 + }, + { + "epoch": 48.88, + "learning_rate": 2.8123109081907694e-09, + "loss": 3.7353, + "step": 4442500 + }, + { + "epoch": 48.88, + "learning_rate": 2.7985587766103746e-09, + "loss": 3.7646, + "step": 4443000 + }, + { + "epoch": 48.89, + "learning_rate": 2.7848066450299794e-09, + "loss": 3.7634, + "step": 4443500 + }, + { + "epoch": 48.89, + "learning_rate": 2.7710545134495846e-09, + "loss": 3.739, + "step": 4444000 + }, + { + "epoch": 48.9, + "learning_rate": 2.7573023818691898e-09, + "loss": 3.7429, + "step": 4444500 + }, + { + "epoch": 48.9, + "learning_rate": 2.7435502502887946e-09, + "loss": 3.7424, + "step": 4445000 + }, + { + "epoch": 48.91, + "learning_rate": 2.7297981187083994e-09, + "loss": 3.742, + "step": 4445500 + }, + { + "epoch": 48.91, + "learning_rate": 2.716045987128005e-09, + "loss": 3.7559, + "step": 4446000 + }, + { + "epoch": 48.92, + "learning_rate": 2.7022938555476098e-09, + "loss": 3.7424, + "step": 4446500 + }, + { + "epoch": 48.92, + "learning_rate": 2.6885417239672146e-09, + "loss": 3.7497, + "step": 4447000 + }, + { + "epoch": 48.93, + "learning_rate": 2.6747895923868198e-09, + "loss": 3.7641, + "step": 4447500 + }, + { + "epoch": 48.94, + "learning_rate": 2.661037460806425e-09, + "loss": 3.7675, + "step": 4448000 + }, + { + "epoch": 48.94, + "learning_rate": 2.6472853292260298e-09, + "loss": 3.7631, + "step": 4448500 + }, + { + "epoch": 48.95, + "learning_rate": 2.633533197645635e-09, + "loss": 3.7345, + "step": 4449000 + }, + { + "epoch": 48.95, + "learning_rate": 2.6197810660652402e-09, + "loss": 3.7401, + "step": 4449500 + }, + { + "epoch": 48.96, + "learning_rate": 2.606028934484845e-09, + "loss": 3.7485, + "step": 4450000 + }, + { + "epoch": 48.96, + "learning_rate": 2.5922768029044502e-09, + "loss": 3.762, + "step": 4450500 + }, + { + "epoch": 48.97, + "learning_rate": 2.578524671324055e-09, + "loss": 3.7534, + "step": 4451000 + }, + { + "epoch": 48.97, + "learning_rate": 2.5647725397436602e-09, + "loss": 3.7362, + "step": 4451500 + }, + { + "epoch": 48.98, + "learning_rate": 2.5510204081632654e-09, + "loss": 3.7473, + "step": 4452000 + }, + { + "epoch": 48.99, + "learning_rate": 2.5372682765828702e-09, + "loss": 3.7627, + "step": 4452500 + }, + { + "epoch": 48.99, + "learning_rate": 2.523516145002475e-09, + "loss": 3.7469, + "step": 4453000 + }, + { + "epoch": 49.0, + "learning_rate": 2.5097640134220806e-09, + "loss": 3.7466, + "step": 4453500 + }, + { + "epoch": 49.0, + "eval_loss": 3.8253118991851807, + "eval_runtime": 6.1426, + "eval_samples_per_second": 252.987, + "step": 4453855 + }, + { + "epoch": 49.0, + "learning_rate": 2.4960118818416854e-09, + "loss": 3.75, + "step": 4454000 + }, + { + "epoch": 49.01, + "learning_rate": 2.4822597502612902e-09, + "loss": 3.7537, + "step": 4454500 + }, + { + "epoch": 49.01, + "learning_rate": 2.4685076186808954e-09, + "loss": 3.752, + "step": 4455000 + }, + { + "epoch": 49.02, + "learning_rate": 2.4547554871005006e-09, + "loss": 3.7553, + "step": 4455500 + }, + { + "epoch": 49.02, + "learning_rate": 2.4410033555201054e-09, + "loss": 3.771, + "step": 4456000 + }, + { + "epoch": 49.03, + "learning_rate": 2.4272512239397106e-09, + "loss": 3.7516, + "step": 4456500 + }, + { + "epoch": 49.03, + "learning_rate": 2.4134990923593154e-09, + "loss": 3.7428, + "step": 4457000 + }, + { + "epoch": 49.04, + "learning_rate": 2.3997469607789206e-09, + "loss": 3.7016, + "step": 4457500 + }, + { + "epoch": 49.05, + "learning_rate": 2.385994829198526e-09, + "loss": 3.743, + "step": 4458000 + }, + { + "epoch": 49.05, + "learning_rate": 2.3722426976181306e-09, + "loss": 3.7622, + "step": 4458500 + }, + { + "epoch": 49.06, + "learning_rate": 2.358490566037736e-09, + "loss": 3.7507, + "step": 4459000 + }, + { + "epoch": 49.06, + "learning_rate": 2.344738434457341e-09, + "loss": 3.7626, + "step": 4459500 + }, + { + "epoch": 49.07, + "learning_rate": 2.330986302876946e-09, + "loss": 3.753, + "step": 4460000 + }, + { + "epoch": 49.07, + "learning_rate": 2.3172341712965506e-09, + "loss": 3.7507, + "step": 4460500 + }, + { + "epoch": 49.08, + "learning_rate": 2.303482039716156e-09, + "loss": 3.7484, + "step": 4461000 + }, + { + "epoch": 49.08, + "learning_rate": 2.289729908135761e-09, + "loss": 3.7479, + "step": 4461500 + }, + { + "epoch": 49.09, + "learning_rate": 2.275977776555366e-09, + "loss": 3.743, + "step": 4462000 + }, + { + "epoch": 49.1, + "learning_rate": 2.262225644974971e-09, + "loss": 3.7613, + "step": 4462500 + }, + { + "epoch": 49.1, + "learning_rate": 2.2484735133945763e-09, + "loss": 3.7441, + "step": 4463000 + }, + { + "epoch": 49.11, + "learning_rate": 2.234721381814181e-09, + "loss": 3.7591, + "step": 4463500 + }, + { + "epoch": 49.11, + "learning_rate": 2.2209692502337863e-09, + "loss": 3.7476, + "step": 4464000 + }, + { + "epoch": 49.12, + "learning_rate": 2.207217118653391e-09, + "loss": 3.7556, + "step": 4464500 + }, + { + "epoch": 49.12, + "learning_rate": 2.1934649870729963e-09, + "loss": 3.7658, + "step": 4465000 + }, + { + "epoch": 49.13, + "learning_rate": 2.1797128554926015e-09, + "loss": 3.7483, + "step": 4465500 + }, + { + "epoch": 49.13, + "learning_rate": 2.1659607239122063e-09, + "loss": 3.7599, + "step": 4466000 + }, + { + "epoch": 49.14, + "learning_rate": 2.152208592331811e-09, + "loss": 3.7437, + "step": 4466500 + }, + { + "epoch": 49.14, + "learning_rate": 2.1384564607514163e-09, + "loss": 3.7256, + "step": 4467000 + }, + { + "epoch": 49.15, + "learning_rate": 2.1247043291710215e-09, + "loss": 3.745, + "step": 4467500 + }, + { + "epoch": 49.16, + "learning_rate": 2.1109521975906263e-09, + "loss": 3.751, + "step": 4468000 + }, + { + "epoch": 49.16, + "learning_rate": 2.0972000660102315e-09, + "loss": 3.7411, + "step": 4468500 + }, + { + "epoch": 49.17, + "learning_rate": 2.0834479344298367e-09, + "loss": 3.755, + "step": 4469000 + }, + { + "epoch": 49.17, + "learning_rate": 2.0696958028494415e-09, + "loss": 3.7544, + "step": 4469500 + }, + { + "epoch": 49.18, + "learning_rate": 2.0559436712690467e-09, + "loss": 3.7327, + "step": 4470000 + }, + { + "epoch": 49.18, + "learning_rate": 2.042191539688652e-09, + "loss": 3.7433, + "step": 4470500 + }, + { + "epoch": 49.19, + "learning_rate": 2.0284394081082567e-09, + "loss": 3.7432, + "step": 4471000 + }, + { + "epoch": 49.19, + "learning_rate": 2.014687276527862e-09, + "loss": 3.7357, + "step": 4471500 + }, + { + "epoch": 49.2, + "learning_rate": 2.0009351449474667e-09, + "loss": 3.7292, + "step": 4472000 + }, + { + "epoch": 49.21, + "learning_rate": 1.987183013367072e-09, + "loss": 3.7488, + "step": 4472500 + }, + { + "epoch": 49.21, + "learning_rate": 1.9734308817866767e-09, + "loss": 3.7682, + "step": 4473000 + }, + { + "epoch": 49.22, + "learning_rate": 1.959678750206282e-09, + "loss": 3.7617, + "step": 4473500 + }, + { + "epoch": 49.22, + "learning_rate": 1.9459266186258867e-09, + "loss": 3.7488, + "step": 4474000 + }, + { + "epoch": 49.23, + "learning_rate": 1.932174487045492e-09, + "loss": 3.7569, + "step": 4474500 + }, + { + "epoch": 49.23, + "learning_rate": 1.918422355465097e-09, + "loss": 3.7412, + "step": 4475000 + }, + { + "epoch": 49.24, + "learning_rate": 1.904670223884702e-09, + "loss": 3.7514, + "step": 4475500 + }, + { + "epoch": 49.24, + "learning_rate": 1.890918092304307e-09, + "loss": 3.7522, + "step": 4476000 + }, + { + "epoch": 49.25, + "learning_rate": 1.8771659607239123e-09, + "loss": 3.7524, + "step": 4476500 + }, + { + "epoch": 49.25, + "learning_rate": 1.863413829143517e-09, + "loss": 3.7455, + "step": 4477000 + }, + { + "epoch": 49.26, + "learning_rate": 1.8496616975631221e-09, + "loss": 3.7386, + "step": 4477500 + }, + { + "epoch": 49.27, + "learning_rate": 1.8359095659827271e-09, + "loss": 3.7489, + "step": 4478000 + }, + { + "epoch": 49.27, + "learning_rate": 1.8221574344023323e-09, + "loss": 3.7652, + "step": 4478500 + }, + { + "epoch": 49.28, + "learning_rate": 1.8084053028219373e-09, + "loss": 3.7492, + "step": 4479000 + }, + { + "epoch": 49.28, + "learning_rate": 1.7946531712415423e-09, + "loss": 3.7521, + "step": 4479500 + }, + { + "epoch": 49.29, + "learning_rate": 1.7809010396611475e-09, + "loss": 3.7324, + "step": 4480000 + }, + { + "epoch": 49.29, + "learning_rate": 1.7671489080807525e-09, + "loss": 3.75, + "step": 4480500 + }, + { + "epoch": 49.3, + "learning_rate": 1.7533967765003575e-09, + "loss": 3.7295, + "step": 4481000 + }, + { + "epoch": 49.3, + "learning_rate": 1.7396446449199623e-09, + "loss": 3.753, + "step": 4481500 + }, + { + "epoch": 49.31, + "learning_rate": 1.7258925133395677e-09, + "loss": 3.737, + "step": 4482000 + }, + { + "epoch": 49.32, + "learning_rate": 1.7121403817591727e-09, + "loss": 3.758, + "step": 4482500 + }, + { + "epoch": 49.32, + "learning_rate": 1.6983882501787775e-09, + "loss": 3.7548, + "step": 4483000 + }, + { + "epoch": 49.33, + "learning_rate": 1.6846361185983825e-09, + "loss": 3.7609, + "step": 4483500 + }, + { + "epoch": 49.33, + "learning_rate": 1.6708839870179877e-09, + "loss": 3.7303, + "step": 4484000 + }, + { + "epoch": 49.34, + "learning_rate": 1.6571318554375927e-09, + "loss": 3.7614, + "step": 4484500 + }, + { + "epoch": 49.34, + "learning_rate": 1.6433797238571977e-09, + "loss": 3.7552, + "step": 4485000 + }, + { + "epoch": 49.35, + "learning_rate": 1.6296275922768027e-09, + "loss": 3.7667, + "step": 4485500 + }, + { + "epoch": 49.35, + "learning_rate": 1.615875460696408e-09, + "loss": 3.7536, + "step": 4486000 + }, + { + "epoch": 49.36, + "learning_rate": 1.602123329116013e-09, + "loss": 3.774, + "step": 4486500 + }, + { + "epoch": 49.36, + "learning_rate": 1.588371197535618e-09, + "loss": 3.7311, + "step": 4487000 + }, + { + "epoch": 49.37, + "learning_rate": 1.5746190659552228e-09, + "loss": 3.7543, + "step": 4487500 + }, + { + "epoch": 49.38, + "learning_rate": 1.560866934374828e-09, + "loss": 3.7313, + "step": 4488000 + }, + { + "epoch": 49.38, + "learning_rate": 1.5471148027944332e-09, + "loss": 3.7429, + "step": 4488500 + }, + { + "epoch": 49.39, + "learning_rate": 1.533362671214038e-09, + "loss": 3.7407, + "step": 4489000 + }, + { + "epoch": 49.39, + "learning_rate": 1.5196105396336432e-09, + "loss": 3.7319, + "step": 4489500 + }, + { + "epoch": 49.4, + "learning_rate": 1.5058584080532482e-09, + "loss": 3.7465, + "step": 4490000 + }, + { + "epoch": 49.4, + "learning_rate": 1.4921062764728532e-09, + "loss": 3.7547, + "step": 4490500 + }, + { + "epoch": 49.41, + "learning_rate": 1.4783541448924584e-09, + "loss": 3.7521, + "step": 4491000 + }, + { + "epoch": 49.41, + "learning_rate": 1.4646020133120634e-09, + "loss": 3.7482, + "step": 4491500 + }, + { + "epoch": 49.42, + "learning_rate": 1.4508498817316684e-09, + "loss": 3.7532, + "step": 4492000 + }, + { + "epoch": 49.43, + "learning_rate": 1.4370977501512734e-09, + "loss": 3.7541, + "step": 4492500 + }, + { + "epoch": 49.43, + "learning_rate": 1.4233456185708784e-09, + "loss": 3.7561, + "step": 4493000 + }, + { + "epoch": 49.44, + "learning_rate": 1.4095934869904834e-09, + "loss": 3.7535, + "step": 4493500 + }, + { + "epoch": 49.44, + "learning_rate": 1.3958413554100886e-09, + "loss": 3.7457, + "step": 4494000 + }, + { + "epoch": 49.45, + "learning_rate": 1.3820892238296936e-09, + "loss": 3.7271, + "step": 4494500 + }, + { + "epoch": 49.45, + "learning_rate": 1.3683370922492986e-09, + "loss": 3.7551, + "step": 4495000 + }, + { + "epoch": 49.46, + "learning_rate": 1.3545849606689036e-09, + "loss": 3.769, + "step": 4495500 + }, + { + "epoch": 49.46, + "learning_rate": 1.3408328290885086e-09, + "loss": 3.7481, + "step": 4496000 + }, + { + "epoch": 49.47, + "learning_rate": 1.3270806975081136e-09, + "loss": 3.748, + "step": 4496500 + }, + { + "epoch": 49.47, + "learning_rate": 1.3133285659277188e-09, + "loss": 3.7513, + "step": 4497000 + }, + { + "epoch": 49.48, + "learning_rate": 1.2995764343473238e-09, + "loss": 3.7704, + "step": 4497500 + }, + { + "epoch": 49.49, + "learning_rate": 1.2858243027669288e-09, + "loss": 3.7598, + "step": 4498000 + }, + { + "epoch": 49.49, + "learning_rate": 1.2720721711865338e-09, + "loss": 3.7334, + "step": 4498500 + }, + { + "epoch": 49.5, + "learning_rate": 1.2583200396061388e-09, + "loss": 3.7528, + "step": 4499000 + }, + { + "epoch": 49.5, + "learning_rate": 1.244567908025744e-09, + "loss": 3.7469, + "step": 4499500 + }, + { + "epoch": 49.51, + "learning_rate": 1.230815776445349e-09, + "loss": 3.7613, + "step": 4500000 + }, + { + "epoch": 49.51, + "learning_rate": 1.217063644864954e-09, + "loss": 3.7406, + "step": 4500500 + }, + { + "epoch": 49.52, + "learning_rate": 1.203311513284559e-09, + "loss": 3.7452, + "step": 4501000 + }, + { + "epoch": 49.52, + "learning_rate": 1.1895593817041642e-09, + "loss": 3.7581, + "step": 4501500 + }, + { + "epoch": 49.53, + "learning_rate": 1.175807250123769e-09, + "loss": 3.7644, + "step": 4502000 + }, + { + "epoch": 49.54, + "learning_rate": 1.1620551185433742e-09, + "loss": 3.7417, + "step": 4502500 + }, + { + "epoch": 49.54, + "learning_rate": 1.1483029869629792e-09, + "loss": 3.7424, + "step": 4503000 + }, + { + "epoch": 49.55, + "learning_rate": 1.1345508553825842e-09, + "loss": 3.7492, + "step": 4503500 + }, + { + "epoch": 49.55, + "learning_rate": 1.1207987238021892e-09, + "loss": 3.7413, + "step": 4504000 + }, + { + "epoch": 49.56, + "learning_rate": 1.1070465922217944e-09, + "loss": 3.7517, + "step": 4504500 + }, + { + "epoch": 49.56, + "learning_rate": 1.0932944606413992e-09, + "loss": 3.7427, + "step": 4505000 + }, + { + "epoch": 49.57, + "learning_rate": 1.0795423290610044e-09, + "loss": 3.761, + "step": 4505500 + }, + { + "epoch": 49.57, + "learning_rate": 1.0657901974806094e-09, + "loss": 3.7443, + "step": 4506000 + }, + { + "epoch": 49.58, + "learning_rate": 1.0520380659002144e-09, + "loss": 3.752, + "step": 4506500 + }, + { + "epoch": 49.58, + "learning_rate": 1.0382859343198194e-09, + "loss": 3.734, + "step": 4507000 + }, + { + "epoch": 49.59, + "learning_rate": 1.0245338027394246e-09, + "loss": 3.746, + "step": 4507500 + }, + { + "epoch": 49.6, + "learning_rate": 1.0107816711590294e-09, + "loss": 3.7364, + "step": 4508000 + }, + { + "epoch": 49.6, + "learning_rate": 9.970295395786346e-10, + "loss": 3.7374, + "step": 4508500 + }, + { + "epoch": 49.61, + "learning_rate": 9.832774079982396e-10, + "loss": 3.7571, + "step": 4509000 + }, + { + "epoch": 49.61, + "learning_rate": 9.695252764178446e-10, + "loss": 3.7506, + "step": 4509500 + }, + { + "epoch": 49.62, + "learning_rate": 9.557731448374499e-10, + "loss": 3.7481, + "step": 4510000 + }, + { + "epoch": 49.62, + "learning_rate": 9.420210132570549e-10, + "loss": 3.7567, + "step": 4510500 + }, + { + "epoch": 49.63, + "learning_rate": 9.2826888167666e-10, + "loss": 3.7706, + "step": 4511000 + }, + { + "epoch": 49.63, + "learning_rate": 9.145167500962649e-10, + "loss": 3.7562, + "step": 4511500 + }, + { + "epoch": 49.64, + "learning_rate": 9.0076461851587e-10, + "loss": 3.7378, + "step": 4512000 + }, + { + "epoch": 49.65, + "learning_rate": 8.87012486935475e-10, + "loss": 3.7392, + "step": 4512500 + }, + { + "epoch": 49.65, + "learning_rate": 8.732603553550801e-10, + "loss": 3.7412, + "step": 4513000 + }, + { + "epoch": 49.66, + "learning_rate": 8.59508223774685e-10, + "loss": 3.7571, + "step": 4513500 + }, + { + "epoch": 49.66, + "learning_rate": 8.457560921942902e-10, + "loss": 3.75, + "step": 4514000 + }, + { + "epoch": 49.67, + "learning_rate": 8.320039606138951e-10, + "loss": 3.7675, + "step": 4514500 + }, + { + "epoch": 49.67, + "learning_rate": 8.182518290335002e-10, + "loss": 3.7498, + "step": 4515000 + }, + { + "epoch": 49.68, + "learning_rate": 8.044996974531052e-10, + "loss": 3.7628, + "step": 4515500 + }, + { + "epoch": 49.68, + "learning_rate": 7.907475658727103e-10, + "loss": 3.771, + "step": 4516000 + }, + { + "epoch": 49.69, + "learning_rate": 7.769954342923153e-10, + "loss": 3.7463, + "step": 4516500 + }, + { + "epoch": 49.69, + "learning_rate": 7.632433027119203e-10, + "loss": 3.7496, + "step": 4517000 + }, + { + "epoch": 49.7, + "learning_rate": 7.494911711315254e-10, + "loss": 3.754, + "step": 4517500 + }, + { + "epoch": 49.71, + "learning_rate": 7.357390395511304e-10, + "loss": 3.7322, + "step": 4518000 + }, + { + "epoch": 49.71, + "learning_rate": 7.219869079707354e-10, + "loss": 3.7296, + "step": 4518500 + }, + { + "epoch": 49.72, + "learning_rate": 7.082347763903405e-10, + "loss": 3.7719, + "step": 4519000 + }, + { + "epoch": 49.72, + "learning_rate": 6.944826448099455e-10, + "loss": 3.7419, + "step": 4519500 + }, + { + "epoch": 49.73, + "learning_rate": 6.807305132295505e-10, + "loss": 3.7552, + "step": 4520000 + }, + { + "epoch": 49.73, + "learning_rate": 6.669783816491556e-10, + "loss": 3.765, + "step": 4520500 + }, + { + "epoch": 49.74, + "learning_rate": 6.532262500687606e-10, + "loss": 3.7384, + "step": 4521000 + }, + { + "epoch": 49.74, + "learning_rate": 6.394741184883656e-10, + "loss": 3.7514, + "step": 4521500 + }, + { + "epoch": 49.75, + "learning_rate": 6.257219869079707e-10, + "loss": 3.7354, + "step": 4522000 + }, + { + "epoch": 49.76, + "learning_rate": 6.119698553275757e-10, + "loss": 3.7474, + "step": 4522500 + }, + { + "epoch": 49.76, + "learning_rate": 5.982177237471807e-10, + "loss": 3.7511, + "step": 4523000 + }, + { + "epoch": 49.77, + "learning_rate": 5.844655921667858e-10, + "loss": 3.739, + "step": 4523500 + }, + { + "epoch": 49.77, + "learning_rate": 5.707134605863908e-10, + "loss": 3.7345, + "step": 4524000 + }, + { + "epoch": 49.78, + "learning_rate": 5.569613290059959e-10, + "loss": 3.7544, + "step": 4524500 + }, + { + "epoch": 49.78, + "learning_rate": 5.43209197425601e-10, + "loss": 3.7472, + "step": 4525000 + }, + { + "epoch": 49.79, + "learning_rate": 5.29457065845206e-10, + "loss": 3.7622, + "step": 4525500 + }, + { + "epoch": 49.79, + "learning_rate": 5.15704934264811e-10, + "loss": 3.7751, + "step": 4526000 + }, + { + "epoch": 49.8, + "learning_rate": 5.019528026844161e-10, + "loss": 3.745, + "step": 4526500 + }, + { + "epoch": 49.8, + "learning_rate": 4.882006711040211e-10, + "loss": 3.7265, + "step": 4527000 + }, + { + "epoch": 49.81, + "learning_rate": 4.744485395236261e-10, + "loss": 3.739, + "step": 4527500 + }, + { + "epoch": 49.82, + "learning_rate": 4.606964079432312e-10, + "loss": 3.7298, + "step": 4528000 + }, + { + "epoch": 49.82, + "learning_rate": 4.4694427636283623e-10, + "loss": 3.7627, + "step": 4528500 + }, + { + "epoch": 49.83, + "learning_rate": 4.331921447824413e-10, + "loss": 3.78, + "step": 4529000 + }, + { + "epoch": 49.83, + "learning_rate": 4.194400132020463e-10, + "loss": 3.7387, + "step": 4529500 + }, + { + "epoch": 49.84, + "learning_rate": 4.0568788162165134e-10, + "loss": 3.7501, + "step": 4530000 + }, + { + "epoch": 49.84, + "learning_rate": 3.919357500412564e-10, + "loss": 3.7371, + "step": 4530500 + }, + { + "epoch": 49.85, + "learning_rate": 3.781836184608614e-10, + "loss": 3.7358, + "step": 4531000 + }, + { + "epoch": 49.85, + "learning_rate": 3.6443148688046644e-10, + "loss": 3.7356, + "step": 4531500 + }, + { + "epoch": 49.86, + "learning_rate": 3.506793553000715e-10, + "loss": 3.7474, + "step": 4532000 + }, + { + "epoch": 49.87, + "learning_rate": 3.3692722371967655e-10, + "loss": 3.7462, + "step": 4532500 + }, + { + "epoch": 49.87, + "learning_rate": 3.231750921392816e-10, + "loss": 3.745, + "step": 4533000 + }, + { + "epoch": 49.88, + "learning_rate": 3.094229605588866e-10, + "loss": 3.764, + "step": 4533500 + }, + { + "epoch": 49.88, + "learning_rate": 2.9567082897849165e-10, + "loss": 3.7514, + "step": 4534000 + }, + { + "epoch": 49.89, + "learning_rate": 2.819186973980967e-10, + "loss": 3.7585, + "step": 4534500 + }, + { + "epoch": 49.89, + "learning_rate": 2.681665658177017e-10, + "loss": 3.739, + "step": 4535000 + }, + { + "epoch": 49.9, + "learning_rate": 2.5441443423730676e-10, + "loss": 3.7291, + "step": 4535500 + }, + { + "epoch": 49.9, + "learning_rate": 2.406623026569118e-10, + "loss": 3.7672, + "step": 4536000 + }, + { + "epoch": 49.91, + "learning_rate": 2.2691017107651684e-10, + "loss": 3.7537, + "step": 4536500 + }, + { + "epoch": 49.91, + "learning_rate": 2.1315803949612187e-10, + "loss": 3.756, + "step": 4537000 + }, + { + "epoch": 49.92, + "learning_rate": 1.9940590791572695e-10, + "loss": 3.7495, + "step": 4537500 + }, + { + "epoch": 49.93, + "learning_rate": 1.8565377633533197e-10, + "loss": 3.7447, + "step": 4538000 + }, + { + "epoch": 49.93, + "learning_rate": 1.7190164475493702e-10, + "loss": 3.738, + "step": 4538500 + }, + { + "epoch": 49.94, + "learning_rate": 1.5814951317454205e-10, + "loss": 3.7381, + "step": 4539000 + }, + { + "epoch": 49.94, + "learning_rate": 1.4439738159414708e-10, + "loss": 3.7596, + "step": 4539500 + }, + { + "epoch": 49.95, + "learning_rate": 1.3064525001375213e-10, + "loss": 3.754, + "step": 4540000 + }, + { + "epoch": 49.95, + "learning_rate": 1.1689311843335716e-10, + "loss": 3.7449, + "step": 4540500 + }, + { + "epoch": 49.96, + "learning_rate": 1.0314098685296221e-10, + "loss": 3.7402, + "step": 4541000 + }, + { + "epoch": 49.96, + "learning_rate": 8.938885527256724e-11, + "loss": 3.7431, + "step": 4541500 + }, + { + "epoch": 49.97, + "learning_rate": 7.563672369217229e-11, + "loss": 3.7442, + "step": 4542000 + }, + { + "epoch": 49.98, + "learning_rate": 6.188459211177732e-11, + "loss": 3.7337, + "step": 4542500 + }, + { + "epoch": 49.98, + "learning_rate": 4.813246053138236e-11, + "loss": 3.7584, + "step": 4543000 + }, + { + "epoch": 49.99, + "learning_rate": 3.43803289509874e-11, + "loss": 3.7638, + "step": 4543500 + }, + { + "epoch": 49.99, + "learning_rate": 2.062819737059244e-11, + "loss": 3.7403, + "step": 4544000 + }, + { + "epoch": 50.0, + "learning_rate": 6.876065790197479e-12, + "loss": 3.7504, + "step": 4544500 + }, + { + "epoch": 50.0, + "eval_loss": 3.825317859649658, + "eval_runtime": 6.146, + "eval_samples_per_second": 252.849, + "step": 4544750 + } + ], + "max_steps": 4544750, + "num_train_epochs": 50, + "total_flos": 2.176739633488896e+17, + "trial_name": null, + "trial_params": null +}