diff --git "a/imagined/model/trainer_state.json" "b/imagined/model/trainer_state.json" new file mode 100644--- /dev/null +++ "b/imagined/model/trainer_state.json" @@ -0,0 +1,54390 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 50.0, + "global_step": 4502300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 5.552717499944473e-11, + "loss": 4.7837, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 1.1105434999888946e-10, + "loss": 4.7449, + "step": 1000 + }, + { + "epoch": 0.02, + "learning_rate": 1.665815249983342e-10, + "loss": 4.762, + "step": 1500 + }, + { + "epoch": 0.02, + "learning_rate": 2.221086999977789e-10, + "loss": 4.7893, + "step": 2000 + }, + { + "epoch": 0.03, + "learning_rate": 2.7763587499722363e-10, + "loss": 4.7779, + "step": 2500 + }, + { + "epoch": 0.03, + "learning_rate": 3.331630499966684e-10, + "loss": 4.7454, + "step": 3000 + }, + { + "epoch": 0.04, + "learning_rate": 3.886902249961131e-10, + "loss": 4.7598, + "step": 3500 + }, + { + "epoch": 0.04, + "learning_rate": 4.442173999955578e-10, + "loss": 4.7754, + "step": 4000 + }, + { + "epoch": 0.05, + "learning_rate": 4.997445749950025e-10, + "loss": 4.7607, + "step": 4500 + }, + { + "epoch": 0.06, + "learning_rate": 5.552717499944473e-10, + "loss": 4.7713, + "step": 5000 + }, + { + "epoch": 0.06, + "learning_rate": 6.10798924993892e-10, + "loss": 4.7322, + "step": 5500 + }, + { + "epoch": 0.07, + "learning_rate": 6.663260999933368e-10, + "loss": 4.7939, + "step": 6000 + }, + { + "epoch": 0.07, + "learning_rate": 7.218532749927815e-10, + "loss": 4.7548, + "step": 6500 + }, + { + "epoch": 0.08, + "learning_rate": 7.773804499922262e-10, + "loss": 4.772, + "step": 7000 + }, + { + "epoch": 0.08, + "learning_rate": 8.32907624991671e-10, + "loss": 4.7405, + "step": 7500 + }, + { + "epoch": 0.09, + "learning_rate": 8.884347999911156e-10, + "loss": 4.7392, + "step": 8000 + }, + { + "epoch": 0.09, + "learning_rate": 9.439619749905603e-10, + "loss": 4.7745, + "step": 8500 + }, + { + "epoch": 0.1, + "learning_rate": 9.99489149990005e-10, + "loss": 4.7632, + "step": 9000 + }, + { + "epoch": 0.11, + "learning_rate": 1.0550163249894497e-09, + "loss": 4.7306, + "step": 9500 + }, + { + "epoch": 0.11, + "learning_rate": 1.1105434999888945e-09, + "loss": 4.7724, + "step": 10000 + }, + { + "epoch": 0.12, + "learning_rate": 1.1660706749883393e-09, + "loss": 4.7545, + "step": 10500 + }, + { + "epoch": 0.12, + "learning_rate": 1.221597849987784e-09, + "loss": 4.7522, + "step": 11000 + }, + { + "epoch": 0.13, + "learning_rate": 1.2771250249872287e-09, + "loss": 4.744, + "step": 11500 + }, + { + "epoch": 0.13, + "learning_rate": 1.3326521999866735e-09, + "loss": 4.7259, + "step": 12000 + }, + { + "epoch": 0.14, + "learning_rate": 1.3881793749861181e-09, + "loss": 4.7489, + "step": 12500 + }, + { + "epoch": 0.14, + "learning_rate": 1.443706549985563e-09, + "loss": 4.7674, + "step": 13000 + }, + { + "epoch": 0.15, + "learning_rate": 1.4992337249850075e-09, + "loss": 4.7464, + "step": 13500 + }, + { + "epoch": 0.16, + "learning_rate": 1.5547608999844523e-09, + "loss": 4.7427, + "step": 14000 + }, + { + "epoch": 0.16, + "learning_rate": 1.6102880749838971e-09, + "loss": 4.7386, + "step": 14500 + }, + { + "epoch": 0.17, + "learning_rate": 1.665815249983342e-09, + "loss": 4.7435, + "step": 15000 + }, + { + "epoch": 0.17, + "learning_rate": 1.7213424249827865e-09, + "loss": 4.7479, + "step": 15500 + }, + { + "epoch": 0.18, + "learning_rate": 1.7768695999822313e-09, + "loss": 4.7619, + "step": 16000 + }, + { + "epoch": 0.18, + "learning_rate": 1.832396774981676e-09, + "loss": 4.7526, + "step": 16500 + }, + { + "epoch": 0.19, + "learning_rate": 1.8879239499811207e-09, + "loss": 4.7487, + "step": 17000 + }, + { + "epoch": 0.19, + "learning_rate": 1.9434511249805653e-09, + "loss": 4.7383, + "step": 17500 + }, + { + "epoch": 0.2, + "learning_rate": 1.99897829998001e-09, + "loss": 4.7341, + "step": 18000 + }, + { + "epoch": 0.21, + "learning_rate": 2.054505474979455e-09, + "loss": 4.7364, + "step": 18500 + }, + { + "epoch": 0.21, + "learning_rate": 2.1100326499788995e-09, + "loss": 4.7539, + "step": 19000 + }, + { + "epoch": 0.22, + "learning_rate": 2.1655598249783445e-09, + "loss": 4.7276, + "step": 19500 + }, + { + "epoch": 0.22, + "learning_rate": 2.221086999977789e-09, + "loss": 4.7091, + "step": 20000 + }, + { + "epoch": 0.23, + "learning_rate": 2.2766141749772337e-09, + "loss": 4.7189, + "step": 20500 + }, + { + "epoch": 0.23, + "learning_rate": 2.3321413499766787e-09, + "loss": 4.727, + "step": 21000 + }, + { + "epoch": 0.24, + "learning_rate": 2.3876685249761233e-09, + "loss": 4.7139, + "step": 21500 + }, + { + "epoch": 0.24, + "learning_rate": 2.443195699975568e-09, + "loss": 4.7044, + "step": 22000 + }, + { + "epoch": 0.25, + "learning_rate": 2.4987228749750124e-09, + "loss": 4.7405, + "step": 22500 + }, + { + "epoch": 0.26, + "learning_rate": 2.5542500499744574e-09, + "loss": 4.7065, + "step": 23000 + }, + { + "epoch": 0.26, + "learning_rate": 2.609777224973902e-09, + "loss": 4.7193, + "step": 23500 + }, + { + "epoch": 0.27, + "learning_rate": 2.665304399973347e-09, + "loss": 4.6842, + "step": 24000 + }, + { + "epoch": 0.27, + "learning_rate": 2.7208315749727912e-09, + "loss": 4.7172, + "step": 24500 + }, + { + "epoch": 0.28, + "learning_rate": 2.7763587499722362e-09, + "loss": 4.6902, + "step": 25000 + }, + { + "epoch": 0.28, + "learning_rate": 2.831885924971681e-09, + "loss": 4.6976, + "step": 25500 + }, + { + "epoch": 0.29, + "learning_rate": 2.887413099971126e-09, + "loss": 4.6845, + "step": 26000 + }, + { + "epoch": 0.29, + "learning_rate": 2.9429402749705704e-09, + "loss": 4.6812, + "step": 26500 + }, + { + "epoch": 0.3, + "learning_rate": 2.998467449970015e-09, + "loss": 4.6999, + "step": 27000 + }, + { + "epoch": 0.31, + "learning_rate": 3.05399462496946e-09, + "loss": 4.7025, + "step": 27500 + }, + { + "epoch": 0.31, + "learning_rate": 3.1095217999689046e-09, + "loss": 4.6931, + "step": 28000 + }, + { + "epoch": 0.32, + "learning_rate": 3.165048974968349e-09, + "loss": 4.6684, + "step": 28500 + }, + { + "epoch": 0.32, + "learning_rate": 3.2205761499677942e-09, + "loss": 4.6757, + "step": 29000 + }, + { + "epoch": 0.33, + "learning_rate": 3.276103324967239e-09, + "loss": 4.652, + "step": 29500 + }, + { + "epoch": 0.33, + "learning_rate": 3.331630499966684e-09, + "loss": 4.6497, + "step": 30000 + }, + { + "epoch": 0.34, + "learning_rate": 3.3871576749661284e-09, + "loss": 4.665, + "step": 30500 + }, + { + "epoch": 0.34, + "learning_rate": 3.442684849965573e-09, + "loss": 4.6444, + "step": 31000 + }, + { + "epoch": 0.35, + "learning_rate": 3.498212024965018e-09, + "loss": 4.6672, + "step": 31500 + }, + { + "epoch": 0.36, + "learning_rate": 3.5537391999644626e-09, + "loss": 4.6469, + "step": 32000 + }, + { + "epoch": 0.36, + "learning_rate": 3.6092663749639068e-09, + "loss": 4.6385, + "step": 32500 + }, + { + "epoch": 0.37, + "learning_rate": 3.664793549963352e-09, + "loss": 4.6352, + "step": 33000 + }, + { + "epoch": 0.37, + "learning_rate": 3.7203207249627964e-09, + "loss": 4.6436, + "step": 33500 + }, + { + "epoch": 0.38, + "learning_rate": 3.775847899962241e-09, + "loss": 4.6293, + "step": 34000 + }, + { + "epoch": 0.38, + "learning_rate": 3.831375074961686e-09, + "loss": 4.6468, + "step": 34500 + }, + { + "epoch": 0.39, + "learning_rate": 3.8869022499611306e-09, + "loss": 4.6267, + "step": 35000 + }, + { + "epoch": 0.39, + "learning_rate": 3.9424294249605756e-09, + "loss": 4.6408, + "step": 35500 + }, + { + "epoch": 0.4, + "learning_rate": 3.99795659996002e-09, + "loss": 4.629, + "step": 36000 + }, + { + "epoch": 0.41, + "learning_rate": 4.0534837749594656e-09, + "loss": 4.6255, + "step": 36500 + }, + { + "epoch": 0.41, + "learning_rate": 4.10901094995891e-09, + "loss": 4.6302, + "step": 37000 + }, + { + "epoch": 0.42, + "learning_rate": 4.164538124958354e-09, + "loss": 4.5945, + "step": 37500 + }, + { + "epoch": 0.42, + "learning_rate": 4.220065299957799e-09, + "loss": 4.6059, + "step": 38000 + }, + { + "epoch": 0.43, + "learning_rate": 4.275592474957244e-09, + "loss": 4.5908, + "step": 38500 + }, + { + "epoch": 0.43, + "learning_rate": 4.331119649956689e-09, + "loss": 4.5667, + "step": 39000 + }, + { + "epoch": 0.44, + "learning_rate": 4.386646824956133e-09, + "loss": 4.5727, + "step": 39500 + }, + { + "epoch": 0.44, + "learning_rate": 4.442173999955578e-09, + "loss": 4.5981, + "step": 40000 + }, + { + "epoch": 0.45, + "learning_rate": 4.497701174955023e-09, + "loss": 4.5854, + "step": 40500 + }, + { + "epoch": 0.46, + "learning_rate": 4.553228349954467e-09, + "loss": 4.583, + "step": 41000 + }, + { + "epoch": 0.46, + "learning_rate": 4.608755524953912e-09, + "loss": 4.5917, + "step": 41500 + }, + { + "epoch": 0.47, + "learning_rate": 4.664282699953357e-09, + "loss": 4.5816, + "step": 42000 + }, + { + "epoch": 0.47, + "learning_rate": 4.7198098749528015e-09, + "loss": 4.5827, + "step": 42500 + }, + { + "epoch": 0.48, + "learning_rate": 4.7753370499522465e-09, + "loss": 4.5674, + "step": 43000 + }, + { + "epoch": 0.48, + "learning_rate": 4.830864224951691e-09, + "loss": 4.5866, + "step": 43500 + }, + { + "epoch": 0.49, + "learning_rate": 4.886391399951136e-09, + "loss": 4.5659, + "step": 44000 + }, + { + "epoch": 0.49, + "learning_rate": 4.941918574950581e-09, + "loss": 4.553, + "step": 44500 + }, + { + "epoch": 0.5, + "learning_rate": 4.997445749950025e-09, + "loss": 4.5593, + "step": 45000 + }, + { + "epoch": 0.51, + "learning_rate": 5.05297292494947e-09, + "loss": 4.5629, + "step": 45500 + }, + { + "epoch": 0.51, + "learning_rate": 5.108500099948915e-09, + "loss": 4.5519, + "step": 46000 + }, + { + "epoch": 0.52, + "learning_rate": 5.164027274948359e-09, + "loss": 4.5401, + "step": 46500 + }, + { + "epoch": 0.52, + "learning_rate": 5.219554449947804e-09, + "loss": 4.5342, + "step": 47000 + }, + { + "epoch": 0.53, + "learning_rate": 5.275081624947249e-09, + "loss": 4.5381, + "step": 47500 + }, + { + "epoch": 0.53, + "learning_rate": 5.330608799946694e-09, + "loss": 4.5256, + "step": 48000 + }, + { + "epoch": 0.54, + "learning_rate": 5.386135974946138e-09, + "loss": 4.5267, + "step": 48500 + }, + { + "epoch": 0.54, + "learning_rate": 5.4416631499455825e-09, + "loss": 4.543, + "step": 49000 + }, + { + "epoch": 0.55, + "learning_rate": 5.497190324945028e-09, + "loss": 4.5319, + "step": 49500 + }, + { + "epoch": 0.56, + "learning_rate": 5.5527174999444725e-09, + "loss": 4.5299, + "step": 50000 + }, + { + "epoch": 0.56, + "learning_rate": 5.6082446749439175e-09, + "loss": 4.5117, + "step": 50500 + }, + { + "epoch": 0.57, + "learning_rate": 5.663771849943362e-09, + "loss": 4.5291, + "step": 51000 + }, + { + "epoch": 0.57, + "learning_rate": 5.719299024942807e-09, + "loss": 4.5038, + "step": 51500 + }, + { + "epoch": 0.58, + "learning_rate": 5.774826199942252e-09, + "loss": 4.5173, + "step": 52000 + }, + { + "epoch": 0.58, + "learning_rate": 5.830353374941696e-09, + "loss": 4.5036, + "step": 52500 + }, + { + "epoch": 0.59, + "learning_rate": 5.885880549941141e-09, + "loss": 4.5334, + "step": 53000 + }, + { + "epoch": 0.59, + "learning_rate": 5.941407724940586e-09, + "loss": 4.4966, + "step": 53500 + }, + { + "epoch": 0.6, + "learning_rate": 5.99693489994003e-09, + "loss": 4.4683, + "step": 54000 + }, + { + "epoch": 0.61, + "learning_rate": 6.052462074939475e-09, + "loss": 4.5084, + "step": 54500 + }, + { + "epoch": 0.61, + "learning_rate": 6.10798924993892e-09, + "loss": 4.4961, + "step": 55000 + }, + { + "epoch": 0.62, + "learning_rate": 6.163516424938364e-09, + "loss": 4.511, + "step": 55500 + }, + { + "epoch": 0.62, + "learning_rate": 6.219043599937809e-09, + "loss": 4.4621, + "step": 56000 + }, + { + "epoch": 0.63, + "learning_rate": 6.274570774937254e-09, + "loss": 4.488, + "step": 56500 + }, + { + "epoch": 0.63, + "learning_rate": 6.330097949936698e-09, + "loss": 4.4761, + "step": 57000 + }, + { + "epoch": 0.64, + "learning_rate": 6.3856251249361434e-09, + "loss": 4.487, + "step": 57500 + }, + { + "epoch": 0.64, + "learning_rate": 6.4411522999355884e-09, + "loss": 4.4958, + "step": 58000 + }, + { + "epoch": 0.65, + "learning_rate": 6.496679474935033e-09, + "loss": 4.4825, + "step": 58500 + }, + { + "epoch": 0.66, + "learning_rate": 6.552206649934478e-09, + "loss": 4.4856, + "step": 59000 + }, + { + "epoch": 0.66, + "learning_rate": 6.607733824933923e-09, + "loss": 4.4724, + "step": 59500 + }, + { + "epoch": 0.67, + "learning_rate": 6.663260999933368e-09, + "loss": 4.4476, + "step": 60000 + }, + { + "epoch": 0.67, + "learning_rate": 6.718788174932812e-09, + "loss": 4.4582, + "step": 60500 + }, + { + "epoch": 0.68, + "learning_rate": 6.774315349932257e-09, + "loss": 4.4804, + "step": 61000 + }, + { + "epoch": 0.68, + "learning_rate": 6.829842524931702e-09, + "loss": 4.4752, + "step": 61500 + }, + { + "epoch": 0.69, + "learning_rate": 6.885369699931146e-09, + "loss": 4.4499, + "step": 62000 + }, + { + "epoch": 0.69, + "learning_rate": 6.940896874930591e-09, + "loss": 4.4592, + "step": 62500 + }, + { + "epoch": 0.7, + "learning_rate": 6.996424049930036e-09, + "loss": 4.4546, + "step": 63000 + }, + { + "epoch": 0.71, + "learning_rate": 7.051951224929479e-09, + "loss": 4.4364, + "step": 63500 + }, + { + "epoch": 0.71, + "learning_rate": 7.107478399928925e-09, + "loss": 4.4465, + "step": 64000 + }, + { + "epoch": 0.72, + "learning_rate": 7.16300557492837e-09, + "loss": 4.4404, + "step": 64500 + }, + { + "epoch": 0.72, + "learning_rate": 7.2185327499278135e-09, + "loss": 4.4519, + "step": 65000 + }, + { + "epoch": 0.73, + "learning_rate": 7.2740599249272585e-09, + "loss": 4.4402, + "step": 65500 + }, + { + "epoch": 0.73, + "learning_rate": 7.329587099926704e-09, + "loss": 4.4382, + "step": 66000 + }, + { + "epoch": 0.74, + "learning_rate": 7.385114274926149e-09, + "loss": 4.4386, + "step": 66500 + }, + { + "epoch": 0.74, + "learning_rate": 7.440641449925593e-09, + "loss": 4.4537, + "step": 67000 + }, + { + "epoch": 0.75, + "learning_rate": 7.496168624925039e-09, + "loss": 4.4171, + "step": 67500 + }, + { + "epoch": 0.76, + "learning_rate": 7.551695799924483e-09, + "loss": 4.425, + "step": 68000 + }, + { + "epoch": 0.76, + "learning_rate": 7.607222974923927e-09, + "loss": 4.4289, + "step": 68500 + }, + { + "epoch": 0.77, + "learning_rate": 7.662750149923373e-09, + "loss": 4.4365, + "step": 69000 + }, + { + "epoch": 0.77, + "learning_rate": 7.718277324922817e-09, + "loss": 4.4359, + "step": 69500 + }, + { + "epoch": 0.78, + "learning_rate": 7.773804499922261e-09, + "loss": 4.4229, + "step": 70000 + }, + { + "epoch": 0.78, + "learning_rate": 7.829331674921707e-09, + "loss": 4.4428, + "step": 70500 + }, + { + "epoch": 0.79, + "learning_rate": 7.884858849921151e-09, + "loss": 4.4071, + "step": 71000 + }, + { + "epoch": 0.79, + "learning_rate": 7.940386024920597e-09, + "loss": 4.419, + "step": 71500 + }, + { + "epoch": 0.8, + "learning_rate": 7.99591319992004e-09, + "loss": 4.4022, + "step": 72000 + }, + { + "epoch": 0.81, + "learning_rate": 8.051440374919485e-09, + "loss": 4.4226, + "step": 72500 + }, + { + "epoch": 0.81, + "learning_rate": 8.106967549918931e-09, + "loss": 4.4109, + "step": 73000 + }, + { + "epoch": 0.82, + "learning_rate": 8.162494724918374e-09, + "loss": 4.413, + "step": 73500 + }, + { + "epoch": 0.82, + "learning_rate": 8.21802189991782e-09, + "loss": 4.4194, + "step": 74000 + }, + { + "epoch": 0.83, + "learning_rate": 8.273549074917265e-09, + "loss": 4.3956, + "step": 74500 + }, + { + "epoch": 0.83, + "learning_rate": 8.329076249916708e-09, + "loss": 4.4074, + "step": 75000 + }, + { + "epoch": 0.84, + "learning_rate": 8.384603424916154e-09, + "loss": 4.4119, + "step": 75500 + }, + { + "epoch": 0.84, + "learning_rate": 8.440130599915598e-09, + "loss": 4.3935, + "step": 76000 + }, + { + "epoch": 0.85, + "learning_rate": 8.495657774915042e-09, + "loss": 4.4161, + "step": 76500 + }, + { + "epoch": 0.86, + "learning_rate": 8.551184949914488e-09, + "loss": 4.3867, + "step": 77000 + }, + { + "epoch": 0.86, + "learning_rate": 8.606712124913932e-09, + "loss": 4.425, + "step": 77500 + }, + { + "epoch": 0.87, + "learning_rate": 8.662239299913378e-09, + "loss": 4.3904, + "step": 78000 + }, + { + "epoch": 0.87, + "learning_rate": 8.717766474912822e-09, + "loss": 4.3925, + "step": 78500 + }, + { + "epoch": 0.88, + "learning_rate": 8.773293649912266e-09, + "loss": 4.3798, + "step": 79000 + }, + { + "epoch": 0.88, + "learning_rate": 8.828820824911712e-09, + "loss": 4.3841, + "step": 79500 + }, + { + "epoch": 0.89, + "learning_rate": 8.884347999911156e-09, + "loss": 4.3653, + "step": 80000 + }, + { + "epoch": 0.89, + "learning_rate": 8.9398751749106e-09, + "loss": 4.3873, + "step": 80500 + }, + { + "epoch": 0.9, + "learning_rate": 8.995402349910046e-09, + "loss": 4.3724, + "step": 81000 + }, + { + "epoch": 0.91, + "learning_rate": 9.05092952490949e-09, + "loss": 4.3679, + "step": 81500 + }, + { + "epoch": 0.91, + "learning_rate": 9.106456699908935e-09, + "loss": 4.3533, + "step": 82000 + }, + { + "epoch": 0.92, + "learning_rate": 9.16198387490838e-09, + "loss": 4.3635, + "step": 82500 + }, + { + "epoch": 0.92, + "learning_rate": 9.217511049907825e-09, + "loss": 4.361, + "step": 83000 + }, + { + "epoch": 0.93, + "learning_rate": 9.273038224907269e-09, + "loss": 4.3852, + "step": 83500 + }, + { + "epoch": 0.93, + "learning_rate": 9.328565399906715e-09, + "loss": 4.386, + "step": 84000 + }, + { + "epoch": 0.94, + "learning_rate": 9.384092574906159e-09, + "loss": 4.3636, + "step": 84500 + }, + { + "epoch": 0.94, + "learning_rate": 9.439619749905603e-09, + "loss": 4.3734, + "step": 85000 + }, + { + "epoch": 0.95, + "learning_rate": 9.495146924905049e-09, + "loss": 4.3527, + "step": 85500 + }, + { + "epoch": 0.96, + "learning_rate": 9.550674099904493e-09, + "loss": 4.3573, + "step": 86000 + }, + { + "epoch": 0.96, + "learning_rate": 9.606201274903937e-09, + "loss": 4.3753, + "step": 86500 + }, + { + "epoch": 0.97, + "learning_rate": 9.661728449903381e-09, + "loss": 4.3663, + "step": 87000 + }, + { + "epoch": 0.97, + "learning_rate": 9.717255624902827e-09, + "loss": 4.3617, + "step": 87500 + }, + { + "epoch": 0.98, + "learning_rate": 9.772782799902271e-09, + "loss": 4.3662, + "step": 88000 + }, + { + "epoch": 0.98, + "learning_rate": 9.828309974901716e-09, + "loss": 4.3652, + "step": 88500 + }, + { + "epoch": 0.99, + "learning_rate": 9.883837149901161e-09, + "loss": 4.3617, + "step": 89000 + }, + { + "epoch": 0.99, + "learning_rate": 9.939364324900607e-09, + "loss": 4.351, + "step": 89500 + }, + { + "epoch": 1.0, + "learning_rate": 9.99489149990005e-09, + "loss": 4.3559, + "step": 90000 + }, + { + "epoch": 1.0, + "eval_loss": 4.290574073791504, + "eval_runtime": 6.3081, + "eval_samples_per_second": 246.35, + "step": 90046 + }, + { + "epoch": 1.01, + "learning_rate": 1.0050418674899496e-08, + "loss": 4.357, + "step": 90500 + }, + { + "epoch": 1.01, + "learning_rate": 1.010594584989894e-08, + "loss": 4.324, + "step": 91000 + }, + { + "epoch": 1.02, + "learning_rate": 1.0161473024898384e-08, + "loss": 4.34, + "step": 91500 + }, + { + "epoch": 1.02, + "learning_rate": 1.021700019989783e-08, + "loss": 4.3296, + "step": 92000 + }, + { + "epoch": 1.03, + "learning_rate": 1.0272527374897274e-08, + "loss": 4.3347, + "step": 92500 + }, + { + "epoch": 1.03, + "learning_rate": 1.0328054549896718e-08, + "loss": 4.328, + "step": 93000 + }, + { + "epoch": 1.04, + "learning_rate": 1.0383581724896164e-08, + "loss": 4.337, + "step": 93500 + }, + { + "epoch": 1.04, + "learning_rate": 1.0439108899895608e-08, + "loss": 4.3266, + "step": 94000 + }, + { + "epoch": 1.05, + "learning_rate": 1.0494636074895054e-08, + "loss": 4.3219, + "step": 94500 + }, + { + "epoch": 1.06, + "learning_rate": 1.0550163249894498e-08, + "loss": 4.3348, + "step": 95000 + }, + { + "epoch": 1.06, + "learning_rate": 1.0605690424893942e-08, + "loss": 4.3462, + "step": 95500 + }, + { + "epoch": 1.07, + "learning_rate": 1.0661217599893388e-08, + "loss": 4.3301, + "step": 96000 + }, + { + "epoch": 1.07, + "learning_rate": 1.0716744774892832e-08, + "loss": 4.3296, + "step": 96500 + }, + { + "epoch": 1.08, + "learning_rate": 1.0772271949892277e-08, + "loss": 4.3241, + "step": 97000 + }, + { + "epoch": 1.08, + "learning_rate": 1.0827799124891722e-08, + "loss": 4.3226, + "step": 97500 + }, + { + "epoch": 1.09, + "learning_rate": 1.0883326299891165e-08, + "loss": 4.3247, + "step": 98000 + }, + { + "epoch": 1.09, + "learning_rate": 1.093885347489061e-08, + "loss": 4.3166, + "step": 98500 + }, + { + "epoch": 1.1, + "learning_rate": 1.0994380649890057e-08, + "loss": 4.3325, + "step": 99000 + }, + { + "epoch": 1.1, + "learning_rate": 1.1049907824889499e-08, + "loss": 4.3388, + "step": 99500 + }, + { + "epoch": 1.11, + "learning_rate": 1.1105434999888945e-08, + "loss": 4.3162, + "step": 100000 + }, + { + "epoch": 1.12, + "learning_rate": 1.116096217488839e-08, + "loss": 4.3019, + "step": 100500 + }, + { + "epoch": 1.12, + "learning_rate": 1.1216489349887835e-08, + "loss": 4.3087, + "step": 101000 + }, + { + "epoch": 1.13, + "learning_rate": 1.1272016524887279e-08, + "loss": 4.3023, + "step": 101500 + }, + { + "epoch": 1.13, + "learning_rate": 1.1327543699886723e-08, + "loss": 4.3206, + "step": 102000 + }, + { + "epoch": 1.14, + "learning_rate": 1.1383070874886169e-08, + "loss": 4.2908, + "step": 102500 + }, + { + "epoch": 1.14, + "learning_rate": 1.1438598049885613e-08, + "loss": 4.3311, + "step": 103000 + }, + { + "epoch": 1.15, + "learning_rate": 1.1494125224885057e-08, + "loss": 4.3057, + "step": 103500 + }, + { + "epoch": 1.15, + "learning_rate": 1.1549652399884503e-08, + "loss": 4.305, + "step": 104000 + }, + { + "epoch": 1.16, + "learning_rate": 1.1605179574883947e-08, + "loss": 4.2982, + "step": 104500 + }, + { + "epoch": 1.17, + "learning_rate": 1.1660706749883392e-08, + "loss": 4.2979, + "step": 105000 + }, + { + "epoch": 1.17, + "learning_rate": 1.1716233924882838e-08, + "loss": 4.2965, + "step": 105500 + }, + { + "epoch": 1.18, + "learning_rate": 1.1771761099882282e-08, + "loss": 4.3031, + "step": 106000 + }, + { + "epoch": 1.18, + "learning_rate": 1.1827288274881726e-08, + "loss": 4.2963, + "step": 106500 + }, + { + "epoch": 1.19, + "learning_rate": 1.1882815449881172e-08, + "loss": 4.2962, + "step": 107000 + }, + { + "epoch": 1.19, + "learning_rate": 1.1938342624880616e-08, + "loss": 4.3185, + "step": 107500 + }, + { + "epoch": 1.2, + "learning_rate": 1.199386979988006e-08, + "loss": 4.281, + "step": 108000 + }, + { + "epoch": 1.2, + "learning_rate": 1.2049396974879506e-08, + "loss": 4.2945, + "step": 108500 + }, + { + "epoch": 1.21, + "learning_rate": 1.210492414987895e-08, + "loss": 4.2983, + "step": 109000 + }, + { + "epoch": 1.22, + "learning_rate": 1.2160451324878394e-08, + "loss": 4.3044, + "step": 109500 + }, + { + "epoch": 1.22, + "learning_rate": 1.221597849987784e-08, + "loss": 4.2755, + "step": 110000 + }, + { + "epoch": 1.23, + "learning_rate": 1.2271505674877284e-08, + "loss": 4.2836, + "step": 110500 + }, + { + "epoch": 1.23, + "learning_rate": 1.2327032849876728e-08, + "loss": 4.2827, + "step": 111000 + }, + { + "epoch": 1.24, + "learning_rate": 1.2382560024876174e-08, + "loss": 4.2843, + "step": 111500 + }, + { + "epoch": 1.24, + "learning_rate": 1.2438087199875618e-08, + "loss": 4.2762, + "step": 112000 + }, + { + "epoch": 1.25, + "learning_rate": 1.2493614374875064e-08, + "loss": 4.2847, + "step": 112500 + }, + { + "epoch": 1.25, + "learning_rate": 1.2549141549874508e-08, + "loss": 4.2741, + "step": 113000 + }, + { + "epoch": 1.26, + "learning_rate": 1.2604668724873951e-08, + "loss": 4.2848, + "step": 113500 + }, + { + "epoch": 1.27, + "learning_rate": 1.2660195899873397e-08, + "loss": 4.257, + "step": 114000 + }, + { + "epoch": 1.27, + "learning_rate": 1.2715723074872841e-08, + "loss": 4.2695, + "step": 114500 + }, + { + "epoch": 1.28, + "learning_rate": 1.2771250249872287e-08, + "loss": 4.2666, + "step": 115000 + }, + { + "epoch": 1.28, + "learning_rate": 1.2826777424871733e-08, + "loss": 4.2889, + "step": 115500 + }, + { + "epoch": 1.29, + "learning_rate": 1.2882304599871177e-08, + "loss": 4.2677, + "step": 116000 + }, + { + "epoch": 1.29, + "learning_rate": 1.2937831774870623e-08, + "loss": 4.2612, + "step": 116500 + }, + { + "epoch": 1.3, + "learning_rate": 1.2993358949870065e-08, + "loss": 4.2597, + "step": 117000 + }, + { + "epoch": 1.3, + "learning_rate": 1.304888612486951e-08, + "loss": 4.2624, + "step": 117500 + }, + { + "epoch": 1.31, + "learning_rate": 1.3104413299868955e-08, + "loss": 4.2755, + "step": 118000 + }, + { + "epoch": 1.32, + "learning_rate": 1.31599404748684e-08, + "loss": 4.2701, + "step": 118500 + }, + { + "epoch": 1.32, + "learning_rate": 1.3215467649867845e-08, + "loss": 4.2737, + "step": 119000 + }, + { + "epoch": 1.33, + "learning_rate": 1.3270994824867291e-08, + "loss": 4.2509, + "step": 119500 + }, + { + "epoch": 1.33, + "learning_rate": 1.3326521999866735e-08, + "loss": 4.2591, + "step": 120000 + }, + { + "epoch": 1.34, + "learning_rate": 1.3382049174866178e-08, + "loss": 4.2735, + "step": 120500 + }, + { + "epoch": 1.34, + "learning_rate": 1.3437576349865624e-08, + "loss": 4.2559, + "step": 121000 + }, + { + "epoch": 1.35, + "learning_rate": 1.3493103524865068e-08, + "loss": 4.2355, + "step": 121500 + }, + { + "epoch": 1.35, + "learning_rate": 1.3548630699864514e-08, + "loss": 4.2611, + "step": 122000 + }, + { + "epoch": 1.36, + "learning_rate": 1.3604157874863958e-08, + "loss": 4.2562, + "step": 122500 + }, + { + "epoch": 1.37, + "learning_rate": 1.3659685049863404e-08, + "loss": 4.2578, + "step": 123000 + }, + { + "epoch": 1.37, + "learning_rate": 1.3715212224862846e-08, + "loss": 4.2806, + "step": 123500 + }, + { + "epoch": 1.38, + "learning_rate": 1.3770739399862292e-08, + "loss": 4.2339, + "step": 124000 + }, + { + "epoch": 1.38, + "learning_rate": 1.3826266574861736e-08, + "loss": 4.2419, + "step": 124500 + }, + { + "epoch": 1.39, + "learning_rate": 1.3881793749861182e-08, + "loss": 4.2429, + "step": 125000 + }, + { + "epoch": 1.39, + "learning_rate": 1.3937320924860626e-08, + "loss": 4.2233, + "step": 125500 + }, + { + "epoch": 1.4, + "learning_rate": 1.3992848099860072e-08, + "loss": 4.2532, + "step": 126000 + }, + { + "epoch": 1.4, + "learning_rate": 1.4048375274859516e-08, + "loss": 4.2413, + "step": 126500 + }, + { + "epoch": 1.41, + "learning_rate": 1.4103902449858959e-08, + "loss": 4.2578, + "step": 127000 + }, + { + "epoch": 1.42, + "learning_rate": 1.4159429624858405e-08, + "loss": 4.2444, + "step": 127500 + }, + { + "epoch": 1.42, + "learning_rate": 1.421495679985785e-08, + "loss": 4.2482, + "step": 128000 + }, + { + "epoch": 1.43, + "learning_rate": 1.4270483974857295e-08, + "loss": 4.2213, + "step": 128500 + }, + { + "epoch": 1.43, + "learning_rate": 1.432601114985674e-08, + "loss": 4.2461, + "step": 129000 + }, + { + "epoch": 1.44, + "learning_rate": 1.4381538324856185e-08, + "loss": 4.2304, + "step": 129500 + }, + { + "epoch": 1.44, + "learning_rate": 1.4437065499855627e-08, + "loss": 4.2312, + "step": 130000 + }, + { + "epoch": 1.45, + "learning_rate": 1.4492592674855073e-08, + "loss": 4.2371, + "step": 130500 + }, + { + "epoch": 1.45, + "learning_rate": 1.4548119849854517e-08, + "loss": 4.225, + "step": 131000 + }, + { + "epoch": 1.46, + "learning_rate": 1.4603647024853963e-08, + "loss": 4.2405, + "step": 131500 + }, + { + "epoch": 1.47, + "learning_rate": 1.4659174199853409e-08, + "loss": 4.2358, + "step": 132000 + }, + { + "epoch": 1.47, + "learning_rate": 1.4714701374852853e-08, + "loss": 4.2158, + "step": 132500 + }, + { + "epoch": 1.48, + "learning_rate": 1.4770228549852299e-08, + "loss": 4.2217, + "step": 133000 + }, + { + "epoch": 1.48, + "learning_rate": 1.4825755724851741e-08, + "loss": 4.2346, + "step": 133500 + }, + { + "epoch": 1.49, + "learning_rate": 1.4881282899851185e-08, + "loss": 4.2364, + "step": 134000 + }, + { + "epoch": 1.49, + "learning_rate": 1.493681007485063e-08, + "loss": 4.2242, + "step": 134500 + }, + { + "epoch": 1.5, + "learning_rate": 1.4992337249850077e-08, + "loss": 4.2359, + "step": 135000 + }, + { + "epoch": 1.5, + "learning_rate": 1.504786442484952e-08, + "loss": 4.2528, + "step": 135500 + }, + { + "epoch": 1.51, + "learning_rate": 1.5103391599848965e-08, + "loss": 4.2199, + "step": 136000 + }, + { + "epoch": 1.52, + "learning_rate": 1.515891877484841e-08, + "loss": 4.2272, + "step": 136500 + }, + { + "epoch": 1.52, + "learning_rate": 1.5214445949847854e-08, + "loss": 4.217, + "step": 137000 + }, + { + "epoch": 1.53, + "learning_rate": 1.5269973124847298e-08, + "loss": 4.2113, + "step": 137500 + }, + { + "epoch": 1.53, + "learning_rate": 1.5325500299846746e-08, + "loss": 4.2396, + "step": 138000 + }, + { + "epoch": 1.54, + "learning_rate": 1.538102747484619e-08, + "loss": 4.2435, + "step": 138500 + }, + { + "epoch": 1.54, + "learning_rate": 1.5436554649845634e-08, + "loss": 4.2247, + "step": 139000 + }, + { + "epoch": 1.55, + "learning_rate": 1.5492081824845078e-08, + "loss": 4.2265, + "step": 139500 + }, + { + "epoch": 1.55, + "learning_rate": 1.5547608999844522e-08, + "loss": 4.2093, + "step": 140000 + }, + { + "epoch": 1.56, + "learning_rate": 1.5603136174843966e-08, + "loss": 4.2299, + "step": 140500 + }, + { + "epoch": 1.57, + "learning_rate": 1.5658663349843414e-08, + "loss": 4.2198, + "step": 141000 + }, + { + "epoch": 1.57, + "learning_rate": 1.5714190524842858e-08, + "loss": 4.1949, + "step": 141500 + }, + { + "epoch": 1.58, + "learning_rate": 1.5769717699842302e-08, + "loss": 4.2192, + "step": 142000 + }, + { + "epoch": 1.58, + "learning_rate": 1.5825244874841746e-08, + "loss": 4.2192, + "step": 142500 + }, + { + "epoch": 1.59, + "learning_rate": 1.5880772049841194e-08, + "loss": 4.2136, + "step": 143000 + }, + { + "epoch": 1.59, + "learning_rate": 1.5936299224840635e-08, + "loss": 4.2102, + "step": 143500 + }, + { + "epoch": 1.6, + "learning_rate": 1.599182639984008e-08, + "loss": 4.2302, + "step": 144000 + }, + { + "epoch": 1.6, + "learning_rate": 1.6047353574839526e-08, + "loss": 4.2161, + "step": 144500 + }, + { + "epoch": 1.61, + "learning_rate": 1.610288074983897e-08, + "loss": 4.2064, + "step": 145000 + }, + { + "epoch": 1.62, + "learning_rate": 1.6158407924838415e-08, + "loss": 4.217, + "step": 145500 + }, + { + "epoch": 1.62, + "learning_rate": 1.6213935099837862e-08, + "loss": 4.2203, + "step": 146000 + }, + { + "epoch": 1.63, + "learning_rate": 1.6269462274837303e-08, + "loss": 4.1943, + "step": 146500 + }, + { + "epoch": 1.63, + "learning_rate": 1.6324989449836747e-08, + "loss": 4.2022, + "step": 147000 + }, + { + "epoch": 1.64, + "learning_rate": 1.6380516624836195e-08, + "loss": 4.2151, + "step": 147500 + }, + { + "epoch": 1.64, + "learning_rate": 1.643604379983564e-08, + "loss": 4.2117, + "step": 148000 + }, + { + "epoch": 1.65, + "learning_rate": 1.6491570974835083e-08, + "loss": 4.2076, + "step": 148500 + }, + { + "epoch": 1.65, + "learning_rate": 1.654709814983453e-08, + "loss": 4.1816, + "step": 149000 + }, + { + "epoch": 1.66, + "learning_rate": 1.6602625324833975e-08, + "loss": 4.2007, + "step": 149500 + }, + { + "epoch": 1.67, + "learning_rate": 1.6658152499833416e-08, + "loss": 4.1998, + "step": 150000 + }, + { + "epoch": 1.67, + "learning_rate": 1.6713679674832863e-08, + "loss": 4.1838, + "step": 150500 + }, + { + "epoch": 1.68, + "learning_rate": 1.6769206849832307e-08, + "loss": 4.2061, + "step": 151000 + }, + { + "epoch": 1.68, + "learning_rate": 1.682473402483175e-08, + "loss": 4.1925, + "step": 151500 + }, + { + "epoch": 1.69, + "learning_rate": 1.6880261199831196e-08, + "loss": 4.2042, + "step": 152000 + }, + { + "epoch": 1.69, + "learning_rate": 1.6935788374830643e-08, + "loss": 4.1919, + "step": 152500 + }, + { + "epoch": 1.7, + "learning_rate": 1.6991315549830084e-08, + "loss": 4.1963, + "step": 153000 + }, + { + "epoch": 1.7, + "learning_rate": 1.7046842724829528e-08, + "loss": 4.2059, + "step": 153500 + }, + { + "epoch": 1.71, + "learning_rate": 1.7102369899828976e-08, + "loss": 4.1924, + "step": 154000 + }, + { + "epoch": 1.72, + "learning_rate": 1.715789707482842e-08, + "loss": 4.1929, + "step": 154500 + }, + { + "epoch": 1.72, + "learning_rate": 1.7213424249827864e-08, + "loss": 4.1895, + "step": 155000 + }, + { + "epoch": 1.73, + "learning_rate": 1.726895142482731e-08, + "loss": 4.1929, + "step": 155500 + }, + { + "epoch": 1.73, + "learning_rate": 1.7324478599826756e-08, + "loss": 4.195, + "step": 156000 + }, + { + "epoch": 1.74, + "learning_rate": 1.7380005774826197e-08, + "loss": 4.1959, + "step": 156500 + }, + { + "epoch": 1.74, + "learning_rate": 1.7435532949825644e-08, + "loss": 4.1842, + "step": 157000 + }, + { + "epoch": 1.75, + "learning_rate": 1.7491060124825088e-08, + "loss": 4.1645, + "step": 157500 + }, + { + "epoch": 1.75, + "learning_rate": 1.7546587299824533e-08, + "loss": 4.1997, + "step": 158000 + }, + { + "epoch": 1.76, + "learning_rate": 1.760211447482398e-08, + "loss": 4.1824, + "step": 158500 + }, + { + "epoch": 1.77, + "learning_rate": 1.7657641649823424e-08, + "loss": 4.2048, + "step": 159000 + }, + { + "epoch": 1.77, + "learning_rate": 1.771316882482287e-08, + "loss": 4.1896, + "step": 159500 + }, + { + "epoch": 1.78, + "learning_rate": 1.7768695999822313e-08, + "loss": 4.168, + "step": 160000 + }, + { + "epoch": 1.78, + "learning_rate": 1.7824223174821757e-08, + "loss": 4.1701, + "step": 160500 + }, + { + "epoch": 1.79, + "learning_rate": 1.78797503498212e-08, + "loss": 4.1858, + "step": 161000 + }, + { + "epoch": 1.79, + "learning_rate": 1.7935277524820645e-08, + "loss": 4.162, + "step": 161500 + }, + { + "epoch": 1.8, + "learning_rate": 1.7990804699820093e-08, + "loss": 4.1677, + "step": 162000 + }, + { + "epoch": 1.8, + "learning_rate": 1.8046331874819537e-08, + "loss": 4.1765, + "step": 162500 + }, + { + "epoch": 1.81, + "learning_rate": 1.810185904981898e-08, + "loss": 4.1811, + "step": 163000 + }, + { + "epoch": 1.82, + "learning_rate": 1.8157386224818425e-08, + "loss": 4.184, + "step": 163500 + }, + { + "epoch": 1.82, + "learning_rate": 1.821291339981787e-08, + "loss": 4.1738, + "step": 164000 + }, + { + "epoch": 1.83, + "learning_rate": 1.8268440574817313e-08, + "loss": 4.1816, + "step": 164500 + }, + { + "epoch": 1.83, + "learning_rate": 1.832396774981676e-08, + "loss": 4.1815, + "step": 165000 + }, + { + "epoch": 1.84, + "learning_rate": 1.8379494924816205e-08, + "loss": 4.1761, + "step": 165500 + }, + { + "epoch": 1.84, + "learning_rate": 1.843502209981565e-08, + "loss": 4.1894, + "step": 166000 + }, + { + "epoch": 1.85, + "learning_rate": 1.8490549274815093e-08, + "loss": 4.1747, + "step": 166500 + }, + { + "epoch": 1.85, + "learning_rate": 1.8546076449814538e-08, + "loss": 4.1831, + "step": 167000 + }, + { + "epoch": 1.86, + "learning_rate": 1.8601603624813982e-08, + "loss": 4.1821, + "step": 167500 + }, + { + "epoch": 1.87, + "learning_rate": 1.865713079981343e-08, + "loss": 4.1538, + "step": 168000 + }, + { + "epoch": 1.87, + "learning_rate": 1.8712657974812874e-08, + "loss": 4.171, + "step": 168500 + }, + { + "epoch": 1.88, + "learning_rate": 1.8768185149812318e-08, + "loss": 4.1832, + "step": 169000 + }, + { + "epoch": 1.88, + "learning_rate": 1.8823712324811762e-08, + "loss": 4.1354, + "step": 169500 + }, + { + "epoch": 1.89, + "learning_rate": 1.8879239499811206e-08, + "loss": 4.1606, + "step": 170000 + }, + { + "epoch": 1.89, + "learning_rate": 1.893476667481065e-08, + "loss": 4.1688, + "step": 170500 + }, + { + "epoch": 1.9, + "learning_rate": 1.8990293849810098e-08, + "loss": 4.192, + "step": 171000 + }, + { + "epoch": 1.9, + "learning_rate": 1.9045821024809542e-08, + "loss": 4.1674, + "step": 171500 + }, + { + "epoch": 1.91, + "learning_rate": 1.9101348199808986e-08, + "loss": 4.175, + "step": 172000 + }, + { + "epoch": 1.92, + "learning_rate": 1.915687537480843e-08, + "loss": 4.1618, + "step": 172500 + }, + { + "epoch": 1.92, + "learning_rate": 1.9212402549807874e-08, + "loss": 4.1692, + "step": 173000 + }, + { + "epoch": 1.93, + "learning_rate": 1.926792972480732e-08, + "loss": 4.1469, + "step": 173500 + }, + { + "epoch": 1.93, + "learning_rate": 1.9323456899806763e-08, + "loss": 4.1824, + "step": 174000 + }, + { + "epoch": 1.94, + "learning_rate": 1.937898407480621e-08, + "loss": 4.138, + "step": 174500 + }, + { + "epoch": 1.94, + "learning_rate": 1.9434511249805654e-08, + "loss": 4.1668, + "step": 175000 + }, + { + "epoch": 1.95, + "learning_rate": 1.94900384248051e-08, + "loss": 4.1715, + "step": 175500 + }, + { + "epoch": 1.95, + "learning_rate": 1.9545565599804543e-08, + "loss": 4.1606, + "step": 176000 + }, + { + "epoch": 1.96, + "learning_rate": 1.9601092774803987e-08, + "loss": 4.1726, + "step": 176500 + }, + { + "epoch": 1.97, + "learning_rate": 1.965661994980343e-08, + "loss": 4.1387, + "step": 177000 + }, + { + "epoch": 1.97, + "learning_rate": 1.971214712480288e-08, + "loss": 4.1411, + "step": 177500 + }, + { + "epoch": 1.98, + "learning_rate": 1.9767674299802323e-08, + "loss": 4.1571, + "step": 178000 + }, + { + "epoch": 1.98, + "learning_rate": 1.9823201474801767e-08, + "loss": 4.1629, + "step": 178500 + }, + { + "epoch": 1.99, + "learning_rate": 1.9878728649801214e-08, + "loss": 4.1518, + "step": 179000 + }, + { + "epoch": 1.99, + "learning_rate": 1.9934255824800655e-08, + "loss": 4.175, + "step": 179500 + }, + { + "epoch": 2.0, + "learning_rate": 1.99897829998001e-08, + "loss": 4.1433, + "step": 180000 + }, + { + "epoch": 2.0, + "eval_loss": 4.123116493225098, + "eval_runtime": 6.3261, + "eval_samples_per_second": 245.648, + "step": 180092 + }, + { + "epoch": 2.0, + "learning_rate": 2.0045310174799547e-08, + "loss": 4.156, + "step": 180500 + }, + { + "epoch": 2.01, + "learning_rate": 2.010083734979899e-08, + "loss": 4.1567, + "step": 181000 + }, + { + "epoch": 2.02, + "learning_rate": 2.0156364524798435e-08, + "loss": 4.1371, + "step": 181500 + }, + { + "epoch": 2.02, + "learning_rate": 2.021189169979788e-08, + "loss": 4.1543, + "step": 182000 + }, + { + "epoch": 2.03, + "learning_rate": 2.0267418874797327e-08, + "loss": 4.1629, + "step": 182500 + }, + { + "epoch": 2.03, + "learning_rate": 2.0322946049796768e-08, + "loss": 4.1472, + "step": 183000 + }, + { + "epoch": 2.04, + "learning_rate": 2.0378473224796215e-08, + "loss": 4.149, + "step": 183500 + }, + { + "epoch": 2.04, + "learning_rate": 2.043400039979566e-08, + "loss": 4.1544, + "step": 184000 + }, + { + "epoch": 2.05, + "learning_rate": 2.0489527574795104e-08, + "loss": 4.1534, + "step": 184500 + }, + { + "epoch": 2.05, + "learning_rate": 2.0545054749794548e-08, + "loss": 4.1411, + "step": 185000 + }, + { + "epoch": 2.06, + "learning_rate": 2.0600581924793995e-08, + "loss": 4.1362, + "step": 185500 + }, + { + "epoch": 2.07, + "learning_rate": 2.0656109099793436e-08, + "loss": 4.1591, + "step": 186000 + }, + { + "epoch": 2.07, + "learning_rate": 2.071163627479288e-08, + "loss": 4.1516, + "step": 186500 + }, + { + "epoch": 2.08, + "learning_rate": 2.0767163449792328e-08, + "loss": 4.1445, + "step": 187000 + }, + { + "epoch": 2.08, + "learning_rate": 2.0822690624791772e-08, + "loss": 4.1516, + "step": 187500 + }, + { + "epoch": 2.09, + "learning_rate": 2.0878217799791216e-08, + "loss": 4.1523, + "step": 188000 + }, + { + "epoch": 2.09, + "learning_rate": 2.0933744974790664e-08, + "loss": 4.1315, + "step": 188500 + }, + { + "epoch": 2.1, + "learning_rate": 2.0989272149790108e-08, + "loss": 4.1512, + "step": 189000 + }, + { + "epoch": 2.1, + "learning_rate": 2.104479932478955e-08, + "loss": 4.1461, + "step": 189500 + }, + { + "epoch": 2.11, + "learning_rate": 2.1100326499788996e-08, + "loss": 4.1427, + "step": 190000 + }, + { + "epoch": 2.12, + "learning_rate": 2.115585367478844e-08, + "loss": 4.1519, + "step": 190500 + }, + { + "epoch": 2.12, + "learning_rate": 2.1211380849787885e-08, + "loss": 4.1469, + "step": 191000 + }, + { + "epoch": 2.13, + "learning_rate": 2.1266908024787332e-08, + "loss": 4.1438, + "step": 191500 + }, + { + "epoch": 2.13, + "learning_rate": 2.1322435199786776e-08, + "loss": 4.1581, + "step": 192000 + }, + { + "epoch": 2.14, + "learning_rate": 2.1377962374786217e-08, + "loss": 4.1525, + "step": 192500 + }, + { + "epoch": 2.14, + "learning_rate": 2.1433489549785665e-08, + "loss": 4.1381, + "step": 193000 + }, + { + "epoch": 2.15, + "learning_rate": 2.148901672478511e-08, + "loss": 4.1397, + "step": 193500 + }, + { + "epoch": 2.15, + "learning_rate": 2.1544543899784553e-08, + "loss": 4.1365, + "step": 194000 + }, + { + "epoch": 2.16, + "learning_rate": 2.1600071074783997e-08, + "loss": 4.1297, + "step": 194500 + }, + { + "epoch": 2.17, + "learning_rate": 2.1655598249783445e-08, + "loss": 4.1296, + "step": 195000 + }, + { + "epoch": 2.17, + "learning_rate": 2.171112542478289e-08, + "loss": 4.1545, + "step": 195500 + }, + { + "epoch": 2.18, + "learning_rate": 2.176665259978233e-08, + "loss": 4.1479, + "step": 196000 + }, + { + "epoch": 2.18, + "learning_rate": 2.1822179774781777e-08, + "loss": 4.1311, + "step": 196500 + }, + { + "epoch": 2.19, + "learning_rate": 2.187770694978122e-08, + "loss": 4.1384, + "step": 197000 + }, + { + "epoch": 2.19, + "learning_rate": 2.1933234124780666e-08, + "loss": 4.1299, + "step": 197500 + }, + { + "epoch": 2.2, + "learning_rate": 2.1988761299780113e-08, + "loss": 4.1386, + "step": 198000 + }, + { + "epoch": 2.2, + "learning_rate": 2.2044288474779557e-08, + "loss": 4.1577, + "step": 198500 + }, + { + "epoch": 2.21, + "learning_rate": 2.2099815649778998e-08, + "loss": 4.1175, + "step": 199000 + }, + { + "epoch": 2.22, + "learning_rate": 2.2155342824778446e-08, + "loss": 4.1354, + "step": 199500 + }, + { + "epoch": 2.22, + "learning_rate": 2.221086999977789e-08, + "loss": 4.1239, + "step": 200000 + }, + { + "epoch": 2.23, + "learning_rate": 2.2266397174777334e-08, + "loss": 4.1558, + "step": 200500 + }, + { + "epoch": 2.23, + "learning_rate": 2.232192434977678e-08, + "loss": 4.1277, + "step": 201000 + }, + { + "epoch": 2.24, + "learning_rate": 2.2377451524776226e-08, + "loss": 4.1317, + "step": 201500 + }, + { + "epoch": 2.24, + "learning_rate": 2.243297869977567e-08, + "loss": 4.1267, + "step": 202000 + }, + { + "epoch": 2.25, + "learning_rate": 2.2488505874775114e-08, + "loss": 4.1315, + "step": 202500 + }, + { + "epoch": 2.25, + "learning_rate": 2.2544033049774558e-08, + "loss": 4.1399, + "step": 203000 + }, + { + "epoch": 2.26, + "learning_rate": 2.2599560224774002e-08, + "loss": 4.1285, + "step": 203500 + }, + { + "epoch": 2.27, + "learning_rate": 2.2655087399773447e-08, + "loss": 4.1298, + "step": 204000 + }, + { + "epoch": 2.27, + "learning_rate": 2.2710614574772894e-08, + "loss": 4.1137, + "step": 204500 + }, + { + "epoch": 2.28, + "learning_rate": 2.2766141749772338e-08, + "loss": 4.1352, + "step": 205000 + }, + { + "epoch": 2.28, + "learning_rate": 2.2821668924771782e-08, + "loss": 4.1176, + "step": 205500 + }, + { + "epoch": 2.29, + "learning_rate": 2.2877196099771227e-08, + "loss": 4.1378, + "step": 206000 + }, + { + "epoch": 2.29, + "learning_rate": 2.293272327477067e-08, + "loss": 4.1138, + "step": 206500 + }, + { + "epoch": 2.3, + "learning_rate": 2.2988250449770115e-08, + "loss": 4.1411, + "step": 207000 + }, + { + "epoch": 2.3, + "learning_rate": 2.3043777624769562e-08, + "loss": 4.1264, + "step": 207500 + }, + { + "epoch": 2.31, + "learning_rate": 2.3099304799769007e-08, + "loss": 4.1252, + "step": 208000 + }, + { + "epoch": 2.32, + "learning_rate": 2.315483197476845e-08, + "loss": 4.1338, + "step": 208500 + }, + { + "epoch": 2.32, + "learning_rate": 2.3210359149767895e-08, + "loss": 4.1296, + "step": 209000 + }, + { + "epoch": 2.33, + "learning_rate": 2.326588632476734e-08, + "loss": 4.1215, + "step": 209500 + }, + { + "epoch": 2.33, + "learning_rate": 2.3321413499766783e-08, + "loss": 4.1278, + "step": 210000 + }, + { + "epoch": 2.34, + "learning_rate": 2.337694067476623e-08, + "loss": 4.136, + "step": 210500 + }, + { + "epoch": 2.34, + "learning_rate": 2.3432467849765675e-08, + "loss": 4.0958, + "step": 211000 + }, + { + "epoch": 2.35, + "learning_rate": 2.348799502476512e-08, + "loss": 4.1178, + "step": 211500 + }, + { + "epoch": 2.35, + "learning_rate": 2.3543522199764563e-08, + "loss": 4.1008, + "step": 212000 + }, + { + "epoch": 2.36, + "learning_rate": 2.3599049374764008e-08, + "loss": 4.095, + "step": 212500 + }, + { + "epoch": 2.37, + "learning_rate": 2.3654576549763452e-08, + "loss": 4.1155, + "step": 213000 + }, + { + "epoch": 2.37, + "learning_rate": 2.37101037247629e-08, + "loss": 4.1213, + "step": 213500 + }, + { + "epoch": 2.38, + "learning_rate": 2.3765630899762343e-08, + "loss": 4.096, + "step": 214000 + }, + { + "epoch": 2.38, + "learning_rate": 2.3821158074761788e-08, + "loss": 4.1187, + "step": 214500 + }, + { + "epoch": 2.39, + "learning_rate": 2.3876685249761232e-08, + "loss": 4.1168, + "step": 215000 + }, + { + "epoch": 2.39, + "learning_rate": 2.3932212424760676e-08, + "loss": 4.1148, + "step": 215500 + }, + { + "epoch": 2.4, + "learning_rate": 2.398773959976012e-08, + "loss": 4.1237, + "step": 216000 + }, + { + "epoch": 2.4, + "learning_rate": 2.4043266774759564e-08, + "loss": 4.111, + "step": 216500 + }, + { + "epoch": 2.41, + "learning_rate": 2.4098793949759012e-08, + "loss": 4.1143, + "step": 217000 + }, + { + "epoch": 2.42, + "learning_rate": 2.4154321124758456e-08, + "loss": 4.1118, + "step": 217500 + }, + { + "epoch": 2.42, + "learning_rate": 2.42098482997579e-08, + "loss": 4.1146, + "step": 218000 + }, + { + "epoch": 2.43, + "learning_rate": 2.4265375474757348e-08, + "loss": 4.1138, + "step": 218500 + }, + { + "epoch": 2.43, + "learning_rate": 2.432090264975679e-08, + "loss": 4.0978, + "step": 219000 + }, + { + "epoch": 2.44, + "learning_rate": 2.4376429824756233e-08, + "loss": 4.1065, + "step": 219500 + }, + { + "epoch": 2.44, + "learning_rate": 2.443195699975568e-08, + "loss": 4.1024, + "step": 220000 + }, + { + "epoch": 2.45, + "learning_rate": 2.4487484174755124e-08, + "loss": 4.1114, + "step": 220500 + }, + { + "epoch": 2.45, + "learning_rate": 2.454301134975457e-08, + "loss": 4.1118, + "step": 221000 + }, + { + "epoch": 2.46, + "learning_rate": 2.4598538524754016e-08, + "loss": 4.1131, + "step": 221500 + }, + { + "epoch": 2.47, + "learning_rate": 2.4654065699753457e-08, + "loss": 4.0993, + "step": 222000 + }, + { + "epoch": 2.47, + "learning_rate": 2.47095928747529e-08, + "loss": 4.1242, + "step": 222500 + }, + { + "epoch": 2.48, + "learning_rate": 2.476512004975235e-08, + "loss": 4.1001, + "step": 223000 + }, + { + "epoch": 2.48, + "learning_rate": 2.4820647224751793e-08, + "loss": 4.1123, + "step": 223500 + }, + { + "epoch": 2.49, + "learning_rate": 2.4876174399751237e-08, + "loss": 4.096, + "step": 224000 + }, + { + "epoch": 2.49, + "learning_rate": 2.493170157475068e-08, + "loss": 4.0955, + "step": 224500 + }, + { + "epoch": 2.5, + "learning_rate": 2.498722874975013e-08, + "loss": 4.1065, + "step": 225000 + }, + { + "epoch": 2.5, + "learning_rate": 2.504275592474957e-08, + "loss": 4.0978, + "step": 225500 + }, + { + "epoch": 2.51, + "learning_rate": 2.5098283099749017e-08, + "loss": 4.117, + "step": 226000 + }, + { + "epoch": 2.52, + "learning_rate": 2.515381027474846e-08, + "loss": 4.1116, + "step": 226500 + }, + { + "epoch": 2.52, + "learning_rate": 2.5209337449747902e-08, + "loss": 4.1183, + "step": 227000 + }, + { + "epoch": 2.53, + "learning_rate": 2.526486462474735e-08, + "loss": 4.092, + "step": 227500 + }, + { + "epoch": 2.53, + "learning_rate": 2.5320391799746794e-08, + "loss": 4.0974, + "step": 228000 + }, + { + "epoch": 2.54, + "learning_rate": 2.537591897474624e-08, + "loss": 4.1012, + "step": 228500 + }, + { + "epoch": 2.54, + "learning_rate": 2.5431446149745682e-08, + "loss": 4.1221, + "step": 229000 + }, + { + "epoch": 2.55, + "learning_rate": 2.5486973324745133e-08, + "loss": 4.1168, + "step": 229500 + }, + { + "epoch": 2.55, + "learning_rate": 2.5542500499744574e-08, + "loss": 4.1236, + "step": 230000 + }, + { + "epoch": 2.56, + "learning_rate": 2.5598027674744015e-08, + "loss": 4.107, + "step": 230500 + }, + { + "epoch": 2.57, + "learning_rate": 2.5653554849743465e-08, + "loss": 4.0949, + "step": 231000 + }, + { + "epoch": 2.57, + "learning_rate": 2.5709082024742906e-08, + "loss": 4.1096, + "step": 231500 + }, + { + "epoch": 2.58, + "learning_rate": 2.5764609199742354e-08, + "loss": 4.1036, + "step": 232000 + }, + { + "epoch": 2.58, + "learning_rate": 2.5820136374741798e-08, + "loss": 4.0994, + "step": 232500 + }, + { + "epoch": 2.59, + "learning_rate": 2.5875663549741245e-08, + "loss": 4.1042, + "step": 233000 + }, + { + "epoch": 2.59, + "learning_rate": 2.5931190724740686e-08, + "loss": 4.0824, + "step": 233500 + }, + { + "epoch": 2.6, + "learning_rate": 2.598671789974013e-08, + "loss": 4.1048, + "step": 234000 + }, + { + "epoch": 2.6, + "learning_rate": 2.6042245074739578e-08, + "loss": 4.0772, + "step": 234500 + }, + { + "epoch": 2.61, + "learning_rate": 2.609777224973902e-08, + "loss": 4.0978, + "step": 235000 + }, + { + "epoch": 2.62, + "learning_rate": 2.6153299424738466e-08, + "loss": 4.0774, + "step": 235500 + }, + { + "epoch": 2.62, + "learning_rate": 2.620882659973791e-08, + "loss": 4.1111, + "step": 236000 + }, + { + "epoch": 2.63, + "learning_rate": 2.6264353774737358e-08, + "loss": 4.0863, + "step": 236500 + }, + { + "epoch": 2.63, + "learning_rate": 2.63198809497368e-08, + "loss": 4.1038, + "step": 237000 + }, + { + "epoch": 2.64, + "learning_rate": 2.6375408124736243e-08, + "loss": 4.0634, + "step": 237500 + }, + { + "epoch": 2.64, + "learning_rate": 2.643093529973569e-08, + "loss": 4.0941, + "step": 238000 + }, + { + "epoch": 2.65, + "learning_rate": 2.648646247473513e-08, + "loss": 4.1001, + "step": 238500 + }, + { + "epoch": 2.65, + "learning_rate": 2.6541989649734582e-08, + "loss": 4.1073, + "step": 239000 + }, + { + "epoch": 2.66, + "learning_rate": 2.6597516824734023e-08, + "loss": 4.0594, + "step": 239500 + }, + { + "epoch": 2.67, + "learning_rate": 2.665304399973347e-08, + "loss": 4.084, + "step": 240000 + }, + { + "epoch": 2.67, + "learning_rate": 2.6708571174732915e-08, + "loss": 4.0941, + "step": 240500 + }, + { + "epoch": 2.68, + "learning_rate": 2.6764098349732356e-08, + "loss": 4.1004, + "step": 241000 + }, + { + "epoch": 2.68, + "learning_rate": 2.6819625524731803e-08, + "loss": 4.0788, + "step": 241500 + }, + { + "epoch": 2.69, + "learning_rate": 2.6875152699731247e-08, + "loss": 4.1075, + "step": 242000 + }, + { + "epoch": 2.69, + "learning_rate": 2.6930679874730695e-08, + "loss": 4.1066, + "step": 242500 + }, + { + "epoch": 2.7, + "learning_rate": 2.6986207049730136e-08, + "loss": 4.096, + "step": 243000 + }, + { + "epoch": 2.7, + "learning_rate": 2.704173422472958e-08, + "loss": 4.1113, + "step": 243500 + }, + { + "epoch": 2.71, + "learning_rate": 2.7097261399729027e-08, + "loss": 4.1107, + "step": 244000 + }, + { + "epoch": 2.72, + "learning_rate": 2.7152788574728468e-08, + "loss": 4.0997, + "step": 244500 + }, + { + "epoch": 2.72, + "learning_rate": 2.7208315749727916e-08, + "loss": 4.0944, + "step": 245000 + }, + { + "epoch": 2.73, + "learning_rate": 2.726384292472736e-08, + "loss": 4.0742, + "step": 245500 + }, + { + "epoch": 2.73, + "learning_rate": 2.7319370099726807e-08, + "loss": 4.0917, + "step": 246000 + }, + { + "epoch": 2.74, + "learning_rate": 2.7374897274726248e-08, + "loss": 4.0972, + "step": 246500 + }, + { + "epoch": 2.74, + "learning_rate": 2.7430424449725692e-08, + "loss": 4.0874, + "step": 247000 + }, + { + "epoch": 2.75, + "learning_rate": 2.748595162472514e-08, + "loss": 4.0942, + "step": 247500 + }, + { + "epoch": 2.75, + "learning_rate": 2.7541478799724584e-08, + "loss": 4.0869, + "step": 248000 + }, + { + "epoch": 2.76, + "learning_rate": 2.759700597472403e-08, + "loss": 4.079, + "step": 248500 + }, + { + "epoch": 2.77, + "learning_rate": 2.7652533149723472e-08, + "loss": 4.0971, + "step": 249000 + }, + { + "epoch": 2.77, + "learning_rate": 2.770806032472292e-08, + "loss": 4.0913, + "step": 249500 + }, + { + "epoch": 2.78, + "learning_rate": 2.7763587499722364e-08, + "loss": 4.085, + "step": 250000 + }, + { + "epoch": 2.78, + "learning_rate": 2.7819114674721805e-08, + "loss": 4.0759, + "step": 250500 + }, + { + "epoch": 2.79, + "learning_rate": 2.7874641849721252e-08, + "loss": 4.0712, + "step": 251000 + }, + { + "epoch": 2.79, + "learning_rate": 2.7930169024720697e-08, + "loss": 4.1092, + "step": 251500 + }, + { + "epoch": 2.8, + "learning_rate": 2.7985696199720144e-08, + "loss": 4.0749, + "step": 252000 + }, + { + "epoch": 2.8, + "learning_rate": 2.8041223374719585e-08, + "loss": 4.0832, + "step": 252500 + }, + { + "epoch": 2.81, + "learning_rate": 2.8096750549719032e-08, + "loss": 4.0867, + "step": 253000 + }, + { + "epoch": 2.82, + "learning_rate": 2.8152277724718477e-08, + "loss": 4.0784, + "step": 253500 + }, + { + "epoch": 2.82, + "learning_rate": 2.8207804899717917e-08, + "loss": 4.0646, + "step": 254000 + }, + { + "epoch": 2.83, + "learning_rate": 2.8263332074717365e-08, + "loss": 4.0779, + "step": 254500 + }, + { + "epoch": 2.83, + "learning_rate": 2.831885924971681e-08, + "loss": 4.0666, + "step": 255000 + }, + { + "epoch": 2.84, + "learning_rate": 2.8374386424716257e-08, + "loss": 4.0903, + "step": 255500 + }, + { + "epoch": 2.84, + "learning_rate": 2.84299135997157e-08, + "loss": 4.0937, + "step": 256000 + }, + { + "epoch": 2.85, + "learning_rate": 2.8485440774715148e-08, + "loss": 4.0922, + "step": 256500 + }, + { + "epoch": 2.85, + "learning_rate": 2.854096794971459e-08, + "loss": 4.0747, + "step": 257000 + }, + { + "epoch": 2.86, + "learning_rate": 2.8596495124714033e-08, + "loss": 4.0704, + "step": 257500 + }, + { + "epoch": 2.87, + "learning_rate": 2.865202229971348e-08, + "loss": 4.0716, + "step": 258000 + }, + { + "epoch": 2.87, + "learning_rate": 2.870754947471292e-08, + "loss": 4.0747, + "step": 258500 + }, + { + "epoch": 2.88, + "learning_rate": 2.876307664971237e-08, + "loss": 4.0942, + "step": 259000 + }, + { + "epoch": 2.88, + "learning_rate": 2.8818603824711813e-08, + "loss": 4.1021, + "step": 259500 + }, + { + "epoch": 2.89, + "learning_rate": 2.8874130999711254e-08, + "loss": 4.0918, + "step": 260000 + }, + { + "epoch": 2.89, + "learning_rate": 2.89296581747107e-08, + "loss": 4.0813, + "step": 260500 + }, + { + "epoch": 2.9, + "learning_rate": 2.8985185349710146e-08, + "loss": 4.0744, + "step": 261000 + }, + { + "epoch": 2.9, + "learning_rate": 2.9040712524709593e-08, + "loss": 4.0882, + "step": 261500 + }, + { + "epoch": 2.91, + "learning_rate": 2.9096239699709034e-08, + "loss": 4.0887, + "step": 262000 + }, + { + "epoch": 2.92, + "learning_rate": 2.9151766874708482e-08, + "loss": 4.0747, + "step": 262500 + }, + { + "epoch": 2.92, + "learning_rate": 2.9207294049707926e-08, + "loss": 4.0707, + "step": 263000 + }, + { + "epoch": 2.93, + "learning_rate": 2.9262821224707367e-08, + "loss": 4.0642, + "step": 263500 + }, + { + "epoch": 2.93, + "learning_rate": 2.9318348399706818e-08, + "loss": 4.0646, + "step": 264000 + }, + { + "epoch": 2.94, + "learning_rate": 2.937387557470626e-08, + "loss": 4.075, + "step": 264500 + }, + { + "epoch": 2.94, + "learning_rate": 2.9429402749705706e-08, + "loss": 4.0733, + "step": 265000 + }, + { + "epoch": 2.95, + "learning_rate": 2.948492992470515e-08, + "loss": 4.0734, + "step": 265500 + }, + { + "epoch": 2.95, + "learning_rate": 2.9540457099704598e-08, + "loss": 4.0871, + "step": 266000 + }, + { + "epoch": 2.96, + "learning_rate": 2.959598427470404e-08, + "loss": 4.0676, + "step": 266500 + }, + { + "epoch": 2.97, + "learning_rate": 2.9651511449703483e-08, + "loss": 4.0773, + "step": 267000 + }, + { + "epoch": 2.97, + "learning_rate": 2.970703862470293e-08, + "loss": 4.0733, + "step": 267500 + }, + { + "epoch": 2.98, + "learning_rate": 2.976256579970237e-08, + "loss": 4.0874, + "step": 268000 + }, + { + "epoch": 2.98, + "learning_rate": 2.981809297470182e-08, + "loss": 4.0765, + "step": 268500 + }, + { + "epoch": 2.99, + "learning_rate": 2.987362014970126e-08, + "loss": 4.0605, + "step": 269000 + }, + { + "epoch": 2.99, + "learning_rate": 2.992914732470071e-08, + "loss": 4.0506, + "step": 269500 + }, + { + "epoch": 3.0, + "learning_rate": 2.9984674499700154e-08, + "loss": 4.0806, + "step": 270000 + }, + { + "epoch": 3.0, + "eval_loss": 4.056514263153076, + "eval_runtime": 6.3026, + "eval_samples_per_second": 246.567, + "step": 270138 + }, + { + "epoch": 3.0, + "learning_rate": 3.004020167469959e-08, + "loss": 4.0646, + "step": 270500 + }, + { + "epoch": 3.01, + "learning_rate": 3.009572884969904e-08, + "loss": 4.0577, + "step": 271000 + }, + { + "epoch": 3.02, + "learning_rate": 3.015125602469849e-08, + "loss": 4.0721, + "step": 271500 + }, + { + "epoch": 3.02, + "learning_rate": 3.020678319969793e-08, + "loss": 4.0837, + "step": 272000 + }, + { + "epoch": 3.03, + "learning_rate": 3.0262310374697375e-08, + "loss": 4.0935, + "step": 272500 + }, + { + "epoch": 3.03, + "learning_rate": 3.031783754969682e-08, + "loss": 4.0564, + "step": 273000 + }, + { + "epoch": 3.04, + "learning_rate": 3.0373364724696264e-08, + "loss": 4.0866, + "step": 273500 + }, + { + "epoch": 3.04, + "learning_rate": 3.042889189969571e-08, + "loss": 4.0721, + "step": 274000 + }, + { + "epoch": 3.05, + "learning_rate": 3.048441907469516e-08, + "loss": 4.0722, + "step": 274500 + }, + { + "epoch": 3.05, + "learning_rate": 3.0539946249694596e-08, + "loss": 4.0812, + "step": 275000 + }, + { + "epoch": 3.06, + "learning_rate": 3.059547342469405e-08, + "loss": 4.0439, + "step": 275500 + }, + { + "epoch": 3.07, + "learning_rate": 3.065100059969349e-08, + "loss": 4.0733, + "step": 276000 + }, + { + "epoch": 3.07, + "learning_rate": 3.070652777469293e-08, + "loss": 4.0709, + "step": 276500 + }, + { + "epoch": 3.08, + "learning_rate": 3.076205494969238e-08, + "loss": 4.0633, + "step": 277000 + }, + { + "epoch": 3.08, + "learning_rate": 3.0817582124691824e-08, + "loss": 4.0608, + "step": 277500 + }, + { + "epoch": 3.09, + "learning_rate": 3.087310929969127e-08, + "loss": 4.0561, + "step": 278000 + }, + { + "epoch": 3.09, + "learning_rate": 3.092863647469071e-08, + "loss": 4.0637, + "step": 278500 + }, + { + "epoch": 3.1, + "learning_rate": 3.0984163649690156e-08, + "loss": 4.0655, + "step": 279000 + }, + { + "epoch": 3.1, + "learning_rate": 3.10396908246896e-08, + "loss": 4.0861, + "step": 279500 + }, + { + "epoch": 3.11, + "learning_rate": 3.1095217999689044e-08, + "loss": 4.0609, + "step": 280000 + }, + { + "epoch": 3.12, + "learning_rate": 3.115074517468849e-08, + "loss": 4.065, + "step": 280500 + }, + { + "epoch": 3.12, + "learning_rate": 3.120627234968793e-08, + "loss": 4.0679, + "step": 281000 + }, + { + "epoch": 3.13, + "learning_rate": 3.1261799524687384e-08, + "loss": 4.05, + "step": 281500 + }, + { + "epoch": 3.13, + "learning_rate": 3.131732669968683e-08, + "loss": 4.0283, + "step": 282000 + }, + { + "epoch": 3.14, + "learning_rate": 3.137285387468627e-08, + "loss": 4.0595, + "step": 282500 + }, + { + "epoch": 3.14, + "learning_rate": 3.1428381049685716e-08, + "loss": 4.0778, + "step": 283000 + }, + { + "epoch": 3.15, + "learning_rate": 3.148390822468516e-08, + "loss": 4.0679, + "step": 283500 + }, + { + "epoch": 3.15, + "learning_rate": 3.1539435399684605e-08, + "loss": 4.0653, + "step": 284000 + }, + { + "epoch": 3.16, + "learning_rate": 3.159496257468405e-08, + "loss": 4.0538, + "step": 284500 + }, + { + "epoch": 3.17, + "learning_rate": 3.165048974968349e-08, + "loss": 4.0479, + "step": 285000 + }, + { + "epoch": 3.17, + "learning_rate": 3.170601692468294e-08, + "loss": 4.0469, + "step": 285500 + }, + { + "epoch": 3.18, + "learning_rate": 3.176154409968239e-08, + "loss": 4.0756, + "step": 286000 + }, + { + "epoch": 3.18, + "learning_rate": 3.1817071274681825e-08, + "loss": 4.0609, + "step": 286500 + }, + { + "epoch": 3.19, + "learning_rate": 3.187259844968127e-08, + "loss": 4.0247, + "step": 287000 + }, + { + "epoch": 3.19, + "learning_rate": 3.192812562468072e-08, + "loss": 4.074, + "step": 287500 + }, + { + "epoch": 3.2, + "learning_rate": 3.198365279968016e-08, + "loss": 4.0597, + "step": 288000 + }, + { + "epoch": 3.2, + "learning_rate": 3.203917997467961e-08, + "loss": 4.0569, + "step": 288500 + }, + { + "epoch": 3.21, + "learning_rate": 3.209470714967905e-08, + "loss": 4.0393, + "step": 289000 + }, + { + "epoch": 3.22, + "learning_rate": 3.215023432467849e-08, + "loss": 4.0618, + "step": 289500 + }, + { + "epoch": 3.22, + "learning_rate": 3.220576149967794e-08, + "loss": 4.0426, + "step": 290000 + }, + { + "epoch": 3.23, + "learning_rate": 3.2261288674677385e-08, + "loss": 4.0593, + "step": 290500 + }, + { + "epoch": 3.23, + "learning_rate": 3.231681584967683e-08, + "loss": 4.0546, + "step": 291000 + }, + { + "epoch": 3.24, + "learning_rate": 3.2372343024676274e-08, + "loss": 4.0527, + "step": 291500 + }, + { + "epoch": 3.24, + "learning_rate": 3.2427870199675725e-08, + "loss": 4.0523, + "step": 292000 + }, + { + "epoch": 3.25, + "learning_rate": 3.248339737467516e-08, + "loss": 4.0676, + "step": 292500 + }, + { + "epoch": 3.25, + "learning_rate": 3.2538924549674606e-08, + "loss": 4.0683, + "step": 293000 + }, + { + "epoch": 3.26, + "learning_rate": 3.259445172467406e-08, + "loss": 4.0577, + "step": 293500 + }, + { + "epoch": 3.26, + "learning_rate": 3.2649978899673495e-08, + "loss": 4.0606, + "step": 294000 + }, + { + "epoch": 3.27, + "learning_rate": 3.2705506074672945e-08, + "loss": 4.0633, + "step": 294500 + }, + { + "epoch": 3.28, + "learning_rate": 3.276103324967239e-08, + "loss": 4.0385, + "step": 295000 + }, + { + "epoch": 3.28, + "learning_rate": 3.2816560424671834e-08, + "loss": 4.036, + "step": 295500 + }, + { + "epoch": 3.29, + "learning_rate": 3.287208759967128e-08, + "loss": 4.0401, + "step": 296000 + }, + { + "epoch": 3.29, + "learning_rate": 3.292761477467072e-08, + "loss": 4.0433, + "step": 296500 + }, + { + "epoch": 3.3, + "learning_rate": 3.2983141949670166e-08, + "loss": 4.0394, + "step": 297000 + }, + { + "epoch": 3.3, + "learning_rate": 3.303866912466961e-08, + "loss": 4.0434, + "step": 297500 + }, + { + "epoch": 3.31, + "learning_rate": 3.309419629966906e-08, + "loss": 4.0629, + "step": 298000 + }, + { + "epoch": 3.31, + "learning_rate": 3.31497234746685e-08, + "loss": 4.0306, + "step": 298500 + }, + { + "epoch": 3.32, + "learning_rate": 3.320525064966795e-08, + "loss": 4.0449, + "step": 299000 + }, + { + "epoch": 3.33, + "learning_rate": 3.3260777824667394e-08, + "loss": 4.0438, + "step": 299500 + }, + { + "epoch": 3.33, + "learning_rate": 3.331630499966683e-08, + "loss": 4.0442, + "step": 300000 + }, + { + "epoch": 3.34, + "learning_rate": 3.337183217466628e-08, + "loss": 4.0438, + "step": 300500 + }, + { + "epoch": 3.34, + "learning_rate": 3.3427359349665726e-08, + "loss": 4.0358, + "step": 301000 + }, + { + "epoch": 3.35, + "learning_rate": 3.348288652466517e-08, + "loss": 4.0654, + "step": 301500 + }, + { + "epoch": 3.35, + "learning_rate": 3.3538413699664615e-08, + "loss": 4.0596, + "step": 302000 + }, + { + "epoch": 3.36, + "learning_rate": 3.359394087466406e-08, + "loss": 4.0342, + "step": 302500 + }, + { + "epoch": 3.36, + "learning_rate": 3.36494680496635e-08, + "loss": 4.0457, + "step": 303000 + }, + { + "epoch": 3.37, + "learning_rate": 3.370499522466295e-08, + "loss": 4.0387, + "step": 303500 + }, + { + "epoch": 3.38, + "learning_rate": 3.376052239966239e-08, + "loss": 4.0336, + "step": 304000 + }, + { + "epoch": 3.38, + "learning_rate": 3.3816049574661836e-08, + "loss": 4.0226, + "step": 304500 + }, + { + "epoch": 3.39, + "learning_rate": 3.3871576749661286e-08, + "loss": 4.0373, + "step": 305000 + }, + { + "epoch": 3.39, + "learning_rate": 3.3927103924660724e-08, + "loss": 4.0409, + "step": 305500 + }, + { + "epoch": 3.4, + "learning_rate": 3.398263109966017e-08, + "loss": 4.0429, + "step": 306000 + }, + { + "epoch": 3.4, + "learning_rate": 3.403815827465962e-08, + "loss": 4.0652, + "step": 306500 + }, + { + "epoch": 3.41, + "learning_rate": 3.4093685449659057e-08, + "loss": 4.0422, + "step": 307000 + }, + { + "epoch": 3.41, + "learning_rate": 3.414921262465851e-08, + "loss": 4.0615, + "step": 307500 + }, + { + "epoch": 3.42, + "learning_rate": 3.420473979965795e-08, + "loss": 4.0351, + "step": 308000 + }, + { + "epoch": 3.43, + "learning_rate": 3.4260266974657396e-08, + "loss": 4.0478, + "step": 308500 + }, + { + "epoch": 3.43, + "learning_rate": 3.431579414965684e-08, + "loss": 4.0489, + "step": 309000 + }, + { + "epoch": 3.44, + "learning_rate": 3.4371321324656284e-08, + "loss": 4.0405, + "step": 309500 + }, + { + "epoch": 3.44, + "learning_rate": 3.442684849965573e-08, + "loss": 4.0276, + "step": 310000 + }, + { + "epoch": 3.45, + "learning_rate": 3.448237567465517e-08, + "loss": 4.0416, + "step": 310500 + }, + { + "epoch": 3.45, + "learning_rate": 3.453790284965462e-08, + "loss": 4.0589, + "step": 311000 + }, + { + "epoch": 3.46, + "learning_rate": 3.459343002465406e-08, + "loss": 4.0261, + "step": 311500 + }, + { + "epoch": 3.46, + "learning_rate": 3.464895719965351e-08, + "loss": 4.0363, + "step": 312000 + }, + { + "epoch": 3.47, + "learning_rate": 3.4704484374652956e-08, + "loss": 4.0582, + "step": 312500 + }, + { + "epoch": 3.48, + "learning_rate": 3.4760011549652393e-08, + "loss": 4.0281, + "step": 313000 + }, + { + "epoch": 3.48, + "learning_rate": 3.4815538724651844e-08, + "loss": 4.0409, + "step": 313500 + }, + { + "epoch": 3.49, + "learning_rate": 3.487106589965129e-08, + "loss": 4.0402, + "step": 314000 + }, + { + "epoch": 3.49, + "learning_rate": 3.492659307465073e-08, + "loss": 4.0204, + "step": 314500 + }, + { + "epoch": 3.5, + "learning_rate": 3.4982120249650177e-08, + "loss": 4.0187, + "step": 315000 + }, + { + "epoch": 3.5, + "learning_rate": 3.503764742464963e-08, + "loss": 4.0387, + "step": 315500 + }, + { + "epoch": 3.51, + "learning_rate": 3.5093174599649065e-08, + "loss": 4.028, + "step": 316000 + }, + { + "epoch": 3.51, + "learning_rate": 3.514870177464851e-08, + "loss": 4.0498, + "step": 316500 + }, + { + "epoch": 3.52, + "learning_rate": 3.520422894964796e-08, + "loss": 4.035, + "step": 317000 + }, + { + "epoch": 3.53, + "learning_rate": 3.52597561246474e-08, + "loss": 4.0359, + "step": 317500 + }, + { + "epoch": 3.53, + "learning_rate": 3.531528329964685e-08, + "loss": 4.0442, + "step": 318000 + }, + { + "epoch": 3.54, + "learning_rate": 3.537081047464629e-08, + "loss": 4.032, + "step": 318500 + }, + { + "epoch": 3.54, + "learning_rate": 3.542633764964574e-08, + "loss": 4.0291, + "step": 319000 + }, + { + "epoch": 3.55, + "learning_rate": 3.548186482464518e-08, + "loss": 4.0364, + "step": 319500 + }, + { + "epoch": 3.55, + "learning_rate": 3.5537391999644625e-08, + "loss": 4.0294, + "step": 320000 + }, + { + "epoch": 3.56, + "learning_rate": 3.559291917464407e-08, + "loss": 4.0276, + "step": 320500 + }, + { + "epoch": 3.56, + "learning_rate": 3.5648446349643513e-08, + "loss": 4.0239, + "step": 321000 + }, + { + "epoch": 3.57, + "learning_rate": 3.570397352464296e-08, + "loss": 4.0317, + "step": 321500 + }, + { + "epoch": 3.58, + "learning_rate": 3.57595006996424e-08, + "loss": 4.0278, + "step": 322000 + }, + { + "epoch": 3.58, + "learning_rate": 3.5815027874641846e-08, + "loss": 4.0462, + "step": 322500 + }, + { + "epoch": 3.59, + "learning_rate": 3.587055504964129e-08, + "loss": 4.0241, + "step": 323000 + }, + { + "epoch": 3.59, + "learning_rate": 3.5926082224640734e-08, + "loss": 4.0157, + "step": 323500 + }, + { + "epoch": 3.6, + "learning_rate": 3.5981609399640185e-08, + "loss": 4.0438, + "step": 324000 + }, + { + "epoch": 3.6, + "learning_rate": 3.603713657463963e-08, + "loss": 4.0424, + "step": 324500 + }, + { + "epoch": 3.61, + "learning_rate": 3.6092663749639073e-08, + "loss": 4.0295, + "step": 325000 + }, + { + "epoch": 3.61, + "learning_rate": 3.614819092463852e-08, + "loss": 4.0269, + "step": 325500 + }, + { + "epoch": 3.62, + "learning_rate": 3.620371809963796e-08, + "loss": 4.0431, + "step": 326000 + }, + { + "epoch": 3.63, + "learning_rate": 3.6259245274637406e-08, + "loss": 4.0322, + "step": 326500 + }, + { + "epoch": 3.63, + "learning_rate": 3.631477244963685e-08, + "loss": 4.0323, + "step": 327000 + }, + { + "epoch": 3.64, + "learning_rate": 3.6370299624636294e-08, + "loss": 4.0452, + "step": 327500 + }, + { + "epoch": 3.64, + "learning_rate": 3.642582679963574e-08, + "loss": 4.0394, + "step": 328000 + }, + { + "epoch": 3.65, + "learning_rate": 3.648135397463519e-08, + "loss": 4.0444, + "step": 328500 + }, + { + "epoch": 3.65, + "learning_rate": 3.653688114963463e-08, + "loss": 4.0201, + "step": 329000 + }, + { + "epoch": 3.66, + "learning_rate": 3.659240832463407e-08, + "loss": 4.0265, + "step": 329500 + }, + { + "epoch": 3.66, + "learning_rate": 3.664793549963352e-08, + "loss": 4.0365, + "step": 330000 + }, + { + "epoch": 3.67, + "learning_rate": 3.670346267463296e-08, + "loss": 4.0136, + "step": 330500 + }, + { + "epoch": 3.68, + "learning_rate": 3.675898984963241e-08, + "loss": 4.0301, + "step": 331000 + }, + { + "epoch": 3.68, + "learning_rate": 3.6814517024631854e-08, + "loss": 4.0269, + "step": 331500 + }, + { + "epoch": 3.69, + "learning_rate": 3.68700441996313e-08, + "loss": 4.0471, + "step": 332000 + }, + { + "epoch": 3.69, + "learning_rate": 3.692557137463074e-08, + "loss": 4.0178, + "step": 332500 + }, + { + "epoch": 3.7, + "learning_rate": 3.698109854963019e-08, + "loss": 4.0266, + "step": 333000 + }, + { + "epoch": 3.7, + "learning_rate": 3.703662572462963e-08, + "loss": 4.0309, + "step": 333500 + }, + { + "epoch": 3.71, + "learning_rate": 3.7092152899629075e-08, + "loss": 4.0293, + "step": 334000 + }, + { + "epoch": 3.71, + "learning_rate": 3.7147680074628526e-08, + "loss": 4.0329, + "step": 334500 + }, + { + "epoch": 3.72, + "learning_rate": 3.7203207249627964e-08, + "loss": 4.0213, + "step": 335000 + }, + { + "epoch": 3.73, + "learning_rate": 3.725873442462741e-08, + "loss": 4.0302, + "step": 335500 + }, + { + "epoch": 3.73, + "learning_rate": 3.731426159962686e-08, + "loss": 4.0358, + "step": 336000 + }, + { + "epoch": 3.74, + "learning_rate": 3.7369788774626296e-08, + "loss": 4.008, + "step": 336500 + }, + { + "epoch": 3.74, + "learning_rate": 3.742531594962575e-08, + "loss": 4.0094, + "step": 337000 + }, + { + "epoch": 3.75, + "learning_rate": 3.748084312462519e-08, + "loss": 4.0164, + "step": 337500 + }, + { + "epoch": 3.75, + "learning_rate": 3.7536370299624635e-08, + "loss": 4.0193, + "step": 338000 + }, + { + "epoch": 3.76, + "learning_rate": 3.759189747462408e-08, + "loss": 4.0287, + "step": 338500 + }, + { + "epoch": 3.76, + "learning_rate": 3.7647424649623524e-08, + "loss": 4.023, + "step": 339000 + }, + { + "epoch": 3.77, + "learning_rate": 3.770295182462297e-08, + "loss": 4.0225, + "step": 339500 + }, + { + "epoch": 3.78, + "learning_rate": 3.775847899962241e-08, + "loss": 4.0399, + "step": 340000 + }, + { + "epoch": 3.78, + "learning_rate": 3.781400617462186e-08, + "loss": 4.0306, + "step": 340500 + }, + { + "epoch": 3.79, + "learning_rate": 3.78695333496213e-08, + "loss": 4.022, + "step": 341000 + }, + { + "epoch": 3.79, + "learning_rate": 3.792506052462075e-08, + "loss": 4.0275, + "step": 341500 + }, + { + "epoch": 3.8, + "learning_rate": 3.7980587699620195e-08, + "loss": 4.0376, + "step": 342000 + }, + { + "epoch": 3.8, + "learning_rate": 3.803611487461963e-08, + "loss": 4.0126, + "step": 342500 + }, + { + "epoch": 3.81, + "learning_rate": 3.8091642049619084e-08, + "loss": 4.0314, + "step": 343000 + }, + { + "epoch": 3.81, + "learning_rate": 3.814716922461853e-08, + "loss": 4.0159, + "step": 343500 + }, + { + "epoch": 3.82, + "learning_rate": 3.820269639961797e-08, + "loss": 4.0281, + "step": 344000 + }, + { + "epoch": 3.83, + "learning_rate": 3.8258223574617416e-08, + "loss": 4.0251, + "step": 344500 + }, + { + "epoch": 3.83, + "learning_rate": 3.831375074961686e-08, + "loss": 4.0242, + "step": 345000 + }, + { + "epoch": 3.84, + "learning_rate": 3.8369277924616305e-08, + "loss": 4.0365, + "step": 345500 + }, + { + "epoch": 3.84, + "learning_rate": 3.842480509961575e-08, + "loss": 4.0134, + "step": 346000 + }, + { + "epoch": 3.85, + "learning_rate": 3.848033227461519e-08, + "loss": 4.0163, + "step": 346500 + }, + { + "epoch": 3.85, + "learning_rate": 3.853585944961464e-08, + "loss": 4.0371, + "step": 347000 + }, + { + "epoch": 3.86, + "learning_rate": 3.859138662461409e-08, + "loss": 4.0047, + "step": 347500 + }, + { + "epoch": 3.86, + "learning_rate": 3.8646913799613526e-08, + "loss": 4.016, + "step": 348000 + }, + { + "epoch": 3.87, + "learning_rate": 3.8702440974612976e-08, + "loss": 4.0389, + "step": 348500 + }, + { + "epoch": 3.88, + "learning_rate": 3.875796814961242e-08, + "loss": 4.0015, + "step": 349000 + }, + { + "epoch": 3.88, + "learning_rate": 3.881349532461186e-08, + "loss": 4.0132, + "step": 349500 + }, + { + "epoch": 3.89, + "learning_rate": 3.886902249961131e-08, + "loss": 4.0028, + "step": 350000 + }, + { + "epoch": 3.89, + "learning_rate": 3.892454967461075e-08, + "loss": 4.0079, + "step": 350500 + }, + { + "epoch": 3.9, + "learning_rate": 3.89800768496102e-08, + "loss": 4.0428, + "step": 351000 + }, + { + "epoch": 3.9, + "learning_rate": 3.903560402460964e-08, + "loss": 4.0088, + "step": 351500 + }, + { + "epoch": 3.91, + "learning_rate": 3.9091131199609086e-08, + "loss": 4.0158, + "step": 352000 + }, + { + "epoch": 3.91, + "learning_rate": 3.914665837460853e-08, + "loss": 4.0181, + "step": 352500 + }, + { + "epoch": 3.92, + "learning_rate": 3.9202185549607974e-08, + "loss": 4.0193, + "step": 353000 + }, + { + "epoch": 3.93, + "learning_rate": 3.9257712724607425e-08, + "loss": 4.0056, + "step": 353500 + }, + { + "epoch": 3.93, + "learning_rate": 3.931323989960686e-08, + "loss": 4.0166, + "step": 354000 + }, + { + "epoch": 3.94, + "learning_rate": 3.936876707460631e-08, + "loss": 4.0256, + "step": 354500 + }, + { + "epoch": 3.94, + "learning_rate": 3.942429424960576e-08, + "loss": 4.014, + "step": 355000 + }, + { + "epoch": 3.95, + "learning_rate": 3.9479821424605195e-08, + "loss": 4.0526, + "step": 355500 + }, + { + "epoch": 3.95, + "learning_rate": 3.9535348599604646e-08, + "loss": 4.0243, + "step": 356000 + }, + { + "epoch": 3.96, + "learning_rate": 3.959087577460409e-08, + "loss": 4.0224, + "step": 356500 + }, + { + "epoch": 3.96, + "learning_rate": 3.9646402949603534e-08, + "loss": 4.0319, + "step": 357000 + }, + { + "epoch": 3.97, + "learning_rate": 3.970193012460298e-08, + "loss": 4.0028, + "step": 357500 + }, + { + "epoch": 3.98, + "learning_rate": 3.975745729960243e-08, + "loss": 3.9964, + "step": 358000 + }, + { + "epoch": 3.98, + "learning_rate": 3.9812984474601867e-08, + "loss": 4.0005, + "step": 358500 + }, + { + "epoch": 3.99, + "learning_rate": 3.986851164960131e-08, + "loss": 4.0167, + "step": 359000 + }, + { + "epoch": 3.99, + "learning_rate": 3.992403882460076e-08, + "loss": 4.0178, + "step": 359500 + }, + { + "epoch": 4.0, + "learning_rate": 3.99795659996002e-08, + "loss": 4.0169, + "step": 360000 + }, + { + "epoch": 4.0, + "eval_loss": 4.012733459472656, + "eval_runtime": 6.3084, + "eval_samples_per_second": 246.337, + "step": 360184 + }, + { + "epoch": 4.0, + "learning_rate": 4.003509317459965e-08, + "loss": 4.0111, + "step": 360500 + }, + { + "epoch": 4.01, + "learning_rate": 4.0090620349599094e-08, + "loss": 4.0056, + "step": 361000 + }, + { + "epoch": 4.01, + "learning_rate": 4.014614752459854e-08, + "loss": 4.0092, + "step": 361500 + }, + { + "epoch": 4.02, + "learning_rate": 4.020167469959798e-08, + "loss": 4.0285, + "step": 362000 + }, + { + "epoch": 4.03, + "learning_rate": 4.0257201874597427e-08, + "loss": 4.0084, + "step": 362500 + }, + { + "epoch": 4.03, + "learning_rate": 4.031272904959687e-08, + "loss": 4.0263, + "step": 363000 + }, + { + "epoch": 4.04, + "learning_rate": 4.0368256224596315e-08, + "loss": 4.0244, + "step": 363500 + }, + { + "epoch": 4.04, + "learning_rate": 4.042378339959576e-08, + "loss": 3.9977, + "step": 364000 + }, + { + "epoch": 4.05, + "learning_rate": 4.04793105745952e-08, + "loss": 3.9967, + "step": 364500 + }, + { + "epoch": 4.05, + "learning_rate": 4.0534837749594654e-08, + "loss": 4.0248, + "step": 365000 + }, + { + "epoch": 4.06, + "learning_rate": 4.059036492459409e-08, + "loss": 4.0071, + "step": 365500 + }, + { + "epoch": 4.06, + "learning_rate": 4.0645892099593536e-08, + "loss": 4.016, + "step": 366000 + }, + { + "epoch": 4.07, + "learning_rate": 4.0701419274592987e-08, + "loss": 3.9927, + "step": 366500 + }, + { + "epoch": 4.08, + "learning_rate": 4.075694644959243e-08, + "loss": 4.0108, + "step": 367000 + }, + { + "epoch": 4.08, + "learning_rate": 4.0812473624591875e-08, + "loss": 4.0183, + "step": 367500 + }, + { + "epoch": 4.09, + "learning_rate": 4.086800079959132e-08, + "loss": 4.0027, + "step": 368000 + }, + { + "epoch": 4.09, + "learning_rate": 4.0923527974590763e-08, + "loss": 4.0056, + "step": 368500 + }, + { + "epoch": 4.1, + "learning_rate": 4.097905514959021e-08, + "loss": 4.0082, + "step": 369000 + }, + { + "epoch": 4.1, + "learning_rate": 4.103458232458965e-08, + "loss": 4.0038, + "step": 369500 + }, + { + "epoch": 4.11, + "learning_rate": 4.1090109499589096e-08, + "loss": 3.9956, + "step": 370000 + }, + { + "epoch": 4.11, + "learning_rate": 4.114563667458854e-08, + "loss": 4.0033, + "step": 370500 + }, + { + "epoch": 4.12, + "learning_rate": 4.120116384958799e-08, + "loss": 3.9961, + "step": 371000 + }, + { + "epoch": 4.13, + "learning_rate": 4.125669102458743e-08, + "loss": 4.0267, + "step": 371500 + }, + { + "epoch": 4.13, + "learning_rate": 4.131221819958687e-08, + "loss": 3.9941, + "step": 372000 + }, + { + "epoch": 4.14, + "learning_rate": 4.1367745374586323e-08, + "loss": 4.0191, + "step": 372500 + }, + { + "epoch": 4.14, + "learning_rate": 4.142327254958576e-08, + "loss": 4.0141, + "step": 373000 + }, + { + "epoch": 4.15, + "learning_rate": 4.147879972458521e-08, + "loss": 4.0063, + "step": 373500 + }, + { + "epoch": 4.15, + "learning_rate": 4.1534326899584656e-08, + "loss": 4.0067, + "step": 374000 + }, + { + "epoch": 4.16, + "learning_rate": 4.15898540745841e-08, + "loss": 4.0085, + "step": 374500 + }, + { + "epoch": 4.16, + "learning_rate": 4.1645381249583544e-08, + "loss": 4.0114, + "step": 375000 + }, + { + "epoch": 4.17, + "learning_rate": 4.170090842458299e-08, + "loss": 3.9937, + "step": 375500 + }, + { + "epoch": 4.18, + "learning_rate": 4.175643559958243e-08, + "loss": 4.001, + "step": 376000 + }, + { + "epoch": 4.18, + "learning_rate": 4.181196277458188e-08, + "loss": 4.0154, + "step": 376500 + }, + { + "epoch": 4.19, + "learning_rate": 4.186748994958133e-08, + "loss": 3.9987, + "step": 377000 + }, + { + "epoch": 4.19, + "learning_rate": 4.1923017124580765e-08, + "loss": 4.0165, + "step": 377500 + }, + { + "epoch": 4.2, + "learning_rate": 4.1978544299580216e-08, + "loss": 3.9932, + "step": 378000 + }, + { + "epoch": 4.2, + "learning_rate": 4.203407147457966e-08, + "loss": 4.0054, + "step": 378500 + }, + { + "epoch": 4.21, + "learning_rate": 4.20895986495791e-08, + "loss": 4.0084, + "step": 379000 + }, + { + "epoch": 4.21, + "learning_rate": 4.214512582457855e-08, + "loss": 4.0094, + "step": 379500 + }, + { + "epoch": 4.22, + "learning_rate": 4.220065299957799e-08, + "loss": 3.9905, + "step": 380000 + }, + { + "epoch": 4.23, + "learning_rate": 4.225618017457744e-08, + "loss": 4.0, + "step": 380500 + }, + { + "epoch": 4.23, + "learning_rate": 4.231170734957688e-08, + "loss": 4.0141, + "step": 381000 + }, + { + "epoch": 4.24, + "learning_rate": 4.2367234524576325e-08, + "loss": 4.0034, + "step": 381500 + }, + { + "epoch": 4.24, + "learning_rate": 4.242276169957577e-08, + "loss": 4.0126, + "step": 382000 + }, + { + "epoch": 4.25, + "learning_rate": 4.2478288874575214e-08, + "loss": 3.9771, + "step": 382500 + }, + { + "epoch": 4.25, + "learning_rate": 4.2533816049574664e-08, + "loss": 4.0017, + "step": 383000 + }, + { + "epoch": 4.26, + "learning_rate": 4.25893432245741e-08, + "loss": 4.0028, + "step": 383500 + }, + { + "epoch": 4.26, + "learning_rate": 4.264487039957355e-08, + "loss": 3.9836, + "step": 384000 + }, + { + "epoch": 4.27, + "learning_rate": 4.2700397574573e-08, + "loss": 3.987, + "step": 384500 + }, + { + "epoch": 4.28, + "learning_rate": 4.2755924749572434e-08, + "loss": 4.0078, + "step": 385000 + }, + { + "epoch": 4.28, + "learning_rate": 4.2811451924571885e-08, + "loss": 3.9905, + "step": 385500 + }, + { + "epoch": 4.29, + "learning_rate": 4.286697909957133e-08, + "loss": 3.9939, + "step": 386000 + }, + { + "epoch": 4.29, + "learning_rate": 4.2922506274570774e-08, + "loss": 4.0043, + "step": 386500 + }, + { + "epoch": 4.3, + "learning_rate": 4.297803344957022e-08, + "loss": 3.9899, + "step": 387000 + }, + { + "epoch": 4.3, + "learning_rate": 4.303356062456966e-08, + "loss": 3.9869, + "step": 387500 + }, + { + "epoch": 4.31, + "learning_rate": 4.3089087799569106e-08, + "loss": 3.9914, + "step": 388000 + }, + { + "epoch": 4.31, + "learning_rate": 4.314461497456855e-08, + "loss": 4.0073, + "step": 388500 + }, + { + "epoch": 4.32, + "learning_rate": 4.3200142149567995e-08, + "loss": 4.0109, + "step": 389000 + }, + { + "epoch": 4.33, + "learning_rate": 4.325566932456744e-08, + "loss": 3.9705, + "step": 389500 + }, + { + "epoch": 4.33, + "learning_rate": 4.331119649956689e-08, + "loss": 4.0004, + "step": 390000 + }, + { + "epoch": 4.34, + "learning_rate": 4.336672367456633e-08, + "loss": 4.0117, + "step": 390500 + }, + { + "epoch": 4.34, + "learning_rate": 4.342225084956578e-08, + "loss": 3.9868, + "step": 391000 + }, + { + "epoch": 4.35, + "learning_rate": 4.347777802456522e-08, + "loss": 3.9959, + "step": 391500 + }, + { + "epoch": 4.35, + "learning_rate": 4.353330519956466e-08, + "loss": 4.006, + "step": 392000 + }, + { + "epoch": 4.36, + "learning_rate": 4.358883237456411e-08, + "loss": 3.9973, + "step": 392500 + }, + { + "epoch": 4.36, + "learning_rate": 4.3644359549563555e-08, + "loss": 3.9842, + "step": 393000 + }, + { + "epoch": 4.37, + "learning_rate": 4.3699886724563e-08, + "loss": 3.993, + "step": 393500 + }, + { + "epoch": 4.38, + "learning_rate": 4.375541389956244e-08, + "loss": 3.9979, + "step": 394000 + }, + { + "epoch": 4.38, + "learning_rate": 4.3810941074561894e-08, + "loss": 3.9982, + "step": 394500 + }, + { + "epoch": 4.39, + "learning_rate": 4.386646824956133e-08, + "loss": 4.0057, + "step": 395000 + }, + { + "epoch": 4.39, + "learning_rate": 4.3921995424560775e-08, + "loss": 3.9749, + "step": 395500 + }, + { + "epoch": 4.4, + "learning_rate": 4.3977522599560226e-08, + "loss": 4.0134, + "step": 396000 + }, + { + "epoch": 4.4, + "learning_rate": 4.4033049774559664e-08, + "loss": 3.9945, + "step": 396500 + }, + { + "epoch": 4.41, + "learning_rate": 4.4088576949559115e-08, + "loss": 3.9779, + "step": 397000 + }, + { + "epoch": 4.41, + "learning_rate": 4.414410412455856e-08, + "loss": 3.9846, + "step": 397500 + }, + { + "epoch": 4.42, + "learning_rate": 4.4199631299557996e-08, + "loss": 4.002, + "step": 398000 + }, + { + "epoch": 4.43, + "learning_rate": 4.425515847455745e-08, + "loss": 3.9968, + "step": 398500 + }, + { + "epoch": 4.43, + "learning_rate": 4.431068564955689e-08, + "loss": 4.0025, + "step": 399000 + }, + { + "epoch": 4.44, + "learning_rate": 4.4366212824556336e-08, + "loss": 3.9933, + "step": 399500 + }, + { + "epoch": 4.44, + "learning_rate": 4.442173999955578e-08, + "loss": 3.9982, + "step": 400000 + }, + { + "epoch": 4.45, + "learning_rate": 4.447726717455523e-08, + "loss": 3.9919, + "step": 400500 + }, + { + "epoch": 4.45, + "learning_rate": 4.453279434955467e-08, + "loss": 3.9767, + "step": 401000 + }, + { + "epoch": 4.46, + "learning_rate": 4.458832152455411e-08, + "loss": 4.0057, + "step": 401500 + }, + { + "epoch": 4.46, + "learning_rate": 4.464384869955356e-08, + "loss": 4.0115, + "step": 402000 + }, + { + "epoch": 4.47, + "learning_rate": 4.4699375874553e-08, + "loss": 3.9792, + "step": 402500 + }, + { + "epoch": 4.48, + "learning_rate": 4.475490304955245e-08, + "loss": 3.9911, + "step": 403000 + }, + { + "epoch": 4.48, + "learning_rate": 4.4810430224551896e-08, + "loss": 3.9975, + "step": 403500 + }, + { + "epoch": 4.49, + "learning_rate": 4.486595739955134e-08, + "loss": 3.9752, + "step": 404000 + }, + { + "epoch": 4.49, + "learning_rate": 4.4921484574550784e-08, + "loss": 3.9821, + "step": 404500 + }, + { + "epoch": 4.5, + "learning_rate": 4.497701174955023e-08, + "loss": 3.986, + "step": 405000 + }, + { + "epoch": 4.5, + "learning_rate": 4.503253892454967e-08, + "loss": 4.0016, + "step": 405500 + }, + { + "epoch": 4.51, + "learning_rate": 4.5088066099549116e-08, + "loss": 3.9924, + "step": 406000 + }, + { + "epoch": 4.51, + "learning_rate": 4.514359327454856e-08, + "loss": 3.9965, + "step": 406500 + }, + { + "epoch": 4.52, + "learning_rate": 4.5199120449548005e-08, + "loss": 3.9853, + "step": 407000 + }, + { + "epoch": 4.53, + "learning_rate": 4.5254647624547456e-08, + "loss": 3.9895, + "step": 407500 + }, + { + "epoch": 4.53, + "learning_rate": 4.531017479954689e-08, + "loss": 3.9945, + "step": 408000 + }, + { + "epoch": 4.54, + "learning_rate": 4.536570197454634e-08, + "loss": 3.9956, + "step": 408500 + }, + { + "epoch": 4.54, + "learning_rate": 4.542122914954579e-08, + "loss": 3.9892, + "step": 409000 + }, + { + "epoch": 4.55, + "learning_rate": 4.547675632454523e-08, + "loss": 3.9985, + "step": 409500 + }, + { + "epoch": 4.55, + "learning_rate": 4.5532283499544677e-08, + "loss": 4.0087, + "step": 410000 + }, + { + "epoch": 4.56, + "learning_rate": 4.558781067454412e-08, + "loss": 3.9899, + "step": 410500 + }, + { + "epoch": 4.56, + "learning_rate": 4.5643337849543565e-08, + "loss": 4.0053, + "step": 411000 + }, + { + "epoch": 4.57, + "learning_rate": 4.569886502454301e-08, + "loss": 3.973, + "step": 411500 + }, + { + "epoch": 4.58, + "learning_rate": 4.575439219954245e-08, + "loss": 3.9917, + "step": 412000 + }, + { + "epoch": 4.58, + "learning_rate": 4.58099193745419e-08, + "loss": 3.9872, + "step": 412500 + }, + { + "epoch": 4.59, + "learning_rate": 4.586544654954134e-08, + "loss": 3.9766, + "step": 413000 + }, + { + "epoch": 4.59, + "learning_rate": 4.592097372454079e-08, + "loss": 3.9831, + "step": 413500 + }, + { + "epoch": 4.6, + "learning_rate": 4.597650089954023e-08, + "loss": 3.9769, + "step": 414000 + }, + { + "epoch": 4.6, + "learning_rate": 4.6032028074539674e-08, + "loss": 3.987, + "step": 414500 + }, + { + "epoch": 4.61, + "learning_rate": 4.6087555249539125e-08, + "loss": 3.9891, + "step": 415000 + }, + { + "epoch": 4.61, + "learning_rate": 4.614308242453856e-08, + "loss": 3.9911, + "step": 415500 + }, + { + "epoch": 4.62, + "learning_rate": 4.619860959953801e-08, + "loss": 3.9969, + "step": 416000 + }, + { + "epoch": 4.63, + "learning_rate": 4.625413677453746e-08, + "loss": 4.0026, + "step": 416500 + }, + { + "epoch": 4.63, + "learning_rate": 4.63096639495369e-08, + "loss": 3.9894, + "step": 417000 + }, + { + "epoch": 4.64, + "learning_rate": 4.6365191124536346e-08, + "loss": 3.9728, + "step": 417500 + }, + { + "epoch": 4.64, + "learning_rate": 4.642071829953579e-08, + "loss": 3.9784, + "step": 418000 + }, + { + "epoch": 4.65, + "learning_rate": 4.6476245474535234e-08, + "loss": 3.9917, + "step": 418500 + }, + { + "epoch": 4.65, + "learning_rate": 4.653177264953468e-08, + "loss": 3.9886, + "step": 419000 + }, + { + "epoch": 4.66, + "learning_rate": 4.658729982453413e-08, + "loss": 3.9813, + "step": 419500 + }, + { + "epoch": 4.66, + "learning_rate": 4.664282699953357e-08, + "loss": 3.9863, + "step": 420000 + }, + { + "epoch": 4.67, + "learning_rate": 4.669835417453302e-08, + "loss": 4.0092, + "step": 420500 + }, + { + "epoch": 4.68, + "learning_rate": 4.675388134953246e-08, + "loss": 3.9895, + "step": 421000 + }, + { + "epoch": 4.68, + "learning_rate": 4.68094085245319e-08, + "loss": 3.9985, + "step": 421500 + }, + { + "epoch": 4.69, + "learning_rate": 4.686493569953135e-08, + "loss": 3.9822, + "step": 422000 + }, + { + "epoch": 4.69, + "learning_rate": 4.6920462874530794e-08, + "loss": 3.9833, + "step": 422500 + }, + { + "epoch": 4.7, + "learning_rate": 4.697599004953024e-08, + "loss": 4.0023, + "step": 423000 + }, + { + "epoch": 4.7, + "learning_rate": 4.703151722452968e-08, + "loss": 3.9726, + "step": 423500 + }, + { + "epoch": 4.71, + "learning_rate": 4.708704439952913e-08, + "loss": 3.956, + "step": 424000 + }, + { + "epoch": 4.71, + "learning_rate": 4.714257157452857e-08, + "loss": 3.9939, + "step": 424500 + }, + { + "epoch": 4.72, + "learning_rate": 4.7198098749528015e-08, + "loss": 3.988, + "step": 425000 + }, + { + "epoch": 4.73, + "learning_rate": 4.7253625924527466e-08, + "loss": 3.9827, + "step": 425500 + }, + { + "epoch": 4.73, + "learning_rate": 4.7309153099526903e-08, + "loss": 3.9837, + "step": 426000 + }, + { + "epoch": 4.74, + "learning_rate": 4.7364680274526354e-08, + "loss": 3.9843, + "step": 426500 + }, + { + "epoch": 4.74, + "learning_rate": 4.74202074495258e-08, + "loss": 3.9812, + "step": 427000 + }, + { + "epoch": 4.75, + "learning_rate": 4.747573462452524e-08, + "loss": 3.9722, + "step": 427500 + }, + { + "epoch": 4.75, + "learning_rate": 4.753126179952469e-08, + "loss": 3.9839, + "step": 428000 + }, + { + "epoch": 4.76, + "learning_rate": 4.758678897452413e-08, + "loss": 3.9772, + "step": 428500 + }, + { + "epoch": 4.76, + "learning_rate": 4.7642316149523575e-08, + "loss": 4.0006, + "step": 429000 + }, + { + "epoch": 4.77, + "learning_rate": 4.769784332452302e-08, + "loss": 3.9973, + "step": 429500 + }, + { + "epoch": 4.78, + "learning_rate": 4.7753370499522464e-08, + "loss": 3.9879, + "step": 430000 + }, + { + "epoch": 4.78, + "learning_rate": 4.780889767452191e-08, + "loss": 3.9749, + "step": 430500 + }, + { + "epoch": 4.79, + "learning_rate": 4.786442484952135e-08, + "loss": 3.9939, + "step": 431000 + }, + { + "epoch": 4.79, + "learning_rate": 4.7919952024520796e-08, + "loss": 3.9714, + "step": 431500 + }, + { + "epoch": 4.8, + "learning_rate": 4.797547919952024e-08, + "loss": 3.9883, + "step": 432000 + }, + { + "epoch": 4.8, + "learning_rate": 4.803100637451969e-08, + "loss": 3.9879, + "step": 432500 + }, + { + "epoch": 4.81, + "learning_rate": 4.808653354951913e-08, + "loss": 3.9729, + "step": 433000 + }, + { + "epoch": 4.81, + "learning_rate": 4.814206072451858e-08, + "loss": 3.9623, + "step": 433500 + }, + { + "epoch": 4.82, + "learning_rate": 4.8197587899518024e-08, + "loss": 3.9784, + "step": 434000 + }, + { + "epoch": 4.83, + "learning_rate": 4.825311507451746e-08, + "loss": 3.9872, + "step": 434500 + }, + { + "epoch": 4.83, + "learning_rate": 4.830864224951691e-08, + "loss": 3.984, + "step": 435000 + }, + { + "epoch": 4.84, + "learning_rate": 4.8364169424516356e-08, + "loss": 3.9745, + "step": 435500 + }, + { + "epoch": 4.84, + "learning_rate": 4.84196965995158e-08, + "loss": 3.9719, + "step": 436000 + }, + { + "epoch": 4.85, + "learning_rate": 4.8475223774515244e-08, + "loss": 3.9711, + "step": 436500 + }, + { + "epoch": 4.85, + "learning_rate": 4.8530750949514695e-08, + "loss": 3.9703, + "step": 437000 + }, + { + "epoch": 4.86, + "learning_rate": 4.858627812451413e-08, + "loss": 3.9586, + "step": 437500 + }, + { + "epoch": 4.86, + "learning_rate": 4.864180529951358e-08, + "loss": 3.9699, + "step": 438000 + }, + { + "epoch": 4.87, + "learning_rate": 4.869733247451303e-08, + "loss": 3.9747, + "step": 438500 + }, + { + "epoch": 4.88, + "learning_rate": 4.8752859649512465e-08, + "loss": 3.9673, + "step": 439000 + }, + { + "epoch": 4.88, + "learning_rate": 4.8808386824511916e-08, + "loss": 3.9681, + "step": 439500 + }, + { + "epoch": 4.89, + "learning_rate": 4.886391399951136e-08, + "loss": 3.96, + "step": 440000 + }, + { + "epoch": 4.89, + "learning_rate": 4.8919441174510804e-08, + "loss": 3.9928, + "step": 440500 + }, + { + "epoch": 4.9, + "learning_rate": 4.897496834951025e-08, + "loss": 3.9976, + "step": 441000 + }, + { + "epoch": 4.9, + "learning_rate": 4.903049552450969e-08, + "loss": 3.9696, + "step": 441500 + }, + { + "epoch": 4.91, + "learning_rate": 4.908602269950914e-08, + "loss": 3.9719, + "step": 442000 + }, + { + "epoch": 4.91, + "learning_rate": 4.914154987450858e-08, + "loss": 3.9623, + "step": 442500 + }, + { + "epoch": 4.92, + "learning_rate": 4.919707704950803e-08, + "loss": 3.9835, + "step": 443000 + }, + { + "epoch": 4.93, + "learning_rate": 4.925260422450747e-08, + "loss": 3.9696, + "step": 443500 + }, + { + "epoch": 4.93, + "learning_rate": 4.9308131399506914e-08, + "loss": 3.9761, + "step": 444000 + }, + { + "epoch": 4.94, + "learning_rate": 4.9363658574506365e-08, + "loss": 3.9787, + "step": 444500 + }, + { + "epoch": 4.94, + "learning_rate": 4.94191857495058e-08, + "loss": 3.9933, + "step": 445000 + }, + { + "epoch": 4.95, + "learning_rate": 4.947471292450525e-08, + "loss": 3.9899, + "step": 445500 + }, + { + "epoch": 4.95, + "learning_rate": 4.95302400995047e-08, + "loss": 3.9763, + "step": 446000 + }, + { + "epoch": 4.96, + "learning_rate": 4.958576727450414e-08, + "loss": 3.9708, + "step": 446500 + }, + { + "epoch": 4.96, + "learning_rate": 4.9641294449503585e-08, + "loss": 3.9889, + "step": 447000 + }, + { + "epoch": 4.97, + "learning_rate": 4.969682162450303e-08, + "loss": 3.9769, + "step": 447500 + }, + { + "epoch": 4.98, + "learning_rate": 4.9752348799502474e-08, + "loss": 3.9738, + "step": 448000 + }, + { + "epoch": 4.98, + "learning_rate": 4.980787597450192e-08, + "loss": 3.9591, + "step": 448500 + }, + { + "epoch": 4.99, + "learning_rate": 4.986340314950136e-08, + "loss": 3.9796, + "step": 449000 + }, + { + "epoch": 4.99, + "learning_rate": 4.9918930324500806e-08, + "loss": 3.9772, + "step": 449500 + }, + { + "epoch": 5.0, + "learning_rate": 4.997445749950026e-08, + "loss": 3.9604, + "step": 450000 + }, + { + "epoch": 5.0, + "eval_loss": 3.979917526245117, + "eval_runtime": 6.3032, + "eval_samples_per_second": 246.54, + "step": 450230 + }, + { + "epoch": 5.0, + "learning_rate": 5.00299846744997e-08, + "loss": 3.9679, + "step": 450500 + }, + { + "epoch": 5.01, + "learning_rate": 5.008551184949914e-08, + "loss": 3.9582, + "step": 451000 + }, + { + "epoch": 5.01, + "learning_rate": 5.014103902449859e-08, + "loss": 3.9734, + "step": 451500 + }, + { + "epoch": 5.02, + "learning_rate": 5.0196566199498034e-08, + "loss": 3.9825, + "step": 452000 + }, + { + "epoch": 5.03, + "learning_rate": 5.025209337449747e-08, + "loss": 3.9878, + "step": 452500 + }, + { + "epoch": 5.03, + "learning_rate": 5.030762054949692e-08, + "loss": 3.969, + "step": 453000 + }, + { + "epoch": 5.04, + "learning_rate": 5.0363147724496366e-08, + "loss": 3.9616, + "step": 453500 + }, + { + "epoch": 5.04, + "learning_rate": 5.0418674899495804e-08, + "loss": 3.9926, + "step": 454000 + }, + { + "epoch": 5.05, + "learning_rate": 5.0474202074495255e-08, + "loss": 3.9751, + "step": 454500 + }, + { + "epoch": 5.05, + "learning_rate": 5.05297292494947e-08, + "loss": 3.9791, + "step": 455000 + }, + { + "epoch": 5.06, + "learning_rate": 5.058525642449415e-08, + "loss": 3.9649, + "step": 455500 + }, + { + "epoch": 5.06, + "learning_rate": 5.064078359949359e-08, + "loss": 3.9741, + "step": 456000 + }, + { + "epoch": 5.07, + "learning_rate": 5.069631077449303e-08, + "loss": 3.9608, + "step": 456500 + }, + { + "epoch": 5.08, + "learning_rate": 5.075183794949248e-08, + "loss": 3.9766, + "step": 457000 + }, + { + "epoch": 5.08, + "learning_rate": 5.080736512449192e-08, + "loss": 3.9694, + "step": 457500 + }, + { + "epoch": 5.09, + "learning_rate": 5.0862892299491364e-08, + "loss": 3.9779, + "step": 458000 + }, + { + "epoch": 5.09, + "learning_rate": 5.0918419474490815e-08, + "loss": 3.9594, + "step": 458500 + }, + { + "epoch": 5.1, + "learning_rate": 5.0973946649490266e-08, + "loss": 3.9589, + "step": 459000 + }, + { + "epoch": 5.1, + "learning_rate": 5.1029473824489697e-08, + "loss": 3.9799, + "step": 459500 + }, + { + "epoch": 5.11, + "learning_rate": 5.108500099948915e-08, + "loss": 3.9652, + "step": 460000 + }, + { + "epoch": 5.11, + "learning_rate": 5.11405281744886e-08, + "loss": 3.9576, + "step": 460500 + }, + { + "epoch": 5.12, + "learning_rate": 5.119605534948803e-08, + "loss": 3.9753, + "step": 461000 + }, + { + "epoch": 5.13, + "learning_rate": 5.125158252448748e-08, + "loss": 3.9681, + "step": 461500 + }, + { + "epoch": 5.13, + "learning_rate": 5.130710969948693e-08, + "loss": 3.9559, + "step": 462000 + }, + { + "epoch": 5.14, + "learning_rate": 5.1362636874486375e-08, + "loss": 3.9708, + "step": 462500 + }, + { + "epoch": 5.14, + "learning_rate": 5.141816404948581e-08, + "loss": 3.953, + "step": 463000 + }, + { + "epoch": 5.15, + "learning_rate": 5.147369122448526e-08, + "loss": 3.9638, + "step": 463500 + }, + { + "epoch": 5.15, + "learning_rate": 5.152921839948471e-08, + "loss": 3.9588, + "step": 464000 + }, + { + "epoch": 5.16, + "learning_rate": 5.1584745574484145e-08, + "loss": 3.9676, + "step": 464500 + }, + { + "epoch": 5.16, + "learning_rate": 5.1640272749483596e-08, + "loss": 3.9801, + "step": 465000 + }, + { + "epoch": 5.17, + "learning_rate": 5.169579992448304e-08, + "loss": 3.9834, + "step": 465500 + }, + { + "epoch": 5.18, + "learning_rate": 5.175132709948249e-08, + "loss": 3.9709, + "step": 466000 + }, + { + "epoch": 5.18, + "learning_rate": 5.180685427448193e-08, + "loss": 3.9634, + "step": 466500 + }, + { + "epoch": 5.19, + "learning_rate": 5.186238144948137e-08, + "loss": 3.9807, + "step": 467000 + }, + { + "epoch": 5.19, + "learning_rate": 5.191790862448082e-08, + "loss": 3.9807, + "step": 467500 + }, + { + "epoch": 5.2, + "learning_rate": 5.197343579948026e-08, + "loss": 3.9588, + "step": 468000 + }, + { + "epoch": 5.2, + "learning_rate": 5.2028962974479705e-08, + "loss": 3.9785, + "step": 468500 + }, + { + "epoch": 5.21, + "learning_rate": 5.2084490149479156e-08, + "loss": 3.9755, + "step": 469000 + }, + { + "epoch": 5.21, + "learning_rate": 5.21400173244786e-08, + "loss": 3.9691, + "step": 469500 + }, + { + "epoch": 5.22, + "learning_rate": 5.219554449947804e-08, + "loss": 3.9581, + "step": 470000 + }, + { + "epoch": 5.23, + "learning_rate": 5.225107167447749e-08, + "loss": 3.9749, + "step": 470500 + }, + { + "epoch": 5.23, + "learning_rate": 5.230659884947693e-08, + "loss": 3.9744, + "step": 471000 + }, + { + "epoch": 5.24, + "learning_rate": 5.236212602447637e-08, + "loss": 3.9533, + "step": 471500 + }, + { + "epoch": 5.24, + "learning_rate": 5.241765319947582e-08, + "loss": 3.9659, + "step": 472000 + }, + { + "epoch": 5.25, + "learning_rate": 5.2473180374475265e-08, + "loss": 3.9525, + "step": 472500 + }, + { + "epoch": 5.25, + "learning_rate": 5.2528707549474716e-08, + "loss": 3.9506, + "step": 473000 + }, + { + "epoch": 5.26, + "learning_rate": 5.2584234724474153e-08, + "loss": 3.9397, + "step": 473500 + }, + { + "epoch": 5.26, + "learning_rate": 5.26397618994736e-08, + "loss": 3.9579, + "step": 474000 + }, + { + "epoch": 5.27, + "learning_rate": 5.269528907447305e-08, + "loss": 3.9607, + "step": 474500 + }, + { + "epoch": 5.28, + "learning_rate": 5.2750816249472486e-08, + "loss": 3.9663, + "step": 475000 + }, + { + "epoch": 5.28, + "learning_rate": 5.280634342447193e-08, + "loss": 3.9651, + "step": 475500 + }, + { + "epoch": 5.29, + "learning_rate": 5.286187059947138e-08, + "loss": 3.953, + "step": 476000 + }, + { + "epoch": 5.29, + "learning_rate": 5.291739777447083e-08, + "loss": 3.9471, + "step": 476500 + }, + { + "epoch": 5.3, + "learning_rate": 5.297292494947026e-08, + "loss": 3.9738, + "step": 477000 + }, + { + "epoch": 5.3, + "learning_rate": 5.3028452124469713e-08, + "loss": 3.9717, + "step": 477500 + }, + { + "epoch": 5.31, + "learning_rate": 5.3083979299469164e-08, + "loss": 3.9548, + "step": 478000 + }, + { + "epoch": 5.31, + "learning_rate": 5.3139506474468595e-08, + "loss": 3.9373, + "step": 478500 + }, + { + "epoch": 5.32, + "learning_rate": 5.3195033649468046e-08, + "loss": 3.9567, + "step": 479000 + }, + { + "epoch": 5.33, + "learning_rate": 5.32505608244675e-08, + "loss": 3.9676, + "step": 479500 + }, + { + "epoch": 5.33, + "learning_rate": 5.330608799946694e-08, + "loss": 3.9717, + "step": 480000 + }, + { + "epoch": 5.34, + "learning_rate": 5.336161517446638e-08, + "loss": 3.957, + "step": 480500 + }, + { + "epoch": 5.34, + "learning_rate": 5.341714234946583e-08, + "loss": 3.9736, + "step": 481000 + }, + { + "epoch": 5.35, + "learning_rate": 5.3472669524465273e-08, + "loss": 3.9484, + "step": 481500 + }, + { + "epoch": 5.35, + "learning_rate": 5.352819669946471e-08, + "loss": 3.9439, + "step": 482000 + }, + { + "epoch": 5.36, + "learning_rate": 5.358372387446416e-08, + "loss": 3.9685, + "step": 482500 + }, + { + "epoch": 5.36, + "learning_rate": 5.3639251049463606e-08, + "loss": 3.9692, + "step": 483000 + }, + { + "epoch": 5.37, + "learning_rate": 5.3694778224463044e-08, + "loss": 3.9468, + "step": 483500 + }, + { + "epoch": 5.38, + "learning_rate": 5.3750305399462494e-08, + "loss": 3.947, + "step": 484000 + }, + { + "epoch": 5.38, + "learning_rate": 5.380583257446194e-08, + "loss": 3.9577, + "step": 484500 + }, + { + "epoch": 5.39, + "learning_rate": 5.386135974946139e-08, + "loss": 3.9634, + "step": 485000 + }, + { + "epoch": 5.39, + "learning_rate": 5.391688692446083e-08, + "loss": 3.9416, + "step": 485500 + }, + { + "epoch": 5.4, + "learning_rate": 5.397241409946027e-08, + "loss": 3.962, + "step": 486000 + }, + { + "epoch": 5.4, + "learning_rate": 5.402794127445972e-08, + "loss": 3.9677, + "step": 486500 + }, + { + "epoch": 5.41, + "learning_rate": 5.408346844945916e-08, + "loss": 3.9538, + "step": 487000 + }, + { + "epoch": 5.41, + "learning_rate": 5.4138995624458604e-08, + "loss": 3.9689, + "step": 487500 + }, + { + "epoch": 5.42, + "learning_rate": 5.4194522799458054e-08, + "loss": 3.9649, + "step": 488000 + }, + { + "epoch": 5.43, + "learning_rate": 5.42500499744575e-08, + "loss": 3.956, + "step": 488500 + }, + { + "epoch": 5.43, + "learning_rate": 5.4305577149456936e-08, + "loss": 3.964, + "step": 489000 + }, + { + "epoch": 5.44, + "learning_rate": 5.436110432445639e-08, + "loss": 3.9513, + "step": 489500 + }, + { + "epoch": 5.44, + "learning_rate": 5.441663149945583e-08, + "loss": 3.9641, + "step": 490000 + }, + { + "epoch": 5.45, + "learning_rate": 5.447215867445527e-08, + "loss": 3.9624, + "step": 490500 + }, + { + "epoch": 5.45, + "learning_rate": 5.452768584945472e-08, + "loss": 3.9448, + "step": 491000 + }, + { + "epoch": 5.46, + "learning_rate": 5.4583213024454164e-08, + "loss": 3.9784, + "step": 491500 + }, + { + "epoch": 5.46, + "learning_rate": 5.4638740199453614e-08, + "loss": 3.9588, + "step": 492000 + }, + { + "epoch": 5.47, + "learning_rate": 5.469426737445305e-08, + "loss": 3.9761, + "step": 492500 + }, + { + "epoch": 5.47, + "learning_rate": 5.4749794549452496e-08, + "loss": 3.9593, + "step": 493000 + }, + { + "epoch": 5.48, + "learning_rate": 5.480532172445195e-08, + "loss": 3.9539, + "step": 493500 + }, + { + "epoch": 5.49, + "learning_rate": 5.4860848899451385e-08, + "loss": 3.9435, + "step": 494000 + }, + { + "epoch": 5.49, + "learning_rate": 5.491637607445083e-08, + "loss": 3.9597, + "step": 494500 + }, + { + "epoch": 5.5, + "learning_rate": 5.497190324945028e-08, + "loss": 3.962, + "step": 495000 + }, + { + "epoch": 5.5, + "learning_rate": 5.502743042444973e-08, + "loss": 3.9746, + "step": 495500 + }, + { + "epoch": 5.51, + "learning_rate": 5.508295759944917e-08, + "loss": 3.9431, + "step": 496000 + }, + { + "epoch": 5.51, + "learning_rate": 5.513848477444861e-08, + "loss": 3.948, + "step": 496500 + }, + { + "epoch": 5.52, + "learning_rate": 5.519401194944806e-08, + "loss": 3.9546, + "step": 497000 + }, + { + "epoch": 5.52, + "learning_rate": 5.52495391244475e-08, + "loss": 3.9528, + "step": 497500 + }, + { + "epoch": 5.53, + "learning_rate": 5.5305066299446945e-08, + "loss": 3.9582, + "step": 498000 + }, + { + "epoch": 5.54, + "learning_rate": 5.5360593474446395e-08, + "loss": 3.9466, + "step": 498500 + }, + { + "epoch": 5.54, + "learning_rate": 5.541612064944584e-08, + "loss": 3.9822, + "step": 499000 + }, + { + "epoch": 5.55, + "learning_rate": 5.547164782444528e-08, + "loss": 3.9714, + "step": 499500 + }, + { + "epoch": 5.55, + "learning_rate": 5.552717499944473e-08, + "loss": 3.9581, + "step": 500000 + }, + { + "epoch": 5.56, + "learning_rate": 5.558270217444417e-08, + "loss": 3.9612, + "step": 500500 + }, + { + "epoch": 5.56, + "learning_rate": 5.563822934944361e-08, + "loss": 3.9731, + "step": 501000 + }, + { + "epoch": 5.57, + "learning_rate": 5.569375652444306e-08, + "loss": 3.9718, + "step": 501500 + }, + { + "epoch": 5.57, + "learning_rate": 5.5749283699442505e-08, + "loss": 3.9534, + "step": 502000 + }, + { + "epoch": 5.58, + "learning_rate": 5.5804810874441955e-08, + "loss": 3.9642, + "step": 502500 + }, + { + "epoch": 5.59, + "learning_rate": 5.586033804944139e-08, + "loss": 3.9509, + "step": 503000 + }, + { + "epoch": 5.59, + "learning_rate": 5.591586522444084e-08, + "loss": 3.9404, + "step": 503500 + }, + { + "epoch": 5.6, + "learning_rate": 5.597139239944029e-08, + "loss": 3.941, + "step": 504000 + }, + { + "epoch": 5.6, + "learning_rate": 5.6026919574439726e-08, + "loss": 3.9572, + "step": 504500 + }, + { + "epoch": 5.61, + "learning_rate": 5.608244674943917e-08, + "loss": 3.9336, + "step": 505000 + }, + { + "epoch": 5.61, + "learning_rate": 5.613797392443862e-08, + "loss": 3.9493, + "step": 505500 + }, + { + "epoch": 5.62, + "learning_rate": 5.6193501099438065e-08, + "loss": 3.9511, + "step": 506000 + }, + { + "epoch": 5.62, + "learning_rate": 5.62490282744375e-08, + "loss": 3.937, + "step": 506500 + }, + { + "epoch": 5.63, + "learning_rate": 5.630455544943695e-08, + "loss": 3.9533, + "step": 507000 + }, + { + "epoch": 5.64, + "learning_rate": 5.63600826244364e-08, + "loss": 3.9673, + "step": 507500 + }, + { + "epoch": 5.64, + "learning_rate": 5.6415609799435835e-08, + "loss": 3.9642, + "step": 508000 + }, + { + "epoch": 5.65, + "learning_rate": 5.6471136974435286e-08, + "loss": 3.9343, + "step": 508500 + }, + { + "epoch": 5.65, + "learning_rate": 5.652666414943473e-08, + "loss": 3.9659, + "step": 509000 + }, + { + "epoch": 5.66, + "learning_rate": 5.658219132443418e-08, + "loss": 3.9633, + "step": 509500 + }, + { + "epoch": 5.66, + "learning_rate": 5.663771849943362e-08, + "loss": 3.957, + "step": 510000 + }, + { + "epoch": 5.67, + "learning_rate": 5.669324567443306e-08, + "loss": 3.9357, + "step": 510500 + }, + { + "epoch": 5.67, + "learning_rate": 5.674877284943251e-08, + "loss": 3.9515, + "step": 511000 + }, + { + "epoch": 5.68, + "learning_rate": 5.680430002443195e-08, + "loss": 3.9693, + "step": 511500 + }, + { + "epoch": 5.69, + "learning_rate": 5.68598271994314e-08, + "loss": 3.9498, + "step": 512000 + }, + { + "epoch": 5.69, + "learning_rate": 5.6915354374430846e-08, + "loss": 3.9483, + "step": 512500 + }, + { + "epoch": 5.7, + "learning_rate": 5.6970881549430296e-08, + "loss": 3.96, + "step": 513000 + }, + { + "epoch": 5.7, + "learning_rate": 5.7026408724429734e-08, + "loss": 3.9513, + "step": 513500 + }, + { + "epoch": 5.71, + "learning_rate": 5.708193589942918e-08, + "loss": 3.94, + "step": 514000 + }, + { + "epoch": 5.71, + "learning_rate": 5.713746307442863e-08, + "loss": 3.9433, + "step": 514500 + }, + { + "epoch": 5.72, + "learning_rate": 5.7192990249428067e-08, + "loss": 3.938, + "step": 515000 + }, + { + "epoch": 5.72, + "learning_rate": 5.724851742442751e-08, + "loss": 3.944, + "step": 515500 + }, + { + "epoch": 5.73, + "learning_rate": 5.730404459942696e-08, + "loss": 3.9501, + "step": 516000 + }, + { + "epoch": 5.74, + "learning_rate": 5.73595717744264e-08, + "loss": 3.954, + "step": 516500 + }, + { + "epoch": 5.74, + "learning_rate": 5.741509894942584e-08, + "loss": 3.9366, + "step": 517000 + }, + { + "epoch": 5.75, + "learning_rate": 5.7470626124425294e-08, + "loss": 3.9513, + "step": 517500 + }, + { + "epoch": 5.75, + "learning_rate": 5.752615329942474e-08, + "loss": 3.9388, + "step": 518000 + }, + { + "epoch": 5.76, + "learning_rate": 5.7581680474424176e-08, + "loss": 3.9471, + "step": 518500 + }, + { + "epoch": 5.76, + "learning_rate": 5.7637207649423627e-08, + "loss": 3.9411, + "step": 519000 + }, + { + "epoch": 5.77, + "learning_rate": 5.769273482442307e-08, + "loss": 3.9785, + "step": 519500 + }, + { + "epoch": 5.77, + "learning_rate": 5.774826199942251e-08, + "loss": 3.9397, + "step": 520000 + }, + { + "epoch": 5.78, + "learning_rate": 5.780378917442196e-08, + "loss": 3.9626, + "step": 520500 + }, + { + "epoch": 5.79, + "learning_rate": 5.78593163494214e-08, + "loss": 3.9436, + "step": 521000 + }, + { + "epoch": 5.79, + "learning_rate": 5.7914843524420854e-08, + "loss": 3.9672, + "step": 521500 + }, + { + "epoch": 5.8, + "learning_rate": 5.797037069942029e-08, + "loss": 3.949, + "step": 522000 + }, + { + "epoch": 5.8, + "learning_rate": 5.8025897874419736e-08, + "loss": 3.9333, + "step": 522500 + }, + { + "epoch": 5.81, + "learning_rate": 5.8081425049419187e-08, + "loss": 3.963, + "step": 523000 + }, + { + "epoch": 5.81, + "learning_rate": 5.8136952224418624e-08, + "loss": 3.9595, + "step": 523500 + }, + { + "epoch": 5.82, + "learning_rate": 5.819247939941807e-08, + "loss": 3.9429, + "step": 524000 + }, + { + "epoch": 5.82, + "learning_rate": 5.824800657441752e-08, + "loss": 3.9546, + "step": 524500 + }, + { + "epoch": 5.83, + "learning_rate": 5.8303533749416963e-08, + "loss": 3.935, + "step": 525000 + }, + { + "epoch": 5.84, + "learning_rate": 5.83590609244164e-08, + "loss": 3.9467, + "step": 525500 + }, + { + "epoch": 5.84, + "learning_rate": 5.841458809941585e-08, + "loss": 3.9507, + "step": 526000 + }, + { + "epoch": 5.85, + "learning_rate": 5.84701152744153e-08, + "loss": 3.9562, + "step": 526500 + }, + { + "epoch": 5.85, + "learning_rate": 5.8525642449414733e-08, + "loss": 3.9703, + "step": 527000 + }, + { + "epoch": 5.86, + "learning_rate": 5.8581169624414184e-08, + "loss": 3.9301, + "step": 527500 + }, + { + "epoch": 5.86, + "learning_rate": 5.8636696799413635e-08, + "loss": 3.9348, + "step": 528000 + }, + { + "epoch": 5.87, + "learning_rate": 5.869222397441308e-08, + "loss": 3.9399, + "step": 528500 + }, + { + "epoch": 5.87, + "learning_rate": 5.874775114941252e-08, + "loss": 3.967, + "step": 529000 + }, + { + "epoch": 5.88, + "learning_rate": 5.880327832441197e-08, + "loss": 3.9575, + "step": 529500 + }, + { + "epoch": 5.89, + "learning_rate": 5.885880549941141e-08, + "loss": 3.9382, + "step": 530000 + }, + { + "epoch": 5.89, + "learning_rate": 5.891433267441085e-08, + "loss": 3.9322, + "step": 530500 + }, + { + "epoch": 5.9, + "learning_rate": 5.89698598494103e-08, + "loss": 3.9357, + "step": 531000 + }, + { + "epoch": 5.9, + "learning_rate": 5.9025387024409744e-08, + "loss": 3.9619, + "step": 531500 + }, + { + "epoch": 5.91, + "learning_rate": 5.9080914199409195e-08, + "loss": 3.9453, + "step": 532000 + }, + { + "epoch": 5.91, + "learning_rate": 5.913644137440863e-08, + "loss": 3.9507, + "step": 532500 + }, + { + "epoch": 5.92, + "learning_rate": 5.919196854940808e-08, + "loss": 3.9565, + "step": 533000 + }, + { + "epoch": 5.92, + "learning_rate": 5.924749572440753e-08, + "loss": 3.9495, + "step": 533500 + }, + { + "epoch": 5.93, + "learning_rate": 5.9303022899406965e-08, + "loss": 3.9427, + "step": 534000 + }, + { + "epoch": 5.94, + "learning_rate": 5.935855007440641e-08, + "loss": 3.9472, + "step": 534500 + }, + { + "epoch": 5.94, + "learning_rate": 5.941407724940586e-08, + "loss": 3.9472, + "step": 535000 + }, + { + "epoch": 5.95, + "learning_rate": 5.9469604424405304e-08, + "loss": 3.9411, + "step": 535500 + }, + { + "epoch": 5.95, + "learning_rate": 5.952513159940474e-08, + "loss": 3.9418, + "step": 536000 + }, + { + "epoch": 5.96, + "learning_rate": 5.958065877440419e-08, + "loss": 3.9293, + "step": 536500 + }, + { + "epoch": 5.96, + "learning_rate": 5.963618594940364e-08, + "loss": 3.9538, + "step": 537000 + }, + { + "epoch": 5.97, + "learning_rate": 5.969171312440307e-08, + "loss": 3.9491, + "step": 537500 + }, + { + "epoch": 5.97, + "learning_rate": 5.974724029940252e-08, + "loss": 3.9373, + "step": 538000 + }, + { + "epoch": 5.98, + "learning_rate": 5.980276747440198e-08, + "loss": 3.9537, + "step": 538500 + }, + { + "epoch": 5.99, + "learning_rate": 5.985829464940142e-08, + "loss": 3.9417, + "step": 539000 + }, + { + "epoch": 5.99, + "learning_rate": 5.991382182440085e-08, + "loss": 3.9653, + "step": 539500 + }, + { + "epoch": 6.0, + "learning_rate": 5.996934899940031e-08, + "loss": 3.9557, + "step": 540000 + }, + { + "epoch": 6.0, + "eval_loss": 3.9568018913269043, + "eval_runtime": 6.3151, + "eval_samples_per_second": 246.077, + "step": 540276 + }, + { + "epoch": 6.0, + "learning_rate": 6.002487617439975e-08, + "loss": 3.9361, + "step": 540500 + }, + { + "epoch": 6.01, + "learning_rate": 6.008040334939918e-08, + "loss": 3.945, + "step": 541000 + }, + { + "epoch": 6.01, + "learning_rate": 6.013593052439864e-08, + "loss": 3.9389, + "step": 541500 + }, + { + "epoch": 6.02, + "learning_rate": 6.019145769939809e-08, + "loss": 3.9583, + "step": 542000 + }, + { + "epoch": 6.02, + "learning_rate": 6.024698487439753e-08, + "loss": 3.9412, + "step": 542500 + }, + { + "epoch": 6.03, + "learning_rate": 6.030251204939697e-08, + "loss": 3.9477, + "step": 543000 + }, + { + "epoch": 6.04, + "learning_rate": 6.035803922439642e-08, + "loss": 3.9518, + "step": 543500 + }, + { + "epoch": 6.04, + "learning_rate": 6.041356639939586e-08, + "loss": 3.9443, + "step": 544000 + }, + { + "epoch": 6.05, + "learning_rate": 6.04690935743953e-08, + "loss": 3.9274, + "step": 544500 + }, + { + "epoch": 6.05, + "learning_rate": 6.052462074939475e-08, + "loss": 3.9232, + "step": 545000 + }, + { + "epoch": 6.06, + "learning_rate": 6.05801479243942e-08, + "loss": 3.9389, + "step": 545500 + }, + { + "epoch": 6.06, + "learning_rate": 6.063567509939364e-08, + "loss": 3.9506, + "step": 546000 + }, + { + "epoch": 6.07, + "learning_rate": 6.069120227439308e-08, + "loss": 3.9553, + "step": 546500 + }, + { + "epoch": 6.07, + "learning_rate": 6.074672944939253e-08, + "loss": 3.9612, + "step": 547000 + }, + { + "epoch": 6.08, + "learning_rate": 6.080225662439198e-08, + "loss": 3.94, + "step": 547500 + }, + { + "epoch": 6.09, + "learning_rate": 6.085778379939142e-08, + "loss": 3.953, + "step": 548000 + }, + { + "epoch": 6.09, + "learning_rate": 6.091331097439086e-08, + "loss": 3.9462, + "step": 548500 + }, + { + "epoch": 6.1, + "learning_rate": 6.096883814939032e-08, + "loss": 3.9316, + "step": 549000 + }, + { + "epoch": 6.1, + "learning_rate": 6.102436532438975e-08, + "loss": 3.9408, + "step": 549500 + }, + { + "epoch": 6.11, + "learning_rate": 6.107989249938919e-08, + "loss": 3.9347, + "step": 550000 + }, + { + "epoch": 6.11, + "learning_rate": 6.113541967438865e-08, + "loss": 3.9506, + "step": 550500 + }, + { + "epoch": 6.12, + "learning_rate": 6.11909468493881e-08, + "loss": 3.9241, + "step": 551000 + }, + { + "epoch": 6.12, + "learning_rate": 6.124647402438752e-08, + "loss": 3.9242, + "step": 551500 + }, + { + "epoch": 6.13, + "learning_rate": 6.130200119938698e-08, + "loss": 3.941, + "step": 552000 + }, + { + "epoch": 6.14, + "learning_rate": 6.135752837438643e-08, + "loss": 3.9496, + "step": 552500 + }, + { + "epoch": 6.14, + "learning_rate": 6.141305554938586e-08, + "loss": 3.9441, + "step": 553000 + }, + { + "epoch": 6.15, + "learning_rate": 6.146858272438531e-08, + "loss": 3.9308, + "step": 553500 + }, + { + "epoch": 6.15, + "learning_rate": 6.152410989938476e-08, + "loss": 3.9358, + "step": 554000 + }, + { + "epoch": 6.16, + "learning_rate": 6.15796370743842e-08, + "loss": 3.9401, + "step": 554500 + }, + { + "epoch": 6.16, + "learning_rate": 6.163516424938365e-08, + "loss": 3.9411, + "step": 555000 + }, + { + "epoch": 6.17, + "learning_rate": 6.169069142438309e-08, + "loss": 3.9419, + "step": 555500 + }, + { + "epoch": 6.17, + "learning_rate": 6.174621859938254e-08, + "loss": 3.9487, + "step": 556000 + }, + { + "epoch": 6.18, + "learning_rate": 6.180174577438198e-08, + "loss": 3.952, + "step": 556500 + }, + { + "epoch": 6.19, + "learning_rate": 6.185727294938142e-08, + "loss": 3.9228, + "step": 557000 + }, + { + "epoch": 6.19, + "learning_rate": 6.191280012438087e-08, + "loss": 3.9396, + "step": 557500 + }, + { + "epoch": 6.2, + "learning_rate": 6.196832729938031e-08, + "loss": 3.9284, + "step": 558000 + }, + { + "epoch": 6.2, + "learning_rate": 6.202385447437976e-08, + "loss": 3.9472, + "step": 558500 + }, + { + "epoch": 6.21, + "learning_rate": 6.20793816493792e-08, + "loss": 3.9273, + "step": 559000 + }, + { + "epoch": 6.21, + "learning_rate": 6.213490882437864e-08, + "loss": 3.9503, + "step": 559500 + }, + { + "epoch": 6.22, + "learning_rate": 6.219043599937809e-08, + "loss": 3.9372, + "step": 560000 + }, + { + "epoch": 6.22, + "learning_rate": 6.224596317437753e-08, + "loss": 3.9395, + "step": 560500 + }, + { + "epoch": 6.23, + "learning_rate": 6.230149034937698e-08, + "loss": 3.9382, + "step": 561000 + }, + { + "epoch": 6.24, + "learning_rate": 6.235701752437643e-08, + "loss": 3.9413, + "step": 561500 + }, + { + "epoch": 6.24, + "learning_rate": 6.241254469937587e-08, + "loss": 3.9435, + "step": 562000 + }, + { + "epoch": 6.25, + "learning_rate": 6.246807187437532e-08, + "loss": 3.9339, + "step": 562500 + }, + { + "epoch": 6.25, + "learning_rate": 6.252359904937477e-08, + "loss": 3.9205, + "step": 563000 + }, + { + "epoch": 6.26, + "learning_rate": 6.25791262243742e-08, + "loss": 3.9363, + "step": 563500 + }, + { + "epoch": 6.26, + "learning_rate": 6.263465339937366e-08, + "loss": 3.9355, + "step": 564000 + }, + { + "epoch": 6.27, + "learning_rate": 6.26901805743731e-08, + "loss": 3.9259, + "step": 564500 + }, + { + "epoch": 6.27, + "learning_rate": 6.274570774937254e-08, + "loss": 3.9428, + "step": 565000 + }, + { + "epoch": 6.28, + "learning_rate": 6.280123492437199e-08, + "loss": 3.9435, + "step": 565500 + }, + { + "epoch": 6.29, + "learning_rate": 6.285676209937143e-08, + "loss": 3.9427, + "step": 566000 + }, + { + "epoch": 6.29, + "learning_rate": 6.291228927437088e-08, + "loss": 3.9305, + "step": 566500 + }, + { + "epoch": 6.3, + "learning_rate": 6.296781644937032e-08, + "loss": 3.9313, + "step": 567000 + }, + { + "epoch": 6.3, + "learning_rate": 6.302334362436976e-08, + "loss": 3.9337, + "step": 567500 + }, + { + "epoch": 6.31, + "learning_rate": 6.307887079936921e-08, + "loss": 3.926, + "step": 568000 + }, + { + "epoch": 6.31, + "learning_rate": 6.313439797436865e-08, + "loss": 3.9373, + "step": 568500 + }, + { + "epoch": 6.32, + "learning_rate": 6.31899251493681e-08, + "loss": 3.9312, + "step": 569000 + }, + { + "epoch": 6.32, + "learning_rate": 6.324545232436754e-08, + "loss": 3.9607, + "step": 569500 + }, + { + "epoch": 6.33, + "learning_rate": 6.330097949936699e-08, + "loss": 3.9486, + "step": 570000 + }, + { + "epoch": 6.34, + "learning_rate": 6.335650667436643e-08, + "loss": 3.9454, + "step": 570500 + }, + { + "epoch": 6.34, + "learning_rate": 6.341203384936587e-08, + "loss": 3.9454, + "step": 571000 + }, + { + "epoch": 6.35, + "learning_rate": 6.346756102436532e-08, + "loss": 3.9355, + "step": 571500 + }, + { + "epoch": 6.35, + "learning_rate": 6.352308819936478e-08, + "loss": 3.9447, + "step": 572000 + }, + { + "epoch": 6.36, + "learning_rate": 6.357861537436421e-08, + "loss": 3.946, + "step": 572500 + }, + { + "epoch": 6.36, + "learning_rate": 6.363414254936365e-08, + "loss": 3.9447, + "step": 573000 + }, + { + "epoch": 6.37, + "learning_rate": 6.368966972436311e-08, + "loss": 3.9245, + "step": 573500 + }, + { + "epoch": 6.37, + "learning_rate": 6.374519689936254e-08, + "loss": 3.9139, + "step": 574000 + }, + { + "epoch": 6.38, + "learning_rate": 6.380072407436198e-08, + "loss": 3.9303, + "step": 574500 + }, + { + "epoch": 6.39, + "learning_rate": 6.385625124936144e-08, + "loss": 3.9308, + "step": 575000 + }, + { + "epoch": 6.39, + "learning_rate": 6.391177842436088e-08, + "loss": 3.9342, + "step": 575500 + }, + { + "epoch": 6.4, + "learning_rate": 6.396730559936032e-08, + "loss": 3.9615, + "step": 576000 + }, + { + "epoch": 6.4, + "learning_rate": 6.402283277435977e-08, + "loss": 3.9385, + "step": 576500 + }, + { + "epoch": 6.41, + "learning_rate": 6.407835994935922e-08, + "loss": 3.9267, + "step": 577000 + }, + { + "epoch": 6.41, + "learning_rate": 6.413388712435865e-08, + "loss": 3.9361, + "step": 577500 + }, + { + "epoch": 6.42, + "learning_rate": 6.41894142993581e-08, + "loss": 3.9193, + "step": 578000 + }, + { + "epoch": 6.42, + "learning_rate": 6.424494147435755e-08, + "loss": 3.9314, + "step": 578500 + }, + { + "epoch": 6.43, + "learning_rate": 6.430046864935698e-08, + "loss": 3.9402, + "step": 579000 + }, + { + "epoch": 6.44, + "learning_rate": 6.435599582435644e-08, + "loss": 3.9199, + "step": 579500 + }, + { + "epoch": 6.44, + "learning_rate": 6.441152299935588e-08, + "loss": 3.9192, + "step": 580000 + }, + { + "epoch": 6.45, + "learning_rate": 6.446705017435533e-08, + "loss": 3.9321, + "step": 580500 + }, + { + "epoch": 6.45, + "learning_rate": 6.452257734935477e-08, + "loss": 3.937, + "step": 581000 + }, + { + "epoch": 6.46, + "learning_rate": 6.457810452435422e-08, + "loss": 3.9252, + "step": 581500 + }, + { + "epoch": 6.46, + "learning_rate": 6.463363169935366e-08, + "loss": 3.9259, + "step": 582000 + }, + { + "epoch": 6.47, + "learning_rate": 6.46891588743531e-08, + "loss": 3.9177, + "step": 582500 + }, + { + "epoch": 6.47, + "learning_rate": 6.474468604935255e-08, + "loss": 3.9188, + "step": 583000 + }, + { + "epoch": 6.48, + "learning_rate": 6.480021322435199e-08, + "loss": 3.9307, + "step": 583500 + }, + { + "epoch": 6.49, + "learning_rate": 6.485574039935145e-08, + "loss": 3.9509, + "step": 584000 + }, + { + "epoch": 6.49, + "learning_rate": 6.491126757435088e-08, + "loss": 3.9356, + "step": 584500 + }, + { + "epoch": 6.5, + "learning_rate": 6.496679474935032e-08, + "loss": 3.9305, + "step": 585000 + }, + { + "epoch": 6.5, + "learning_rate": 6.502232192434978e-08, + "loss": 3.9323, + "step": 585500 + }, + { + "epoch": 6.51, + "learning_rate": 6.507784909934921e-08, + "loss": 3.9217, + "step": 586000 + }, + { + "epoch": 6.51, + "learning_rate": 6.513337627434866e-08, + "loss": 3.9264, + "step": 586500 + }, + { + "epoch": 6.52, + "learning_rate": 6.518890344934811e-08, + "loss": 3.9398, + "step": 587000 + }, + { + "epoch": 6.52, + "learning_rate": 6.524443062434756e-08, + "loss": 3.9205, + "step": 587500 + }, + { + "epoch": 6.53, + "learning_rate": 6.529995779934699e-08, + "loss": 3.937, + "step": 588000 + }, + { + "epoch": 6.54, + "learning_rate": 6.535548497434645e-08, + "loss": 3.9312, + "step": 588500 + }, + { + "epoch": 6.54, + "learning_rate": 6.541101214934589e-08, + "loss": 3.9102, + "step": 589000 + }, + { + "epoch": 6.55, + "learning_rate": 6.546653932434532e-08, + "loss": 3.9319, + "step": 589500 + }, + { + "epoch": 6.55, + "learning_rate": 6.552206649934478e-08, + "loss": 3.9355, + "step": 590000 + }, + { + "epoch": 6.56, + "learning_rate": 6.557759367434422e-08, + "loss": 3.9361, + "step": 590500 + }, + { + "epoch": 6.56, + "learning_rate": 6.563312084934367e-08, + "loss": 3.9254, + "step": 591000 + }, + { + "epoch": 6.57, + "learning_rate": 6.568864802434311e-08, + "loss": 3.9286, + "step": 591500 + }, + { + "epoch": 6.57, + "learning_rate": 6.574417519934256e-08, + "loss": 3.9225, + "step": 592000 + }, + { + "epoch": 6.58, + "learning_rate": 6.5799702374342e-08, + "loss": 3.9242, + "step": 592500 + }, + { + "epoch": 6.59, + "learning_rate": 6.585522954934144e-08, + "loss": 3.9423, + "step": 593000 + }, + { + "epoch": 6.59, + "learning_rate": 6.591075672434089e-08, + "loss": 3.9053, + "step": 593500 + }, + { + "epoch": 6.6, + "learning_rate": 6.596628389934033e-08, + "loss": 3.9128, + "step": 594000 + }, + { + "epoch": 6.6, + "learning_rate": 6.602181107433979e-08, + "loss": 3.9311, + "step": 594500 + }, + { + "epoch": 6.61, + "learning_rate": 6.607733824933922e-08, + "loss": 3.9216, + "step": 595000 + }, + { + "epoch": 6.61, + "learning_rate": 6.613286542433867e-08, + "loss": 3.9226, + "step": 595500 + }, + { + "epoch": 6.62, + "learning_rate": 6.618839259933812e-08, + "loss": 3.929, + "step": 596000 + }, + { + "epoch": 6.62, + "learning_rate": 6.624391977433755e-08, + "loss": 3.9284, + "step": 596500 + }, + { + "epoch": 6.63, + "learning_rate": 6.6299446949337e-08, + "loss": 3.9281, + "step": 597000 + }, + { + "epoch": 6.64, + "learning_rate": 6.635497412433646e-08, + "loss": 3.9203, + "step": 597500 + }, + { + "epoch": 6.64, + "learning_rate": 6.64105012993359e-08, + "loss": 3.9338, + "step": 598000 + }, + { + "epoch": 6.65, + "learning_rate": 6.646602847433533e-08, + "loss": 3.9226, + "step": 598500 + }, + { + "epoch": 6.65, + "learning_rate": 6.652155564933479e-08, + "loss": 3.9085, + "step": 599000 + }, + { + "epoch": 6.66, + "learning_rate": 6.657708282433423e-08, + "loss": 3.9467, + "step": 599500 + }, + { + "epoch": 6.66, + "learning_rate": 6.663260999933366e-08, + "loss": 3.9172, + "step": 600000 + }, + { + "epoch": 6.67, + "learning_rate": 6.668813717433312e-08, + "loss": 3.932, + "step": 600500 + }, + { + "epoch": 6.67, + "learning_rate": 6.674366434933256e-08, + "loss": 3.9004, + "step": 601000 + }, + { + "epoch": 6.68, + "learning_rate": 6.679919152433201e-08, + "loss": 3.916, + "step": 601500 + }, + { + "epoch": 6.69, + "learning_rate": 6.685471869933145e-08, + "loss": 3.9365, + "step": 602000 + }, + { + "epoch": 6.69, + "learning_rate": 6.69102458743309e-08, + "loss": 3.9196, + "step": 602500 + }, + { + "epoch": 6.7, + "learning_rate": 6.696577304933034e-08, + "loss": 3.9324, + "step": 603000 + }, + { + "epoch": 6.7, + "learning_rate": 6.702130022432979e-08, + "loss": 3.9126, + "step": 603500 + }, + { + "epoch": 6.71, + "learning_rate": 6.707682739932923e-08, + "loss": 3.9314, + "step": 604000 + }, + { + "epoch": 6.71, + "learning_rate": 6.713235457432867e-08, + "loss": 3.9297, + "step": 604500 + }, + { + "epoch": 6.72, + "learning_rate": 6.718788174932812e-08, + "loss": 3.9211, + "step": 605000 + }, + { + "epoch": 6.72, + "learning_rate": 6.724340892432756e-08, + "loss": 3.9295, + "step": 605500 + }, + { + "epoch": 6.73, + "learning_rate": 6.7298936099327e-08, + "loss": 3.9194, + "step": 606000 + }, + { + "epoch": 6.74, + "learning_rate": 6.735446327432645e-08, + "loss": 3.9063, + "step": 606500 + }, + { + "epoch": 6.74, + "learning_rate": 6.74099904493259e-08, + "loss": 3.9325, + "step": 607000 + }, + { + "epoch": 6.75, + "learning_rate": 6.746551762432534e-08, + "loss": 3.9129, + "step": 607500 + }, + { + "epoch": 6.75, + "learning_rate": 6.752104479932478e-08, + "loss": 3.922, + "step": 608000 + }, + { + "epoch": 6.76, + "learning_rate": 6.757657197432423e-08, + "loss": 3.937, + "step": 608500 + }, + { + "epoch": 6.76, + "learning_rate": 6.763209914932367e-08, + "loss": 3.9429, + "step": 609000 + }, + { + "epoch": 6.77, + "learning_rate": 6.768762632432312e-08, + "loss": 3.9045, + "step": 609500 + }, + { + "epoch": 6.77, + "learning_rate": 6.774315349932257e-08, + "loss": 3.9464, + "step": 610000 + }, + { + "epoch": 6.78, + "learning_rate": 6.7798680674322e-08, + "loss": 3.9216, + "step": 610500 + }, + { + "epoch": 6.79, + "learning_rate": 6.785420784932145e-08, + "loss": 3.9371, + "step": 611000 + }, + { + "epoch": 6.79, + "learning_rate": 6.79097350243209e-08, + "loss": 3.9265, + "step": 611500 + }, + { + "epoch": 6.8, + "learning_rate": 6.796526219932034e-08, + "loss": 3.938, + "step": 612000 + }, + { + "epoch": 6.8, + "learning_rate": 6.802078937431978e-08, + "loss": 3.9481, + "step": 612500 + }, + { + "epoch": 6.81, + "learning_rate": 6.807631654931924e-08, + "loss": 3.9402, + "step": 613000 + }, + { + "epoch": 6.81, + "learning_rate": 6.813184372431868e-08, + "loss": 3.9236, + "step": 613500 + }, + { + "epoch": 6.82, + "learning_rate": 6.818737089931811e-08, + "loss": 3.9269, + "step": 614000 + }, + { + "epoch": 6.82, + "learning_rate": 6.824289807431757e-08, + "loss": 3.9182, + "step": 614500 + }, + { + "epoch": 6.83, + "learning_rate": 6.829842524931701e-08, + "loss": 3.9493, + "step": 615000 + }, + { + "epoch": 6.84, + "learning_rate": 6.835395242431645e-08, + "loss": 3.9467, + "step": 615500 + }, + { + "epoch": 6.84, + "learning_rate": 6.84094795993159e-08, + "loss": 3.9133, + "step": 616000 + }, + { + "epoch": 6.85, + "learning_rate": 6.846500677431535e-08, + "loss": 3.9211, + "step": 616500 + }, + { + "epoch": 6.85, + "learning_rate": 6.852053394931479e-08, + "loss": 3.933, + "step": 617000 + }, + { + "epoch": 6.86, + "learning_rate": 6.857606112431424e-08, + "loss": 3.9194, + "step": 617500 + }, + { + "epoch": 6.86, + "learning_rate": 6.863158829931368e-08, + "loss": 3.9159, + "step": 618000 + }, + { + "epoch": 6.87, + "learning_rate": 6.868711547431312e-08, + "loss": 3.9272, + "step": 618500 + }, + { + "epoch": 6.87, + "learning_rate": 6.874264264931257e-08, + "loss": 3.9273, + "step": 619000 + }, + { + "epoch": 6.88, + "learning_rate": 6.879816982431201e-08, + "loss": 3.9304, + "step": 619500 + }, + { + "epoch": 6.89, + "learning_rate": 6.885369699931146e-08, + "loss": 3.9261, + "step": 620000 + }, + { + "epoch": 6.89, + "learning_rate": 6.890922417431091e-08, + "loss": 3.9158, + "step": 620500 + }, + { + "epoch": 6.9, + "learning_rate": 6.896475134931034e-08, + "loss": 3.929, + "step": 621000 + }, + { + "epoch": 6.9, + "learning_rate": 6.902027852430979e-08, + "loss": 3.916, + "step": 621500 + }, + { + "epoch": 6.91, + "learning_rate": 6.907580569930925e-08, + "loss": 3.9179, + "step": 622000 + }, + { + "epoch": 6.91, + "learning_rate": 6.913133287430868e-08, + "loss": 3.9178, + "step": 622500 + }, + { + "epoch": 6.92, + "learning_rate": 6.918686004930812e-08, + "loss": 3.9373, + "step": 623000 + }, + { + "epoch": 6.92, + "learning_rate": 6.924238722430758e-08, + "loss": 3.929, + "step": 623500 + }, + { + "epoch": 6.93, + "learning_rate": 6.929791439930702e-08, + "loss": 3.9087, + "step": 624000 + }, + { + "epoch": 6.94, + "learning_rate": 6.935344157430645e-08, + "loss": 3.9404, + "step": 624500 + }, + { + "epoch": 6.94, + "learning_rate": 6.940896874930591e-08, + "loss": 3.9256, + "step": 625000 + }, + { + "epoch": 6.95, + "learning_rate": 6.946449592430536e-08, + "loss": 3.9075, + "step": 625500 + }, + { + "epoch": 6.95, + "learning_rate": 6.952002309930479e-08, + "loss": 3.9193, + "step": 626000 + }, + { + "epoch": 6.96, + "learning_rate": 6.957555027430424e-08, + "loss": 3.9285, + "step": 626500 + }, + { + "epoch": 6.96, + "learning_rate": 6.963107744930369e-08, + "loss": 3.9006, + "step": 627000 + }, + { + "epoch": 6.97, + "learning_rate": 6.968660462430313e-08, + "loss": 3.9346, + "step": 627500 + }, + { + "epoch": 6.97, + "learning_rate": 6.974213179930258e-08, + "loss": 3.9115, + "step": 628000 + }, + { + "epoch": 6.98, + "learning_rate": 6.979765897430202e-08, + "loss": 3.9158, + "step": 628500 + }, + { + "epoch": 6.99, + "learning_rate": 6.985318614930146e-08, + "loss": 3.8951, + "step": 629000 + }, + { + "epoch": 6.99, + "learning_rate": 6.990871332430091e-08, + "loss": 3.9125, + "step": 629500 + }, + { + "epoch": 7.0, + "learning_rate": 6.996424049930035e-08, + "loss": 3.9119, + "step": 630000 + }, + { + "epoch": 7.0, + "eval_loss": 3.940045118331909, + "eval_runtime": 6.3095, + "eval_samples_per_second": 246.294, + "step": 630322 + }, + { + "epoch": 7.0, + "learning_rate": 7.00197676742998e-08, + "loss": 3.9333, + "step": 630500 + }, + { + "epoch": 7.01, + "learning_rate": 7.007529484929925e-08, + "loss": 3.9177, + "step": 631000 + }, + { + "epoch": 7.01, + "learning_rate": 7.013082202429869e-08, + "loss": 3.915, + "step": 631500 + }, + { + "epoch": 7.02, + "learning_rate": 7.018634919929813e-08, + "loss": 3.9332, + "step": 632000 + }, + { + "epoch": 7.02, + "learning_rate": 7.024187637429759e-08, + "loss": 3.9191, + "step": 632500 + }, + { + "epoch": 7.03, + "learning_rate": 7.029740354929702e-08, + "loss": 3.919, + "step": 633000 + }, + { + "epoch": 7.04, + "learning_rate": 7.035293072429646e-08, + "loss": 3.9161, + "step": 633500 + }, + { + "epoch": 7.04, + "learning_rate": 7.040845789929592e-08, + "loss": 3.9204, + "step": 634000 + }, + { + "epoch": 7.05, + "learning_rate": 7.046398507429536e-08, + "loss": 3.9058, + "step": 634500 + }, + { + "epoch": 7.05, + "learning_rate": 7.05195122492948e-08, + "loss": 3.9288, + "step": 635000 + }, + { + "epoch": 7.06, + "learning_rate": 7.057503942429425e-08, + "loss": 3.9139, + "step": 635500 + }, + { + "epoch": 7.06, + "learning_rate": 7.06305665992937e-08, + "loss": 3.9273, + "step": 636000 + }, + { + "epoch": 7.07, + "learning_rate": 7.068609377429313e-08, + "loss": 3.9258, + "step": 636500 + }, + { + "epoch": 7.07, + "learning_rate": 7.074162094929259e-08, + "loss": 3.9207, + "step": 637000 + }, + { + "epoch": 7.08, + "learning_rate": 7.079714812429203e-08, + "loss": 3.9064, + "step": 637500 + }, + { + "epoch": 7.09, + "learning_rate": 7.085267529929147e-08, + "loss": 3.9279, + "step": 638000 + }, + { + "epoch": 7.09, + "learning_rate": 7.090820247429092e-08, + "loss": 3.9297, + "step": 638500 + }, + { + "epoch": 7.1, + "learning_rate": 7.096372964929036e-08, + "loss": 3.9044, + "step": 639000 + }, + { + "epoch": 7.1, + "learning_rate": 7.10192568242898e-08, + "loss": 3.9123, + "step": 639500 + }, + { + "epoch": 7.11, + "learning_rate": 7.107478399928925e-08, + "loss": 3.8989, + "step": 640000 + }, + { + "epoch": 7.11, + "learning_rate": 7.11303111742887e-08, + "loss": 3.94, + "step": 640500 + }, + { + "epoch": 7.12, + "learning_rate": 7.118583834928814e-08, + "loss": 3.9098, + "step": 641000 + }, + { + "epoch": 7.12, + "learning_rate": 7.124136552428758e-08, + "loss": 3.8942, + "step": 641500 + }, + { + "epoch": 7.13, + "learning_rate": 7.129689269928703e-08, + "loss": 3.9097, + "step": 642000 + }, + { + "epoch": 7.14, + "learning_rate": 7.135241987428647e-08, + "loss": 3.9137, + "step": 642500 + }, + { + "epoch": 7.14, + "learning_rate": 7.140794704928592e-08, + "loss": 3.91, + "step": 643000 + }, + { + "epoch": 7.15, + "learning_rate": 7.146347422428536e-08, + "loss": 3.9204, + "step": 643500 + }, + { + "epoch": 7.15, + "learning_rate": 7.15190013992848e-08, + "loss": 3.9341, + "step": 644000 + }, + { + "epoch": 7.16, + "learning_rate": 7.157452857428425e-08, + "loss": 3.9228, + "step": 644500 + }, + { + "epoch": 7.16, + "learning_rate": 7.163005574928369e-08, + "loss": 3.9228, + "step": 645000 + }, + { + "epoch": 7.17, + "learning_rate": 7.168558292428314e-08, + "loss": 3.9125, + "step": 645500 + }, + { + "epoch": 7.17, + "learning_rate": 7.174111009928258e-08, + "loss": 3.9016, + "step": 646000 + }, + { + "epoch": 7.18, + "learning_rate": 7.179663727428204e-08, + "loss": 3.9306, + "step": 646500 + }, + { + "epoch": 7.19, + "learning_rate": 7.185216444928147e-08, + "loss": 3.9173, + "step": 647000 + }, + { + "epoch": 7.19, + "learning_rate": 7.190769162428093e-08, + "loss": 3.907, + "step": 647500 + }, + { + "epoch": 7.2, + "learning_rate": 7.196321879928037e-08, + "loss": 3.9289, + "step": 648000 + }, + { + "epoch": 7.2, + "learning_rate": 7.20187459742798e-08, + "loss": 3.9323, + "step": 648500 + }, + { + "epoch": 7.21, + "learning_rate": 7.207427314927926e-08, + "loss": 3.9233, + "step": 649000 + }, + { + "epoch": 7.21, + "learning_rate": 7.21298003242787e-08, + "loss": 3.9141, + "step": 649500 + }, + { + "epoch": 7.22, + "learning_rate": 7.218532749927815e-08, + "loss": 3.9246, + "step": 650000 + }, + { + "epoch": 7.22, + "learning_rate": 7.224085467427759e-08, + "loss": 3.9174, + "step": 650500 + }, + { + "epoch": 7.23, + "learning_rate": 7.229638184927704e-08, + "loss": 3.9108, + "step": 651000 + }, + { + "epoch": 7.24, + "learning_rate": 7.235190902427648e-08, + "loss": 3.9237, + "step": 651500 + }, + { + "epoch": 7.24, + "learning_rate": 7.240743619927592e-08, + "loss": 3.9005, + "step": 652000 + }, + { + "epoch": 7.25, + "learning_rate": 7.246296337427537e-08, + "loss": 3.9105, + "step": 652500 + }, + { + "epoch": 7.25, + "learning_rate": 7.251849054927481e-08, + "loss": 3.9028, + "step": 653000 + }, + { + "epoch": 7.26, + "learning_rate": 7.257401772427426e-08, + "loss": 3.9251, + "step": 653500 + }, + { + "epoch": 7.26, + "learning_rate": 7.26295448992737e-08, + "loss": 3.9139, + "step": 654000 + }, + { + "epoch": 7.27, + "learning_rate": 7.268507207427314e-08, + "loss": 3.9035, + "step": 654500 + }, + { + "epoch": 7.27, + "learning_rate": 7.274059924927259e-08, + "loss": 3.9356, + "step": 655000 + }, + { + "epoch": 7.28, + "learning_rate": 7.279612642427203e-08, + "loss": 3.8926, + "step": 655500 + }, + { + "epoch": 7.29, + "learning_rate": 7.285165359927148e-08, + "loss": 3.9119, + "step": 656000 + }, + { + "epoch": 7.29, + "learning_rate": 7.290718077427092e-08, + "loss": 3.9163, + "step": 656500 + }, + { + "epoch": 7.3, + "learning_rate": 7.296270794927038e-08, + "loss": 3.887, + "step": 657000 + }, + { + "epoch": 7.3, + "learning_rate": 7.301823512426981e-08, + "loss": 3.9306, + "step": 657500 + }, + { + "epoch": 7.31, + "learning_rate": 7.307376229926925e-08, + "loss": 3.9175, + "step": 658000 + }, + { + "epoch": 7.31, + "learning_rate": 7.312928947426871e-08, + "loss": 3.9034, + "step": 658500 + }, + { + "epoch": 7.32, + "learning_rate": 7.318481664926814e-08, + "loss": 3.912, + "step": 659000 + }, + { + "epoch": 7.32, + "learning_rate": 7.324034382426759e-08, + "loss": 3.9079, + "step": 659500 + }, + { + "epoch": 7.33, + "learning_rate": 7.329587099926704e-08, + "loss": 3.9262, + "step": 660000 + }, + { + "epoch": 7.34, + "learning_rate": 7.335139817426649e-08, + "loss": 3.9054, + "step": 660500 + }, + { + "epoch": 7.34, + "learning_rate": 7.340692534926592e-08, + "loss": 3.9286, + "step": 661000 + }, + { + "epoch": 7.35, + "learning_rate": 7.346245252426538e-08, + "loss": 3.9119, + "step": 661500 + }, + { + "epoch": 7.35, + "learning_rate": 7.351797969926482e-08, + "loss": 3.9145, + "step": 662000 + }, + { + "epoch": 7.36, + "learning_rate": 7.357350687426425e-08, + "loss": 3.8954, + "step": 662500 + }, + { + "epoch": 7.36, + "learning_rate": 7.362903404926371e-08, + "loss": 3.9087, + "step": 663000 + }, + { + "epoch": 7.37, + "learning_rate": 7.368456122426315e-08, + "loss": 3.9164, + "step": 663500 + }, + { + "epoch": 7.37, + "learning_rate": 7.37400883992626e-08, + "loss": 3.9016, + "step": 664000 + }, + { + "epoch": 7.38, + "learning_rate": 7.379561557426204e-08, + "loss": 3.9077, + "step": 664500 + }, + { + "epoch": 7.39, + "learning_rate": 7.385114274926149e-08, + "loss": 3.907, + "step": 665000 + }, + { + "epoch": 7.39, + "learning_rate": 7.390666992426093e-08, + "loss": 3.9029, + "step": 665500 + }, + { + "epoch": 7.4, + "learning_rate": 7.396219709926037e-08, + "loss": 3.9104, + "step": 666000 + }, + { + "epoch": 7.4, + "learning_rate": 7.401772427425982e-08, + "loss": 3.9427, + "step": 666500 + }, + { + "epoch": 7.41, + "learning_rate": 7.407325144925926e-08, + "loss": 3.9092, + "step": 667000 + }, + { + "epoch": 7.41, + "learning_rate": 7.412877862425872e-08, + "loss": 3.9066, + "step": 667500 + }, + { + "epoch": 7.42, + "learning_rate": 7.418430579925815e-08, + "loss": 3.9082, + "step": 668000 + }, + { + "epoch": 7.42, + "learning_rate": 7.42398329742576e-08, + "loss": 3.8989, + "step": 668500 + }, + { + "epoch": 7.43, + "learning_rate": 7.429536014925705e-08, + "loss": 3.9068, + "step": 669000 + }, + { + "epoch": 7.44, + "learning_rate": 7.435088732425648e-08, + "loss": 3.919, + "step": 669500 + }, + { + "epoch": 7.44, + "learning_rate": 7.440641449925593e-08, + "loss": 3.898, + "step": 670000 + }, + { + "epoch": 7.45, + "learning_rate": 7.446194167425538e-08, + "loss": 3.9319, + "step": 670500 + }, + { + "epoch": 7.45, + "learning_rate": 7.451746884925482e-08, + "loss": 3.9045, + "step": 671000 + }, + { + "epoch": 7.46, + "learning_rate": 7.457299602425426e-08, + "loss": 3.94, + "step": 671500 + }, + { + "epoch": 7.46, + "learning_rate": 7.462852319925372e-08, + "loss": 3.9074, + "step": 672000 + }, + { + "epoch": 7.47, + "learning_rate": 7.468405037425316e-08, + "loss": 3.9192, + "step": 672500 + }, + { + "epoch": 7.47, + "learning_rate": 7.473957754925259e-08, + "loss": 3.9254, + "step": 673000 + }, + { + "epoch": 7.48, + "learning_rate": 7.479510472425205e-08, + "loss": 3.8787, + "step": 673500 + }, + { + "epoch": 7.49, + "learning_rate": 7.48506318992515e-08, + "loss": 3.9068, + "step": 674000 + }, + { + "epoch": 7.49, + "learning_rate": 7.490615907425092e-08, + "loss": 3.9256, + "step": 674500 + }, + { + "epoch": 7.5, + "learning_rate": 7.496168624925038e-08, + "loss": 3.9091, + "step": 675000 + }, + { + "epoch": 7.5, + "learning_rate": 7.501721342424983e-08, + "loss": 3.9024, + "step": 675500 + }, + { + "epoch": 7.51, + "learning_rate": 7.507274059924927e-08, + "loss": 3.8895, + "step": 676000 + }, + { + "epoch": 7.51, + "learning_rate": 7.512826777424871e-08, + "loss": 3.8965, + "step": 676500 + }, + { + "epoch": 7.52, + "learning_rate": 7.518379494924816e-08, + "loss": 3.9147, + "step": 677000 + }, + { + "epoch": 7.52, + "learning_rate": 7.52393221242476e-08, + "loss": 3.9156, + "step": 677500 + }, + { + "epoch": 7.53, + "learning_rate": 7.529484929924705e-08, + "loss": 3.8979, + "step": 678000 + }, + { + "epoch": 7.54, + "learning_rate": 7.535037647424649e-08, + "loss": 3.9147, + "step": 678500 + }, + { + "epoch": 7.54, + "learning_rate": 7.540590364924594e-08, + "loss": 3.9017, + "step": 679000 + }, + { + "epoch": 7.55, + "learning_rate": 7.546143082424539e-08, + "loss": 3.9015, + "step": 679500 + }, + { + "epoch": 7.55, + "learning_rate": 7.551695799924482e-08, + "loss": 3.9117, + "step": 680000 + }, + { + "epoch": 7.56, + "learning_rate": 7.557248517424427e-08, + "loss": 3.9013, + "step": 680500 + }, + { + "epoch": 7.56, + "learning_rate": 7.562801234924373e-08, + "loss": 3.9035, + "step": 681000 + }, + { + "epoch": 7.57, + "learning_rate": 7.568353952424316e-08, + "loss": 3.9101, + "step": 681500 + }, + { + "epoch": 7.57, + "learning_rate": 7.57390666992426e-08, + "loss": 3.9151, + "step": 682000 + }, + { + "epoch": 7.58, + "learning_rate": 7.579459387424206e-08, + "loss": 3.9254, + "step": 682500 + }, + { + "epoch": 7.59, + "learning_rate": 7.58501210492415e-08, + "loss": 3.9193, + "step": 683000 + }, + { + "epoch": 7.59, + "learning_rate": 7.590564822424093e-08, + "loss": 3.8989, + "step": 683500 + }, + { + "epoch": 7.6, + "learning_rate": 7.596117539924039e-08, + "loss": 3.9069, + "step": 684000 + }, + { + "epoch": 7.6, + "learning_rate": 7.601670257423984e-08, + "loss": 3.8954, + "step": 684500 + }, + { + "epoch": 7.61, + "learning_rate": 7.607222974923927e-08, + "loss": 3.9021, + "step": 685000 + }, + { + "epoch": 7.61, + "learning_rate": 7.612775692423872e-08, + "loss": 3.9001, + "step": 685500 + }, + { + "epoch": 7.62, + "learning_rate": 7.618328409923817e-08, + "loss": 3.9204, + "step": 686000 + }, + { + "epoch": 7.62, + "learning_rate": 7.623881127423761e-08, + "loss": 3.9168, + "step": 686500 + }, + { + "epoch": 7.63, + "learning_rate": 7.629433844923706e-08, + "loss": 3.9207, + "step": 687000 + }, + { + "epoch": 7.63, + "learning_rate": 7.63498656242365e-08, + "loss": 3.9028, + "step": 687500 + }, + { + "epoch": 7.64, + "learning_rate": 7.640539279923594e-08, + "loss": 3.9302, + "step": 688000 + }, + { + "epoch": 7.65, + "learning_rate": 7.646091997423539e-08, + "loss": 3.93, + "step": 688500 + }, + { + "epoch": 7.65, + "learning_rate": 7.651644714923483e-08, + "loss": 3.9112, + "step": 689000 + }, + { + "epoch": 7.66, + "learning_rate": 7.657197432423428e-08, + "loss": 3.8938, + "step": 689500 + }, + { + "epoch": 7.66, + "learning_rate": 7.662750149923372e-08, + "loss": 3.9033, + "step": 690000 + }, + { + "epoch": 7.67, + "learning_rate": 7.668302867423317e-08, + "loss": 3.9132, + "step": 690500 + }, + { + "epoch": 7.67, + "learning_rate": 7.673855584923261e-08, + "loss": 3.9075, + "step": 691000 + }, + { + "epoch": 7.68, + "learning_rate": 7.679408302423205e-08, + "loss": 3.913, + "step": 691500 + }, + { + "epoch": 7.68, + "learning_rate": 7.68496101992315e-08, + "loss": 3.9108, + "step": 692000 + }, + { + "epoch": 7.69, + "learning_rate": 7.690513737423094e-08, + "loss": 3.8918, + "step": 692500 + }, + { + "epoch": 7.7, + "learning_rate": 7.696066454923039e-08, + "loss": 3.9039, + "step": 693000 + }, + { + "epoch": 7.7, + "learning_rate": 7.701619172422984e-08, + "loss": 3.9054, + "step": 693500 + }, + { + "epoch": 7.71, + "learning_rate": 7.707171889922927e-08, + "loss": 3.9161, + "step": 694000 + }, + { + "epoch": 7.71, + "learning_rate": 7.712724607422872e-08, + "loss": 3.9053, + "step": 694500 + }, + { + "epoch": 7.72, + "learning_rate": 7.718277324922818e-08, + "loss": 3.9258, + "step": 695000 + }, + { + "epoch": 7.72, + "learning_rate": 7.723830042422761e-08, + "loss": 3.8918, + "step": 695500 + }, + { + "epoch": 7.73, + "learning_rate": 7.729382759922705e-08, + "loss": 3.9156, + "step": 696000 + }, + { + "epoch": 7.73, + "learning_rate": 7.734935477422651e-08, + "loss": 3.9374, + "step": 696500 + }, + { + "epoch": 7.74, + "learning_rate": 7.740488194922595e-08, + "loss": 3.8993, + "step": 697000 + }, + { + "epoch": 7.75, + "learning_rate": 7.746040912422538e-08, + "loss": 3.9103, + "step": 697500 + }, + { + "epoch": 7.75, + "learning_rate": 7.751593629922484e-08, + "loss": 3.8828, + "step": 698000 + }, + { + "epoch": 7.76, + "learning_rate": 7.757146347422429e-08, + "loss": 3.9187, + "step": 698500 + }, + { + "epoch": 7.76, + "learning_rate": 7.762699064922372e-08, + "loss": 3.9223, + "step": 699000 + }, + { + "epoch": 7.77, + "learning_rate": 7.768251782422317e-08, + "loss": 3.9157, + "step": 699500 + }, + { + "epoch": 7.77, + "learning_rate": 7.773804499922262e-08, + "loss": 3.8826, + "step": 700000 + }, + { + "epoch": 7.78, + "learning_rate": 7.779357217422206e-08, + "loss": 3.9057, + "step": 700500 + }, + { + "epoch": 7.78, + "learning_rate": 7.78490993492215e-08, + "loss": 3.9149, + "step": 701000 + }, + { + "epoch": 7.79, + "learning_rate": 7.790462652422095e-08, + "loss": 3.8994, + "step": 701500 + }, + { + "epoch": 7.8, + "learning_rate": 7.79601536992204e-08, + "loss": 3.9032, + "step": 702000 + }, + { + "epoch": 7.8, + "learning_rate": 7.801568087421984e-08, + "loss": 3.8996, + "step": 702500 + }, + { + "epoch": 7.81, + "learning_rate": 7.807120804921928e-08, + "loss": 3.9012, + "step": 703000 + }, + { + "epoch": 7.81, + "learning_rate": 7.812673522421873e-08, + "loss": 3.8985, + "step": 703500 + }, + { + "epoch": 7.82, + "learning_rate": 7.818226239921817e-08, + "loss": 3.8717, + "step": 704000 + }, + { + "epoch": 7.82, + "learning_rate": 7.823778957421762e-08, + "loss": 3.8897, + "step": 704500 + }, + { + "epoch": 7.83, + "learning_rate": 7.829331674921706e-08, + "loss": 3.9134, + "step": 705000 + }, + { + "epoch": 7.83, + "learning_rate": 7.834884392421652e-08, + "loss": 3.9101, + "step": 705500 + }, + { + "epoch": 7.84, + "learning_rate": 7.840437109921595e-08, + "loss": 3.9264, + "step": 706000 + }, + { + "epoch": 7.85, + "learning_rate": 7.845989827421539e-08, + "loss": 3.919, + "step": 706500 + }, + { + "epoch": 7.85, + "learning_rate": 7.851542544921485e-08, + "loss": 3.9073, + "step": 707000 + }, + { + "epoch": 7.86, + "learning_rate": 7.857095262421428e-08, + "loss": 3.9105, + "step": 707500 + }, + { + "epoch": 7.86, + "learning_rate": 7.862647979921372e-08, + "loss": 3.9152, + "step": 708000 + }, + { + "epoch": 7.87, + "learning_rate": 7.868200697421318e-08, + "loss": 3.8899, + "step": 708500 + }, + { + "epoch": 7.87, + "learning_rate": 7.873753414921263e-08, + "loss": 3.8969, + "step": 709000 + }, + { + "epoch": 7.88, + "learning_rate": 7.879306132421206e-08, + "loss": 3.8993, + "step": 709500 + }, + { + "epoch": 7.88, + "learning_rate": 7.884858849921151e-08, + "loss": 3.8914, + "step": 710000 + }, + { + "epoch": 7.89, + "learning_rate": 7.890411567421096e-08, + "loss": 3.9001, + "step": 710500 + }, + { + "epoch": 7.9, + "learning_rate": 7.895964284921039e-08, + "loss": 3.9019, + "step": 711000 + }, + { + "epoch": 7.9, + "learning_rate": 7.901517002420985e-08, + "loss": 3.9078, + "step": 711500 + }, + { + "epoch": 7.91, + "learning_rate": 7.907069719920929e-08, + "loss": 3.9036, + "step": 712000 + }, + { + "epoch": 7.91, + "learning_rate": 7.912622437420874e-08, + "loss": 3.9092, + "step": 712500 + }, + { + "epoch": 7.92, + "learning_rate": 7.918175154920818e-08, + "loss": 3.8802, + "step": 713000 + }, + { + "epoch": 7.92, + "learning_rate": 7.923727872420762e-08, + "loss": 3.9023, + "step": 713500 + }, + { + "epoch": 7.93, + "learning_rate": 7.929280589920707e-08, + "loss": 3.9171, + "step": 714000 + }, + { + "epoch": 7.93, + "learning_rate": 7.934833307420651e-08, + "loss": 3.8838, + "step": 714500 + }, + { + "epoch": 7.94, + "learning_rate": 7.940386024920596e-08, + "loss": 3.8974, + "step": 715000 + }, + { + "epoch": 7.95, + "learning_rate": 7.94593874242054e-08, + "loss": 3.9014, + "step": 715500 + }, + { + "epoch": 7.95, + "learning_rate": 7.951491459920486e-08, + "loss": 3.8962, + "step": 716000 + }, + { + "epoch": 7.96, + "learning_rate": 7.957044177420429e-08, + "loss": 3.9001, + "step": 716500 + }, + { + "epoch": 7.96, + "learning_rate": 7.962596894920373e-08, + "loss": 3.9074, + "step": 717000 + }, + { + "epoch": 7.97, + "learning_rate": 7.968149612420319e-08, + "loss": 3.8985, + "step": 717500 + }, + { + "epoch": 7.97, + "learning_rate": 7.973702329920262e-08, + "loss": 3.9001, + "step": 718000 + }, + { + "epoch": 7.98, + "learning_rate": 7.979255047420207e-08, + "loss": 3.9109, + "step": 718500 + }, + { + "epoch": 7.98, + "learning_rate": 7.984807764920152e-08, + "loss": 3.8939, + "step": 719000 + }, + { + "epoch": 7.99, + "learning_rate": 7.990360482420097e-08, + "loss": 3.9072, + "step": 719500 + }, + { + "epoch": 8.0, + "learning_rate": 7.99591319992004e-08, + "loss": 3.9048, + "step": 720000 + }, + { + "epoch": 8.0, + "eval_loss": 3.9263479709625244, + "eval_runtime": 6.3049, + "eval_samples_per_second": 246.475, + "step": 720368 + }, + { + "epoch": 8.0, + "learning_rate": 8.001465917419986e-08, + "loss": 3.9229, + "step": 720500 + }, + { + "epoch": 8.01, + "learning_rate": 8.00701863491993e-08, + "loss": 3.8859, + "step": 721000 + }, + { + "epoch": 8.01, + "learning_rate": 8.012571352419873e-08, + "loss": 3.9058, + "step": 721500 + }, + { + "epoch": 8.02, + "learning_rate": 8.018124069919819e-08, + "loss": 3.9237, + "step": 722000 + }, + { + "epoch": 8.02, + "learning_rate": 8.023676787419763e-08, + "loss": 3.9156, + "step": 722500 + }, + { + "epoch": 8.03, + "learning_rate": 8.029229504919708e-08, + "loss": 3.8977, + "step": 723000 + }, + { + "epoch": 8.03, + "learning_rate": 8.034782222419652e-08, + "loss": 3.894, + "step": 723500 + }, + { + "epoch": 8.04, + "learning_rate": 8.040334939919596e-08, + "loss": 3.8925, + "step": 724000 + }, + { + "epoch": 8.05, + "learning_rate": 8.045887657419541e-08, + "loss": 3.8966, + "step": 724500 + }, + { + "epoch": 8.05, + "learning_rate": 8.051440374919485e-08, + "loss": 3.9057, + "step": 725000 + }, + { + "epoch": 8.06, + "learning_rate": 8.05699309241943e-08, + "loss": 3.9155, + "step": 725500 + }, + { + "epoch": 8.06, + "learning_rate": 8.062545809919374e-08, + "loss": 3.8936, + "step": 726000 + }, + { + "epoch": 8.07, + "learning_rate": 8.068098527419319e-08, + "loss": 3.8901, + "step": 726500 + }, + { + "epoch": 8.07, + "learning_rate": 8.073651244919263e-08, + "loss": 3.9014, + "step": 727000 + }, + { + "epoch": 8.08, + "learning_rate": 8.079203962419207e-08, + "loss": 3.8952, + "step": 727500 + }, + { + "epoch": 8.08, + "learning_rate": 8.084756679919152e-08, + "loss": 3.8791, + "step": 728000 + }, + { + "epoch": 8.09, + "learning_rate": 8.090309397419096e-08, + "loss": 3.8913, + "step": 728500 + }, + { + "epoch": 8.1, + "learning_rate": 8.09586211491904e-08, + "loss": 3.9013, + "step": 729000 + }, + { + "epoch": 8.1, + "learning_rate": 8.101414832418985e-08, + "loss": 3.9127, + "step": 729500 + }, + { + "epoch": 8.11, + "learning_rate": 8.106967549918931e-08, + "loss": 3.8911, + "step": 730000 + }, + { + "epoch": 8.11, + "learning_rate": 8.112520267418874e-08, + "loss": 3.9138, + "step": 730500 + }, + { + "epoch": 8.12, + "learning_rate": 8.118072984918818e-08, + "loss": 3.8962, + "step": 731000 + }, + { + "epoch": 8.12, + "learning_rate": 8.123625702418764e-08, + "loss": 3.8867, + "step": 731500 + }, + { + "epoch": 8.13, + "learning_rate": 8.129178419918707e-08, + "loss": 3.8883, + "step": 732000 + }, + { + "epoch": 8.13, + "learning_rate": 8.134731137418652e-08, + "loss": 3.8978, + "step": 732500 + }, + { + "epoch": 8.14, + "learning_rate": 8.140283854918597e-08, + "loss": 3.8904, + "step": 733000 + }, + { + "epoch": 8.15, + "learning_rate": 8.14583657241854e-08, + "loss": 3.9044, + "step": 733500 + }, + { + "epoch": 8.15, + "learning_rate": 8.151389289918486e-08, + "loss": 3.8837, + "step": 734000 + }, + { + "epoch": 8.16, + "learning_rate": 8.15694200741843e-08, + "loss": 3.8745, + "step": 734500 + }, + { + "epoch": 8.16, + "learning_rate": 8.162494724918375e-08, + "loss": 3.8998, + "step": 735000 + }, + { + "epoch": 8.17, + "learning_rate": 8.16804744241832e-08, + "loss": 3.886, + "step": 735500 + }, + { + "epoch": 8.17, + "learning_rate": 8.173600159918264e-08, + "loss": 3.8898, + "step": 736000 + }, + { + "epoch": 8.18, + "learning_rate": 8.179152877418208e-08, + "loss": 3.8781, + "step": 736500 + }, + { + "epoch": 8.18, + "learning_rate": 8.184705594918153e-08, + "loss": 3.8862, + "step": 737000 + }, + { + "epoch": 8.19, + "learning_rate": 8.190258312418097e-08, + "loss": 3.9022, + "step": 737500 + }, + { + "epoch": 8.2, + "learning_rate": 8.195811029918042e-08, + "loss": 3.8999, + "step": 738000 + }, + { + "epoch": 8.2, + "learning_rate": 8.201363747417986e-08, + "loss": 3.9063, + "step": 738500 + }, + { + "epoch": 8.21, + "learning_rate": 8.20691646491793e-08, + "loss": 3.8978, + "step": 739000 + }, + { + "epoch": 8.21, + "learning_rate": 8.212469182417875e-08, + "loss": 3.8926, + "step": 739500 + }, + { + "epoch": 8.22, + "learning_rate": 8.218021899917819e-08, + "loss": 3.9185, + "step": 740000 + }, + { + "epoch": 8.22, + "learning_rate": 8.223574617417764e-08, + "loss": 3.8823, + "step": 740500 + }, + { + "epoch": 8.23, + "learning_rate": 8.229127334917708e-08, + "loss": 3.9003, + "step": 741000 + }, + { + "epoch": 8.23, + "learning_rate": 8.234680052417652e-08, + "loss": 3.8836, + "step": 741500 + }, + { + "epoch": 8.24, + "learning_rate": 8.240232769917598e-08, + "loss": 3.9047, + "step": 742000 + }, + { + "epoch": 8.25, + "learning_rate": 8.245785487417541e-08, + "loss": 3.9053, + "step": 742500 + }, + { + "epoch": 8.25, + "learning_rate": 8.251338204917486e-08, + "loss": 3.8932, + "step": 743000 + }, + { + "epoch": 8.26, + "learning_rate": 8.256890922417431e-08, + "loss": 3.8972, + "step": 743500 + }, + { + "epoch": 8.26, + "learning_rate": 8.262443639917375e-08, + "loss": 3.8906, + "step": 744000 + }, + { + "epoch": 8.27, + "learning_rate": 8.267996357417319e-08, + "loss": 3.9004, + "step": 744500 + }, + { + "epoch": 8.27, + "learning_rate": 8.273549074917265e-08, + "loss": 3.9118, + "step": 745000 + }, + { + "epoch": 8.28, + "learning_rate": 8.279101792417209e-08, + "loss": 3.8961, + "step": 745500 + }, + { + "epoch": 8.28, + "learning_rate": 8.284654509917152e-08, + "loss": 3.8916, + "step": 746000 + }, + { + "epoch": 8.29, + "learning_rate": 8.290207227417098e-08, + "loss": 3.9015, + "step": 746500 + }, + { + "epoch": 8.3, + "learning_rate": 8.295759944917042e-08, + "loss": 3.902, + "step": 747000 + }, + { + "epoch": 8.3, + "learning_rate": 8.301312662416985e-08, + "loss": 3.9076, + "step": 747500 + }, + { + "epoch": 8.31, + "learning_rate": 8.306865379916931e-08, + "loss": 3.8931, + "step": 748000 + }, + { + "epoch": 8.31, + "learning_rate": 8.312418097416876e-08, + "loss": 3.9017, + "step": 748500 + }, + { + "epoch": 8.32, + "learning_rate": 8.31797081491682e-08, + "loss": 3.9095, + "step": 749000 + }, + { + "epoch": 8.32, + "learning_rate": 8.323523532416764e-08, + "loss": 3.8899, + "step": 749500 + }, + { + "epoch": 8.33, + "learning_rate": 8.329076249916709e-08, + "loss": 3.8843, + "step": 750000 + }, + { + "epoch": 8.33, + "learning_rate": 8.334628967416653e-08, + "loss": 3.89, + "step": 750500 + }, + { + "epoch": 8.34, + "learning_rate": 8.340181684916598e-08, + "loss": 3.8959, + "step": 751000 + }, + { + "epoch": 8.35, + "learning_rate": 8.345734402416542e-08, + "loss": 3.9121, + "step": 751500 + }, + { + "epoch": 8.35, + "learning_rate": 8.351287119916487e-08, + "loss": 3.8851, + "step": 752000 + }, + { + "epoch": 8.36, + "learning_rate": 8.356839837416432e-08, + "loss": 3.8879, + "step": 752500 + }, + { + "epoch": 8.36, + "learning_rate": 8.362392554916375e-08, + "loss": 3.884, + "step": 753000 + }, + { + "epoch": 8.37, + "learning_rate": 8.36794527241632e-08, + "loss": 3.8702, + "step": 753500 + }, + { + "epoch": 8.37, + "learning_rate": 8.373497989916266e-08, + "loss": 3.8989, + "step": 754000 + }, + { + "epoch": 8.38, + "learning_rate": 8.379050707416209e-08, + "loss": 3.9003, + "step": 754500 + }, + { + "epoch": 8.38, + "learning_rate": 8.384603424916153e-08, + "loss": 3.8784, + "step": 755000 + }, + { + "epoch": 8.39, + "learning_rate": 8.390156142416099e-08, + "loss": 3.8971, + "step": 755500 + }, + { + "epoch": 8.4, + "learning_rate": 8.395708859916043e-08, + "loss": 3.8979, + "step": 756000 + }, + { + "epoch": 8.4, + "learning_rate": 8.401261577415986e-08, + "loss": 3.8878, + "step": 756500 + }, + { + "epoch": 8.41, + "learning_rate": 8.406814294915932e-08, + "loss": 3.8994, + "step": 757000 + }, + { + "epoch": 8.41, + "learning_rate": 8.412367012415876e-08, + "loss": 3.8919, + "step": 757500 + }, + { + "epoch": 8.42, + "learning_rate": 8.41791972991582e-08, + "loss": 3.8979, + "step": 758000 + }, + { + "epoch": 8.42, + "learning_rate": 8.423472447415765e-08, + "loss": 3.8847, + "step": 758500 + }, + { + "epoch": 8.43, + "learning_rate": 8.42902516491571e-08, + "loss": 3.8884, + "step": 759000 + }, + { + "epoch": 8.43, + "learning_rate": 8.434577882415654e-08, + "loss": 3.8885, + "step": 759500 + }, + { + "epoch": 8.44, + "learning_rate": 8.440130599915599e-08, + "loss": 3.8975, + "step": 760000 + }, + { + "epoch": 8.45, + "learning_rate": 8.445683317415543e-08, + "loss": 3.8973, + "step": 760500 + }, + { + "epoch": 8.45, + "learning_rate": 8.451236034915487e-08, + "loss": 3.9104, + "step": 761000 + }, + { + "epoch": 8.46, + "learning_rate": 8.456788752415432e-08, + "loss": 3.908, + "step": 761500 + }, + { + "epoch": 8.46, + "learning_rate": 8.462341469915376e-08, + "loss": 3.8881, + "step": 762000 + }, + { + "epoch": 8.47, + "learning_rate": 8.46789418741532e-08, + "loss": 3.9037, + "step": 762500 + }, + { + "epoch": 8.47, + "learning_rate": 8.473446904915265e-08, + "loss": 3.8861, + "step": 763000 + }, + { + "epoch": 8.48, + "learning_rate": 8.47899962241521e-08, + "loss": 3.8965, + "step": 763500 + }, + { + "epoch": 8.48, + "learning_rate": 8.484552339915154e-08, + "loss": 3.8883, + "step": 764000 + }, + { + "epoch": 8.49, + "learning_rate": 8.4901050574151e-08, + "loss": 3.8852, + "step": 764500 + }, + { + "epoch": 8.5, + "learning_rate": 8.495657774915043e-08, + "loss": 3.9045, + "step": 765000 + }, + { + "epoch": 8.5, + "learning_rate": 8.501210492414987e-08, + "loss": 3.895, + "step": 765500 + }, + { + "epoch": 8.51, + "learning_rate": 8.506763209914933e-08, + "loss": 3.8931, + "step": 766000 + }, + { + "epoch": 8.51, + "learning_rate": 8.512315927414876e-08, + "loss": 3.8788, + "step": 766500 + }, + { + "epoch": 8.52, + "learning_rate": 8.51786864491482e-08, + "loss": 3.8993, + "step": 767000 + }, + { + "epoch": 8.52, + "learning_rate": 8.523421362414766e-08, + "loss": 3.9073, + "step": 767500 + }, + { + "epoch": 8.53, + "learning_rate": 8.52897407991471e-08, + "loss": 3.8985, + "step": 768000 + }, + { + "epoch": 8.53, + "learning_rate": 8.534526797414654e-08, + "loss": 3.8953, + "step": 768500 + }, + { + "epoch": 8.54, + "learning_rate": 8.5400795149146e-08, + "loss": 3.9153, + "step": 769000 + }, + { + "epoch": 8.55, + "learning_rate": 8.545632232414544e-08, + "loss": 3.8869, + "step": 769500 + }, + { + "epoch": 8.55, + "learning_rate": 8.551184949914487e-08, + "loss": 3.9031, + "step": 770000 + }, + { + "epoch": 8.56, + "learning_rate": 8.556737667414433e-08, + "loss": 3.8703, + "step": 770500 + }, + { + "epoch": 8.56, + "learning_rate": 8.562290384914377e-08, + "loss": 3.9019, + "step": 771000 + }, + { + "epoch": 8.57, + "learning_rate": 8.567843102414321e-08, + "loss": 3.9049, + "step": 771500 + }, + { + "epoch": 8.57, + "learning_rate": 8.573395819914266e-08, + "loss": 3.8816, + "step": 772000 + }, + { + "epoch": 8.58, + "learning_rate": 8.57894853741421e-08, + "loss": 3.8858, + "step": 772500 + }, + { + "epoch": 8.58, + "learning_rate": 8.584501254914155e-08, + "loss": 3.9054, + "step": 773000 + }, + { + "epoch": 8.59, + "learning_rate": 8.590053972414099e-08, + "loss": 3.8777, + "step": 773500 + }, + { + "epoch": 8.6, + "learning_rate": 8.595606689914044e-08, + "loss": 3.8941, + "step": 774000 + }, + { + "epoch": 8.6, + "learning_rate": 8.601159407413988e-08, + "loss": 3.883, + "step": 774500 + }, + { + "epoch": 8.61, + "learning_rate": 8.606712124913932e-08, + "loss": 3.8956, + "step": 775000 + }, + { + "epoch": 8.61, + "learning_rate": 8.612264842413877e-08, + "loss": 3.8736, + "step": 775500 + }, + { + "epoch": 8.62, + "learning_rate": 8.617817559913821e-08, + "loss": 3.8941, + "step": 776000 + }, + { + "epoch": 8.62, + "learning_rate": 8.623370277413766e-08, + "loss": 3.8871, + "step": 776500 + }, + { + "epoch": 8.63, + "learning_rate": 8.62892299491371e-08, + "loss": 3.8699, + "step": 777000 + }, + { + "epoch": 8.63, + "learning_rate": 8.634475712413654e-08, + "loss": 3.8748, + "step": 777500 + }, + { + "epoch": 8.64, + "learning_rate": 8.640028429913599e-08, + "loss": 3.8798, + "step": 778000 + }, + { + "epoch": 8.65, + "learning_rate": 8.645581147413545e-08, + "loss": 3.9068, + "step": 778500 + }, + { + "epoch": 8.65, + "learning_rate": 8.651133864913488e-08, + "loss": 3.8883, + "step": 779000 + }, + { + "epoch": 8.66, + "learning_rate": 8.656686582413432e-08, + "loss": 3.8784, + "step": 779500 + }, + { + "epoch": 8.66, + "learning_rate": 8.662239299913378e-08, + "loss": 3.8872, + "step": 780000 + }, + { + "epoch": 8.67, + "learning_rate": 8.667792017413321e-08, + "loss": 3.8873, + "step": 780500 + }, + { + "epoch": 8.67, + "learning_rate": 8.673344734913265e-08, + "loss": 3.8939, + "step": 781000 + }, + { + "epoch": 8.68, + "learning_rate": 8.678897452413211e-08, + "loss": 3.9098, + "step": 781500 + }, + { + "epoch": 8.68, + "learning_rate": 8.684450169913156e-08, + "loss": 3.8935, + "step": 782000 + }, + { + "epoch": 8.69, + "learning_rate": 8.690002887413099e-08, + "loss": 3.8902, + "step": 782500 + }, + { + "epoch": 8.7, + "learning_rate": 8.695555604913044e-08, + "loss": 3.8944, + "step": 783000 + }, + { + "epoch": 8.7, + "learning_rate": 8.701108322412989e-08, + "loss": 3.8918, + "step": 783500 + }, + { + "epoch": 8.71, + "learning_rate": 8.706661039912932e-08, + "loss": 3.8821, + "step": 784000 + }, + { + "epoch": 8.71, + "learning_rate": 8.712213757412878e-08, + "loss": 3.877, + "step": 784500 + }, + { + "epoch": 8.72, + "learning_rate": 8.717766474912822e-08, + "loss": 3.8996, + "step": 785000 + }, + { + "epoch": 8.72, + "learning_rate": 8.723319192412766e-08, + "loss": 3.9013, + "step": 785500 + }, + { + "epoch": 8.73, + "learning_rate": 8.728871909912711e-08, + "loss": 3.9013, + "step": 786000 + }, + { + "epoch": 8.73, + "learning_rate": 8.734424627412655e-08, + "loss": 3.907, + "step": 786500 + }, + { + "epoch": 8.74, + "learning_rate": 8.7399773449126e-08, + "loss": 3.8645, + "step": 787000 + }, + { + "epoch": 8.75, + "learning_rate": 8.745530062412544e-08, + "loss": 3.88, + "step": 787500 + }, + { + "epoch": 8.75, + "learning_rate": 8.751082779912489e-08, + "loss": 3.8873, + "step": 788000 + }, + { + "epoch": 8.76, + "learning_rate": 8.756635497412433e-08, + "loss": 3.884, + "step": 788500 + }, + { + "epoch": 8.76, + "learning_rate": 8.762188214912379e-08, + "loss": 3.8772, + "step": 789000 + }, + { + "epoch": 8.77, + "learning_rate": 8.767740932412322e-08, + "loss": 3.9034, + "step": 789500 + }, + { + "epoch": 8.77, + "learning_rate": 8.773293649912266e-08, + "loss": 3.869, + "step": 790000 + }, + { + "epoch": 8.78, + "learning_rate": 8.778846367412212e-08, + "loss": 3.8748, + "step": 790500 + }, + { + "epoch": 8.78, + "learning_rate": 8.784399084912155e-08, + "loss": 3.9012, + "step": 791000 + }, + { + "epoch": 8.79, + "learning_rate": 8.7899518024121e-08, + "loss": 3.894, + "step": 791500 + }, + { + "epoch": 8.8, + "learning_rate": 8.795504519912045e-08, + "loss": 3.8737, + "step": 792000 + }, + { + "epoch": 8.8, + "learning_rate": 8.80105723741199e-08, + "loss": 3.8753, + "step": 792500 + }, + { + "epoch": 8.81, + "learning_rate": 8.806609954911933e-08, + "loss": 3.8971, + "step": 793000 + }, + { + "epoch": 8.81, + "learning_rate": 8.812162672411879e-08, + "loss": 3.9061, + "step": 793500 + }, + { + "epoch": 8.82, + "learning_rate": 8.817715389911823e-08, + "loss": 3.8857, + "step": 794000 + }, + { + "epoch": 8.82, + "learning_rate": 8.823268107411766e-08, + "loss": 3.9059, + "step": 794500 + }, + { + "epoch": 8.83, + "learning_rate": 8.828820824911712e-08, + "loss": 3.889, + "step": 795000 + }, + { + "epoch": 8.83, + "learning_rate": 8.834373542411656e-08, + "loss": 3.8867, + "step": 795500 + }, + { + "epoch": 8.84, + "learning_rate": 8.839926259911599e-08, + "loss": 3.8931, + "step": 796000 + }, + { + "epoch": 8.85, + "learning_rate": 8.845478977411545e-08, + "loss": 3.8838, + "step": 796500 + }, + { + "epoch": 8.85, + "learning_rate": 8.85103169491149e-08, + "loss": 3.8986, + "step": 797000 + }, + { + "epoch": 8.86, + "learning_rate": 8.856584412411434e-08, + "loss": 3.9002, + "step": 797500 + }, + { + "epoch": 8.86, + "learning_rate": 8.862137129911378e-08, + "loss": 3.8997, + "step": 798000 + }, + { + "epoch": 8.87, + "learning_rate": 8.867689847411323e-08, + "loss": 3.8823, + "step": 798500 + }, + { + "epoch": 8.87, + "learning_rate": 8.873242564911267e-08, + "loss": 3.8719, + "step": 799000 + }, + { + "epoch": 8.88, + "learning_rate": 8.878795282411212e-08, + "loss": 3.8995, + "step": 799500 + }, + { + "epoch": 8.88, + "learning_rate": 8.884347999911156e-08, + "loss": 3.8661, + "step": 800000 + }, + { + "epoch": 8.89, + "learning_rate": 8.8899007174111e-08, + "loss": 3.8937, + "step": 800500 + }, + { + "epoch": 8.9, + "learning_rate": 8.895453434911046e-08, + "loss": 3.8786, + "step": 801000 + }, + { + "epoch": 8.9, + "learning_rate": 8.901006152410989e-08, + "loss": 3.9042, + "step": 801500 + }, + { + "epoch": 8.91, + "learning_rate": 8.906558869910934e-08, + "loss": 3.8988, + "step": 802000 + }, + { + "epoch": 8.91, + "learning_rate": 8.91211158741088e-08, + "loss": 3.8838, + "step": 802500 + }, + { + "epoch": 8.92, + "learning_rate": 8.917664304910822e-08, + "loss": 3.8926, + "step": 803000 + }, + { + "epoch": 8.92, + "learning_rate": 8.923217022410767e-08, + "loss": 3.8851, + "step": 803500 + }, + { + "epoch": 8.93, + "learning_rate": 8.928769739910713e-08, + "loss": 3.8916, + "step": 804000 + }, + { + "epoch": 8.93, + "learning_rate": 8.934322457410657e-08, + "loss": 3.879, + "step": 804500 + }, + { + "epoch": 8.94, + "learning_rate": 8.9398751749106e-08, + "loss": 3.8766, + "step": 805000 + }, + { + "epoch": 8.95, + "learning_rate": 8.945427892410546e-08, + "loss": 3.8952, + "step": 805500 + }, + { + "epoch": 8.95, + "learning_rate": 8.95098060991049e-08, + "loss": 3.8778, + "step": 806000 + }, + { + "epoch": 8.96, + "learning_rate": 8.956533327410433e-08, + "loss": 3.8736, + "step": 806500 + }, + { + "epoch": 8.96, + "learning_rate": 8.962086044910379e-08, + "loss": 3.9052, + "step": 807000 + }, + { + "epoch": 8.97, + "learning_rate": 8.967638762410324e-08, + "loss": 3.8695, + "step": 807500 + }, + { + "epoch": 8.97, + "learning_rate": 8.973191479910268e-08, + "loss": 3.8882, + "step": 808000 + }, + { + "epoch": 8.98, + "learning_rate": 8.978744197410212e-08, + "loss": 3.8876, + "step": 808500 + }, + { + "epoch": 8.98, + "learning_rate": 8.984296914910157e-08, + "loss": 3.8702, + "step": 809000 + }, + { + "epoch": 8.99, + "learning_rate": 8.989849632410101e-08, + "loss": 3.8824, + "step": 809500 + }, + { + "epoch": 9.0, + "learning_rate": 8.995402349910046e-08, + "loss": 3.8871, + "step": 810000 + }, + { + "epoch": 9.0, + "eval_loss": 3.914140462875366, + "eval_runtime": 6.3069, + "eval_samples_per_second": 246.396, + "step": 810414 + }, + { + "epoch": 9.0, + "learning_rate": 9.00095506740999e-08, + "loss": 3.8657, + "step": 810500 + }, + { + "epoch": 9.01, + "learning_rate": 9.006507784909934e-08, + "loss": 3.8844, + "step": 811000 + }, + { + "epoch": 9.01, + "learning_rate": 9.012060502409879e-08, + "loss": 3.8874, + "step": 811500 + }, + { + "epoch": 9.02, + "learning_rate": 9.017613219909823e-08, + "loss": 3.8844, + "step": 812000 + }, + { + "epoch": 9.02, + "learning_rate": 9.023165937409768e-08, + "loss": 3.8817, + "step": 812500 + }, + { + "epoch": 9.03, + "learning_rate": 9.028718654909712e-08, + "loss": 3.8831, + "step": 813000 + }, + { + "epoch": 9.03, + "learning_rate": 9.034271372409657e-08, + "loss": 3.8799, + "step": 813500 + }, + { + "epoch": 9.04, + "learning_rate": 9.039824089909601e-08, + "loss": 3.8895, + "step": 814000 + }, + { + "epoch": 9.05, + "learning_rate": 9.045376807409545e-08, + "loss": 3.8962, + "step": 814500 + }, + { + "epoch": 9.05, + "learning_rate": 9.050929524909491e-08, + "loss": 3.8889, + "step": 815000 + }, + { + "epoch": 9.06, + "learning_rate": 9.056482242409434e-08, + "loss": 3.8899, + "step": 815500 + }, + { + "epoch": 9.06, + "learning_rate": 9.062034959909379e-08, + "loss": 3.8798, + "step": 816000 + }, + { + "epoch": 9.07, + "learning_rate": 9.067587677409324e-08, + "loss": 3.8775, + "step": 816500 + }, + { + "epoch": 9.07, + "learning_rate": 9.073140394909267e-08, + "loss": 3.886, + "step": 817000 + }, + { + "epoch": 9.08, + "learning_rate": 9.078693112409212e-08, + "loss": 3.8718, + "step": 817500 + }, + { + "epoch": 9.08, + "learning_rate": 9.084245829909158e-08, + "loss": 3.8816, + "step": 818000 + }, + { + "epoch": 9.09, + "learning_rate": 9.089798547409102e-08, + "loss": 3.8599, + "step": 818500 + }, + { + "epoch": 9.1, + "learning_rate": 9.095351264909046e-08, + "loss": 3.8926, + "step": 819000 + }, + { + "epoch": 9.1, + "learning_rate": 9.100903982408991e-08, + "loss": 3.8887, + "step": 819500 + }, + { + "epoch": 9.11, + "learning_rate": 9.106456699908935e-08, + "loss": 3.8721, + "step": 820000 + }, + { + "epoch": 9.11, + "learning_rate": 9.11200941740888e-08, + "loss": 3.8773, + "step": 820500 + }, + { + "epoch": 9.12, + "learning_rate": 9.117562134908824e-08, + "loss": 3.8793, + "step": 821000 + }, + { + "epoch": 9.12, + "learning_rate": 9.123114852408769e-08, + "loss": 3.8708, + "step": 821500 + }, + { + "epoch": 9.13, + "learning_rate": 9.128667569908713e-08, + "loss": 3.8767, + "step": 822000 + }, + { + "epoch": 9.13, + "learning_rate": 9.134220287408657e-08, + "loss": 3.8931, + "step": 822500 + }, + { + "epoch": 9.14, + "learning_rate": 9.139773004908602e-08, + "loss": 3.8804, + "step": 823000 + }, + { + "epoch": 9.15, + "learning_rate": 9.145325722408546e-08, + "loss": 3.8907, + "step": 823500 + }, + { + "epoch": 9.15, + "learning_rate": 9.15087843990849e-08, + "loss": 3.8805, + "step": 824000 + }, + { + "epoch": 9.16, + "learning_rate": 9.156431157408435e-08, + "loss": 3.8702, + "step": 824500 + }, + { + "epoch": 9.16, + "learning_rate": 9.16198387490838e-08, + "loss": 3.8796, + "step": 825000 + }, + { + "epoch": 9.17, + "learning_rate": 9.167536592408324e-08, + "loss": 3.8664, + "step": 825500 + }, + { + "epoch": 9.17, + "learning_rate": 9.173089309908268e-08, + "loss": 3.8737, + "step": 826000 + }, + { + "epoch": 9.18, + "learning_rate": 9.178642027408213e-08, + "loss": 3.8827, + "step": 826500 + }, + { + "epoch": 9.18, + "learning_rate": 9.184194744908158e-08, + "loss": 3.8768, + "step": 827000 + }, + { + "epoch": 9.19, + "learning_rate": 9.189747462408102e-08, + "loss": 3.879, + "step": 827500 + }, + { + "epoch": 9.2, + "learning_rate": 9.195300179908046e-08, + "loss": 3.8686, + "step": 828000 + }, + { + "epoch": 9.2, + "learning_rate": 9.200852897407992e-08, + "loss": 3.8816, + "step": 828500 + }, + { + "epoch": 9.21, + "learning_rate": 9.206405614907935e-08, + "loss": 3.8592, + "step": 829000 + }, + { + "epoch": 9.21, + "learning_rate": 9.211958332407879e-08, + "loss": 3.8852, + "step": 829500 + }, + { + "epoch": 9.22, + "learning_rate": 9.217511049907825e-08, + "loss": 3.8721, + "step": 830000 + }, + { + "epoch": 9.22, + "learning_rate": 9.22306376740777e-08, + "loss": 3.8841, + "step": 830500 + }, + { + "epoch": 9.23, + "learning_rate": 9.228616484907712e-08, + "loss": 3.8828, + "step": 831000 + }, + { + "epoch": 9.23, + "learning_rate": 9.234169202407658e-08, + "loss": 3.8903, + "step": 831500 + }, + { + "epoch": 9.24, + "learning_rate": 9.239721919907603e-08, + "loss": 3.8985, + "step": 832000 + }, + { + "epoch": 9.25, + "learning_rate": 9.245274637407546e-08, + "loss": 3.8794, + "step": 832500 + }, + { + "epoch": 9.25, + "learning_rate": 9.250827354907491e-08, + "loss": 3.8764, + "step": 833000 + }, + { + "epoch": 9.26, + "learning_rate": 9.256380072407436e-08, + "loss": 3.8839, + "step": 833500 + }, + { + "epoch": 9.26, + "learning_rate": 9.26193278990738e-08, + "loss": 3.8986, + "step": 834000 + }, + { + "epoch": 9.27, + "learning_rate": 9.267485507407325e-08, + "loss": 3.8807, + "step": 834500 + }, + { + "epoch": 9.27, + "learning_rate": 9.273038224907269e-08, + "loss": 3.8898, + "step": 835000 + }, + { + "epoch": 9.28, + "learning_rate": 9.278590942407214e-08, + "loss": 3.8828, + "step": 835500 + }, + { + "epoch": 9.28, + "learning_rate": 9.284143659907158e-08, + "loss": 3.8808, + "step": 836000 + }, + { + "epoch": 9.29, + "learning_rate": 9.289696377407102e-08, + "loss": 3.8742, + "step": 836500 + }, + { + "epoch": 9.3, + "learning_rate": 9.295249094907047e-08, + "loss": 3.8849, + "step": 837000 + }, + { + "epoch": 9.3, + "learning_rate": 9.300801812406993e-08, + "loss": 3.8946, + "step": 837500 + }, + { + "epoch": 9.31, + "learning_rate": 9.306354529906936e-08, + "loss": 3.8681, + "step": 838000 + }, + { + "epoch": 9.31, + "learning_rate": 9.31190724740688e-08, + "loss": 3.8925, + "step": 838500 + }, + { + "epoch": 9.32, + "learning_rate": 9.317459964906826e-08, + "loss": 3.877, + "step": 839000 + }, + { + "epoch": 9.32, + "learning_rate": 9.323012682406769e-08, + "loss": 3.8872, + "step": 839500 + }, + { + "epoch": 9.33, + "learning_rate": 9.328565399906713e-08, + "loss": 3.883, + "step": 840000 + }, + { + "epoch": 9.33, + "learning_rate": 9.334118117406659e-08, + "loss": 3.8679, + "step": 840500 + }, + { + "epoch": 9.34, + "learning_rate": 9.339670834906603e-08, + "loss": 3.8822, + "step": 841000 + }, + { + "epoch": 9.35, + "learning_rate": 9.345223552406547e-08, + "loss": 3.8719, + "step": 841500 + }, + { + "epoch": 9.35, + "learning_rate": 9.350776269906492e-08, + "loss": 3.8754, + "step": 842000 + }, + { + "epoch": 9.36, + "learning_rate": 9.356328987406437e-08, + "loss": 3.8793, + "step": 842500 + }, + { + "epoch": 9.36, + "learning_rate": 9.36188170490638e-08, + "loss": 3.8817, + "step": 843000 + }, + { + "epoch": 9.37, + "learning_rate": 9.367434422406326e-08, + "loss": 3.878, + "step": 843500 + }, + { + "epoch": 9.37, + "learning_rate": 9.37298713990627e-08, + "loss": 3.8828, + "step": 844000 + }, + { + "epoch": 9.38, + "learning_rate": 9.378539857406214e-08, + "loss": 3.866, + "step": 844500 + }, + { + "epoch": 9.38, + "learning_rate": 9.384092574906159e-08, + "loss": 3.864, + "step": 845000 + }, + { + "epoch": 9.39, + "learning_rate": 9.389645292406103e-08, + "loss": 3.8654, + "step": 845500 + }, + { + "epoch": 9.4, + "learning_rate": 9.395198009906048e-08, + "loss": 3.8571, + "step": 846000 + }, + { + "epoch": 9.4, + "learning_rate": 9.400750727405992e-08, + "loss": 3.8963, + "step": 846500 + }, + { + "epoch": 9.41, + "learning_rate": 9.406303444905937e-08, + "loss": 3.8774, + "step": 847000 + }, + { + "epoch": 9.41, + "learning_rate": 9.411856162405881e-08, + "loss": 3.8724, + "step": 847500 + }, + { + "epoch": 9.42, + "learning_rate": 9.417408879905825e-08, + "loss": 3.8785, + "step": 848000 + }, + { + "epoch": 9.42, + "learning_rate": 9.42296159740577e-08, + "loss": 3.8871, + "step": 848500 + }, + { + "epoch": 9.43, + "learning_rate": 9.428514314905714e-08, + "loss": 3.8695, + "step": 849000 + }, + { + "epoch": 9.43, + "learning_rate": 9.43406703240566e-08, + "loss": 3.8861, + "step": 849500 + }, + { + "epoch": 9.44, + "learning_rate": 9.439619749905603e-08, + "loss": 3.8775, + "step": 850000 + }, + { + "epoch": 9.45, + "learning_rate": 9.445172467405547e-08, + "loss": 3.8893, + "step": 850500 + }, + { + "epoch": 9.45, + "learning_rate": 9.450725184905493e-08, + "loss": 3.8753, + "step": 851000 + }, + { + "epoch": 9.46, + "learning_rate": 9.456277902405438e-08, + "loss": 3.882, + "step": 851500 + }, + { + "epoch": 9.46, + "learning_rate": 9.461830619905381e-08, + "loss": 3.8722, + "step": 852000 + }, + { + "epoch": 9.47, + "learning_rate": 9.467383337405326e-08, + "loss": 3.8835, + "step": 852500 + }, + { + "epoch": 9.47, + "learning_rate": 9.472936054905271e-08, + "loss": 3.8712, + "step": 853000 + }, + { + "epoch": 9.48, + "learning_rate": 9.478488772405214e-08, + "loss": 3.8814, + "step": 853500 + }, + { + "epoch": 9.48, + "learning_rate": 9.48404148990516e-08, + "loss": 3.882, + "step": 854000 + }, + { + "epoch": 9.49, + "learning_rate": 9.489594207405104e-08, + "loss": 3.8849, + "step": 854500 + }, + { + "epoch": 9.5, + "learning_rate": 9.495146924905049e-08, + "loss": 3.8773, + "step": 855000 + }, + { + "epoch": 9.5, + "learning_rate": 9.500699642404993e-08, + "loss": 3.8978, + "step": 855500 + }, + { + "epoch": 9.51, + "learning_rate": 9.506252359904937e-08, + "loss": 3.8764, + "step": 856000 + }, + { + "epoch": 9.51, + "learning_rate": 9.511805077404882e-08, + "loss": 3.8656, + "step": 856500 + }, + { + "epoch": 9.52, + "learning_rate": 9.517357794904826e-08, + "loss": 3.8782, + "step": 857000 + }, + { + "epoch": 9.52, + "learning_rate": 9.52291051240477e-08, + "loss": 3.8779, + "step": 857500 + }, + { + "epoch": 9.53, + "learning_rate": 9.528463229904715e-08, + "loss": 3.8603, + "step": 858000 + }, + { + "epoch": 9.53, + "learning_rate": 9.53401594740466e-08, + "loss": 3.8769, + "step": 858500 + }, + { + "epoch": 9.54, + "learning_rate": 9.539568664904604e-08, + "loss": 3.8872, + "step": 859000 + }, + { + "epoch": 9.55, + "learning_rate": 9.545121382404548e-08, + "loss": 3.8783, + "step": 859500 + }, + { + "epoch": 9.55, + "learning_rate": 9.550674099904493e-08, + "loss": 3.8729, + "step": 860000 + }, + { + "epoch": 9.56, + "learning_rate": 9.556226817404437e-08, + "loss": 3.8633, + "step": 860500 + }, + { + "epoch": 9.56, + "learning_rate": 9.561779534904382e-08, + "loss": 3.8976, + "step": 861000 + }, + { + "epoch": 9.57, + "learning_rate": 9.567332252404326e-08, + "loss": 3.8628, + "step": 861500 + }, + { + "epoch": 9.57, + "learning_rate": 9.57288496990427e-08, + "loss": 3.8705, + "step": 862000 + }, + { + "epoch": 9.58, + "learning_rate": 9.578437687404215e-08, + "loss": 3.8724, + "step": 862500 + }, + { + "epoch": 9.58, + "learning_rate": 9.583990404904159e-08, + "loss": 3.8578, + "step": 863000 + }, + { + "epoch": 9.59, + "learning_rate": 9.589543122404105e-08, + "loss": 3.8741, + "step": 863500 + }, + { + "epoch": 9.6, + "learning_rate": 9.595095839904048e-08, + "loss": 3.874, + "step": 864000 + }, + { + "epoch": 9.6, + "learning_rate": 9.600648557403992e-08, + "loss": 3.8756, + "step": 864500 + }, + { + "epoch": 9.61, + "learning_rate": 9.606201274903938e-08, + "loss": 3.8754, + "step": 865000 + }, + { + "epoch": 9.61, + "learning_rate": 9.611753992403881e-08, + "loss": 3.8983, + "step": 865500 + }, + { + "epoch": 9.62, + "learning_rate": 9.617306709903826e-08, + "loss": 3.8807, + "step": 866000 + }, + { + "epoch": 9.62, + "learning_rate": 9.622859427403771e-08, + "loss": 3.874, + "step": 866500 + }, + { + "epoch": 9.63, + "learning_rate": 9.628412144903716e-08, + "loss": 3.8741, + "step": 867000 + }, + { + "epoch": 9.63, + "learning_rate": 9.633964862403659e-08, + "loss": 3.8721, + "step": 867500 + }, + { + "epoch": 9.64, + "learning_rate": 9.639517579903605e-08, + "loss": 3.8652, + "step": 868000 + }, + { + "epoch": 9.65, + "learning_rate": 9.645070297403549e-08, + "loss": 3.8871, + "step": 868500 + }, + { + "epoch": 9.65, + "learning_rate": 9.650623014903492e-08, + "loss": 3.8646, + "step": 869000 + }, + { + "epoch": 9.66, + "learning_rate": 9.656175732403438e-08, + "loss": 3.8755, + "step": 869500 + }, + { + "epoch": 9.66, + "learning_rate": 9.661728449903382e-08, + "loss": 3.9076, + "step": 870000 + }, + { + "epoch": 9.67, + "learning_rate": 9.667281167403327e-08, + "loss": 3.8841, + "step": 870500 + }, + { + "epoch": 9.67, + "learning_rate": 9.672833884903271e-08, + "loss": 3.8814, + "step": 871000 + }, + { + "epoch": 9.68, + "learning_rate": 9.678386602403216e-08, + "loss": 3.8679, + "step": 871500 + }, + { + "epoch": 9.68, + "learning_rate": 9.68393931990316e-08, + "loss": 3.8467, + "step": 872000 + }, + { + "epoch": 9.69, + "learning_rate": 9.689492037403104e-08, + "loss": 3.8771, + "step": 872500 + }, + { + "epoch": 9.7, + "learning_rate": 9.695044754903049e-08, + "loss": 3.8775, + "step": 873000 + }, + { + "epoch": 9.7, + "learning_rate": 9.700597472402993e-08, + "loss": 3.876, + "step": 873500 + }, + { + "epoch": 9.71, + "learning_rate": 9.706150189902939e-08, + "loss": 3.8744, + "step": 874000 + }, + { + "epoch": 9.71, + "learning_rate": 9.711702907402882e-08, + "loss": 3.8832, + "step": 874500 + }, + { + "epoch": 9.72, + "learning_rate": 9.717255624902827e-08, + "loss": 3.8682, + "step": 875000 + }, + { + "epoch": 9.72, + "learning_rate": 9.722808342402772e-08, + "loss": 3.8732, + "step": 875500 + }, + { + "epoch": 9.73, + "learning_rate": 9.728361059902715e-08, + "loss": 3.8604, + "step": 876000 + }, + { + "epoch": 9.73, + "learning_rate": 9.73391377740266e-08, + "loss": 3.8781, + "step": 876500 + }, + { + "epoch": 9.74, + "learning_rate": 9.739466494902606e-08, + "loss": 3.8699, + "step": 877000 + }, + { + "epoch": 9.75, + "learning_rate": 9.74501921240255e-08, + "loss": 3.8748, + "step": 877500 + }, + { + "epoch": 9.75, + "learning_rate": 9.750571929902493e-08, + "loss": 3.8769, + "step": 878000 + }, + { + "epoch": 9.76, + "learning_rate": 9.756124647402439e-08, + "loss": 3.8775, + "step": 878500 + }, + { + "epoch": 9.76, + "learning_rate": 9.761677364902383e-08, + "loss": 3.8547, + "step": 879000 + }, + { + "epoch": 9.77, + "learning_rate": 9.767230082402326e-08, + "loss": 3.8644, + "step": 879500 + }, + { + "epoch": 9.77, + "learning_rate": 9.772782799902272e-08, + "loss": 3.8892, + "step": 880000 + }, + { + "epoch": 9.78, + "learning_rate": 9.778335517402216e-08, + "loss": 3.8671, + "step": 880500 + }, + { + "epoch": 9.78, + "learning_rate": 9.783888234902161e-08, + "loss": 3.8643, + "step": 881000 + }, + { + "epoch": 9.79, + "learning_rate": 9.789440952402105e-08, + "loss": 3.8543, + "step": 881500 + }, + { + "epoch": 9.79, + "learning_rate": 9.79499366990205e-08, + "loss": 3.8538, + "step": 882000 + }, + { + "epoch": 9.8, + "learning_rate": 9.800546387401994e-08, + "loss": 3.8693, + "step": 882500 + }, + { + "epoch": 9.81, + "learning_rate": 9.806099104901939e-08, + "loss": 3.8696, + "step": 883000 + }, + { + "epoch": 9.81, + "learning_rate": 9.811651822401883e-08, + "loss": 3.8674, + "step": 883500 + }, + { + "epoch": 9.82, + "learning_rate": 9.817204539901827e-08, + "loss": 3.8574, + "step": 884000 + }, + { + "epoch": 9.82, + "learning_rate": 9.822757257401773e-08, + "loss": 3.8854, + "step": 884500 + }, + { + "epoch": 9.83, + "learning_rate": 9.828309974901716e-08, + "loss": 3.8651, + "step": 885000 + }, + { + "epoch": 9.83, + "learning_rate": 9.83386269240166e-08, + "loss": 3.8884, + "step": 885500 + }, + { + "epoch": 9.84, + "learning_rate": 9.839415409901606e-08, + "loss": 3.8805, + "step": 886000 + }, + { + "epoch": 9.84, + "learning_rate": 9.84496812740155e-08, + "loss": 3.8592, + "step": 886500 + }, + { + "epoch": 9.85, + "learning_rate": 9.850520844901494e-08, + "loss": 3.8688, + "step": 887000 + }, + { + "epoch": 9.86, + "learning_rate": 9.85607356240144e-08, + "loss": 3.8754, + "step": 887500 + }, + { + "epoch": 9.86, + "learning_rate": 9.861626279901383e-08, + "loss": 3.8672, + "step": 888000 + }, + { + "epoch": 9.87, + "learning_rate": 9.867178997401327e-08, + "loss": 3.8811, + "step": 888500 + }, + { + "epoch": 9.87, + "learning_rate": 9.872731714901273e-08, + "loss": 3.8959, + "step": 889000 + }, + { + "epoch": 9.88, + "learning_rate": 9.878284432401217e-08, + "loss": 3.8589, + "step": 889500 + }, + { + "epoch": 9.88, + "learning_rate": 9.88383714990116e-08, + "loss": 3.8649, + "step": 890000 + }, + { + "epoch": 9.89, + "learning_rate": 9.889389867401106e-08, + "loss": 3.8791, + "step": 890500 + }, + { + "epoch": 9.89, + "learning_rate": 9.89494258490105e-08, + "loss": 3.8653, + "step": 891000 + }, + { + "epoch": 9.9, + "learning_rate": 9.900495302400994e-08, + "loss": 3.8719, + "step": 891500 + }, + { + "epoch": 9.91, + "learning_rate": 9.90604801990094e-08, + "loss": 3.869, + "step": 892000 + }, + { + "epoch": 9.91, + "learning_rate": 9.911600737400884e-08, + "loss": 3.8764, + "step": 892500 + }, + { + "epoch": 9.92, + "learning_rate": 9.917153454900828e-08, + "loss": 3.856, + "step": 893000 + }, + { + "epoch": 9.92, + "learning_rate": 9.922706172400773e-08, + "loss": 3.8704, + "step": 893500 + }, + { + "epoch": 9.93, + "learning_rate": 9.928258889900717e-08, + "loss": 3.8762, + "step": 894000 + }, + { + "epoch": 9.93, + "learning_rate": 9.933811607400662e-08, + "loss": 3.8826, + "step": 894500 + }, + { + "epoch": 9.94, + "learning_rate": 9.939364324900606e-08, + "loss": 3.872, + "step": 895000 + }, + { + "epoch": 9.94, + "learning_rate": 9.94491704240055e-08, + "loss": 3.8834, + "step": 895500 + }, + { + "epoch": 9.95, + "learning_rate": 9.950469759900495e-08, + "loss": 3.8752, + "step": 896000 + }, + { + "epoch": 9.96, + "learning_rate": 9.956022477400439e-08, + "loss": 3.8826, + "step": 896500 + }, + { + "epoch": 9.96, + "learning_rate": 9.961575194900384e-08, + "loss": 3.8484, + "step": 897000 + }, + { + "epoch": 9.97, + "learning_rate": 9.967127912400328e-08, + "loss": 3.8693, + "step": 897500 + }, + { + "epoch": 9.97, + "learning_rate": 9.972680629900272e-08, + "loss": 3.8736, + "step": 898000 + }, + { + "epoch": 9.98, + "learning_rate": 9.978233347400217e-08, + "loss": 3.8756, + "step": 898500 + }, + { + "epoch": 9.98, + "learning_rate": 9.983786064900161e-08, + "loss": 3.8857, + "step": 899000 + }, + { + "epoch": 9.99, + "learning_rate": 9.989338782400106e-08, + "loss": 3.8839, + "step": 899500 + }, + { + "epoch": 9.99, + "learning_rate": 9.994891499900051e-08, + "loss": 3.8706, + "step": 900000 + }, + { + "epoch": 10.0, + "eval_loss": 3.904014825820923, + "eval_runtime": 6.3112, + "eval_samples_per_second": 246.227, + "step": 900460 + }, + { + "epoch": 10.0, + "learning_rate": 9.99988894565e-08, + "loss": 3.8669, + "step": 900500 + }, + { + "epoch": 10.01, + "learning_rate": 9.998500766275015e-08, + "loss": 3.859, + "step": 901000 + }, + { + "epoch": 10.01, + "learning_rate": 9.997112586900029e-08, + "loss": 3.8591, + "step": 901500 + }, + { + "epoch": 10.02, + "learning_rate": 9.995724407525042e-08, + "loss": 3.8883, + "step": 902000 + }, + { + "epoch": 10.02, + "learning_rate": 9.994336228150056e-08, + "loss": 3.8605, + "step": 902500 + }, + { + "epoch": 10.03, + "learning_rate": 9.99294804877507e-08, + "loss": 3.8563, + "step": 903000 + }, + { + "epoch": 10.03, + "learning_rate": 9.991559869400083e-08, + "loss": 3.8597, + "step": 903500 + }, + { + "epoch": 10.04, + "learning_rate": 9.990171690025097e-08, + "loss": 3.8487, + "step": 904000 + }, + { + "epoch": 10.04, + "learning_rate": 9.988783510650112e-08, + "loss": 3.8893, + "step": 904500 + }, + { + "epoch": 10.05, + "learning_rate": 9.987395331275126e-08, + "loss": 3.8711, + "step": 905000 + }, + { + "epoch": 10.06, + "learning_rate": 9.98600715190014e-08, + "loss": 3.8548, + "step": 905500 + }, + { + "epoch": 10.06, + "learning_rate": 9.984618972525153e-08, + "loss": 3.8593, + "step": 906000 + }, + { + "epoch": 10.07, + "learning_rate": 9.983230793150167e-08, + "loss": 3.8685, + "step": 906500 + }, + { + "epoch": 10.07, + "learning_rate": 9.981842613775181e-08, + "loss": 3.8532, + "step": 907000 + }, + { + "epoch": 10.08, + "learning_rate": 9.980454434400196e-08, + "loss": 3.8611, + "step": 907500 + }, + { + "epoch": 10.08, + "learning_rate": 9.979066255025209e-08, + "loss": 3.8682, + "step": 908000 + }, + { + "epoch": 10.09, + "learning_rate": 9.977678075650223e-08, + "loss": 3.8595, + "step": 908500 + }, + { + "epoch": 10.09, + "learning_rate": 9.976289896275236e-08, + "loss": 3.8471, + "step": 909000 + }, + { + "epoch": 10.1, + "learning_rate": 9.97490171690025e-08, + "loss": 3.8813, + "step": 909500 + }, + { + "epoch": 10.11, + "learning_rate": 9.973513537525264e-08, + "loss": 3.8517, + "step": 910000 + }, + { + "epoch": 10.11, + "learning_rate": 9.972125358150278e-08, + "loss": 3.857, + "step": 910500 + }, + { + "epoch": 10.12, + "learning_rate": 9.970737178775293e-08, + "loss": 3.889, + "step": 911000 + }, + { + "epoch": 10.12, + "learning_rate": 9.969348999400307e-08, + "loss": 3.8742, + "step": 911500 + }, + { + "epoch": 10.13, + "learning_rate": 9.96796082002532e-08, + "loss": 3.8673, + "step": 912000 + }, + { + "epoch": 10.13, + "learning_rate": 9.966572640650334e-08, + "loss": 3.8758, + "step": 912500 + }, + { + "epoch": 10.14, + "learning_rate": 9.965184461275348e-08, + "loss": 3.8907, + "step": 913000 + }, + { + "epoch": 10.14, + "learning_rate": 9.963796281900362e-08, + "loss": 3.8586, + "step": 913500 + }, + { + "epoch": 10.15, + "learning_rate": 9.962408102525375e-08, + "loss": 3.8576, + "step": 914000 + }, + { + "epoch": 10.16, + "learning_rate": 9.961019923150388e-08, + "loss": 3.8707, + "step": 914500 + }, + { + "epoch": 10.16, + "learning_rate": 9.959631743775402e-08, + "loss": 3.8731, + "step": 915000 + }, + { + "epoch": 10.17, + "learning_rate": 9.958243564400417e-08, + "loss": 3.8753, + "step": 915500 + }, + { + "epoch": 10.17, + "learning_rate": 9.956855385025431e-08, + "loss": 3.8705, + "step": 916000 + }, + { + "epoch": 10.18, + "learning_rate": 9.955467205650445e-08, + "loss": 3.8709, + "step": 916500 + }, + { + "epoch": 10.18, + "learning_rate": 9.95407902627546e-08, + "loss": 3.8832, + "step": 917000 + }, + { + "epoch": 10.19, + "learning_rate": 9.952690846900472e-08, + "loss": 3.8674, + "step": 917500 + }, + { + "epoch": 10.19, + "learning_rate": 9.951302667525486e-08, + "loss": 3.8332, + "step": 918000 + }, + { + "epoch": 10.2, + "learning_rate": 9.949914488150501e-08, + "loss": 3.8863, + "step": 918500 + }, + { + "epoch": 10.21, + "learning_rate": 9.948526308775515e-08, + "loss": 3.8694, + "step": 919000 + }, + { + "epoch": 10.21, + "learning_rate": 9.947138129400529e-08, + "loss": 3.8543, + "step": 919500 + }, + { + "epoch": 10.22, + "learning_rate": 9.945749950025542e-08, + "loss": 3.8511, + "step": 920000 + }, + { + "epoch": 10.22, + "learning_rate": 9.944361770650555e-08, + "loss": 3.8791, + "step": 920500 + }, + { + "epoch": 10.23, + "learning_rate": 9.942973591275569e-08, + "loss": 3.8505, + "step": 921000 + }, + { + "epoch": 10.23, + "learning_rate": 9.941585411900583e-08, + "loss": 3.8666, + "step": 921500 + }, + { + "epoch": 10.24, + "learning_rate": 9.940197232525598e-08, + "loss": 3.8796, + "step": 922000 + }, + { + "epoch": 10.24, + "learning_rate": 9.938809053150612e-08, + "loss": 3.8867, + "step": 922500 + }, + { + "epoch": 10.25, + "learning_rate": 9.937420873775625e-08, + "loss": 3.8659, + "step": 923000 + }, + { + "epoch": 10.26, + "learning_rate": 9.936032694400639e-08, + "loss": 3.8811, + "step": 923500 + }, + { + "epoch": 10.26, + "learning_rate": 9.934644515025653e-08, + "loss": 3.849, + "step": 924000 + }, + { + "epoch": 10.27, + "learning_rate": 9.933256335650667e-08, + "loss": 3.8673, + "step": 924500 + }, + { + "epoch": 10.27, + "learning_rate": 9.931868156275682e-08, + "loss": 3.8738, + "step": 925000 + }, + { + "epoch": 10.28, + "learning_rate": 9.930479976900696e-08, + "loss": 3.8682, + "step": 925500 + }, + { + "epoch": 10.28, + "learning_rate": 9.929091797525709e-08, + "loss": 3.8537, + "step": 926000 + }, + { + "epoch": 10.29, + "learning_rate": 9.927703618150722e-08, + "loss": 3.865, + "step": 926500 + }, + { + "epoch": 10.29, + "learning_rate": 9.926315438775736e-08, + "loss": 3.8733, + "step": 927000 + }, + { + "epoch": 10.3, + "learning_rate": 9.92492725940075e-08, + "loss": 3.8726, + "step": 927500 + }, + { + "epoch": 10.31, + "learning_rate": 9.923539080025764e-08, + "loss": 3.8549, + "step": 928000 + }, + { + "epoch": 10.31, + "learning_rate": 9.922150900650777e-08, + "loss": 3.8419, + "step": 928500 + }, + { + "epoch": 10.32, + "learning_rate": 9.920762721275792e-08, + "loss": 3.8696, + "step": 929000 + }, + { + "epoch": 10.32, + "learning_rate": 9.919374541900806e-08, + "loss": 3.8543, + "step": 929500 + }, + { + "epoch": 10.33, + "learning_rate": 9.91798636252582e-08, + "loss": 3.8731, + "step": 930000 + }, + { + "epoch": 10.33, + "learning_rate": 9.916598183150834e-08, + "loss": 3.8611, + "step": 930500 + }, + { + "epoch": 10.34, + "learning_rate": 9.915210003775848e-08, + "loss": 3.8893, + "step": 931000 + }, + { + "epoch": 10.34, + "learning_rate": 9.913821824400861e-08, + "loss": 3.8672, + "step": 931500 + }, + { + "epoch": 10.35, + "learning_rate": 9.912433645025876e-08, + "loss": 3.8572, + "step": 932000 + }, + { + "epoch": 10.36, + "learning_rate": 9.911045465650888e-08, + "loss": 3.8588, + "step": 932500 + }, + { + "epoch": 10.36, + "learning_rate": 9.909657286275903e-08, + "loss": 3.8452, + "step": 933000 + }, + { + "epoch": 10.37, + "learning_rate": 9.908269106900917e-08, + "loss": 3.8802, + "step": 933500 + }, + { + "epoch": 10.37, + "learning_rate": 9.90688092752593e-08, + "loss": 3.8591, + "step": 934000 + }, + { + "epoch": 10.38, + "learning_rate": 9.905492748150944e-08, + "loss": 3.8646, + "step": 934500 + }, + { + "epoch": 10.38, + "learning_rate": 9.904104568775958e-08, + "loss": 3.8622, + "step": 935000 + }, + { + "epoch": 10.39, + "learning_rate": 9.902716389400972e-08, + "loss": 3.8741, + "step": 935500 + }, + { + "epoch": 10.39, + "learning_rate": 9.901328210025987e-08, + "loss": 3.8691, + "step": 936000 + }, + { + "epoch": 10.4, + "learning_rate": 9.899940030651001e-08, + "loss": 3.8632, + "step": 936500 + }, + { + "epoch": 10.41, + "learning_rate": 9.898551851276014e-08, + "loss": 3.8659, + "step": 937000 + }, + { + "epoch": 10.41, + "learning_rate": 9.897163671901028e-08, + "loss": 3.8764, + "step": 937500 + }, + { + "epoch": 10.42, + "learning_rate": 9.895775492526042e-08, + "loss": 3.8729, + "step": 938000 + }, + { + "epoch": 10.42, + "learning_rate": 9.894387313151055e-08, + "loss": 3.8833, + "step": 938500 + }, + { + "epoch": 10.43, + "learning_rate": 9.89299913377607e-08, + "loss": 3.8762, + "step": 939000 + }, + { + "epoch": 10.43, + "learning_rate": 9.891610954401084e-08, + "loss": 3.8472, + "step": 939500 + }, + { + "epoch": 10.44, + "learning_rate": 9.890222775026097e-08, + "loss": 3.8667, + "step": 940000 + }, + { + "epoch": 10.44, + "learning_rate": 9.888834595651111e-08, + "loss": 3.8745, + "step": 940500 + }, + { + "epoch": 10.45, + "learning_rate": 9.887446416276125e-08, + "loss": 3.8837, + "step": 941000 + }, + { + "epoch": 10.46, + "learning_rate": 9.886058236901139e-08, + "loss": 3.8511, + "step": 941500 + }, + { + "epoch": 10.46, + "learning_rate": 9.884670057526153e-08, + "loss": 3.8796, + "step": 942000 + }, + { + "epoch": 10.47, + "learning_rate": 9.883281878151166e-08, + "loss": 3.8682, + "step": 942500 + }, + { + "epoch": 10.47, + "learning_rate": 9.88189369877618e-08, + "loss": 3.8515, + "step": 943000 + }, + { + "epoch": 10.48, + "learning_rate": 9.880505519401195e-08, + "loss": 3.8572, + "step": 943500 + }, + { + "epoch": 10.48, + "learning_rate": 9.879117340026209e-08, + "loss": 3.8599, + "step": 944000 + }, + { + "epoch": 10.49, + "learning_rate": 9.877729160651222e-08, + "loss": 3.8675, + "step": 944500 + }, + { + "epoch": 10.49, + "learning_rate": 9.876340981276236e-08, + "loss": 3.8707, + "step": 945000 + }, + { + "epoch": 10.5, + "learning_rate": 9.874952801901249e-08, + "loss": 3.8825, + "step": 945500 + }, + { + "epoch": 10.51, + "learning_rate": 9.873564622526263e-08, + "loss": 3.8788, + "step": 946000 + }, + { + "epoch": 10.51, + "learning_rate": 9.872176443151278e-08, + "loss": 3.8524, + "step": 946500 + }, + { + "epoch": 10.52, + "learning_rate": 9.870788263776292e-08, + "loss": 3.8666, + "step": 947000 + }, + { + "epoch": 10.52, + "learning_rate": 9.869400084401306e-08, + "loss": 3.8758, + "step": 947500 + }, + { + "epoch": 10.53, + "learning_rate": 9.86801190502632e-08, + "loss": 3.8605, + "step": 948000 + }, + { + "epoch": 10.53, + "learning_rate": 9.866623725651333e-08, + "loss": 3.8542, + "step": 948500 + }, + { + "epoch": 10.54, + "learning_rate": 9.865235546276347e-08, + "loss": 3.8557, + "step": 949000 + }, + { + "epoch": 10.54, + "learning_rate": 9.863847366901362e-08, + "loss": 3.8531, + "step": 949500 + }, + { + "epoch": 10.55, + "learning_rate": 9.862459187526376e-08, + "loss": 3.8527, + "step": 950000 + }, + { + "epoch": 10.56, + "learning_rate": 9.861071008151389e-08, + "loss": 3.8588, + "step": 950500 + }, + { + "epoch": 10.56, + "learning_rate": 9.859682828776402e-08, + "loss": 3.8679, + "step": 951000 + }, + { + "epoch": 10.57, + "learning_rate": 9.858294649401416e-08, + "loss": 3.8518, + "step": 951500 + }, + { + "epoch": 10.57, + "learning_rate": 9.85690647002643e-08, + "loss": 3.8525, + "step": 952000 + }, + { + "epoch": 10.58, + "learning_rate": 9.855518290651444e-08, + "loss": 3.8524, + "step": 952500 + }, + { + "epoch": 10.58, + "learning_rate": 9.854130111276459e-08, + "loss": 3.8605, + "step": 953000 + }, + { + "epoch": 10.59, + "learning_rate": 9.852741931901473e-08, + "loss": 3.8719, + "step": 953500 + }, + { + "epoch": 10.59, + "learning_rate": 9.851353752526486e-08, + "loss": 3.8659, + "step": 954000 + }, + { + "epoch": 10.6, + "learning_rate": 9.8499655731515e-08, + "loss": 3.8484, + "step": 954500 + }, + { + "epoch": 10.61, + "learning_rate": 9.848577393776514e-08, + "loss": 3.871, + "step": 955000 + }, + { + "epoch": 10.61, + "learning_rate": 9.847189214401528e-08, + "loss": 3.8532, + "step": 955500 + }, + { + "epoch": 10.62, + "learning_rate": 9.845801035026543e-08, + "loss": 3.8531, + "step": 956000 + }, + { + "epoch": 10.62, + "learning_rate": 9.844412855651555e-08, + "loss": 3.8591, + "step": 956500 + }, + { + "epoch": 10.63, + "learning_rate": 9.843024676276568e-08, + "loss": 3.8737, + "step": 957000 + }, + { + "epoch": 10.63, + "learning_rate": 9.841636496901583e-08, + "loss": 3.8497, + "step": 957500 + }, + { + "epoch": 10.64, + "learning_rate": 9.840248317526597e-08, + "loss": 3.8671, + "step": 958000 + }, + { + "epoch": 10.64, + "learning_rate": 9.838860138151611e-08, + "loss": 3.8644, + "step": 958500 + }, + { + "epoch": 10.65, + "learning_rate": 9.837471958776625e-08, + "loss": 3.8626, + "step": 959000 + }, + { + "epoch": 10.66, + "learning_rate": 9.836083779401638e-08, + "loss": 3.8502, + "step": 959500 + }, + { + "epoch": 10.66, + "learning_rate": 9.834695600026652e-08, + "loss": 3.8785, + "step": 960000 + }, + { + "epoch": 10.67, + "learning_rate": 9.833307420651667e-08, + "loss": 3.8665, + "step": 960500 + }, + { + "epoch": 10.67, + "learning_rate": 9.831919241276681e-08, + "loss": 3.853, + "step": 961000 + }, + { + "epoch": 10.68, + "learning_rate": 9.830531061901695e-08, + "loss": 3.8561, + "step": 961500 + }, + { + "epoch": 10.68, + "learning_rate": 9.829142882526709e-08, + "loss": 3.8521, + "step": 962000 + }, + { + "epoch": 10.69, + "learning_rate": 9.827754703151722e-08, + "loss": 3.8828, + "step": 962500 + }, + { + "epoch": 10.69, + "learning_rate": 9.826366523776735e-08, + "loss": 3.8706, + "step": 963000 + }, + { + "epoch": 10.7, + "learning_rate": 9.824978344401749e-08, + "loss": 3.8622, + "step": 963500 + }, + { + "epoch": 10.71, + "learning_rate": 9.823590165026764e-08, + "loss": 3.8459, + "step": 964000 + }, + { + "epoch": 10.71, + "learning_rate": 9.822201985651778e-08, + "loss": 3.8465, + "step": 964500 + }, + { + "epoch": 10.72, + "learning_rate": 9.820813806276791e-08, + "loss": 3.8832, + "step": 965000 + }, + { + "epoch": 10.72, + "learning_rate": 9.819425626901805e-08, + "loss": 3.8757, + "step": 965500 + }, + { + "epoch": 10.73, + "learning_rate": 9.818037447526819e-08, + "loss": 3.8731, + "step": 966000 + }, + { + "epoch": 10.73, + "learning_rate": 9.816649268151833e-08, + "loss": 3.8566, + "step": 966500 + }, + { + "epoch": 10.74, + "learning_rate": 9.815261088776848e-08, + "loss": 3.8529, + "step": 967000 + }, + { + "epoch": 10.74, + "learning_rate": 9.813872909401862e-08, + "loss": 3.8567, + "step": 967500 + }, + { + "epoch": 10.75, + "learning_rate": 9.812484730026875e-08, + "loss": 3.8475, + "step": 968000 + }, + { + "epoch": 10.76, + "learning_rate": 9.811096550651889e-08, + "loss": 3.8522, + "step": 968500 + }, + { + "epoch": 10.76, + "learning_rate": 9.809708371276902e-08, + "loss": 3.8692, + "step": 969000 + }, + { + "epoch": 10.77, + "learning_rate": 9.808320191901916e-08, + "loss": 3.8667, + "step": 969500 + }, + { + "epoch": 10.77, + "learning_rate": 9.80693201252693e-08, + "loss": 3.869, + "step": 970000 + }, + { + "epoch": 10.78, + "learning_rate": 9.805543833151945e-08, + "loss": 3.8616, + "step": 970500 + }, + { + "epoch": 10.78, + "learning_rate": 9.804155653776957e-08, + "loss": 3.8601, + "step": 971000 + }, + { + "epoch": 10.79, + "learning_rate": 9.802767474401972e-08, + "loss": 3.867, + "step": 971500 + }, + { + "epoch": 10.79, + "learning_rate": 9.801379295026986e-08, + "loss": 3.8644, + "step": 972000 + }, + { + "epoch": 10.8, + "learning_rate": 9.799991115652e-08, + "loss": 3.8434, + "step": 972500 + }, + { + "epoch": 10.81, + "learning_rate": 9.798602936277014e-08, + "loss": 3.8574, + "step": 973000 + }, + { + "epoch": 10.81, + "learning_rate": 9.797214756902027e-08, + "loss": 3.8648, + "step": 973500 + }, + { + "epoch": 10.82, + "learning_rate": 9.795826577527041e-08, + "loss": 3.846, + "step": 974000 + }, + { + "epoch": 10.82, + "learning_rate": 9.794438398152056e-08, + "loss": 3.8593, + "step": 974500 + }, + { + "epoch": 10.83, + "learning_rate": 9.793050218777069e-08, + "loss": 3.8662, + "step": 975000 + }, + { + "epoch": 10.83, + "learning_rate": 9.791662039402083e-08, + "loss": 3.8412, + "step": 975500 + }, + { + "epoch": 10.84, + "learning_rate": 9.790273860027097e-08, + "loss": 3.8641, + "step": 976000 + }, + { + "epoch": 10.84, + "learning_rate": 9.78888568065211e-08, + "loss": 3.8525, + "step": 976500 + }, + { + "epoch": 10.85, + "learning_rate": 9.787497501277124e-08, + "loss": 3.8524, + "step": 977000 + }, + { + "epoch": 10.86, + "learning_rate": 9.786109321902138e-08, + "loss": 3.8351, + "step": 977500 + }, + { + "epoch": 10.86, + "learning_rate": 9.784721142527153e-08, + "loss": 3.8825, + "step": 978000 + }, + { + "epoch": 10.87, + "learning_rate": 9.783332963152167e-08, + "loss": 3.8685, + "step": 978500 + }, + { + "epoch": 10.87, + "learning_rate": 9.78194478377718e-08, + "loss": 3.8586, + "step": 979000 + }, + { + "epoch": 10.88, + "learning_rate": 9.780556604402194e-08, + "loss": 3.8522, + "step": 979500 + }, + { + "epoch": 10.88, + "learning_rate": 9.779168425027208e-08, + "loss": 3.8554, + "step": 980000 + }, + { + "epoch": 10.89, + "learning_rate": 9.777780245652222e-08, + "loss": 3.8516, + "step": 980500 + }, + { + "epoch": 10.89, + "learning_rate": 9.776392066277235e-08, + "loss": 3.8519, + "step": 981000 + }, + { + "epoch": 10.9, + "learning_rate": 9.77500388690225e-08, + "loss": 3.8716, + "step": 981500 + }, + { + "epoch": 10.91, + "learning_rate": 9.773615707527262e-08, + "loss": 3.8573, + "step": 982000 + }, + { + "epoch": 10.91, + "learning_rate": 9.772227528152277e-08, + "loss": 3.8609, + "step": 982500 + }, + { + "epoch": 10.92, + "learning_rate": 9.770839348777291e-08, + "loss": 3.8734, + "step": 983000 + }, + { + "epoch": 10.92, + "learning_rate": 9.769451169402305e-08, + "loss": 3.8622, + "step": 983500 + }, + { + "epoch": 10.93, + "learning_rate": 9.76806299002732e-08, + "loss": 3.8566, + "step": 984000 + }, + { + "epoch": 10.93, + "learning_rate": 9.766674810652334e-08, + "loss": 3.8702, + "step": 984500 + }, + { + "epoch": 10.94, + "learning_rate": 9.765286631277347e-08, + "loss": 3.86, + "step": 985000 + }, + { + "epoch": 10.94, + "learning_rate": 9.763898451902361e-08, + "loss": 3.8647, + "step": 985500 + }, + { + "epoch": 10.95, + "learning_rate": 9.762510272527375e-08, + "loss": 3.8523, + "step": 986000 + }, + { + "epoch": 10.96, + "learning_rate": 9.761122093152389e-08, + "loss": 3.8478, + "step": 986500 + }, + { + "epoch": 10.96, + "learning_rate": 9.759733913777402e-08, + "loss": 3.8731, + "step": 987000 + }, + { + "epoch": 10.97, + "learning_rate": 9.758345734402415e-08, + "loss": 3.8746, + "step": 987500 + }, + { + "epoch": 10.97, + "learning_rate": 9.756957555027429e-08, + "loss": 3.8547, + "step": 988000 + }, + { + "epoch": 10.98, + "learning_rate": 9.755569375652443e-08, + "loss": 3.8569, + "step": 988500 + }, + { + "epoch": 10.98, + "learning_rate": 9.754181196277458e-08, + "loss": 3.8595, + "step": 989000 + }, + { + "epoch": 10.99, + "learning_rate": 9.752793016902472e-08, + "loss": 3.8365, + "step": 989500 + }, + { + "epoch": 10.99, + "learning_rate": 9.751404837527486e-08, + "loss": 3.8444, + "step": 990000 + }, + { + "epoch": 11.0, + "learning_rate": 9.750016658152499e-08, + "loss": 3.8648, + "step": 990500 + }, + { + "epoch": 11.0, + "eval_loss": 3.8952994346618652, + "eval_runtime": 6.3144, + "eval_samples_per_second": 246.105, + "step": 990506 + }, + { + "epoch": 11.01, + "learning_rate": 9.748628478777513e-08, + "loss": 3.8663, + "step": 991000 + }, + { + "epoch": 11.01, + "learning_rate": 9.747240299402527e-08, + "loss": 3.8649, + "step": 991500 + }, + { + "epoch": 11.02, + "learning_rate": 9.745852120027542e-08, + "loss": 3.8657, + "step": 992000 + }, + { + "epoch": 11.02, + "learning_rate": 9.744463940652556e-08, + "loss": 3.853, + "step": 992500 + }, + { + "epoch": 11.03, + "learning_rate": 9.743075761277569e-08, + "loss": 3.8628, + "step": 993000 + }, + { + "epoch": 11.03, + "learning_rate": 9.741687581902582e-08, + "loss": 3.8512, + "step": 993500 + }, + { + "epoch": 11.04, + "learning_rate": 9.740299402527596e-08, + "loss": 3.8547, + "step": 994000 + }, + { + "epoch": 11.04, + "learning_rate": 9.73891122315261e-08, + "loss": 3.8546, + "step": 994500 + }, + { + "epoch": 11.05, + "learning_rate": 9.737523043777624e-08, + "loss": 3.867, + "step": 995000 + }, + { + "epoch": 11.06, + "learning_rate": 9.736134864402639e-08, + "loss": 3.8618, + "step": 995500 + }, + { + "epoch": 11.06, + "learning_rate": 9.734746685027652e-08, + "loss": 3.8536, + "step": 996000 + }, + { + "epoch": 11.07, + "learning_rate": 9.733358505652666e-08, + "loss": 3.8588, + "step": 996500 + }, + { + "epoch": 11.07, + "learning_rate": 9.73197032627768e-08, + "loss": 3.8499, + "step": 997000 + }, + { + "epoch": 11.08, + "learning_rate": 9.730582146902694e-08, + "loss": 3.8623, + "step": 997500 + }, + { + "epoch": 11.08, + "learning_rate": 9.729193967527708e-08, + "loss": 3.854, + "step": 998000 + }, + { + "epoch": 11.09, + "learning_rate": 9.727805788152723e-08, + "loss": 3.842, + "step": 998500 + }, + { + "epoch": 11.09, + "learning_rate": 9.726417608777736e-08, + "loss": 3.872, + "step": 999000 + }, + { + "epoch": 11.1, + "learning_rate": 9.725029429402748e-08, + "loss": 3.8529, + "step": 999500 + }, + { + "epoch": 11.11, + "learning_rate": 9.723641250027763e-08, + "loss": 3.8463, + "step": 1000000 + }, + { + "epoch": 11.11, + "learning_rate": 9.722253070652777e-08, + "loss": 3.8362, + "step": 1000500 + }, + { + "epoch": 11.12, + "learning_rate": 9.720864891277791e-08, + "loss": 3.8544, + "step": 1001000 + }, + { + "epoch": 11.12, + "learning_rate": 9.719476711902804e-08, + "loss": 3.8483, + "step": 1001500 + }, + { + "epoch": 11.13, + "learning_rate": 9.718088532527818e-08, + "loss": 3.8695, + "step": 1002000 + }, + { + "epoch": 11.13, + "learning_rate": 9.716700353152833e-08, + "loss": 3.8459, + "step": 1002500 + }, + { + "epoch": 11.14, + "learning_rate": 9.715312173777847e-08, + "loss": 3.8633, + "step": 1003000 + }, + { + "epoch": 11.14, + "learning_rate": 9.713923994402861e-08, + "loss": 3.8359, + "step": 1003500 + }, + { + "epoch": 11.15, + "learning_rate": 9.712535815027875e-08, + "loss": 3.844, + "step": 1004000 + }, + { + "epoch": 11.16, + "learning_rate": 9.711147635652888e-08, + "loss": 3.8747, + "step": 1004500 + }, + { + "epoch": 11.16, + "learning_rate": 9.709759456277902e-08, + "loss": 3.8569, + "step": 1005000 + }, + { + "epoch": 11.17, + "learning_rate": 9.708371276902915e-08, + "loss": 3.8704, + "step": 1005500 + }, + { + "epoch": 11.17, + "learning_rate": 9.70698309752793e-08, + "loss": 3.847, + "step": 1006000 + }, + { + "epoch": 11.18, + "learning_rate": 9.705594918152944e-08, + "loss": 3.8777, + "step": 1006500 + }, + { + "epoch": 11.18, + "learning_rate": 9.704206738777958e-08, + "loss": 3.8748, + "step": 1007000 + }, + { + "epoch": 11.19, + "learning_rate": 9.702818559402971e-08, + "loss": 3.8703, + "step": 1007500 + }, + { + "epoch": 11.19, + "learning_rate": 9.701430380027985e-08, + "loss": 3.863, + "step": 1008000 + }, + { + "epoch": 11.2, + "learning_rate": 9.700042200652999e-08, + "loss": 3.8448, + "step": 1008500 + }, + { + "epoch": 11.21, + "learning_rate": 9.698654021278013e-08, + "loss": 3.8467, + "step": 1009000 + }, + { + "epoch": 11.21, + "learning_rate": 9.697265841903028e-08, + "loss": 3.8423, + "step": 1009500 + }, + { + "epoch": 11.22, + "learning_rate": 9.69587766252804e-08, + "loss": 3.8522, + "step": 1010000 + }, + { + "epoch": 11.22, + "learning_rate": 9.694489483153055e-08, + "loss": 3.8648, + "step": 1010500 + }, + { + "epoch": 11.23, + "learning_rate": 9.693101303778069e-08, + "loss": 3.8497, + "step": 1011000 + }, + { + "epoch": 11.23, + "learning_rate": 9.691713124403082e-08, + "loss": 3.8511, + "step": 1011500 + }, + { + "epoch": 11.24, + "learning_rate": 9.690324945028096e-08, + "loss": 3.8487, + "step": 1012000 + }, + { + "epoch": 11.24, + "learning_rate": 9.68893676565311e-08, + "loss": 3.8518, + "step": 1012500 + }, + { + "epoch": 11.25, + "learning_rate": 9.687548586278123e-08, + "loss": 3.8421, + "step": 1013000 + }, + { + "epoch": 11.26, + "learning_rate": 9.686160406903138e-08, + "loss": 3.8607, + "step": 1013500 + }, + { + "epoch": 11.26, + "learning_rate": 9.684772227528152e-08, + "loss": 3.8521, + "step": 1014000 + }, + { + "epoch": 11.27, + "learning_rate": 9.683384048153166e-08, + "loss": 3.8516, + "step": 1014500 + }, + { + "epoch": 11.27, + "learning_rate": 9.68199586877818e-08, + "loss": 3.8581, + "step": 1015000 + }, + { + "epoch": 11.28, + "learning_rate": 9.680607689403193e-08, + "loss": 3.8553, + "step": 1015500 + }, + { + "epoch": 11.28, + "learning_rate": 9.679219510028207e-08, + "loss": 3.8452, + "step": 1016000 + }, + { + "epoch": 11.29, + "learning_rate": 9.677831330653222e-08, + "loss": 3.8642, + "step": 1016500 + }, + { + "epoch": 11.29, + "learning_rate": 9.676443151278236e-08, + "loss": 3.8361, + "step": 1017000 + }, + { + "epoch": 11.3, + "learning_rate": 9.675054971903249e-08, + "loss": 3.8413, + "step": 1017500 + }, + { + "epoch": 11.31, + "learning_rate": 9.673666792528263e-08, + "loss": 3.8569, + "step": 1018000 + }, + { + "epoch": 11.31, + "learning_rate": 9.672278613153276e-08, + "loss": 3.8511, + "step": 1018500 + }, + { + "epoch": 11.32, + "learning_rate": 9.67089043377829e-08, + "loss": 3.837, + "step": 1019000 + }, + { + "epoch": 11.32, + "learning_rate": 9.669502254403304e-08, + "loss": 3.8386, + "step": 1019500 + }, + { + "epoch": 11.33, + "learning_rate": 9.668114075028319e-08, + "loss": 3.837, + "step": 1020000 + }, + { + "epoch": 11.33, + "learning_rate": 9.666725895653333e-08, + "loss": 3.8515, + "step": 1020500 + }, + { + "epoch": 11.34, + "learning_rate": 9.665337716278347e-08, + "loss": 3.8484, + "step": 1021000 + }, + { + "epoch": 11.34, + "learning_rate": 9.66394953690336e-08, + "loss": 3.85, + "step": 1021500 + }, + { + "epoch": 11.35, + "learning_rate": 9.662561357528374e-08, + "loss": 3.8509, + "step": 1022000 + }, + { + "epoch": 11.36, + "learning_rate": 9.661173178153388e-08, + "loss": 3.8674, + "step": 1022500 + }, + { + "epoch": 11.36, + "learning_rate": 9.659784998778403e-08, + "loss": 3.8413, + "step": 1023000 + }, + { + "epoch": 11.37, + "learning_rate": 9.658396819403415e-08, + "loss": 3.8277, + "step": 1023500 + }, + { + "epoch": 11.37, + "learning_rate": 9.657008640028428e-08, + "loss": 3.8627, + "step": 1024000 + }, + { + "epoch": 11.38, + "learning_rate": 9.655620460653443e-08, + "loss": 3.8431, + "step": 1024500 + }, + { + "epoch": 11.38, + "learning_rate": 9.654232281278457e-08, + "loss": 3.8492, + "step": 1025000 + }, + { + "epoch": 11.39, + "learning_rate": 9.652844101903471e-08, + "loss": 3.8423, + "step": 1025500 + }, + { + "epoch": 11.39, + "learning_rate": 9.651455922528485e-08, + "loss": 3.8626, + "step": 1026000 + }, + { + "epoch": 11.4, + "learning_rate": 9.6500677431535e-08, + "loss": 3.8437, + "step": 1026500 + }, + { + "epoch": 11.41, + "learning_rate": 9.648679563778512e-08, + "loss": 3.864, + "step": 1027000 + }, + { + "epoch": 11.41, + "learning_rate": 9.647291384403527e-08, + "loss": 3.8621, + "step": 1027500 + }, + { + "epoch": 11.42, + "learning_rate": 9.645903205028541e-08, + "loss": 3.8585, + "step": 1028000 + }, + { + "epoch": 11.42, + "learning_rate": 9.644515025653555e-08, + "loss": 3.848, + "step": 1028500 + }, + { + "epoch": 11.43, + "learning_rate": 9.643126846278569e-08, + "loss": 3.8561, + "step": 1029000 + }, + { + "epoch": 11.43, + "learning_rate": 9.641738666903582e-08, + "loss": 3.8447, + "step": 1029500 + }, + { + "epoch": 11.44, + "learning_rate": 9.640350487528595e-08, + "loss": 3.8637, + "step": 1030000 + }, + { + "epoch": 11.44, + "learning_rate": 9.63896230815361e-08, + "loss": 3.845, + "step": 1030500 + }, + { + "epoch": 11.45, + "learning_rate": 9.637574128778624e-08, + "loss": 3.8456, + "step": 1031000 + }, + { + "epoch": 11.46, + "learning_rate": 9.636185949403638e-08, + "loss": 3.8689, + "step": 1031500 + }, + { + "epoch": 11.46, + "learning_rate": 9.634797770028652e-08, + "loss": 3.8708, + "step": 1032000 + }, + { + "epoch": 11.47, + "learning_rate": 9.633409590653665e-08, + "loss": 3.8386, + "step": 1032500 + }, + { + "epoch": 11.47, + "learning_rate": 9.632021411278679e-08, + "loss": 3.8458, + "step": 1033000 + }, + { + "epoch": 11.48, + "learning_rate": 9.630633231903693e-08, + "loss": 3.8464, + "step": 1033500 + }, + { + "epoch": 11.48, + "learning_rate": 9.629245052528708e-08, + "loss": 3.8691, + "step": 1034000 + }, + { + "epoch": 11.49, + "learning_rate": 9.627856873153722e-08, + "loss": 3.8595, + "step": 1034500 + }, + { + "epoch": 11.49, + "learning_rate": 9.626468693778735e-08, + "loss": 3.8135, + "step": 1035000 + }, + { + "epoch": 11.5, + "learning_rate": 9.625080514403749e-08, + "loss": 3.8604, + "step": 1035500 + }, + { + "epoch": 11.51, + "learning_rate": 9.623692335028762e-08, + "loss": 3.8485, + "step": 1036000 + }, + { + "epoch": 11.51, + "learning_rate": 9.622304155653776e-08, + "loss": 3.8416, + "step": 1036500 + }, + { + "epoch": 11.52, + "learning_rate": 9.62091597627879e-08, + "loss": 3.8547, + "step": 1037000 + }, + { + "epoch": 11.52, + "learning_rate": 9.619527796903805e-08, + "loss": 3.8578, + "step": 1037500 + }, + { + "epoch": 11.53, + "learning_rate": 9.618139617528817e-08, + "loss": 3.8577, + "step": 1038000 + }, + { + "epoch": 11.53, + "learning_rate": 9.616751438153832e-08, + "loss": 3.8423, + "step": 1038500 + }, + { + "epoch": 11.54, + "learning_rate": 9.615363258778846e-08, + "loss": 3.8397, + "step": 1039000 + }, + { + "epoch": 11.54, + "learning_rate": 9.61397507940386e-08, + "loss": 3.8531, + "step": 1039500 + }, + { + "epoch": 11.55, + "learning_rate": 9.612586900028874e-08, + "loss": 3.8626, + "step": 1040000 + }, + { + "epoch": 11.56, + "learning_rate": 9.611198720653889e-08, + "loss": 3.8656, + "step": 1040500 + }, + { + "epoch": 11.56, + "learning_rate": 9.609810541278901e-08, + "loss": 3.8743, + "step": 1041000 + }, + { + "epoch": 11.57, + "learning_rate": 9.608422361903916e-08, + "loss": 3.8585, + "step": 1041500 + }, + { + "epoch": 11.57, + "learning_rate": 9.607034182528929e-08, + "loss": 3.8282, + "step": 1042000 + }, + { + "epoch": 11.58, + "learning_rate": 9.605646003153943e-08, + "loss": 3.8469, + "step": 1042500 + }, + { + "epoch": 11.58, + "learning_rate": 9.604257823778957e-08, + "loss": 3.8483, + "step": 1043000 + }, + { + "epoch": 11.59, + "learning_rate": 9.602869644403971e-08, + "loss": 3.85, + "step": 1043500 + }, + { + "epoch": 11.59, + "learning_rate": 9.601481465028984e-08, + "loss": 3.854, + "step": 1044000 + }, + { + "epoch": 11.6, + "learning_rate": 9.600093285653998e-08, + "loss": 3.8255, + "step": 1044500 + }, + { + "epoch": 11.61, + "learning_rate": 9.598705106279013e-08, + "loss": 3.8294, + "step": 1045000 + }, + { + "epoch": 11.61, + "learning_rate": 9.597316926904027e-08, + "loss": 3.8557, + "step": 1045500 + }, + { + "epoch": 11.62, + "learning_rate": 9.595928747529041e-08, + "loss": 3.8637, + "step": 1046000 + }, + { + "epoch": 11.62, + "learning_rate": 9.594540568154054e-08, + "loss": 3.84, + "step": 1046500 + }, + { + "epoch": 11.63, + "learning_rate": 9.593152388779068e-08, + "loss": 3.8525, + "step": 1047000 + }, + { + "epoch": 11.63, + "learning_rate": 9.591764209404082e-08, + "loss": 3.8453, + "step": 1047500 + }, + { + "epoch": 11.64, + "learning_rate": 9.590376030029095e-08, + "loss": 3.8652, + "step": 1048000 + }, + { + "epoch": 11.64, + "learning_rate": 9.58898785065411e-08, + "loss": 3.8566, + "step": 1048500 + }, + { + "epoch": 11.65, + "learning_rate": 9.587599671279124e-08, + "loss": 3.8502, + "step": 1049000 + }, + { + "epoch": 11.66, + "learning_rate": 9.586211491904137e-08, + "loss": 3.852, + "step": 1049500 + }, + { + "epoch": 11.66, + "learning_rate": 9.584823312529151e-08, + "loss": 3.853, + "step": 1050000 + }, + { + "epoch": 11.67, + "learning_rate": 9.583435133154165e-08, + "loss": 3.8677, + "step": 1050500 + }, + { + "epoch": 11.67, + "learning_rate": 9.58204695377918e-08, + "loss": 3.847, + "step": 1051000 + }, + { + "epoch": 11.68, + "learning_rate": 9.580658774404194e-08, + "loss": 3.8677, + "step": 1051500 + }, + { + "epoch": 11.68, + "learning_rate": 9.579270595029207e-08, + "loss": 3.8543, + "step": 1052000 + }, + { + "epoch": 11.69, + "learning_rate": 9.577882415654221e-08, + "loss": 3.8649, + "step": 1052500 + }, + { + "epoch": 11.69, + "learning_rate": 9.576494236279235e-08, + "loss": 3.8709, + "step": 1053000 + }, + { + "epoch": 11.7, + "learning_rate": 9.575106056904249e-08, + "loss": 3.853, + "step": 1053500 + }, + { + "epoch": 11.71, + "learning_rate": 9.573717877529262e-08, + "loss": 3.8393, + "step": 1054000 + }, + { + "epoch": 11.71, + "learning_rate": 9.572329698154276e-08, + "loss": 3.8714, + "step": 1054500 + }, + { + "epoch": 11.72, + "learning_rate": 9.570941518779289e-08, + "loss": 3.8414, + "step": 1055000 + }, + { + "epoch": 11.72, + "learning_rate": 9.569553339404303e-08, + "loss": 3.8584, + "step": 1055500 + }, + { + "epoch": 11.73, + "learning_rate": 9.568165160029318e-08, + "loss": 3.8517, + "step": 1056000 + }, + { + "epoch": 11.73, + "learning_rate": 9.566776980654332e-08, + "loss": 3.835, + "step": 1056500 + }, + { + "epoch": 11.74, + "learning_rate": 9.565388801279346e-08, + "loss": 3.8427, + "step": 1057000 + }, + { + "epoch": 11.74, + "learning_rate": 9.56400062190436e-08, + "loss": 3.8585, + "step": 1057500 + }, + { + "epoch": 11.75, + "learning_rate": 9.562612442529373e-08, + "loss": 3.8566, + "step": 1058000 + }, + { + "epoch": 11.76, + "learning_rate": 9.561224263154387e-08, + "loss": 3.8435, + "step": 1058500 + }, + { + "epoch": 11.76, + "learning_rate": 9.559836083779402e-08, + "loss": 3.8718, + "step": 1059000 + }, + { + "epoch": 11.77, + "learning_rate": 9.558447904404416e-08, + "loss": 3.8519, + "step": 1059500 + }, + { + "epoch": 11.77, + "learning_rate": 9.557059725029429e-08, + "loss": 3.8283, + "step": 1060000 + }, + { + "epoch": 11.78, + "learning_rate": 9.555671545654442e-08, + "loss": 3.8451, + "step": 1060500 + }, + { + "epoch": 11.78, + "learning_rate": 9.554283366279456e-08, + "loss": 3.8543, + "step": 1061000 + }, + { + "epoch": 11.79, + "learning_rate": 9.55289518690447e-08, + "loss": 3.8585, + "step": 1061500 + }, + { + "epoch": 11.79, + "learning_rate": 9.551507007529484e-08, + "loss": 3.8563, + "step": 1062000 + }, + { + "epoch": 11.8, + "learning_rate": 9.550118828154499e-08, + "loss": 3.852, + "step": 1062500 + }, + { + "epoch": 11.81, + "learning_rate": 9.548730648779513e-08, + "loss": 3.854, + "step": 1063000 + }, + { + "epoch": 11.81, + "learning_rate": 9.547342469404526e-08, + "loss": 3.8477, + "step": 1063500 + }, + { + "epoch": 11.82, + "learning_rate": 9.54595429002954e-08, + "loss": 3.8522, + "step": 1064000 + }, + { + "epoch": 11.82, + "learning_rate": 9.544566110654554e-08, + "loss": 3.8577, + "step": 1064500 + }, + { + "epoch": 11.83, + "learning_rate": 9.543177931279568e-08, + "loss": 3.861, + "step": 1065000 + }, + { + "epoch": 11.83, + "learning_rate": 9.541789751904581e-08, + "loss": 3.8263, + "step": 1065500 + }, + { + "epoch": 11.84, + "learning_rate": 9.540401572529596e-08, + "loss": 3.8297, + "step": 1066000 + }, + { + "epoch": 11.84, + "learning_rate": 9.539013393154608e-08, + "loss": 3.8488, + "step": 1066500 + }, + { + "epoch": 11.85, + "learning_rate": 9.537625213779623e-08, + "loss": 3.8334, + "step": 1067000 + }, + { + "epoch": 11.86, + "learning_rate": 9.536237034404637e-08, + "loss": 3.8351, + "step": 1067500 + }, + { + "epoch": 11.86, + "learning_rate": 9.534848855029651e-08, + "loss": 3.8691, + "step": 1068000 + }, + { + "epoch": 11.87, + "learning_rate": 9.533460675654665e-08, + "loss": 3.8423, + "step": 1068500 + }, + { + "epoch": 11.87, + "learning_rate": 9.532072496279678e-08, + "loss": 3.8619, + "step": 1069000 + }, + { + "epoch": 11.88, + "learning_rate": 9.530684316904693e-08, + "loss": 3.8632, + "step": 1069500 + }, + { + "epoch": 11.88, + "learning_rate": 9.529296137529707e-08, + "loss": 3.8607, + "step": 1070000 + }, + { + "epoch": 11.89, + "learning_rate": 9.527907958154721e-08, + "loss": 3.8384, + "step": 1070500 + }, + { + "epoch": 11.89, + "learning_rate": 9.526519778779735e-08, + "loss": 3.8599, + "step": 1071000 + }, + { + "epoch": 11.9, + "learning_rate": 9.525131599404748e-08, + "loss": 3.838, + "step": 1071500 + }, + { + "epoch": 11.91, + "learning_rate": 9.523743420029762e-08, + "loss": 3.8405, + "step": 1072000 + }, + { + "epoch": 11.91, + "learning_rate": 9.522355240654775e-08, + "loss": 3.8304, + "step": 1072500 + }, + { + "epoch": 11.92, + "learning_rate": 9.52096706127979e-08, + "loss": 3.8313, + "step": 1073000 + }, + { + "epoch": 11.92, + "learning_rate": 9.519578881904804e-08, + "loss": 3.8397, + "step": 1073500 + }, + { + "epoch": 11.93, + "learning_rate": 9.518190702529818e-08, + "loss": 3.8488, + "step": 1074000 + }, + { + "epoch": 11.93, + "learning_rate": 9.516802523154831e-08, + "loss": 3.8463, + "step": 1074500 + }, + { + "epoch": 11.94, + "learning_rate": 9.515414343779845e-08, + "loss": 3.8501, + "step": 1075000 + }, + { + "epoch": 11.94, + "learning_rate": 9.514026164404859e-08, + "loss": 3.8566, + "step": 1075500 + }, + { + "epoch": 11.95, + "learning_rate": 9.512637985029874e-08, + "loss": 3.8396, + "step": 1076000 + }, + { + "epoch": 11.96, + "learning_rate": 9.511249805654888e-08, + "loss": 3.8417, + "step": 1076500 + }, + { + "epoch": 11.96, + "learning_rate": 9.509861626279902e-08, + "loss": 3.8362, + "step": 1077000 + }, + { + "epoch": 11.97, + "learning_rate": 9.508473446904915e-08, + "loss": 3.8552, + "step": 1077500 + }, + { + "epoch": 11.97, + "learning_rate": 9.507085267529929e-08, + "loss": 3.8504, + "step": 1078000 + }, + { + "epoch": 11.98, + "learning_rate": 9.505697088154942e-08, + "loss": 3.8471, + "step": 1078500 + }, + { + "epoch": 11.98, + "learning_rate": 9.504308908779956e-08, + "loss": 3.8452, + "step": 1079000 + }, + { + "epoch": 11.99, + "learning_rate": 9.50292072940497e-08, + "loss": 3.8309, + "step": 1079500 + }, + { + "epoch": 11.99, + "learning_rate": 9.501532550029985e-08, + "loss": 3.8516, + "step": 1080000 + }, + { + "epoch": 12.0, + "learning_rate": 9.500144370654998e-08, + "loss": 3.83, + "step": 1080500 + }, + { + "epoch": 12.0, + "eval_loss": 3.888516426086426, + "eval_runtime": 6.3054, + "eval_samples_per_second": 246.455, + "step": 1080552 + }, + { + "epoch": 12.0, + "learning_rate": 9.498756191280012e-08, + "loss": 3.8563, + "step": 1081000 + }, + { + "epoch": 12.01, + "learning_rate": 9.497368011905026e-08, + "loss": 3.8492, + "step": 1081500 + }, + { + "epoch": 12.02, + "learning_rate": 9.49597983253004e-08, + "loss": 3.851, + "step": 1082000 + }, + { + "epoch": 12.02, + "learning_rate": 9.494591653155054e-08, + "loss": 3.8411, + "step": 1082500 + }, + { + "epoch": 12.03, + "learning_rate": 9.493203473780067e-08, + "loss": 3.8741, + "step": 1083000 + }, + { + "epoch": 12.03, + "learning_rate": 9.491815294405082e-08, + "loss": 3.8397, + "step": 1083500 + }, + { + "epoch": 12.04, + "learning_rate": 9.490427115030096e-08, + "loss": 3.8422, + "step": 1084000 + }, + { + "epoch": 12.04, + "learning_rate": 9.489038935655109e-08, + "loss": 3.8456, + "step": 1084500 + }, + { + "epoch": 12.05, + "learning_rate": 9.487650756280123e-08, + "loss": 3.8559, + "step": 1085000 + }, + { + "epoch": 12.05, + "learning_rate": 9.486262576905137e-08, + "loss": 3.8318, + "step": 1085500 + }, + { + "epoch": 12.06, + "learning_rate": 9.48487439753015e-08, + "loss": 3.8484, + "step": 1086000 + }, + { + "epoch": 12.07, + "learning_rate": 9.483486218155164e-08, + "loss": 3.8378, + "step": 1086500 + }, + { + "epoch": 12.07, + "learning_rate": 9.482098038780179e-08, + "loss": 3.8473, + "step": 1087000 + }, + { + "epoch": 12.08, + "learning_rate": 9.480709859405193e-08, + "loss": 3.8481, + "step": 1087500 + }, + { + "epoch": 12.08, + "learning_rate": 9.479321680030207e-08, + "loss": 3.8382, + "step": 1088000 + }, + { + "epoch": 12.09, + "learning_rate": 9.47793350065522e-08, + "loss": 3.8414, + "step": 1088500 + }, + { + "epoch": 12.09, + "learning_rate": 9.476545321280234e-08, + "loss": 3.861, + "step": 1089000 + }, + { + "epoch": 12.1, + "learning_rate": 9.475157141905248e-08, + "loss": 3.8713, + "step": 1089500 + }, + { + "epoch": 12.1, + "learning_rate": 9.473768962530263e-08, + "loss": 3.8539, + "step": 1090000 + }, + { + "epoch": 12.11, + "learning_rate": 9.472380783155275e-08, + "loss": 3.8533, + "step": 1090500 + }, + { + "epoch": 12.12, + "learning_rate": 9.47099260378029e-08, + "loss": 3.8408, + "step": 1091000 + }, + { + "epoch": 12.12, + "learning_rate": 9.469604424405303e-08, + "loss": 3.8527, + "step": 1091500 + }, + { + "epoch": 12.13, + "learning_rate": 9.468216245030317e-08, + "loss": 3.8252, + "step": 1092000 + }, + { + "epoch": 12.13, + "learning_rate": 9.466828065655331e-08, + "loss": 3.8371, + "step": 1092500 + }, + { + "epoch": 12.14, + "learning_rate": 9.465439886280345e-08, + "loss": 3.8442, + "step": 1093000 + }, + { + "epoch": 12.14, + "learning_rate": 9.46405170690536e-08, + "loss": 3.8413, + "step": 1093500 + }, + { + "epoch": 12.15, + "learning_rate": 9.462663527530374e-08, + "loss": 3.8494, + "step": 1094000 + }, + { + "epoch": 12.15, + "learning_rate": 9.461275348155387e-08, + "loss": 3.8702, + "step": 1094500 + }, + { + "epoch": 12.16, + "learning_rate": 9.459887168780401e-08, + "loss": 3.8266, + "step": 1095000 + }, + { + "epoch": 12.17, + "learning_rate": 9.458498989405415e-08, + "loss": 3.8341, + "step": 1095500 + }, + { + "epoch": 12.17, + "learning_rate": 9.457110810030428e-08, + "loss": 3.8437, + "step": 1096000 + }, + { + "epoch": 12.18, + "learning_rate": 9.455722630655442e-08, + "loss": 3.8279, + "step": 1096500 + }, + { + "epoch": 12.18, + "learning_rate": 9.454334451280455e-08, + "loss": 3.8534, + "step": 1097000 + }, + { + "epoch": 12.19, + "learning_rate": 9.45294627190547e-08, + "loss": 3.8467, + "step": 1097500 + }, + { + "epoch": 12.19, + "learning_rate": 9.451558092530484e-08, + "loss": 3.8438, + "step": 1098000 + }, + { + "epoch": 12.2, + "learning_rate": 9.450169913155498e-08, + "loss": 3.818, + "step": 1098500 + }, + { + "epoch": 12.2, + "learning_rate": 9.448781733780512e-08, + "loss": 3.8491, + "step": 1099000 + }, + { + "epoch": 12.21, + "learning_rate": 9.447393554405526e-08, + "loss": 3.8233, + "step": 1099500 + }, + { + "epoch": 12.22, + "learning_rate": 9.446005375030539e-08, + "loss": 3.8397, + "step": 1100000 + }, + { + "epoch": 12.22, + "learning_rate": 9.444617195655553e-08, + "loss": 3.8621, + "step": 1100500 + }, + { + "epoch": 12.23, + "learning_rate": 9.443229016280568e-08, + "loss": 3.852, + "step": 1101000 + }, + { + "epoch": 12.23, + "learning_rate": 9.441840836905582e-08, + "loss": 3.8483, + "step": 1101500 + }, + { + "epoch": 12.24, + "learning_rate": 9.440452657530595e-08, + "loss": 3.8358, + "step": 1102000 + }, + { + "epoch": 12.24, + "learning_rate": 9.439064478155609e-08, + "loss": 3.8365, + "step": 1102500 + }, + { + "epoch": 12.25, + "learning_rate": 9.437676298780622e-08, + "loss": 3.8265, + "step": 1103000 + }, + { + "epoch": 12.25, + "learning_rate": 9.436288119405636e-08, + "loss": 3.8515, + "step": 1103500 + }, + { + "epoch": 12.26, + "learning_rate": 9.43489994003065e-08, + "loss": 3.8318, + "step": 1104000 + }, + { + "epoch": 12.27, + "learning_rate": 9.433511760655665e-08, + "loss": 3.8362, + "step": 1104500 + }, + { + "epoch": 12.27, + "learning_rate": 9.432123581280679e-08, + "loss": 3.8564, + "step": 1105000 + }, + { + "epoch": 12.28, + "learning_rate": 9.430735401905692e-08, + "loss": 3.8415, + "step": 1105500 + }, + { + "epoch": 12.28, + "learning_rate": 9.429347222530706e-08, + "loss": 3.8442, + "step": 1106000 + }, + { + "epoch": 12.29, + "learning_rate": 9.42795904315572e-08, + "loss": 3.8349, + "step": 1106500 + }, + { + "epoch": 12.29, + "learning_rate": 9.426570863780734e-08, + "loss": 3.8474, + "step": 1107000 + }, + { + "epoch": 12.3, + "learning_rate": 9.425182684405749e-08, + "loss": 3.8379, + "step": 1107500 + }, + { + "epoch": 12.3, + "learning_rate": 9.423794505030761e-08, + "loss": 3.8386, + "step": 1108000 + }, + { + "epoch": 12.31, + "learning_rate": 9.422406325655776e-08, + "loss": 3.8351, + "step": 1108500 + }, + { + "epoch": 12.32, + "learning_rate": 9.421018146280789e-08, + "loss": 3.8518, + "step": 1109000 + }, + { + "epoch": 12.32, + "learning_rate": 9.419629966905803e-08, + "loss": 3.8268, + "step": 1109500 + }, + { + "epoch": 12.33, + "learning_rate": 9.418241787530817e-08, + "loss": 3.8354, + "step": 1110000 + }, + { + "epoch": 12.33, + "learning_rate": 9.416853608155831e-08, + "loss": 3.8409, + "step": 1110500 + }, + { + "epoch": 12.34, + "learning_rate": 9.415465428780844e-08, + "loss": 3.8453, + "step": 1111000 + }, + { + "epoch": 12.34, + "learning_rate": 9.414077249405858e-08, + "loss": 3.8582, + "step": 1111500 + }, + { + "epoch": 12.35, + "learning_rate": 9.412689070030873e-08, + "loss": 3.8493, + "step": 1112000 + }, + { + "epoch": 12.35, + "learning_rate": 9.411300890655887e-08, + "loss": 3.8446, + "step": 1112500 + }, + { + "epoch": 12.36, + "learning_rate": 9.409912711280901e-08, + "loss": 3.8368, + "step": 1113000 + }, + { + "epoch": 12.37, + "learning_rate": 9.408524531905915e-08, + "loss": 3.8305, + "step": 1113500 + }, + { + "epoch": 12.37, + "learning_rate": 9.407136352530928e-08, + "loss": 3.8499, + "step": 1114000 + }, + { + "epoch": 12.38, + "learning_rate": 9.405748173155942e-08, + "loss": 3.86, + "step": 1114500 + }, + { + "epoch": 12.38, + "learning_rate": 9.404359993780955e-08, + "loss": 3.8443, + "step": 1115000 + }, + { + "epoch": 12.39, + "learning_rate": 9.40297181440597e-08, + "loss": 3.8679, + "step": 1115500 + }, + { + "epoch": 12.39, + "learning_rate": 9.401583635030984e-08, + "loss": 3.8378, + "step": 1116000 + }, + { + "epoch": 12.4, + "learning_rate": 9.400195455655998e-08, + "loss": 3.8292, + "step": 1116500 + }, + { + "epoch": 12.4, + "learning_rate": 9.398807276281011e-08, + "loss": 3.8584, + "step": 1117000 + }, + { + "epoch": 12.41, + "learning_rate": 9.397419096906025e-08, + "loss": 3.866, + "step": 1117500 + }, + { + "epoch": 12.42, + "learning_rate": 9.39603091753104e-08, + "loss": 3.8483, + "step": 1118000 + }, + { + "epoch": 12.42, + "learning_rate": 9.394642738156054e-08, + "loss": 3.8627, + "step": 1118500 + }, + { + "epoch": 12.43, + "learning_rate": 9.393254558781068e-08, + "loss": 3.8488, + "step": 1119000 + }, + { + "epoch": 12.43, + "learning_rate": 9.391866379406081e-08, + "loss": 3.8252, + "step": 1119500 + }, + { + "epoch": 12.44, + "learning_rate": 9.390478200031095e-08, + "loss": 3.8566, + "step": 1120000 + }, + { + "epoch": 12.44, + "learning_rate": 9.389090020656109e-08, + "loss": 3.8523, + "step": 1120500 + }, + { + "epoch": 12.45, + "learning_rate": 9.387701841281122e-08, + "loss": 3.8203, + "step": 1121000 + }, + { + "epoch": 12.45, + "learning_rate": 9.386313661906136e-08, + "loss": 3.8511, + "step": 1121500 + }, + { + "epoch": 12.46, + "learning_rate": 9.38492548253115e-08, + "loss": 3.8347, + "step": 1122000 + }, + { + "epoch": 12.47, + "learning_rate": 9.383537303156163e-08, + "loss": 3.8507, + "step": 1122500 + }, + { + "epoch": 12.47, + "learning_rate": 9.382149123781178e-08, + "loss": 3.8291, + "step": 1123000 + }, + { + "epoch": 12.48, + "learning_rate": 9.380760944406192e-08, + "loss": 3.8277, + "step": 1123500 + }, + { + "epoch": 12.48, + "learning_rate": 9.379372765031206e-08, + "loss": 3.8396, + "step": 1124000 + }, + { + "epoch": 12.49, + "learning_rate": 9.37798458565622e-08, + "loss": 3.8276, + "step": 1124500 + }, + { + "epoch": 12.49, + "learning_rate": 9.376596406281235e-08, + "loss": 3.8473, + "step": 1125000 + }, + { + "epoch": 12.5, + "learning_rate": 9.375208226906248e-08, + "loss": 3.8325, + "step": 1125500 + }, + { + "epoch": 12.5, + "learning_rate": 9.373820047531262e-08, + "loss": 3.8774, + "step": 1126000 + }, + { + "epoch": 12.51, + "learning_rate": 9.372431868156275e-08, + "loss": 3.8307, + "step": 1126500 + }, + { + "epoch": 12.52, + "learning_rate": 9.371043688781289e-08, + "loss": 3.8285, + "step": 1127000 + }, + { + "epoch": 12.52, + "learning_rate": 9.369655509406303e-08, + "loss": 3.8353, + "step": 1127500 + }, + { + "epoch": 12.53, + "learning_rate": 9.368267330031316e-08, + "loss": 3.8449, + "step": 1128000 + }, + { + "epoch": 12.53, + "learning_rate": 9.36687915065633e-08, + "loss": 3.8381, + "step": 1128500 + }, + { + "epoch": 12.54, + "learning_rate": 9.365490971281344e-08, + "loss": 3.8458, + "step": 1129000 + }, + { + "epoch": 12.54, + "learning_rate": 9.364102791906359e-08, + "loss": 3.8363, + "step": 1129500 + }, + { + "epoch": 12.55, + "learning_rate": 9.362714612531373e-08, + "loss": 3.8648, + "step": 1130000 + }, + { + "epoch": 12.55, + "learning_rate": 9.361326433156387e-08, + "loss": 3.8383, + "step": 1130500 + }, + { + "epoch": 12.56, + "learning_rate": 9.3599382537814e-08, + "loss": 3.8339, + "step": 1131000 + }, + { + "epoch": 12.57, + "learning_rate": 9.358550074406414e-08, + "loss": 3.8529, + "step": 1131500 + }, + { + "epoch": 12.57, + "learning_rate": 9.357161895031428e-08, + "loss": 3.8507, + "step": 1132000 + }, + { + "epoch": 12.58, + "learning_rate": 9.355773715656441e-08, + "loss": 3.8485, + "step": 1132500 + }, + { + "epoch": 12.58, + "learning_rate": 9.354385536281456e-08, + "loss": 3.8221, + "step": 1133000 + }, + { + "epoch": 12.59, + "learning_rate": 9.352997356906469e-08, + "loss": 3.8151, + "step": 1133500 + }, + { + "epoch": 12.59, + "learning_rate": 9.351609177531483e-08, + "loss": 3.8608, + "step": 1134000 + }, + { + "epoch": 12.6, + "learning_rate": 9.350220998156497e-08, + "loss": 3.83, + "step": 1134500 + }, + { + "epoch": 12.6, + "learning_rate": 9.348832818781511e-08, + "loss": 3.839, + "step": 1135000 + }, + { + "epoch": 12.61, + "learning_rate": 9.347444639406525e-08, + "loss": 3.8346, + "step": 1135500 + }, + { + "epoch": 12.62, + "learning_rate": 9.34605646003154e-08, + "loss": 3.8454, + "step": 1136000 + }, + { + "epoch": 12.62, + "learning_rate": 9.344668280656553e-08, + "loss": 3.8569, + "step": 1136500 + }, + { + "epoch": 12.63, + "learning_rate": 9.343280101281567e-08, + "loss": 3.83, + "step": 1137000 + }, + { + "epoch": 12.63, + "learning_rate": 9.341891921906581e-08, + "loss": 3.8611, + "step": 1137500 + }, + { + "epoch": 12.64, + "learning_rate": 9.340503742531595e-08, + "loss": 3.855, + "step": 1138000 + }, + { + "epoch": 12.64, + "learning_rate": 9.339115563156608e-08, + "loss": 3.8463, + "step": 1138500 + }, + { + "epoch": 12.65, + "learning_rate": 9.337727383781622e-08, + "loss": 3.8259, + "step": 1139000 + }, + { + "epoch": 12.65, + "learning_rate": 9.336339204406635e-08, + "loss": 3.8309, + "step": 1139500 + }, + { + "epoch": 12.66, + "learning_rate": 9.33495102503165e-08, + "loss": 3.8177, + "step": 1140000 + }, + { + "epoch": 12.67, + "learning_rate": 9.333562845656664e-08, + "loss": 3.8511, + "step": 1140500 + }, + { + "epoch": 12.67, + "learning_rate": 9.332174666281678e-08, + "loss": 3.8532, + "step": 1141000 + }, + { + "epoch": 12.68, + "learning_rate": 9.330786486906692e-08, + "loss": 3.8362, + "step": 1141500 + }, + { + "epoch": 12.68, + "learning_rate": 9.329398307531705e-08, + "loss": 3.8447, + "step": 1142000 + }, + { + "epoch": 12.69, + "learning_rate": 9.328010128156719e-08, + "loss": 3.8472, + "step": 1142500 + }, + { + "epoch": 12.69, + "learning_rate": 9.326621948781734e-08, + "loss": 3.8493, + "step": 1143000 + }, + { + "epoch": 12.7, + "learning_rate": 9.325233769406748e-08, + "loss": 3.8513, + "step": 1143500 + }, + { + "epoch": 12.7, + "learning_rate": 9.323845590031762e-08, + "loss": 3.856, + "step": 1144000 + }, + { + "epoch": 12.71, + "learning_rate": 9.322457410656775e-08, + "loss": 3.837, + "step": 1144500 + }, + { + "epoch": 12.72, + "learning_rate": 9.321069231281789e-08, + "loss": 3.8405, + "step": 1145000 + }, + { + "epoch": 12.72, + "learning_rate": 9.319681051906802e-08, + "loss": 3.8275, + "step": 1145500 + }, + { + "epoch": 12.73, + "learning_rate": 9.318292872531816e-08, + "loss": 3.844, + "step": 1146000 + }, + { + "epoch": 12.73, + "learning_rate": 9.31690469315683e-08, + "loss": 3.8407, + "step": 1146500 + }, + { + "epoch": 12.74, + "learning_rate": 9.315516513781845e-08, + "loss": 3.8365, + "step": 1147000 + }, + { + "epoch": 12.74, + "learning_rate": 9.314128334406858e-08, + "loss": 3.8468, + "step": 1147500 + }, + { + "epoch": 12.75, + "learning_rate": 9.312740155031872e-08, + "loss": 3.8471, + "step": 1148000 + }, + { + "epoch": 12.75, + "learning_rate": 9.311351975656886e-08, + "loss": 3.8566, + "step": 1148500 + }, + { + "epoch": 12.76, + "learning_rate": 9.3099637962819e-08, + "loss": 3.8344, + "step": 1149000 + }, + { + "epoch": 12.77, + "learning_rate": 9.308575616906914e-08, + "loss": 3.8387, + "step": 1149500 + }, + { + "epoch": 12.77, + "learning_rate": 9.307187437531929e-08, + "loss": 3.8406, + "step": 1150000 + }, + { + "epoch": 12.78, + "learning_rate": 9.305799258156942e-08, + "loss": 3.8326, + "step": 1150500 + }, + { + "epoch": 12.78, + "learning_rate": 9.304411078781956e-08, + "loss": 3.8347, + "step": 1151000 + }, + { + "epoch": 12.79, + "learning_rate": 9.303022899406969e-08, + "loss": 3.8513, + "step": 1151500 + }, + { + "epoch": 12.79, + "learning_rate": 9.301634720031983e-08, + "loss": 3.8445, + "step": 1152000 + }, + { + "epoch": 12.8, + "learning_rate": 9.300246540656997e-08, + "loss": 3.833, + "step": 1152500 + }, + { + "epoch": 12.8, + "learning_rate": 9.298858361282011e-08, + "loss": 3.8304, + "step": 1153000 + }, + { + "epoch": 12.81, + "learning_rate": 9.297470181907024e-08, + "loss": 3.8581, + "step": 1153500 + }, + { + "epoch": 12.82, + "learning_rate": 9.296082002532039e-08, + "loss": 3.8312, + "step": 1154000 + }, + { + "epoch": 12.82, + "learning_rate": 9.294693823157053e-08, + "loss": 3.8399, + "step": 1154500 + }, + { + "epoch": 12.83, + "learning_rate": 9.293305643782067e-08, + "loss": 3.8477, + "step": 1155000 + }, + { + "epoch": 12.83, + "learning_rate": 9.291917464407081e-08, + "loss": 3.8314, + "step": 1155500 + }, + { + "epoch": 12.84, + "learning_rate": 9.290529285032094e-08, + "loss": 3.8202, + "step": 1156000 + }, + { + "epoch": 12.84, + "learning_rate": 9.289141105657108e-08, + "loss": 3.8501, + "step": 1156500 + }, + { + "epoch": 12.85, + "learning_rate": 9.287752926282121e-08, + "loss": 3.8339, + "step": 1157000 + }, + { + "epoch": 12.85, + "learning_rate": 9.286364746907135e-08, + "loss": 3.8303, + "step": 1157500 + }, + { + "epoch": 12.86, + "learning_rate": 9.28497656753215e-08, + "loss": 3.836, + "step": 1158000 + }, + { + "epoch": 12.87, + "learning_rate": 9.283588388157164e-08, + "loss": 3.8331, + "step": 1158500 + }, + { + "epoch": 12.87, + "learning_rate": 9.282200208782177e-08, + "loss": 3.8067, + "step": 1159000 + }, + { + "epoch": 12.88, + "learning_rate": 9.280812029407191e-08, + "loss": 3.8322, + "step": 1159500 + }, + { + "epoch": 12.88, + "learning_rate": 9.279423850032205e-08, + "loss": 3.8508, + "step": 1160000 + }, + { + "epoch": 12.89, + "learning_rate": 9.27803567065722e-08, + "loss": 3.8496, + "step": 1160500 + }, + { + "epoch": 12.89, + "learning_rate": 9.276647491282234e-08, + "loss": 3.8249, + "step": 1161000 + }, + { + "epoch": 12.9, + "learning_rate": 9.275259311907248e-08, + "loss": 3.8375, + "step": 1161500 + }, + { + "epoch": 12.9, + "learning_rate": 9.273871132532261e-08, + "loss": 3.8365, + "step": 1162000 + }, + { + "epoch": 12.91, + "learning_rate": 9.272482953157275e-08, + "loss": 3.8506, + "step": 1162500 + }, + { + "epoch": 12.92, + "learning_rate": 9.271094773782288e-08, + "loss": 3.8463, + "step": 1163000 + }, + { + "epoch": 12.92, + "learning_rate": 9.269706594407302e-08, + "loss": 3.8315, + "step": 1163500 + }, + { + "epoch": 12.93, + "learning_rate": 9.268318415032316e-08, + "loss": 3.8338, + "step": 1164000 + }, + { + "epoch": 12.93, + "learning_rate": 9.26693023565733e-08, + "loss": 3.8307, + "step": 1164500 + }, + { + "epoch": 12.94, + "learning_rate": 9.265542056282344e-08, + "loss": 3.8092, + "step": 1165000 + }, + { + "epoch": 12.94, + "learning_rate": 9.264153876907358e-08, + "loss": 3.8233, + "step": 1165500 + }, + { + "epoch": 12.95, + "learning_rate": 9.262765697532372e-08, + "loss": 3.851, + "step": 1166000 + }, + { + "epoch": 12.95, + "learning_rate": 9.261377518157386e-08, + "loss": 3.8444, + "step": 1166500 + }, + { + "epoch": 12.96, + "learning_rate": 9.2599893387824e-08, + "loss": 3.8309, + "step": 1167000 + }, + { + "epoch": 12.97, + "learning_rate": 9.258601159407413e-08, + "loss": 3.8357, + "step": 1167500 + }, + { + "epoch": 12.97, + "learning_rate": 9.257212980032428e-08, + "loss": 3.8593, + "step": 1168000 + }, + { + "epoch": 12.98, + "learning_rate": 9.255824800657442e-08, + "loss": 3.8359, + "step": 1168500 + }, + { + "epoch": 12.98, + "learning_rate": 9.254436621282455e-08, + "loss": 3.8585, + "step": 1169000 + }, + { + "epoch": 12.99, + "learning_rate": 9.253048441907469e-08, + "loss": 3.8193, + "step": 1169500 + }, + { + "epoch": 12.99, + "learning_rate": 9.251660262532482e-08, + "loss": 3.8318, + "step": 1170000 + }, + { + "epoch": 13.0, + "learning_rate": 9.250272083157496e-08, + "loss": 3.8407, + "step": 1170500 + }, + { + "epoch": 13.0, + "eval_loss": 3.8813984394073486, + "eval_runtime": 6.3111, + "eval_samples_per_second": 246.232, + "step": 1170598 + }, + { + "epoch": 13.0, + "learning_rate": 9.24888390378251e-08, + "loss": 3.8446, + "step": 1171000 + }, + { + "epoch": 13.01, + "learning_rate": 9.247495724407525e-08, + "loss": 3.8684, + "step": 1171500 + }, + { + "epoch": 13.02, + "learning_rate": 9.246107545032539e-08, + "loss": 3.844, + "step": 1172000 + }, + { + "epoch": 13.02, + "learning_rate": 9.244719365657553e-08, + "loss": 3.82, + "step": 1172500 + }, + { + "epoch": 13.03, + "learning_rate": 9.243331186282566e-08, + "loss": 3.8427, + "step": 1173000 + }, + { + "epoch": 13.03, + "learning_rate": 9.24194300690758e-08, + "loss": 3.8248, + "step": 1173500 + }, + { + "epoch": 13.04, + "learning_rate": 9.240554827532594e-08, + "loss": 3.8373, + "step": 1174000 + }, + { + "epoch": 13.04, + "learning_rate": 9.239166648157609e-08, + "loss": 3.8277, + "step": 1174500 + }, + { + "epoch": 13.05, + "learning_rate": 9.237778468782622e-08, + "loss": 3.8483, + "step": 1175000 + }, + { + "epoch": 13.05, + "learning_rate": 9.236390289407636e-08, + "loss": 3.8482, + "step": 1175500 + }, + { + "epoch": 13.06, + "learning_rate": 9.235002110032649e-08, + "loss": 3.843, + "step": 1176000 + }, + { + "epoch": 13.07, + "learning_rate": 9.233613930657663e-08, + "loss": 3.8175, + "step": 1176500 + }, + { + "epoch": 13.07, + "learning_rate": 9.232225751282677e-08, + "loss": 3.8232, + "step": 1177000 + }, + { + "epoch": 13.08, + "learning_rate": 9.230837571907691e-08, + "loss": 3.8571, + "step": 1177500 + }, + { + "epoch": 13.08, + "learning_rate": 9.229449392532706e-08, + "loss": 3.8311, + "step": 1178000 + }, + { + "epoch": 13.09, + "learning_rate": 9.228061213157718e-08, + "loss": 3.8371, + "step": 1178500 + }, + { + "epoch": 13.09, + "learning_rate": 9.226673033782733e-08, + "loss": 3.8236, + "step": 1179000 + }, + { + "epoch": 13.1, + "learning_rate": 9.225284854407747e-08, + "loss": 3.838, + "step": 1179500 + }, + { + "epoch": 13.1, + "learning_rate": 9.223896675032761e-08, + "loss": 3.8577, + "step": 1180000 + }, + { + "epoch": 13.11, + "learning_rate": 9.222508495657775e-08, + "loss": 3.825, + "step": 1180500 + }, + { + "epoch": 13.12, + "learning_rate": 9.221120316282788e-08, + "loss": 3.8368, + "step": 1181000 + }, + { + "epoch": 13.12, + "learning_rate": 9.219732136907802e-08, + "loss": 3.8097, + "step": 1181500 + }, + { + "epoch": 13.13, + "learning_rate": 9.218343957532815e-08, + "loss": 3.8258, + "step": 1182000 + }, + { + "epoch": 13.13, + "learning_rate": 9.21695577815783e-08, + "loss": 3.8237, + "step": 1182500 + }, + { + "epoch": 13.14, + "learning_rate": 9.215567598782844e-08, + "loss": 3.8195, + "step": 1183000 + }, + { + "epoch": 13.14, + "learning_rate": 9.214179419407858e-08, + "loss": 3.8304, + "step": 1183500 + }, + { + "epoch": 13.15, + "learning_rate": 9.212791240032872e-08, + "loss": 3.8469, + "step": 1184000 + }, + { + "epoch": 13.15, + "learning_rate": 9.211403060657885e-08, + "loss": 3.8369, + "step": 1184500 + }, + { + "epoch": 13.16, + "learning_rate": 9.2100148812829e-08, + "loss": 3.8422, + "step": 1185000 + }, + { + "epoch": 13.17, + "learning_rate": 9.208626701907914e-08, + "loss": 3.8232, + "step": 1185500 + }, + { + "epoch": 13.17, + "learning_rate": 9.207238522532928e-08, + "loss": 3.8285, + "step": 1186000 + }, + { + "epoch": 13.18, + "learning_rate": 9.205850343157942e-08, + "loss": 3.8418, + "step": 1186500 + }, + { + "epoch": 13.18, + "learning_rate": 9.204462163782955e-08, + "loss": 3.8435, + "step": 1187000 + }, + { + "epoch": 13.19, + "learning_rate": 9.203073984407968e-08, + "loss": 3.8342, + "step": 1187500 + }, + { + "epoch": 13.19, + "learning_rate": 9.201685805032982e-08, + "loss": 3.8415, + "step": 1188000 + }, + { + "epoch": 13.2, + "learning_rate": 9.200297625657996e-08, + "loss": 3.8621, + "step": 1188500 + }, + { + "epoch": 13.2, + "learning_rate": 9.19890944628301e-08, + "loss": 3.8267, + "step": 1189000 + }, + { + "epoch": 13.21, + "learning_rate": 9.197521266908025e-08, + "loss": 3.8353, + "step": 1189500 + }, + { + "epoch": 13.22, + "learning_rate": 9.196133087533038e-08, + "loss": 3.8356, + "step": 1190000 + }, + { + "epoch": 13.22, + "learning_rate": 9.194744908158052e-08, + "loss": 3.8301, + "step": 1190500 + }, + { + "epoch": 13.23, + "learning_rate": 9.193356728783066e-08, + "loss": 3.8331, + "step": 1191000 + }, + { + "epoch": 13.23, + "learning_rate": 9.19196854940808e-08, + "loss": 3.8421, + "step": 1191500 + }, + { + "epoch": 13.24, + "learning_rate": 9.190580370033095e-08, + "loss": 3.8236, + "step": 1192000 + }, + { + "epoch": 13.24, + "learning_rate": 9.189192190658108e-08, + "loss": 3.8194, + "step": 1192500 + }, + { + "epoch": 13.25, + "learning_rate": 9.187804011283122e-08, + "loss": 3.8467, + "step": 1193000 + }, + { + "epoch": 13.25, + "learning_rate": 9.186415831908135e-08, + "loss": 3.8361, + "step": 1193500 + }, + { + "epoch": 13.26, + "learning_rate": 9.185027652533149e-08, + "loss": 3.8425, + "step": 1194000 + }, + { + "epoch": 13.27, + "learning_rate": 9.183639473158163e-08, + "loss": 3.8211, + "step": 1194500 + }, + { + "epoch": 13.27, + "learning_rate": 9.182251293783177e-08, + "loss": 3.8441, + "step": 1195000 + }, + { + "epoch": 13.28, + "learning_rate": 9.18086311440819e-08, + "loss": 3.8334, + "step": 1195500 + }, + { + "epoch": 13.28, + "learning_rate": 9.179474935033204e-08, + "loss": 3.8259, + "step": 1196000 + }, + { + "epoch": 13.29, + "learning_rate": 9.178086755658219e-08, + "loss": 3.8333, + "step": 1196500 + }, + { + "epoch": 13.29, + "learning_rate": 9.176698576283233e-08, + "loss": 3.824, + "step": 1197000 + }, + { + "epoch": 13.3, + "learning_rate": 9.175310396908247e-08, + "loss": 3.8325, + "step": 1197500 + }, + { + "epoch": 13.3, + "learning_rate": 9.173922217533261e-08, + "loss": 3.8378, + "step": 1198000 + }, + { + "epoch": 13.31, + "learning_rate": 9.172534038158274e-08, + "loss": 3.814, + "step": 1198500 + }, + { + "epoch": 13.32, + "learning_rate": 9.171145858783288e-08, + "loss": 3.8298, + "step": 1199000 + }, + { + "epoch": 13.32, + "learning_rate": 9.169757679408301e-08, + "loss": 3.8539, + "step": 1199500 + }, + { + "epoch": 13.33, + "learning_rate": 9.168369500033316e-08, + "loss": 3.8354, + "step": 1200000 + }, + { + "epoch": 13.33, + "learning_rate": 9.16698132065833e-08, + "loss": 3.8356, + "step": 1200500 + }, + { + "epoch": 13.34, + "learning_rate": 9.165593141283343e-08, + "loss": 3.8324, + "step": 1201000 + }, + { + "epoch": 13.34, + "learning_rate": 9.164204961908357e-08, + "loss": 3.8284, + "step": 1201500 + }, + { + "epoch": 13.35, + "learning_rate": 9.162816782533371e-08, + "loss": 3.8494, + "step": 1202000 + }, + { + "epoch": 13.35, + "learning_rate": 9.161428603158385e-08, + "loss": 3.8236, + "step": 1202500 + }, + { + "epoch": 13.36, + "learning_rate": 9.1600404237834e-08, + "loss": 3.8249, + "step": 1203000 + }, + { + "epoch": 13.37, + "learning_rate": 9.158652244408414e-08, + "loss": 3.8159, + "step": 1203500 + }, + { + "epoch": 13.37, + "learning_rate": 9.157264065033427e-08, + "loss": 3.8306, + "step": 1204000 + }, + { + "epoch": 13.38, + "learning_rate": 9.155875885658441e-08, + "loss": 3.848, + "step": 1204500 + }, + { + "epoch": 13.38, + "learning_rate": 9.154487706283455e-08, + "loss": 3.8348, + "step": 1205000 + }, + { + "epoch": 13.39, + "learning_rate": 9.153099526908468e-08, + "loss": 3.8314, + "step": 1205500 + }, + { + "epoch": 13.39, + "learning_rate": 9.151711347533482e-08, + "loss": 3.8299, + "step": 1206000 + }, + { + "epoch": 13.4, + "learning_rate": 9.150323168158495e-08, + "loss": 3.8377, + "step": 1206500 + }, + { + "epoch": 13.4, + "learning_rate": 9.14893498878351e-08, + "loss": 3.8287, + "step": 1207000 + }, + { + "epoch": 13.41, + "learning_rate": 9.147546809408524e-08, + "loss": 3.8361, + "step": 1207500 + }, + { + "epoch": 13.42, + "learning_rate": 9.146158630033538e-08, + "loss": 3.8096, + "step": 1208000 + }, + { + "epoch": 13.42, + "learning_rate": 9.144770450658552e-08, + "loss": 3.8503, + "step": 1208500 + }, + { + "epoch": 13.43, + "learning_rate": 9.143382271283566e-08, + "loss": 3.8176, + "step": 1209000 + }, + { + "epoch": 13.43, + "learning_rate": 9.141994091908579e-08, + "loss": 3.8606, + "step": 1209500 + }, + { + "epoch": 13.44, + "learning_rate": 9.140605912533594e-08, + "loss": 3.8349, + "step": 1210000 + }, + { + "epoch": 13.44, + "learning_rate": 9.139217733158608e-08, + "loss": 3.8188, + "step": 1210500 + }, + { + "epoch": 13.45, + "learning_rate": 9.137829553783622e-08, + "loss": 3.8401, + "step": 1211000 + }, + { + "epoch": 13.45, + "learning_rate": 9.136441374408635e-08, + "loss": 3.8354, + "step": 1211500 + }, + { + "epoch": 13.46, + "learning_rate": 9.135053195033649e-08, + "loss": 3.8282, + "step": 1212000 + }, + { + "epoch": 13.47, + "learning_rate": 9.133665015658662e-08, + "loss": 3.8049, + "step": 1212500 + }, + { + "epoch": 13.47, + "learning_rate": 9.132276836283676e-08, + "loss": 3.8396, + "step": 1213000 + }, + { + "epoch": 13.48, + "learning_rate": 9.13088865690869e-08, + "loss": 3.8318, + "step": 1213500 + }, + { + "epoch": 13.48, + "learning_rate": 9.129500477533705e-08, + "loss": 3.8309, + "step": 1214000 + }, + { + "epoch": 13.49, + "learning_rate": 9.128112298158719e-08, + "loss": 3.8524, + "step": 1214500 + }, + { + "epoch": 13.49, + "learning_rate": 9.126724118783732e-08, + "loss": 3.8368, + "step": 1215000 + }, + { + "epoch": 13.5, + "learning_rate": 9.125335939408746e-08, + "loss": 3.8345, + "step": 1215500 + }, + { + "epoch": 13.5, + "learning_rate": 9.12394776003376e-08, + "loss": 3.8618, + "step": 1216000 + }, + { + "epoch": 13.51, + "learning_rate": 9.122559580658775e-08, + "loss": 3.836, + "step": 1216500 + }, + { + "epoch": 13.52, + "learning_rate": 9.121171401283789e-08, + "loss": 3.8349, + "step": 1217000 + }, + { + "epoch": 13.52, + "learning_rate": 9.119783221908802e-08, + "loss": 3.8282, + "step": 1217500 + }, + { + "epoch": 13.53, + "learning_rate": 9.118395042533815e-08, + "loss": 3.8279, + "step": 1218000 + }, + { + "epoch": 13.53, + "learning_rate": 9.117006863158829e-08, + "loss": 3.842, + "step": 1218500 + }, + { + "epoch": 13.54, + "learning_rate": 9.115618683783843e-08, + "loss": 3.8508, + "step": 1219000 + }, + { + "epoch": 13.54, + "learning_rate": 9.114230504408857e-08, + "loss": 3.828, + "step": 1219500 + }, + { + "epoch": 13.55, + "learning_rate": 9.112842325033871e-08, + "loss": 3.8032, + "step": 1220000 + }, + { + "epoch": 13.55, + "learning_rate": 9.111454145658886e-08, + "loss": 3.8283, + "step": 1220500 + }, + { + "epoch": 13.56, + "learning_rate": 9.110065966283899e-08, + "loss": 3.8415, + "step": 1221000 + }, + { + "epoch": 13.57, + "learning_rate": 9.108677786908913e-08, + "loss": 3.8396, + "step": 1221500 + }, + { + "epoch": 13.57, + "learning_rate": 9.107289607533927e-08, + "loss": 3.8329, + "step": 1222000 + }, + { + "epoch": 13.58, + "learning_rate": 9.105901428158941e-08, + "loss": 3.8312, + "step": 1222500 + }, + { + "epoch": 13.58, + "learning_rate": 9.104513248783955e-08, + "loss": 3.8564, + "step": 1223000 + }, + { + "epoch": 13.59, + "learning_rate": 9.103125069408968e-08, + "loss": 3.8346, + "step": 1223500 + }, + { + "epoch": 13.59, + "learning_rate": 9.101736890033981e-08, + "loss": 3.8216, + "step": 1224000 + }, + { + "epoch": 13.6, + "learning_rate": 9.100348710658996e-08, + "loss": 3.8521, + "step": 1224500 + }, + { + "epoch": 13.6, + "learning_rate": 9.09896053128401e-08, + "loss": 3.8444, + "step": 1225000 + }, + { + "epoch": 13.61, + "learning_rate": 9.097572351909024e-08, + "loss": 3.8353, + "step": 1225500 + }, + { + "epoch": 13.62, + "learning_rate": 9.096184172534038e-08, + "loss": 3.8281, + "step": 1226000 + }, + { + "epoch": 13.62, + "learning_rate": 9.094795993159051e-08, + "loss": 3.8366, + "step": 1226500 + }, + { + "epoch": 13.63, + "learning_rate": 9.093407813784065e-08, + "loss": 3.8381, + "step": 1227000 + }, + { + "epoch": 13.63, + "learning_rate": 9.09201963440908e-08, + "loss": 3.8248, + "step": 1227500 + }, + { + "epoch": 13.64, + "learning_rate": 9.090631455034094e-08, + "loss": 3.8382, + "step": 1228000 + }, + { + "epoch": 13.64, + "learning_rate": 9.089243275659108e-08, + "loss": 3.8441, + "step": 1228500 + }, + { + "epoch": 13.65, + "learning_rate": 9.087855096284121e-08, + "loss": 3.8471, + "step": 1229000 + }, + { + "epoch": 13.65, + "learning_rate": 9.086466916909135e-08, + "loss": 3.8133, + "step": 1229500 + }, + { + "epoch": 13.66, + "learning_rate": 9.085078737534148e-08, + "loss": 3.8399, + "step": 1230000 + }, + { + "epoch": 13.67, + "learning_rate": 9.083690558159162e-08, + "loss": 3.8245, + "step": 1230500 + }, + { + "epoch": 13.67, + "learning_rate": 9.082302378784176e-08, + "loss": 3.8368, + "step": 1231000 + }, + { + "epoch": 13.68, + "learning_rate": 9.080914199409191e-08, + "loss": 3.8226, + "step": 1231500 + }, + { + "epoch": 13.68, + "learning_rate": 9.079526020034204e-08, + "loss": 3.8517, + "step": 1232000 + }, + { + "epoch": 13.69, + "learning_rate": 9.078137840659218e-08, + "loss": 3.8241, + "step": 1232500 + }, + { + "epoch": 13.69, + "learning_rate": 9.076749661284232e-08, + "loss": 3.8246, + "step": 1233000 + }, + { + "epoch": 13.7, + "learning_rate": 9.075361481909246e-08, + "loss": 3.8316, + "step": 1233500 + }, + { + "epoch": 13.7, + "learning_rate": 9.07397330253426e-08, + "loss": 3.8368, + "step": 1234000 + }, + { + "epoch": 13.71, + "learning_rate": 9.072585123159275e-08, + "loss": 3.8381, + "step": 1234500 + }, + { + "epoch": 13.72, + "learning_rate": 9.071196943784288e-08, + "loss": 3.8406, + "step": 1235000 + }, + { + "epoch": 13.72, + "learning_rate": 9.069808764409302e-08, + "loss": 3.8354, + "step": 1235500 + }, + { + "epoch": 13.73, + "learning_rate": 9.068420585034315e-08, + "loss": 3.8179, + "step": 1236000 + }, + { + "epoch": 13.73, + "learning_rate": 9.067032405659329e-08, + "loss": 3.8327, + "step": 1236500 + }, + { + "epoch": 13.74, + "learning_rate": 9.065644226284343e-08, + "loss": 3.819, + "step": 1237000 + }, + { + "epoch": 13.74, + "learning_rate": 9.064256046909356e-08, + "loss": 3.8291, + "step": 1237500 + }, + { + "epoch": 13.75, + "learning_rate": 9.06286786753437e-08, + "loss": 3.825, + "step": 1238000 + }, + { + "epoch": 13.75, + "learning_rate": 9.061479688159385e-08, + "loss": 3.8373, + "step": 1238500 + }, + { + "epoch": 13.76, + "learning_rate": 9.060091508784399e-08, + "loss": 3.8331, + "step": 1239000 + }, + { + "epoch": 13.77, + "learning_rate": 9.058703329409413e-08, + "loss": 3.8387, + "step": 1239500 + }, + { + "epoch": 13.77, + "learning_rate": 9.057315150034427e-08, + "loss": 3.8382, + "step": 1240000 + }, + { + "epoch": 13.78, + "learning_rate": 9.05592697065944e-08, + "loss": 3.8133, + "step": 1240500 + }, + { + "epoch": 13.78, + "learning_rate": 9.054538791284454e-08, + "loss": 3.8421, + "step": 1241000 + }, + { + "epoch": 13.79, + "learning_rate": 9.053150611909469e-08, + "loss": 3.8277, + "step": 1241500 + }, + { + "epoch": 13.79, + "learning_rate": 9.051762432534482e-08, + "loss": 3.8341, + "step": 1242000 + }, + { + "epoch": 13.8, + "learning_rate": 9.050374253159496e-08, + "loss": 3.8259, + "step": 1242500 + }, + { + "epoch": 13.8, + "learning_rate": 9.048986073784509e-08, + "loss": 3.8276, + "step": 1243000 + }, + { + "epoch": 13.81, + "learning_rate": 9.047597894409523e-08, + "loss": 3.8129, + "step": 1243500 + }, + { + "epoch": 13.82, + "learning_rate": 9.046209715034537e-08, + "loss": 3.8465, + "step": 1244000 + }, + { + "epoch": 13.82, + "learning_rate": 9.044821535659551e-08, + "loss": 3.8372, + "step": 1244500 + }, + { + "epoch": 13.83, + "learning_rate": 9.043433356284566e-08, + "loss": 3.8241, + "step": 1245000 + }, + { + "epoch": 13.83, + "learning_rate": 9.04204517690958e-08, + "loss": 3.8219, + "step": 1245500 + }, + { + "epoch": 13.84, + "learning_rate": 9.040656997534593e-08, + "loss": 3.856, + "step": 1246000 + }, + { + "epoch": 13.84, + "learning_rate": 9.039268818159607e-08, + "loss": 3.84, + "step": 1246500 + }, + { + "epoch": 13.85, + "learning_rate": 9.037880638784621e-08, + "loss": 3.8275, + "step": 1247000 + }, + { + "epoch": 13.85, + "learning_rate": 9.036492459409635e-08, + "loss": 3.8464, + "step": 1247500 + }, + { + "epoch": 13.86, + "learning_rate": 9.035104280034648e-08, + "loss": 3.8119, + "step": 1248000 + }, + { + "epoch": 13.87, + "learning_rate": 9.033716100659663e-08, + "loss": 3.8291, + "step": 1248500 + }, + { + "epoch": 13.87, + "learning_rate": 9.032327921284675e-08, + "loss": 3.8286, + "step": 1249000 + }, + { + "epoch": 13.88, + "learning_rate": 9.03093974190969e-08, + "loss": 3.8381, + "step": 1249500 + }, + { + "epoch": 13.88, + "learning_rate": 9.029551562534704e-08, + "loss": 3.8245, + "step": 1250000 + }, + { + "epoch": 13.89, + "learning_rate": 9.028163383159718e-08, + "loss": 3.8428, + "step": 1250500 + }, + { + "epoch": 13.89, + "learning_rate": 9.026775203784732e-08, + "loss": 3.8363, + "step": 1251000 + }, + { + "epoch": 13.9, + "learning_rate": 9.025387024409745e-08, + "loss": 3.8453, + "step": 1251500 + }, + { + "epoch": 13.9, + "learning_rate": 9.02399884503476e-08, + "loss": 3.8276, + "step": 1252000 + }, + { + "epoch": 13.91, + "learning_rate": 9.022610665659774e-08, + "loss": 3.8127, + "step": 1252500 + }, + { + "epoch": 13.92, + "learning_rate": 9.021222486284788e-08, + "loss": 3.802, + "step": 1253000 + }, + { + "epoch": 13.92, + "learning_rate": 9.019834306909802e-08, + "loss": 3.8374, + "step": 1253500 + }, + { + "epoch": 13.93, + "learning_rate": 9.018446127534815e-08, + "loss": 3.8412, + "step": 1254000 + }, + { + "epoch": 13.93, + "learning_rate": 9.017057948159828e-08, + "loss": 3.8401, + "step": 1254500 + }, + { + "epoch": 13.94, + "learning_rate": 9.015669768784842e-08, + "loss": 3.8404, + "step": 1255000 + }, + { + "epoch": 13.94, + "learning_rate": 9.014281589409856e-08, + "loss": 3.827, + "step": 1255500 + }, + { + "epoch": 13.95, + "learning_rate": 9.01289341003487e-08, + "loss": 3.8031, + "step": 1256000 + }, + { + "epoch": 13.95, + "learning_rate": 9.011505230659885e-08, + "loss": 3.8386, + "step": 1256500 + }, + { + "epoch": 13.96, + "learning_rate": 9.010117051284899e-08, + "loss": 3.8371, + "step": 1257000 + }, + { + "epoch": 13.97, + "learning_rate": 9.008728871909912e-08, + "loss": 3.8458, + "step": 1257500 + }, + { + "epoch": 13.97, + "learning_rate": 9.007340692534926e-08, + "loss": 3.8242, + "step": 1258000 + }, + { + "epoch": 13.98, + "learning_rate": 9.00595251315994e-08, + "loss": 3.827, + "step": 1258500 + }, + { + "epoch": 13.98, + "learning_rate": 9.004564333784955e-08, + "loss": 3.8209, + "step": 1259000 + }, + { + "epoch": 13.99, + "learning_rate": 9.003176154409969e-08, + "loss": 3.8208, + "step": 1259500 + }, + { + "epoch": 13.99, + "learning_rate": 9.001787975034982e-08, + "loss": 3.8424, + "step": 1260000 + }, + { + "epoch": 14.0, + "learning_rate": 9.000399795659995e-08, + "loss": 3.8257, + "step": 1260500 + }, + { + "epoch": 14.0, + "eval_loss": 3.875882387161255, + "eval_runtime": 6.3086, + "eval_samples_per_second": 246.331, + "step": 1260644 + }, + { + "epoch": 14.0, + "learning_rate": 8.999011616285009e-08, + "loss": 3.8341, + "step": 1261000 + }, + { + "epoch": 14.01, + "learning_rate": 8.997623436910023e-08, + "loss": 3.829, + "step": 1261500 + }, + { + "epoch": 14.02, + "learning_rate": 8.996235257535037e-08, + "loss": 3.8324, + "step": 1262000 + }, + { + "epoch": 14.02, + "learning_rate": 8.994847078160052e-08, + "loss": 3.8381, + "step": 1262500 + }, + { + "epoch": 14.03, + "learning_rate": 8.993458898785064e-08, + "loss": 3.8326, + "step": 1263000 + }, + { + "epoch": 14.03, + "learning_rate": 8.992070719410079e-08, + "loss": 3.8233, + "step": 1263500 + }, + { + "epoch": 14.04, + "learning_rate": 8.990682540035093e-08, + "loss": 3.8228, + "step": 1264000 + }, + { + "epoch": 14.04, + "learning_rate": 8.989294360660107e-08, + "loss": 3.8312, + "step": 1264500 + }, + { + "epoch": 14.05, + "learning_rate": 8.987906181285121e-08, + "loss": 3.8304, + "step": 1265000 + }, + { + "epoch": 14.05, + "learning_rate": 8.986518001910134e-08, + "loss": 3.832, + "step": 1265500 + }, + { + "epoch": 14.06, + "learning_rate": 8.985129822535149e-08, + "loss": 3.8391, + "step": 1266000 + }, + { + "epoch": 14.07, + "learning_rate": 8.983741643160161e-08, + "loss": 3.8225, + "step": 1266500 + }, + { + "epoch": 14.07, + "learning_rate": 8.982353463785176e-08, + "loss": 3.8233, + "step": 1267000 + }, + { + "epoch": 14.08, + "learning_rate": 8.98096528441019e-08, + "loss": 3.7987, + "step": 1267500 + }, + { + "epoch": 14.08, + "learning_rate": 8.979577105035204e-08, + "loss": 3.8269, + "step": 1268000 + }, + { + "epoch": 14.09, + "learning_rate": 8.978188925660217e-08, + "loss": 3.8375, + "step": 1268500 + }, + { + "epoch": 14.09, + "learning_rate": 8.976800746285231e-08, + "loss": 3.8304, + "step": 1269000 + }, + { + "epoch": 14.1, + "learning_rate": 8.975412566910245e-08, + "loss": 3.826, + "step": 1269500 + }, + { + "epoch": 14.1, + "learning_rate": 8.97402438753526e-08, + "loss": 3.8233, + "step": 1270000 + }, + { + "epoch": 14.11, + "learning_rate": 8.972636208160274e-08, + "loss": 3.8151, + "step": 1270500 + }, + { + "epoch": 14.12, + "learning_rate": 8.971248028785288e-08, + "loss": 3.8232, + "step": 1271000 + }, + { + "epoch": 14.12, + "learning_rate": 8.969859849410301e-08, + "loss": 3.82, + "step": 1271500 + }, + { + "epoch": 14.13, + "learning_rate": 8.968471670035315e-08, + "loss": 3.8402, + "step": 1272000 + }, + { + "epoch": 14.13, + "learning_rate": 8.967083490660328e-08, + "loss": 3.8239, + "step": 1272500 + }, + { + "epoch": 14.14, + "learning_rate": 8.965695311285342e-08, + "loss": 3.8446, + "step": 1273000 + }, + { + "epoch": 14.14, + "learning_rate": 8.964307131910357e-08, + "loss": 3.8217, + "step": 1273500 + }, + { + "epoch": 14.15, + "learning_rate": 8.96291895253537e-08, + "loss": 3.8284, + "step": 1274000 + }, + { + "epoch": 14.15, + "learning_rate": 8.961530773160384e-08, + "loss": 3.8369, + "step": 1274500 + }, + { + "epoch": 14.16, + "learning_rate": 8.960142593785398e-08, + "loss": 3.8373, + "step": 1275000 + }, + { + "epoch": 14.16, + "learning_rate": 8.958754414410412e-08, + "loss": 3.8222, + "step": 1275500 + }, + { + "epoch": 14.17, + "learning_rate": 8.957366235035426e-08, + "loss": 3.8344, + "step": 1276000 + }, + { + "epoch": 14.18, + "learning_rate": 8.95597805566044e-08, + "loss": 3.8185, + "step": 1276500 + }, + { + "epoch": 14.18, + "learning_rate": 8.954589876285454e-08, + "loss": 3.8252, + "step": 1277000 + }, + { + "epoch": 14.19, + "learning_rate": 8.953201696910468e-08, + "loss": 3.8374, + "step": 1277500 + }, + { + "epoch": 14.19, + "learning_rate": 8.951813517535482e-08, + "loss": 3.8445, + "step": 1278000 + }, + { + "epoch": 14.2, + "learning_rate": 8.950425338160495e-08, + "loss": 3.8207, + "step": 1278500 + }, + { + "epoch": 14.2, + "learning_rate": 8.949037158785509e-08, + "loss": 3.8263, + "step": 1279000 + }, + { + "epoch": 14.21, + "learning_rate": 8.947648979410523e-08, + "loss": 3.8335, + "step": 1279500 + }, + { + "epoch": 14.21, + "learning_rate": 8.946260800035536e-08, + "loss": 3.817, + "step": 1280000 + }, + { + "epoch": 14.22, + "learning_rate": 8.94487262066055e-08, + "loss": 3.8187, + "step": 1280500 + }, + { + "epoch": 14.23, + "learning_rate": 8.943484441285565e-08, + "loss": 3.8278, + "step": 1281000 + }, + { + "epoch": 14.23, + "learning_rate": 8.942096261910579e-08, + "loss": 3.8334, + "step": 1281500 + }, + { + "epoch": 14.24, + "learning_rate": 8.940708082535593e-08, + "loss": 3.828, + "step": 1282000 + }, + { + "epoch": 14.24, + "learning_rate": 8.939319903160606e-08, + "loss": 3.8245, + "step": 1282500 + }, + { + "epoch": 14.25, + "learning_rate": 8.93793172378562e-08, + "loss": 3.8187, + "step": 1283000 + }, + { + "epoch": 14.25, + "learning_rate": 8.936543544410635e-08, + "loss": 3.8182, + "step": 1283500 + }, + { + "epoch": 14.26, + "learning_rate": 8.935155365035649e-08, + "loss": 3.8117, + "step": 1284000 + }, + { + "epoch": 14.26, + "learning_rate": 8.933767185660662e-08, + "loss": 3.8316, + "step": 1284500 + }, + { + "epoch": 14.27, + "learning_rate": 8.932379006285676e-08, + "loss": 3.8354, + "step": 1285000 + }, + { + "epoch": 14.28, + "learning_rate": 8.930990826910689e-08, + "loss": 3.8284, + "step": 1285500 + }, + { + "epoch": 14.28, + "learning_rate": 8.929602647535703e-08, + "loss": 3.8349, + "step": 1286000 + }, + { + "epoch": 14.29, + "learning_rate": 8.928214468160717e-08, + "loss": 3.8173, + "step": 1286500 + }, + { + "epoch": 14.29, + "learning_rate": 8.926826288785731e-08, + "loss": 3.8206, + "step": 1287000 + }, + { + "epoch": 14.3, + "learning_rate": 8.925438109410746e-08, + "loss": 3.8299, + "step": 1287500 + }, + { + "epoch": 14.3, + "learning_rate": 8.924049930035759e-08, + "loss": 3.8193, + "step": 1288000 + }, + { + "epoch": 14.31, + "learning_rate": 8.922661750660773e-08, + "loss": 3.8268, + "step": 1288500 + }, + { + "epoch": 14.31, + "learning_rate": 8.921273571285787e-08, + "loss": 3.8106, + "step": 1289000 + }, + { + "epoch": 14.32, + "learning_rate": 8.919885391910801e-08, + "loss": 3.819, + "step": 1289500 + }, + { + "epoch": 14.33, + "learning_rate": 8.918497212535815e-08, + "loss": 3.8268, + "step": 1290000 + }, + { + "epoch": 14.33, + "learning_rate": 8.917109033160828e-08, + "loss": 3.8401, + "step": 1290500 + }, + { + "epoch": 14.34, + "learning_rate": 8.915720853785841e-08, + "loss": 3.8225, + "step": 1291000 + }, + { + "epoch": 14.34, + "learning_rate": 8.914332674410856e-08, + "loss": 3.8301, + "step": 1291500 + }, + { + "epoch": 14.35, + "learning_rate": 8.91294449503587e-08, + "loss": 3.8297, + "step": 1292000 + }, + { + "epoch": 14.35, + "learning_rate": 8.911556315660884e-08, + "loss": 3.8346, + "step": 1292500 + }, + { + "epoch": 14.36, + "learning_rate": 8.910168136285898e-08, + "loss": 3.8534, + "step": 1293000 + }, + { + "epoch": 14.36, + "learning_rate": 8.908779956910912e-08, + "loss": 3.8296, + "step": 1293500 + }, + { + "epoch": 14.37, + "learning_rate": 8.907391777535925e-08, + "loss": 3.8473, + "step": 1294000 + }, + { + "epoch": 14.38, + "learning_rate": 8.90600359816094e-08, + "loss": 3.8173, + "step": 1294500 + }, + { + "epoch": 14.38, + "learning_rate": 8.904615418785954e-08, + "loss": 3.8274, + "step": 1295000 + }, + { + "epoch": 14.39, + "learning_rate": 8.903227239410968e-08, + "loss": 3.8126, + "step": 1295500 + }, + { + "epoch": 14.39, + "learning_rate": 8.901839060035982e-08, + "loss": 3.8168, + "step": 1296000 + }, + { + "epoch": 14.4, + "learning_rate": 8.900450880660995e-08, + "loss": 3.8136, + "step": 1296500 + }, + { + "epoch": 14.4, + "learning_rate": 8.899062701286008e-08, + "loss": 3.8212, + "step": 1297000 + }, + { + "epoch": 14.41, + "learning_rate": 8.897674521911022e-08, + "loss": 3.8245, + "step": 1297500 + }, + { + "epoch": 14.41, + "learning_rate": 8.896286342536037e-08, + "loss": 3.8219, + "step": 1298000 + }, + { + "epoch": 14.42, + "learning_rate": 8.894898163161051e-08, + "loss": 3.8133, + "step": 1298500 + }, + { + "epoch": 14.43, + "learning_rate": 8.893509983786065e-08, + "loss": 3.8344, + "step": 1299000 + }, + { + "epoch": 14.43, + "learning_rate": 8.892121804411078e-08, + "loss": 3.8299, + "step": 1299500 + }, + { + "epoch": 14.44, + "learning_rate": 8.890733625036092e-08, + "loss": 3.8283, + "step": 1300000 + }, + { + "epoch": 14.44, + "learning_rate": 8.889345445661106e-08, + "loss": 3.8263, + "step": 1300500 + }, + { + "epoch": 14.45, + "learning_rate": 8.88795726628612e-08, + "loss": 3.8288, + "step": 1301000 + }, + { + "epoch": 14.45, + "learning_rate": 8.886569086911135e-08, + "loss": 3.8437, + "step": 1301500 + }, + { + "epoch": 14.46, + "learning_rate": 8.885180907536148e-08, + "loss": 3.8196, + "step": 1302000 + }, + { + "epoch": 14.46, + "learning_rate": 8.883792728161162e-08, + "loss": 3.8317, + "step": 1302500 + }, + { + "epoch": 14.47, + "learning_rate": 8.882404548786175e-08, + "loss": 3.8008, + "step": 1303000 + }, + { + "epoch": 14.48, + "learning_rate": 8.881016369411189e-08, + "loss": 3.838, + "step": 1303500 + }, + { + "epoch": 14.48, + "learning_rate": 8.879628190036203e-08, + "loss": 3.8156, + "step": 1304000 + }, + { + "epoch": 14.49, + "learning_rate": 8.878240010661217e-08, + "loss": 3.8096, + "step": 1304500 + }, + { + "epoch": 14.49, + "learning_rate": 8.87685183128623e-08, + "loss": 3.8306, + "step": 1305000 + }, + { + "epoch": 14.5, + "learning_rate": 8.875463651911245e-08, + "loss": 3.8375, + "step": 1305500 + }, + { + "epoch": 14.5, + "learning_rate": 8.874075472536259e-08, + "loss": 3.8216, + "step": 1306000 + }, + { + "epoch": 14.51, + "learning_rate": 8.872687293161273e-08, + "loss": 3.8256, + "step": 1306500 + }, + { + "epoch": 14.51, + "learning_rate": 8.871299113786287e-08, + "loss": 3.8302, + "step": 1307000 + }, + { + "epoch": 14.52, + "learning_rate": 8.869910934411302e-08, + "loss": 3.8363, + "step": 1307500 + }, + { + "epoch": 14.53, + "learning_rate": 8.868522755036314e-08, + "loss": 3.8195, + "step": 1308000 + }, + { + "epoch": 14.53, + "learning_rate": 8.867134575661329e-08, + "loss": 3.8135, + "step": 1308500 + }, + { + "epoch": 14.54, + "learning_rate": 8.865746396286342e-08, + "loss": 3.8315, + "step": 1309000 + }, + { + "epoch": 14.54, + "learning_rate": 8.864358216911356e-08, + "loss": 3.8357, + "step": 1309500 + }, + { + "epoch": 14.55, + "learning_rate": 8.86297003753637e-08, + "loss": 3.8469, + "step": 1310000 + }, + { + "epoch": 14.55, + "learning_rate": 8.861581858161383e-08, + "loss": 3.8455, + "step": 1310500 + }, + { + "epoch": 14.56, + "learning_rate": 8.860193678786397e-08, + "loss": 3.8335, + "step": 1311000 + }, + { + "epoch": 14.56, + "learning_rate": 8.858805499411411e-08, + "loss": 3.8315, + "step": 1311500 + }, + { + "epoch": 14.57, + "learning_rate": 8.857417320036426e-08, + "loss": 3.8066, + "step": 1312000 + }, + { + "epoch": 14.58, + "learning_rate": 8.85602914066144e-08, + "loss": 3.8235, + "step": 1312500 + }, + { + "epoch": 14.58, + "learning_rate": 8.854640961286454e-08, + "loss": 3.7941, + "step": 1313000 + }, + { + "epoch": 14.59, + "learning_rate": 8.853252781911467e-08, + "loss": 3.8239, + "step": 1313500 + }, + { + "epoch": 14.59, + "learning_rate": 8.851864602536481e-08, + "loss": 3.8576, + "step": 1314000 + }, + { + "epoch": 14.6, + "learning_rate": 8.850476423161495e-08, + "loss": 3.8174, + "step": 1314500 + }, + { + "epoch": 14.6, + "learning_rate": 8.849088243786508e-08, + "loss": 3.8036, + "step": 1315000 + }, + { + "epoch": 14.61, + "learning_rate": 8.847700064411523e-08, + "loss": 3.8326, + "step": 1315500 + }, + { + "epoch": 14.61, + "learning_rate": 8.846311885036537e-08, + "loss": 3.8239, + "step": 1316000 + }, + { + "epoch": 14.62, + "learning_rate": 8.84492370566155e-08, + "loss": 3.8235, + "step": 1316500 + }, + { + "epoch": 14.63, + "learning_rate": 8.843535526286564e-08, + "loss": 3.8396, + "step": 1317000 + }, + { + "epoch": 14.63, + "learning_rate": 8.842147346911578e-08, + "loss": 3.8159, + "step": 1317500 + }, + { + "epoch": 14.64, + "learning_rate": 8.840759167536592e-08, + "loss": 3.8475, + "step": 1318000 + }, + { + "epoch": 14.64, + "learning_rate": 8.839370988161607e-08, + "loss": 3.8351, + "step": 1318500 + }, + { + "epoch": 14.65, + "learning_rate": 8.83798280878662e-08, + "loss": 3.8275, + "step": 1319000 + }, + { + "epoch": 14.65, + "learning_rate": 8.836594629411634e-08, + "loss": 3.8283, + "step": 1319500 + }, + { + "epoch": 14.66, + "learning_rate": 8.835206450036648e-08, + "loss": 3.8388, + "step": 1320000 + }, + { + "epoch": 14.66, + "learning_rate": 8.833818270661662e-08, + "loss": 3.8339, + "step": 1320500 + }, + { + "epoch": 14.67, + "learning_rate": 8.832430091286675e-08, + "loss": 3.8296, + "step": 1321000 + }, + { + "epoch": 14.68, + "learning_rate": 8.831041911911689e-08, + "loss": 3.8061, + "step": 1321500 + }, + { + "epoch": 14.68, + "learning_rate": 8.829653732536702e-08, + "loss": 3.8157, + "step": 1322000 + }, + { + "epoch": 14.69, + "learning_rate": 8.828265553161716e-08, + "loss": 3.8295, + "step": 1322500 + }, + { + "epoch": 14.69, + "learning_rate": 8.82687737378673e-08, + "loss": 3.8406, + "step": 1323000 + }, + { + "epoch": 14.7, + "learning_rate": 8.825489194411745e-08, + "loss": 3.8231, + "step": 1323500 + }, + { + "epoch": 14.7, + "learning_rate": 8.824101015036759e-08, + "loss": 3.8234, + "step": 1324000 + }, + { + "epoch": 14.71, + "learning_rate": 8.822712835661772e-08, + "loss": 3.8236, + "step": 1324500 + }, + { + "epoch": 14.71, + "learning_rate": 8.821324656286786e-08, + "loss": 3.8288, + "step": 1325000 + }, + { + "epoch": 14.72, + "learning_rate": 8.8199364769118e-08, + "loss": 3.8349, + "step": 1325500 + }, + { + "epoch": 14.73, + "learning_rate": 8.818548297536815e-08, + "loss": 3.822, + "step": 1326000 + }, + { + "epoch": 14.73, + "learning_rate": 8.817160118161829e-08, + "loss": 3.8215, + "step": 1326500 + }, + { + "epoch": 14.74, + "learning_rate": 8.815771938786842e-08, + "loss": 3.8243, + "step": 1327000 + }, + { + "epoch": 14.74, + "learning_rate": 8.814383759411855e-08, + "loss": 3.8323, + "step": 1327500 + }, + { + "epoch": 14.75, + "learning_rate": 8.812995580036869e-08, + "loss": 3.8057, + "step": 1328000 + }, + { + "epoch": 14.75, + "learning_rate": 8.811607400661883e-08, + "loss": 3.8359, + "step": 1328500 + }, + { + "epoch": 14.76, + "learning_rate": 8.810219221286897e-08, + "loss": 3.8208, + "step": 1329000 + }, + { + "epoch": 14.76, + "learning_rate": 8.808831041911912e-08, + "loss": 3.8197, + "step": 1329500 + }, + { + "epoch": 14.77, + "learning_rate": 8.807442862536926e-08, + "loss": 3.7929, + "step": 1330000 + }, + { + "epoch": 14.78, + "learning_rate": 8.806054683161939e-08, + "loss": 3.8472, + "step": 1330500 + }, + { + "epoch": 14.78, + "learning_rate": 8.804666503786953e-08, + "loss": 3.8148, + "step": 1331000 + }, + { + "epoch": 14.79, + "learning_rate": 8.803278324411967e-08, + "loss": 3.8319, + "step": 1331500 + }, + { + "epoch": 14.79, + "learning_rate": 8.801890145036981e-08, + "loss": 3.8321, + "step": 1332000 + }, + { + "epoch": 14.8, + "learning_rate": 8.800501965661996e-08, + "loss": 3.8233, + "step": 1332500 + }, + { + "epoch": 14.8, + "learning_rate": 8.799113786287009e-08, + "loss": 3.8252, + "step": 1333000 + }, + { + "epoch": 14.81, + "learning_rate": 8.797725606912021e-08, + "loss": 3.8348, + "step": 1333500 + }, + { + "epoch": 14.81, + "learning_rate": 8.796337427537036e-08, + "loss": 3.8349, + "step": 1334000 + }, + { + "epoch": 14.82, + "learning_rate": 8.79494924816205e-08, + "loss": 3.825, + "step": 1334500 + }, + { + "epoch": 14.83, + "learning_rate": 8.793561068787064e-08, + "loss": 3.8209, + "step": 1335000 + }, + { + "epoch": 14.83, + "learning_rate": 8.792172889412078e-08, + "loss": 3.8269, + "step": 1335500 + }, + { + "epoch": 14.84, + "learning_rate": 8.790784710037091e-08, + "loss": 3.8154, + "step": 1336000 + }, + { + "epoch": 14.84, + "learning_rate": 8.789396530662105e-08, + "loss": 3.8206, + "step": 1336500 + }, + { + "epoch": 14.85, + "learning_rate": 8.78800835128712e-08, + "loss": 3.8202, + "step": 1337000 + }, + { + "epoch": 14.85, + "learning_rate": 8.786620171912134e-08, + "loss": 3.8162, + "step": 1337500 + }, + { + "epoch": 14.86, + "learning_rate": 8.785231992537148e-08, + "loss": 3.832, + "step": 1338000 + }, + { + "epoch": 14.86, + "learning_rate": 8.783843813162161e-08, + "loss": 3.8283, + "step": 1338500 + }, + { + "epoch": 14.87, + "learning_rate": 8.782455633787175e-08, + "loss": 3.8234, + "step": 1339000 + }, + { + "epoch": 14.88, + "learning_rate": 8.781067454412188e-08, + "loss": 3.8269, + "step": 1339500 + }, + { + "epoch": 14.88, + "learning_rate": 8.779679275037202e-08, + "loss": 3.8264, + "step": 1340000 + }, + { + "epoch": 14.89, + "learning_rate": 8.778291095662217e-08, + "loss": 3.8198, + "step": 1340500 + }, + { + "epoch": 14.89, + "learning_rate": 8.776902916287231e-08, + "loss": 3.8232, + "step": 1341000 + }, + { + "epoch": 14.9, + "learning_rate": 8.775514736912244e-08, + "loss": 3.8087, + "step": 1341500 + }, + { + "epoch": 14.9, + "learning_rate": 8.774126557537258e-08, + "loss": 3.8175, + "step": 1342000 + }, + { + "epoch": 14.91, + "learning_rate": 8.772738378162272e-08, + "loss": 3.843, + "step": 1342500 + }, + { + "epoch": 14.91, + "learning_rate": 8.771350198787286e-08, + "loss": 3.8228, + "step": 1343000 + }, + { + "epoch": 14.92, + "learning_rate": 8.769962019412301e-08, + "loss": 3.8287, + "step": 1343500 + }, + { + "epoch": 14.93, + "learning_rate": 8.768573840037315e-08, + "loss": 3.825, + "step": 1344000 + }, + { + "epoch": 14.93, + "learning_rate": 8.767185660662328e-08, + "loss": 3.8371, + "step": 1344500 + }, + { + "epoch": 14.94, + "learning_rate": 8.765797481287342e-08, + "loss": 3.8076, + "step": 1345000 + }, + { + "epoch": 14.94, + "learning_rate": 8.764409301912355e-08, + "loss": 3.8097, + "step": 1345500 + }, + { + "epoch": 14.95, + "learning_rate": 8.763021122537369e-08, + "loss": 3.8304, + "step": 1346000 + }, + { + "epoch": 14.95, + "learning_rate": 8.761632943162383e-08, + "loss": 3.8157, + "step": 1346500 + }, + { + "epoch": 14.96, + "learning_rate": 8.760244763787396e-08, + "loss": 3.8159, + "step": 1347000 + }, + { + "epoch": 14.96, + "learning_rate": 8.75885658441241e-08, + "loss": 3.8155, + "step": 1347500 + }, + { + "epoch": 14.97, + "learning_rate": 8.757468405037425e-08, + "loss": 3.818, + "step": 1348000 + }, + { + "epoch": 14.98, + "learning_rate": 8.756080225662439e-08, + "loss": 3.835, + "step": 1348500 + }, + { + "epoch": 14.98, + "learning_rate": 8.754692046287453e-08, + "loss": 3.8305, + "step": 1349000 + }, + { + "epoch": 14.99, + "learning_rate": 8.753303866912467e-08, + "loss": 3.8271, + "step": 1349500 + }, + { + "epoch": 14.99, + "learning_rate": 8.75191568753748e-08, + "loss": 3.8227, + "step": 1350000 + }, + { + "epoch": 15.0, + "learning_rate": 8.750527508162495e-08, + "loss": 3.8244, + "step": 1350500 + }, + { + "epoch": 15.0, + "eval_loss": 3.8713603019714355, + "eval_runtime": 6.3095, + "eval_samples_per_second": 246.297, + "step": 1350690 + }, + { + "epoch": 15.0, + "learning_rate": 8.749139328787509e-08, + "loss": 3.8192, + "step": 1351000 + }, + { + "epoch": 15.01, + "learning_rate": 8.747751149412522e-08, + "loss": 3.8112, + "step": 1351500 + }, + { + "epoch": 15.01, + "learning_rate": 8.746362970037536e-08, + "loss": 3.8426, + "step": 1352000 + }, + { + "epoch": 15.02, + "learning_rate": 8.74497479066255e-08, + "loss": 3.8161, + "step": 1352500 + }, + { + "epoch": 15.03, + "learning_rate": 8.743586611287563e-08, + "loss": 3.8126, + "step": 1353000 + }, + { + "epoch": 15.03, + "learning_rate": 8.742198431912577e-08, + "loss": 3.8203, + "step": 1353500 + }, + { + "epoch": 15.04, + "learning_rate": 8.740810252537591e-08, + "loss": 3.8411, + "step": 1354000 + }, + { + "epoch": 15.04, + "learning_rate": 8.739422073162606e-08, + "loss": 3.834, + "step": 1354500 + }, + { + "epoch": 15.05, + "learning_rate": 8.73803389378762e-08, + "loss": 3.8202, + "step": 1355000 + }, + { + "epoch": 15.05, + "learning_rate": 8.736645714412633e-08, + "loss": 3.8192, + "step": 1355500 + }, + { + "epoch": 15.06, + "learning_rate": 8.735257535037647e-08, + "loss": 3.8115, + "step": 1356000 + }, + { + "epoch": 15.06, + "learning_rate": 8.733869355662661e-08, + "loss": 3.8107, + "step": 1356500 + }, + { + "epoch": 15.07, + "learning_rate": 8.732481176287676e-08, + "loss": 3.8264, + "step": 1357000 + }, + { + "epoch": 15.08, + "learning_rate": 8.731092996912688e-08, + "loss": 3.8289, + "step": 1357500 + }, + { + "epoch": 15.08, + "learning_rate": 8.729704817537703e-08, + "loss": 3.8233, + "step": 1358000 + }, + { + "epoch": 15.09, + "learning_rate": 8.728316638162716e-08, + "loss": 3.8138, + "step": 1358500 + }, + { + "epoch": 15.09, + "learning_rate": 8.72692845878773e-08, + "loss": 3.8318, + "step": 1359000 + }, + { + "epoch": 15.1, + "learning_rate": 8.725540279412744e-08, + "loss": 3.8336, + "step": 1359500 + }, + { + "epoch": 15.1, + "learning_rate": 8.724152100037758e-08, + "loss": 3.8161, + "step": 1360000 + }, + { + "epoch": 15.11, + "learning_rate": 8.722763920662772e-08, + "loss": 3.8088, + "step": 1360500 + }, + { + "epoch": 15.11, + "learning_rate": 8.721375741287785e-08, + "loss": 3.8286, + "step": 1361000 + }, + { + "epoch": 15.12, + "learning_rate": 8.7199875619128e-08, + "loss": 3.8334, + "step": 1361500 + }, + { + "epoch": 15.13, + "learning_rate": 8.718599382537814e-08, + "loss": 3.822, + "step": 1362000 + }, + { + "epoch": 15.13, + "learning_rate": 8.717211203162828e-08, + "loss": 3.8159, + "step": 1362500 + }, + { + "epoch": 15.14, + "learning_rate": 8.715823023787842e-08, + "loss": 3.8314, + "step": 1363000 + }, + { + "epoch": 15.14, + "learning_rate": 8.714434844412855e-08, + "loss": 3.8308, + "step": 1363500 + }, + { + "epoch": 15.15, + "learning_rate": 8.713046665037868e-08, + "loss": 3.8333, + "step": 1364000 + }, + { + "epoch": 15.15, + "learning_rate": 8.711658485662882e-08, + "loss": 3.8294, + "step": 1364500 + }, + { + "epoch": 15.16, + "learning_rate": 8.710270306287897e-08, + "loss": 3.7999, + "step": 1365000 + }, + { + "epoch": 15.16, + "learning_rate": 8.708882126912911e-08, + "loss": 3.823, + "step": 1365500 + }, + { + "epoch": 15.17, + "learning_rate": 8.707493947537925e-08, + "loss": 3.8281, + "step": 1366000 + }, + { + "epoch": 15.18, + "learning_rate": 8.706105768162939e-08, + "loss": 3.8191, + "step": 1366500 + }, + { + "epoch": 15.18, + "learning_rate": 8.704717588787952e-08, + "loss": 3.8142, + "step": 1367000 + }, + { + "epoch": 15.19, + "learning_rate": 8.703329409412966e-08, + "loss": 3.827, + "step": 1367500 + }, + { + "epoch": 15.19, + "learning_rate": 8.70194123003798e-08, + "loss": 3.8198, + "step": 1368000 + }, + { + "epoch": 15.2, + "learning_rate": 8.700553050662995e-08, + "loss": 3.8002, + "step": 1368500 + }, + { + "epoch": 15.2, + "learning_rate": 8.699164871288009e-08, + "loss": 3.8125, + "step": 1369000 + }, + { + "epoch": 15.21, + "learning_rate": 8.697776691913022e-08, + "loss": 3.8256, + "step": 1369500 + }, + { + "epoch": 15.21, + "learning_rate": 8.696388512538035e-08, + "loss": 3.8151, + "step": 1370000 + }, + { + "epoch": 15.22, + "learning_rate": 8.695000333163049e-08, + "loss": 3.842, + "step": 1370500 + }, + { + "epoch": 15.23, + "learning_rate": 8.693612153788063e-08, + "loss": 3.8203, + "step": 1371000 + }, + { + "epoch": 15.23, + "learning_rate": 8.692223974413077e-08, + "loss": 3.8075, + "step": 1371500 + }, + { + "epoch": 15.24, + "learning_rate": 8.690835795038092e-08, + "loss": 3.8278, + "step": 1372000 + }, + { + "epoch": 15.24, + "learning_rate": 8.689447615663105e-08, + "loss": 3.8106, + "step": 1372500 + }, + { + "epoch": 15.25, + "learning_rate": 8.688059436288119e-08, + "loss": 3.8194, + "step": 1373000 + }, + { + "epoch": 15.25, + "learning_rate": 8.686671256913133e-08, + "loss": 3.8153, + "step": 1373500 + }, + { + "epoch": 15.26, + "learning_rate": 8.685283077538147e-08, + "loss": 3.8231, + "step": 1374000 + }, + { + "epoch": 15.26, + "learning_rate": 8.683894898163162e-08, + "loss": 3.8125, + "step": 1374500 + }, + { + "epoch": 15.27, + "learning_rate": 8.682506718788174e-08, + "loss": 3.8376, + "step": 1375000 + }, + { + "epoch": 15.28, + "learning_rate": 8.681118539413189e-08, + "loss": 3.8077, + "step": 1375500 + }, + { + "epoch": 15.28, + "learning_rate": 8.679730360038202e-08, + "loss": 3.8267, + "step": 1376000 + }, + { + "epoch": 15.29, + "learning_rate": 8.678342180663216e-08, + "loss": 3.7899, + "step": 1376500 + }, + { + "epoch": 15.29, + "learning_rate": 8.67695400128823e-08, + "loss": 3.8168, + "step": 1377000 + }, + { + "epoch": 15.3, + "learning_rate": 8.675565821913244e-08, + "loss": 3.8086, + "step": 1377500 + }, + { + "epoch": 15.3, + "learning_rate": 8.674177642538257e-08, + "loss": 3.8206, + "step": 1378000 + }, + { + "epoch": 15.31, + "learning_rate": 8.672789463163271e-08, + "loss": 3.8108, + "step": 1378500 + }, + { + "epoch": 15.31, + "learning_rate": 8.671401283788286e-08, + "loss": 3.8131, + "step": 1379000 + }, + { + "epoch": 15.32, + "learning_rate": 8.6700131044133e-08, + "loss": 3.8269, + "step": 1379500 + }, + { + "epoch": 15.33, + "learning_rate": 8.668624925038314e-08, + "loss": 3.8095, + "step": 1380000 + }, + { + "epoch": 15.33, + "learning_rate": 8.667236745663328e-08, + "loss": 3.8163, + "step": 1380500 + }, + { + "epoch": 15.34, + "learning_rate": 8.665848566288341e-08, + "loss": 3.8233, + "step": 1381000 + }, + { + "epoch": 15.34, + "learning_rate": 8.664460386913355e-08, + "loss": 3.8134, + "step": 1381500 + }, + { + "epoch": 15.35, + "learning_rate": 8.663072207538368e-08, + "loss": 3.8263, + "step": 1382000 + }, + { + "epoch": 15.35, + "learning_rate": 8.661684028163383e-08, + "loss": 3.8218, + "step": 1382500 + }, + { + "epoch": 15.36, + "learning_rate": 8.660295848788397e-08, + "loss": 3.8359, + "step": 1383000 + }, + { + "epoch": 15.36, + "learning_rate": 8.65890766941341e-08, + "loss": 3.8273, + "step": 1383500 + }, + { + "epoch": 15.37, + "learning_rate": 8.657519490038424e-08, + "loss": 3.8112, + "step": 1384000 + }, + { + "epoch": 15.38, + "learning_rate": 8.656131310663438e-08, + "loss": 3.8098, + "step": 1384500 + }, + { + "epoch": 15.38, + "learning_rate": 8.654743131288452e-08, + "loss": 3.8206, + "step": 1385000 + }, + { + "epoch": 15.39, + "learning_rate": 8.653354951913467e-08, + "loss": 3.8287, + "step": 1385500 + }, + { + "epoch": 15.39, + "learning_rate": 8.651966772538481e-08, + "loss": 3.8085, + "step": 1386000 + }, + { + "epoch": 15.4, + "learning_rate": 8.650578593163494e-08, + "loss": 3.8165, + "step": 1386500 + }, + { + "epoch": 15.4, + "learning_rate": 8.649190413788508e-08, + "loss": 3.8133, + "step": 1387000 + }, + { + "epoch": 15.41, + "learning_rate": 8.647802234413522e-08, + "loss": 3.8217, + "step": 1387500 + }, + { + "epoch": 15.41, + "learning_rate": 8.646414055038535e-08, + "loss": 3.8273, + "step": 1388000 + }, + { + "epoch": 15.42, + "learning_rate": 8.645025875663549e-08, + "loss": 3.8122, + "step": 1388500 + }, + { + "epoch": 15.43, + "learning_rate": 8.643637696288564e-08, + "loss": 3.8134, + "step": 1389000 + }, + { + "epoch": 15.43, + "learning_rate": 8.642249516913576e-08, + "loss": 3.813, + "step": 1389500 + }, + { + "epoch": 15.44, + "learning_rate": 8.64086133753859e-08, + "loss": 3.81, + "step": 1390000 + }, + { + "epoch": 15.44, + "learning_rate": 8.639473158163605e-08, + "loss": 3.8207, + "step": 1390500 + }, + { + "epoch": 15.45, + "learning_rate": 8.638084978788619e-08, + "loss": 3.8134, + "step": 1391000 + }, + { + "epoch": 15.45, + "learning_rate": 8.636696799413633e-08, + "loss": 3.8155, + "step": 1391500 + }, + { + "epoch": 15.46, + "learning_rate": 8.635308620038646e-08, + "loss": 3.8221, + "step": 1392000 + }, + { + "epoch": 15.46, + "learning_rate": 8.63392044066366e-08, + "loss": 3.8384, + "step": 1392500 + }, + { + "epoch": 15.47, + "learning_rate": 8.632532261288675e-08, + "loss": 3.8383, + "step": 1393000 + }, + { + "epoch": 15.48, + "learning_rate": 8.631144081913689e-08, + "loss": 3.8442, + "step": 1393500 + }, + { + "epoch": 15.48, + "learning_rate": 8.629755902538702e-08, + "loss": 3.8267, + "step": 1394000 + }, + { + "epoch": 15.49, + "learning_rate": 8.628367723163716e-08, + "loss": 3.8281, + "step": 1394500 + }, + { + "epoch": 15.49, + "learning_rate": 8.626979543788729e-08, + "loss": 3.8247, + "step": 1395000 + }, + { + "epoch": 15.5, + "learning_rate": 8.625591364413743e-08, + "loss": 3.8135, + "step": 1395500 + }, + { + "epoch": 15.5, + "learning_rate": 8.624203185038757e-08, + "loss": 3.8152, + "step": 1396000 + }, + { + "epoch": 15.51, + "learning_rate": 8.622815005663772e-08, + "loss": 3.8322, + "step": 1396500 + }, + { + "epoch": 15.51, + "learning_rate": 8.621426826288786e-08, + "loss": 3.8272, + "step": 1397000 + }, + { + "epoch": 15.52, + "learning_rate": 8.620038646913799e-08, + "loss": 3.8155, + "step": 1397500 + }, + { + "epoch": 15.53, + "learning_rate": 8.618650467538813e-08, + "loss": 3.8181, + "step": 1398000 + }, + { + "epoch": 15.53, + "learning_rate": 8.617262288163827e-08, + "loss": 3.8254, + "step": 1398500 + }, + { + "epoch": 15.54, + "learning_rate": 8.615874108788841e-08, + "loss": 3.8095, + "step": 1399000 + }, + { + "epoch": 15.54, + "learning_rate": 8.614485929413856e-08, + "loss": 3.8181, + "step": 1399500 + }, + { + "epoch": 15.55, + "learning_rate": 8.613097750038869e-08, + "loss": 3.8184, + "step": 1400000 + }, + { + "epoch": 15.55, + "learning_rate": 8.611709570663881e-08, + "loss": 3.8115, + "step": 1400500 + }, + { + "epoch": 15.56, + "learning_rate": 8.610321391288896e-08, + "loss": 3.8327, + "step": 1401000 + }, + { + "epoch": 15.56, + "learning_rate": 8.60893321191391e-08, + "loss": 3.8096, + "step": 1401500 + }, + { + "epoch": 15.57, + "learning_rate": 8.607545032538924e-08, + "loss": 3.8092, + "step": 1402000 + }, + { + "epoch": 15.58, + "learning_rate": 8.606156853163938e-08, + "loss": 3.8297, + "step": 1402500 + }, + { + "epoch": 15.58, + "learning_rate": 8.604768673788953e-08, + "loss": 3.8328, + "step": 1403000 + }, + { + "epoch": 15.59, + "learning_rate": 8.603380494413965e-08, + "loss": 3.8203, + "step": 1403500 + }, + { + "epoch": 15.59, + "learning_rate": 8.60199231503898e-08, + "loss": 3.8042, + "step": 1404000 + }, + { + "epoch": 15.6, + "learning_rate": 8.600604135663994e-08, + "loss": 3.804, + "step": 1404500 + }, + { + "epoch": 15.6, + "learning_rate": 8.599215956289008e-08, + "loss": 3.8493, + "step": 1405000 + }, + { + "epoch": 15.61, + "learning_rate": 8.597827776914021e-08, + "loss": 3.8201, + "step": 1405500 + }, + { + "epoch": 15.61, + "learning_rate": 8.596439597539035e-08, + "loss": 3.8153, + "step": 1406000 + }, + { + "epoch": 15.62, + "learning_rate": 8.595051418164048e-08, + "loss": 3.8193, + "step": 1406500 + }, + { + "epoch": 15.63, + "learning_rate": 8.593663238789062e-08, + "loss": 3.8201, + "step": 1407000 + }, + { + "epoch": 15.63, + "learning_rate": 8.592275059414077e-08, + "loss": 3.8155, + "step": 1407500 + }, + { + "epoch": 15.64, + "learning_rate": 8.590886880039091e-08, + "loss": 3.8088, + "step": 1408000 + }, + { + "epoch": 15.64, + "learning_rate": 8.589498700664105e-08, + "loss": 3.8214, + "step": 1408500 + }, + { + "epoch": 15.65, + "learning_rate": 8.588110521289118e-08, + "loss": 3.806, + "step": 1409000 + }, + { + "epoch": 15.65, + "learning_rate": 8.586722341914132e-08, + "loss": 3.8114, + "step": 1409500 + }, + { + "epoch": 15.66, + "learning_rate": 8.585334162539146e-08, + "loss": 3.8107, + "step": 1410000 + }, + { + "epoch": 15.66, + "learning_rate": 8.583945983164161e-08, + "loss": 3.8379, + "step": 1410500 + }, + { + "epoch": 15.67, + "learning_rate": 8.582557803789175e-08, + "loss": 3.806, + "step": 1411000 + }, + { + "epoch": 15.68, + "learning_rate": 8.581169624414188e-08, + "loss": 3.8209, + "step": 1411500 + }, + { + "epoch": 15.68, + "learning_rate": 8.579781445039202e-08, + "loss": 3.821, + "step": 1412000 + }, + { + "epoch": 15.69, + "learning_rate": 8.578393265664215e-08, + "loss": 3.8236, + "step": 1412500 + }, + { + "epoch": 15.69, + "learning_rate": 8.577005086289229e-08, + "loss": 3.8055, + "step": 1413000 + }, + { + "epoch": 15.7, + "learning_rate": 8.575616906914243e-08, + "loss": 3.8257, + "step": 1413500 + }, + { + "epoch": 15.7, + "learning_rate": 8.574228727539258e-08, + "loss": 3.8202, + "step": 1414000 + }, + { + "epoch": 15.71, + "learning_rate": 8.57284054816427e-08, + "loss": 3.8182, + "step": 1414500 + }, + { + "epoch": 15.71, + "learning_rate": 8.571452368789285e-08, + "loss": 3.8128, + "step": 1415000 + }, + { + "epoch": 15.72, + "learning_rate": 8.570064189414299e-08, + "loss": 3.805, + "step": 1415500 + }, + { + "epoch": 15.73, + "learning_rate": 8.568676010039313e-08, + "loss": 3.8303, + "step": 1416000 + }, + { + "epoch": 15.73, + "learning_rate": 8.567287830664327e-08, + "loss": 3.8159, + "step": 1416500 + }, + { + "epoch": 15.74, + "learning_rate": 8.565899651289342e-08, + "loss": 3.8215, + "step": 1417000 + }, + { + "epoch": 15.74, + "learning_rate": 8.564511471914355e-08, + "loss": 3.8219, + "step": 1417500 + }, + { + "epoch": 15.75, + "learning_rate": 8.563123292539369e-08, + "loss": 3.7965, + "step": 1418000 + }, + { + "epoch": 15.75, + "learning_rate": 8.561735113164382e-08, + "loss": 3.8215, + "step": 1418500 + }, + { + "epoch": 15.76, + "learning_rate": 8.560346933789396e-08, + "loss": 3.8024, + "step": 1419000 + }, + { + "epoch": 15.76, + "learning_rate": 8.55895875441441e-08, + "loss": 3.8235, + "step": 1419500 + }, + { + "epoch": 15.77, + "learning_rate": 8.557570575039423e-08, + "loss": 3.8341, + "step": 1420000 + }, + { + "epoch": 15.78, + "learning_rate": 8.556182395664437e-08, + "loss": 3.8348, + "step": 1420500 + }, + { + "epoch": 15.78, + "learning_rate": 8.554794216289451e-08, + "loss": 3.8106, + "step": 1421000 + }, + { + "epoch": 15.79, + "learning_rate": 8.553406036914466e-08, + "loss": 3.8103, + "step": 1421500 + }, + { + "epoch": 15.79, + "learning_rate": 8.55201785753948e-08, + "loss": 3.8239, + "step": 1422000 + }, + { + "epoch": 15.8, + "learning_rate": 8.550629678164494e-08, + "loss": 3.815, + "step": 1422500 + }, + { + "epoch": 15.8, + "learning_rate": 8.549241498789507e-08, + "loss": 3.8078, + "step": 1423000 + }, + { + "epoch": 15.81, + "learning_rate": 8.547853319414521e-08, + "loss": 3.8191, + "step": 1423500 + }, + { + "epoch": 15.81, + "learning_rate": 8.546465140039536e-08, + "loss": 3.8114, + "step": 1424000 + }, + { + "epoch": 15.82, + "learning_rate": 8.545076960664548e-08, + "loss": 3.8183, + "step": 1424500 + }, + { + "epoch": 15.83, + "learning_rate": 8.543688781289563e-08, + "loss": 3.8089, + "step": 1425000 + }, + { + "epoch": 15.83, + "learning_rate": 8.542300601914577e-08, + "loss": 3.8328, + "step": 1425500 + }, + { + "epoch": 15.84, + "learning_rate": 8.54091242253959e-08, + "loss": 3.8204, + "step": 1426000 + }, + { + "epoch": 15.84, + "learning_rate": 8.539524243164604e-08, + "loss": 3.8005, + "step": 1426500 + }, + { + "epoch": 15.85, + "learning_rate": 8.538136063789618e-08, + "loss": 3.8259, + "step": 1427000 + }, + { + "epoch": 15.85, + "learning_rate": 8.536747884414632e-08, + "loss": 3.8232, + "step": 1427500 + }, + { + "epoch": 15.86, + "learning_rate": 8.535359705039647e-08, + "loss": 3.8196, + "step": 1428000 + }, + { + "epoch": 15.86, + "learning_rate": 8.53397152566466e-08, + "loss": 3.8154, + "step": 1428500 + }, + { + "epoch": 15.87, + "learning_rate": 8.532583346289674e-08, + "loss": 3.8187, + "step": 1429000 + }, + { + "epoch": 15.88, + "learning_rate": 8.531195166914688e-08, + "loss": 3.818, + "step": 1429500 + }, + { + "epoch": 15.88, + "learning_rate": 8.529806987539702e-08, + "loss": 3.8212, + "step": 1430000 + }, + { + "epoch": 15.89, + "learning_rate": 8.528418808164715e-08, + "loss": 3.8055, + "step": 1430500 + }, + { + "epoch": 15.89, + "learning_rate": 8.52703062878973e-08, + "loss": 3.8073, + "step": 1431000 + }, + { + "epoch": 15.9, + "learning_rate": 8.525642449414742e-08, + "loss": 3.8221, + "step": 1431500 + }, + { + "epoch": 15.9, + "learning_rate": 8.524254270039757e-08, + "loss": 3.8145, + "step": 1432000 + }, + { + "epoch": 15.91, + "learning_rate": 8.522866090664771e-08, + "loss": 3.8183, + "step": 1432500 + }, + { + "epoch": 15.91, + "learning_rate": 8.521477911289785e-08, + "loss": 3.8356, + "step": 1433000 + }, + { + "epoch": 15.92, + "learning_rate": 8.520089731914799e-08, + "loss": 3.84, + "step": 1433500 + }, + { + "epoch": 15.93, + "learning_rate": 8.518701552539813e-08, + "loss": 3.8109, + "step": 1434000 + }, + { + "epoch": 15.93, + "learning_rate": 8.517313373164826e-08, + "loss": 3.8183, + "step": 1434500 + }, + { + "epoch": 15.94, + "learning_rate": 8.51592519378984e-08, + "loss": 3.8327, + "step": 1435000 + }, + { + "epoch": 15.94, + "learning_rate": 8.514537014414855e-08, + "loss": 3.8311, + "step": 1435500 + }, + { + "epoch": 15.95, + "learning_rate": 8.513148835039868e-08, + "loss": 3.8188, + "step": 1436000 + }, + { + "epoch": 15.95, + "learning_rate": 8.511760655664882e-08, + "loss": 3.8091, + "step": 1436500 + }, + { + "epoch": 15.96, + "learning_rate": 8.510372476289895e-08, + "loss": 3.8162, + "step": 1437000 + }, + { + "epoch": 15.96, + "learning_rate": 8.508984296914909e-08, + "loss": 3.8364, + "step": 1437500 + }, + { + "epoch": 15.97, + "learning_rate": 8.507596117539923e-08, + "loss": 3.8147, + "step": 1438000 + }, + { + "epoch": 15.98, + "learning_rate": 8.506207938164938e-08, + "loss": 3.8158, + "step": 1438500 + }, + { + "epoch": 15.98, + "learning_rate": 8.504819758789952e-08, + "loss": 3.8208, + "step": 1439000 + }, + { + "epoch": 15.99, + "learning_rate": 8.503431579414966e-08, + "loss": 3.8241, + "step": 1439500 + }, + { + "epoch": 15.99, + "learning_rate": 8.502043400039979e-08, + "loss": 3.8143, + "step": 1440000 + }, + { + "epoch": 16.0, + "learning_rate": 8.500655220664993e-08, + "loss": 3.8116, + "step": 1440500 + }, + { + "epoch": 16.0, + "eval_loss": 3.8666203022003174, + "eval_runtime": 6.3142, + "eval_samples_per_second": 246.112, + "step": 1440736 + }, + { + "epoch": 16.0, + "learning_rate": 8.499267041290007e-08, + "loss": 3.8191, + "step": 1441000 + }, + { + "epoch": 16.01, + "learning_rate": 8.497878861915022e-08, + "loss": 3.8049, + "step": 1441500 + }, + { + "epoch": 16.01, + "learning_rate": 8.496490682540034e-08, + "loss": 3.813, + "step": 1442000 + }, + { + "epoch": 16.02, + "learning_rate": 8.495102503165049e-08, + "loss": 3.8038, + "step": 1442500 + }, + { + "epoch": 16.03, + "learning_rate": 8.493714323790062e-08, + "loss": 3.8249, + "step": 1443000 + }, + { + "epoch": 16.03, + "learning_rate": 8.492326144415076e-08, + "loss": 3.8166, + "step": 1443500 + }, + { + "epoch": 16.04, + "learning_rate": 8.49093796504009e-08, + "loss": 3.8103, + "step": 1444000 + }, + { + "epoch": 16.04, + "learning_rate": 8.489549785665104e-08, + "loss": 3.8248, + "step": 1444500 + }, + { + "epoch": 16.05, + "learning_rate": 8.488161606290118e-08, + "loss": 3.7897, + "step": 1445000 + }, + { + "epoch": 16.05, + "learning_rate": 8.486773426915131e-08, + "loss": 3.8007, + "step": 1445500 + }, + { + "epoch": 16.06, + "learning_rate": 8.485385247540146e-08, + "loss": 3.8071, + "step": 1446000 + }, + { + "epoch": 16.06, + "learning_rate": 8.48399706816516e-08, + "loss": 3.8224, + "step": 1446500 + }, + { + "epoch": 16.07, + "learning_rate": 8.482608888790174e-08, + "loss": 3.8002, + "step": 1447000 + }, + { + "epoch": 16.08, + "learning_rate": 8.481220709415188e-08, + "loss": 3.8207, + "step": 1447500 + }, + { + "epoch": 16.08, + "learning_rate": 8.479832530040201e-08, + "loss": 3.8086, + "step": 1448000 + }, + { + "epoch": 16.09, + "learning_rate": 8.478444350665215e-08, + "loss": 3.8017, + "step": 1448500 + }, + { + "epoch": 16.09, + "learning_rate": 8.477056171290228e-08, + "loss": 3.8285, + "step": 1449000 + }, + { + "epoch": 16.1, + "learning_rate": 8.475667991915243e-08, + "loss": 3.8077, + "step": 1449500 + }, + { + "epoch": 16.1, + "learning_rate": 8.474279812540257e-08, + "loss": 3.788, + "step": 1450000 + }, + { + "epoch": 16.11, + "learning_rate": 8.472891633165271e-08, + "loss": 3.8276, + "step": 1450500 + }, + { + "epoch": 16.11, + "learning_rate": 8.471503453790284e-08, + "loss": 3.834, + "step": 1451000 + }, + { + "epoch": 16.12, + "learning_rate": 8.470115274415298e-08, + "loss": 3.8128, + "step": 1451500 + }, + { + "epoch": 16.13, + "learning_rate": 8.468727095040312e-08, + "loss": 3.8144, + "step": 1452000 + }, + { + "epoch": 16.13, + "learning_rate": 8.467338915665327e-08, + "loss": 3.8088, + "step": 1452500 + }, + { + "epoch": 16.14, + "learning_rate": 8.465950736290341e-08, + "loss": 3.8208, + "step": 1453000 + }, + { + "epoch": 16.14, + "learning_rate": 8.464562556915355e-08, + "loss": 3.8222, + "step": 1453500 + }, + { + "epoch": 16.15, + "learning_rate": 8.463174377540368e-08, + "loss": 3.7955, + "step": 1454000 + }, + { + "epoch": 16.15, + "learning_rate": 8.461786198165382e-08, + "loss": 3.8453, + "step": 1454500 + }, + { + "epoch": 16.16, + "learning_rate": 8.460398018790395e-08, + "loss": 3.8136, + "step": 1455000 + }, + { + "epoch": 16.16, + "learning_rate": 8.459009839415409e-08, + "loss": 3.8154, + "step": 1455500 + }, + { + "epoch": 16.17, + "learning_rate": 8.457621660040424e-08, + "loss": 3.8284, + "step": 1456000 + }, + { + "epoch": 16.18, + "learning_rate": 8.456233480665436e-08, + "loss": 3.8186, + "step": 1456500 + }, + { + "epoch": 16.18, + "learning_rate": 8.45484530129045e-08, + "loss": 3.8002, + "step": 1457000 + }, + { + "epoch": 16.19, + "learning_rate": 8.453457121915465e-08, + "loss": 3.8042, + "step": 1457500 + }, + { + "epoch": 16.19, + "learning_rate": 8.452068942540479e-08, + "loss": 3.8248, + "step": 1458000 + }, + { + "epoch": 16.2, + "learning_rate": 8.450680763165493e-08, + "loss": 3.8115, + "step": 1458500 + }, + { + "epoch": 16.2, + "learning_rate": 8.449292583790508e-08, + "loss": 3.7705, + "step": 1459000 + }, + { + "epoch": 16.21, + "learning_rate": 8.44790440441552e-08, + "loss": 3.812, + "step": 1459500 + }, + { + "epoch": 16.21, + "learning_rate": 8.446516225040535e-08, + "loss": 3.815, + "step": 1460000 + }, + { + "epoch": 16.22, + "learning_rate": 8.445128045665549e-08, + "loss": 3.8179, + "step": 1460500 + }, + { + "epoch": 16.23, + "learning_rate": 8.443739866290562e-08, + "loss": 3.8306, + "step": 1461000 + }, + { + "epoch": 16.23, + "learning_rate": 8.442351686915576e-08, + "loss": 3.8334, + "step": 1461500 + }, + { + "epoch": 16.24, + "learning_rate": 8.44096350754059e-08, + "loss": 3.8042, + "step": 1462000 + }, + { + "epoch": 16.24, + "learning_rate": 8.439575328165603e-08, + "loss": 3.8125, + "step": 1462500 + }, + { + "epoch": 16.25, + "learning_rate": 8.438187148790617e-08, + "loss": 3.7856, + "step": 1463000 + }, + { + "epoch": 16.25, + "learning_rate": 8.436798969415632e-08, + "loss": 3.8237, + "step": 1463500 + }, + { + "epoch": 16.26, + "learning_rate": 8.435410790040646e-08, + "loss": 3.8156, + "step": 1464000 + }, + { + "epoch": 16.26, + "learning_rate": 8.43402261066566e-08, + "loss": 3.8183, + "step": 1464500 + }, + { + "epoch": 16.27, + "learning_rate": 8.432634431290673e-08, + "loss": 3.8305, + "step": 1465000 + }, + { + "epoch": 16.28, + "learning_rate": 8.431246251915687e-08, + "loss": 3.8205, + "step": 1465500 + }, + { + "epoch": 16.28, + "learning_rate": 8.429858072540701e-08, + "loss": 3.8137, + "step": 1466000 + }, + { + "epoch": 16.29, + "learning_rate": 8.428469893165714e-08, + "loss": 3.8264, + "step": 1466500 + }, + { + "epoch": 16.29, + "learning_rate": 8.427081713790729e-08, + "loss": 3.8302, + "step": 1467000 + }, + { + "epoch": 16.3, + "learning_rate": 8.425693534415743e-08, + "loss": 3.8016, + "step": 1467500 + }, + { + "epoch": 16.3, + "learning_rate": 8.424305355040756e-08, + "loss": 3.8179, + "step": 1468000 + }, + { + "epoch": 16.31, + "learning_rate": 8.42291717566577e-08, + "loss": 3.8046, + "step": 1468500 + }, + { + "epoch": 16.31, + "learning_rate": 8.421528996290784e-08, + "loss": 3.7876, + "step": 1469000 + }, + { + "epoch": 16.32, + "learning_rate": 8.420140816915798e-08, + "loss": 3.7978, + "step": 1469500 + }, + { + "epoch": 16.32, + "learning_rate": 8.418752637540813e-08, + "loss": 3.8062, + "step": 1470000 + }, + { + "epoch": 16.33, + "learning_rate": 8.417364458165827e-08, + "loss": 3.8247, + "step": 1470500 + }, + { + "epoch": 16.34, + "learning_rate": 8.41597627879084e-08, + "loss": 3.801, + "step": 1471000 + }, + { + "epoch": 16.34, + "learning_rate": 8.414588099415854e-08, + "loss": 3.8146, + "step": 1471500 + }, + { + "epoch": 16.35, + "learning_rate": 8.413199920040868e-08, + "loss": 3.8123, + "step": 1472000 + }, + { + "epoch": 16.35, + "learning_rate": 8.411811740665881e-08, + "loss": 3.8107, + "step": 1472500 + }, + { + "epoch": 16.36, + "learning_rate": 8.410423561290895e-08, + "loss": 3.8171, + "step": 1473000 + }, + { + "epoch": 16.36, + "learning_rate": 8.409035381915908e-08, + "loss": 3.8245, + "step": 1473500 + }, + { + "epoch": 16.37, + "learning_rate": 8.407647202540922e-08, + "loss": 3.7992, + "step": 1474000 + }, + { + "epoch": 16.37, + "learning_rate": 8.406259023165937e-08, + "loss": 3.8018, + "step": 1474500 + }, + { + "epoch": 16.38, + "learning_rate": 8.404870843790951e-08, + "loss": 3.8194, + "step": 1475000 + }, + { + "epoch": 16.39, + "learning_rate": 8.403482664415965e-08, + "loss": 3.8054, + "step": 1475500 + }, + { + "epoch": 16.39, + "learning_rate": 8.40209448504098e-08, + "loss": 3.8085, + "step": 1476000 + }, + { + "epoch": 16.4, + "learning_rate": 8.400706305665992e-08, + "loss": 3.811, + "step": 1476500 + }, + { + "epoch": 16.4, + "learning_rate": 8.399318126291006e-08, + "loss": 3.8089, + "step": 1477000 + }, + { + "epoch": 16.41, + "learning_rate": 8.397929946916021e-08, + "loss": 3.8236, + "step": 1477500 + }, + { + "epoch": 16.41, + "learning_rate": 8.396541767541035e-08, + "loss": 3.8247, + "step": 1478000 + }, + { + "epoch": 16.42, + "learning_rate": 8.395153588166048e-08, + "loss": 3.7988, + "step": 1478500 + }, + { + "epoch": 16.42, + "learning_rate": 8.393765408791062e-08, + "loss": 3.8104, + "step": 1479000 + }, + { + "epoch": 16.43, + "learning_rate": 8.392377229416075e-08, + "loss": 3.8342, + "step": 1479500 + }, + { + "epoch": 16.44, + "learning_rate": 8.390989050041089e-08, + "loss": 3.809, + "step": 1480000 + }, + { + "epoch": 16.44, + "learning_rate": 8.389600870666103e-08, + "loss": 3.7938, + "step": 1480500 + }, + { + "epoch": 16.45, + "learning_rate": 8.388212691291118e-08, + "loss": 3.8169, + "step": 1481000 + }, + { + "epoch": 16.45, + "learning_rate": 8.386824511916132e-08, + "loss": 3.8089, + "step": 1481500 + }, + { + "epoch": 16.46, + "learning_rate": 8.385436332541145e-08, + "loss": 3.8115, + "step": 1482000 + }, + { + "epoch": 16.46, + "learning_rate": 8.384048153166159e-08, + "loss": 3.8208, + "step": 1482500 + }, + { + "epoch": 16.47, + "learning_rate": 8.382659973791173e-08, + "loss": 3.8192, + "step": 1483000 + }, + { + "epoch": 16.47, + "learning_rate": 8.381271794416187e-08, + "loss": 3.8117, + "step": 1483500 + }, + { + "epoch": 16.48, + "learning_rate": 8.379883615041202e-08, + "loss": 3.8197, + "step": 1484000 + }, + { + "epoch": 16.49, + "learning_rate": 8.378495435666215e-08, + "loss": 3.8187, + "step": 1484500 + }, + { + "epoch": 16.49, + "learning_rate": 8.377107256291229e-08, + "loss": 3.8165, + "step": 1485000 + }, + { + "epoch": 16.5, + "learning_rate": 8.375719076916242e-08, + "loss": 3.7969, + "step": 1485500 + }, + { + "epoch": 16.5, + "learning_rate": 8.374330897541256e-08, + "loss": 3.8254, + "step": 1486000 + }, + { + "epoch": 16.51, + "learning_rate": 8.37294271816627e-08, + "loss": 3.8132, + "step": 1486500 + }, + { + "epoch": 16.51, + "learning_rate": 8.371554538791284e-08, + "loss": 3.8066, + "step": 1487000 + }, + { + "epoch": 16.52, + "learning_rate": 8.370166359416297e-08, + "loss": 3.8076, + "step": 1487500 + }, + { + "epoch": 16.52, + "learning_rate": 8.368778180041312e-08, + "loss": 3.8176, + "step": 1488000 + }, + { + "epoch": 16.53, + "learning_rate": 8.367390000666326e-08, + "loss": 3.7983, + "step": 1488500 + }, + { + "epoch": 16.54, + "learning_rate": 8.36600182129134e-08, + "loss": 3.8272, + "step": 1489000 + }, + { + "epoch": 16.54, + "learning_rate": 8.364613641916354e-08, + "loss": 3.8164, + "step": 1489500 + }, + { + "epoch": 16.55, + "learning_rate": 8.363225462541368e-08, + "loss": 3.8167, + "step": 1490000 + }, + { + "epoch": 16.55, + "learning_rate": 8.361837283166381e-08, + "loss": 3.8066, + "step": 1490500 + }, + { + "epoch": 16.56, + "learning_rate": 8.360449103791396e-08, + "loss": 3.7905, + "step": 1491000 + }, + { + "epoch": 16.56, + "learning_rate": 8.359060924416408e-08, + "loss": 3.8025, + "step": 1491500 + }, + { + "epoch": 16.57, + "learning_rate": 8.357672745041423e-08, + "loss": 3.8094, + "step": 1492000 + }, + { + "epoch": 16.57, + "learning_rate": 8.356284565666437e-08, + "loss": 3.8193, + "step": 1492500 + }, + { + "epoch": 16.58, + "learning_rate": 8.35489638629145e-08, + "loss": 3.8276, + "step": 1493000 + }, + { + "epoch": 16.59, + "learning_rate": 8.353508206916464e-08, + "loss": 3.82, + "step": 1493500 + }, + { + "epoch": 16.59, + "learning_rate": 8.352120027541478e-08, + "loss": 3.8219, + "step": 1494000 + }, + { + "epoch": 16.6, + "learning_rate": 8.350731848166492e-08, + "loss": 3.8418, + "step": 1494500 + }, + { + "epoch": 16.6, + "learning_rate": 8.349343668791507e-08, + "loss": 3.8105, + "step": 1495000 + }, + { + "epoch": 16.61, + "learning_rate": 8.347955489416521e-08, + "loss": 3.8177, + "step": 1495500 + }, + { + "epoch": 16.61, + "learning_rate": 8.346567310041534e-08, + "loss": 3.8042, + "step": 1496000 + }, + { + "epoch": 16.62, + "learning_rate": 8.345179130666548e-08, + "loss": 3.8262, + "step": 1496500 + }, + { + "epoch": 16.62, + "learning_rate": 8.343790951291561e-08, + "loss": 3.8134, + "step": 1497000 + }, + { + "epoch": 16.63, + "learning_rate": 8.342402771916575e-08, + "loss": 3.8195, + "step": 1497500 + }, + { + "epoch": 16.64, + "learning_rate": 8.34101459254159e-08, + "loss": 3.8213, + "step": 1498000 + }, + { + "epoch": 16.64, + "learning_rate": 8.339626413166604e-08, + "loss": 3.8041, + "step": 1498500 + }, + { + "epoch": 16.65, + "learning_rate": 8.338238233791617e-08, + "loss": 3.807, + "step": 1499000 + }, + { + "epoch": 16.65, + "learning_rate": 8.336850054416631e-08, + "loss": 3.808, + "step": 1499500 + }, + { + "epoch": 16.66, + "learning_rate": 8.335461875041645e-08, + "loss": 3.8179, + "step": 1500000 + }, + { + "epoch": 16.66, + "learning_rate": 8.334073695666659e-08, + "loss": 3.8197, + "step": 1500500 + }, + { + "epoch": 16.67, + "learning_rate": 8.332685516291673e-08, + "loss": 3.8118, + "step": 1501000 + }, + { + "epoch": 16.67, + "learning_rate": 8.331297336916686e-08, + "loss": 3.8238, + "step": 1501500 + }, + { + "epoch": 16.68, + "learning_rate": 8.3299091575417e-08, + "loss": 3.8, + "step": 1502000 + }, + { + "epoch": 16.69, + "learning_rate": 8.328520978166715e-08, + "loss": 3.8048, + "step": 1502500 + }, + { + "epoch": 16.69, + "learning_rate": 8.327132798791728e-08, + "loss": 3.821, + "step": 1503000 + }, + { + "epoch": 16.7, + "learning_rate": 8.325744619416742e-08, + "loss": 3.8155, + "step": 1503500 + }, + { + "epoch": 16.7, + "learning_rate": 8.324356440041756e-08, + "loss": 3.819, + "step": 1504000 + }, + { + "epoch": 16.71, + "learning_rate": 8.322968260666769e-08, + "loss": 3.8134, + "step": 1504500 + }, + { + "epoch": 16.71, + "learning_rate": 8.321580081291783e-08, + "loss": 3.8152, + "step": 1505000 + }, + { + "epoch": 16.72, + "learning_rate": 8.320191901916798e-08, + "loss": 3.806, + "step": 1505500 + }, + { + "epoch": 16.72, + "learning_rate": 8.318803722541812e-08, + "loss": 3.8079, + "step": 1506000 + }, + { + "epoch": 16.73, + "learning_rate": 8.317415543166826e-08, + "loss": 3.8318, + "step": 1506500 + }, + { + "epoch": 16.74, + "learning_rate": 8.31602736379184e-08, + "loss": 3.8015, + "step": 1507000 + }, + { + "epoch": 16.74, + "learning_rate": 8.314639184416853e-08, + "loss": 3.8231, + "step": 1507500 + }, + { + "epoch": 16.75, + "learning_rate": 8.313251005041867e-08, + "loss": 3.8106, + "step": 1508000 + }, + { + "epoch": 16.75, + "learning_rate": 8.311862825666882e-08, + "loss": 3.8139, + "step": 1508500 + }, + { + "epoch": 16.76, + "learning_rate": 8.310474646291894e-08, + "loss": 3.8296, + "step": 1509000 + }, + { + "epoch": 16.76, + "learning_rate": 8.309086466916909e-08, + "loss": 3.8036, + "step": 1509500 + }, + { + "epoch": 16.77, + "learning_rate": 8.307698287541922e-08, + "loss": 3.8279, + "step": 1510000 + }, + { + "epoch": 16.77, + "learning_rate": 8.306310108166936e-08, + "loss": 3.8196, + "step": 1510500 + }, + { + "epoch": 16.78, + "learning_rate": 8.30492192879195e-08, + "loss": 3.8155, + "step": 1511000 + }, + { + "epoch": 16.79, + "learning_rate": 8.303533749416964e-08, + "loss": 3.81, + "step": 1511500 + }, + { + "epoch": 16.79, + "learning_rate": 8.302145570041978e-08, + "loss": 3.8034, + "step": 1512000 + }, + { + "epoch": 16.8, + "learning_rate": 8.300757390666993e-08, + "loss": 3.8207, + "step": 1512500 + }, + { + "epoch": 16.8, + "learning_rate": 8.299369211292006e-08, + "loss": 3.8107, + "step": 1513000 + }, + { + "epoch": 16.81, + "learning_rate": 8.29798103191702e-08, + "loss": 3.8125, + "step": 1513500 + }, + { + "epoch": 16.81, + "learning_rate": 8.296592852542034e-08, + "loss": 3.8347, + "step": 1514000 + }, + { + "epoch": 16.82, + "learning_rate": 8.295204673167048e-08, + "loss": 3.8297, + "step": 1514500 + }, + { + "epoch": 16.82, + "learning_rate": 8.293816493792061e-08, + "loss": 3.8204, + "step": 1515000 + }, + { + "epoch": 16.83, + "learning_rate": 8.292428314417075e-08, + "loss": 3.82, + "step": 1515500 + }, + { + "epoch": 16.84, + "learning_rate": 8.291040135042088e-08, + "loss": 3.8202, + "step": 1516000 + }, + { + "epoch": 16.84, + "learning_rate": 8.289651955667103e-08, + "loss": 3.8214, + "step": 1516500 + }, + { + "epoch": 16.85, + "learning_rate": 8.288263776292117e-08, + "loss": 3.7997, + "step": 1517000 + }, + { + "epoch": 16.85, + "learning_rate": 8.286875596917131e-08, + "loss": 3.8186, + "step": 1517500 + }, + { + "epoch": 16.86, + "learning_rate": 8.285487417542145e-08, + "loss": 3.81, + "step": 1518000 + }, + { + "epoch": 16.86, + "learning_rate": 8.284099238167158e-08, + "loss": 3.8207, + "step": 1518500 + }, + { + "epoch": 16.87, + "learning_rate": 8.282711058792172e-08, + "loss": 3.8071, + "step": 1519000 + }, + { + "epoch": 16.87, + "learning_rate": 8.281322879417187e-08, + "loss": 3.8247, + "step": 1519500 + }, + { + "epoch": 16.88, + "learning_rate": 8.279934700042201e-08, + "loss": 3.8202, + "step": 1520000 + }, + { + "epoch": 16.89, + "learning_rate": 8.278546520667215e-08, + "loss": 3.8027, + "step": 1520500 + }, + { + "epoch": 16.89, + "learning_rate": 8.277158341292228e-08, + "loss": 3.8041, + "step": 1521000 + }, + { + "epoch": 16.9, + "learning_rate": 8.275770161917242e-08, + "loss": 3.8283, + "step": 1521500 + }, + { + "epoch": 16.9, + "learning_rate": 8.274381982542255e-08, + "loss": 3.8106, + "step": 1522000 + }, + { + "epoch": 16.91, + "learning_rate": 8.272993803167269e-08, + "loss": 3.8153, + "step": 1522500 + }, + { + "epoch": 16.91, + "learning_rate": 8.271605623792284e-08, + "loss": 3.8071, + "step": 1523000 + }, + { + "epoch": 16.92, + "learning_rate": 8.270217444417298e-08, + "loss": 3.8048, + "step": 1523500 + }, + { + "epoch": 16.92, + "learning_rate": 8.268829265042311e-08, + "loss": 3.8246, + "step": 1524000 + }, + { + "epoch": 16.93, + "learning_rate": 8.267441085667325e-08, + "loss": 3.8208, + "step": 1524500 + }, + { + "epoch": 16.94, + "learning_rate": 8.266052906292339e-08, + "loss": 3.8201, + "step": 1525000 + }, + { + "epoch": 16.94, + "learning_rate": 8.264664726917353e-08, + "loss": 3.8069, + "step": 1525500 + }, + { + "epoch": 16.95, + "learning_rate": 8.263276547542368e-08, + "loss": 3.8019, + "step": 1526000 + }, + { + "epoch": 16.95, + "learning_rate": 8.261888368167382e-08, + "loss": 3.8163, + "step": 1526500 + }, + { + "epoch": 16.96, + "learning_rate": 8.260500188792395e-08, + "loss": 3.7999, + "step": 1527000 + }, + { + "epoch": 16.96, + "learning_rate": 8.259112009417408e-08, + "loss": 3.8106, + "step": 1527500 + }, + { + "epoch": 16.97, + "learning_rate": 8.257723830042422e-08, + "loss": 3.7933, + "step": 1528000 + }, + { + "epoch": 16.97, + "learning_rate": 8.256335650667436e-08, + "loss": 3.8071, + "step": 1528500 + }, + { + "epoch": 16.98, + "learning_rate": 8.25494747129245e-08, + "loss": 3.7952, + "step": 1529000 + }, + { + "epoch": 16.99, + "learning_rate": 8.253559291917465e-08, + "loss": 3.7952, + "step": 1529500 + }, + { + "epoch": 16.99, + "learning_rate": 8.252171112542477e-08, + "loss": 3.8094, + "step": 1530000 + }, + { + "epoch": 17.0, + "learning_rate": 8.250782933167492e-08, + "loss": 3.805, + "step": 1530500 + }, + { + "epoch": 17.0, + "eval_loss": 3.8627829551696777, + "eval_runtime": 6.3079, + "eval_samples_per_second": 246.358, + "step": 1530782 + }, + { + "epoch": 17.0, + "learning_rate": 8.249394753792506e-08, + "loss": 3.8162, + "step": 1531000 + }, + { + "epoch": 17.01, + "learning_rate": 8.24800657441752e-08, + "loss": 3.8088, + "step": 1531500 + }, + { + "epoch": 17.01, + "learning_rate": 8.246618395042534e-08, + "loss": 3.8008, + "step": 1532000 + }, + { + "epoch": 17.02, + "learning_rate": 8.245230215667547e-08, + "loss": 3.8112, + "step": 1532500 + }, + { + "epoch": 17.02, + "learning_rate": 8.243842036292561e-08, + "loss": 3.8323, + "step": 1533000 + }, + { + "epoch": 17.03, + "learning_rate": 8.242453856917574e-08, + "loss": 3.7973, + "step": 1533500 + }, + { + "epoch": 17.04, + "learning_rate": 8.241065677542589e-08, + "loss": 3.7927, + "step": 1534000 + }, + { + "epoch": 17.04, + "learning_rate": 8.239677498167603e-08, + "loss": 3.8046, + "step": 1534500 + }, + { + "epoch": 17.05, + "learning_rate": 8.238289318792617e-08, + "loss": 3.794, + "step": 1535000 + }, + { + "epoch": 17.05, + "learning_rate": 8.23690113941763e-08, + "loss": 3.8122, + "step": 1535500 + }, + { + "epoch": 17.06, + "learning_rate": 8.235512960042644e-08, + "loss": 3.8133, + "step": 1536000 + }, + { + "epoch": 17.06, + "learning_rate": 8.234124780667658e-08, + "loss": 3.8241, + "step": 1536500 + }, + { + "epoch": 17.07, + "learning_rate": 8.232736601292673e-08, + "loss": 3.802, + "step": 1537000 + }, + { + "epoch": 17.07, + "learning_rate": 8.231348421917687e-08, + "loss": 3.8265, + "step": 1537500 + }, + { + "epoch": 17.08, + "learning_rate": 8.2299602425427e-08, + "loss": 3.8211, + "step": 1538000 + }, + { + "epoch": 17.09, + "learning_rate": 8.228572063167714e-08, + "loss": 3.7952, + "step": 1538500 + }, + { + "epoch": 17.09, + "learning_rate": 8.227183883792728e-08, + "loss": 3.8058, + "step": 1539000 + }, + { + "epoch": 17.1, + "learning_rate": 8.225795704417741e-08, + "loss": 3.7884, + "step": 1539500 + }, + { + "epoch": 17.1, + "learning_rate": 8.224407525042755e-08, + "loss": 3.8228, + "step": 1540000 + }, + { + "epoch": 17.11, + "learning_rate": 8.22301934566777e-08, + "loss": 3.8158, + "step": 1540500 + }, + { + "epoch": 17.11, + "learning_rate": 8.221631166292782e-08, + "loss": 3.8083, + "step": 1541000 + }, + { + "epoch": 17.12, + "learning_rate": 8.220242986917797e-08, + "loss": 3.8341, + "step": 1541500 + }, + { + "epoch": 17.12, + "learning_rate": 8.218854807542811e-08, + "loss": 3.7961, + "step": 1542000 + }, + { + "epoch": 17.13, + "learning_rate": 8.217466628167825e-08, + "loss": 3.8178, + "step": 1542500 + }, + { + "epoch": 17.14, + "learning_rate": 8.21607844879284e-08, + "loss": 3.802, + "step": 1543000 + }, + { + "epoch": 17.14, + "learning_rate": 8.214690269417854e-08, + "loss": 3.8048, + "step": 1543500 + }, + { + "epoch": 17.15, + "learning_rate": 8.213302090042866e-08, + "loss": 3.8061, + "step": 1544000 + }, + { + "epoch": 17.15, + "learning_rate": 8.211913910667881e-08, + "loss": 3.8264, + "step": 1544500 + }, + { + "epoch": 17.16, + "learning_rate": 8.210525731292895e-08, + "loss": 3.813, + "step": 1545000 + }, + { + "epoch": 17.16, + "learning_rate": 8.209137551917908e-08, + "loss": 3.7973, + "step": 1545500 + }, + { + "epoch": 17.17, + "learning_rate": 8.207749372542922e-08, + "loss": 3.8024, + "step": 1546000 + }, + { + "epoch": 17.17, + "learning_rate": 8.206361193167935e-08, + "loss": 3.8042, + "step": 1546500 + }, + { + "epoch": 17.18, + "learning_rate": 8.204973013792949e-08, + "loss": 3.803, + "step": 1547000 + }, + { + "epoch": 17.19, + "learning_rate": 8.203584834417963e-08, + "loss": 3.8072, + "step": 1547500 + }, + { + "epoch": 17.19, + "learning_rate": 8.202196655042978e-08, + "loss": 3.8096, + "step": 1548000 + }, + { + "epoch": 17.2, + "learning_rate": 8.200808475667992e-08, + "loss": 3.8117, + "step": 1548500 + }, + { + "epoch": 17.2, + "learning_rate": 8.199420296293006e-08, + "loss": 3.8039, + "step": 1549000 + }, + { + "epoch": 17.21, + "learning_rate": 8.198032116918019e-08, + "loss": 3.7965, + "step": 1549500 + }, + { + "epoch": 17.21, + "learning_rate": 8.196643937543033e-08, + "loss": 3.7996, + "step": 1550000 + }, + { + "epoch": 17.22, + "learning_rate": 8.195255758168047e-08, + "loss": 3.8054, + "step": 1550500 + }, + { + "epoch": 17.22, + "learning_rate": 8.193867578793062e-08, + "loss": 3.802, + "step": 1551000 + }, + { + "epoch": 17.23, + "learning_rate": 8.192479399418075e-08, + "loss": 3.7946, + "step": 1551500 + }, + { + "epoch": 17.24, + "learning_rate": 8.191091220043089e-08, + "loss": 3.8046, + "step": 1552000 + }, + { + "epoch": 17.24, + "learning_rate": 8.189703040668102e-08, + "loss": 3.8187, + "step": 1552500 + }, + { + "epoch": 17.25, + "learning_rate": 8.188314861293116e-08, + "loss": 3.8068, + "step": 1553000 + }, + { + "epoch": 17.25, + "learning_rate": 8.18692668191813e-08, + "loss": 3.8058, + "step": 1553500 + }, + { + "epoch": 17.26, + "learning_rate": 8.185538502543144e-08, + "loss": 3.8246, + "step": 1554000 + }, + { + "epoch": 17.26, + "learning_rate": 8.184150323168159e-08, + "loss": 3.8019, + "step": 1554500 + }, + { + "epoch": 17.27, + "learning_rate": 8.182762143793172e-08, + "loss": 3.8083, + "step": 1555000 + }, + { + "epoch": 17.27, + "learning_rate": 8.181373964418186e-08, + "loss": 3.8115, + "step": 1555500 + }, + { + "epoch": 17.28, + "learning_rate": 8.1799857850432e-08, + "loss": 3.8036, + "step": 1556000 + }, + { + "epoch": 17.29, + "learning_rate": 8.178597605668214e-08, + "loss": 3.7998, + "step": 1556500 + }, + { + "epoch": 17.29, + "learning_rate": 8.177209426293228e-08, + "loss": 3.794, + "step": 1557000 + }, + { + "epoch": 17.3, + "learning_rate": 8.175821246918241e-08, + "loss": 3.8139, + "step": 1557500 + }, + { + "epoch": 17.3, + "learning_rate": 8.174433067543254e-08, + "loss": 3.7824, + "step": 1558000 + }, + { + "epoch": 17.31, + "learning_rate": 8.173044888168268e-08, + "loss": 3.8173, + "step": 1558500 + }, + { + "epoch": 17.31, + "learning_rate": 8.171656708793283e-08, + "loss": 3.809, + "step": 1559000 + }, + { + "epoch": 17.32, + "learning_rate": 8.170268529418297e-08, + "loss": 3.8032, + "step": 1559500 + }, + { + "epoch": 17.32, + "learning_rate": 8.168880350043311e-08, + "loss": 3.8012, + "step": 1560000 + }, + { + "epoch": 17.33, + "learning_rate": 8.167492170668324e-08, + "loss": 3.8079, + "step": 1560500 + }, + { + "epoch": 17.34, + "learning_rate": 8.166103991293338e-08, + "loss": 3.8075, + "step": 1561000 + }, + { + "epoch": 17.34, + "learning_rate": 8.164715811918353e-08, + "loss": 3.7996, + "step": 1561500 + }, + { + "epoch": 17.35, + "learning_rate": 8.163327632543367e-08, + "loss": 3.825, + "step": 1562000 + }, + { + "epoch": 17.35, + "learning_rate": 8.161939453168381e-08, + "loss": 3.8108, + "step": 1562500 + }, + { + "epoch": 17.36, + "learning_rate": 8.160551273793395e-08, + "loss": 3.8177, + "step": 1563000 + }, + { + "epoch": 17.36, + "learning_rate": 8.159163094418408e-08, + "loss": 3.8176, + "step": 1563500 + }, + { + "epoch": 17.37, + "learning_rate": 8.157774915043421e-08, + "loss": 3.8287, + "step": 1564000 + }, + { + "epoch": 17.37, + "learning_rate": 8.156386735668435e-08, + "loss": 3.8034, + "step": 1564500 + }, + { + "epoch": 17.38, + "learning_rate": 8.15499855629345e-08, + "loss": 3.8146, + "step": 1565000 + }, + { + "epoch": 17.39, + "learning_rate": 8.153610376918464e-08, + "loss": 3.809, + "step": 1565500 + }, + { + "epoch": 17.39, + "learning_rate": 8.152222197543478e-08, + "loss": 3.8231, + "step": 1566000 + }, + { + "epoch": 17.4, + "learning_rate": 8.150834018168491e-08, + "loss": 3.8085, + "step": 1566500 + }, + { + "epoch": 17.4, + "learning_rate": 8.149445838793505e-08, + "loss": 3.8029, + "step": 1567000 + }, + { + "epoch": 17.41, + "learning_rate": 8.148057659418519e-08, + "loss": 3.821, + "step": 1567500 + }, + { + "epoch": 17.41, + "learning_rate": 8.146669480043533e-08, + "loss": 3.8029, + "step": 1568000 + }, + { + "epoch": 17.42, + "learning_rate": 8.145281300668548e-08, + "loss": 3.807, + "step": 1568500 + }, + { + "epoch": 17.42, + "learning_rate": 8.14389312129356e-08, + "loss": 3.7985, + "step": 1569000 + }, + { + "epoch": 17.43, + "learning_rate": 8.142504941918575e-08, + "loss": 3.8087, + "step": 1569500 + }, + { + "epoch": 17.44, + "learning_rate": 8.141116762543588e-08, + "loss": 3.8044, + "step": 1570000 + }, + { + "epoch": 17.44, + "learning_rate": 8.139728583168602e-08, + "loss": 3.8027, + "step": 1570500 + }, + { + "epoch": 17.45, + "learning_rate": 8.138340403793616e-08, + "loss": 3.7967, + "step": 1571000 + }, + { + "epoch": 17.45, + "learning_rate": 8.13695222441863e-08, + "loss": 3.8133, + "step": 1571500 + }, + { + "epoch": 17.46, + "learning_rate": 8.135564045043643e-08, + "loss": 3.8169, + "step": 1572000 + }, + { + "epoch": 17.46, + "learning_rate": 8.134175865668658e-08, + "loss": 3.8119, + "step": 1572500 + }, + { + "epoch": 17.47, + "learning_rate": 8.132787686293672e-08, + "loss": 3.8137, + "step": 1573000 + }, + { + "epoch": 17.47, + "learning_rate": 8.131399506918686e-08, + "loss": 3.8217, + "step": 1573500 + }, + { + "epoch": 17.48, + "learning_rate": 8.1300113275437e-08, + "loss": 3.8088, + "step": 1574000 + }, + { + "epoch": 17.49, + "learning_rate": 8.128623148168713e-08, + "loss": 3.8072, + "step": 1574500 + }, + { + "epoch": 17.49, + "learning_rate": 8.127234968793727e-08, + "loss": 3.8164, + "step": 1575000 + }, + { + "epoch": 17.5, + "learning_rate": 8.125846789418742e-08, + "loss": 3.8114, + "step": 1575500 + }, + { + "epoch": 17.5, + "learning_rate": 8.124458610043754e-08, + "loss": 3.8284, + "step": 1576000 + }, + { + "epoch": 17.51, + "learning_rate": 8.123070430668769e-08, + "loss": 3.8031, + "step": 1576500 + }, + { + "epoch": 17.51, + "learning_rate": 8.121682251293783e-08, + "loss": 3.8156, + "step": 1577000 + }, + { + "epoch": 17.52, + "learning_rate": 8.120294071918796e-08, + "loss": 3.8122, + "step": 1577500 + }, + { + "epoch": 17.52, + "learning_rate": 8.11890589254381e-08, + "loss": 3.806, + "step": 1578000 + }, + { + "epoch": 17.53, + "learning_rate": 8.117517713168824e-08, + "loss": 3.8084, + "step": 1578500 + }, + { + "epoch": 17.54, + "learning_rate": 8.116129533793839e-08, + "loss": 3.8027, + "step": 1579000 + }, + { + "epoch": 17.54, + "learning_rate": 8.114741354418853e-08, + "loss": 3.826, + "step": 1579500 + }, + { + "epoch": 17.55, + "learning_rate": 8.113353175043867e-08, + "loss": 3.8197, + "step": 1580000 + }, + { + "epoch": 17.55, + "learning_rate": 8.11196499566888e-08, + "loss": 3.7976, + "step": 1580500 + }, + { + "epoch": 17.56, + "learning_rate": 8.110576816293894e-08, + "loss": 3.8124, + "step": 1581000 + }, + { + "epoch": 17.56, + "learning_rate": 8.109188636918908e-08, + "loss": 3.7982, + "step": 1581500 + }, + { + "epoch": 17.57, + "learning_rate": 8.107800457543921e-08, + "loss": 3.8033, + "step": 1582000 + }, + { + "epoch": 17.57, + "learning_rate": 8.106412278168935e-08, + "loss": 3.8034, + "step": 1582500 + }, + { + "epoch": 17.58, + "learning_rate": 8.105024098793948e-08, + "loss": 3.8005, + "step": 1583000 + }, + { + "epoch": 17.59, + "learning_rate": 8.103635919418963e-08, + "loss": 3.8113, + "step": 1583500 + }, + { + "epoch": 17.59, + "learning_rate": 8.102247740043977e-08, + "loss": 3.8051, + "step": 1584000 + }, + { + "epoch": 17.6, + "learning_rate": 8.100859560668991e-08, + "loss": 3.8114, + "step": 1584500 + }, + { + "epoch": 17.6, + "learning_rate": 8.099471381294005e-08, + "loss": 3.7973, + "step": 1585000 + }, + { + "epoch": 17.61, + "learning_rate": 8.09808320191902e-08, + "loss": 3.8058, + "step": 1585500 + }, + { + "epoch": 17.61, + "learning_rate": 8.096695022544032e-08, + "loss": 3.8052, + "step": 1586000 + }, + { + "epoch": 17.62, + "learning_rate": 8.095306843169047e-08, + "loss": 3.8029, + "step": 1586500 + }, + { + "epoch": 17.62, + "learning_rate": 8.093918663794061e-08, + "loss": 3.8095, + "step": 1587000 + }, + { + "epoch": 17.63, + "learning_rate": 8.092530484419075e-08, + "loss": 3.7888, + "step": 1587500 + }, + { + "epoch": 17.64, + "learning_rate": 8.091142305044088e-08, + "loss": 3.8118, + "step": 1588000 + }, + { + "epoch": 17.64, + "learning_rate": 8.089754125669102e-08, + "loss": 3.8101, + "step": 1588500 + }, + { + "epoch": 17.65, + "learning_rate": 8.088365946294115e-08, + "loss": 3.8108, + "step": 1589000 + }, + { + "epoch": 17.65, + "learning_rate": 8.086977766919129e-08, + "loss": 3.8253, + "step": 1589500 + }, + { + "epoch": 17.66, + "learning_rate": 8.085589587544144e-08, + "loss": 3.7959, + "step": 1590000 + }, + { + "epoch": 17.66, + "learning_rate": 8.084201408169158e-08, + "loss": 3.8004, + "step": 1590500 + }, + { + "epoch": 17.67, + "learning_rate": 8.082813228794172e-08, + "loss": 3.8229, + "step": 1591000 + }, + { + "epoch": 17.67, + "learning_rate": 8.081425049419185e-08, + "loss": 3.7977, + "step": 1591500 + }, + { + "epoch": 17.68, + "learning_rate": 8.080036870044199e-08, + "loss": 3.8166, + "step": 1592000 + }, + { + "epoch": 17.69, + "learning_rate": 8.078648690669213e-08, + "loss": 3.8166, + "step": 1592500 + }, + { + "epoch": 17.69, + "learning_rate": 8.077260511294228e-08, + "loss": 3.8313, + "step": 1593000 + }, + { + "epoch": 17.7, + "learning_rate": 8.075872331919242e-08, + "loss": 3.8007, + "step": 1593500 + }, + { + "epoch": 17.7, + "learning_rate": 8.074484152544255e-08, + "loss": 3.8039, + "step": 1594000 + }, + { + "epoch": 17.71, + "learning_rate": 8.073095973169268e-08, + "loss": 3.8128, + "step": 1594500 + }, + { + "epoch": 17.71, + "learning_rate": 8.071707793794282e-08, + "loss": 3.7996, + "step": 1595000 + }, + { + "epoch": 17.72, + "learning_rate": 8.070319614419296e-08, + "loss": 3.8031, + "step": 1595500 + }, + { + "epoch": 17.72, + "learning_rate": 8.06893143504431e-08, + "loss": 3.8277, + "step": 1596000 + }, + { + "epoch": 17.73, + "learning_rate": 8.067543255669325e-08, + "loss": 3.8008, + "step": 1596500 + }, + { + "epoch": 17.74, + "learning_rate": 8.066155076294337e-08, + "loss": 3.8132, + "step": 1597000 + }, + { + "epoch": 17.74, + "learning_rate": 8.064766896919352e-08, + "loss": 3.8232, + "step": 1597500 + }, + { + "epoch": 17.75, + "learning_rate": 8.063378717544366e-08, + "loss": 3.8208, + "step": 1598000 + }, + { + "epoch": 17.75, + "learning_rate": 8.06199053816938e-08, + "loss": 3.8144, + "step": 1598500 + }, + { + "epoch": 17.76, + "learning_rate": 8.060602358794394e-08, + "loss": 3.825, + "step": 1599000 + }, + { + "epoch": 17.76, + "learning_rate": 8.059214179419409e-08, + "loss": 3.8091, + "step": 1599500 + }, + { + "epoch": 17.77, + "learning_rate": 8.057826000044421e-08, + "loss": 3.8149, + "step": 1600000 + }, + { + "epoch": 17.77, + "learning_rate": 8.056437820669434e-08, + "loss": 3.8123, + "step": 1600500 + }, + { + "epoch": 17.78, + "learning_rate": 8.055049641294449e-08, + "loss": 3.8083, + "step": 1601000 + }, + { + "epoch": 17.79, + "learning_rate": 8.053661461919463e-08, + "loss": 3.8047, + "step": 1601500 + }, + { + "epoch": 17.79, + "learning_rate": 8.052273282544477e-08, + "loss": 3.8127, + "step": 1602000 + }, + { + "epoch": 17.8, + "learning_rate": 8.050885103169491e-08, + "loss": 3.7996, + "step": 1602500 + }, + { + "epoch": 17.8, + "learning_rate": 8.049496923794504e-08, + "loss": 3.8156, + "step": 1603000 + }, + { + "epoch": 17.81, + "learning_rate": 8.048108744419518e-08, + "loss": 3.8025, + "step": 1603500 + }, + { + "epoch": 17.81, + "learning_rate": 8.046720565044533e-08, + "loss": 3.8005, + "step": 1604000 + }, + { + "epoch": 17.82, + "learning_rate": 8.045332385669547e-08, + "loss": 3.8118, + "step": 1604500 + }, + { + "epoch": 17.82, + "learning_rate": 8.043944206294561e-08, + "loss": 3.8133, + "step": 1605000 + }, + { + "epoch": 17.83, + "learning_rate": 8.042556026919574e-08, + "loss": 3.8172, + "step": 1605500 + }, + { + "epoch": 17.84, + "learning_rate": 8.041167847544588e-08, + "loss": 3.7863, + "step": 1606000 + }, + { + "epoch": 17.84, + "learning_rate": 8.039779668169601e-08, + "loss": 3.7791, + "step": 1606500 + }, + { + "epoch": 17.85, + "learning_rate": 8.038391488794615e-08, + "loss": 3.7922, + "step": 1607000 + }, + { + "epoch": 17.85, + "learning_rate": 8.03700330941963e-08, + "loss": 3.7973, + "step": 1607500 + }, + { + "epoch": 17.86, + "learning_rate": 8.035615130044644e-08, + "loss": 3.8094, + "step": 1608000 + }, + { + "epoch": 17.86, + "learning_rate": 8.034226950669657e-08, + "loss": 3.7928, + "step": 1608500 + }, + { + "epoch": 17.87, + "learning_rate": 8.032838771294671e-08, + "loss": 3.8039, + "step": 1609000 + }, + { + "epoch": 17.87, + "learning_rate": 8.031450591919685e-08, + "loss": 3.7949, + "step": 1609500 + }, + { + "epoch": 17.88, + "learning_rate": 8.0300624125447e-08, + "loss": 3.7983, + "step": 1610000 + }, + { + "epoch": 17.89, + "learning_rate": 8.028674233169714e-08, + "loss": 3.8129, + "step": 1610500 + }, + { + "epoch": 17.89, + "learning_rate": 8.027286053794727e-08, + "loss": 3.8007, + "step": 1611000 + }, + { + "epoch": 17.9, + "learning_rate": 8.025897874419741e-08, + "loss": 3.8029, + "step": 1611500 + }, + { + "epoch": 17.9, + "learning_rate": 8.024509695044755e-08, + "loss": 3.8091, + "step": 1612000 + }, + { + "epoch": 17.91, + "learning_rate": 8.023121515669768e-08, + "loss": 3.8038, + "step": 1612500 + }, + { + "epoch": 17.91, + "learning_rate": 8.021733336294782e-08, + "loss": 3.806, + "step": 1613000 + }, + { + "epoch": 17.92, + "learning_rate": 8.020345156919796e-08, + "loss": 3.8024, + "step": 1613500 + }, + { + "epoch": 17.92, + "learning_rate": 8.018956977544809e-08, + "loss": 3.801, + "step": 1614000 + }, + { + "epoch": 17.93, + "learning_rate": 8.017568798169823e-08, + "loss": 3.8097, + "step": 1614500 + }, + { + "epoch": 17.94, + "learning_rate": 8.016180618794838e-08, + "loss": 3.7985, + "step": 1615000 + }, + { + "epoch": 17.94, + "learning_rate": 8.014792439419852e-08, + "loss": 3.8051, + "step": 1615500 + }, + { + "epoch": 17.95, + "learning_rate": 8.013404260044866e-08, + "loss": 3.8034, + "step": 1616000 + }, + { + "epoch": 17.95, + "learning_rate": 8.01201608066988e-08, + "loss": 3.8052, + "step": 1616500 + }, + { + "epoch": 17.96, + "learning_rate": 8.010627901294893e-08, + "loss": 3.8057, + "step": 1617000 + }, + { + "epoch": 17.96, + "learning_rate": 8.009239721919907e-08, + "loss": 3.7846, + "step": 1617500 + }, + { + "epoch": 17.97, + "learning_rate": 8.007851542544922e-08, + "loss": 3.7851, + "step": 1618000 + }, + { + "epoch": 17.97, + "learning_rate": 8.006463363169935e-08, + "loss": 3.8106, + "step": 1618500 + }, + { + "epoch": 17.98, + "learning_rate": 8.005075183794949e-08, + "loss": 3.8231, + "step": 1619000 + }, + { + "epoch": 17.99, + "learning_rate": 8.003687004419962e-08, + "loss": 3.8266, + "step": 1619500 + }, + { + "epoch": 17.99, + "learning_rate": 8.002298825044976e-08, + "loss": 3.8091, + "step": 1620000 + }, + { + "epoch": 18.0, + "learning_rate": 8.00091064566999e-08, + "loss": 3.8298, + "step": 1620500 + }, + { + "epoch": 18.0, + "eval_loss": 3.8592498302459717, + "eval_runtime": 6.3075, + "eval_samples_per_second": 246.375, + "step": 1620828 + }, + { + "epoch": 18.0, + "learning_rate": 7.999522466295004e-08, + "loss": 3.8243, + "step": 1621000 + }, + { + "epoch": 18.01, + "learning_rate": 7.998134286920019e-08, + "loss": 3.8152, + "step": 1621500 + }, + { + "epoch": 18.01, + "learning_rate": 7.996746107545033e-08, + "loss": 3.793, + "step": 1622000 + }, + { + "epoch": 18.02, + "learning_rate": 7.995357928170046e-08, + "loss": 3.8098, + "step": 1622500 + }, + { + "epoch": 18.02, + "learning_rate": 7.99396974879506e-08, + "loss": 3.8123, + "step": 1623000 + }, + { + "epoch": 18.03, + "learning_rate": 7.992581569420074e-08, + "loss": 3.7995, + "step": 1623500 + }, + { + "epoch": 18.04, + "learning_rate": 7.991193390045088e-08, + "loss": 3.8055, + "step": 1624000 + }, + { + "epoch": 18.04, + "learning_rate": 7.989805210670101e-08, + "loss": 3.8155, + "step": 1624500 + }, + { + "epoch": 18.05, + "learning_rate": 7.988417031295116e-08, + "loss": 3.7985, + "step": 1625000 + }, + { + "epoch": 18.05, + "learning_rate": 7.987028851920128e-08, + "loss": 3.7892, + "step": 1625500 + }, + { + "epoch": 18.06, + "learning_rate": 7.985640672545143e-08, + "loss": 3.8237, + "step": 1626000 + }, + { + "epoch": 18.06, + "learning_rate": 7.984252493170157e-08, + "loss": 3.7959, + "step": 1626500 + }, + { + "epoch": 18.07, + "learning_rate": 7.982864313795171e-08, + "loss": 3.7915, + "step": 1627000 + }, + { + "epoch": 18.07, + "learning_rate": 7.981476134420185e-08, + "loss": 3.7881, + "step": 1627500 + }, + { + "epoch": 18.08, + "learning_rate": 7.980087955045198e-08, + "loss": 3.8044, + "step": 1628000 + }, + { + "epoch": 18.09, + "learning_rate": 7.978699775670213e-08, + "loss": 3.7891, + "step": 1628500 + }, + { + "epoch": 18.09, + "learning_rate": 7.977311596295227e-08, + "loss": 3.7869, + "step": 1629000 + }, + { + "epoch": 18.1, + "learning_rate": 7.975923416920241e-08, + "loss": 3.8062, + "step": 1629500 + }, + { + "epoch": 18.1, + "learning_rate": 7.974535237545255e-08, + "loss": 3.8081, + "step": 1630000 + }, + { + "epoch": 18.11, + "learning_rate": 7.973147058170268e-08, + "loss": 3.8021, + "step": 1630500 + }, + { + "epoch": 18.11, + "learning_rate": 7.971758878795281e-08, + "loss": 3.7914, + "step": 1631000 + }, + { + "epoch": 18.12, + "learning_rate": 7.970370699420295e-08, + "loss": 3.7964, + "step": 1631500 + }, + { + "epoch": 18.12, + "learning_rate": 7.96898252004531e-08, + "loss": 3.7964, + "step": 1632000 + }, + { + "epoch": 18.13, + "learning_rate": 7.967594340670324e-08, + "loss": 3.814, + "step": 1632500 + }, + { + "epoch": 18.14, + "learning_rate": 7.966206161295338e-08, + "loss": 3.8009, + "step": 1633000 + }, + { + "epoch": 18.14, + "learning_rate": 7.964817981920351e-08, + "loss": 3.8109, + "step": 1633500 + }, + { + "epoch": 18.15, + "learning_rate": 7.963429802545365e-08, + "loss": 3.8077, + "step": 1634000 + }, + { + "epoch": 18.15, + "learning_rate": 7.962041623170379e-08, + "loss": 3.8147, + "step": 1634500 + }, + { + "epoch": 18.16, + "learning_rate": 7.960653443795393e-08, + "loss": 3.8093, + "step": 1635000 + }, + { + "epoch": 18.16, + "learning_rate": 7.959265264420408e-08, + "loss": 3.8039, + "step": 1635500 + }, + { + "epoch": 18.17, + "learning_rate": 7.957877085045422e-08, + "loss": 3.8128, + "step": 1636000 + }, + { + "epoch": 18.17, + "learning_rate": 7.956488905670435e-08, + "loss": 3.7914, + "step": 1636500 + }, + { + "epoch": 18.18, + "learning_rate": 7.955100726295448e-08, + "loss": 3.798, + "step": 1637000 + }, + { + "epoch": 18.19, + "learning_rate": 7.953712546920462e-08, + "loss": 3.8156, + "step": 1637500 + }, + { + "epoch": 18.19, + "learning_rate": 7.952324367545476e-08, + "loss": 3.8045, + "step": 1638000 + }, + { + "epoch": 18.2, + "learning_rate": 7.95093618817049e-08, + "loss": 3.798, + "step": 1638500 + }, + { + "epoch": 18.2, + "learning_rate": 7.949548008795505e-08, + "loss": 3.7958, + "step": 1639000 + }, + { + "epoch": 18.21, + "learning_rate": 7.948159829420518e-08, + "loss": 3.7975, + "step": 1639500 + }, + { + "epoch": 18.21, + "learning_rate": 7.946771650045532e-08, + "loss": 3.8075, + "step": 1640000 + }, + { + "epoch": 18.22, + "learning_rate": 7.945383470670546e-08, + "loss": 3.7894, + "step": 1640500 + }, + { + "epoch": 18.22, + "learning_rate": 7.94399529129556e-08, + "loss": 3.8022, + "step": 1641000 + }, + { + "epoch": 18.23, + "learning_rate": 7.942607111920574e-08, + "loss": 3.8065, + "step": 1641500 + }, + { + "epoch": 18.24, + "learning_rate": 7.941218932545587e-08, + "loss": 3.8134, + "step": 1642000 + }, + { + "epoch": 18.24, + "learning_rate": 7.939830753170602e-08, + "loss": 3.7995, + "step": 1642500 + }, + { + "epoch": 18.25, + "learning_rate": 7.938442573795614e-08, + "loss": 3.8211, + "step": 1643000 + }, + { + "epoch": 18.25, + "learning_rate": 7.937054394420629e-08, + "loss": 3.8057, + "step": 1643500 + }, + { + "epoch": 18.26, + "learning_rate": 7.935666215045643e-08, + "loss": 3.8048, + "step": 1644000 + }, + { + "epoch": 18.26, + "learning_rate": 7.934278035670657e-08, + "loss": 3.8267, + "step": 1644500 + }, + { + "epoch": 18.27, + "learning_rate": 7.93288985629567e-08, + "loss": 3.8134, + "step": 1645000 + }, + { + "epoch": 18.27, + "learning_rate": 7.931501676920684e-08, + "loss": 3.7961, + "step": 1645500 + }, + { + "epoch": 18.28, + "learning_rate": 7.930113497545699e-08, + "loss": 3.8012, + "step": 1646000 + }, + { + "epoch": 18.29, + "learning_rate": 7.928725318170713e-08, + "loss": 3.8117, + "step": 1646500 + }, + { + "epoch": 18.29, + "learning_rate": 7.927337138795727e-08, + "loss": 3.8043, + "step": 1647000 + }, + { + "epoch": 18.3, + "learning_rate": 7.92594895942074e-08, + "loss": 3.8062, + "step": 1647500 + }, + { + "epoch": 18.3, + "learning_rate": 7.924560780045754e-08, + "loss": 3.7995, + "step": 1648000 + }, + { + "epoch": 18.31, + "learning_rate": 7.923172600670768e-08, + "loss": 3.7896, + "step": 1648500 + }, + { + "epoch": 18.31, + "learning_rate": 7.921784421295781e-08, + "loss": 3.7983, + "step": 1649000 + }, + { + "epoch": 18.32, + "learning_rate": 7.920396241920795e-08, + "loss": 3.8059, + "step": 1649500 + }, + { + "epoch": 18.32, + "learning_rate": 7.91900806254581e-08, + "loss": 3.7998, + "step": 1650000 + }, + { + "epoch": 18.33, + "learning_rate": 7.917619883170823e-08, + "loss": 3.8103, + "step": 1650500 + }, + { + "epoch": 18.34, + "learning_rate": 7.916231703795837e-08, + "loss": 3.8165, + "step": 1651000 + }, + { + "epoch": 18.34, + "learning_rate": 7.914843524420851e-08, + "loss": 3.8103, + "step": 1651500 + }, + { + "epoch": 18.35, + "learning_rate": 7.913455345045865e-08, + "loss": 3.7898, + "step": 1652000 + }, + { + "epoch": 18.35, + "learning_rate": 7.91206716567088e-08, + "loss": 3.8264, + "step": 1652500 + }, + { + "epoch": 18.36, + "learning_rate": 7.910678986295894e-08, + "loss": 3.801, + "step": 1653000 + }, + { + "epoch": 18.36, + "learning_rate": 7.909290806920907e-08, + "loss": 3.8075, + "step": 1653500 + }, + { + "epoch": 18.37, + "learning_rate": 7.907902627545921e-08, + "loss": 3.8058, + "step": 1654000 + }, + { + "epoch": 18.37, + "learning_rate": 7.906514448170935e-08, + "loss": 3.7893, + "step": 1654500 + }, + { + "epoch": 18.38, + "learning_rate": 7.905126268795948e-08, + "loss": 3.7998, + "step": 1655000 + }, + { + "epoch": 18.39, + "learning_rate": 7.903738089420962e-08, + "loss": 3.8315, + "step": 1655500 + }, + { + "epoch": 18.39, + "learning_rate": 7.902349910045975e-08, + "loss": 3.7957, + "step": 1656000 + }, + { + "epoch": 18.4, + "learning_rate": 7.90096173067099e-08, + "loss": 3.816, + "step": 1656500 + }, + { + "epoch": 18.4, + "learning_rate": 7.899573551296004e-08, + "loss": 3.7955, + "step": 1657000 + }, + { + "epoch": 18.41, + "learning_rate": 7.898185371921018e-08, + "loss": 3.8179, + "step": 1657500 + }, + { + "epoch": 18.41, + "learning_rate": 7.896797192546032e-08, + "loss": 3.7936, + "step": 1658000 + }, + { + "epoch": 18.42, + "learning_rate": 7.895409013171046e-08, + "loss": 3.8086, + "step": 1658500 + }, + { + "epoch": 18.42, + "learning_rate": 7.894020833796059e-08, + "loss": 3.8047, + "step": 1659000 + }, + { + "epoch": 18.43, + "learning_rate": 7.892632654421073e-08, + "loss": 3.7983, + "step": 1659500 + }, + { + "epoch": 18.44, + "learning_rate": 7.891244475046088e-08, + "loss": 3.8145, + "step": 1660000 + }, + { + "epoch": 18.44, + "learning_rate": 7.889856295671102e-08, + "loss": 3.7944, + "step": 1660500 + }, + { + "epoch": 18.45, + "learning_rate": 7.888468116296115e-08, + "loss": 3.8025, + "step": 1661000 + }, + { + "epoch": 18.45, + "learning_rate": 7.887079936921129e-08, + "loss": 3.79, + "step": 1661500 + }, + { + "epoch": 18.46, + "learning_rate": 7.885691757546142e-08, + "loss": 3.8295, + "step": 1662000 + }, + { + "epoch": 18.46, + "learning_rate": 7.884303578171156e-08, + "loss": 3.8068, + "step": 1662500 + }, + { + "epoch": 18.47, + "learning_rate": 7.88291539879617e-08, + "loss": 3.811, + "step": 1663000 + }, + { + "epoch": 18.47, + "learning_rate": 7.881527219421185e-08, + "loss": 3.7964, + "step": 1663500 + }, + { + "epoch": 18.48, + "learning_rate": 7.880139040046199e-08, + "loss": 3.8171, + "step": 1664000 + }, + { + "epoch": 18.48, + "learning_rate": 7.878750860671212e-08, + "loss": 3.8098, + "step": 1664500 + }, + { + "epoch": 18.49, + "learning_rate": 7.877362681296226e-08, + "loss": 3.7987, + "step": 1665000 + }, + { + "epoch": 18.5, + "learning_rate": 7.87597450192124e-08, + "loss": 3.8104, + "step": 1665500 + }, + { + "epoch": 18.5, + "learning_rate": 7.874586322546254e-08, + "loss": 3.8168, + "step": 1666000 + }, + { + "epoch": 18.51, + "learning_rate": 7.873198143171269e-08, + "loss": 3.8133, + "step": 1666500 + }, + { + "epoch": 18.51, + "learning_rate": 7.871809963796281e-08, + "loss": 3.799, + "step": 1667000 + }, + { + "epoch": 18.52, + "learning_rate": 7.870421784421294e-08, + "loss": 3.8096, + "step": 1667500 + }, + { + "epoch": 18.52, + "learning_rate": 7.869033605046309e-08, + "loss": 3.8316, + "step": 1668000 + }, + { + "epoch": 18.53, + "learning_rate": 7.867645425671323e-08, + "loss": 3.7886, + "step": 1668500 + }, + { + "epoch": 18.53, + "learning_rate": 7.866257246296337e-08, + "loss": 3.8021, + "step": 1669000 + }, + { + "epoch": 18.54, + "learning_rate": 7.864869066921351e-08, + "loss": 3.7834, + "step": 1669500 + }, + { + "epoch": 18.55, + "learning_rate": 7.863480887546364e-08, + "loss": 3.8008, + "step": 1670000 + }, + { + "epoch": 18.55, + "learning_rate": 7.862092708171378e-08, + "loss": 3.8158, + "step": 1670500 + }, + { + "epoch": 18.56, + "learning_rate": 7.860704528796393e-08, + "loss": 3.7952, + "step": 1671000 + }, + { + "epoch": 18.56, + "learning_rate": 7.859316349421407e-08, + "loss": 3.8093, + "step": 1671500 + }, + { + "epoch": 18.57, + "learning_rate": 7.857928170046421e-08, + "loss": 3.8114, + "step": 1672000 + }, + { + "epoch": 18.57, + "learning_rate": 7.856539990671435e-08, + "loss": 3.7972, + "step": 1672500 + }, + { + "epoch": 18.58, + "learning_rate": 7.855151811296448e-08, + "loss": 3.7824, + "step": 1673000 + }, + { + "epoch": 18.58, + "learning_rate": 7.853763631921461e-08, + "loss": 3.7884, + "step": 1673500 + }, + { + "epoch": 18.59, + "learning_rate": 7.852375452546475e-08, + "loss": 3.8032, + "step": 1674000 + }, + { + "epoch": 18.6, + "learning_rate": 7.85098727317149e-08, + "loss": 3.8057, + "step": 1674500 + }, + { + "epoch": 18.6, + "learning_rate": 7.849599093796504e-08, + "loss": 3.8021, + "step": 1675000 + }, + { + "epoch": 18.61, + "learning_rate": 7.848210914421518e-08, + "loss": 3.8104, + "step": 1675500 + }, + { + "epoch": 18.61, + "learning_rate": 7.846822735046531e-08, + "loss": 3.8176, + "step": 1676000 + }, + { + "epoch": 18.62, + "learning_rate": 7.845434555671545e-08, + "loss": 3.7965, + "step": 1676500 + }, + { + "epoch": 18.62, + "learning_rate": 7.84404637629656e-08, + "loss": 3.7893, + "step": 1677000 + }, + { + "epoch": 18.63, + "learning_rate": 7.842658196921574e-08, + "loss": 3.8028, + "step": 1677500 + }, + { + "epoch": 18.63, + "learning_rate": 7.841270017546588e-08, + "loss": 3.8143, + "step": 1678000 + }, + { + "epoch": 18.64, + "learning_rate": 7.839881838171601e-08, + "loss": 3.7986, + "step": 1678500 + }, + { + "epoch": 18.65, + "learning_rate": 7.838493658796615e-08, + "loss": 3.8016, + "step": 1679000 + }, + { + "epoch": 18.65, + "learning_rate": 7.837105479421628e-08, + "loss": 3.7878, + "step": 1679500 + }, + { + "epoch": 18.66, + "learning_rate": 7.835717300046642e-08, + "loss": 3.8143, + "step": 1680000 + }, + { + "epoch": 18.66, + "learning_rate": 7.834329120671656e-08, + "loss": 3.8011, + "step": 1680500 + }, + { + "epoch": 18.67, + "learning_rate": 7.83294094129667e-08, + "loss": 3.8107, + "step": 1681000 + }, + { + "epoch": 18.67, + "learning_rate": 7.831552761921683e-08, + "loss": 3.8064, + "step": 1681500 + }, + { + "epoch": 18.68, + "learning_rate": 7.830164582546698e-08, + "loss": 3.8115, + "step": 1682000 + }, + { + "epoch": 18.68, + "learning_rate": 7.828776403171712e-08, + "loss": 3.7995, + "step": 1682500 + }, + { + "epoch": 18.69, + "learning_rate": 7.827388223796726e-08, + "loss": 3.7899, + "step": 1683000 + }, + { + "epoch": 18.7, + "learning_rate": 7.82600004442174e-08, + "loss": 3.8011, + "step": 1683500 + }, + { + "epoch": 18.7, + "learning_rate": 7.824611865046755e-08, + "loss": 3.8036, + "step": 1684000 + }, + { + "epoch": 18.71, + "learning_rate": 7.823223685671767e-08, + "loss": 3.7953, + "step": 1684500 + }, + { + "epoch": 18.71, + "learning_rate": 7.821835506296782e-08, + "loss": 3.8062, + "step": 1685000 + }, + { + "epoch": 18.72, + "learning_rate": 7.820447326921795e-08, + "loss": 3.8018, + "step": 1685500 + }, + { + "epoch": 18.72, + "learning_rate": 7.819059147546809e-08, + "loss": 3.8173, + "step": 1686000 + }, + { + "epoch": 18.73, + "learning_rate": 7.817670968171823e-08, + "loss": 3.7973, + "step": 1686500 + }, + { + "epoch": 18.73, + "learning_rate": 7.816282788796836e-08, + "loss": 3.7905, + "step": 1687000 + }, + { + "epoch": 18.74, + "learning_rate": 7.81489460942185e-08, + "loss": 3.7987, + "step": 1687500 + }, + { + "epoch": 18.75, + "learning_rate": 7.813506430046864e-08, + "loss": 3.7968, + "step": 1688000 + }, + { + "epoch": 18.75, + "learning_rate": 7.812118250671879e-08, + "loss": 3.8173, + "step": 1688500 + }, + { + "epoch": 18.76, + "learning_rate": 7.810730071296893e-08, + "loss": 3.8063, + "step": 1689000 + }, + { + "epoch": 18.76, + "learning_rate": 7.809341891921907e-08, + "loss": 3.7957, + "step": 1689500 + }, + { + "epoch": 18.77, + "learning_rate": 7.80795371254692e-08, + "loss": 3.8068, + "step": 1690000 + }, + { + "epoch": 18.77, + "learning_rate": 7.806565533171934e-08, + "loss": 3.78, + "step": 1690500 + }, + { + "epoch": 18.78, + "learning_rate": 7.805177353796948e-08, + "loss": 3.8095, + "step": 1691000 + }, + { + "epoch": 18.78, + "learning_rate": 7.803789174421961e-08, + "loss": 3.8058, + "step": 1691500 + }, + { + "epoch": 18.79, + "learning_rate": 7.802400995046976e-08, + "loss": 3.7942, + "step": 1692000 + }, + { + "epoch": 18.8, + "learning_rate": 7.801012815671988e-08, + "loss": 3.8032, + "step": 1692500 + }, + { + "epoch": 18.8, + "learning_rate": 7.799624636297003e-08, + "loss": 3.7896, + "step": 1693000 + }, + { + "epoch": 18.81, + "learning_rate": 7.798236456922017e-08, + "loss": 3.7838, + "step": 1693500 + }, + { + "epoch": 18.81, + "learning_rate": 7.796848277547031e-08, + "loss": 3.7953, + "step": 1694000 + }, + { + "epoch": 18.82, + "learning_rate": 7.795460098172045e-08, + "loss": 3.7927, + "step": 1694500 + }, + { + "epoch": 18.82, + "learning_rate": 7.79407191879706e-08, + "loss": 3.7847, + "step": 1695000 + }, + { + "epoch": 18.83, + "learning_rate": 7.792683739422073e-08, + "loss": 3.8211, + "step": 1695500 + }, + { + "epoch": 18.83, + "learning_rate": 7.791295560047087e-08, + "loss": 3.8017, + "step": 1696000 + }, + { + "epoch": 18.84, + "learning_rate": 7.789907380672101e-08, + "loss": 3.7961, + "step": 1696500 + }, + { + "epoch": 18.85, + "learning_rate": 7.788519201297115e-08, + "loss": 3.8013, + "step": 1697000 + }, + { + "epoch": 18.85, + "learning_rate": 7.787131021922128e-08, + "loss": 3.8001, + "step": 1697500 + }, + { + "epoch": 18.86, + "learning_rate": 7.785742842547142e-08, + "loss": 3.814, + "step": 1698000 + }, + { + "epoch": 18.86, + "learning_rate": 7.784354663172155e-08, + "loss": 3.7919, + "step": 1698500 + }, + { + "epoch": 18.87, + "learning_rate": 7.78296648379717e-08, + "loss": 3.794, + "step": 1699000 + }, + { + "epoch": 18.87, + "learning_rate": 7.781578304422184e-08, + "loss": 3.7969, + "step": 1699500 + }, + { + "epoch": 18.88, + "learning_rate": 7.780190125047198e-08, + "loss": 3.7963, + "step": 1700000 + }, + { + "epoch": 18.88, + "learning_rate": 7.778801945672212e-08, + "loss": 3.7987, + "step": 1700500 + }, + { + "epoch": 18.89, + "learning_rate": 7.777413766297225e-08, + "loss": 3.7847, + "step": 1701000 + }, + { + "epoch": 18.9, + "learning_rate": 7.776025586922239e-08, + "loss": 3.8126, + "step": 1701500 + }, + { + "epoch": 18.9, + "learning_rate": 7.774637407547254e-08, + "loss": 3.8, + "step": 1702000 + }, + { + "epoch": 18.91, + "learning_rate": 7.773249228172268e-08, + "loss": 3.8206, + "step": 1702500 + }, + { + "epoch": 18.91, + "learning_rate": 7.771861048797282e-08, + "loss": 3.7828, + "step": 1703000 + }, + { + "epoch": 18.92, + "learning_rate": 7.770472869422295e-08, + "loss": 3.8022, + "step": 1703500 + }, + { + "epoch": 18.92, + "learning_rate": 7.769084690047308e-08, + "loss": 3.7976, + "step": 1704000 + }, + { + "epoch": 18.93, + "learning_rate": 7.767696510672322e-08, + "loss": 3.7949, + "step": 1704500 + }, + { + "epoch": 18.93, + "learning_rate": 7.766308331297336e-08, + "loss": 3.7969, + "step": 1705000 + }, + { + "epoch": 18.94, + "learning_rate": 7.76492015192235e-08, + "loss": 3.8113, + "step": 1705500 + }, + { + "epoch": 18.95, + "learning_rate": 7.763531972547365e-08, + "loss": 3.791, + "step": 1706000 + }, + { + "epoch": 18.95, + "learning_rate": 7.762143793172378e-08, + "loss": 3.7971, + "step": 1706500 + }, + { + "epoch": 18.96, + "learning_rate": 7.760755613797392e-08, + "loss": 3.7941, + "step": 1707000 + }, + { + "epoch": 18.96, + "learning_rate": 7.759367434422406e-08, + "loss": 3.8058, + "step": 1707500 + }, + { + "epoch": 18.97, + "learning_rate": 7.75797925504742e-08, + "loss": 3.8023, + "step": 1708000 + }, + { + "epoch": 18.97, + "learning_rate": 7.756591075672434e-08, + "loss": 3.8226, + "step": 1708500 + }, + { + "epoch": 18.98, + "learning_rate": 7.755202896297449e-08, + "loss": 3.7875, + "step": 1709000 + }, + { + "epoch": 18.98, + "learning_rate": 7.753814716922462e-08, + "loss": 3.8119, + "step": 1709500 + }, + { + "epoch": 18.99, + "learning_rate": 7.752426537547475e-08, + "loss": 3.8151, + "step": 1710000 + }, + { + "epoch": 19.0, + "learning_rate": 7.751038358172489e-08, + "loss": 3.8161, + "step": 1710500 + }, + { + "epoch": 19.0, + "eval_loss": 3.8565142154693604, + "eval_runtime": 6.3064, + "eval_samples_per_second": 246.416, + "step": 1710874 + }, + { + "epoch": 19.0, + "learning_rate": 7.749650178797503e-08, + "loss": 3.8229, + "step": 1711000 + }, + { + "epoch": 19.01, + "learning_rate": 7.748261999422517e-08, + "loss": 3.794, + "step": 1711500 + }, + { + "epoch": 19.01, + "learning_rate": 7.746873820047531e-08, + "loss": 3.7888, + "step": 1712000 + }, + { + "epoch": 19.02, + "learning_rate": 7.745485640672544e-08, + "loss": 3.7998, + "step": 1712500 + }, + { + "epoch": 19.02, + "learning_rate": 7.744097461297559e-08, + "loss": 3.7976, + "step": 1713000 + }, + { + "epoch": 19.03, + "learning_rate": 7.742709281922573e-08, + "loss": 3.7674, + "step": 1713500 + }, + { + "epoch": 19.03, + "learning_rate": 7.741321102547587e-08, + "loss": 3.7792, + "step": 1714000 + }, + { + "epoch": 19.04, + "learning_rate": 7.739932923172601e-08, + "loss": 3.7915, + "step": 1714500 + }, + { + "epoch": 19.05, + "learning_rate": 7.738544743797614e-08, + "loss": 3.8065, + "step": 1715000 + }, + { + "epoch": 19.05, + "learning_rate": 7.737156564422628e-08, + "loss": 3.8095, + "step": 1715500 + }, + { + "epoch": 19.06, + "learning_rate": 7.735768385047641e-08, + "loss": 3.816, + "step": 1716000 + }, + { + "epoch": 19.06, + "learning_rate": 7.734380205672655e-08, + "loss": 3.8047, + "step": 1716500 + }, + { + "epoch": 19.07, + "learning_rate": 7.73299202629767e-08, + "loss": 3.7947, + "step": 1717000 + }, + { + "epoch": 19.07, + "learning_rate": 7.731603846922684e-08, + "loss": 3.8103, + "step": 1717500 + }, + { + "epoch": 19.08, + "learning_rate": 7.730215667547697e-08, + "loss": 3.7911, + "step": 1718000 + }, + { + "epoch": 19.08, + "learning_rate": 7.728827488172711e-08, + "loss": 3.8011, + "step": 1718500 + }, + { + "epoch": 19.09, + "learning_rate": 7.727439308797725e-08, + "loss": 3.8143, + "step": 1719000 + }, + { + "epoch": 19.1, + "learning_rate": 7.72605112942274e-08, + "loss": 3.8125, + "step": 1719500 + }, + { + "epoch": 19.1, + "learning_rate": 7.724662950047754e-08, + "loss": 3.8094, + "step": 1720000 + }, + { + "epoch": 19.11, + "learning_rate": 7.723274770672768e-08, + "loss": 3.8021, + "step": 1720500 + }, + { + "epoch": 19.11, + "learning_rate": 7.721886591297781e-08, + "loss": 3.8011, + "step": 1721000 + }, + { + "epoch": 19.12, + "learning_rate": 7.720498411922795e-08, + "loss": 3.7851, + "step": 1721500 + }, + { + "epoch": 19.12, + "learning_rate": 7.719110232547808e-08, + "loss": 3.8019, + "step": 1722000 + }, + { + "epoch": 19.13, + "learning_rate": 7.717722053172822e-08, + "loss": 3.8182, + "step": 1722500 + }, + { + "epoch": 19.13, + "learning_rate": 7.716333873797836e-08, + "loss": 3.8107, + "step": 1723000 + }, + { + "epoch": 19.14, + "learning_rate": 7.71494569442285e-08, + "loss": 3.7883, + "step": 1723500 + }, + { + "epoch": 19.15, + "learning_rate": 7.713557515047864e-08, + "loss": 3.7994, + "step": 1724000 + }, + { + "epoch": 19.15, + "learning_rate": 7.712169335672878e-08, + "loss": 3.7832, + "step": 1724500 + }, + { + "epoch": 19.16, + "learning_rate": 7.710781156297892e-08, + "loss": 3.7814, + "step": 1725000 + }, + { + "epoch": 19.16, + "learning_rate": 7.709392976922906e-08, + "loss": 3.8007, + "step": 1725500 + }, + { + "epoch": 19.17, + "learning_rate": 7.70800479754792e-08, + "loss": 3.7963, + "step": 1726000 + }, + { + "epoch": 19.17, + "learning_rate": 7.706616618172933e-08, + "loss": 3.8004, + "step": 1726500 + }, + { + "epoch": 19.18, + "learning_rate": 7.705228438797948e-08, + "loss": 3.8011, + "step": 1727000 + }, + { + "epoch": 19.18, + "learning_rate": 7.703840259422962e-08, + "loss": 3.794, + "step": 1727500 + }, + { + "epoch": 19.19, + "learning_rate": 7.702452080047975e-08, + "loss": 3.7903, + "step": 1728000 + }, + { + "epoch": 19.2, + "learning_rate": 7.701063900672989e-08, + "loss": 3.8178, + "step": 1728500 + }, + { + "epoch": 19.2, + "learning_rate": 7.699675721298002e-08, + "loss": 3.7936, + "step": 1729000 + }, + { + "epoch": 19.21, + "learning_rate": 7.698287541923016e-08, + "loss": 3.7773, + "step": 1729500 + }, + { + "epoch": 19.21, + "learning_rate": 7.69689936254803e-08, + "loss": 3.8125, + "step": 1730000 + }, + { + "epoch": 19.22, + "learning_rate": 7.695511183173045e-08, + "loss": 3.8203, + "step": 1730500 + }, + { + "epoch": 19.22, + "learning_rate": 7.694123003798059e-08, + "loss": 3.7956, + "step": 1731000 + }, + { + "epoch": 19.23, + "learning_rate": 7.692734824423073e-08, + "loss": 3.7872, + "step": 1731500 + }, + { + "epoch": 19.23, + "learning_rate": 7.691346645048086e-08, + "loss": 3.8037, + "step": 1732000 + }, + { + "epoch": 19.24, + "learning_rate": 7.6899584656731e-08, + "loss": 3.7933, + "step": 1732500 + }, + { + "epoch": 19.25, + "learning_rate": 7.688570286298114e-08, + "loss": 3.8028, + "step": 1733000 + }, + { + "epoch": 19.25, + "learning_rate": 7.687182106923129e-08, + "loss": 3.7937, + "step": 1733500 + }, + { + "epoch": 19.26, + "learning_rate": 7.685793927548141e-08, + "loss": 3.7934, + "step": 1734000 + }, + { + "epoch": 19.26, + "learning_rate": 7.684405748173156e-08, + "loss": 3.7865, + "step": 1734500 + }, + { + "epoch": 19.27, + "learning_rate": 7.683017568798169e-08, + "loss": 3.7982, + "step": 1735000 + }, + { + "epoch": 19.27, + "learning_rate": 7.681629389423183e-08, + "loss": 3.7828, + "step": 1735500 + }, + { + "epoch": 19.28, + "learning_rate": 7.680241210048197e-08, + "loss": 3.7969, + "step": 1736000 + }, + { + "epoch": 19.28, + "learning_rate": 7.678853030673211e-08, + "loss": 3.7867, + "step": 1736500 + }, + { + "epoch": 19.29, + "learning_rate": 7.677464851298226e-08, + "loss": 3.7937, + "step": 1737000 + }, + { + "epoch": 19.3, + "learning_rate": 7.676076671923238e-08, + "loss": 3.8099, + "step": 1737500 + }, + { + "epoch": 19.3, + "learning_rate": 7.674688492548253e-08, + "loss": 3.8038, + "step": 1738000 + }, + { + "epoch": 19.31, + "learning_rate": 7.673300313173267e-08, + "loss": 3.805, + "step": 1738500 + }, + { + "epoch": 19.31, + "learning_rate": 7.671912133798281e-08, + "loss": 3.7951, + "step": 1739000 + }, + { + "epoch": 19.32, + "learning_rate": 7.670523954423295e-08, + "loss": 3.7824, + "step": 1739500 + }, + { + "epoch": 19.32, + "learning_rate": 7.669135775048308e-08, + "loss": 3.8023, + "step": 1740000 + }, + { + "epoch": 19.33, + "learning_rate": 7.667747595673321e-08, + "loss": 3.8166, + "step": 1740500 + }, + { + "epoch": 19.33, + "learning_rate": 7.666359416298335e-08, + "loss": 3.8183, + "step": 1741000 + }, + { + "epoch": 19.34, + "learning_rate": 7.66497123692335e-08, + "loss": 3.8044, + "step": 1741500 + }, + { + "epoch": 19.35, + "learning_rate": 7.663583057548364e-08, + "loss": 3.795, + "step": 1742000 + }, + { + "epoch": 19.35, + "learning_rate": 7.662194878173378e-08, + "loss": 3.8058, + "step": 1742500 + }, + { + "epoch": 19.36, + "learning_rate": 7.660806698798391e-08, + "loss": 3.7886, + "step": 1743000 + }, + { + "epoch": 19.36, + "learning_rate": 7.659418519423405e-08, + "loss": 3.7869, + "step": 1743500 + }, + { + "epoch": 19.37, + "learning_rate": 7.65803034004842e-08, + "loss": 3.8041, + "step": 1744000 + }, + { + "epoch": 19.37, + "learning_rate": 7.656642160673434e-08, + "loss": 3.7995, + "step": 1744500 + }, + { + "epoch": 19.38, + "learning_rate": 7.655253981298448e-08, + "loss": 3.8083, + "step": 1745000 + }, + { + "epoch": 19.38, + "learning_rate": 7.653865801923462e-08, + "loss": 3.7982, + "step": 1745500 + }, + { + "epoch": 19.39, + "learning_rate": 7.652477622548475e-08, + "loss": 3.8015, + "step": 1746000 + }, + { + "epoch": 19.4, + "learning_rate": 7.651089443173488e-08, + "loss": 3.7959, + "step": 1746500 + }, + { + "epoch": 19.4, + "learning_rate": 7.649701263798502e-08, + "loss": 3.8234, + "step": 1747000 + }, + { + "epoch": 19.41, + "learning_rate": 7.648313084423516e-08, + "loss": 3.824, + "step": 1747500 + }, + { + "epoch": 19.41, + "learning_rate": 7.64692490504853e-08, + "loss": 3.7907, + "step": 1748000 + }, + { + "epoch": 19.42, + "learning_rate": 7.645536725673545e-08, + "loss": 3.7896, + "step": 1748500 + }, + { + "epoch": 19.42, + "learning_rate": 7.644148546298558e-08, + "loss": 3.8168, + "step": 1749000 + }, + { + "epoch": 19.43, + "learning_rate": 7.642760366923572e-08, + "loss": 3.8147, + "step": 1749500 + }, + { + "epoch": 19.43, + "learning_rate": 7.641372187548586e-08, + "loss": 3.8147, + "step": 1750000 + }, + { + "epoch": 19.44, + "learning_rate": 7.6399840081736e-08, + "loss": 3.7868, + "step": 1750500 + }, + { + "epoch": 19.45, + "learning_rate": 7.638595828798615e-08, + "loss": 3.7938, + "step": 1751000 + }, + { + "epoch": 19.45, + "learning_rate": 7.637207649423628e-08, + "loss": 3.8054, + "step": 1751500 + }, + { + "epoch": 19.46, + "learning_rate": 7.635819470048642e-08, + "loss": 3.8005, + "step": 1752000 + }, + { + "epoch": 19.46, + "learning_rate": 7.634431290673655e-08, + "loss": 3.7959, + "step": 1752500 + }, + { + "epoch": 19.47, + "learning_rate": 7.633043111298669e-08, + "loss": 3.7931, + "step": 1753000 + }, + { + "epoch": 19.47, + "learning_rate": 7.631654931923683e-08, + "loss": 3.7825, + "step": 1753500 + }, + { + "epoch": 19.48, + "learning_rate": 7.630266752548697e-08, + "loss": 3.8054, + "step": 1754000 + }, + { + "epoch": 19.48, + "learning_rate": 7.62887857317371e-08, + "loss": 3.7934, + "step": 1754500 + }, + { + "epoch": 19.49, + "learning_rate": 7.627490393798724e-08, + "loss": 3.798, + "step": 1755000 + }, + { + "epoch": 19.5, + "learning_rate": 7.626102214423739e-08, + "loss": 3.8198, + "step": 1755500 + }, + { + "epoch": 19.5, + "learning_rate": 7.624714035048753e-08, + "loss": 3.7971, + "step": 1756000 + }, + { + "epoch": 19.51, + "learning_rate": 7.623325855673767e-08, + "loss": 3.7956, + "step": 1756500 + }, + { + "epoch": 19.51, + "learning_rate": 7.621937676298781e-08, + "loss": 3.7888, + "step": 1757000 + }, + { + "epoch": 19.52, + "learning_rate": 7.620549496923794e-08, + "loss": 3.7896, + "step": 1757500 + }, + { + "epoch": 19.52, + "learning_rate": 7.619161317548808e-08, + "loss": 3.799, + "step": 1758000 + }, + { + "epoch": 19.53, + "learning_rate": 7.617773138173821e-08, + "loss": 3.7932, + "step": 1758500 + }, + { + "epoch": 19.53, + "learning_rate": 7.616384958798836e-08, + "loss": 3.8061, + "step": 1759000 + }, + { + "epoch": 19.54, + "learning_rate": 7.61499677942385e-08, + "loss": 3.8021, + "step": 1759500 + }, + { + "epoch": 19.55, + "learning_rate": 7.613608600048863e-08, + "loss": 3.7909, + "step": 1760000 + }, + { + "epoch": 19.55, + "learning_rate": 7.612220420673877e-08, + "loss": 3.8025, + "step": 1760500 + }, + { + "epoch": 19.56, + "learning_rate": 7.610832241298891e-08, + "loss": 3.7997, + "step": 1761000 + }, + { + "epoch": 19.56, + "learning_rate": 7.609444061923905e-08, + "loss": 3.7918, + "step": 1761500 + }, + { + "epoch": 19.57, + "learning_rate": 7.60805588254892e-08, + "loss": 3.8004, + "step": 1762000 + }, + { + "epoch": 19.57, + "learning_rate": 7.606667703173934e-08, + "loss": 3.7896, + "step": 1762500 + }, + { + "epoch": 19.58, + "learning_rate": 7.605279523798947e-08, + "loss": 3.7981, + "step": 1763000 + }, + { + "epoch": 19.58, + "learning_rate": 7.603891344423961e-08, + "loss": 3.8126, + "step": 1763500 + }, + { + "epoch": 19.59, + "learning_rate": 7.602503165048975e-08, + "loss": 3.7757, + "step": 1764000 + }, + { + "epoch": 19.6, + "learning_rate": 7.601114985673988e-08, + "loss": 3.8005, + "step": 1764500 + }, + { + "epoch": 19.6, + "learning_rate": 7.599726806299002e-08, + "loss": 3.7864, + "step": 1765000 + }, + { + "epoch": 19.61, + "learning_rate": 7.598338626924015e-08, + "loss": 3.8066, + "step": 1765500 + }, + { + "epoch": 19.61, + "learning_rate": 7.59695044754903e-08, + "loss": 3.7919, + "step": 1766000 + }, + { + "epoch": 19.62, + "learning_rate": 7.595562268174044e-08, + "loss": 3.8104, + "step": 1766500 + }, + { + "epoch": 19.62, + "learning_rate": 7.594174088799058e-08, + "loss": 3.7901, + "step": 1767000 + }, + { + "epoch": 19.63, + "learning_rate": 7.592785909424072e-08, + "loss": 3.7801, + "step": 1767500 + }, + { + "epoch": 19.63, + "learning_rate": 7.591397730049086e-08, + "loss": 3.7929, + "step": 1768000 + }, + { + "epoch": 19.64, + "learning_rate": 7.590009550674099e-08, + "loss": 3.8095, + "step": 1768500 + }, + { + "epoch": 19.65, + "learning_rate": 7.588621371299114e-08, + "loss": 3.7961, + "step": 1769000 + }, + { + "epoch": 19.65, + "learning_rate": 7.587233191924128e-08, + "loss": 3.8034, + "step": 1769500 + }, + { + "epoch": 19.66, + "learning_rate": 7.585845012549142e-08, + "loss": 3.7942, + "step": 1770000 + }, + { + "epoch": 19.66, + "learning_rate": 7.584456833174155e-08, + "loss": 3.8079, + "step": 1770500 + }, + { + "epoch": 19.67, + "learning_rate": 7.583068653799169e-08, + "loss": 3.7797, + "step": 1771000 + }, + { + "epoch": 19.67, + "learning_rate": 7.581680474424182e-08, + "loss": 3.8036, + "step": 1771500 + }, + { + "epoch": 19.68, + "learning_rate": 7.580292295049196e-08, + "loss": 3.797, + "step": 1772000 + }, + { + "epoch": 19.68, + "learning_rate": 7.57890411567421e-08, + "loss": 3.8003, + "step": 1772500 + }, + { + "epoch": 19.69, + "learning_rate": 7.577515936299225e-08, + "loss": 3.7965, + "step": 1773000 + }, + { + "epoch": 19.7, + "learning_rate": 7.576127756924239e-08, + "loss": 3.7857, + "step": 1773500 + }, + { + "epoch": 19.7, + "learning_rate": 7.574739577549252e-08, + "loss": 3.8118, + "step": 1774000 + }, + { + "epoch": 19.71, + "learning_rate": 7.573351398174266e-08, + "loss": 3.805, + "step": 1774500 + }, + { + "epoch": 19.71, + "learning_rate": 7.57196321879928e-08, + "loss": 3.7956, + "step": 1775000 + }, + { + "epoch": 19.72, + "learning_rate": 7.570575039424294e-08, + "loss": 3.811, + "step": 1775500 + }, + { + "epoch": 19.72, + "learning_rate": 7.569186860049309e-08, + "loss": 3.8026, + "step": 1776000 + }, + { + "epoch": 19.73, + "learning_rate": 7.567798680674322e-08, + "loss": 3.7898, + "step": 1776500 + }, + { + "epoch": 19.73, + "learning_rate": 7.566410501299335e-08, + "loss": 3.7926, + "step": 1777000 + }, + { + "epoch": 19.74, + "learning_rate": 7.565022321924349e-08, + "loss": 3.7924, + "step": 1777500 + }, + { + "epoch": 19.75, + "learning_rate": 7.563634142549363e-08, + "loss": 3.7859, + "step": 1778000 + }, + { + "epoch": 19.75, + "learning_rate": 7.562245963174377e-08, + "loss": 3.8176, + "step": 1778500 + }, + { + "epoch": 19.76, + "learning_rate": 7.560857783799391e-08, + "loss": 3.7954, + "step": 1779000 + }, + { + "epoch": 19.76, + "learning_rate": 7.559469604424406e-08, + "loss": 3.7931, + "step": 1779500 + }, + { + "epoch": 19.77, + "learning_rate": 7.558081425049419e-08, + "loss": 3.8269, + "step": 1780000 + }, + { + "epoch": 19.77, + "learning_rate": 7.556693245674433e-08, + "loss": 3.7931, + "step": 1780500 + }, + { + "epoch": 19.78, + "learning_rate": 7.555305066299447e-08, + "loss": 3.8076, + "step": 1781000 + }, + { + "epoch": 19.78, + "learning_rate": 7.553916886924461e-08, + "loss": 3.7931, + "step": 1781500 + }, + { + "epoch": 19.79, + "learning_rate": 7.552528707549474e-08, + "loss": 3.7941, + "step": 1782000 + }, + { + "epoch": 19.8, + "learning_rate": 7.551140528174488e-08, + "loss": 3.7918, + "step": 1782500 + }, + { + "epoch": 19.8, + "learning_rate": 7.549752348799501e-08, + "loss": 3.7983, + "step": 1783000 + }, + { + "epoch": 19.81, + "learning_rate": 7.548364169424516e-08, + "loss": 3.7864, + "step": 1783500 + }, + { + "epoch": 19.81, + "learning_rate": 7.54697599004953e-08, + "loss": 3.7954, + "step": 1784000 + }, + { + "epoch": 19.82, + "learning_rate": 7.545587810674544e-08, + "loss": 3.7967, + "step": 1784500 + }, + { + "epoch": 19.82, + "learning_rate": 7.544199631299558e-08, + "loss": 3.787, + "step": 1785000 + }, + { + "epoch": 19.83, + "learning_rate": 7.542811451924571e-08, + "loss": 3.7962, + "step": 1785500 + }, + { + "epoch": 19.83, + "learning_rate": 7.541423272549585e-08, + "loss": 3.7912, + "step": 1786000 + }, + { + "epoch": 19.84, + "learning_rate": 7.5400350931746e-08, + "loss": 3.7921, + "step": 1786500 + }, + { + "epoch": 19.85, + "learning_rate": 7.538646913799614e-08, + "loss": 3.8113, + "step": 1787000 + }, + { + "epoch": 19.85, + "learning_rate": 7.537258734424628e-08, + "loss": 3.809, + "step": 1787500 + }, + { + "epoch": 19.86, + "learning_rate": 7.535870555049641e-08, + "loss": 3.8026, + "step": 1788000 + }, + { + "epoch": 19.86, + "learning_rate": 7.534482375674655e-08, + "loss": 3.7974, + "step": 1788500 + }, + { + "epoch": 19.87, + "learning_rate": 7.533094196299668e-08, + "loss": 3.7963, + "step": 1789000 + }, + { + "epoch": 19.87, + "learning_rate": 7.531706016924682e-08, + "loss": 3.8083, + "step": 1789500 + }, + { + "epoch": 19.88, + "learning_rate": 7.530317837549696e-08, + "loss": 3.7901, + "step": 1790000 + }, + { + "epoch": 19.88, + "learning_rate": 7.528929658174711e-08, + "loss": 3.8087, + "step": 1790500 + }, + { + "epoch": 19.89, + "learning_rate": 7.527541478799724e-08, + "loss": 3.8044, + "step": 1791000 + }, + { + "epoch": 19.9, + "learning_rate": 7.526153299424738e-08, + "loss": 3.8059, + "step": 1791500 + }, + { + "epoch": 19.9, + "learning_rate": 7.524765120049752e-08, + "loss": 3.8079, + "step": 1792000 + }, + { + "epoch": 19.91, + "learning_rate": 7.523376940674766e-08, + "loss": 3.812, + "step": 1792500 + }, + { + "epoch": 19.91, + "learning_rate": 7.52198876129978e-08, + "loss": 3.8006, + "step": 1793000 + }, + { + "epoch": 19.92, + "learning_rate": 7.520600581924795e-08, + "loss": 3.7945, + "step": 1793500 + }, + { + "epoch": 19.92, + "learning_rate": 7.519212402549808e-08, + "loss": 3.8061, + "step": 1794000 + }, + { + "epoch": 19.93, + "learning_rate": 7.517824223174822e-08, + "loss": 3.791, + "step": 1794500 + }, + { + "epoch": 19.93, + "learning_rate": 7.516436043799835e-08, + "loss": 3.7976, + "step": 1795000 + }, + { + "epoch": 19.94, + "learning_rate": 7.515047864424849e-08, + "loss": 3.7774, + "step": 1795500 + }, + { + "epoch": 19.95, + "learning_rate": 7.513659685049863e-08, + "loss": 3.7975, + "step": 1796000 + }, + { + "epoch": 19.95, + "learning_rate": 7.512271505674876e-08, + "loss": 3.7811, + "step": 1796500 + }, + { + "epoch": 19.96, + "learning_rate": 7.51088332629989e-08, + "loss": 3.8031, + "step": 1797000 + }, + { + "epoch": 19.96, + "learning_rate": 7.509495146924905e-08, + "loss": 3.785, + "step": 1797500 + }, + { + "epoch": 19.97, + "learning_rate": 7.508106967549919e-08, + "loss": 3.8107, + "step": 1798000 + }, + { + "epoch": 19.97, + "learning_rate": 7.506718788174933e-08, + "loss": 3.7846, + "step": 1798500 + }, + { + "epoch": 19.98, + "learning_rate": 7.505330608799947e-08, + "loss": 3.7925, + "step": 1799000 + }, + { + "epoch": 19.98, + "learning_rate": 7.50394242942496e-08, + "loss": 3.786, + "step": 1799500 + }, + { + "epoch": 19.99, + "learning_rate": 7.502554250049974e-08, + "loss": 3.8075, + "step": 1800000 + }, + { + "epoch": 20.0, + "learning_rate": 7.501166070674989e-08, + "loss": 3.8053, + "step": 1800500 + }, + { + "epoch": 20.0, + "eval_loss": 3.853641986846924, + "eval_runtime": 6.3154, + "eval_samples_per_second": 246.067, + "step": 1800920 + }, + { + "epoch": 20.0, + "learning_rate": 7.499777891300002e-08, + "loss": 3.7879, + "step": 1801000 + }, + { + "epoch": 20.01, + "learning_rate": 7.498389711925016e-08, + "loss": 3.8016, + "step": 1801500 + }, + { + "epoch": 20.01, + "learning_rate": 7.497001532550029e-08, + "loss": 3.7886, + "step": 1802000 + }, + { + "epoch": 20.02, + "learning_rate": 7.495613353175043e-08, + "loss": 3.7989, + "step": 1802500 + }, + { + "epoch": 20.02, + "learning_rate": 7.494225173800057e-08, + "loss": 3.7949, + "step": 1803000 + }, + { + "epoch": 20.03, + "learning_rate": 7.492836994425071e-08, + "loss": 3.8031, + "step": 1803500 + }, + { + "epoch": 20.03, + "learning_rate": 7.491448815050086e-08, + "loss": 3.804, + "step": 1804000 + }, + { + "epoch": 20.04, + "learning_rate": 7.4900606356751e-08, + "loss": 3.8101, + "step": 1804500 + }, + { + "epoch": 20.05, + "learning_rate": 7.488672456300113e-08, + "loss": 3.8003, + "step": 1805000 + }, + { + "epoch": 20.05, + "learning_rate": 7.487284276925127e-08, + "loss": 3.8068, + "step": 1805500 + }, + { + "epoch": 20.06, + "learning_rate": 7.485896097550141e-08, + "loss": 3.7904, + "step": 1806000 + }, + { + "epoch": 20.06, + "learning_rate": 7.484507918175155e-08, + "loss": 3.7845, + "step": 1806500 + }, + { + "epoch": 20.07, + "learning_rate": 7.483119738800168e-08, + "loss": 3.8028, + "step": 1807000 + }, + { + "epoch": 20.07, + "learning_rate": 7.481731559425182e-08, + "loss": 3.7913, + "step": 1807500 + }, + { + "epoch": 20.08, + "learning_rate": 7.480343380050195e-08, + "loss": 3.7935, + "step": 1808000 + }, + { + "epoch": 20.08, + "learning_rate": 7.47895520067521e-08, + "loss": 3.8113, + "step": 1808500 + }, + { + "epoch": 20.09, + "learning_rate": 7.477567021300224e-08, + "loss": 3.7969, + "step": 1809000 + }, + { + "epoch": 20.1, + "learning_rate": 7.476178841925238e-08, + "loss": 3.8077, + "step": 1809500 + }, + { + "epoch": 20.1, + "learning_rate": 7.474790662550252e-08, + "loss": 3.7805, + "step": 1810000 + }, + { + "epoch": 20.11, + "learning_rate": 7.473402483175265e-08, + "loss": 3.8041, + "step": 1810500 + }, + { + "epoch": 20.11, + "learning_rate": 7.47201430380028e-08, + "loss": 3.7961, + "step": 1811000 + }, + { + "epoch": 20.12, + "learning_rate": 7.470626124425294e-08, + "loss": 3.7882, + "step": 1811500 + }, + { + "epoch": 20.12, + "learning_rate": 7.469237945050308e-08, + "loss": 3.8015, + "step": 1812000 + }, + { + "epoch": 20.13, + "learning_rate": 7.467849765675321e-08, + "loss": 3.7987, + "step": 1812500 + }, + { + "epoch": 20.13, + "learning_rate": 7.466461586300335e-08, + "loss": 3.7985, + "step": 1813000 + }, + { + "epoch": 20.14, + "learning_rate": 7.465073406925348e-08, + "loss": 3.7907, + "step": 1813500 + }, + { + "epoch": 20.15, + "learning_rate": 7.463685227550362e-08, + "loss": 3.7879, + "step": 1814000 + }, + { + "epoch": 20.15, + "learning_rate": 7.462297048175376e-08, + "loss": 3.8134, + "step": 1814500 + }, + { + "epoch": 20.16, + "learning_rate": 7.46090886880039e-08, + "loss": 3.7943, + "step": 1815000 + }, + { + "epoch": 20.16, + "learning_rate": 7.459520689425405e-08, + "loss": 3.784, + "step": 1815500 + }, + { + "epoch": 20.17, + "learning_rate": 7.458132510050419e-08, + "loss": 3.7972, + "step": 1816000 + }, + { + "epoch": 20.17, + "learning_rate": 7.456744330675432e-08, + "loss": 3.7978, + "step": 1816500 + }, + { + "epoch": 20.18, + "learning_rate": 7.455356151300446e-08, + "loss": 3.7801, + "step": 1817000 + }, + { + "epoch": 20.18, + "learning_rate": 7.45396797192546e-08, + "loss": 3.8055, + "step": 1817500 + }, + { + "epoch": 20.19, + "learning_rate": 7.452579792550475e-08, + "loss": 3.7792, + "step": 1818000 + }, + { + "epoch": 20.2, + "learning_rate": 7.451191613175488e-08, + "loss": 3.8062, + "step": 1818500 + }, + { + "epoch": 20.2, + "learning_rate": 7.449803433800502e-08, + "loss": 3.7984, + "step": 1819000 + }, + { + "epoch": 20.21, + "learning_rate": 7.448415254425515e-08, + "loss": 3.7905, + "step": 1819500 + }, + { + "epoch": 20.21, + "learning_rate": 7.447027075050529e-08, + "loss": 3.7896, + "step": 1820000 + }, + { + "epoch": 20.22, + "learning_rate": 7.445638895675543e-08, + "loss": 3.8079, + "step": 1820500 + }, + { + "epoch": 20.22, + "learning_rate": 7.444250716300557e-08, + "loss": 3.7922, + "step": 1821000 + }, + { + "epoch": 20.23, + "learning_rate": 7.442862536925572e-08, + "loss": 3.8041, + "step": 1821500 + }, + { + "epoch": 20.23, + "learning_rate": 7.441474357550584e-08, + "loss": 3.7892, + "step": 1822000 + }, + { + "epoch": 20.24, + "learning_rate": 7.440086178175599e-08, + "loss": 3.7927, + "step": 1822500 + }, + { + "epoch": 20.25, + "learning_rate": 7.438697998800613e-08, + "loss": 3.7934, + "step": 1823000 + }, + { + "epoch": 20.25, + "learning_rate": 7.437309819425627e-08, + "loss": 3.7822, + "step": 1823500 + }, + { + "epoch": 20.26, + "learning_rate": 7.435921640050641e-08, + "loss": 3.7961, + "step": 1824000 + }, + { + "epoch": 20.26, + "learning_rate": 7.434533460675654e-08, + "loss": 3.7783, + "step": 1824500 + }, + { + "epoch": 20.27, + "learning_rate": 7.433145281300668e-08, + "loss": 3.7934, + "step": 1825000 + }, + { + "epoch": 20.27, + "learning_rate": 7.431757101925681e-08, + "loss": 3.8011, + "step": 1825500 + }, + { + "epoch": 20.28, + "learning_rate": 7.430368922550696e-08, + "loss": 3.7755, + "step": 1826000 + }, + { + "epoch": 20.28, + "learning_rate": 7.42898074317571e-08, + "loss": 3.7925, + "step": 1826500 + }, + { + "epoch": 20.29, + "learning_rate": 7.427592563800724e-08, + "loss": 3.8185, + "step": 1827000 + }, + { + "epoch": 20.3, + "learning_rate": 7.426204384425737e-08, + "loss": 3.7931, + "step": 1827500 + }, + { + "epoch": 20.3, + "learning_rate": 7.424816205050751e-08, + "loss": 3.7789, + "step": 1828000 + }, + { + "epoch": 20.31, + "learning_rate": 7.423428025675765e-08, + "loss": 3.8054, + "step": 1828500 + }, + { + "epoch": 20.31, + "learning_rate": 7.42203984630078e-08, + "loss": 3.7812, + "step": 1829000 + }, + { + "epoch": 20.32, + "learning_rate": 7.420651666925794e-08, + "loss": 3.7771, + "step": 1829500 + }, + { + "epoch": 20.32, + "learning_rate": 7.419263487550808e-08, + "loss": 3.7851, + "step": 1830000 + }, + { + "epoch": 20.33, + "learning_rate": 7.417875308175821e-08, + "loss": 3.7804, + "step": 1830500 + }, + { + "epoch": 20.33, + "learning_rate": 7.416487128800835e-08, + "loss": 3.7709, + "step": 1831000 + }, + { + "epoch": 20.34, + "learning_rate": 7.415098949425848e-08, + "loss": 3.8039, + "step": 1831500 + }, + { + "epoch": 20.35, + "learning_rate": 7.413710770050862e-08, + "loss": 3.7956, + "step": 1832000 + }, + { + "epoch": 20.35, + "learning_rate": 7.412322590675877e-08, + "loss": 3.795, + "step": 1832500 + }, + { + "epoch": 20.36, + "learning_rate": 7.41093441130089e-08, + "loss": 3.8189, + "step": 1833000 + }, + { + "epoch": 20.36, + "learning_rate": 7.409546231925904e-08, + "loss": 3.8009, + "step": 1833500 + }, + { + "epoch": 20.37, + "learning_rate": 7.408158052550918e-08, + "loss": 3.7999, + "step": 1834000 + }, + { + "epoch": 20.37, + "learning_rate": 7.406769873175932e-08, + "loss": 3.801, + "step": 1834500 + }, + { + "epoch": 20.38, + "learning_rate": 7.405381693800946e-08, + "loss": 3.783, + "step": 1835000 + }, + { + "epoch": 20.38, + "learning_rate": 7.40399351442596e-08, + "loss": 3.7943, + "step": 1835500 + }, + { + "epoch": 20.39, + "learning_rate": 7.402605335050974e-08, + "loss": 3.7877, + "step": 1836000 + }, + { + "epoch": 20.4, + "learning_rate": 7.401217155675988e-08, + "loss": 3.8005, + "step": 1836500 + }, + { + "epoch": 20.4, + "learning_rate": 7.399828976301002e-08, + "loss": 3.7856, + "step": 1837000 + }, + { + "epoch": 20.41, + "learning_rate": 7.398440796926015e-08, + "loss": 3.791, + "step": 1837500 + }, + { + "epoch": 20.41, + "learning_rate": 7.397052617551029e-08, + "loss": 3.7982, + "step": 1838000 + }, + { + "epoch": 20.42, + "learning_rate": 7.395664438176043e-08, + "loss": 3.7989, + "step": 1838500 + }, + { + "epoch": 20.42, + "learning_rate": 7.394276258801056e-08, + "loss": 3.7937, + "step": 1839000 + }, + { + "epoch": 20.43, + "learning_rate": 7.39288807942607e-08, + "loss": 3.7971, + "step": 1839500 + }, + { + "epoch": 20.43, + "learning_rate": 7.391499900051085e-08, + "loss": 3.7732, + "step": 1840000 + }, + { + "epoch": 20.44, + "learning_rate": 7.390111720676099e-08, + "loss": 3.7934, + "step": 1840500 + }, + { + "epoch": 20.45, + "learning_rate": 7.388723541301113e-08, + "loss": 3.8004, + "step": 1841000 + }, + { + "epoch": 20.45, + "learning_rate": 7.387335361926126e-08, + "loss": 3.7911, + "step": 1841500 + }, + { + "epoch": 20.46, + "learning_rate": 7.38594718255114e-08, + "loss": 3.7887, + "step": 1842000 + }, + { + "epoch": 20.46, + "learning_rate": 7.384559003176155e-08, + "loss": 3.7934, + "step": 1842500 + }, + { + "epoch": 20.47, + "learning_rate": 7.383170823801167e-08, + "loss": 3.7929, + "step": 1843000 + }, + { + "epoch": 20.47, + "learning_rate": 7.381782644426182e-08, + "loss": 3.7775, + "step": 1843500 + }, + { + "epoch": 20.48, + "learning_rate": 7.380394465051196e-08, + "loss": 3.7854, + "step": 1844000 + }, + { + "epoch": 20.48, + "learning_rate": 7.379006285676209e-08, + "loss": 3.7935, + "step": 1844500 + }, + { + "epoch": 20.49, + "learning_rate": 7.377618106301223e-08, + "loss": 3.7867, + "step": 1845000 + }, + { + "epoch": 20.5, + "learning_rate": 7.376229926926237e-08, + "loss": 3.7832, + "step": 1845500 + }, + { + "epoch": 20.5, + "learning_rate": 7.374841747551251e-08, + "loss": 3.8162, + "step": 1846000 + }, + { + "epoch": 20.51, + "learning_rate": 7.373453568176266e-08, + "loss": 3.7905, + "step": 1846500 + }, + { + "epoch": 20.51, + "learning_rate": 7.372065388801279e-08, + "loss": 3.8053, + "step": 1847000 + }, + { + "epoch": 20.52, + "learning_rate": 7.370677209426293e-08, + "loss": 3.7959, + "step": 1847500 + }, + { + "epoch": 20.52, + "learning_rate": 7.369289030051307e-08, + "loss": 3.7998, + "step": 1848000 + }, + { + "epoch": 20.53, + "learning_rate": 7.367900850676321e-08, + "loss": 3.7926, + "step": 1848500 + }, + { + "epoch": 20.53, + "learning_rate": 7.366512671301334e-08, + "loss": 3.7766, + "step": 1849000 + }, + { + "epoch": 20.54, + "learning_rate": 7.365124491926348e-08, + "loss": 3.791, + "step": 1849500 + }, + { + "epoch": 20.55, + "learning_rate": 7.363736312551361e-08, + "loss": 3.8024, + "step": 1850000 + }, + { + "epoch": 20.55, + "learning_rate": 7.362348133176376e-08, + "loss": 3.8076, + "step": 1850500 + }, + { + "epoch": 20.56, + "learning_rate": 7.36095995380139e-08, + "loss": 3.7737, + "step": 1851000 + }, + { + "epoch": 20.56, + "learning_rate": 7.359571774426404e-08, + "loss": 3.7721, + "step": 1851500 + }, + { + "epoch": 20.57, + "learning_rate": 7.358183595051418e-08, + "loss": 3.8007, + "step": 1852000 + }, + { + "epoch": 20.57, + "learning_rate": 7.356795415676432e-08, + "loss": 3.7948, + "step": 1852500 + }, + { + "epoch": 20.58, + "learning_rate": 7.355407236301445e-08, + "loss": 3.7961, + "step": 1853000 + }, + { + "epoch": 20.58, + "learning_rate": 7.35401905692646e-08, + "loss": 3.7918, + "step": 1853500 + }, + { + "epoch": 20.59, + "learning_rate": 7.352630877551474e-08, + "loss": 3.8145, + "step": 1854000 + }, + { + "epoch": 20.6, + "learning_rate": 7.351242698176488e-08, + "loss": 3.7967, + "step": 1854500 + }, + { + "epoch": 20.6, + "learning_rate": 7.349854518801501e-08, + "loss": 3.8052, + "step": 1855000 + }, + { + "epoch": 20.61, + "learning_rate": 7.348466339426515e-08, + "loss": 3.8081, + "step": 1855500 + }, + { + "epoch": 20.61, + "learning_rate": 7.347078160051528e-08, + "loss": 3.7957, + "step": 1856000 + }, + { + "epoch": 20.62, + "learning_rate": 7.345689980676542e-08, + "loss": 3.779, + "step": 1856500 + }, + { + "epoch": 20.62, + "learning_rate": 7.344301801301556e-08, + "loss": 3.7836, + "step": 1857000 + }, + { + "epoch": 20.63, + "learning_rate": 7.342913621926571e-08, + "loss": 3.8091, + "step": 1857500 + }, + { + "epoch": 20.63, + "learning_rate": 7.341525442551585e-08, + "loss": 3.8083, + "step": 1858000 + }, + { + "epoch": 20.64, + "learning_rate": 7.340137263176598e-08, + "loss": 3.8133, + "step": 1858500 + }, + { + "epoch": 20.65, + "learning_rate": 7.338749083801612e-08, + "loss": 3.7899, + "step": 1859000 + }, + { + "epoch": 20.65, + "learning_rate": 7.337360904426626e-08, + "loss": 3.7923, + "step": 1859500 + }, + { + "epoch": 20.66, + "learning_rate": 7.33597272505164e-08, + "loss": 3.7969, + "step": 1860000 + }, + { + "epoch": 20.66, + "learning_rate": 7.334584545676655e-08, + "loss": 3.7909, + "step": 1860500 + }, + { + "epoch": 20.67, + "learning_rate": 7.333196366301668e-08, + "loss": 3.7936, + "step": 1861000 + }, + { + "epoch": 20.67, + "learning_rate": 7.331808186926682e-08, + "loss": 3.7971, + "step": 1861500 + }, + { + "epoch": 20.68, + "learning_rate": 7.330420007551695e-08, + "loss": 3.792, + "step": 1862000 + }, + { + "epoch": 20.68, + "learning_rate": 7.329031828176709e-08, + "loss": 3.7835, + "step": 1862500 + }, + { + "epoch": 20.69, + "learning_rate": 7.327643648801723e-08, + "loss": 3.8016, + "step": 1863000 + }, + { + "epoch": 20.69, + "learning_rate": 7.326255469426737e-08, + "loss": 3.8168, + "step": 1863500 + }, + { + "epoch": 20.7, + "learning_rate": 7.32486729005175e-08, + "loss": 3.8032, + "step": 1864000 + }, + { + "epoch": 20.71, + "learning_rate": 7.323479110676765e-08, + "loss": 3.8184, + "step": 1864500 + }, + { + "epoch": 20.71, + "learning_rate": 7.322090931301779e-08, + "loss": 3.7962, + "step": 1865000 + }, + { + "epoch": 20.72, + "learning_rate": 7.320702751926793e-08, + "loss": 3.7996, + "step": 1865500 + }, + { + "epoch": 20.72, + "learning_rate": 7.319314572551807e-08, + "loss": 3.7789, + "step": 1866000 + }, + { + "epoch": 20.73, + "learning_rate": 7.317926393176821e-08, + "loss": 3.7911, + "step": 1866500 + }, + { + "epoch": 20.73, + "learning_rate": 7.316538213801834e-08, + "loss": 3.805, + "step": 1867000 + }, + { + "epoch": 20.74, + "learning_rate": 7.315150034426849e-08, + "loss": 3.79, + "step": 1867500 + }, + { + "epoch": 20.74, + "learning_rate": 7.313761855051862e-08, + "loss": 3.7989, + "step": 1868000 + }, + { + "epoch": 20.75, + "learning_rate": 7.312373675676876e-08, + "loss": 3.7835, + "step": 1868500 + }, + { + "epoch": 20.76, + "learning_rate": 7.31098549630189e-08, + "loss": 3.8064, + "step": 1869000 + }, + { + "epoch": 20.76, + "learning_rate": 7.309597316926903e-08, + "loss": 3.7765, + "step": 1869500 + }, + { + "epoch": 20.77, + "learning_rate": 7.308209137551917e-08, + "loss": 3.7731, + "step": 1870000 + }, + { + "epoch": 20.77, + "learning_rate": 7.306820958176931e-08, + "loss": 3.8029, + "step": 1870500 + }, + { + "epoch": 20.78, + "learning_rate": 7.305432778801946e-08, + "loss": 3.7952, + "step": 1871000 + }, + { + "epoch": 20.78, + "learning_rate": 7.30404459942696e-08, + "loss": 3.7889, + "step": 1871500 + }, + { + "epoch": 20.79, + "learning_rate": 7.302656420051974e-08, + "loss": 3.8002, + "step": 1872000 + }, + { + "epoch": 20.79, + "learning_rate": 7.301268240676987e-08, + "loss": 3.7875, + "step": 1872500 + }, + { + "epoch": 20.8, + "learning_rate": 7.299880061302001e-08, + "loss": 3.8035, + "step": 1873000 + }, + { + "epoch": 20.81, + "learning_rate": 7.298491881927014e-08, + "loss": 3.7853, + "step": 1873500 + }, + { + "epoch": 20.81, + "learning_rate": 7.297103702552028e-08, + "loss": 3.7991, + "step": 1874000 + }, + { + "epoch": 20.82, + "learning_rate": 7.295715523177043e-08, + "loss": 3.7909, + "step": 1874500 + }, + { + "epoch": 20.82, + "learning_rate": 7.294327343802057e-08, + "loss": 3.7939, + "step": 1875000 + }, + { + "epoch": 20.83, + "learning_rate": 7.29293916442707e-08, + "loss": 3.7988, + "step": 1875500 + }, + { + "epoch": 20.83, + "learning_rate": 7.291550985052084e-08, + "loss": 3.7876, + "step": 1876000 + }, + { + "epoch": 20.84, + "learning_rate": 7.290162805677098e-08, + "loss": 3.7942, + "step": 1876500 + }, + { + "epoch": 20.84, + "learning_rate": 7.288774626302112e-08, + "loss": 3.8014, + "step": 1877000 + }, + { + "epoch": 20.85, + "learning_rate": 7.287386446927127e-08, + "loss": 3.7714, + "step": 1877500 + }, + { + "epoch": 20.86, + "learning_rate": 7.28599826755214e-08, + "loss": 3.7876, + "step": 1878000 + }, + { + "epoch": 20.86, + "learning_rate": 7.284610088177154e-08, + "loss": 3.7936, + "step": 1878500 + }, + { + "epoch": 20.87, + "learning_rate": 7.283221908802168e-08, + "loss": 3.788, + "step": 1879000 + }, + { + "epoch": 20.87, + "learning_rate": 7.281833729427181e-08, + "loss": 3.8013, + "step": 1879500 + }, + { + "epoch": 20.88, + "learning_rate": 7.280445550052195e-08, + "loss": 3.7853, + "step": 1880000 + }, + { + "epoch": 20.88, + "learning_rate": 7.279057370677209e-08, + "loss": 3.776, + "step": 1880500 + }, + { + "epoch": 20.89, + "learning_rate": 7.277669191302222e-08, + "loss": 3.801, + "step": 1881000 + }, + { + "epoch": 20.89, + "learning_rate": 7.276281011927236e-08, + "loss": 3.795, + "step": 1881500 + }, + { + "epoch": 20.9, + "learning_rate": 7.27489283255225e-08, + "loss": 3.8004, + "step": 1882000 + }, + { + "epoch": 20.91, + "learning_rate": 7.273504653177265e-08, + "loss": 3.7954, + "step": 1882500 + }, + { + "epoch": 20.91, + "learning_rate": 7.272116473802279e-08, + "loss": 3.806, + "step": 1883000 + }, + { + "epoch": 20.92, + "learning_rate": 7.270728294427292e-08, + "loss": 3.7901, + "step": 1883500 + }, + { + "epoch": 20.92, + "learning_rate": 7.269340115052306e-08, + "loss": 3.7892, + "step": 1884000 + }, + { + "epoch": 20.93, + "learning_rate": 7.26795193567732e-08, + "loss": 3.7932, + "step": 1884500 + }, + { + "epoch": 20.93, + "learning_rate": 7.266563756302335e-08, + "loss": 3.7816, + "step": 1885000 + }, + { + "epoch": 20.94, + "learning_rate": 7.265175576927348e-08, + "loss": 3.7863, + "step": 1885500 + }, + { + "epoch": 20.94, + "learning_rate": 7.263787397552362e-08, + "loss": 3.7845, + "step": 1886000 + }, + { + "epoch": 20.95, + "learning_rate": 7.262399218177375e-08, + "loss": 3.7904, + "step": 1886500 + }, + { + "epoch": 20.96, + "learning_rate": 7.261011038802389e-08, + "loss": 3.7817, + "step": 1887000 + }, + { + "epoch": 20.96, + "learning_rate": 7.259622859427403e-08, + "loss": 3.7957, + "step": 1887500 + }, + { + "epoch": 20.97, + "learning_rate": 7.258234680052417e-08, + "loss": 3.8018, + "step": 1888000 + }, + { + "epoch": 20.97, + "learning_rate": 7.256846500677432e-08, + "loss": 3.8093, + "step": 1888500 + }, + { + "epoch": 20.98, + "learning_rate": 7.255458321302446e-08, + "loss": 3.8011, + "step": 1889000 + }, + { + "epoch": 20.98, + "learning_rate": 7.254070141927459e-08, + "loss": 3.8015, + "step": 1889500 + }, + { + "epoch": 20.99, + "learning_rate": 7.252681962552473e-08, + "loss": 3.7908, + "step": 1890000 + }, + { + "epoch": 20.99, + "learning_rate": 7.251293783177487e-08, + "loss": 3.7689, + "step": 1890500 + }, + { + "epoch": 21.0, + "eval_loss": 3.8509271144866943, + "eval_runtime": 6.3078, + "eval_samples_per_second": 246.362, + "step": 1890966 + }, + { + "epoch": 21.0, + "learning_rate": 7.249905603802501e-08, + "loss": 3.7892, + "step": 1891000 + }, + { + "epoch": 21.01, + "learning_rate": 7.248517424427514e-08, + "loss": 3.7717, + "step": 1891500 + }, + { + "epoch": 21.01, + "learning_rate": 7.247129245052529e-08, + "loss": 3.812, + "step": 1892000 + }, + { + "epoch": 21.02, + "learning_rate": 7.245741065677541e-08, + "loss": 3.8008, + "step": 1892500 + }, + { + "epoch": 21.02, + "learning_rate": 7.244352886302556e-08, + "loss": 3.8002, + "step": 1893000 + }, + { + "epoch": 21.03, + "learning_rate": 7.24296470692757e-08, + "loss": 3.7934, + "step": 1893500 + }, + { + "epoch": 21.03, + "learning_rate": 7.241576527552584e-08, + "loss": 3.7885, + "step": 1894000 + }, + { + "epoch": 21.04, + "learning_rate": 7.240188348177598e-08, + "loss": 3.796, + "step": 1894500 + }, + { + "epoch": 21.04, + "learning_rate": 7.238800168802611e-08, + "loss": 3.784, + "step": 1895000 + }, + { + "epoch": 21.05, + "learning_rate": 7.237411989427625e-08, + "loss": 3.8008, + "step": 1895500 + }, + { + "epoch": 21.06, + "learning_rate": 7.23602381005264e-08, + "loss": 3.7905, + "step": 1896000 + }, + { + "epoch": 21.06, + "learning_rate": 7.234635630677654e-08, + "loss": 3.8028, + "step": 1896500 + }, + { + "epoch": 21.07, + "learning_rate": 7.233247451302668e-08, + "loss": 3.7682, + "step": 1897000 + }, + { + "epoch": 21.07, + "learning_rate": 7.231859271927681e-08, + "loss": 3.801, + "step": 1897500 + }, + { + "epoch": 21.08, + "learning_rate": 7.230471092552695e-08, + "loss": 3.7704, + "step": 1898000 + }, + { + "epoch": 21.08, + "learning_rate": 7.229082913177708e-08, + "loss": 3.7966, + "step": 1898500 + }, + { + "epoch": 21.09, + "learning_rate": 7.227694733802722e-08, + "loss": 3.7775, + "step": 1899000 + }, + { + "epoch": 21.09, + "learning_rate": 7.226306554427737e-08, + "loss": 3.7871, + "step": 1899500 + }, + { + "epoch": 21.1, + "learning_rate": 7.224918375052751e-08, + "loss": 3.7854, + "step": 1900000 + }, + { + "epoch": 21.11, + "learning_rate": 7.223530195677764e-08, + "loss": 3.7765, + "step": 1900500 + }, + { + "epoch": 21.11, + "learning_rate": 7.222142016302778e-08, + "loss": 3.8002, + "step": 1901000 + }, + { + "epoch": 21.12, + "learning_rate": 7.220753836927792e-08, + "loss": 3.7949, + "step": 1901500 + }, + { + "epoch": 21.12, + "learning_rate": 7.219365657552806e-08, + "loss": 3.7935, + "step": 1902000 + }, + { + "epoch": 21.13, + "learning_rate": 7.21797747817782e-08, + "loss": 3.8165, + "step": 1902500 + }, + { + "epoch": 21.13, + "learning_rate": 7.216589298802835e-08, + "loss": 3.7842, + "step": 1903000 + }, + { + "epoch": 21.14, + "learning_rate": 7.215201119427848e-08, + "loss": 3.8086, + "step": 1903500 + }, + { + "epoch": 21.14, + "learning_rate": 7.213812940052861e-08, + "loss": 3.7924, + "step": 1904000 + }, + { + "epoch": 21.15, + "learning_rate": 7.212424760677875e-08, + "loss": 3.7946, + "step": 1904500 + }, + { + "epoch": 21.16, + "learning_rate": 7.211036581302889e-08, + "loss": 3.8132, + "step": 1905000 + }, + { + "epoch": 21.16, + "learning_rate": 7.209648401927903e-08, + "loss": 3.8024, + "step": 1905500 + }, + { + "epoch": 21.17, + "learning_rate": 7.208260222552916e-08, + "loss": 3.7942, + "step": 1906000 + }, + { + "epoch": 21.17, + "learning_rate": 7.20687204317793e-08, + "loss": 3.8022, + "step": 1906500 + }, + { + "epoch": 21.18, + "learning_rate": 7.205483863802945e-08, + "loss": 3.7964, + "step": 1907000 + }, + { + "epoch": 21.18, + "learning_rate": 7.204095684427959e-08, + "loss": 3.7667, + "step": 1907500 + }, + { + "epoch": 21.19, + "learning_rate": 7.202707505052973e-08, + "loss": 3.7926, + "step": 1908000 + }, + { + "epoch": 21.19, + "learning_rate": 7.201319325677987e-08, + "loss": 3.7961, + "step": 1908500 + }, + { + "epoch": 21.2, + "learning_rate": 7.199931146303e-08, + "loss": 3.8026, + "step": 1909000 + }, + { + "epoch": 21.21, + "learning_rate": 7.198542966928015e-08, + "loss": 3.7988, + "step": 1909500 + }, + { + "epoch": 21.21, + "learning_rate": 7.197154787553027e-08, + "loss": 3.7841, + "step": 1910000 + }, + { + "epoch": 21.22, + "learning_rate": 7.195766608178042e-08, + "loss": 3.7996, + "step": 1910500 + }, + { + "epoch": 21.22, + "learning_rate": 7.194378428803056e-08, + "loss": 3.797, + "step": 1911000 + }, + { + "epoch": 21.23, + "learning_rate": 7.19299024942807e-08, + "loss": 3.7973, + "step": 1911500 + }, + { + "epoch": 21.23, + "learning_rate": 7.191602070053083e-08, + "loss": 3.8092, + "step": 1912000 + }, + { + "epoch": 21.24, + "learning_rate": 7.190213890678097e-08, + "loss": 3.7942, + "step": 1912500 + }, + { + "epoch": 21.24, + "learning_rate": 7.188825711303111e-08, + "loss": 3.7885, + "step": 1913000 + }, + { + "epoch": 21.25, + "learning_rate": 7.187437531928126e-08, + "loss": 3.7892, + "step": 1913500 + }, + { + "epoch": 21.26, + "learning_rate": 7.18604935255314e-08, + "loss": 3.7898, + "step": 1914000 + }, + { + "epoch": 21.26, + "learning_rate": 7.184661173178153e-08, + "loss": 3.7825, + "step": 1914500 + }, + { + "epoch": 21.27, + "learning_rate": 7.183272993803167e-08, + "loss": 3.7905, + "step": 1915000 + }, + { + "epoch": 21.27, + "learning_rate": 7.181884814428181e-08, + "loss": 3.7796, + "step": 1915500 + }, + { + "epoch": 21.28, + "learning_rate": 7.180496635053194e-08, + "loss": 3.8, + "step": 1916000 + }, + { + "epoch": 21.28, + "learning_rate": 7.179108455678208e-08, + "loss": 3.7928, + "step": 1916500 + }, + { + "epoch": 21.29, + "learning_rate": 7.177720276303223e-08, + "loss": 3.7943, + "step": 1917000 + }, + { + "epoch": 21.29, + "learning_rate": 7.176332096928236e-08, + "loss": 3.7971, + "step": 1917500 + }, + { + "epoch": 21.3, + "learning_rate": 7.17494391755325e-08, + "loss": 3.7886, + "step": 1918000 + }, + { + "epoch": 21.31, + "learning_rate": 7.173555738178264e-08, + "loss": 3.7806, + "step": 1918500 + }, + { + "epoch": 21.31, + "learning_rate": 7.172167558803278e-08, + "loss": 3.7901, + "step": 1919000 + }, + { + "epoch": 21.32, + "learning_rate": 7.170779379428292e-08, + "loss": 3.7816, + "step": 1919500 + }, + { + "epoch": 21.32, + "learning_rate": 7.169391200053305e-08, + "loss": 3.8032, + "step": 1920000 + }, + { + "epoch": 21.33, + "learning_rate": 7.16800302067832e-08, + "loss": 3.7795, + "step": 1920500 + }, + { + "epoch": 21.33, + "learning_rate": 7.166614841303334e-08, + "loss": 3.7688, + "step": 1921000 + }, + { + "epoch": 21.34, + "learning_rate": 7.165226661928348e-08, + "loss": 3.7695, + "step": 1921500 + }, + { + "epoch": 21.34, + "learning_rate": 7.163838482553361e-08, + "loss": 3.8006, + "step": 1922000 + }, + { + "epoch": 21.35, + "learning_rate": 7.162450303178375e-08, + "loss": 3.8097, + "step": 1922500 + }, + { + "epoch": 21.36, + "learning_rate": 7.161062123803388e-08, + "loss": 3.7796, + "step": 1923000 + }, + { + "epoch": 21.36, + "learning_rate": 7.159673944428402e-08, + "loss": 3.7949, + "step": 1923500 + }, + { + "epoch": 21.37, + "learning_rate": 7.158285765053417e-08, + "loss": 3.776, + "step": 1924000 + }, + { + "epoch": 21.37, + "learning_rate": 7.156897585678431e-08, + "loss": 3.78, + "step": 1924500 + }, + { + "epoch": 21.38, + "learning_rate": 7.155509406303445e-08, + "loss": 3.7884, + "step": 1925000 + }, + { + "epoch": 21.38, + "learning_rate": 7.154121226928459e-08, + "loss": 3.803, + "step": 1925500 + }, + { + "epoch": 21.39, + "learning_rate": 7.152733047553472e-08, + "loss": 3.786, + "step": 1926000 + }, + { + "epoch": 21.39, + "learning_rate": 7.151344868178486e-08, + "loss": 3.7773, + "step": 1926500 + }, + { + "epoch": 21.4, + "learning_rate": 7.1499566888035e-08, + "loss": 3.798, + "step": 1927000 + }, + { + "epoch": 21.41, + "learning_rate": 7.148568509428515e-08, + "loss": 3.7925, + "step": 1927500 + }, + { + "epoch": 21.41, + "learning_rate": 7.147180330053528e-08, + "loss": 3.7903, + "step": 1928000 + }, + { + "epoch": 21.42, + "learning_rate": 7.145792150678542e-08, + "loss": 3.7757, + "step": 1928500 + }, + { + "epoch": 21.42, + "learning_rate": 7.144403971303555e-08, + "loss": 3.7916, + "step": 1929000 + }, + { + "epoch": 21.43, + "learning_rate": 7.143015791928569e-08, + "loss": 3.7897, + "step": 1929500 + }, + { + "epoch": 21.43, + "learning_rate": 7.141627612553583e-08, + "loss": 3.789, + "step": 1930000 + }, + { + "epoch": 21.44, + "learning_rate": 7.140239433178597e-08, + "loss": 3.8001, + "step": 1930500 + }, + { + "epoch": 21.44, + "learning_rate": 7.138851253803612e-08, + "loss": 3.7804, + "step": 1931000 + }, + { + "epoch": 21.45, + "learning_rate": 7.137463074428625e-08, + "loss": 3.7953, + "step": 1931500 + }, + { + "epoch": 21.46, + "learning_rate": 7.136074895053639e-08, + "loss": 3.7868, + "step": 1932000 + }, + { + "epoch": 21.46, + "learning_rate": 7.134686715678653e-08, + "loss": 3.7864, + "step": 1932500 + }, + { + "epoch": 21.47, + "learning_rate": 7.133298536303667e-08, + "loss": 3.8067, + "step": 1933000 + }, + { + "epoch": 21.47, + "learning_rate": 7.131910356928682e-08, + "loss": 3.7949, + "step": 1933500 + }, + { + "epoch": 21.48, + "learning_rate": 7.130522177553694e-08, + "loss": 3.8007, + "step": 1934000 + }, + { + "epoch": 21.48, + "learning_rate": 7.129133998178707e-08, + "loss": 3.7754, + "step": 1934500 + }, + { + "epoch": 21.49, + "learning_rate": 7.127745818803722e-08, + "loss": 3.8005, + "step": 1935000 + }, + { + "epoch": 21.49, + "learning_rate": 7.126357639428736e-08, + "loss": 3.7777, + "step": 1935500 + }, + { + "epoch": 21.5, + "learning_rate": 7.12496946005375e-08, + "loss": 3.7963, + "step": 1936000 + }, + { + "epoch": 21.51, + "learning_rate": 7.123581280678764e-08, + "loss": 3.7851, + "step": 1936500 + }, + { + "epoch": 21.51, + "learning_rate": 7.122193101303777e-08, + "loss": 3.7674, + "step": 1937000 + }, + { + "epoch": 21.52, + "learning_rate": 7.120804921928791e-08, + "loss": 3.8032, + "step": 1937500 + }, + { + "epoch": 21.52, + "learning_rate": 7.119416742553806e-08, + "loss": 3.7782, + "step": 1938000 + }, + { + "epoch": 21.53, + "learning_rate": 7.11802856317882e-08, + "loss": 3.7864, + "step": 1938500 + }, + { + "epoch": 21.53, + "learning_rate": 7.116640383803834e-08, + "loss": 3.789, + "step": 1939000 + }, + { + "epoch": 21.54, + "learning_rate": 7.115252204428848e-08, + "loss": 3.7701, + "step": 1939500 + }, + { + "epoch": 21.54, + "learning_rate": 7.113864025053861e-08, + "loss": 3.7921, + "step": 1940000 + }, + { + "epoch": 21.55, + "learning_rate": 7.112475845678874e-08, + "loss": 3.7854, + "step": 1940500 + }, + { + "epoch": 21.56, + "learning_rate": 7.111087666303888e-08, + "loss": 3.7882, + "step": 1941000 + }, + { + "epoch": 21.56, + "learning_rate": 7.109699486928903e-08, + "loss": 3.7841, + "step": 1941500 + }, + { + "epoch": 21.57, + "learning_rate": 7.108311307553917e-08, + "loss": 3.7935, + "step": 1942000 + }, + { + "epoch": 21.57, + "learning_rate": 7.10692312817893e-08, + "loss": 3.7878, + "step": 1942500 + }, + { + "epoch": 21.58, + "learning_rate": 7.105534948803944e-08, + "loss": 3.787, + "step": 1943000 + }, + { + "epoch": 21.58, + "learning_rate": 7.104146769428958e-08, + "loss": 3.8077, + "step": 1943500 + }, + { + "epoch": 21.59, + "learning_rate": 7.102758590053972e-08, + "loss": 3.7806, + "step": 1944000 + }, + { + "epoch": 21.59, + "learning_rate": 7.101370410678987e-08, + "loss": 3.7905, + "step": 1944500 + }, + { + "epoch": 21.6, + "learning_rate": 7.099982231304001e-08, + "loss": 3.7657, + "step": 1945000 + }, + { + "epoch": 21.61, + "learning_rate": 7.098594051929014e-08, + "loss": 3.796, + "step": 1945500 + }, + { + "epoch": 21.61, + "learning_rate": 7.097205872554028e-08, + "loss": 3.7807, + "step": 1946000 + }, + { + "epoch": 21.62, + "learning_rate": 7.095817693179041e-08, + "loss": 3.7795, + "step": 1946500 + }, + { + "epoch": 21.62, + "learning_rate": 7.094429513804055e-08, + "loss": 3.8025, + "step": 1947000 + }, + { + "epoch": 21.63, + "learning_rate": 7.093041334429069e-08, + "loss": 3.8012, + "step": 1947500 + }, + { + "epoch": 21.63, + "learning_rate": 7.091653155054083e-08, + "loss": 3.79, + "step": 1948000 + }, + { + "epoch": 21.64, + "learning_rate": 7.090264975679096e-08, + "loss": 3.7857, + "step": 1948500 + }, + { + "epoch": 21.64, + "learning_rate": 7.08887679630411e-08, + "loss": 3.7686, + "step": 1949000 + }, + { + "epoch": 21.65, + "learning_rate": 7.087488616929125e-08, + "loss": 3.7948, + "step": 1949500 + }, + { + "epoch": 21.66, + "learning_rate": 7.086100437554139e-08, + "loss": 3.7862, + "step": 1950000 + }, + { + "epoch": 21.66, + "learning_rate": 7.084712258179153e-08, + "loss": 3.7964, + "step": 1950500 + }, + { + "epoch": 21.67, + "learning_rate": 7.083324078804166e-08, + "loss": 3.7797, + "step": 1951000 + }, + { + "epoch": 21.67, + "learning_rate": 7.08193589942918e-08, + "loss": 3.8038, + "step": 1951500 + }, + { + "epoch": 21.68, + "learning_rate": 7.080547720054195e-08, + "loss": 3.7925, + "step": 1952000 + }, + { + "epoch": 21.68, + "learning_rate": 7.079159540679208e-08, + "loss": 3.7775, + "step": 1952500 + }, + { + "epoch": 21.69, + "learning_rate": 7.077771361304222e-08, + "loss": 3.788, + "step": 1953000 + }, + { + "epoch": 21.69, + "learning_rate": 7.076383181929236e-08, + "loss": 3.7817, + "step": 1953500 + }, + { + "epoch": 21.7, + "learning_rate": 7.074995002554249e-08, + "loss": 3.7919, + "step": 1954000 + }, + { + "epoch": 21.71, + "learning_rate": 7.073606823179263e-08, + "loss": 3.761, + "step": 1954500 + }, + { + "epoch": 21.71, + "learning_rate": 7.072218643804277e-08, + "loss": 3.8048, + "step": 1955000 + }, + { + "epoch": 21.72, + "learning_rate": 7.070830464429292e-08, + "loss": 3.801, + "step": 1955500 + }, + { + "epoch": 21.72, + "learning_rate": 7.069442285054306e-08, + "loss": 3.7954, + "step": 1956000 + }, + { + "epoch": 21.73, + "learning_rate": 7.068054105679319e-08, + "loss": 3.7729, + "step": 1956500 + }, + { + "epoch": 21.73, + "learning_rate": 7.066665926304333e-08, + "loss": 3.7781, + "step": 1957000 + }, + { + "epoch": 21.74, + "learning_rate": 7.065277746929347e-08, + "loss": 3.7812, + "step": 1957500 + }, + { + "epoch": 21.74, + "learning_rate": 7.063889567554361e-08, + "loss": 3.7885, + "step": 1958000 + }, + { + "epoch": 21.75, + "learning_rate": 7.062501388179374e-08, + "loss": 3.7942, + "step": 1958500 + }, + { + "epoch": 21.76, + "learning_rate": 7.061113208804389e-08, + "loss": 3.7899, + "step": 1959000 + }, + { + "epoch": 21.76, + "learning_rate": 7.059725029429401e-08, + "loss": 3.7894, + "step": 1959500 + }, + { + "epoch": 21.77, + "learning_rate": 7.058336850054416e-08, + "loss": 3.8002, + "step": 1960000 + }, + { + "epoch": 21.77, + "learning_rate": 7.05694867067943e-08, + "loss": 3.7883, + "step": 1960500 + }, + { + "epoch": 21.78, + "learning_rate": 7.055560491304444e-08, + "loss": 3.8024, + "step": 1961000 + }, + { + "epoch": 21.78, + "learning_rate": 7.054172311929458e-08, + "loss": 3.8021, + "step": 1961500 + }, + { + "epoch": 21.79, + "learning_rate": 7.052784132554473e-08, + "loss": 3.7879, + "step": 1962000 + }, + { + "epoch": 21.79, + "learning_rate": 7.051395953179485e-08, + "loss": 3.7841, + "step": 1962500 + }, + { + "epoch": 21.8, + "learning_rate": 7.0500077738045e-08, + "loss": 3.8009, + "step": 1963000 + }, + { + "epoch": 21.81, + "learning_rate": 7.048619594429514e-08, + "loss": 3.8139, + "step": 1963500 + }, + { + "epoch": 21.81, + "learning_rate": 7.047231415054528e-08, + "loss": 3.7907, + "step": 1964000 + }, + { + "epoch": 21.82, + "learning_rate": 7.045843235679541e-08, + "loss": 3.7904, + "step": 1964500 + }, + { + "epoch": 21.82, + "learning_rate": 7.044455056304554e-08, + "loss": 3.7757, + "step": 1965000 + }, + { + "epoch": 21.83, + "learning_rate": 7.043066876929568e-08, + "loss": 3.7891, + "step": 1965500 + }, + { + "epoch": 21.83, + "learning_rate": 7.041678697554582e-08, + "loss": 3.7992, + "step": 1966000 + }, + { + "epoch": 21.84, + "learning_rate": 7.040290518179597e-08, + "loss": 3.7669, + "step": 1966500 + }, + { + "epoch": 21.84, + "learning_rate": 7.038902338804611e-08, + "loss": 3.809, + "step": 1967000 + }, + { + "epoch": 21.85, + "learning_rate": 7.037514159429625e-08, + "loss": 3.7909, + "step": 1967500 + }, + { + "epoch": 21.86, + "learning_rate": 7.036125980054638e-08, + "loss": 3.7907, + "step": 1968000 + }, + { + "epoch": 21.86, + "learning_rate": 7.034737800679652e-08, + "loss": 3.7807, + "step": 1968500 + }, + { + "epoch": 21.87, + "learning_rate": 7.033349621304666e-08, + "loss": 3.787, + "step": 1969000 + }, + { + "epoch": 21.87, + "learning_rate": 7.031961441929681e-08, + "loss": 3.7761, + "step": 1969500 + }, + { + "epoch": 21.88, + "learning_rate": 7.030573262554695e-08, + "loss": 3.779, + "step": 1970000 + }, + { + "epoch": 21.88, + "learning_rate": 7.029185083179708e-08, + "loss": 3.8079, + "step": 1970500 + }, + { + "epoch": 21.89, + "learning_rate": 7.027796903804721e-08, + "loss": 3.7868, + "step": 1971000 + }, + { + "epoch": 21.89, + "learning_rate": 7.026408724429735e-08, + "loss": 3.7868, + "step": 1971500 + }, + { + "epoch": 21.9, + "learning_rate": 7.025020545054749e-08, + "loss": 3.7996, + "step": 1972000 + }, + { + "epoch": 21.91, + "learning_rate": 7.023632365679763e-08, + "loss": 3.7828, + "step": 1972500 + }, + { + "epoch": 21.91, + "learning_rate": 7.022244186304778e-08, + "loss": 3.7873, + "step": 1973000 + }, + { + "epoch": 21.92, + "learning_rate": 7.02085600692979e-08, + "loss": 3.7874, + "step": 1973500 + }, + { + "epoch": 21.92, + "learning_rate": 7.019467827554805e-08, + "loss": 3.7938, + "step": 1974000 + }, + { + "epoch": 21.93, + "learning_rate": 7.018079648179819e-08, + "loss": 3.7869, + "step": 1974500 + }, + { + "epoch": 21.93, + "learning_rate": 7.016691468804833e-08, + "loss": 3.798, + "step": 1975000 + }, + { + "epoch": 21.94, + "learning_rate": 7.015303289429847e-08, + "loss": 3.7919, + "step": 1975500 + }, + { + "epoch": 21.94, + "learning_rate": 7.013915110054862e-08, + "loss": 3.7861, + "step": 1976000 + }, + { + "epoch": 21.95, + "learning_rate": 7.012526930679875e-08, + "loss": 3.795, + "step": 1976500 + }, + { + "epoch": 21.96, + "learning_rate": 7.011138751304887e-08, + "loss": 3.7829, + "step": 1977000 + }, + { + "epoch": 21.96, + "learning_rate": 7.009750571929902e-08, + "loss": 3.8138, + "step": 1977500 + }, + { + "epoch": 21.97, + "learning_rate": 7.008362392554916e-08, + "loss": 3.7756, + "step": 1978000 + }, + { + "epoch": 21.97, + "learning_rate": 7.00697421317993e-08, + "loss": 3.7906, + "step": 1978500 + }, + { + "epoch": 21.98, + "learning_rate": 7.005586033804943e-08, + "loss": 3.79, + "step": 1979000 + }, + { + "epoch": 21.98, + "learning_rate": 7.004197854429957e-08, + "loss": 3.8046, + "step": 1979500 + }, + { + "epoch": 21.99, + "learning_rate": 7.002809675054971e-08, + "loss": 3.7864, + "step": 1980000 + }, + { + "epoch": 21.99, + "learning_rate": 7.001421495679986e-08, + "loss": 3.7936, + "step": 1980500 + }, + { + "epoch": 22.0, + "learning_rate": 7.000033316305e-08, + "loss": 3.7851, + "step": 1981000 + }, + { + "epoch": 22.0, + "eval_loss": 3.8484787940979004, + "eval_runtime": 6.3174, + "eval_samples_per_second": 245.986, + "step": 1981012 + }, + { + "epoch": 22.01, + "learning_rate": 6.998645136930014e-08, + "loss": 3.7871, + "step": 1981500 + }, + { + "epoch": 22.01, + "learning_rate": 6.997256957555027e-08, + "loss": 3.7924, + "step": 1982000 + }, + { + "epoch": 22.02, + "learning_rate": 6.995868778180041e-08, + "loss": 3.7953, + "step": 1982500 + }, + { + "epoch": 22.02, + "learning_rate": 6.994480598805054e-08, + "loss": 3.7907, + "step": 1983000 + }, + { + "epoch": 22.03, + "learning_rate": 6.993092419430068e-08, + "loss": 3.7666, + "step": 1983500 + }, + { + "epoch": 22.03, + "learning_rate": 6.991704240055083e-08, + "loss": 3.7855, + "step": 1984000 + }, + { + "epoch": 22.04, + "learning_rate": 6.990316060680097e-08, + "loss": 3.7738, + "step": 1984500 + }, + { + "epoch": 22.04, + "learning_rate": 6.98892788130511e-08, + "loss": 3.7925, + "step": 1985000 + }, + { + "epoch": 22.05, + "learning_rate": 6.987539701930124e-08, + "loss": 3.7909, + "step": 1985500 + }, + { + "epoch": 22.06, + "learning_rate": 6.986151522555138e-08, + "loss": 3.8002, + "step": 1986000 + }, + { + "epoch": 22.06, + "learning_rate": 6.984763343180152e-08, + "loss": 3.7835, + "step": 1986500 + }, + { + "epoch": 22.07, + "learning_rate": 6.983375163805167e-08, + "loss": 3.7926, + "step": 1987000 + }, + { + "epoch": 22.07, + "learning_rate": 6.98198698443018e-08, + "loss": 3.7992, + "step": 1987500 + }, + { + "epoch": 22.08, + "learning_rate": 6.980598805055194e-08, + "loss": 3.7971, + "step": 1988000 + }, + { + "epoch": 22.08, + "learning_rate": 6.979210625680208e-08, + "loss": 3.7985, + "step": 1988500 + }, + { + "epoch": 22.09, + "learning_rate": 6.977822446305221e-08, + "loss": 3.7874, + "step": 1989000 + }, + { + "epoch": 22.09, + "learning_rate": 6.976434266930235e-08, + "loss": 3.7873, + "step": 1989500 + }, + { + "epoch": 22.1, + "learning_rate": 6.97504608755525e-08, + "loss": 3.7859, + "step": 1990000 + }, + { + "epoch": 22.11, + "learning_rate": 6.973657908180262e-08, + "loss": 3.8092, + "step": 1990500 + }, + { + "epoch": 22.11, + "learning_rate": 6.972269728805277e-08, + "loss": 3.7872, + "step": 1991000 + }, + { + "epoch": 22.12, + "learning_rate": 6.970881549430291e-08, + "loss": 3.7697, + "step": 1991500 + }, + { + "epoch": 22.12, + "learning_rate": 6.969493370055305e-08, + "loss": 3.7745, + "step": 1992000 + }, + { + "epoch": 22.13, + "learning_rate": 6.968105190680319e-08, + "loss": 3.7949, + "step": 1992500 + }, + { + "epoch": 22.13, + "learning_rate": 6.966717011305333e-08, + "loss": 3.7931, + "step": 1993000 + }, + { + "epoch": 22.14, + "learning_rate": 6.965328831930346e-08, + "loss": 3.7968, + "step": 1993500 + }, + { + "epoch": 22.14, + "learning_rate": 6.96394065255536e-08, + "loss": 3.7767, + "step": 1994000 + }, + { + "epoch": 22.15, + "learning_rate": 6.962552473180375e-08, + "loss": 3.8058, + "step": 1994500 + }, + { + "epoch": 22.16, + "learning_rate": 6.961164293805388e-08, + "loss": 3.7957, + "step": 1995000 + }, + { + "epoch": 22.16, + "learning_rate": 6.959776114430402e-08, + "loss": 3.7939, + "step": 1995500 + }, + { + "epoch": 22.17, + "learning_rate": 6.958387935055415e-08, + "loss": 3.7841, + "step": 1996000 + }, + { + "epoch": 22.17, + "learning_rate": 6.956999755680429e-08, + "loss": 3.7881, + "step": 1996500 + }, + { + "epoch": 22.18, + "learning_rate": 6.955611576305443e-08, + "loss": 3.7743, + "step": 1997000 + }, + { + "epoch": 22.18, + "learning_rate": 6.954223396930457e-08, + "loss": 3.8086, + "step": 1997500 + }, + { + "epoch": 22.19, + "learning_rate": 6.952835217555472e-08, + "loss": 3.7994, + "step": 1998000 + }, + { + "epoch": 22.19, + "learning_rate": 6.951447038180486e-08, + "loss": 3.8022, + "step": 1998500 + }, + { + "epoch": 22.2, + "learning_rate": 6.950058858805499e-08, + "loss": 3.7674, + "step": 1999000 + }, + { + "epoch": 22.21, + "learning_rate": 6.948670679430513e-08, + "loss": 3.7945, + "step": 1999500 + }, + { + "epoch": 22.21, + "learning_rate": 6.947282500055527e-08, + "loss": 3.7764, + "step": 2000000 + }, + { + "epoch": 22.22, + "learning_rate": 6.945894320680542e-08, + "loss": 3.7923, + "step": 2000500 + }, + { + "epoch": 22.22, + "learning_rate": 6.944506141305554e-08, + "loss": 3.7838, + "step": 2001000 + }, + { + "epoch": 22.23, + "learning_rate": 6.943117961930567e-08, + "loss": 3.7779, + "step": 2001500 + }, + { + "epoch": 22.23, + "learning_rate": 6.941729782555582e-08, + "loss": 3.7668, + "step": 2002000 + }, + { + "epoch": 22.24, + "learning_rate": 6.940341603180596e-08, + "loss": 3.7736, + "step": 2002500 + }, + { + "epoch": 22.24, + "learning_rate": 6.93895342380561e-08, + "loss": 3.7946, + "step": 2003000 + }, + { + "epoch": 22.25, + "learning_rate": 6.937565244430624e-08, + "loss": 3.8017, + "step": 2003500 + }, + { + "epoch": 22.26, + "learning_rate": 6.936177065055638e-08, + "loss": 3.79, + "step": 2004000 + }, + { + "epoch": 22.26, + "learning_rate": 6.934788885680651e-08, + "loss": 3.7758, + "step": 2004500 + }, + { + "epoch": 22.27, + "learning_rate": 6.933400706305666e-08, + "loss": 3.7753, + "step": 2005000 + }, + { + "epoch": 22.27, + "learning_rate": 6.93201252693068e-08, + "loss": 3.7827, + "step": 2005500 + }, + { + "epoch": 22.28, + "learning_rate": 6.930624347555694e-08, + "loss": 3.7772, + "step": 2006000 + }, + { + "epoch": 22.28, + "learning_rate": 6.929236168180708e-08, + "loss": 3.7935, + "step": 2006500 + }, + { + "epoch": 22.29, + "learning_rate": 6.927847988805721e-08, + "loss": 3.7779, + "step": 2007000 + }, + { + "epoch": 22.29, + "learning_rate": 6.926459809430734e-08, + "loss": 3.796, + "step": 2007500 + }, + { + "epoch": 22.3, + "learning_rate": 6.925071630055748e-08, + "loss": 3.762, + "step": 2008000 + }, + { + "epoch": 22.31, + "learning_rate": 6.923683450680763e-08, + "loss": 3.7926, + "step": 2008500 + }, + { + "epoch": 22.31, + "learning_rate": 6.922295271305777e-08, + "loss": 3.7823, + "step": 2009000 + }, + { + "epoch": 22.32, + "learning_rate": 6.920907091930791e-08, + "loss": 3.802, + "step": 2009500 + }, + { + "epoch": 22.32, + "learning_rate": 6.919518912555804e-08, + "loss": 3.7753, + "step": 2010000 + }, + { + "epoch": 22.33, + "learning_rate": 6.918130733180818e-08, + "loss": 3.7734, + "step": 2010500 + }, + { + "epoch": 22.33, + "learning_rate": 6.916742553805832e-08, + "loss": 3.7947, + "step": 2011000 + }, + { + "epoch": 22.34, + "learning_rate": 6.915354374430847e-08, + "loss": 3.7967, + "step": 2011500 + }, + { + "epoch": 22.34, + "learning_rate": 6.913966195055861e-08, + "loss": 3.7954, + "step": 2012000 + }, + { + "epoch": 22.35, + "learning_rate": 6.912578015680875e-08, + "loss": 3.7812, + "step": 2012500 + }, + { + "epoch": 22.36, + "learning_rate": 6.911189836305888e-08, + "loss": 3.7872, + "step": 2013000 + }, + { + "epoch": 22.36, + "learning_rate": 6.909801656930901e-08, + "loss": 3.7915, + "step": 2013500 + }, + { + "epoch": 22.37, + "learning_rate": 6.908413477555915e-08, + "loss": 3.7899, + "step": 2014000 + }, + { + "epoch": 22.37, + "learning_rate": 6.907025298180929e-08, + "loss": 3.7875, + "step": 2014500 + }, + { + "epoch": 22.38, + "learning_rate": 6.905637118805944e-08, + "loss": 3.7868, + "step": 2015000 + }, + { + "epoch": 22.38, + "learning_rate": 6.904248939430956e-08, + "loss": 3.8169, + "step": 2015500 + }, + { + "epoch": 22.39, + "learning_rate": 6.90286076005597e-08, + "loss": 3.7773, + "step": 2016000 + }, + { + "epoch": 22.39, + "learning_rate": 6.901472580680985e-08, + "loss": 3.7923, + "step": 2016500 + }, + { + "epoch": 22.4, + "learning_rate": 6.900084401305999e-08, + "loss": 3.7977, + "step": 2017000 + }, + { + "epoch": 22.41, + "learning_rate": 6.898696221931013e-08, + "loss": 3.7897, + "step": 2017500 + }, + { + "epoch": 22.41, + "learning_rate": 6.897308042556028e-08, + "loss": 3.7929, + "step": 2018000 + }, + { + "epoch": 22.42, + "learning_rate": 6.89591986318104e-08, + "loss": 3.8075, + "step": 2018500 + }, + { + "epoch": 22.42, + "learning_rate": 6.894531683806055e-08, + "loss": 3.7867, + "step": 2019000 + }, + { + "epoch": 22.43, + "learning_rate": 6.893143504431068e-08, + "loss": 3.7928, + "step": 2019500 + }, + { + "epoch": 22.43, + "learning_rate": 6.891755325056082e-08, + "loss": 3.7794, + "step": 2020000 + }, + { + "epoch": 22.44, + "learning_rate": 6.890367145681096e-08, + "loss": 3.7838, + "step": 2020500 + }, + { + "epoch": 22.44, + "learning_rate": 6.88897896630611e-08, + "loss": 3.7674, + "step": 2021000 + }, + { + "epoch": 22.45, + "learning_rate": 6.887590786931123e-08, + "loss": 3.7865, + "step": 2021500 + }, + { + "epoch": 22.46, + "learning_rate": 6.886202607556137e-08, + "loss": 3.7721, + "step": 2022000 + }, + { + "epoch": 22.46, + "learning_rate": 6.884814428181152e-08, + "loss": 3.7869, + "step": 2022500 + }, + { + "epoch": 22.47, + "learning_rate": 6.883426248806166e-08, + "loss": 3.785, + "step": 2023000 + }, + { + "epoch": 22.47, + "learning_rate": 6.88203806943118e-08, + "loss": 3.7929, + "step": 2023500 + }, + { + "epoch": 22.48, + "learning_rate": 6.880649890056193e-08, + "loss": 3.7761, + "step": 2024000 + }, + { + "epoch": 22.48, + "learning_rate": 6.879261710681207e-08, + "loss": 3.7886, + "step": 2024500 + }, + { + "epoch": 22.49, + "learning_rate": 6.877873531306221e-08, + "loss": 3.7965, + "step": 2025000 + }, + { + "epoch": 22.49, + "learning_rate": 6.876485351931234e-08, + "loss": 3.7879, + "step": 2025500 + }, + { + "epoch": 22.5, + "learning_rate": 6.875097172556249e-08, + "loss": 3.7966, + "step": 2026000 + }, + { + "epoch": 22.51, + "learning_rate": 6.873708993181263e-08, + "loss": 3.7729, + "step": 2026500 + }, + { + "epoch": 22.51, + "learning_rate": 6.872320813806276e-08, + "loss": 3.7856, + "step": 2027000 + }, + { + "epoch": 22.52, + "learning_rate": 6.87093263443129e-08, + "loss": 3.7729, + "step": 2027500 + }, + { + "epoch": 22.52, + "learning_rate": 6.869544455056304e-08, + "loss": 3.7935, + "step": 2028000 + }, + { + "epoch": 22.53, + "learning_rate": 6.868156275681318e-08, + "loss": 3.7977, + "step": 2028500 + }, + { + "epoch": 22.53, + "learning_rate": 6.866768096306333e-08, + "loss": 3.7955, + "step": 2029000 + }, + { + "epoch": 22.54, + "learning_rate": 6.865379916931347e-08, + "loss": 3.7826, + "step": 2029500 + }, + { + "epoch": 22.54, + "learning_rate": 6.86399173755636e-08, + "loss": 3.7743, + "step": 2030000 + }, + { + "epoch": 22.55, + "learning_rate": 6.862603558181374e-08, + "loss": 3.8023, + "step": 2030500 + }, + { + "epoch": 22.56, + "learning_rate": 6.861215378806388e-08, + "loss": 3.7918, + "step": 2031000 + }, + { + "epoch": 22.56, + "learning_rate": 6.859827199431401e-08, + "loss": 3.7863, + "step": 2031500 + }, + { + "epoch": 22.57, + "learning_rate": 6.858439020056415e-08, + "loss": 3.7613, + "step": 2032000 + }, + { + "epoch": 22.57, + "learning_rate": 6.857050840681428e-08, + "loss": 3.7797, + "step": 2032500 + }, + { + "epoch": 22.58, + "learning_rate": 6.855662661306442e-08, + "loss": 3.798, + "step": 2033000 + }, + { + "epoch": 22.58, + "learning_rate": 6.854274481931457e-08, + "loss": 3.7839, + "step": 2033500 + }, + { + "epoch": 22.59, + "learning_rate": 6.852886302556471e-08, + "loss": 3.7743, + "step": 2034000 + }, + { + "epoch": 22.59, + "learning_rate": 6.851498123181485e-08, + "loss": 3.7922, + "step": 2034500 + }, + { + "epoch": 22.6, + "learning_rate": 6.850109943806499e-08, + "loss": 3.7975, + "step": 2035000 + }, + { + "epoch": 22.61, + "learning_rate": 6.848721764431512e-08, + "loss": 3.8052, + "step": 2035500 + }, + { + "epoch": 22.61, + "learning_rate": 6.847333585056526e-08, + "loss": 3.7909, + "step": 2036000 + }, + { + "epoch": 22.62, + "learning_rate": 6.845945405681541e-08, + "loss": 3.7927, + "step": 2036500 + }, + { + "epoch": 22.62, + "learning_rate": 6.844557226306555e-08, + "loss": 3.7935, + "step": 2037000 + }, + { + "epoch": 22.63, + "learning_rate": 6.843169046931568e-08, + "loss": 3.7803, + "step": 2037500 + }, + { + "epoch": 22.63, + "learning_rate": 6.841780867556581e-08, + "loss": 3.7838, + "step": 2038000 + }, + { + "epoch": 22.64, + "learning_rate": 6.840392688181595e-08, + "loss": 3.7787, + "step": 2038500 + }, + { + "epoch": 22.64, + "learning_rate": 6.839004508806609e-08, + "loss": 3.7879, + "step": 2039000 + }, + { + "epoch": 22.65, + "learning_rate": 6.837616329431623e-08, + "loss": 3.7746, + "step": 2039500 + }, + { + "epoch": 22.66, + "learning_rate": 6.836228150056638e-08, + "loss": 3.784, + "step": 2040000 + }, + { + "epoch": 22.66, + "learning_rate": 6.834839970681652e-08, + "loss": 3.7763, + "step": 2040500 + }, + { + "epoch": 22.67, + "learning_rate": 6.833451791306665e-08, + "loss": 3.7761, + "step": 2041000 + }, + { + "epoch": 22.67, + "learning_rate": 6.832063611931679e-08, + "loss": 3.7993, + "step": 2041500 + }, + { + "epoch": 22.68, + "learning_rate": 6.830675432556693e-08, + "loss": 3.8021, + "step": 2042000 + }, + { + "epoch": 22.68, + "learning_rate": 6.829287253181707e-08, + "loss": 3.7867, + "step": 2042500 + }, + { + "epoch": 22.69, + "learning_rate": 6.827899073806722e-08, + "loss": 3.7863, + "step": 2043000 + }, + { + "epoch": 22.69, + "learning_rate": 6.826510894431735e-08, + "loss": 3.789, + "step": 2043500 + }, + { + "epoch": 22.7, + "learning_rate": 6.825122715056747e-08, + "loss": 3.7828, + "step": 2044000 + }, + { + "epoch": 22.71, + "learning_rate": 6.823734535681762e-08, + "loss": 3.7976, + "step": 2044500 + }, + { + "epoch": 22.71, + "learning_rate": 6.822346356306776e-08, + "loss": 3.7772, + "step": 2045000 + }, + { + "epoch": 22.72, + "learning_rate": 6.82095817693179e-08, + "loss": 3.7953, + "step": 2045500 + }, + { + "epoch": 22.72, + "learning_rate": 6.819569997556804e-08, + "loss": 3.7684, + "step": 2046000 + }, + { + "epoch": 22.73, + "learning_rate": 6.818181818181817e-08, + "loss": 3.7981, + "step": 2046500 + }, + { + "epoch": 22.73, + "learning_rate": 6.816793638806831e-08, + "loss": 3.7904, + "step": 2047000 + }, + { + "epoch": 22.74, + "learning_rate": 6.815405459431846e-08, + "loss": 3.7839, + "step": 2047500 + }, + { + "epoch": 22.74, + "learning_rate": 6.81401728005686e-08, + "loss": 3.7794, + "step": 2048000 + }, + { + "epoch": 22.75, + "learning_rate": 6.812629100681874e-08, + "loss": 3.78, + "step": 2048500 + }, + { + "epoch": 22.76, + "learning_rate": 6.811240921306888e-08, + "loss": 3.7841, + "step": 2049000 + }, + { + "epoch": 22.76, + "learning_rate": 6.809852741931901e-08, + "loss": 3.7841, + "step": 2049500 + }, + { + "epoch": 22.77, + "learning_rate": 6.808464562556914e-08, + "loss": 3.7819, + "step": 2050000 + }, + { + "epoch": 22.77, + "learning_rate": 6.807076383181928e-08, + "loss": 3.786, + "step": 2050500 + }, + { + "epoch": 22.78, + "learning_rate": 6.805688203806943e-08, + "loss": 3.8044, + "step": 2051000 + }, + { + "epoch": 22.78, + "learning_rate": 6.804300024431957e-08, + "loss": 3.7804, + "step": 2051500 + }, + { + "epoch": 22.79, + "learning_rate": 6.80291184505697e-08, + "loss": 3.7735, + "step": 2052000 + }, + { + "epoch": 22.79, + "learning_rate": 6.801523665681984e-08, + "loss": 3.8017, + "step": 2052500 + }, + { + "epoch": 22.8, + "learning_rate": 6.800135486306998e-08, + "loss": 3.7766, + "step": 2053000 + }, + { + "epoch": 22.81, + "learning_rate": 6.798747306932012e-08, + "loss": 3.7809, + "step": 2053500 + }, + { + "epoch": 22.81, + "learning_rate": 6.797359127557027e-08, + "loss": 3.7856, + "step": 2054000 + }, + { + "epoch": 22.82, + "learning_rate": 6.795970948182041e-08, + "loss": 3.7727, + "step": 2054500 + }, + { + "epoch": 22.82, + "learning_rate": 6.794582768807054e-08, + "loss": 3.798, + "step": 2055000 + }, + { + "epoch": 22.83, + "learning_rate": 6.793194589432068e-08, + "loss": 3.7815, + "step": 2055500 + }, + { + "epoch": 22.83, + "learning_rate": 6.791806410057081e-08, + "loss": 3.7905, + "step": 2056000 + }, + { + "epoch": 22.84, + "learning_rate": 6.790418230682095e-08, + "loss": 3.7875, + "step": 2056500 + }, + { + "epoch": 22.84, + "learning_rate": 6.78903005130711e-08, + "loss": 3.7908, + "step": 2057000 + }, + { + "epoch": 22.85, + "learning_rate": 6.787641871932124e-08, + "loss": 3.7915, + "step": 2057500 + }, + { + "epoch": 22.85, + "learning_rate": 6.786253692557137e-08, + "loss": 3.7781, + "step": 2058000 + }, + { + "epoch": 22.86, + "learning_rate": 6.784865513182151e-08, + "loss": 3.7852, + "step": 2058500 + }, + { + "epoch": 22.87, + "learning_rate": 6.783477333807165e-08, + "loss": 3.796, + "step": 2059000 + }, + { + "epoch": 22.87, + "learning_rate": 6.782089154432179e-08, + "loss": 3.7695, + "step": 2059500 + }, + { + "epoch": 22.88, + "learning_rate": 6.780700975057193e-08, + "loss": 3.7731, + "step": 2060000 + }, + { + "epoch": 22.88, + "learning_rate": 6.779312795682206e-08, + "loss": 3.7799, + "step": 2060500 + }, + { + "epoch": 22.89, + "learning_rate": 6.77792461630722e-08, + "loss": 3.7956, + "step": 2061000 + }, + { + "epoch": 22.89, + "learning_rate": 6.776536436932235e-08, + "loss": 3.7715, + "step": 2061500 + }, + { + "epoch": 22.9, + "learning_rate": 6.775148257557248e-08, + "loss": 3.7685, + "step": 2062000 + }, + { + "epoch": 22.9, + "learning_rate": 6.773760078182262e-08, + "loss": 3.7821, + "step": 2062500 + }, + { + "epoch": 22.91, + "learning_rate": 6.772371898807276e-08, + "loss": 3.7799, + "step": 2063000 + }, + { + "epoch": 22.92, + "learning_rate": 6.770983719432289e-08, + "loss": 3.7798, + "step": 2063500 + }, + { + "epoch": 22.92, + "learning_rate": 6.769595540057303e-08, + "loss": 3.8001, + "step": 2064000 + }, + { + "epoch": 22.93, + "learning_rate": 6.768207360682318e-08, + "loss": 3.761, + "step": 2064500 + }, + { + "epoch": 22.93, + "learning_rate": 6.766819181307332e-08, + "loss": 3.7929, + "step": 2065000 + }, + { + "epoch": 22.94, + "learning_rate": 6.765431001932346e-08, + "loss": 3.7812, + "step": 2065500 + }, + { + "epoch": 22.94, + "learning_rate": 6.76404282255736e-08, + "loss": 3.7751, + "step": 2066000 + }, + { + "epoch": 22.95, + "learning_rate": 6.762654643182373e-08, + "loss": 3.778, + "step": 2066500 + }, + { + "epoch": 22.95, + "learning_rate": 6.761266463807387e-08, + "loss": 3.7746, + "step": 2067000 + }, + { + "epoch": 22.96, + "learning_rate": 6.759878284432402e-08, + "loss": 3.7818, + "step": 2067500 + }, + { + "epoch": 22.97, + "learning_rate": 6.758490105057414e-08, + "loss": 3.7983, + "step": 2068000 + }, + { + "epoch": 22.97, + "learning_rate": 6.757101925682429e-08, + "loss": 3.7863, + "step": 2068500 + }, + { + "epoch": 22.98, + "learning_rate": 6.755713746307442e-08, + "loss": 3.7871, + "step": 2069000 + }, + { + "epoch": 22.98, + "learning_rate": 6.754325566932456e-08, + "loss": 3.8034, + "step": 2069500 + }, + { + "epoch": 22.99, + "learning_rate": 6.75293738755747e-08, + "loss": 3.7894, + "step": 2070000 + }, + { + "epoch": 22.99, + "learning_rate": 6.751549208182484e-08, + "loss": 3.789, + "step": 2070500 + }, + { + "epoch": 23.0, + "learning_rate": 6.750161028807498e-08, + "loss": 3.7924, + "step": 2071000 + }, + { + "epoch": 23.0, + "eval_loss": 3.8470423221588135, + "eval_runtime": 6.312, + "eval_samples_per_second": 246.197, + "step": 2071058 + }, + { + "epoch": 23.0, + "learning_rate": 6.748772849432513e-08, + "loss": 3.7791, + "step": 2071500 + }, + { + "epoch": 23.01, + "learning_rate": 6.747384670057526e-08, + "loss": 3.788, + "step": 2072000 + }, + { + "epoch": 23.02, + "learning_rate": 6.74599649068254e-08, + "loss": 3.7828, + "step": 2072500 + }, + { + "epoch": 23.02, + "learning_rate": 6.744608311307554e-08, + "loss": 3.781, + "step": 2073000 + }, + { + "epoch": 23.03, + "learning_rate": 6.743220131932568e-08, + "loss": 3.7739, + "step": 2073500 + }, + { + "epoch": 23.03, + "learning_rate": 6.741831952557581e-08, + "loss": 3.7775, + "step": 2074000 + }, + { + "epoch": 23.04, + "learning_rate": 6.740443773182594e-08, + "loss": 3.7998, + "step": 2074500 + }, + { + "epoch": 23.04, + "learning_rate": 6.739055593807608e-08, + "loss": 3.7705, + "step": 2075000 + }, + { + "epoch": 23.05, + "learning_rate": 6.737667414432623e-08, + "loss": 3.7864, + "step": 2075500 + }, + { + "epoch": 23.05, + "learning_rate": 6.736279235057637e-08, + "loss": 3.7928, + "step": 2076000 + }, + { + "epoch": 23.06, + "learning_rate": 6.734891055682651e-08, + "loss": 3.7908, + "step": 2076500 + }, + { + "epoch": 23.07, + "learning_rate": 6.733502876307665e-08, + "loss": 3.778, + "step": 2077000 + }, + { + "epoch": 23.07, + "learning_rate": 6.732114696932678e-08, + "loss": 3.7924, + "step": 2077500 + }, + { + "epoch": 23.08, + "learning_rate": 6.730726517557692e-08, + "loss": 3.79, + "step": 2078000 + }, + { + "epoch": 23.08, + "learning_rate": 6.729338338182707e-08, + "loss": 3.781, + "step": 2078500 + }, + { + "epoch": 23.09, + "learning_rate": 6.727950158807721e-08, + "loss": 3.7905, + "step": 2079000 + }, + { + "epoch": 23.09, + "learning_rate": 6.726561979432735e-08, + "loss": 3.7688, + "step": 2079500 + }, + { + "epoch": 23.1, + "learning_rate": 6.725173800057748e-08, + "loss": 3.7778, + "step": 2080000 + }, + { + "epoch": 23.1, + "learning_rate": 6.723785620682761e-08, + "loss": 3.7955, + "step": 2080500 + }, + { + "epoch": 23.11, + "learning_rate": 6.722397441307775e-08, + "loss": 3.7835, + "step": 2081000 + }, + { + "epoch": 23.12, + "learning_rate": 6.721009261932789e-08, + "loss": 3.7987, + "step": 2081500 + }, + { + "epoch": 23.12, + "learning_rate": 6.719621082557804e-08, + "loss": 3.7814, + "step": 2082000 + }, + { + "epoch": 23.13, + "learning_rate": 6.718232903182818e-08, + "loss": 3.7963, + "step": 2082500 + }, + { + "epoch": 23.13, + "learning_rate": 6.71684472380783e-08, + "loss": 3.7843, + "step": 2083000 + }, + { + "epoch": 23.14, + "learning_rate": 6.715456544432845e-08, + "loss": 3.7916, + "step": 2083500 + }, + { + "epoch": 23.14, + "learning_rate": 6.714068365057859e-08, + "loss": 3.7684, + "step": 2084000 + }, + { + "epoch": 23.15, + "learning_rate": 6.712680185682873e-08, + "loss": 3.784, + "step": 2084500 + }, + { + "epoch": 23.15, + "learning_rate": 6.711292006307888e-08, + "loss": 3.7906, + "step": 2085000 + }, + { + "epoch": 23.16, + "learning_rate": 6.709903826932902e-08, + "loss": 3.8045, + "step": 2085500 + }, + { + "epoch": 23.17, + "learning_rate": 6.708515647557915e-08, + "loss": 3.7896, + "step": 2086000 + }, + { + "epoch": 23.17, + "learning_rate": 6.707127468182928e-08, + "loss": 3.7908, + "step": 2086500 + }, + { + "epoch": 23.18, + "learning_rate": 6.705739288807942e-08, + "loss": 3.7766, + "step": 2087000 + }, + { + "epoch": 23.18, + "learning_rate": 6.704351109432956e-08, + "loss": 3.7789, + "step": 2087500 + }, + { + "epoch": 23.19, + "learning_rate": 6.70296293005797e-08, + "loss": 3.7694, + "step": 2088000 + }, + { + "epoch": 23.19, + "learning_rate": 6.701574750682984e-08, + "loss": 3.7662, + "step": 2088500 + }, + { + "epoch": 23.2, + "learning_rate": 6.700186571307997e-08, + "loss": 3.7678, + "step": 2089000 + }, + { + "epoch": 23.2, + "learning_rate": 6.698798391933012e-08, + "loss": 3.7835, + "step": 2089500 + }, + { + "epoch": 23.21, + "learning_rate": 6.697410212558026e-08, + "loss": 3.7938, + "step": 2090000 + }, + { + "epoch": 23.22, + "learning_rate": 6.69602203318304e-08, + "loss": 3.8115, + "step": 2090500 + }, + { + "epoch": 23.22, + "learning_rate": 6.694633853808054e-08, + "loss": 3.7777, + "step": 2091000 + }, + { + "epoch": 23.23, + "learning_rate": 6.693245674433067e-08, + "loss": 3.7758, + "step": 2091500 + }, + { + "epoch": 23.23, + "learning_rate": 6.691857495058081e-08, + "loss": 3.8004, + "step": 2092000 + }, + { + "epoch": 23.24, + "learning_rate": 6.690469315683094e-08, + "loss": 3.7832, + "step": 2092500 + }, + { + "epoch": 23.24, + "learning_rate": 6.689081136308109e-08, + "loss": 3.788, + "step": 2093000 + }, + { + "epoch": 23.25, + "learning_rate": 6.687692956933123e-08, + "loss": 3.7774, + "step": 2093500 + }, + { + "epoch": 23.25, + "learning_rate": 6.686304777558137e-08, + "loss": 3.786, + "step": 2094000 + }, + { + "epoch": 23.26, + "learning_rate": 6.68491659818315e-08, + "loss": 3.7752, + "step": 2094500 + }, + { + "epoch": 23.27, + "learning_rate": 6.683528418808164e-08, + "loss": 3.7946, + "step": 2095000 + }, + { + "epoch": 23.27, + "learning_rate": 6.682140239433178e-08, + "loss": 3.7814, + "step": 2095500 + }, + { + "epoch": 23.28, + "learning_rate": 6.680752060058193e-08, + "loss": 3.7875, + "step": 2096000 + }, + { + "epoch": 23.28, + "learning_rate": 6.679363880683207e-08, + "loss": 3.7734, + "step": 2096500 + }, + { + "epoch": 23.29, + "learning_rate": 6.67797570130822e-08, + "loss": 3.7906, + "step": 2097000 + }, + { + "epoch": 23.29, + "learning_rate": 6.676587521933234e-08, + "loss": 3.7844, + "step": 2097500 + }, + { + "epoch": 23.3, + "learning_rate": 6.675199342558248e-08, + "loss": 3.7729, + "step": 2098000 + }, + { + "epoch": 23.3, + "learning_rate": 6.673811163183261e-08, + "loss": 3.7776, + "step": 2098500 + }, + { + "epoch": 23.31, + "learning_rate": 6.672422983808275e-08, + "loss": 3.7633, + "step": 2099000 + }, + { + "epoch": 23.32, + "learning_rate": 6.67103480443329e-08, + "loss": 3.7919, + "step": 2099500 + }, + { + "epoch": 23.32, + "learning_rate": 6.669646625058302e-08, + "loss": 3.7902, + "step": 2100000 + }, + { + "epoch": 23.33, + "learning_rate": 6.668258445683317e-08, + "loss": 3.7825, + "step": 2100500 + }, + { + "epoch": 23.33, + "learning_rate": 6.666870266308331e-08, + "loss": 3.779, + "step": 2101000 + }, + { + "epoch": 23.34, + "learning_rate": 6.665482086933345e-08, + "loss": 3.7957, + "step": 2101500 + }, + { + "epoch": 23.34, + "learning_rate": 6.66409390755836e-08, + "loss": 3.7888, + "step": 2102000 + }, + { + "epoch": 23.35, + "learning_rate": 6.662705728183374e-08, + "loss": 3.7831, + "step": 2102500 + }, + { + "epoch": 23.35, + "learning_rate": 6.661317548808386e-08, + "loss": 3.7662, + "step": 2103000 + }, + { + "epoch": 23.36, + "learning_rate": 6.659929369433401e-08, + "loss": 3.7787, + "step": 2103500 + }, + { + "epoch": 23.37, + "learning_rate": 6.658541190058415e-08, + "loss": 3.7825, + "step": 2104000 + }, + { + "epoch": 23.37, + "learning_rate": 6.657153010683428e-08, + "loss": 3.7757, + "step": 2104500 + }, + { + "epoch": 23.38, + "learning_rate": 6.655764831308442e-08, + "loss": 3.7915, + "step": 2105000 + }, + { + "epoch": 23.38, + "learning_rate": 6.654376651933455e-08, + "loss": 3.7791, + "step": 2105500 + }, + { + "epoch": 23.39, + "learning_rate": 6.652988472558469e-08, + "loss": 3.7841, + "step": 2106000 + }, + { + "epoch": 23.39, + "learning_rate": 6.651600293183483e-08, + "loss": 3.7806, + "step": 2106500 + }, + { + "epoch": 23.4, + "learning_rate": 6.650212113808498e-08, + "loss": 3.7761, + "step": 2107000 + }, + { + "epoch": 23.4, + "learning_rate": 6.648823934433512e-08, + "loss": 3.7916, + "step": 2107500 + }, + { + "epoch": 23.41, + "learning_rate": 6.647435755058526e-08, + "loss": 3.7705, + "step": 2108000 + }, + { + "epoch": 23.42, + "learning_rate": 6.646047575683539e-08, + "loss": 3.7778, + "step": 2108500 + }, + { + "epoch": 23.42, + "learning_rate": 6.644659396308553e-08, + "loss": 3.7875, + "step": 2109000 + }, + { + "epoch": 23.43, + "learning_rate": 6.643271216933567e-08, + "loss": 3.794, + "step": 2109500 + }, + { + "epoch": 23.43, + "learning_rate": 6.641883037558582e-08, + "loss": 3.7824, + "step": 2110000 + }, + { + "epoch": 23.44, + "learning_rate": 6.640494858183595e-08, + "loss": 3.7773, + "step": 2110500 + }, + { + "epoch": 23.44, + "learning_rate": 6.639106678808607e-08, + "loss": 3.7975, + "step": 2111000 + }, + { + "epoch": 23.45, + "learning_rate": 6.637718499433622e-08, + "loss": 3.8004, + "step": 2111500 + }, + { + "epoch": 23.45, + "learning_rate": 6.636330320058636e-08, + "loss": 3.7899, + "step": 2112000 + }, + { + "epoch": 23.46, + "learning_rate": 6.63494214068365e-08, + "loss": 3.7871, + "step": 2112500 + }, + { + "epoch": 23.47, + "learning_rate": 6.633553961308664e-08, + "loss": 3.7853, + "step": 2113000 + }, + { + "epoch": 23.47, + "learning_rate": 6.632165781933679e-08, + "loss": 3.7827, + "step": 2113500 + }, + { + "epoch": 23.48, + "learning_rate": 6.630777602558692e-08, + "loss": 3.7907, + "step": 2114000 + }, + { + "epoch": 23.48, + "learning_rate": 6.629389423183706e-08, + "loss": 3.7943, + "step": 2114500 + }, + { + "epoch": 23.49, + "learning_rate": 6.62800124380872e-08, + "loss": 3.793, + "step": 2115000 + }, + { + "epoch": 23.49, + "learning_rate": 6.626613064433734e-08, + "loss": 3.7769, + "step": 2115500 + }, + { + "epoch": 23.5, + "learning_rate": 6.625224885058748e-08, + "loss": 3.7895, + "step": 2116000 + }, + { + "epoch": 23.5, + "learning_rate": 6.623836705683761e-08, + "loss": 3.7738, + "step": 2116500 + }, + { + "epoch": 23.51, + "learning_rate": 6.622448526308774e-08, + "loss": 3.7806, + "step": 2117000 + }, + { + "epoch": 23.52, + "learning_rate": 6.621060346933788e-08, + "loss": 3.7863, + "step": 2117500 + }, + { + "epoch": 23.52, + "learning_rate": 6.619672167558803e-08, + "loss": 3.7868, + "step": 2118000 + }, + { + "epoch": 23.53, + "learning_rate": 6.618283988183817e-08, + "loss": 3.7666, + "step": 2118500 + }, + { + "epoch": 23.53, + "learning_rate": 6.616895808808831e-08, + "loss": 3.8007, + "step": 2119000 + }, + { + "epoch": 23.54, + "learning_rate": 6.615507629433844e-08, + "loss": 3.7921, + "step": 2119500 + }, + { + "epoch": 23.54, + "learning_rate": 6.614119450058858e-08, + "loss": 3.7645, + "step": 2120000 + }, + { + "epoch": 23.55, + "learning_rate": 6.612731270683872e-08, + "loss": 3.7755, + "step": 2120500 + }, + { + "epoch": 23.55, + "learning_rate": 6.611343091308887e-08, + "loss": 3.7808, + "step": 2121000 + }, + { + "epoch": 23.56, + "learning_rate": 6.609954911933901e-08, + "loss": 3.7815, + "step": 2121500 + }, + { + "epoch": 23.57, + "learning_rate": 6.608566732558914e-08, + "loss": 3.8045, + "step": 2122000 + }, + { + "epoch": 23.57, + "learning_rate": 6.607178553183928e-08, + "loss": 3.7643, + "step": 2122500 + }, + { + "epoch": 23.58, + "learning_rate": 6.605790373808941e-08, + "loss": 3.7958, + "step": 2123000 + }, + { + "epoch": 23.58, + "learning_rate": 6.604402194433955e-08, + "loss": 3.8002, + "step": 2123500 + }, + { + "epoch": 23.59, + "learning_rate": 6.60301401505897e-08, + "loss": 3.7913, + "step": 2124000 + }, + { + "epoch": 23.59, + "learning_rate": 6.601625835683984e-08, + "loss": 3.7967, + "step": 2124500 + }, + { + "epoch": 23.6, + "learning_rate": 6.600237656308998e-08, + "loss": 3.7851, + "step": 2125000 + }, + { + "epoch": 23.6, + "learning_rate": 6.598849476934011e-08, + "loss": 3.7898, + "step": 2125500 + }, + { + "epoch": 23.61, + "learning_rate": 6.597461297559025e-08, + "loss": 3.7706, + "step": 2126000 + }, + { + "epoch": 23.62, + "learning_rate": 6.596073118184039e-08, + "loss": 3.7717, + "step": 2126500 + }, + { + "epoch": 23.62, + "learning_rate": 6.594684938809053e-08, + "loss": 3.793, + "step": 2127000 + }, + { + "epoch": 23.63, + "learning_rate": 6.593296759434068e-08, + "loss": 3.7624, + "step": 2127500 + }, + { + "epoch": 23.63, + "learning_rate": 6.59190858005908e-08, + "loss": 3.8061, + "step": 2128000 + }, + { + "epoch": 23.64, + "learning_rate": 6.590520400684095e-08, + "loss": 3.7868, + "step": 2128500 + }, + { + "epoch": 23.64, + "learning_rate": 6.589132221309108e-08, + "loss": 3.7649, + "step": 2129000 + }, + { + "epoch": 23.65, + "learning_rate": 6.587744041934122e-08, + "loss": 3.7737, + "step": 2129500 + }, + { + "epoch": 23.65, + "learning_rate": 6.586355862559136e-08, + "loss": 3.7802, + "step": 2130000 + }, + { + "epoch": 23.66, + "learning_rate": 6.58496768318415e-08, + "loss": 3.777, + "step": 2130500 + }, + { + "epoch": 23.67, + "learning_rate": 6.583579503809163e-08, + "loss": 3.7966, + "step": 2131000 + }, + { + "epoch": 23.67, + "learning_rate": 6.582191324434178e-08, + "loss": 3.784, + "step": 2131500 + }, + { + "epoch": 23.68, + "learning_rate": 6.580803145059192e-08, + "loss": 3.7752, + "step": 2132000 + }, + { + "epoch": 23.68, + "learning_rate": 6.579414965684206e-08, + "loss": 3.7828, + "step": 2132500 + }, + { + "epoch": 23.69, + "learning_rate": 6.57802678630922e-08, + "loss": 3.7681, + "step": 2133000 + }, + { + "epoch": 23.69, + "learning_rate": 6.576638606934233e-08, + "loss": 3.7744, + "step": 2133500 + }, + { + "epoch": 23.7, + "learning_rate": 6.575250427559247e-08, + "loss": 3.7893, + "step": 2134000 + }, + { + "epoch": 23.7, + "learning_rate": 6.573862248184262e-08, + "loss": 3.77, + "step": 2134500 + }, + { + "epoch": 23.71, + "learning_rate": 6.572474068809274e-08, + "loss": 3.7745, + "step": 2135000 + }, + { + "epoch": 23.72, + "learning_rate": 6.571085889434289e-08, + "loss": 3.786, + "step": 2135500 + }, + { + "epoch": 23.72, + "learning_rate": 6.569697710059303e-08, + "loss": 3.7672, + "step": 2136000 + }, + { + "epoch": 23.73, + "learning_rate": 6.568309530684316e-08, + "loss": 3.8036, + "step": 2136500 + }, + { + "epoch": 23.73, + "learning_rate": 6.56692135130933e-08, + "loss": 3.7874, + "step": 2137000 + }, + { + "epoch": 23.74, + "learning_rate": 6.565533171934344e-08, + "loss": 3.765, + "step": 2137500 + }, + { + "epoch": 23.74, + "learning_rate": 6.564144992559358e-08, + "loss": 3.7837, + "step": 2138000 + }, + { + "epoch": 23.75, + "learning_rate": 6.562756813184373e-08, + "loss": 3.7807, + "step": 2138500 + }, + { + "epoch": 23.75, + "learning_rate": 6.561368633809387e-08, + "loss": 3.7927, + "step": 2139000 + }, + { + "epoch": 23.76, + "learning_rate": 6.5599804544344e-08, + "loss": 3.768, + "step": 2139500 + }, + { + "epoch": 23.77, + "learning_rate": 6.558592275059414e-08, + "loss": 3.7887, + "step": 2140000 + }, + { + "epoch": 23.77, + "learning_rate": 6.557204095684428e-08, + "loss": 3.775, + "step": 2140500 + }, + { + "epoch": 23.78, + "learning_rate": 6.555815916309441e-08, + "loss": 3.7894, + "step": 2141000 + }, + { + "epoch": 23.78, + "learning_rate": 6.554427736934455e-08, + "loss": 3.7793, + "step": 2141500 + }, + { + "epoch": 23.79, + "learning_rate": 6.553039557559468e-08, + "loss": 3.7877, + "step": 2142000 + }, + { + "epoch": 23.79, + "learning_rate": 6.551651378184483e-08, + "loss": 3.7821, + "step": 2142500 + }, + { + "epoch": 23.8, + "learning_rate": 6.550263198809497e-08, + "loss": 3.7771, + "step": 2143000 + }, + { + "epoch": 23.8, + "learning_rate": 6.548875019434511e-08, + "loss": 3.7745, + "step": 2143500 + }, + { + "epoch": 23.81, + "learning_rate": 6.547486840059525e-08, + "loss": 3.7832, + "step": 2144000 + }, + { + "epoch": 23.82, + "learning_rate": 6.54609866068454e-08, + "loss": 3.7857, + "step": 2144500 + }, + { + "epoch": 23.82, + "learning_rate": 6.544710481309552e-08, + "loss": 3.7874, + "step": 2145000 + }, + { + "epoch": 23.83, + "learning_rate": 6.543322301934567e-08, + "loss": 3.7822, + "step": 2145500 + }, + { + "epoch": 23.83, + "learning_rate": 6.541934122559581e-08, + "loss": 3.789, + "step": 2146000 + }, + { + "epoch": 23.84, + "learning_rate": 6.540545943184595e-08, + "loss": 3.7924, + "step": 2146500 + }, + { + "epoch": 23.84, + "learning_rate": 6.539157763809608e-08, + "loss": 3.7907, + "step": 2147000 + }, + { + "epoch": 23.85, + "learning_rate": 6.537769584434621e-08, + "loss": 3.7795, + "step": 2147500 + }, + { + "epoch": 23.85, + "learning_rate": 6.536381405059635e-08, + "loss": 3.7665, + "step": 2148000 + }, + { + "epoch": 23.86, + "learning_rate": 6.534993225684649e-08, + "loss": 3.7786, + "step": 2148500 + }, + { + "epoch": 23.87, + "learning_rate": 6.533605046309664e-08, + "loss": 3.7822, + "step": 2149000 + }, + { + "epoch": 23.87, + "learning_rate": 6.532216866934678e-08, + "loss": 3.7764, + "step": 2149500 + }, + { + "epoch": 23.88, + "learning_rate": 6.530828687559692e-08, + "loss": 3.7871, + "step": 2150000 + }, + { + "epoch": 23.88, + "learning_rate": 6.529440508184705e-08, + "loss": 3.7928, + "step": 2150500 + }, + { + "epoch": 23.89, + "learning_rate": 6.528052328809719e-08, + "loss": 3.787, + "step": 2151000 + }, + { + "epoch": 23.89, + "learning_rate": 6.526664149434733e-08, + "loss": 3.7874, + "step": 2151500 + }, + { + "epoch": 23.9, + "learning_rate": 6.525275970059748e-08, + "loss": 3.7823, + "step": 2152000 + }, + { + "epoch": 23.9, + "learning_rate": 6.52388779068476e-08, + "loss": 3.7776, + "step": 2152500 + }, + { + "epoch": 23.91, + "learning_rate": 6.522499611309775e-08, + "loss": 3.7992, + "step": 2153000 + }, + { + "epoch": 23.92, + "learning_rate": 6.521111431934788e-08, + "loss": 3.7724, + "step": 2153500 + }, + { + "epoch": 23.92, + "learning_rate": 6.519723252559802e-08, + "loss": 3.7752, + "step": 2154000 + }, + { + "epoch": 23.93, + "learning_rate": 6.518335073184816e-08, + "loss": 3.7777, + "step": 2154500 + }, + { + "epoch": 23.93, + "learning_rate": 6.51694689380983e-08, + "loss": 3.7808, + "step": 2155000 + }, + { + "epoch": 23.94, + "learning_rate": 6.515558714434845e-08, + "loss": 3.7812, + "step": 2155500 + }, + { + "epoch": 23.94, + "learning_rate": 6.514170535059857e-08, + "loss": 3.7915, + "step": 2156000 + }, + { + "epoch": 23.95, + "learning_rate": 6.512782355684872e-08, + "loss": 3.7822, + "step": 2156500 + }, + { + "epoch": 23.95, + "learning_rate": 6.511394176309886e-08, + "loss": 3.7834, + "step": 2157000 + }, + { + "epoch": 23.96, + "learning_rate": 6.5100059969349e-08, + "loss": 3.7774, + "step": 2157500 + }, + { + "epoch": 23.97, + "learning_rate": 6.508617817559914e-08, + "loss": 3.7848, + "step": 2158000 + }, + { + "epoch": 23.97, + "learning_rate": 6.507229638184927e-08, + "loss": 3.7703, + "step": 2158500 + }, + { + "epoch": 23.98, + "learning_rate": 6.505841458809941e-08, + "loss": 3.7652, + "step": 2159000 + }, + { + "epoch": 23.98, + "learning_rate": 6.504453279434954e-08, + "loss": 3.7697, + "step": 2159500 + }, + { + "epoch": 23.99, + "learning_rate": 6.503065100059969e-08, + "loss": 3.7894, + "step": 2160000 + }, + { + "epoch": 23.99, + "learning_rate": 6.501676920684983e-08, + "loss": 3.7768, + "step": 2160500 + }, + { + "epoch": 24.0, + "learning_rate": 6.500288741309997e-08, + "loss": 3.7882, + "step": 2161000 + }, + { + "epoch": 24.0, + "eval_loss": 3.845104455947876, + "eval_runtime": 6.3115, + "eval_samples_per_second": 246.219, + "step": 2161104 + }, + { + "epoch": 24.0, + "learning_rate": 6.498900561935011e-08, + "loss": 3.799, + "step": 2161500 + }, + { + "epoch": 24.01, + "learning_rate": 6.497512382560024e-08, + "loss": 3.8033, + "step": 2162000 + }, + { + "epoch": 24.02, + "learning_rate": 6.496124203185038e-08, + "loss": 3.7733, + "step": 2162500 + }, + { + "epoch": 24.02, + "learning_rate": 6.494736023810053e-08, + "loss": 3.7673, + "step": 2163000 + }, + { + "epoch": 24.03, + "learning_rate": 6.493347844435067e-08, + "loss": 3.7686, + "step": 2163500 + }, + { + "epoch": 24.03, + "learning_rate": 6.491959665060081e-08, + "loss": 3.7849, + "step": 2164000 + }, + { + "epoch": 24.04, + "learning_rate": 6.490571485685094e-08, + "loss": 3.7871, + "step": 2164500 + }, + { + "epoch": 24.04, + "learning_rate": 6.489183306310108e-08, + "loss": 3.7774, + "step": 2165000 + }, + { + "epoch": 24.05, + "learning_rate": 6.487795126935121e-08, + "loss": 3.7842, + "step": 2165500 + }, + { + "epoch": 24.05, + "learning_rate": 6.486406947560135e-08, + "loss": 3.781, + "step": 2166000 + }, + { + "epoch": 24.06, + "learning_rate": 6.48501876818515e-08, + "loss": 3.7829, + "step": 2166500 + }, + { + "epoch": 24.07, + "learning_rate": 6.483630588810164e-08, + "loss": 3.7887, + "step": 2167000 + }, + { + "epoch": 24.07, + "learning_rate": 6.482242409435177e-08, + "loss": 3.8049, + "step": 2167500 + }, + { + "epoch": 24.08, + "learning_rate": 6.480854230060191e-08, + "loss": 3.773, + "step": 2168000 + }, + { + "epoch": 24.08, + "learning_rate": 6.479466050685205e-08, + "loss": 3.7746, + "step": 2168500 + }, + { + "epoch": 24.09, + "learning_rate": 6.47807787131022e-08, + "loss": 3.7972, + "step": 2169000 + }, + { + "epoch": 24.09, + "learning_rate": 6.476689691935234e-08, + "loss": 3.792, + "step": 2169500 + }, + { + "epoch": 24.1, + "learning_rate": 6.475301512560246e-08, + "loss": 3.7721, + "step": 2170000 + }, + { + "epoch": 24.1, + "learning_rate": 6.473913333185261e-08, + "loss": 3.7841, + "step": 2170500 + }, + { + "epoch": 24.11, + "learning_rate": 6.472525153810275e-08, + "loss": 3.7675, + "step": 2171000 + }, + { + "epoch": 24.12, + "learning_rate": 6.471136974435288e-08, + "loss": 3.7843, + "step": 2171500 + }, + { + "epoch": 24.12, + "learning_rate": 6.469748795060302e-08, + "loss": 3.7789, + "step": 2172000 + }, + { + "epoch": 24.13, + "learning_rate": 6.468360615685316e-08, + "loss": 3.7668, + "step": 2172500 + }, + { + "epoch": 24.13, + "learning_rate": 6.466972436310329e-08, + "loss": 3.7814, + "step": 2173000 + }, + { + "epoch": 24.14, + "learning_rate": 6.465584256935343e-08, + "loss": 3.7736, + "step": 2173500 + }, + { + "epoch": 24.14, + "learning_rate": 6.464196077560358e-08, + "loss": 3.7837, + "step": 2174000 + }, + { + "epoch": 24.15, + "learning_rate": 6.462807898185372e-08, + "loss": 3.806, + "step": 2174500 + }, + { + "epoch": 24.15, + "learning_rate": 6.461419718810386e-08, + "loss": 3.7826, + "step": 2175000 + }, + { + "epoch": 24.16, + "learning_rate": 6.4600315394354e-08, + "loss": 3.7926, + "step": 2175500 + }, + { + "epoch": 24.17, + "learning_rate": 6.458643360060413e-08, + "loss": 3.7778, + "step": 2176000 + }, + { + "epoch": 24.17, + "learning_rate": 6.457255180685427e-08, + "loss": 3.7669, + "step": 2176500 + }, + { + "epoch": 24.18, + "learning_rate": 6.455867001310442e-08, + "loss": 3.7813, + "step": 2177000 + }, + { + "epoch": 24.18, + "learning_rate": 6.454478821935455e-08, + "loss": 3.7727, + "step": 2177500 + }, + { + "epoch": 24.19, + "learning_rate": 6.453090642560469e-08, + "loss": 3.7971, + "step": 2178000 + }, + { + "epoch": 24.19, + "learning_rate": 6.451702463185482e-08, + "loss": 3.7803, + "step": 2178500 + }, + { + "epoch": 24.2, + "learning_rate": 6.450314283810496e-08, + "loss": 3.7864, + "step": 2179000 + }, + { + "epoch": 24.2, + "learning_rate": 6.44892610443551e-08, + "loss": 3.7819, + "step": 2179500 + }, + { + "epoch": 24.21, + "learning_rate": 6.447537925060524e-08, + "loss": 3.7894, + "step": 2180000 + }, + { + "epoch": 24.22, + "learning_rate": 6.446149745685539e-08, + "loss": 3.7786, + "step": 2180500 + }, + { + "epoch": 24.22, + "learning_rate": 6.444761566310553e-08, + "loss": 3.7788, + "step": 2181000 + }, + { + "epoch": 24.23, + "learning_rate": 6.443373386935566e-08, + "loss": 3.7876, + "step": 2181500 + }, + { + "epoch": 24.23, + "learning_rate": 6.44198520756058e-08, + "loss": 3.7829, + "step": 2182000 + }, + { + "epoch": 24.24, + "learning_rate": 6.440597028185594e-08, + "loss": 3.7617, + "step": 2182500 + }, + { + "epoch": 24.24, + "learning_rate": 6.439208848810607e-08, + "loss": 3.7806, + "step": 2183000 + }, + { + "epoch": 24.25, + "learning_rate": 6.437820669435621e-08, + "loss": 3.7923, + "step": 2183500 + }, + { + "epoch": 24.25, + "learning_rate": 6.436432490060636e-08, + "loss": 3.7772, + "step": 2184000 + }, + { + "epoch": 24.26, + "learning_rate": 6.435044310685648e-08, + "loss": 3.7799, + "step": 2184500 + }, + { + "epoch": 24.27, + "learning_rate": 6.433656131310663e-08, + "loss": 3.787, + "step": 2185000 + }, + { + "epoch": 24.27, + "learning_rate": 6.432267951935677e-08, + "loss": 3.787, + "step": 2185500 + }, + { + "epoch": 24.28, + "learning_rate": 6.430879772560691e-08, + "loss": 3.7804, + "step": 2186000 + }, + { + "epoch": 24.28, + "learning_rate": 6.429491593185705e-08, + "loss": 3.7947, + "step": 2186500 + }, + { + "epoch": 24.29, + "learning_rate": 6.428103413810718e-08, + "loss": 3.7832, + "step": 2187000 + }, + { + "epoch": 24.29, + "learning_rate": 6.426715234435733e-08, + "loss": 3.7852, + "step": 2187500 + }, + { + "epoch": 24.3, + "learning_rate": 6.425327055060747e-08, + "loss": 3.7644, + "step": 2188000 + }, + { + "epoch": 24.3, + "learning_rate": 6.423938875685761e-08, + "loss": 3.8106, + "step": 2188500 + }, + { + "epoch": 24.31, + "learning_rate": 6.422550696310774e-08, + "loss": 3.7837, + "step": 2189000 + }, + { + "epoch": 24.32, + "learning_rate": 6.421162516935788e-08, + "loss": 3.7755, + "step": 2189500 + }, + { + "epoch": 24.32, + "learning_rate": 6.419774337560801e-08, + "loss": 3.7884, + "step": 2190000 + }, + { + "epoch": 24.33, + "learning_rate": 6.418386158185815e-08, + "loss": 3.7811, + "step": 2190500 + }, + { + "epoch": 24.33, + "learning_rate": 6.41699797881083e-08, + "loss": 3.7565, + "step": 2191000 + }, + { + "epoch": 24.34, + "learning_rate": 6.415609799435844e-08, + "loss": 3.7716, + "step": 2191500 + }, + { + "epoch": 24.34, + "learning_rate": 6.414221620060858e-08, + "loss": 3.7875, + "step": 2192000 + }, + { + "epoch": 24.35, + "learning_rate": 6.412833440685871e-08, + "loss": 3.7719, + "step": 2192500 + }, + { + "epoch": 24.35, + "learning_rate": 6.411445261310885e-08, + "loss": 3.7735, + "step": 2193000 + }, + { + "epoch": 24.36, + "learning_rate": 6.410057081935899e-08, + "loss": 3.7724, + "step": 2193500 + }, + { + "epoch": 24.37, + "learning_rate": 6.408668902560913e-08, + "loss": 3.7767, + "step": 2194000 + }, + { + "epoch": 24.37, + "learning_rate": 6.407280723185928e-08, + "loss": 3.787, + "step": 2194500 + }, + { + "epoch": 24.38, + "learning_rate": 6.40589254381094e-08, + "loss": 3.778, + "step": 2195000 + }, + { + "epoch": 24.38, + "learning_rate": 6.404504364435955e-08, + "loss": 3.7974, + "step": 2195500 + }, + { + "epoch": 24.39, + "learning_rate": 6.403116185060968e-08, + "loss": 3.7689, + "step": 2196000 + }, + { + "epoch": 24.39, + "learning_rate": 6.401728005685982e-08, + "loss": 3.7784, + "step": 2196500 + }, + { + "epoch": 24.4, + "learning_rate": 6.400339826310996e-08, + "loss": 3.7827, + "step": 2197000 + }, + { + "epoch": 24.4, + "learning_rate": 6.39895164693601e-08, + "loss": 3.7858, + "step": 2197500 + }, + { + "epoch": 24.41, + "learning_rate": 6.397563467561025e-08, + "loss": 3.7733, + "step": 2198000 + }, + { + "epoch": 24.42, + "learning_rate": 6.396175288186038e-08, + "loss": 3.7667, + "step": 2198500 + }, + { + "epoch": 24.42, + "learning_rate": 6.394787108811052e-08, + "loss": 3.7876, + "step": 2199000 + }, + { + "epoch": 24.43, + "learning_rate": 6.393398929436066e-08, + "loss": 3.7564, + "step": 2199500 + }, + { + "epoch": 24.43, + "learning_rate": 6.39201075006108e-08, + "loss": 3.7918, + "step": 2200000 + }, + { + "epoch": 24.44, + "learning_rate": 6.390622570686094e-08, + "loss": 3.7811, + "step": 2200500 + }, + { + "epoch": 24.44, + "learning_rate": 6.389234391311107e-08, + "loss": 3.7851, + "step": 2201000 + }, + { + "epoch": 24.45, + "learning_rate": 6.387846211936122e-08, + "loss": 3.7748, + "step": 2201500 + }, + { + "epoch": 24.45, + "learning_rate": 6.386458032561134e-08, + "loss": 3.7831, + "step": 2202000 + }, + { + "epoch": 24.46, + "learning_rate": 6.385069853186149e-08, + "loss": 3.7806, + "step": 2202500 + }, + { + "epoch": 24.47, + "learning_rate": 6.383681673811163e-08, + "loss": 3.7971, + "step": 2203000 + }, + { + "epoch": 24.47, + "learning_rate": 6.382293494436177e-08, + "loss": 3.7712, + "step": 2203500 + }, + { + "epoch": 24.48, + "learning_rate": 6.38090531506119e-08, + "loss": 3.7673, + "step": 2204000 + }, + { + "epoch": 24.48, + "learning_rate": 6.379517135686204e-08, + "loss": 3.7984, + "step": 2204500 + }, + { + "epoch": 24.49, + "learning_rate": 6.378128956311219e-08, + "loss": 3.7709, + "step": 2205000 + }, + { + "epoch": 24.49, + "learning_rate": 6.376740776936233e-08, + "loss": 3.7762, + "step": 2205500 + }, + { + "epoch": 24.5, + "learning_rate": 6.375352597561247e-08, + "loss": 3.7866, + "step": 2206000 + }, + { + "epoch": 24.5, + "learning_rate": 6.37396441818626e-08, + "loss": 3.7719, + "step": 2206500 + }, + { + "epoch": 24.51, + "learning_rate": 6.372576238811274e-08, + "loss": 3.7738, + "step": 2207000 + }, + { + "epoch": 24.52, + "learning_rate": 6.371188059436288e-08, + "loss": 3.7794, + "step": 2207500 + }, + { + "epoch": 24.52, + "learning_rate": 6.369799880061301e-08, + "loss": 3.7505, + "step": 2208000 + }, + { + "epoch": 24.53, + "learning_rate": 6.368411700686315e-08, + "loss": 3.7902, + "step": 2208500 + }, + { + "epoch": 24.53, + "learning_rate": 6.36702352131133e-08, + "loss": 3.7845, + "step": 2209000 + }, + { + "epoch": 24.54, + "learning_rate": 6.365635341936343e-08, + "loss": 3.7939, + "step": 2209500 + }, + { + "epoch": 24.54, + "learning_rate": 6.364247162561357e-08, + "loss": 3.7785, + "step": 2210000 + }, + { + "epoch": 24.55, + "learning_rate": 6.362858983186371e-08, + "loss": 3.7825, + "step": 2210500 + }, + { + "epoch": 24.55, + "learning_rate": 6.361470803811385e-08, + "loss": 3.7744, + "step": 2211000 + }, + { + "epoch": 24.56, + "learning_rate": 6.3600826244364e-08, + "loss": 3.7761, + "step": 2211500 + }, + { + "epoch": 24.57, + "learning_rate": 6.358694445061414e-08, + "loss": 3.7914, + "step": 2212000 + }, + { + "epoch": 24.57, + "learning_rate": 6.357306265686427e-08, + "loss": 3.7731, + "step": 2212500 + }, + { + "epoch": 24.58, + "learning_rate": 6.355918086311441e-08, + "loss": 3.7857, + "step": 2213000 + }, + { + "epoch": 24.58, + "learning_rate": 6.354529906936455e-08, + "loss": 3.7786, + "step": 2213500 + }, + { + "epoch": 24.59, + "learning_rate": 6.353141727561468e-08, + "loss": 3.7713, + "step": 2214000 + }, + { + "epoch": 24.59, + "learning_rate": 6.351753548186482e-08, + "loss": 3.7961, + "step": 2214500 + }, + { + "epoch": 24.6, + "learning_rate": 6.350365368811495e-08, + "loss": 3.7708, + "step": 2215000 + }, + { + "epoch": 24.6, + "learning_rate": 6.348977189436509e-08, + "loss": 3.7718, + "step": 2215500 + }, + { + "epoch": 24.61, + "learning_rate": 6.347589010061524e-08, + "loss": 3.7713, + "step": 2216000 + }, + { + "epoch": 24.62, + "learning_rate": 6.346200830686538e-08, + "loss": 3.763, + "step": 2216500 + }, + { + "epoch": 24.62, + "learning_rate": 6.344812651311552e-08, + "loss": 3.7869, + "step": 2217000 + }, + { + "epoch": 24.63, + "learning_rate": 6.343424471936566e-08, + "loss": 3.7868, + "step": 2217500 + }, + { + "epoch": 24.63, + "learning_rate": 6.342036292561579e-08, + "loss": 3.7583, + "step": 2218000 + }, + { + "epoch": 24.64, + "learning_rate": 6.340648113186593e-08, + "loss": 3.7679, + "step": 2218500 + }, + { + "epoch": 24.64, + "learning_rate": 6.339259933811608e-08, + "loss": 3.7673, + "step": 2219000 + }, + { + "epoch": 24.65, + "learning_rate": 6.33787175443662e-08, + "loss": 3.7673, + "step": 2219500 + }, + { + "epoch": 24.65, + "learning_rate": 6.336483575061635e-08, + "loss": 3.7909, + "step": 2220000 + }, + { + "epoch": 24.66, + "learning_rate": 6.335095395686649e-08, + "loss": 3.764, + "step": 2220500 + }, + { + "epoch": 24.67, + "learning_rate": 6.333707216311662e-08, + "loss": 3.7871, + "step": 2221000 + }, + { + "epoch": 24.67, + "learning_rate": 6.332319036936676e-08, + "loss": 3.7685, + "step": 2221500 + }, + { + "epoch": 24.68, + "learning_rate": 6.33093085756169e-08, + "loss": 3.7835, + "step": 2222000 + }, + { + "epoch": 24.68, + "learning_rate": 6.329542678186705e-08, + "loss": 3.7625, + "step": 2222500 + }, + { + "epoch": 24.69, + "learning_rate": 6.328154498811719e-08, + "loss": 3.7959, + "step": 2223000 + }, + { + "epoch": 24.69, + "learning_rate": 6.326766319436732e-08, + "loss": 3.7993, + "step": 2223500 + }, + { + "epoch": 24.7, + "learning_rate": 6.325378140061746e-08, + "loss": 3.7785, + "step": 2224000 + }, + { + "epoch": 24.7, + "learning_rate": 6.32398996068676e-08, + "loss": 3.7664, + "step": 2224500 + }, + { + "epoch": 24.71, + "learning_rate": 6.322601781311774e-08, + "loss": 3.7902, + "step": 2225000 + }, + { + "epoch": 24.72, + "learning_rate": 6.321213601936787e-08, + "loss": 3.7818, + "step": 2225500 + }, + { + "epoch": 24.72, + "learning_rate": 6.319825422561801e-08, + "loss": 3.7948, + "step": 2226000 + }, + { + "epoch": 24.73, + "learning_rate": 6.318437243186814e-08, + "loss": 3.7724, + "step": 2226500 + }, + { + "epoch": 24.73, + "learning_rate": 6.317049063811829e-08, + "loss": 3.7673, + "step": 2227000 + }, + { + "epoch": 24.74, + "learning_rate": 6.315660884436843e-08, + "loss": 3.7821, + "step": 2227500 + }, + { + "epoch": 24.74, + "learning_rate": 6.314272705061857e-08, + "loss": 3.7784, + "step": 2228000 + }, + { + "epoch": 24.75, + "learning_rate": 6.312884525686871e-08, + "loss": 3.781, + "step": 2228500 + }, + { + "epoch": 24.75, + "learning_rate": 6.311496346311884e-08, + "loss": 3.7779, + "step": 2229000 + }, + { + "epoch": 24.76, + "learning_rate": 6.310108166936898e-08, + "loss": 3.799, + "step": 2229500 + }, + { + "epoch": 24.77, + "learning_rate": 6.308719987561913e-08, + "loss": 3.7793, + "step": 2230000 + }, + { + "epoch": 24.77, + "learning_rate": 6.307331808186927e-08, + "loss": 3.7945, + "step": 2230500 + }, + { + "epoch": 24.78, + "learning_rate": 6.305943628811941e-08, + "loss": 3.7786, + "step": 2231000 + }, + { + "epoch": 24.78, + "learning_rate": 6.304555449436954e-08, + "loss": 3.7963, + "step": 2231500 + }, + { + "epoch": 24.79, + "learning_rate": 6.303167270061968e-08, + "loss": 3.7816, + "step": 2232000 + }, + { + "epoch": 24.79, + "learning_rate": 6.301779090686981e-08, + "loss": 3.7791, + "step": 2232500 + }, + { + "epoch": 24.8, + "learning_rate": 6.300390911311995e-08, + "loss": 3.7793, + "step": 2233000 + }, + { + "epoch": 24.8, + "learning_rate": 6.29900273193701e-08, + "loss": 3.7623, + "step": 2233500 + }, + { + "epoch": 24.81, + "learning_rate": 6.297614552562024e-08, + "loss": 3.7672, + "step": 2234000 + }, + { + "epoch": 24.82, + "learning_rate": 6.296226373187038e-08, + "loss": 3.7618, + "step": 2234500 + }, + { + "epoch": 24.82, + "learning_rate": 6.294838193812051e-08, + "loss": 3.7773, + "step": 2235000 + }, + { + "epoch": 24.83, + "learning_rate": 6.293450014437065e-08, + "loss": 3.7935, + "step": 2235500 + }, + { + "epoch": 24.83, + "learning_rate": 6.29206183506208e-08, + "loss": 3.7926, + "step": 2236000 + }, + { + "epoch": 24.84, + "learning_rate": 6.290673655687094e-08, + "loss": 3.7756, + "step": 2236500 + }, + { + "epoch": 24.84, + "learning_rate": 6.289285476312108e-08, + "loss": 3.7582, + "step": 2237000 + }, + { + "epoch": 24.85, + "learning_rate": 6.287897296937121e-08, + "loss": 3.758, + "step": 2237500 + }, + { + "epoch": 24.85, + "learning_rate": 6.286509117562135e-08, + "loss": 3.7717, + "step": 2238000 + }, + { + "epoch": 24.86, + "learning_rate": 6.285120938187148e-08, + "loss": 3.7701, + "step": 2238500 + }, + { + "epoch": 24.87, + "learning_rate": 6.283732758812162e-08, + "loss": 3.7683, + "step": 2239000 + }, + { + "epoch": 24.87, + "learning_rate": 6.282344579437176e-08, + "loss": 3.7978, + "step": 2239500 + }, + { + "epoch": 24.88, + "learning_rate": 6.28095640006219e-08, + "loss": 3.7998, + "step": 2240000 + }, + { + "epoch": 24.88, + "learning_rate": 6.279568220687203e-08, + "loss": 3.7641, + "step": 2240500 + }, + { + "epoch": 24.89, + "learning_rate": 6.278180041312218e-08, + "loss": 3.798, + "step": 2241000 + }, + { + "epoch": 24.89, + "learning_rate": 6.276791861937232e-08, + "loss": 3.7828, + "step": 2241500 + }, + { + "epoch": 24.9, + "learning_rate": 6.275403682562246e-08, + "loss": 3.7854, + "step": 2242000 + }, + { + "epoch": 24.9, + "learning_rate": 6.27401550318726e-08, + "loss": 3.7846, + "step": 2242500 + }, + { + "epoch": 24.91, + "learning_rate": 6.272627323812275e-08, + "loss": 3.7684, + "step": 2243000 + }, + { + "epoch": 24.92, + "learning_rate": 6.271239144437287e-08, + "loss": 3.7712, + "step": 2243500 + }, + { + "epoch": 24.92, + "learning_rate": 6.269850965062302e-08, + "loss": 3.7804, + "step": 2244000 + }, + { + "epoch": 24.93, + "learning_rate": 6.268462785687315e-08, + "loss": 3.7827, + "step": 2244500 + }, + { + "epoch": 24.93, + "learning_rate": 6.267074606312329e-08, + "loss": 3.7814, + "step": 2245000 + }, + { + "epoch": 24.94, + "learning_rate": 6.265686426937343e-08, + "loss": 3.7627, + "step": 2245500 + }, + { + "epoch": 24.94, + "learning_rate": 6.264298247562356e-08, + "loss": 3.7555, + "step": 2246000 + }, + { + "epoch": 24.95, + "learning_rate": 6.26291006818737e-08, + "loss": 3.804, + "step": 2246500 + }, + { + "epoch": 24.95, + "learning_rate": 6.261521888812384e-08, + "loss": 3.7932, + "step": 2247000 + }, + { + "epoch": 24.96, + "learning_rate": 6.260133709437399e-08, + "loss": 3.7669, + "step": 2247500 + }, + { + "epoch": 24.97, + "learning_rate": 6.258745530062413e-08, + "loss": 3.7754, + "step": 2248000 + }, + { + "epoch": 24.97, + "learning_rate": 6.257357350687427e-08, + "loss": 3.7734, + "step": 2248500 + }, + { + "epoch": 24.98, + "learning_rate": 6.25596917131244e-08, + "loss": 3.7856, + "step": 2249000 + }, + { + "epoch": 24.98, + "learning_rate": 6.254580991937454e-08, + "loss": 3.7783, + "step": 2249500 + }, + { + "epoch": 24.99, + "learning_rate": 6.253192812562467e-08, + "loss": 3.7844, + "step": 2250000 + }, + { + "epoch": 24.99, + "learning_rate": 6.251804633187481e-08, + "loss": 3.7854, + "step": 2250500 + }, + { + "epoch": 25.0, + "learning_rate": 6.250416453812496e-08, + "loss": 3.7824, + "step": 2251000 + }, + { + "epoch": 25.0, + "eval_loss": 3.843087911605835, + "eval_runtime": 6.3026, + "eval_samples_per_second": 246.566, + "step": 2251150 + }, + { + "epoch": 25.0, + "learning_rate": 6.249028274437508e-08, + "loss": 3.7844, + "step": 2251500 + }, + { + "epoch": 25.01, + "learning_rate": 6.247640095062523e-08, + "loss": 3.7535, + "step": 2252000 + }, + { + "epoch": 25.01, + "learning_rate": 6.246251915687537e-08, + "loss": 3.7942, + "step": 2252500 + }, + { + "epoch": 25.02, + "learning_rate": 6.244863736312551e-08, + "loss": 3.7737, + "step": 2253000 + }, + { + "epoch": 25.03, + "learning_rate": 6.243475556937565e-08, + "loss": 3.7851, + "step": 2253500 + }, + { + "epoch": 25.03, + "learning_rate": 6.24208737756258e-08, + "loss": 3.789, + "step": 2254000 + }, + { + "epoch": 25.04, + "learning_rate": 6.240699198187593e-08, + "loss": 3.7767, + "step": 2254500 + }, + { + "epoch": 25.04, + "learning_rate": 6.239311018812607e-08, + "loss": 3.7702, + "step": 2255000 + }, + { + "epoch": 25.05, + "learning_rate": 6.237922839437621e-08, + "loss": 3.7632, + "step": 2255500 + }, + { + "epoch": 25.05, + "learning_rate": 6.236534660062634e-08, + "loss": 3.7774, + "step": 2256000 + }, + { + "epoch": 25.06, + "learning_rate": 6.235146480687648e-08, + "loss": 3.7659, + "step": 2256500 + }, + { + "epoch": 25.06, + "learning_rate": 6.233758301312662e-08, + "loss": 3.7828, + "step": 2257000 + }, + { + "epoch": 25.07, + "learning_rate": 6.232370121937675e-08, + "loss": 3.7881, + "step": 2257500 + }, + { + "epoch": 25.08, + "learning_rate": 6.23098194256269e-08, + "loss": 3.7728, + "step": 2258000 + }, + { + "epoch": 25.08, + "learning_rate": 6.229593763187704e-08, + "loss": 3.7864, + "step": 2258500 + }, + { + "epoch": 25.09, + "learning_rate": 6.228205583812718e-08, + "loss": 3.7813, + "step": 2259000 + }, + { + "epoch": 25.09, + "learning_rate": 6.226817404437732e-08, + "loss": 3.7711, + "step": 2259500 + }, + { + "epoch": 25.1, + "learning_rate": 6.225429225062745e-08, + "loss": 3.7546, + "step": 2260000 + }, + { + "epoch": 25.1, + "learning_rate": 6.224041045687759e-08, + "loss": 3.7778, + "step": 2260500 + }, + { + "epoch": 25.11, + "learning_rate": 6.222652866312773e-08, + "loss": 3.7861, + "step": 2261000 + }, + { + "epoch": 25.11, + "learning_rate": 6.221264686937788e-08, + "loss": 3.7819, + "step": 2261500 + }, + { + "epoch": 25.12, + "learning_rate": 6.2198765075628e-08, + "loss": 3.768, + "step": 2262000 + }, + { + "epoch": 25.13, + "learning_rate": 6.218488328187815e-08, + "loss": 3.7939, + "step": 2262500 + }, + { + "epoch": 25.13, + "learning_rate": 6.217100148812828e-08, + "loss": 3.7827, + "step": 2263000 + }, + { + "epoch": 25.14, + "learning_rate": 6.215711969437842e-08, + "loss": 3.792, + "step": 2263500 + }, + { + "epoch": 25.14, + "learning_rate": 6.214323790062856e-08, + "loss": 3.7551, + "step": 2264000 + }, + { + "epoch": 25.15, + "learning_rate": 6.21293561068787e-08, + "loss": 3.7861, + "step": 2264500 + }, + { + "epoch": 25.15, + "learning_rate": 6.211547431312885e-08, + "loss": 3.7695, + "step": 2265000 + }, + { + "epoch": 25.16, + "learning_rate": 6.210159251937898e-08, + "loss": 3.7738, + "step": 2265500 + }, + { + "epoch": 25.16, + "learning_rate": 6.208771072562912e-08, + "loss": 3.7672, + "step": 2266000 + }, + { + "epoch": 25.17, + "learning_rate": 6.207382893187926e-08, + "loss": 3.7886, + "step": 2266500 + }, + { + "epoch": 25.18, + "learning_rate": 6.20599471381294e-08, + "loss": 3.7748, + "step": 2267000 + }, + { + "epoch": 25.18, + "learning_rate": 6.204606534437954e-08, + "loss": 3.7885, + "step": 2267500 + }, + { + "epoch": 25.19, + "learning_rate": 6.203218355062967e-08, + "loss": 3.7758, + "step": 2268000 + }, + { + "epoch": 25.19, + "learning_rate": 6.201830175687982e-08, + "loss": 3.775, + "step": 2268500 + }, + { + "epoch": 25.2, + "learning_rate": 6.200441996312994e-08, + "loss": 3.7799, + "step": 2269000 + }, + { + "epoch": 25.2, + "learning_rate": 6.199053816938009e-08, + "loss": 3.7692, + "step": 2269500 + }, + { + "epoch": 25.21, + "learning_rate": 6.197665637563023e-08, + "loss": 3.7766, + "step": 2270000 + }, + { + "epoch": 25.21, + "learning_rate": 6.196277458188037e-08, + "loss": 3.7793, + "step": 2270500 + }, + { + "epoch": 25.22, + "learning_rate": 6.194889278813051e-08, + "loss": 3.7781, + "step": 2271000 + }, + { + "epoch": 25.23, + "learning_rate": 6.193501099438064e-08, + "loss": 3.7762, + "step": 2271500 + }, + { + "epoch": 25.23, + "learning_rate": 6.192112920063079e-08, + "loss": 3.7535, + "step": 2272000 + }, + { + "epoch": 25.24, + "learning_rate": 6.190724740688093e-08, + "loss": 3.7742, + "step": 2272500 + }, + { + "epoch": 25.24, + "learning_rate": 6.189336561313107e-08, + "loss": 3.8031, + "step": 2273000 + }, + { + "epoch": 25.25, + "learning_rate": 6.187948381938121e-08, + "loss": 3.7771, + "step": 2273500 + }, + { + "epoch": 25.25, + "learning_rate": 6.186560202563134e-08, + "loss": 3.7847, + "step": 2274000 + }, + { + "epoch": 25.26, + "learning_rate": 6.185172023188148e-08, + "loss": 3.7708, + "step": 2274500 + }, + { + "epoch": 25.26, + "learning_rate": 6.183783843813161e-08, + "loss": 3.7818, + "step": 2275000 + }, + { + "epoch": 25.27, + "learning_rate": 6.182395664438175e-08, + "loss": 3.7806, + "step": 2275500 + }, + { + "epoch": 25.28, + "learning_rate": 6.18100748506319e-08, + "loss": 3.78, + "step": 2276000 + }, + { + "epoch": 25.28, + "learning_rate": 6.179619305688204e-08, + "loss": 3.773, + "step": 2276500 + }, + { + "epoch": 25.29, + "learning_rate": 6.178231126313217e-08, + "loss": 3.7871, + "step": 2277000 + }, + { + "epoch": 25.29, + "learning_rate": 6.176842946938231e-08, + "loss": 3.7756, + "step": 2277500 + }, + { + "epoch": 25.3, + "learning_rate": 6.175454767563245e-08, + "loss": 3.7719, + "step": 2278000 + }, + { + "epoch": 25.3, + "learning_rate": 6.17406658818826e-08, + "loss": 3.7703, + "step": 2278500 + }, + { + "epoch": 25.31, + "learning_rate": 6.172678408813274e-08, + "loss": 3.7784, + "step": 2279000 + }, + { + "epoch": 25.31, + "learning_rate": 6.171290229438288e-08, + "loss": 3.7751, + "step": 2279500 + }, + { + "epoch": 25.32, + "learning_rate": 6.169902050063301e-08, + "loss": 3.7622, + "step": 2280000 + }, + { + "epoch": 25.33, + "learning_rate": 6.168513870688314e-08, + "loss": 3.7569, + "step": 2280500 + }, + { + "epoch": 25.33, + "learning_rate": 6.167125691313328e-08, + "loss": 3.7805, + "step": 2281000 + }, + { + "epoch": 25.34, + "learning_rate": 6.165737511938342e-08, + "loss": 3.7769, + "step": 2281500 + }, + { + "epoch": 25.34, + "learning_rate": 6.164349332563356e-08, + "loss": 3.7592, + "step": 2282000 + }, + { + "epoch": 25.35, + "learning_rate": 6.16296115318837e-08, + "loss": 3.7694, + "step": 2282500 + }, + { + "epoch": 25.35, + "learning_rate": 6.161572973813384e-08, + "loss": 3.771, + "step": 2283000 + }, + { + "epoch": 25.36, + "learning_rate": 6.160184794438398e-08, + "loss": 3.7988, + "step": 2283500 + }, + { + "epoch": 25.36, + "learning_rate": 6.158796615063412e-08, + "loss": 3.7844, + "step": 2284000 + }, + { + "epoch": 25.37, + "learning_rate": 6.157408435688426e-08, + "loss": 3.7726, + "step": 2284500 + }, + { + "epoch": 25.38, + "learning_rate": 6.15602025631344e-08, + "loss": 3.7683, + "step": 2285000 + }, + { + "epoch": 25.38, + "learning_rate": 6.154632076938453e-08, + "loss": 3.7863, + "step": 2285500 + }, + { + "epoch": 25.39, + "learning_rate": 6.153243897563468e-08, + "loss": 3.7739, + "step": 2286000 + }, + { + "epoch": 25.39, + "learning_rate": 6.15185571818848e-08, + "loss": 3.7684, + "step": 2286500 + }, + { + "epoch": 25.4, + "learning_rate": 6.150467538813495e-08, + "loss": 3.7718, + "step": 2287000 + }, + { + "epoch": 25.4, + "learning_rate": 6.149079359438509e-08, + "loss": 3.775, + "step": 2287500 + }, + { + "epoch": 25.41, + "learning_rate": 6.147691180063522e-08, + "loss": 3.7615, + "step": 2288000 + }, + { + "epoch": 25.41, + "learning_rate": 6.146303000688536e-08, + "loss": 3.7717, + "step": 2288500 + }, + { + "epoch": 25.42, + "learning_rate": 6.14491482131355e-08, + "loss": 3.7709, + "step": 2289000 + }, + { + "epoch": 25.43, + "learning_rate": 6.143526641938565e-08, + "loss": 3.7846, + "step": 2289500 + }, + { + "epoch": 25.43, + "learning_rate": 6.142138462563579e-08, + "loss": 3.7955, + "step": 2290000 + }, + { + "epoch": 25.44, + "learning_rate": 6.140750283188593e-08, + "loss": 3.7863, + "step": 2290500 + }, + { + "epoch": 25.44, + "learning_rate": 6.139362103813606e-08, + "loss": 3.7812, + "step": 2291000 + }, + { + "epoch": 25.45, + "learning_rate": 6.13797392443862e-08, + "loss": 3.7727, + "step": 2291500 + }, + { + "epoch": 25.45, + "learning_rate": 6.136585745063634e-08, + "loss": 3.7847, + "step": 2292000 + }, + { + "epoch": 25.46, + "learning_rate": 6.135197565688647e-08, + "loss": 3.7922, + "step": 2292500 + }, + { + "epoch": 25.46, + "learning_rate": 6.133809386313661e-08, + "loss": 3.7741, + "step": 2293000 + }, + { + "epoch": 25.47, + "learning_rate": 6.132421206938676e-08, + "loss": 3.809, + "step": 2293500 + }, + { + "epoch": 25.48, + "learning_rate": 6.131033027563689e-08, + "loss": 3.7726, + "step": 2294000 + }, + { + "epoch": 25.48, + "learning_rate": 6.129644848188703e-08, + "loss": 3.7901, + "step": 2294500 + }, + { + "epoch": 25.49, + "learning_rate": 6.128256668813717e-08, + "loss": 3.7835, + "step": 2295000 + }, + { + "epoch": 25.49, + "learning_rate": 6.126868489438731e-08, + "loss": 3.7768, + "step": 2295500 + }, + { + "epoch": 25.5, + "learning_rate": 6.125480310063746e-08, + "loss": 3.7663, + "step": 2296000 + }, + { + "epoch": 25.5, + "learning_rate": 6.124092130688758e-08, + "loss": 3.7675, + "step": 2296500 + }, + { + "epoch": 25.51, + "learning_rate": 6.122703951313773e-08, + "loss": 3.7719, + "step": 2297000 + }, + { + "epoch": 25.51, + "learning_rate": 6.121315771938787e-08, + "loss": 3.7881, + "step": 2297500 + }, + { + "epoch": 25.52, + "learning_rate": 6.119927592563801e-08, + "loss": 3.7921, + "step": 2298000 + }, + { + "epoch": 25.53, + "learning_rate": 6.118539413188814e-08, + "loss": 3.7846, + "step": 2298500 + }, + { + "epoch": 25.53, + "learning_rate": 6.117151233813828e-08, + "loss": 3.791, + "step": 2299000 + }, + { + "epoch": 25.54, + "learning_rate": 6.115763054438841e-08, + "loss": 3.7707, + "step": 2299500 + }, + { + "epoch": 25.54, + "learning_rate": 6.114374875063855e-08, + "loss": 3.7853, + "step": 2300000 + }, + { + "epoch": 25.55, + "learning_rate": 6.11298669568887e-08, + "loss": 3.7911, + "step": 2300500 + }, + { + "epoch": 25.55, + "learning_rate": 6.111598516313884e-08, + "loss": 3.7801, + "step": 2301000 + }, + { + "epoch": 25.56, + "learning_rate": 6.110210336938898e-08, + "loss": 3.7801, + "step": 2301500 + }, + { + "epoch": 25.56, + "learning_rate": 6.108822157563911e-08, + "loss": 3.7603, + "step": 2302000 + }, + { + "epoch": 25.57, + "learning_rate": 6.107433978188925e-08, + "loss": 3.7835, + "step": 2302500 + }, + { + "epoch": 25.58, + "learning_rate": 6.10604579881394e-08, + "loss": 3.7807, + "step": 2303000 + }, + { + "epoch": 25.58, + "learning_rate": 6.104657619438954e-08, + "loss": 3.7854, + "step": 2303500 + }, + { + "epoch": 25.59, + "learning_rate": 6.103269440063968e-08, + "loss": 3.8086, + "step": 2304000 + }, + { + "epoch": 25.59, + "learning_rate": 6.101881260688981e-08, + "loss": 3.7595, + "step": 2304500 + }, + { + "epoch": 25.6, + "learning_rate": 6.100493081313995e-08, + "loss": 3.7767, + "step": 2305000 + }, + { + "epoch": 25.6, + "learning_rate": 6.099104901939008e-08, + "loss": 3.7811, + "step": 2305500 + }, + { + "epoch": 25.61, + "learning_rate": 6.097716722564022e-08, + "loss": 3.7619, + "step": 2306000 + }, + { + "epoch": 25.61, + "learning_rate": 6.096328543189036e-08, + "loss": 3.7782, + "step": 2306500 + }, + { + "epoch": 25.62, + "learning_rate": 6.09494036381405e-08, + "loss": 3.7546, + "step": 2307000 + }, + { + "epoch": 25.63, + "learning_rate": 6.093552184439065e-08, + "loss": 3.789, + "step": 2307500 + }, + { + "epoch": 25.63, + "learning_rate": 6.092164005064078e-08, + "loss": 3.7785, + "step": 2308000 + }, + { + "epoch": 25.64, + "learning_rate": 6.090775825689092e-08, + "loss": 3.7831, + "step": 2308500 + }, + { + "epoch": 25.64, + "learning_rate": 6.089387646314106e-08, + "loss": 3.7747, + "step": 2309000 + }, + { + "epoch": 25.65, + "learning_rate": 6.08799946693912e-08, + "loss": 3.7653, + "step": 2309500 + }, + { + "epoch": 25.65, + "learning_rate": 6.086611287564135e-08, + "loss": 3.7682, + "step": 2310000 + }, + { + "epoch": 25.66, + "learning_rate": 6.085223108189147e-08, + "loss": 3.7801, + "step": 2310500 + }, + { + "epoch": 25.66, + "learning_rate": 6.08383492881416e-08, + "loss": 3.7543, + "step": 2311000 + }, + { + "epoch": 25.67, + "learning_rate": 6.082446749439175e-08, + "loss": 3.8091, + "step": 2311500 + }, + { + "epoch": 25.68, + "learning_rate": 6.081058570064189e-08, + "loss": 3.7603, + "step": 2312000 + }, + { + "epoch": 25.68, + "learning_rate": 6.079670390689203e-08, + "loss": 3.7774, + "step": 2312500 + }, + { + "epoch": 25.69, + "learning_rate": 6.078282211314217e-08, + "loss": 3.7646, + "step": 2313000 + }, + { + "epoch": 25.69, + "learning_rate": 6.07689403193923e-08, + "loss": 3.7538, + "step": 2313500 + }, + { + "epoch": 25.7, + "learning_rate": 6.075505852564244e-08, + "loss": 3.7725, + "step": 2314000 + }, + { + "epoch": 25.7, + "learning_rate": 6.074117673189259e-08, + "loss": 3.7809, + "step": 2314500 + }, + { + "epoch": 25.71, + "learning_rate": 6.072729493814273e-08, + "loss": 3.7843, + "step": 2315000 + }, + { + "epoch": 25.71, + "learning_rate": 6.071341314439287e-08, + "loss": 3.7941, + "step": 2315500 + }, + { + "epoch": 25.72, + "learning_rate": 6.069953135064301e-08, + "loss": 3.7652, + "step": 2316000 + }, + { + "epoch": 25.73, + "learning_rate": 6.068564955689314e-08, + "loss": 3.7785, + "step": 2316500 + }, + { + "epoch": 25.73, + "learning_rate": 6.067176776314327e-08, + "loss": 3.7539, + "step": 2317000 + }, + { + "epoch": 25.74, + "learning_rate": 6.065788596939341e-08, + "loss": 3.7901, + "step": 2317500 + }, + { + "epoch": 25.74, + "learning_rate": 6.064400417564356e-08, + "loss": 3.7908, + "step": 2318000 + }, + { + "epoch": 25.75, + "learning_rate": 6.06301223818937e-08, + "loss": 3.7813, + "step": 2318500 + }, + { + "epoch": 25.75, + "learning_rate": 6.061624058814383e-08, + "loss": 3.7818, + "step": 2319000 + }, + { + "epoch": 25.76, + "learning_rate": 6.060235879439397e-08, + "loss": 3.7954, + "step": 2319500 + }, + { + "epoch": 25.76, + "learning_rate": 6.058847700064411e-08, + "loss": 3.7612, + "step": 2320000 + }, + { + "epoch": 25.77, + "learning_rate": 6.057459520689425e-08, + "loss": 3.7849, + "step": 2320500 + }, + { + "epoch": 25.78, + "learning_rate": 6.05607134131444e-08, + "loss": 3.793, + "step": 2321000 + }, + { + "epoch": 25.78, + "learning_rate": 6.054683161939454e-08, + "loss": 3.7967, + "step": 2321500 + }, + { + "epoch": 25.79, + "learning_rate": 6.053294982564467e-08, + "loss": 3.7936, + "step": 2322000 + }, + { + "epoch": 25.79, + "learning_rate": 6.051906803189481e-08, + "loss": 3.7671, + "step": 2322500 + }, + { + "epoch": 25.8, + "learning_rate": 6.050518623814494e-08, + "loss": 3.7697, + "step": 2323000 + }, + { + "epoch": 25.8, + "learning_rate": 6.049130444439508e-08, + "loss": 3.7808, + "step": 2323500 + }, + { + "epoch": 25.81, + "learning_rate": 6.047742265064522e-08, + "loss": 3.7815, + "step": 2324000 + }, + { + "epoch": 25.81, + "learning_rate": 6.046354085689535e-08, + "loss": 3.77, + "step": 2324500 + }, + { + "epoch": 25.82, + "learning_rate": 6.04496590631455e-08, + "loss": 3.7696, + "step": 2325000 + }, + { + "epoch": 25.83, + "learning_rate": 6.043577726939564e-08, + "loss": 3.7941, + "step": 2325500 + }, + { + "epoch": 25.83, + "learning_rate": 6.042189547564578e-08, + "loss": 3.7838, + "step": 2326000 + }, + { + "epoch": 25.84, + "learning_rate": 6.040801368189592e-08, + "loss": 3.7897, + "step": 2326500 + }, + { + "epoch": 25.84, + "learning_rate": 6.039413188814606e-08, + "loss": 3.7522, + "step": 2327000 + }, + { + "epoch": 25.85, + "learning_rate": 6.038025009439619e-08, + "loss": 3.7578, + "step": 2327500 + }, + { + "epoch": 25.85, + "learning_rate": 6.036636830064634e-08, + "loss": 3.7898, + "step": 2328000 + }, + { + "epoch": 25.86, + "learning_rate": 6.035248650689648e-08, + "loss": 3.7778, + "step": 2328500 + }, + { + "epoch": 25.86, + "learning_rate": 6.03386047131466e-08, + "loss": 3.7821, + "step": 2329000 + }, + { + "epoch": 25.87, + "learning_rate": 6.032472291939675e-08, + "loss": 3.7623, + "step": 2329500 + }, + { + "epoch": 25.88, + "learning_rate": 6.031084112564689e-08, + "loss": 3.7655, + "step": 2330000 + }, + { + "epoch": 25.88, + "learning_rate": 6.029695933189702e-08, + "loss": 3.7794, + "step": 2330500 + }, + { + "epoch": 25.89, + "learning_rate": 6.028307753814716e-08, + "loss": 3.7804, + "step": 2331000 + }, + { + "epoch": 25.89, + "learning_rate": 6.02691957443973e-08, + "loss": 3.787, + "step": 2331500 + }, + { + "epoch": 25.9, + "learning_rate": 6.025531395064745e-08, + "loss": 3.7841, + "step": 2332000 + }, + { + "epoch": 25.9, + "learning_rate": 6.024143215689759e-08, + "loss": 3.7678, + "step": 2332500 + }, + { + "epoch": 25.91, + "learning_rate": 6.022755036314772e-08, + "loss": 3.7612, + "step": 2333000 + }, + { + "epoch": 25.91, + "learning_rate": 6.021366856939786e-08, + "loss": 3.7752, + "step": 2333500 + }, + { + "epoch": 25.92, + "learning_rate": 6.0199786775648e-08, + "loss": 3.7913, + "step": 2334000 + }, + { + "epoch": 25.93, + "learning_rate": 6.018590498189814e-08, + "loss": 3.7827, + "step": 2334500 + }, + { + "epoch": 25.93, + "learning_rate": 6.017202318814827e-08, + "loss": 3.7714, + "step": 2335000 + }, + { + "epoch": 25.94, + "learning_rate": 6.015814139439842e-08, + "loss": 3.7884, + "step": 2335500 + }, + { + "epoch": 25.94, + "learning_rate": 6.014425960064855e-08, + "loss": 3.7875, + "step": 2336000 + }, + { + "epoch": 25.95, + "learning_rate": 6.013037780689869e-08, + "loss": 3.7807, + "step": 2336500 + }, + { + "epoch": 25.95, + "learning_rate": 6.011649601314883e-08, + "loss": 3.7625, + "step": 2337000 + }, + { + "epoch": 25.96, + "learning_rate": 6.010261421939897e-08, + "loss": 3.7606, + "step": 2337500 + }, + { + "epoch": 25.96, + "learning_rate": 6.008873242564911e-08, + "loss": 3.7747, + "step": 2338000 + }, + { + "epoch": 25.97, + "learning_rate": 6.007485063189926e-08, + "loss": 3.7736, + "step": 2338500 + }, + { + "epoch": 25.98, + "learning_rate": 6.006096883814939e-08, + "loss": 3.7746, + "step": 2339000 + }, + { + "epoch": 25.98, + "learning_rate": 6.004708704439953e-08, + "loss": 3.7684, + "step": 2339500 + }, + { + "epoch": 25.99, + "learning_rate": 6.003320525064967e-08, + "loss": 3.7687, + "step": 2340000 + }, + { + "epoch": 25.99, + "learning_rate": 6.001932345689981e-08, + "loss": 3.7736, + "step": 2340500 + }, + { + "epoch": 26.0, + "learning_rate": 6.000544166314994e-08, + "loss": 3.7579, + "step": 2341000 + }, + { + "epoch": 26.0, + "eval_loss": 3.8411214351654053, + "eval_runtime": 6.2991, + "eval_samples_per_second": 246.7, + "step": 2341196 + }, + { + "epoch": 26.0, + "learning_rate": 5.999155986940007e-08, + "loss": 3.7662, + "step": 2341500 + }, + { + "epoch": 26.01, + "learning_rate": 5.997767807565021e-08, + "loss": 3.785, + "step": 2342000 + }, + { + "epoch": 26.01, + "learning_rate": 5.996379628190035e-08, + "loss": 3.7827, + "step": 2342500 + }, + { + "epoch": 26.02, + "learning_rate": 5.99499144881505e-08, + "loss": 3.7691, + "step": 2343000 + }, + { + "epoch": 26.03, + "learning_rate": 5.993603269440064e-08, + "loss": 3.7776, + "step": 2343500 + }, + { + "epoch": 26.03, + "learning_rate": 5.992215090065078e-08, + "loss": 3.7821, + "step": 2344000 + }, + { + "epoch": 26.04, + "learning_rate": 5.990826910690091e-08, + "loss": 3.7668, + "step": 2344500 + }, + { + "epoch": 26.04, + "learning_rate": 5.989438731315105e-08, + "loss": 3.7789, + "step": 2345000 + }, + { + "epoch": 26.05, + "learning_rate": 5.98805055194012e-08, + "loss": 3.7851, + "step": 2345500 + }, + { + "epoch": 26.05, + "learning_rate": 5.986662372565134e-08, + "loss": 3.7689, + "step": 2346000 + }, + { + "epoch": 26.06, + "learning_rate": 5.985274193190148e-08, + "loss": 3.7674, + "step": 2346500 + }, + { + "epoch": 26.06, + "learning_rate": 5.983886013815161e-08, + "loss": 3.773, + "step": 2347000 + }, + { + "epoch": 26.07, + "learning_rate": 5.982497834440174e-08, + "loss": 3.7795, + "step": 2347500 + }, + { + "epoch": 26.08, + "learning_rate": 5.981109655065188e-08, + "loss": 3.7839, + "step": 2348000 + }, + { + "epoch": 26.08, + "learning_rate": 5.979721475690202e-08, + "loss": 3.7577, + "step": 2348500 + }, + { + "epoch": 26.09, + "learning_rate": 5.978333296315216e-08, + "loss": 3.7677, + "step": 2349000 + }, + { + "epoch": 26.09, + "learning_rate": 5.976945116940231e-08, + "loss": 3.7729, + "step": 2349500 + }, + { + "epoch": 26.1, + "learning_rate": 5.975556937565244e-08, + "loss": 3.7759, + "step": 2350000 + }, + { + "epoch": 26.1, + "learning_rate": 5.974168758190258e-08, + "loss": 3.7739, + "step": 2350500 + }, + { + "epoch": 26.11, + "learning_rate": 5.972780578815272e-08, + "loss": 3.7726, + "step": 2351000 + }, + { + "epoch": 26.11, + "learning_rate": 5.971392399440286e-08, + "loss": 3.7724, + "step": 2351500 + }, + { + "epoch": 26.12, + "learning_rate": 5.9700042200653e-08, + "loss": 3.7776, + "step": 2352000 + }, + { + "epoch": 26.13, + "learning_rate": 5.968616040690315e-08, + "loss": 3.7806, + "step": 2352500 + }, + { + "epoch": 26.13, + "learning_rate": 5.967227861315328e-08, + "loss": 3.7977, + "step": 2353000 + }, + { + "epoch": 26.14, + "learning_rate": 5.96583968194034e-08, + "loss": 3.7766, + "step": 2353500 + }, + { + "epoch": 26.14, + "learning_rate": 5.964451502565355e-08, + "loss": 3.7802, + "step": 2354000 + }, + { + "epoch": 26.15, + "learning_rate": 5.963063323190369e-08, + "loss": 3.7702, + "step": 2354500 + }, + { + "epoch": 26.15, + "learning_rate": 5.961675143815383e-08, + "loss": 3.7937, + "step": 2355000 + }, + { + "epoch": 26.16, + "learning_rate": 5.960286964440396e-08, + "loss": 3.7515, + "step": 2355500 + }, + { + "epoch": 26.16, + "learning_rate": 5.9588987850654103e-08, + "loss": 3.7776, + "step": 2356000 + }, + { + "epoch": 26.17, + "learning_rate": 5.9575106056904246e-08, + "loss": 3.7774, + "step": 2356500 + }, + { + "epoch": 26.18, + "learning_rate": 5.956122426315439e-08, + "loss": 3.7552, + "step": 2357000 + }, + { + "epoch": 26.18, + "learning_rate": 5.9547342469404524e-08, + "loss": 3.7695, + "step": 2357500 + }, + { + "epoch": 26.19, + "learning_rate": 5.9533460675654666e-08, + "loss": 3.7692, + "step": 2358000 + }, + { + "epoch": 26.19, + "learning_rate": 5.9519578881904795e-08, + "loss": 3.7857, + "step": 2358500 + }, + { + "epoch": 26.2, + "learning_rate": 5.950569708815494e-08, + "loss": 3.7701, + "step": 2359000 + }, + { + "epoch": 26.2, + "learning_rate": 5.949181529440508e-08, + "loss": 3.7657, + "step": 2359500 + }, + { + "epoch": 26.21, + "learning_rate": 5.947793350065522e-08, + "loss": 3.7755, + "step": 2360000 + }, + { + "epoch": 26.21, + "learning_rate": 5.946405170690536e-08, + "loss": 3.7925, + "step": 2360500 + }, + { + "epoch": 26.22, + "learning_rate": 5.945016991315549e-08, + "loss": 3.7727, + "step": 2361000 + }, + { + "epoch": 26.23, + "learning_rate": 5.943628811940563e-08, + "loss": 3.7822, + "step": 2361500 + }, + { + "epoch": 26.23, + "learning_rate": 5.942240632565577e-08, + "loss": 3.7901, + "step": 2362000 + }, + { + "epoch": 26.24, + "learning_rate": 5.940852453190591e-08, + "loss": 3.7575, + "step": 2362500 + }, + { + "epoch": 26.24, + "learning_rate": 5.9394642738156055e-08, + "loss": 3.7709, + "step": 2363000 + }, + { + "epoch": 26.25, + "learning_rate": 5.938076094440619e-08, + "loss": 3.7666, + "step": 2363500 + }, + { + "epoch": 26.25, + "learning_rate": 5.9366879150656327e-08, + "loss": 3.7754, + "step": 2364000 + }, + { + "epoch": 26.26, + "learning_rate": 5.935299735690646e-08, + "loss": 3.7759, + "step": 2364500 + }, + { + "epoch": 26.26, + "learning_rate": 5.9339115563156605e-08, + "loss": 3.7902, + "step": 2365000 + }, + { + "epoch": 26.27, + "learning_rate": 5.932523376940675e-08, + "loss": 3.7736, + "step": 2365500 + }, + { + "epoch": 26.28, + "learning_rate": 5.931135197565689e-08, + "loss": 3.7741, + "step": 2366000 + }, + { + "epoch": 26.28, + "learning_rate": 5.9297470181907025e-08, + "loss": 3.7795, + "step": 2366500 + }, + { + "epoch": 26.29, + "learning_rate": 5.928358838815716e-08, + "loss": 3.7559, + "step": 2367000 + }, + { + "epoch": 26.29, + "learning_rate": 5.9269706594407296e-08, + "loss": 3.7731, + "step": 2367500 + }, + { + "epoch": 26.3, + "learning_rate": 5.925582480065744e-08, + "loss": 3.7804, + "step": 2368000 + }, + { + "epoch": 26.3, + "learning_rate": 5.924194300690758e-08, + "loss": 3.7877, + "step": 2368500 + }, + { + "epoch": 26.31, + "learning_rate": 5.922806121315772e-08, + "loss": 3.771, + "step": 2369000 + }, + { + "epoch": 26.31, + "learning_rate": 5.921417941940785e-08, + "loss": 3.76, + "step": 2369500 + }, + { + "epoch": 26.32, + "learning_rate": 5.9200297625657994e-08, + "loss": 3.7905, + "step": 2370000 + }, + { + "epoch": 26.33, + "learning_rate": 5.918641583190813e-08, + "loss": 3.7537, + "step": 2370500 + }, + { + "epoch": 26.33, + "learning_rate": 5.917253403815827e-08, + "loss": 3.7657, + "step": 2371000 + }, + { + "epoch": 26.34, + "learning_rate": 5.9158652244408414e-08, + "loss": 3.7689, + "step": 2371500 + }, + { + "epoch": 26.34, + "learning_rate": 5.9144770450658557e-08, + "loss": 3.7825, + "step": 2372000 + }, + { + "epoch": 26.35, + "learning_rate": 5.9130888656908686e-08, + "loss": 3.778, + "step": 2372500 + }, + { + "epoch": 26.35, + "learning_rate": 5.911700686315883e-08, + "loss": 3.7542, + "step": 2373000 + }, + { + "epoch": 26.36, + "learning_rate": 5.9103125069408963e-08, + "loss": 3.7827, + "step": 2373500 + }, + { + "epoch": 26.36, + "learning_rate": 5.9089243275659106e-08, + "loss": 3.7815, + "step": 2374000 + }, + { + "epoch": 26.37, + "learning_rate": 5.907536148190925e-08, + "loss": 3.7804, + "step": 2374500 + }, + { + "epoch": 26.38, + "learning_rate": 5.906147968815939e-08, + "loss": 3.7731, + "step": 2375000 + }, + { + "epoch": 26.38, + "learning_rate": 5.904759789440952e-08, + "loss": 3.765, + "step": 2375500 + }, + { + "epoch": 26.39, + "learning_rate": 5.903371610065966e-08, + "loss": 3.7802, + "step": 2376000 + }, + { + "epoch": 26.39, + "learning_rate": 5.90198343069098e-08, + "loss": 3.7828, + "step": 2376500 + }, + { + "epoch": 26.4, + "learning_rate": 5.900595251315994e-08, + "loss": 3.7764, + "step": 2377000 + }, + { + "epoch": 26.4, + "learning_rate": 5.899207071941008e-08, + "loss": 3.7724, + "step": 2377500 + }, + { + "epoch": 26.41, + "learning_rate": 5.897818892566021e-08, + "loss": 3.7816, + "step": 2378000 + }, + { + "epoch": 26.41, + "learning_rate": 5.896430713191035e-08, + "loss": 3.7665, + "step": 2378500 + }, + { + "epoch": 26.42, + "learning_rate": 5.8950425338160495e-08, + "loss": 3.7869, + "step": 2379000 + }, + { + "epoch": 26.43, + "learning_rate": 5.893654354441063e-08, + "loss": 3.7587, + "step": 2379500 + }, + { + "epoch": 26.43, + "learning_rate": 5.892266175066077e-08, + "loss": 3.7765, + "step": 2380000 + }, + { + "epoch": 26.44, + "learning_rate": 5.8908779956910915e-08, + "loss": 3.7869, + "step": 2380500 + }, + { + "epoch": 26.44, + "learning_rate": 5.8894898163161044e-08, + "loss": 3.7881, + "step": 2381000 + }, + { + "epoch": 26.45, + "learning_rate": 5.888101636941119e-08, + "loss": 3.7907, + "step": 2381500 + }, + { + "epoch": 26.45, + "learning_rate": 5.886713457566133e-08, + "loss": 3.775, + "step": 2382000 + }, + { + "epoch": 26.46, + "learning_rate": 5.8853252781911465e-08, + "loss": 3.7772, + "step": 2382500 + }, + { + "epoch": 26.46, + "learning_rate": 5.883937098816161e-08, + "loss": 3.776, + "step": 2383000 + }, + { + "epoch": 26.47, + "learning_rate": 5.8825489194411736e-08, + "loss": 3.7497, + "step": 2383500 + }, + { + "epoch": 26.48, + "learning_rate": 5.881160740066188e-08, + "loss": 3.7557, + "step": 2384000 + }, + { + "epoch": 26.48, + "learning_rate": 5.879772560691202e-08, + "loss": 3.7666, + "step": 2384500 + }, + { + "epoch": 26.49, + "learning_rate": 5.878384381316216e-08, + "loss": 3.7546, + "step": 2385000 + }, + { + "epoch": 26.49, + "learning_rate": 5.87699620194123e-08, + "loss": 3.7955, + "step": 2385500 + }, + { + "epoch": 26.5, + "learning_rate": 5.875608022566244e-08, + "loss": 3.7892, + "step": 2386000 + }, + { + "epoch": 26.5, + "learning_rate": 5.874219843191257e-08, + "loss": 3.8018, + "step": 2386500 + }, + { + "epoch": 26.51, + "learning_rate": 5.872831663816271e-08, + "loss": 3.7711, + "step": 2387000 + }, + { + "epoch": 26.51, + "learning_rate": 5.8714434844412854e-08, + "loss": 3.7864, + "step": 2387500 + }, + { + "epoch": 26.52, + "learning_rate": 5.870055305066299e-08, + "loss": 3.7797, + "step": 2388000 + }, + { + "epoch": 26.53, + "learning_rate": 5.868667125691313e-08, + "loss": 3.7586, + "step": 2388500 + }, + { + "epoch": 26.53, + "learning_rate": 5.8672789463163274e-08, + "loss": 3.7873, + "step": 2389000 + }, + { + "epoch": 26.54, + "learning_rate": 5.8658907669413403e-08, + "loss": 3.7757, + "step": 2389500 + }, + { + "epoch": 26.54, + "learning_rate": 5.8645025875663546e-08, + "loss": 3.7946, + "step": 2390000 + }, + { + "epoch": 26.55, + "learning_rate": 5.863114408191369e-08, + "loss": 3.7511, + "step": 2390500 + }, + { + "epoch": 26.55, + "learning_rate": 5.8617262288163824e-08, + "loss": 3.7815, + "step": 2391000 + }, + { + "epoch": 26.56, + "learning_rate": 5.8603380494413966e-08, + "loss": 3.7808, + "step": 2391500 + }, + { + "epoch": 26.56, + "learning_rate": 5.8589498700664095e-08, + "loss": 3.7613, + "step": 2392000 + }, + { + "epoch": 26.57, + "learning_rate": 5.857561690691424e-08, + "loss": 3.7756, + "step": 2392500 + }, + { + "epoch": 26.58, + "learning_rate": 5.856173511316438e-08, + "loss": 3.7898, + "step": 2393000 + }, + { + "epoch": 26.58, + "learning_rate": 5.854785331941452e-08, + "loss": 3.7744, + "step": 2393500 + }, + { + "epoch": 26.59, + "learning_rate": 5.853397152566466e-08, + "loss": 3.7509, + "step": 2394000 + }, + { + "epoch": 26.59, + "learning_rate": 5.85200897319148e-08, + "loss": 3.7573, + "step": 2394500 + }, + { + "epoch": 26.6, + "learning_rate": 5.850620793816493e-08, + "loss": 3.7545, + "step": 2395000 + }, + { + "epoch": 26.6, + "learning_rate": 5.849232614441507e-08, + "loss": 3.7856, + "step": 2395500 + }, + { + "epoch": 26.61, + "learning_rate": 5.847844435066521e-08, + "loss": 3.7902, + "step": 2396000 + }, + { + "epoch": 26.61, + "learning_rate": 5.8464562556915355e-08, + "loss": 3.7875, + "step": 2396500 + }, + { + "epoch": 26.62, + "learning_rate": 5.845068076316549e-08, + "loss": 3.7675, + "step": 2397000 + }, + { + "epoch": 26.63, + "learning_rate": 5.8436798969415633e-08, + "loss": 3.7775, + "step": 2397500 + }, + { + "epoch": 26.63, + "learning_rate": 5.842291717566576e-08, + "loss": 3.7969, + "step": 2398000 + }, + { + "epoch": 26.64, + "learning_rate": 5.8409035381915905e-08, + "loss": 3.7738, + "step": 2398500 + }, + { + "epoch": 26.64, + "learning_rate": 5.839515358816605e-08, + "loss": 3.775, + "step": 2399000 + }, + { + "epoch": 26.65, + "learning_rate": 5.838127179441619e-08, + "loss": 3.7892, + "step": 2399500 + }, + { + "epoch": 26.65, + "learning_rate": 5.8367390000666325e-08, + "loss": 3.7576, + "step": 2400000 + }, + { + "epoch": 26.66, + "learning_rate": 5.835350820691646e-08, + "loss": 3.7613, + "step": 2400500 + }, + { + "epoch": 26.66, + "learning_rate": 5.8339626413166596e-08, + "loss": 3.7818, + "step": 2401000 + }, + { + "epoch": 26.67, + "learning_rate": 5.832574461941674e-08, + "loss": 3.7678, + "step": 2401500 + }, + { + "epoch": 26.68, + "learning_rate": 5.831186282566688e-08, + "loss": 3.7704, + "step": 2402000 + }, + { + "epoch": 26.68, + "learning_rate": 5.829798103191702e-08, + "loss": 3.7732, + "step": 2402500 + }, + { + "epoch": 26.69, + "learning_rate": 5.828409923816716e-08, + "loss": 3.7856, + "step": 2403000 + }, + { + "epoch": 26.69, + "learning_rate": 5.8270217444417294e-08, + "loss": 3.753, + "step": 2403500 + }, + { + "epoch": 26.7, + "learning_rate": 5.825633565066743e-08, + "loss": 3.7683, + "step": 2404000 + }, + { + "epoch": 26.7, + "learning_rate": 5.824245385691757e-08, + "loss": 3.7785, + "step": 2404500 + }, + { + "epoch": 26.71, + "learning_rate": 5.8228572063167714e-08, + "loss": 3.7612, + "step": 2405000 + }, + { + "epoch": 26.71, + "learning_rate": 5.8214690269417857e-08, + "loss": 3.8029, + "step": 2405500 + }, + { + "epoch": 26.72, + "learning_rate": 5.8200808475667986e-08, + "loss": 3.7592, + "step": 2406000 + }, + { + "epoch": 26.73, + "learning_rate": 5.818692668191813e-08, + "loss": 3.7703, + "step": 2406500 + }, + { + "epoch": 26.73, + "learning_rate": 5.8173044888168264e-08, + "loss": 3.8063, + "step": 2407000 + }, + { + "epoch": 26.74, + "learning_rate": 5.8159163094418406e-08, + "loss": 3.7556, + "step": 2407500 + }, + { + "epoch": 26.74, + "learning_rate": 5.814528130066855e-08, + "loss": 3.7629, + "step": 2408000 + }, + { + "epoch": 26.75, + "learning_rate": 5.813139950691869e-08, + "loss": 3.7713, + "step": 2408500 + }, + { + "epoch": 26.75, + "learning_rate": 5.811751771316882e-08, + "loss": 3.7854, + "step": 2409000 + }, + { + "epoch": 26.76, + "learning_rate": 5.810363591941896e-08, + "loss": 3.7742, + "step": 2409500 + }, + { + "epoch": 26.76, + "learning_rate": 5.80897541256691e-08, + "loss": 3.7741, + "step": 2410000 + }, + { + "epoch": 26.77, + "learning_rate": 5.807587233191924e-08, + "loss": 3.7968, + "step": 2410500 + }, + { + "epoch": 26.78, + "learning_rate": 5.806199053816938e-08, + "loss": 3.786, + "step": 2411000 + }, + { + "epoch": 26.78, + "learning_rate": 5.8048108744419524e-08, + "loss": 3.7779, + "step": 2411500 + }, + { + "epoch": 26.79, + "learning_rate": 5.803422695066965e-08, + "loss": 3.7668, + "step": 2412000 + }, + { + "epoch": 26.79, + "learning_rate": 5.8020345156919795e-08, + "loss": 3.7873, + "step": 2412500 + }, + { + "epoch": 26.8, + "learning_rate": 5.800646336316993e-08, + "loss": 3.7635, + "step": 2413000 + }, + { + "epoch": 26.8, + "learning_rate": 5.799258156942007e-08, + "loss": 3.7599, + "step": 2413500 + }, + { + "epoch": 26.81, + "learning_rate": 5.7978699775670216e-08, + "loss": 3.7678, + "step": 2414000 + }, + { + "epoch": 26.81, + "learning_rate": 5.7964817981920345e-08, + "loss": 3.794, + "step": 2414500 + }, + { + "epoch": 26.82, + "learning_rate": 5.795093618817049e-08, + "loss": 3.7688, + "step": 2415000 + }, + { + "epoch": 26.83, + "learning_rate": 5.793705439442063e-08, + "loss": 3.7707, + "step": 2415500 + }, + { + "epoch": 26.83, + "learning_rate": 5.7923172600670765e-08, + "loss": 3.7452, + "step": 2416000 + }, + { + "epoch": 26.84, + "learning_rate": 5.790929080692091e-08, + "loss": 3.7764, + "step": 2416500 + }, + { + "epoch": 26.84, + "learning_rate": 5.789540901317105e-08, + "loss": 3.7758, + "step": 2417000 + }, + { + "epoch": 26.85, + "learning_rate": 5.788152721942118e-08, + "loss": 3.7747, + "step": 2417500 + }, + { + "epoch": 26.85, + "learning_rate": 5.786764542567132e-08, + "loss": 3.7881, + "step": 2418000 + }, + { + "epoch": 26.86, + "learning_rate": 5.7853763631921456e-08, + "loss": 3.7616, + "step": 2418500 + }, + { + "epoch": 26.86, + "learning_rate": 5.78398818381716e-08, + "loss": 3.7473, + "step": 2419000 + }, + { + "epoch": 26.87, + "learning_rate": 5.782600004442174e-08, + "loss": 3.7767, + "step": 2419500 + }, + { + "epoch": 26.88, + "learning_rate": 5.781211825067187e-08, + "loss": 3.7812, + "step": 2420000 + }, + { + "epoch": 26.88, + "learning_rate": 5.779823645692201e-08, + "loss": 3.7537, + "step": 2420500 + }, + { + "epoch": 26.89, + "learning_rate": 5.7784354663172154e-08, + "loss": 3.7691, + "step": 2421000 + }, + { + "epoch": 26.89, + "learning_rate": 5.777047286942229e-08, + "loss": 3.7589, + "step": 2421500 + }, + { + "epoch": 26.9, + "learning_rate": 5.775659107567243e-08, + "loss": 3.7915, + "step": 2422000 + }, + { + "epoch": 26.9, + "learning_rate": 5.7742709281922574e-08, + "loss": 3.7548, + "step": 2422500 + }, + { + "epoch": 26.91, + "learning_rate": 5.7728827488172704e-08, + "loss": 3.7735, + "step": 2423000 + }, + { + "epoch": 26.91, + "learning_rate": 5.7714945694422846e-08, + "loss": 3.7637, + "step": 2423500 + }, + { + "epoch": 26.92, + "learning_rate": 5.770106390067299e-08, + "loss": 3.7853, + "step": 2424000 + }, + { + "epoch": 26.93, + "learning_rate": 5.7687182106923124e-08, + "loss": 3.7713, + "step": 2424500 + }, + { + "epoch": 26.93, + "learning_rate": 5.7673300313173266e-08, + "loss": 3.769, + "step": 2425000 + }, + { + "epoch": 26.94, + "learning_rate": 5.765941851942341e-08, + "loss": 3.7731, + "step": 2425500 + }, + { + "epoch": 26.94, + "learning_rate": 5.764553672567354e-08, + "loss": 3.7869, + "step": 2426000 + }, + { + "epoch": 26.95, + "learning_rate": 5.763165493192368e-08, + "loss": 3.7782, + "step": 2426500 + }, + { + "epoch": 26.95, + "learning_rate": 5.761777313817382e-08, + "loss": 3.7669, + "step": 2427000 + }, + { + "epoch": 26.96, + "learning_rate": 5.760389134442396e-08, + "loss": 3.7904, + "step": 2427500 + }, + { + "epoch": 26.96, + "learning_rate": 5.75900095506741e-08, + "loss": 3.7676, + "step": 2428000 + }, + { + "epoch": 26.97, + "learning_rate": 5.757612775692423e-08, + "loss": 3.7673, + "step": 2428500 + }, + { + "epoch": 26.98, + "learning_rate": 5.756224596317437e-08, + "loss": 3.7503, + "step": 2429000 + }, + { + "epoch": 26.98, + "learning_rate": 5.754836416942451e-08, + "loss": 3.7921, + "step": 2429500 + }, + { + "epoch": 26.99, + "learning_rate": 5.7534482375674656e-08, + "loss": 3.7747, + "step": 2430000 + }, + { + "epoch": 26.99, + "learning_rate": 5.752060058192479e-08, + "loss": 3.7642, + "step": 2430500 + }, + { + "epoch": 27.0, + "learning_rate": 5.7506718788174933e-08, + "loss": 3.7778, + "step": 2431000 + }, + { + "epoch": 27.0, + "eval_loss": 3.839319944381714, + "eval_runtime": 6.3037, + "eval_samples_per_second": 246.521, + "step": 2431242 + }, + { + "epoch": 27.0, + "learning_rate": 5.749283699442506e-08, + "loss": 3.7644, + "step": 2431500 + }, + { + "epoch": 27.01, + "learning_rate": 5.7478955200675205e-08, + "loss": 3.7694, + "step": 2432000 + }, + { + "epoch": 27.01, + "learning_rate": 5.746507340692535e-08, + "loss": 3.7794, + "step": 2432500 + }, + { + "epoch": 27.02, + "learning_rate": 5.745119161317549e-08, + "loss": 3.7575, + "step": 2433000 + }, + { + "epoch": 27.03, + "learning_rate": 5.7437309819425625e-08, + "loss": 3.786, + "step": 2433500 + }, + { + "epoch": 27.03, + "learning_rate": 5.742342802567577e-08, + "loss": 3.7761, + "step": 2434000 + }, + { + "epoch": 27.04, + "learning_rate": 5.7409546231925896e-08, + "loss": 3.7711, + "step": 2434500 + }, + { + "epoch": 27.04, + "learning_rate": 5.739566443817604e-08, + "loss": 3.7599, + "step": 2435000 + }, + { + "epoch": 27.05, + "learning_rate": 5.738178264442618e-08, + "loss": 3.7726, + "step": 2435500 + }, + { + "epoch": 27.05, + "learning_rate": 5.736790085067632e-08, + "loss": 3.7816, + "step": 2436000 + }, + { + "epoch": 27.06, + "learning_rate": 5.735401905692646e-08, + "loss": 3.7926, + "step": 2436500 + }, + { + "epoch": 27.06, + "learning_rate": 5.7340137263176594e-08, + "loss": 3.77, + "step": 2437000 + }, + { + "epoch": 27.07, + "learning_rate": 5.732625546942673e-08, + "loss": 3.7489, + "step": 2437500 + }, + { + "epoch": 27.08, + "learning_rate": 5.731237367567687e-08, + "loss": 3.7666, + "step": 2438000 + }, + { + "epoch": 27.08, + "learning_rate": 5.7298491881927014e-08, + "loss": 3.7658, + "step": 2438500 + }, + { + "epoch": 27.09, + "learning_rate": 5.728461008817716e-08, + "loss": 3.785, + "step": 2439000 + }, + { + "epoch": 27.09, + "learning_rate": 5.727072829442729e-08, + "loss": 3.7628, + "step": 2439500 + }, + { + "epoch": 27.1, + "learning_rate": 5.725684650067743e-08, + "loss": 3.7657, + "step": 2440000 + }, + { + "epoch": 27.1, + "learning_rate": 5.7242964706927564e-08, + "loss": 3.7782, + "step": 2440500 + }, + { + "epoch": 27.11, + "learning_rate": 5.7229082913177706e-08, + "loss": 3.7781, + "step": 2441000 + }, + { + "epoch": 27.11, + "learning_rate": 5.721520111942785e-08, + "loss": 3.7833, + "step": 2441500 + }, + { + "epoch": 27.12, + "learning_rate": 5.720131932567799e-08, + "loss": 3.7717, + "step": 2442000 + }, + { + "epoch": 27.13, + "learning_rate": 5.718743753192812e-08, + "loss": 3.7749, + "step": 2442500 + }, + { + "epoch": 27.13, + "learning_rate": 5.717355573817826e-08, + "loss": 3.7478, + "step": 2443000 + }, + { + "epoch": 27.14, + "learning_rate": 5.71596739444284e-08, + "loss": 3.7702, + "step": 2443500 + }, + { + "epoch": 27.14, + "learning_rate": 5.714579215067854e-08, + "loss": 3.7641, + "step": 2444000 + }, + { + "epoch": 27.15, + "learning_rate": 5.713191035692868e-08, + "loss": 3.7758, + "step": 2444500 + }, + { + "epoch": 27.15, + "learning_rate": 5.7118028563178824e-08, + "loss": 3.7961, + "step": 2445000 + }, + { + "epoch": 27.16, + "learning_rate": 5.710414676942895e-08, + "loss": 3.7872, + "step": 2445500 + }, + { + "epoch": 27.16, + "learning_rate": 5.7090264975679095e-08, + "loss": 3.7932, + "step": 2446000 + }, + { + "epoch": 27.17, + "learning_rate": 5.707638318192923e-08, + "loss": 3.7592, + "step": 2446500 + }, + { + "epoch": 27.17, + "learning_rate": 5.7062501388179373e-08, + "loss": 3.761, + "step": 2447000 + }, + { + "epoch": 27.18, + "learning_rate": 5.7048619594429516e-08, + "loss": 3.7795, + "step": 2447500 + }, + { + "epoch": 27.19, + "learning_rate": 5.703473780067966e-08, + "loss": 3.7765, + "step": 2448000 + }, + { + "epoch": 27.19, + "learning_rate": 5.702085600692979e-08, + "loss": 3.7743, + "step": 2448500 + }, + { + "epoch": 27.2, + "learning_rate": 5.700697421317992e-08, + "loss": 3.7592, + "step": 2449000 + }, + { + "epoch": 27.2, + "learning_rate": 5.6993092419430065e-08, + "loss": 3.7796, + "step": 2449500 + }, + { + "epoch": 27.21, + "learning_rate": 5.697921062568021e-08, + "loss": 3.773, + "step": 2450000 + }, + { + "epoch": 27.21, + "learning_rate": 5.696532883193035e-08, + "loss": 3.768, + "step": 2450500 + }, + { + "epoch": 27.22, + "learning_rate": 5.695144703818048e-08, + "loss": 3.7485, + "step": 2451000 + }, + { + "epoch": 27.22, + "learning_rate": 5.693756524443062e-08, + "loss": 3.7807, + "step": 2451500 + }, + { + "epoch": 27.23, + "learning_rate": 5.6923683450680756e-08, + "loss": 3.7811, + "step": 2452000 + }, + { + "epoch": 27.24, + "learning_rate": 5.69098016569309e-08, + "loss": 3.7592, + "step": 2452500 + }, + { + "epoch": 27.24, + "learning_rate": 5.689591986318104e-08, + "loss": 3.7698, + "step": 2453000 + }, + { + "epoch": 27.25, + "learning_rate": 5.688203806943118e-08, + "loss": 3.7714, + "step": 2453500 + }, + { + "epoch": 27.25, + "learning_rate": 5.686815627568131e-08, + "loss": 3.77, + "step": 2454000 + }, + { + "epoch": 27.26, + "learning_rate": 5.6854274481931454e-08, + "loss": 3.7745, + "step": 2454500 + }, + { + "epoch": 27.26, + "learning_rate": 5.684039268818159e-08, + "loss": 3.7805, + "step": 2455000 + }, + { + "epoch": 27.27, + "learning_rate": 5.682651089443173e-08, + "loss": 3.7796, + "step": 2455500 + }, + { + "epoch": 27.27, + "learning_rate": 5.6812629100681875e-08, + "loss": 3.7723, + "step": 2456000 + }, + { + "epoch": 27.28, + "learning_rate": 5.6798747306932004e-08, + "loss": 3.7536, + "step": 2456500 + }, + { + "epoch": 27.29, + "learning_rate": 5.6784865513182146e-08, + "loss": 3.7881, + "step": 2457000 + }, + { + "epoch": 27.29, + "learning_rate": 5.677098371943229e-08, + "loss": 3.7827, + "step": 2457500 + }, + { + "epoch": 27.3, + "learning_rate": 5.6757101925682424e-08, + "loss": 3.7577, + "step": 2458000 + }, + { + "epoch": 27.3, + "learning_rate": 5.6743220131932566e-08, + "loss": 3.7379, + "step": 2458500 + }, + { + "epoch": 27.31, + "learning_rate": 5.672933833818271e-08, + "loss": 3.7722, + "step": 2459000 + }, + { + "epoch": 27.31, + "learning_rate": 5.671545654443284e-08, + "loss": 3.7742, + "step": 2459500 + }, + { + "epoch": 27.32, + "learning_rate": 5.670157475068298e-08, + "loss": 3.7498, + "step": 2460000 + }, + { + "epoch": 27.32, + "learning_rate": 5.668769295693312e-08, + "loss": 3.7747, + "step": 2460500 + }, + { + "epoch": 27.33, + "learning_rate": 5.667381116318326e-08, + "loss": 3.7791, + "step": 2461000 + }, + { + "epoch": 27.34, + "learning_rate": 5.66599293694334e-08, + "loss": 3.7614, + "step": 2461500 + }, + { + "epoch": 27.34, + "learning_rate": 5.664604757568354e-08, + "loss": 3.7726, + "step": 2462000 + }, + { + "epoch": 27.35, + "learning_rate": 5.663216578193367e-08, + "loss": 3.7659, + "step": 2462500 + }, + { + "epoch": 27.35, + "learning_rate": 5.6618283988183813e-08, + "loss": 3.7647, + "step": 2463000 + }, + { + "epoch": 27.36, + "learning_rate": 5.6604402194433956e-08, + "loss": 3.7823, + "step": 2463500 + }, + { + "epoch": 27.36, + "learning_rate": 5.659052040068409e-08, + "loss": 3.747, + "step": 2464000 + }, + { + "epoch": 27.37, + "learning_rate": 5.6576638606934233e-08, + "loss": 3.7872, + "step": 2464500 + }, + { + "epoch": 27.37, + "learning_rate": 5.656275681318436e-08, + "loss": 3.7819, + "step": 2465000 + }, + { + "epoch": 27.38, + "learning_rate": 5.6548875019434505e-08, + "loss": 3.779, + "step": 2465500 + }, + { + "epoch": 27.39, + "learning_rate": 5.653499322568465e-08, + "loss": 3.7772, + "step": 2466000 + }, + { + "epoch": 27.39, + "learning_rate": 5.652111143193479e-08, + "loss": 3.7762, + "step": 2466500 + }, + { + "epoch": 27.4, + "learning_rate": 5.6507229638184925e-08, + "loss": 3.7859, + "step": 2467000 + }, + { + "epoch": 27.4, + "learning_rate": 5.649334784443507e-08, + "loss": 3.7633, + "step": 2467500 + }, + { + "epoch": 27.41, + "learning_rate": 5.6479466050685196e-08, + "loss": 3.7559, + "step": 2468000 + }, + { + "epoch": 27.41, + "learning_rate": 5.646558425693534e-08, + "loss": 3.7681, + "step": 2468500 + }, + { + "epoch": 27.42, + "learning_rate": 5.645170246318548e-08, + "loss": 3.7661, + "step": 2469000 + }, + { + "epoch": 27.42, + "learning_rate": 5.643782066943562e-08, + "loss": 3.7617, + "step": 2469500 + }, + { + "epoch": 27.43, + "learning_rate": 5.642393887568576e-08, + "loss": 3.7621, + "step": 2470000 + }, + { + "epoch": 27.44, + "learning_rate": 5.64100570819359e-08, + "loss": 3.7585, + "step": 2470500 + }, + { + "epoch": 27.44, + "learning_rate": 5.639617528818603e-08, + "loss": 3.7719, + "step": 2471000 + }, + { + "epoch": 27.45, + "learning_rate": 5.638229349443617e-08, + "loss": 3.7611, + "step": 2471500 + }, + { + "epoch": 27.45, + "learning_rate": 5.6368411700686315e-08, + "loss": 3.7682, + "step": 2472000 + }, + { + "epoch": 27.46, + "learning_rate": 5.635452990693646e-08, + "loss": 3.7916, + "step": 2472500 + }, + { + "epoch": 27.46, + "learning_rate": 5.634064811318659e-08, + "loss": 3.7689, + "step": 2473000 + }, + { + "epoch": 27.47, + "learning_rate": 5.632676631943673e-08, + "loss": 3.7816, + "step": 2473500 + }, + { + "epoch": 27.47, + "learning_rate": 5.6312884525686864e-08, + "loss": 3.772, + "step": 2474000 + }, + { + "epoch": 27.48, + "learning_rate": 5.6299002731937006e-08, + "loss": 3.7815, + "step": 2474500 + }, + { + "epoch": 27.49, + "learning_rate": 5.628512093818715e-08, + "loss": 3.7819, + "step": 2475000 + }, + { + "epoch": 27.49, + "learning_rate": 5.627123914443729e-08, + "loss": 3.7742, + "step": 2475500 + }, + { + "epoch": 27.5, + "learning_rate": 5.6257357350687426e-08, + "loss": 3.7708, + "step": 2476000 + }, + { + "epoch": 27.5, + "learning_rate": 5.624347555693756e-08, + "loss": 3.7639, + "step": 2476500 + }, + { + "epoch": 27.51, + "learning_rate": 5.62295937631877e-08, + "loss": 3.7575, + "step": 2477000 + }, + { + "epoch": 27.51, + "learning_rate": 5.621571196943784e-08, + "loss": 3.7897, + "step": 2477500 + }, + { + "epoch": 27.52, + "learning_rate": 5.620183017568798e-08, + "loss": 3.7764, + "step": 2478000 + }, + { + "epoch": 27.52, + "learning_rate": 5.6187948381938124e-08, + "loss": 3.7682, + "step": 2478500 + }, + { + "epoch": 27.53, + "learning_rate": 5.617406658818825e-08, + "loss": 3.7814, + "step": 2479000 + }, + { + "epoch": 27.54, + "learning_rate": 5.616018479443839e-08, + "loss": 3.7725, + "step": 2479500 + }, + { + "epoch": 27.54, + "learning_rate": 5.614630300068853e-08, + "loss": 3.7657, + "step": 2480000 + }, + { + "epoch": 27.55, + "learning_rate": 5.6132421206938673e-08, + "loss": 3.7681, + "step": 2480500 + }, + { + "epoch": 27.55, + "learning_rate": 5.6118539413188816e-08, + "loss": 3.7775, + "step": 2481000 + }, + { + "epoch": 27.56, + "learning_rate": 5.610465761943896e-08, + "loss": 3.7931, + "step": 2481500 + }, + { + "epoch": 27.56, + "learning_rate": 5.609077582568909e-08, + "loss": 3.8074, + "step": 2482000 + }, + { + "epoch": 27.57, + "learning_rate": 5.607689403193922e-08, + "loss": 3.783, + "step": 2482500 + }, + { + "epoch": 27.57, + "learning_rate": 5.6063012238189365e-08, + "loss": 3.7563, + "step": 2483000 + }, + { + "epoch": 27.58, + "learning_rate": 5.604913044443951e-08, + "loss": 3.766, + "step": 2483500 + }, + { + "epoch": 27.59, + "learning_rate": 5.603524865068965e-08, + "loss": 3.7731, + "step": 2484000 + }, + { + "epoch": 27.59, + "learning_rate": 5.602136685693979e-08, + "loss": 3.7682, + "step": 2484500 + }, + { + "epoch": 27.6, + "learning_rate": 5.600748506318992e-08, + "loss": 3.7857, + "step": 2485000 + }, + { + "epoch": 27.6, + "learning_rate": 5.5993603269440056e-08, + "loss": 3.7857, + "step": 2485500 + }, + { + "epoch": 27.61, + "learning_rate": 5.59797214756902e-08, + "loss": 3.765, + "step": 2486000 + }, + { + "epoch": 27.61, + "learning_rate": 5.596583968194034e-08, + "loss": 3.7875, + "step": 2486500 + }, + { + "epoch": 27.62, + "learning_rate": 5.595195788819048e-08, + "loss": 3.7687, + "step": 2487000 + }, + { + "epoch": 27.62, + "learning_rate": 5.593807609444061e-08, + "loss": 3.7513, + "step": 2487500 + }, + { + "epoch": 27.63, + "learning_rate": 5.5924194300690754e-08, + "loss": 3.7733, + "step": 2488000 + }, + { + "epoch": 27.64, + "learning_rate": 5.591031250694089e-08, + "loss": 3.7782, + "step": 2488500 + }, + { + "epoch": 27.64, + "learning_rate": 5.589643071319103e-08, + "loss": 3.7516, + "step": 2489000 + }, + { + "epoch": 27.65, + "learning_rate": 5.5882548919441175e-08, + "loss": 3.7798, + "step": 2489500 + }, + { + "epoch": 27.65, + "learning_rate": 5.586866712569132e-08, + "loss": 3.7559, + "step": 2490000 + }, + { + "epoch": 27.66, + "learning_rate": 5.5854785331941446e-08, + "loss": 3.7728, + "step": 2490500 + }, + { + "epoch": 27.66, + "learning_rate": 5.584090353819159e-08, + "loss": 3.7841, + "step": 2491000 + }, + { + "epoch": 27.67, + "learning_rate": 5.5827021744441724e-08, + "loss": 3.7556, + "step": 2491500 + }, + { + "epoch": 27.67, + "learning_rate": 5.5813139950691866e-08, + "loss": 3.7736, + "step": 2492000 + }, + { + "epoch": 27.68, + "learning_rate": 5.579925815694201e-08, + "loss": 3.7714, + "step": 2492500 + }, + { + "epoch": 27.69, + "learning_rate": 5.578537636319215e-08, + "loss": 3.7686, + "step": 2493000 + }, + { + "epoch": 27.69, + "learning_rate": 5.577149456944228e-08, + "loss": 3.7593, + "step": 2493500 + }, + { + "epoch": 27.7, + "learning_rate": 5.575761277569242e-08, + "loss": 3.7764, + "step": 2494000 + }, + { + "epoch": 27.7, + "learning_rate": 5.574373098194256e-08, + "loss": 3.7686, + "step": 2494500 + }, + { + "epoch": 27.71, + "learning_rate": 5.57298491881927e-08, + "loss": 3.7731, + "step": 2495000 + }, + { + "epoch": 27.71, + "learning_rate": 5.571596739444284e-08, + "loss": 3.7633, + "step": 2495500 + }, + { + "epoch": 27.72, + "learning_rate": 5.570208560069297e-08, + "loss": 3.7755, + "step": 2496000 + }, + { + "epoch": 27.72, + "learning_rate": 5.5688203806943113e-08, + "loss": 3.775, + "step": 2496500 + }, + { + "epoch": 27.73, + "learning_rate": 5.5674322013193256e-08, + "loss": 3.7678, + "step": 2497000 + }, + { + "epoch": 27.74, + "learning_rate": 5.566044021944339e-08, + "loss": 3.7694, + "step": 2497500 + }, + { + "epoch": 27.74, + "learning_rate": 5.5646558425693534e-08, + "loss": 3.7903, + "step": 2498000 + }, + { + "epoch": 27.75, + "learning_rate": 5.5632676631943676e-08, + "loss": 3.7774, + "step": 2498500 + }, + { + "epoch": 27.75, + "learning_rate": 5.5618794838193805e-08, + "loss": 3.7595, + "step": 2499000 + }, + { + "epoch": 27.76, + "learning_rate": 5.560491304444395e-08, + "loss": 3.7634, + "step": 2499500 + }, + { + "epoch": 27.76, + "learning_rate": 5.559103125069409e-08, + "loss": 3.7895, + "step": 2500000 + }, + { + "epoch": 27.77, + "learning_rate": 5.5577149456944225e-08, + "loss": 3.7898, + "step": 2500500 + }, + { + "epoch": 27.77, + "learning_rate": 5.556326766319437e-08, + "loss": 3.7875, + "step": 2501000 + }, + { + "epoch": 27.78, + "learning_rate": 5.5549385869444496e-08, + "loss": 3.7768, + "step": 2501500 + }, + { + "epoch": 27.79, + "learning_rate": 5.553550407569464e-08, + "loss": 3.7725, + "step": 2502000 + }, + { + "epoch": 27.79, + "learning_rate": 5.552162228194478e-08, + "loss": 3.7867, + "step": 2502500 + }, + { + "epoch": 27.8, + "learning_rate": 5.550774048819492e-08, + "loss": 3.7657, + "step": 2503000 + }, + { + "epoch": 27.8, + "learning_rate": 5.549385869444506e-08, + "loss": 3.7743, + "step": 2503500 + }, + { + "epoch": 27.81, + "learning_rate": 5.54799769006952e-08, + "loss": 3.7864, + "step": 2504000 + }, + { + "epoch": 27.81, + "learning_rate": 5.546609510694533e-08, + "loss": 3.7541, + "step": 2504500 + }, + { + "epoch": 27.82, + "learning_rate": 5.545221331319547e-08, + "loss": 3.7544, + "step": 2505000 + }, + { + "epoch": 27.82, + "learning_rate": 5.5438331519445615e-08, + "loss": 3.7779, + "step": 2505500 + }, + { + "epoch": 27.83, + "learning_rate": 5.542444972569576e-08, + "loss": 3.7703, + "step": 2506000 + }, + { + "epoch": 27.84, + "learning_rate": 5.541056793194589e-08, + "loss": 3.7863, + "step": 2506500 + }, + { + "epoch": 27.84, + "learning_rate": 5.5396686138196035e-08, + "loss": 3.7769, + "step": 2507000 + }, + { + "epoch": 27.85, + "learning_rate": 5.5382804344446164e-08, + "loss": 3.7691, + "step": 2507500 + }, + { + "epoch": 27.85, + "learning_rate": 5.5368922550696306e-08, + "loss": 3.7773, + "step": 2508000 + }, + { + "epoch": 27.86, + "learning_rate": 5.535504075694645e-08, + "loss": 3.7756, + "step": 2508500 + }, + { + "epoch": 27.86, + "learning_rate": 5.534115896319659e-08, + "loss": 3.7769, + "step": 2509000 + }, + { + "epoch": 27.87, + "learning_rate": 5.5327277169446726e-08, + "loss": 3.7518, + "step": 2509500 + }, + { + "epoch": 27.87, + "learning_rate": 5.5313395375696855e-08, + "loss": 3.7714, + "step": 2510000 + }, + { + "epoch": 27.88, + "learning_rate": 5.5299513581947e-08, + "loss": 3.7649, + "step": 2510500 + }, + { + "epoch": 27.89, + "learning_rate": 5.528563178819714e-08, + "loss": 3.7651, + "step": 2511000 + }, + { + "epoch": 27.89, + "learning_rate": 5.527174999444728e-08, + "loss": 3.7759, + "step": 2511500 + }, + { + "epoch": 27.9, + "learning_rate": 5.5257868200697424e-08, + "loss": 3.7776, + "step": 2512000 + }, + { + "epoch": 27.9, + "learning_rate": 5.524398640694756e-08, + "loss": 3.7678, + "step": 2512500 + }, + { + "epoch": 27.91, + "learning_rate": 5.523010461319769e-08, + "loss": 3.7774, + "step": 2513000 + }, + { + "epoch": 27.91, + "learning_rate": 5.521622281944783e-08, + "loss": 3.7678, + "step": 2513500 + }, + { + "epoch": 27.92, + "learning_rate": 5.5202341025697974e-08, + "loss": 3.7747, + "step": 2514000 + }, + { + "epoch": 27.92, + "learning_rate": 5.5188459231948116e-08, + "loss": 3.7619, + "step": 2514500 + }, + { + "epoch": 27.93, + "learning_rate": 5.517457743819826e-08, + "loss": 3.7457, + "step": 2515000 + }, + { + "epoch": 27.94, + "learning_rate": 5.516069564444839e-08, + "loss": 3.7715, + "step": 2515500 + }, + { + "epoch": 27.94, + "learning_rate": 5.514681385069852e-08, + "loss": 3.7597, + "step": 2516000 + }, + { + "epoch": 27.95, + "learning_rate": 5.5132932056948665e-08, + "loss": 3.7613, + "step": 2516500 + }, + { + "epoch": 27.95, + "learning_rate": 5.511905026319881e-08, + "loss": 3.7777, + "step": 2517000 + }, + { + "epoch": 27.96, + "learning_rate": 5.510516846944895e-08, + "loss": 3.7765, + "step": 2517500 + }, + { + "epoch": 27.96, + "learning_rate": 5.509128667569909e-08, + "loss": 3.77, + "step": 2518000 + }, + { + "epoch": 27.97, + "learning_rate": 5.507740488194922e-08, + "loss": 3.7971, + "step": 2518500 + }, + { + "epoch": 27.97, + "learning_rate": 5.5063523088199356e-08, + "loss": 3.7718, + "step": 2519000 + }, + { + "epoch": 27.98, + "learning_rate": 5.50496412944495e-08, + "loss": 3.7833, + "step": 2519500 + }, + { + "epoch": 27.99, + "learning_rate": 5.503575950069964e-08, + "loss": 3.7656, + "step": 2520000 + }, + { + "epoch": 27.99, + "learning_rate": 5.502187770694978e-08, + "loss": 3.7643, + "step": 2520500 + }, + { + "epoch": 28.0, + "learning_rate": 5.5007995913199926e-08, + "loss": 3.7503, + "step": 2521000 + }, + { + "epoch": 28.0, + "eval_loss": 3.837568759918213, + "eval_runtime": 6.3037, + "eval_samples_per_second": 246.521, + "step": 2521288 + }, + { + "epoch": 28.0, + "learning_rate": 5.4994114119450055e-08, + "loss": 3.7668, + "step": 2521500 + }, + { + "epoch": 28.01, + "learning_rate": 5.498023232570019e-08, + "loss": 3.7821, + "step": 2522000 + }, + { + "epoch": 28.01, + "learning_rate": 5.496635053195033e-08, + "loss": 3.7631, + "step": 2522500 + }, + { + "epoch": 28.02, + "learning_rate": 5.4952468738200475e-08, + "loss": 3.8038, + "step": 2523000 + }, + { + "epoch": 28.02, + "learning_rate": 5.493858694445062e-08, + "loss": 3.7596, + "step": 2523500 + }, + { + "epoch": 28.03, + "learning_rate": 5.4924705150700746e-08, + "loss": 3.7611, + "step": 2524000 + }, + { + "epoch": 28.04, + "learning_rate": 5.491082335695089e-08, + "loss": 3.7857, + "step": 2524500 + }, + { + "epoch": 28.04, + "learning_rate": 5.4896941563201024e-08, + "loss": 3.7609, + "step": 2525000 + }, + { + "epoch": 28.05, + "learning_rate": 5.4883059769451166e-08, + "loss": 3.7681, + "step": 2525500 + }, + { + "epoch": 28.05, + "learning_rate": 5.486917797570131e-08, + "loss": 3.7794, + "step": 2526000 + }, + { + "epoch": 28.06, + "learning_rate": 5.485529618195145e-08, + "loss": 3.7833, + "step": 2526500 + }, + { + "epoch": 28.06, + "learning_rate": 5.484141438820158e-08, + "loss": 3.7641, + "step": 2527000 + }, + { + "epoch": 28.07, + "learning_rate": 5.482753259445172e-08, + "loss": 3.7664, + "step": 2527500 + }, + { + "epoch": 28.07, + "learning_rate": 5.481365080070186e-08, + "loss": 3.7649, + "step": 2528000 + }, + { + "epoch": 28.08, + "learning_rate": 5.4799769006952e-08, + "loss": 3.7749, + "step": 2528500 + }, + { + "epoch": 28.09, + "learning_rate": 5.478588721320214e-08, + "loss": 3.7693, + "step": 2529000 + }, + { + "epoch": 28.09, + "learning_rate": 5.4772005419452284e-08, + "loss": 3.7652, + "step": 2529500 + }, + { + "epoch": 28.1, + "learning_rate": 5.4758123625702413e-08, + "loss": 3.7573, + "step": 2530000 + }, + { + "epoch": 28.1, + "learning_rate": 5.4744241831952556e-08, + "loss": 3.7508, + "step": 2530500 + }, + { + "epoch": 28.11, + "learning_rate": 5.473036003820269e-08, + "loss": 3.7849, + "step": 2531000 + }, + { + "epoch": 28.11, + "learning_rate": 5.4716478244452834e-08, + "loss": 3.7644, + "step": 2531500 + }, + { + "epoch": 28.12, + "learning_rate": 5.4702596450702976e-08, + "loss": 3.7669, + "step": 2532000 + }, + { + "epoch": 28.12, + "learning_rate": 5.4688714656953105e-08, + "loss": 3.7671, + "step": 2532500 + }, + { + "epoch": 28.13, + "learning_rate": 5.467483286320325e-08, + "loss": 3.7686, + "step": 2533000 + }, + { + "epoch": 28.14, + "learning_rate": 5.466095106945339e-08, + "loss": 3.7706, + "step": 2533500 + }, + { + "epoch": 28.14, + "learning_rate": 5.4647069275703525e-08, + "loss": 3.7591, + "step": 2534000 + }, + { + "epoch": 28.15, + "learning_rate": 5.463318748195367e-08, + "loss": 3.7771, + "step": 2534500 + }, + { + "epoch": 28.15, + "learning_rate": 5.461930568820381e-08, + "loss": 3.7745, + "step": 2535000 + }, + { + "epoch": 28.16, + "learning_rate": 5.460542389445394e-08, + "loss": 3.7868, + "step": 2535500 + }, + { + "epoch": 28.16, + "learning_rate": 5.459154210070408e-08, + "loss": 3.7652, + "step": 2536000 + }, + { + "epoch": 28.17, + "learning_rate": 5.457766030695422e-08, + "loss": 3.7696, + "step": 2536500 + }, + { + "epoch": 28.17, + "learning_rate": 5.456377851320436e-08, + "loss": 3.7645, + "step": 2537000 + }, + { + "epoch": 28.18, + "learning_rate": 5.45498967194545e-08, + "loss": 3.7591, + "step": 2537500 + }, + { + "epoch": 28.19, + "learning_rate": 5.453601492570463e-08, + "loss": 3.7625, + "step": 2538000 + }, + { + "epoch": 28.19, + "learning_rate": 5.452213313195477e-08, + "loss": 3.7703, + "step": 2538500 + }, + { + "epoch": 28.2, + "learning_rate": 5.4508251338204915e-08, + "loss": 3.7596, + "step": 2539000 + }, + { + "epoch": 28.2, + "learning_rate": 5.449436954445506e-08, + "loss": 3.7648, + "step": 2539500 + }, + { + "epoch": 28.21, + "learning_rate": 5.448048775070519e-08, + "loss": 3.7689, + "step": 2540000 + }, + { + "epoch": 28.21, + "learning_rate": 5.4466605956955335e-08, + "loss": 3.7598, + "step": 2540500 + }, + { + "epoch": 28.22, + "learning_rate": 5.4452724163205464e-08, + "loss": 3.7659, + "step": 2541000 + }, + { + "epoch": 28.22, + "learning_rate": 5.4438842369455606e-08, + "loss": 3.7738, + "step": 2541500 + }, + { + "epoch": 28.23, + "learning_rate": 5.442496057570575e-08, + "loss": 3.7589, + "step": 2542000 + }, + { + "epoch": 28.24, + "learning_rate": 5.441107878195589e-08, + "loss": 3.7769, + "step": 2542500 + }, + { + "epoch": 28.24, + "learning_rate": 5.4397196988206026e-08, + "loss": 3.7698, + "step": 2543000 + }, + { + "epoch": 28.25, + "learning_rate": 5.438331519445617e-08, + "loss": 3.773, + "step": 2543500 + }, + { + "epoch": 28.25, + "learning_rate": 5.43694334007063e-08, + "loss": 3.7597, + "step": 2544000 + }, + { + "epoch": 28.26, + "learning_rate": 5.435555160695644e-08, + "loss": 3.7762, + "step": 2544500 + }, + { + "epoch": 28.26, + "learning_rate": 5.434166981320658e-08, + "loss": 3.7725, + "step": 2545000 + }, + { + "epoch": 28.27, + "learning_rate": 5.4327788019456724e-08, + "loss": 3.773, + "step": 2545500 + }, + { + "epoch": 28.27, + "learning_rate": 5.431390622570686e-08, + "loss": 3.7687, + "step": 2546000 + }, + { + "epoch": 28.28, + "learning_rate": 5.430002443195699e-08, + "loss": 3.7913, + "step": 2546500 + }, + { + "epoch": 28.29, + "learning_rate": 5.428614263820713e-08, + "loss": 3.7714, + "step": 2547000 + }, + { + "epoch": 28.29, + "learning_rate": 5.4272260844457274e-08, + "loss": 3.7808, + "step": 2547500 + }, + { + "epoch": 28.3, + "learning_rate": 5.4258379050707416e-08, + "loss": 3.784, + "step": 2548000 + }, + { + "epoch": 28.3, + "learning_rate": 5.424449725695756e-08, + "loss": 3.7787, + "step": 2548500 + }, + { + "epoch": 28.31, + "learning_rate": 5.4230615463207694e-08, + "loss": 3.7822, + "step": 2549000 + }, + { + "epoch": 28.31, + "learning_rate": 5.421673366945782e-08, + "loss": 3.7806, + "step": 2549500 + }, + { + "epoch": 28.32, + "learning_rate": 5.4202851875707965e-08, + "loss": 3.7723, + "step": 2550000 + }, + { + "epoch": 28.32, + "learning_rate": 5.418897008195811e-08, + "loss": 3.7749, + "step": 2550500 + }, + { + "epoch": 28.33, + "learning_rate": 5.417508828820825e-08, + "loss": 3.757, + "step": 2551000 + }, + { + "epoch": 28.34, + "learning_rate": 5.416120649445839e-08, + "loss": 3.7731, + "step": 2551500 + }, + { + "epoch": 28.34, + "learning_rate": 5.414732470070852e-08, + "loss": 3.7605, + "step": 2552000 + }, + { + "epoch": 28.35, + "learning_rate": 5.4133442906958657e-08, + "loss": 3.7846, + "step": 2552500 + }, + { + "epoch": 28.35, + "learning_rate": 5.41195611132088e-08, + "loss": 3.7525, + "step": 2553000 + }, + { + "epoch": 28.36, + "learning_rate": 5.410567931945894e-08, + "loss": 3.7738, + "step": 2553500 + }, + { + "epoch": 28.36, + "learning_rate": 5.4091797525709083e-08, + "loss": 3.7572, + "step": 2554000 + }, + { + "epoch": 28.37, + "learning_rate": 5.4077915731959226e-08, + "loss": 3.77, + "step": 2554500 + }, + { + "epoch": 28.37, + "learning_rate": 5.4064033938209355e-08, + "loss": 3.7914, + "step": 2555000 + }, + { + "epoch": 28.38, + "learning_rate": 5.405015214445949e-08, + "loss": 3.7527, + "step": 2555500 + }, + { + "epoch": 28.39, + "learning_rate": 5.403627035070963e-08, + "loss": 3.7717, + "step": 2556000 + }, + { + "epoch": 28.39, + "learning_rate": 5.4022388556959775e-08, + "loss": 3.7775, + "step": 2556500 + }, + { + "epoch": 28.4, + "learning_rate": 5.400850676320992e-08, + "loss": 3.7579, + "step": 2557000 + }, + { + "epoch": 28.4, + "learning_rate": 5.399462496946006e-08, + "loss": 3.7778, + "step": 2557500 + }, + { + "epoch": 28.41, + "learning_rate": 5.398074317571019e-08, + "loss": 3.7643, + "step": 2558000 + }, + { + "epoch": 28.41, + "learning_rate": 5.3966861381960324e-08, + "loss": 3.785, + "step": 2558500 + }, + { + "epoch": 28.42, + "learning_rate": 5.3952979588210466e-08, + "loss": 3.7529, + "step": 2559000 + }, + { + "epoch": 28.42, + "learning_rate": 5.393909779446061e-08, + "loss": 3.7797, + "step": 2559500 + }, + { + "epoch": 28.43, + "learning_rate": 5.392521600071075e-08, + "loss": 3.7663, + "step": 2560000 + }, + { + "epoch": 28.44, + "learning_rate": 5.391133420696088e-08, + "loss": 3.7744, + "step": 2560500 + }, + { + "epoch": 28.44, + "learning_rate": 5.389745241321102e-08, + "loss": 3.7721, + "step": 2561000 + }, + { + "epoch": 28.45, + "learning_rate": 5.388357061946116e-08, + "loss": 3.7684, + "step": 2561500 + }, + { + "epoch": 28.45, + "learning_rate": 5.38696888257113e-08, + "loss": 3.7405, + "step": 2562000 + }, + { + "epoch": 28.46, + "learning_rate": 5.385580703196144e-08, + "loss": 3.7801, + "step": 2562500 + }, + { + "epoch": 28.46, + "learning_rate": 5.3841925238211585e-08, + "loss": 3.7697, + "step": 2563000 + }, + { + "epoch": 28.47, + "learning_rate": 5.3828043444461714e-08, + "loss": 3.7516, + "step": 2563500 + }, + { + "epoch": 28.47, + "learning_rate": 5.3814161650711856e-08, + "loss": 3.7671, + "step": 2564000 + }, + { + "epoch": 28.48, + "learning_rate": 5.380027985696199e-08, + "loss": 3.7682, + "step": 2564500 + }, + { + "epoch": 28.49, + "learning_rate": 5.3786398063212134e-08, + "loss": 3.7763, + "step": 2565000 + }, + { + "epoch": 28.49, + "learning_rate": 5.3772516269462276e-08, + "loss": 3.7905, + "step": 2565500 + }, + { + "epoch": 28.5, + "learning_rate": 5.375863447571242e-08, + "loss": 3.7611, + "step": 2566000 + }, + { + "epoch": 28.5, + "learning_rate": 5.374475268196255e-08, + "loss": 3.7803, + "step": 2566500 + }, + { + "epoch": 28.51, + "learning_rate": 5.373087088821269e-08, + "loss": 3.7443, + "step": 2567000 + }, + { + "epoch": 28.51, + "learning_rate": 5.3716989094462825e-08, + "loss": 3.7675, + "step": 2567500 + }, + { + "epoch": 28.52, + "learning_rate": 5.370310730071297e-08, + "loss": 3.759, + "step": 2568000 + }, + { + "epoch": 28.52, + "learning_rate": 5.368922550696311e-08, + "loss": 3.7613, + "step": 2568500 + }, + { + "epoch": 28.53, + "learning_rate": 5.367534371321324e-08, + "loss": 3.7768, + "step": 2569000 + }, + { + "epoch": 28.54, + "learning_rate": 5.366146191946338e-08, + "loss": 3.7792, + "step": 2569500 + }, + { + "epoch": 28.54, + "learning_rate": 5.364758012571352e-08, + "loss": 3.7777, + "step": 2570000 + }, + { + "epoch": 28.55, + "learning_rate": 5.363369833196366e-08, + "loss": 3.7599, + "step": 2570500 + }, + { + "epoch": 28.55, + "learning_rate": 5.36198165382138e-08, + "loss": 3.76, + "step": 2571000 + }, + { + "epoch": 28.56, + "learning_rate": 5.3605934744463943e-08, + "loss": 3.7725, + "step": 2571500 + }, + { + "epoch": 28.56, + "learning_rate": 5.359205295071407e-08, + "loss": 3.7684, + "step": 2572000 + }, + { + "epoch": 28.57, + "learning_rate": 5.3578171156964215e-08, + "loss": 3.7799, + "step": 2572500 + }, + { + "epoch": 28.57, + "learning_rate": 5.356428936321436e-08, + "loss": 3.7608, + "step": 2573000 + }, + { + "epoch": 28.58, + "learning_rate": 5.355040756946449e-08, + "loss": 3.7667, + "step": 2573500 + }, + { + "epoch": 28.59, + "learning_rate": 5.3536525775714635e-08, + "loss": 3.7679, + "step": 2574000 + }, + { + "epoch": 28.59, + "learning_rate": 5.3522643981964764e-08, + "loss": 3.7654, + "step": 2574500 + }, + { + "epoch": 28.6, + "learning_rate": 5.3508762188214906e-08, + "loss": 3.7837, + "step": 2575000 + }, + { + "epoch": 28.6, + "learning_rate": 5.349488039446505e-08, + "loss": 3.7658, + "step": 2575500 + }, + { + "epoch": 28.61, + "learning_rate": 5.348099860071519e-08, + "loss": 3.7712, + "step": 2576000 + }, + { + "epoch": 28.61, + "learning_rate": 5.3467116806965326e-08, + "loss": 3.7651, + "step": 2576500 + }, + { + "epoch": 28.62, + "learning_rate": 5.345323501321547e-08, + "loss": 3.7747, + "step": 2577000 + }, + { + "epoch": 28.62, + "learning_rate": 5.34393532194656e-08, + "loss": 3.7698, + "step": 2577500 + }, + { + "epoch": 28.63, + "learning_rate": 5.342547142571574e-08, + "loss": 3.775, + "step": 2578000 + }, + { + "epoch": 28.64, + "learning_rate": 5.341158963196588e-08, + "loss": 3.766, + "step": 2578500 + }, + { + "epoch": 28.64, + "learning_rate": 5.3397707838216024e-08, + "loss": 3.769, + "step": 2579000 + }, + { + "epoch": 28.65, + "learning_rate": 5.338382604446616e-08, + "loss": 3.7623, + "step": 2579500 + }, + { + "epoch": 28.65, + "learning_rate": 5.33699442507163e-08, + "loss": 3.7738, + "step": 2580000 + }, + { + "epoch": 28.66, + "learning_rate": 5.335606245696643e-08, + "loss": 3.7875, + "step": 2580500 + }, + { + "epoch": 28.66, + "learning_rate": 5.3342180663216574e-08, + "loss": 3.7705, + "step": 2581000 + }, + { + "epoch": 28.67, + "learning_rate": 5.3328298869466716e-08, + "loss": 3.7491, + "step": 2581500 + }, + { + "epoch": 28.67, + "learning_rate": 5.331441707571686e-08, + "loss": 3.8024, + "step": 2582000 + }, + { + "epoch": 28.68, + "learning_rate": 5.3300535281966994e-08, + "loss": 3.7934, + "step": 2582500 + }, + { + "epoch": 28.69, + "learning_rate": 5.328665348821712e-08, + "loss": 3.7658, + "step": 2583000 + }, + { + "epoch": 28.69, + "learning_rate": 5.3272771694467265e-08, + "loss": 3.7845, + "step": 2583500 + }, + { + "epoch": 28.7, + "learning_rate": 5.325888990071741e-08, + "loss": 3.7534, + "step": 2584000 + }, + { + "epoch": 28.7, + "learning_rate": 5.324500810696755e-08, + "loss": 3.7584, + "step": 2584500 + }, + { + "epoch": 28.71, + "learning_rate": 5.323112631321769e-08, + "loss": 3.7484, + "step": 2585000 + }, + { + "epoch": 28.71, + "learning_rate": 5.321724451946783e-08, + "loss": 3.7678, + "step": 2585500 + }, + { + "epoch": 28.72, + "learning_rate": 5.3203362725717957e-08, + "loss": 3.7768, + "step": 2586000 + }, + { + "epoch": 28.72, + "learning_rate": 5.31894809319681e-08, + "loss": 3.7751, + "step": 2586500 + }, + { + "epoch": 28.73, + "learning_rate": 5.317559913821824e-08, + "loss": 3.7677, + "step": 2587000 + }, + { + "epoch": 28.74, + "learning_rate": 5.3161717344468383e-08, + "loss": 3.7652, + "step": 2587500 + }, + { + "epoch": 28.74, + "learning_rate": 5.3147835550718526e-08, + "loss": 3.7557, + "step": 2588000 + }, + { + "epoch": 28.75, + "learning_rate": 5.313395375696866e-08, + "loss": 3.7673, + "step": 2588500 + }, + { + "epoch": 28.75, + "learning_rate": 5.312007196321879e-08, + "loss": 3.7609, + "step": 2589000 + }, + { + "epoch": 28.76, + "learning_rate": 5.310619016946893e-08, + "loss": 3.7527, + "step": 2589500 + }, + { + "epoch": 28.76, + "learning_rate": 5.3092308375719075e-08, + "loss": 3.7541, + "step": 2590000 + }, + { + "epoch": 28.77, + "learning_rate": 5.307842658196922e-08, + "loss": 3.7651, + "step": 2590500 + }, + { + "epoch": 28.77, + "learning_rate": 5.306454478821936e-08, + "loss": 3.7737, + "step": 2591000 + }, + { + "epoch": 28.78, + "learning_rate": 5.305066299446949e-08, + "loss": 3.7792, + "step": 2591500 + }, + { + "epoch": 28.79, + "learning_rate": 5.3036781200719624e-08, + "loss": 3.7542, + "step": 2592000 + }, + { + "epoch": 28.79, + "learning_rate": 5.3022899406969766e-08, + "loss": 3.7635, + "step": 2592500 + }, + { + "epoch": 28.8, + "learning_rate": 5.300901761321991e-08, + "loss": 3.7749, + "step": 2593000 + }, + { + "epoch": 28.8, + "learning_rate": 5.299513581947005e-08, + "loss": 3.7702, + "step": 2593500 + }, + { + "epoch": 28.81, + "learning_rate": 5.298125402572019e-08, + "loss": 3.775, + "step": 2594000 + }, + { + "epoch": 28.81, + "learning_rate": 5.296737223197032e-08, + "loss": 3.7669, + "step": 2594500 + }, + { + "epoch": 28.82, + "learning_rate": 5.295349043822046e-08, + "loss": 3.7697, + "step": 2595000 + }, + { + "epoch": 28.82, + "learning_rate": 5.29396086444706e-08, + "loss": 3.7737, + "step": 2595500 + }, + { + "epoch": 28.83, + "learning_rate": 5.292572685072074e-08, + "loss": 3.7578, + "step": 2596000 + }, + { + "epoch": 28.84, + "learning_rate": 5.2911845056970885e-08, + "loss": 3.7476, + "step": 2596500 + }, + { + "epoch": 28.84, + "learning_rate": 5.2897963263221014e-08, + "loss": 3.7765, + "step": 2597000 + }, + { + "epoch": 28.85, + "learning_rate": 5.2884081469471156e-08, + "loss": 3.7741, + "step": 2597500 + }, + { + "epoch": 28.85, + "learning_rate": 5.287019967572129e-08, + "loss": 3.7794, + "step": 2598000 + }, + { + "epoch": 28.86, + "learning_rate": 5.2856317881971434e-08, + "loss": 3.7458, + "step": 2598500 + }, + { + "epoch": 28.86, + "learning_rate": 5.2842436088221576e-08, + "loss": 3.7572, + "step": 2599000 + }, + { + "epoch": 28.87, + "learning_rate": 5.282855429447172e-08, + "loss": 3.7682, + "step": 2599500 + }, + { + "epoch": 28.87, + "learning_rate": 5.281467250072185e-08, + "loss": 3.7551, + "step": 2600000 + }, + { + "epoch": 28.88, + "learning_rate": 5.280079070697199e-08, + "loss": 3.7694, + "step": 2600500 + }, + { + "epoch": 28.89, + "learning_rate": 5.2786908913222125e-08, + "loss": 3.7704, + "step": 2601000 + }, + { + "epoch": 28.89, + "learning_rate": 5.277302711947227e-08, + "loss": 3.776, + "step": 2601500 + }, + { + "epoch": 28.9, + "learning_rate": 5.275914532572241e-08, + "loss": 3.7691, + "step": 2602000 + }, + { + "epoch": 28.9, + "learning_rate": 5.274526353197255e-08, + "loss": 3.7667, + "step": 2602500 + }, + { + "epoch": 28.91, + "learning_rate": 5.273138173822268e-08, + "loss": 3.7694, + "step": 2603000 + }, + { + "epoch": 28.91, + "learning_rate": 5.2717499944472823e-08, + "loss": 3.7928, + "step": 2603500 + }, + { + "epoch": 28.92, + "learning_rate": 5.270361815072296e-08, + "loss": 3.7794, + "step": 2604000 + }, + { + "epoch": 28.92, + "learning_rate": 5.26897363569731e-08, + "loss": 3.766, + "step": 2604500 + }, + { + "epoch": 28.93, + "learning_rate": 5.2675854563223244e-08, + "loss": 3.7884, + "step": 2605000 + }, + { + "epoch": 28.94, + "learning_rate": 5.266197276947337e-08, + "loss": 3.7749, + "step": 2605500 + }, + { + "epoch": 28.94, + "learning_rate": 5.2648090975723515e-08, + "loss": 3.7596, + "step": 2606000 + }, + { + "epoch": 28.95, + "learning_rate": 5.263420918197366e-08, + "loss": 3.7671, + "step": 2606500 + }, + { + "epoch": 28.95, + "learning_rate": 5.262032738822379e-08, + "loss": 3.7743, + "step": 2607000 + }, + { + "epoch": 28.96, + "learning_rate": 5.2606445594473935e-08, + "loss": 3.775, + "step": 2607500 + }, + { + "epoch": 28.96, + "learning_rate": 5.259256380072408e-08, + "loss": 3.7653, + "step": 2608000 + }, + { + "epoch": 28.97, + "learning_rate": 5.2578682006974206e-08, + "loss": 3.7734, + "step": 2608500 + }, + { + "epoch": 28.97, + "learning_rate": 5.256480021322435e-08, + "loss": 3.7429, + "step": 2609000 + }, + { + "epoch": 28.98, + "learning_rate": 5.255091841947449e-08, + "loss": 3.7762, + "step": 2609500 + }, + { + "epoch": 28.99, + "learning_rate": 5.2537036625724626e-08, + "loss": 3.7587, + "step": 2610000 + }, + { + "epoch": 28.99, + "learning_rate": 5.252315483197477e-08, + "loss": 3.7841, + "step": 2610500 + }, + { + "epoch": 29.0, + "learning_rate": 5.25092730382249e-08, + "loss": 3.7746, + "step": 2611000 + }, + { + "epoch": 29.0, + "eval_loss": 3.836146116256714, + "eval_runtime": 6.2989, + "eval_samples_per_second": 246.709, + "step": 2611334 + }, + { + "epoch": 29.0, + "learning_rate": 5.249539124447504e-08, + "loss": 3.7836, + "step": 2611500 + }, + { + "epoch": 29.01, + "learning_rate": 5.248150945072518e-08, + "loss": 3.7712, + "step": 2612000 + }, + { + "epoch": 29.01, + "learning_rate": 5.2467627656975325e-08, + "loss": 3.77, + "step": 2612500 + }, + { + "epoch": 29.02, + "learning_rate": 5.245374586322546e-08, + "loss": 3.7509, + "step": 2613000 + }, + { + "epoch": 29.02, + "learning_rate": 5.24398640694756e-08, + "loss": 3.7809, + "step": 2613500 + }, + { + "epoch": 29.03, + "learning_rate": 5.242598227572573e-08, + "loss": 3.7669, + "step": 2614000 + }, + { + "epoch": 29.04, + "learning_rate": 5.2412100481975874e-08, + "loss": 3.7776, + "step": 2614500 + }, + { + "epoch": 29.04, + "learning_rate": 5.2398218688226016e-08, + "loss": 3.7686, + "step": 2615000 + }, + { + "epoch": 29.05, + "learning_rate": 5.238433689447616e-08, + "loss": 3.767, + "step": 2615500 + }, + { + "epoch": 29.05, + "learning_rate": 5.2370455100726294e-08, + "loss": 3.7638, + "step": 2616000 + }, + { + "epoch": 29.06, + "learning_rate": 5.2356573306976436e-08, + "loss": 3.7723, + "step": 2616500 + }, + { + "epoch": 29.06, + "learning_rate": 5.2342691513226565e-08, + "loss": 3.7813, + "step": 2617000 + }, + { + "epoch": 29.07, + "learning_rate": 5.232880971947671e-08, + "loss": 3.7649, + "step": 2617500 + }, + { + "epoch": 29.07, + "learning_rate": 5.231492792572685e-08, + "loss": 3.7858, + "step": 2618000 + }, + { + "epoch": 29.08, + "learning_rate": 5.230104613197699e-08, + "loss": 3.7564, + "step": 2618500 + }, + { + "epoch": 29.09, + "learning_rate": 5.228716433822713e-08, + "loss": 3.7837, + "step": 2619000 + }, + { + "epoch": 29.09, + "learning_rate": 5.227328254447726e-08, + "loss": 3.7526, + "step": 2619500 + }, + { + "epoch": 29.1, + "learning_rate": 5.22594007507274e-08, + "loss": 3.7426, + "step": 2620000 + }, + { + "epoch": 29.1, + "learning_rate": 5.224551895697754e-08, + "loss": 3.7511, + "step": 2620500 + }, + { + "epoch": 29.11, + "learning_rate": 5.2231637163227683e-08, + "loss": 3.7685, + "step": 2621000 + }, + { + "epoch": 29.11, + "learning_rate": 5.2217755369477826e-08, + "loss": 3.7696, + "step": 2621500 + }, + { + "epoch": 29.12, + "learning_rate": 5.220387357572796e-08, + "loss": 3.7784, + "step": 2622000 + }, + { + "epoch": 29.12, + "learning_rate": 5.218999178197809e-08, + "loss": 3.7704, + "step": 2622500 + }, + { + "epoch": 29.13, + "learning_rate": 5.217610998822823e-08, + "loss": 3.7666, + "step": 2623000 + }, + { + "epoch": 29.14, + "learning_rate": 5.2162228194478375e-08, + "loss": 3.7656, + "step": 2623500 + }, + { + "epoch": 29.14, + "learning_rate": 5.214834640072852e-08, + "loss": 3.776, + "step": 2624000 + }, + { + "epoch": 29.15, + "learning_rate": 5.213446460697866e-08, + "loss": 3.7656, + "step": 2624500 + }, + { + "epoch": 29.15, + "learning_rate": 5.2120582813228795e-08, + "loss": 3.7611, + "step": 2625000 + }, + { + "epoch": 29.16, + "learning_rate": 5.2106701019478924e-08, + "loss": 3.7619, + "step": 2625500 + }, + { + "epoch": 29.16, + "learning_rate": 5.2092819225729066e-08, + "loss": 3.7549, + "step": 2626000 + }, + { + "epoch": 29.17, + "learning_rate": 5.207893743197921e-08, + "loss": 3.756, + "step": 2626500 + }, + { + "epoch": 29.17, + "learning_rate": 5.206505563822935e-08, + "loss": 3.775, + "step": 2627000 + }, + { + "epoch": 29.18, + "learning_rate": 5.205117384447949e-08, + "loss": 3.7486, + "step": 2627500 + }, + { + "epoch": 29.19, + "learning_rate": 5.203729205072962e-08, + "loss": 3.7472, + "step": 2628000 + }, + { + "epoch": 29.19, + "learning_rate": 5.202341025697976e-08, + "loss": 3.7592, + "step": 2628500 + }, + { + "epoch": 29.2, + "learning_rate": 5.20095284632299e-08, + "loss": 3.7796, + "step": 2629000 + }, + { + "epoch": 29.2, + "learning_rate": 5.199564666948004e-08, + "loss": 3.7748, + "step": 2629500 + }, + { + "epoch": 29.21, + "learning_rate": 5.1981764875730185e-08, + "loss": 3.7732, + "step": 2630000 + }, + { + "epoch": 29.21, + "learning_rate": 5.196788308198033e-08, + "loss": 3.7515, + "step": 2630500 + }, + { + "epoch": 29.22, + "learning_rate": 5.1954001288230456e-08, + "loss": 3.7697, + "step": 2631000 + }, + { + "epoch": 29.22, + "learning_rate": 5.194011949448059e-08, + "loss": 3.7469, + "step": 2631500 + }, + { + "epoch": 29.23, + "learning_rate": 5.1926237700730734e-08, + "loss": 3.7817, + "step": 2632000 + }, + { + "epoch": 29.24, + "learning_rate": 5.1912355906980876e-08, + "loss": 3.7532, + "step": 2632500 + }, + { + "epoch": 29.24, + "learning_rate": 5.189847411323102e-08, + "loss": 3.7805, + "step": 2633000 + }, + { + "epoch": 29.25, + "learning_rate": 5.188459231948115e-08, + "loss": 3.7704, + "step": 2633500 + }, + { + "epoch": 29.25, + "learning_rate": 5.187071052573129e-08, + "loss": 3.7754, + "step": 2634000 + }, + { + "epoch": 29.26, + "learning_rate": 5.1856828731981425e-08, + "loss": 3.7734, + "step": 2634500 + }, + { + "epoch": 29.26, + "learning_rate": 5.184294693823157e-08, + "loss": 3.7508, + "step": 2635000 + }, + { + "epoch": 29.27, + "learning_rate": 5.182906514448171e-08, + "loss": 3.771, + "step": 2635500 + }, + { + "epoch": 29.27, + "learning_rate": 5.181518335073185e-08, + "loss": 3.77, + "step": 2636000 + }, + { + "epoch": 29.28, + "learning_rate": 5.180130155698198e-08, + "loss": 3.7555, + "step": 2636500 + }, + { + "epoch": 29.29, + "learning_rate": 5.1787419763232123e-08, + "loss": 3.7738, + "step": 2637000 + }, + { + "epoch": 29.29, + "learning_rate": 5.177353796948226e-08, + "loss": 3.7749, + "step": 2637500 + }, + { + "epoch": 29.3, + "learning_rate": 5.17596561757324e-08, + "loss": 3.7692, + "step": 2638000 + }, + { + "epoch": 29.3, + "learning_rate": 5.1745774381982544e-08, + "loss": 3.7758, + "step": 2638500 + }, + { + "epoch": 29.31, + "learning_rate": 5.1731892588232686e-08, + "loss": 3.7551, + "step": 2639000 + }, + { + "epoch": 29.31, + "learning_rate": 5.1718010794482815e-08, + "loss": 3.7752, + "step": 2639500 + }, + { + "epoch": 29.32, + "learning_rate": 5.170412900073296e-08, + "loss": 3.7765, + "step": 2640000 + }, + { + "epoch": 29.32, + "learning_rate": 5.169024720698309e-08, + "loss": 3.7751, + "step": 2640500 + }, + { + "epoch": 29.33, + "learning_rate": 5.1676365413233235e-08, + "loss": 3.7584, + "step": 2641000 + }, + { + "epoch": 29.34, + "learning_rate": 5.166248361948338e-08, + "loss": 3.7634, + "step": 2641500 + }, + { + "epoch": 29.34, + "learning_rate": 5.1648601825733506e-08, + "loss": 3.781, + "step": 2642000 + }, + { + "epoch": 29.35, + "learning_rate": 5.163472003198365e-08, + "loss": 3.7671, + "step": 2642500 + }, + { + "epoch": 29.35, + "learning_rate": 5.162083823823379e-08, + "loss": 3.7683, + "step": 2643000 + }, + { + "epoch": 29.36, + "learning_rate": 5.1606956444483927e-08, + "loss": 3.7547, + "step": 2643500 + }, + { + "epoch": 29.36, + "learning_rate": 5.159307465073407e-08, + "loss": 3.7499, + "step": 2644000 + }, + { + "epoch": 29.37, + "learning_rate": 5.157919285698421e-08, + "loss": 3.781, + "step": 2644500 + }, + { + "epoch": 29.37, + "learning_rate": 5.156531106323434e-08, + "loss": 3.7786, + "step": 2645000 + }, + { + "epoch": 29.38, + "learning_rate": 5.155142926948448e-08, + "loss": 3.7796, + "step": 2645500 + }, + { + "epoch": 29.38, + "learning_rate": 5.1537547475734625e-08, + "loss": 3.7593, + "step": 2646000 + }, + { + "epoch": 29.39, + "learning_rate": 5.152366568198476e-08, + "loss": 3.7875, + "step": 2646500 + }, + { + "epoch": 29.4, + "learning_rate": 5.15097838882349e-08, + "loss": 3.7678, + "step": 2647000 + }, + { + "epoch": 29.4, + "learning_rate": 5.1495902094485045e-08, + "loss": 3.7732, + "step": 2647500 + }, + { + "epoch": 29.41, + "learning_rate": 5.1482020300735174e-08, + "loss": 3.7924, + "step": 2648000 + }, + { + "epoch": 29.41, + "learning_rate": 5.1468138506985316e-08, + "loss": 3.7593, + "step": 2648500 + }, + { + "epoch": 29.42, + "learning_rate": 5.145425671323546e-08, + "loss": 3.7542, + "step": 2649000 + }, + { + "epoch": 29.42, + "learning_rate": 5.1440374919485594e-08, + "loss": 3.7581, + "step": 2649500 + }, + { + "epoch": 29.43, + "learning_rate": 5.1426493125735736e-08, + "loss": 3.7624, + "step": 2650000 + }, + { + "epoch": 29.43, + "learning_rate": 5.1412611331985865e-08, + "loss": 3.7558, + "step": 2650500 + }, + { + "epoch": 29.44, + "learning_rate": 5.139872953823601e-08, + "loss": 3.7723, + "step": 2651000 + }, + { + "epoch": 29.45, + "learning_rate": 5.138484774448615e-08, + "loss": 3.7777, + "step": 2651500 + }, + { + "epoch": 29.45, + "learning_rate": 5.137096595073629e-08, + "loss": 3.7681, + "step": 2652000 + }, + { + "epoch": 29.46, + "learning_rate": 5.135708415698643e-08, + "loss": 3.7632, + "step": 2652500 + }, + { + "epoch": 29.46, + "learning_rate": 5.134320236323657e-08, + "loss": 3.7764, + "step": 2653000 + }, + { + "epoch": 29.47, + "learning_rate": 5.13293205694867e-08, + "loss": 3.7549, + "step": 2653500 + }, + { + "epoch": 29.47, + "learning_rate": 5.131543877573684e-08, + "loss": 3.78, + "step": 2654000 + }, + { + "epoch": 29.48, + "learning_rate": 5.1301556981986984e-08, + "loss": 3.7548, + "step": 2654500 + }, + { + "epoch": 29.48, + "learning_rate": 5.1287675188237126e-08, + "loss": 3.7605, + "step": 2655000 + }, + { + "epoch": 29.49, + "learning_rate": 5.127379339448726e-08, + "loss": 3.7659, + "step": 2655500 + }, + { + "epoch": 29.5, + "learning_rate": 5.125991160073739e-08, + "loss": 3.7814, + "step": 2656000 + }, + { + "epoch": 29.5, + "learning_rate": 5.124602980698753e-08, + "loss": 3.7644, + "step": 2656500 + }, + { + "epoch": 29.51, + "learning_rate": 5.1232148013237675e-08, + "loss": 3.7524, + "step": 2657000 + }, + { + "epoch": 29.51, + "learning_rate": 5.121826621948782e-08, + "loss": 3.7788, + "step": 2657500 + }, + { + "epoch": 29.52, + "learning_rate": 5.120438442573796e-08, + "loss": 3.7704, + "step": 2658000 + }, + { + "epoch": 29.52, + "learning_rate": 5.1190502631988095e-08, + "loss": 3.7571, + "step": 2658500 + }, + { + "epoch": 29.53, + "learning_rate": 5.1176620838238224e-08, + "loss": 3.7555, + "step": 2659000 + }, + { + "epoch": 29.53, + "learning_rate": 5.1162739044488367e-08, + "loss": 3.7691, + "step": 2659500 + }, + { + "epoch": 29.54, + "learning_rate": 5.114885725073851e-08, + "loss": 3.7753, + "step": 2660000 + }, + { + "epoch": 29.55, + "learning_rate": 5.113497545698865e-08, + "loss": 3.7795, + "step": 2660500 + }, + { + "epoch": 29.55, + "learning_rate": 5.112109366323879e-08, + "loss": 3.7595, + "step": 2661000 + }, + { + "epoch": 29.56, + "learning_rate": 5.110721186948893e-08, + "loss": 3.775, + "step": 2661500 + }, + { + "epoch": 29.56, + "learning_rate": 5.109333007573906e-08, + "loss": 3.7522, + "step": 2662000 + }, + { + "epoch": 29.57, + "learning_rate": 5.10794482819892e-08, + "loss": 3.7524, + "step": 2662500 + }, + { + "epoch": 29.57, + "learning_rate": 5.106556648823934e-08, + "loss": 3.7649, + "step": 2663000 + }, + { + "epoch": 29.58, + "learning_rate": 5.1051684694489485e-08, + "loss": 3.7763, + "step": 2663500 + }, + { + "epoch": 29.58, + "learning_rate": 5.103780290073963e-08, + "loss": 3.75, + "step": 2664000 + }, + { + "epoch": 29.59, + "learning_rate": 5.1023921106989756e-08, + "loss": 3.7703, + "step": 2664500 + }, + { + "epoch": 29.6, + "learning_rate": 5.101003931323989e-08, + "loss": 3.7751, + "step": 2665000 + }, + { + "epoch": 29.6, + "learning_rate": 5.0996157519490034e-08, + "loss": 3.7531, + "step": 2665500 + }, + { + "epoch": 29.61, + "learning_rate": 5.0982275725740176e-08, + "loss": 3.7502, + "step": 2666000 + }, + { + "epoch": 29.61, + "learning_rate": 5.096839393199032e-08, + "loss": 3.7666, + "step": 2666500 + }, + { + "epoch": 29.62, + "learning_rate": 5.095451213824046e-08, + "loss": 3.7664, + "step": 2667000 + }, + { + "epoch": 29.62, + "learning_rate": 5.094063034449059e-08, + "loss": 3.7618, + "step": 2667500 + }, + { + "epoch": 29.63, + "learning_rate": 5.0926748550740725e-08, + "loss": 3.7658, + "step": 2668000 + }, + { + "epoch": 29.63, + "learning_rate": 5.091286675699087e-08, + "loss": 3.7573, + "step": 2668500 + }, + { + "epoch": 29.64, + "learning_rate": 5.089898496324101e-08, + "loss": 3.7585, + "step": 2669000 + }, + { + "epoch": 29.65, + "learning_rate": 5.088510316949115e-08, + "loss": 3.7745, + "step": 2669500 + }, + { + "epoch": 29.65, + "learning_rate": 5.087122137574128e-08, + "loss": 3.7827, + "step": 2670000 + }, + { + "epoch": 29.66, + "learning_rate": 5.0857339581991424e-08, + "loss": 3.7803, + "step": 2670500 + }, + { + "epoch": 29.66, + "learning_rate": 5.084345778824156e-08, + "loss": 3.7764, + "step": 2671000 + }, + { + "epoch": 29.67, + "learning_rate": 5.08295759944917e-08, + "loss": 3.7813, + "step": 2671500 + }, + { + "epoch": 29.67, + "learning_rate": 5.0815694200741844e-08, + "loss": 3.7388, + "step": 2672000 + }, + { + "epoch": 29.68, + "learning_rate": 5.0801812406991986e-08, + "loss": 3.7636, + "step": 2672500 + }, + { + "epoch": 29.68, + "learning_rate": 5.0787930613242115e-08, + "loss": 3.7608, + "step": 2673000 + }, + { + "epoch": 29.69, + "learning_rate": 5.077404881949226e-08, + "loss": 3.7752, + "step": 2673500 + }, + { + "epoch": 29.7, + "learning_rate": 5.076016702574239e-08, + "loss": 3.7728, + "step": 2674000 + }, + { + "epoch": 29.7, + "learning_rate": 5.0746285231992535e-08, + "loss": 3.7556, + "step": 2674500 + }, + { + "epoch": 29.71, + "learning_rate": 5.073240343824268e-08, + "loss": 3.7533, + "step": 2675000 + }, + { + "epoch": 29.71, + "learning_rate": 5.071852164449282e-08, + "loss": 3.79, + "step": 2675500 + }, + { + "epoch": 29.72, + "learning_rate": 5.070463985074295e-08, + "loss": 3.7713, + "step": 2676000 + }, + { + "epoch": 29.72, + "learning_rate": 5.069075805699309e-08, + "loss": 3.7606, + "step": 2676500 + }, + { + "epoch": 29.73, + "learning_rate": 5.0676876263243227e-08, + "loss": 3.7742, + "step": 2677000 + }, + { + "epoch": 29.73, + "learning_rate": 5.066299446949337e-08, + "loss": 3.762, + "step": 2677500 + }, + { + "epoch": 29.74, + "learning_rate": 5.064911267574351e-08, + "loss": 3.7516, + "step": 2678000 + }, + { + "epoch": 29.75, + "learning_rate": 5.063523088199364e-08, + "loss": 3.774, + "step": 2678500 + }, + { + "epoch": 29.75, + "learning_rate": 5.062134908824378e-08, + "loss": 3.7729, + "step": 2679000 + }, + { + "epoch": 29.76, + "learning_rate": 5.0607467294493925e-08, + "loss": 3.7587, + "step": 2679500 + }, + { + "epoch": 29.76, + "learning_rate": 5.059358550074406e-08, + "loss": 3.7804, + "step": 2680000 + }, + { + "epoch": 29.77, + "learning_rate": 5.05797037069942e-08, + "loss": 3.7754, + "step": 2680500 + }, + { + "epoch": 29.77, + "learning_rate": 5.0565821913244345e-08, + "loss": 3.7708, + "step": 2681000 + }, + { + "epoch": 29.78, + "learning_rate": 5.0551940119494474e-08, + "loss": 3.7724, + "step": 2681500 + }, + { + "epoch": 29.78, + "learning_rate": 5.0538058325744616e-08, + "loss": 3.7744, + "step": 2682000 + }, + { + "epoch": 29.79, + "learning_rate": 5.052417653199476e-08, + "loss": 3.7566, + "step": 2682500 + }, + { + "epoch": 29.8, + "learning_rate": 5.0510294738244894e-08, + "loss": 3.7669, + "step": 2683000 + }, + { + "epoch": 29.8, + "learning_rate": 5.0496412944495036e-08, + "loss": 3.7627, + "step": 2683500 + }, + { + "epoch": 29.81, + "learning_rate": 5.048253115074518e-08, + "loss": 3.7537, + "step": 2684000 + }, + { + "epoch": 29.81, + "learning_rate": 5.046864935699531e-08, + "loss": 3.7803, + "step": 2684500 + }, + { + "epoch": 29.82, + "learning_rate": 5.045476756324545e-08, + "loss": 3.7685, + "step": 2685000 + }, + { + "epoch": 29.82, + "learning_rate": 5.044088576949559e-08, + "loss": 3.7589, + "step": 2685500 + }, + { + "epoch": 29.83, + "learning_rate": 5.042700397574573e-08, + "loss": 3.758, + "step": 2686000 + }, + { + "epoch": 29.83, + "learning_rate": 5.041312218199587e-08, + "loss": 3.772, + "step": 2686500 + }, + { + "epoch": 29.84, + "learning_rate": 5.0399240388246e-08, + "loss": 3.7625, + "step": 2687000 + }, + { + "epoch": 29.85, + "learning_rate": 5.038535859449614e-08, + "loss": 3.7611, + "step": 2687500 + }, + { + "epoch": 29.85, + "learning_rate": 5.0371476800746284e-08, + "loss": 3.7872, + "step": 2688000 + }, + { + "epoch": 29.86, + "learning_rate": 5.0357595006996426e-08, + "loss": 3.7626, + "step": 2688500 + }, + { + "epoch": 29.86, + "learning_rate": 5.034371321324656e-08, + "loss": 3.7752, + "step": 2689000 + }, + { + "epoch": 29.87, + "learning_rate": 5.0329831419496704e-08, + "loss": 3.7508, + "step": 2689500 + }, + { + "epoch": 29.87, + "learning_rate": 5.031594962574683e-08, + "loss": 3.7767, + "step": 2690000 + }, + { + "epoch": 29.88, + "learning_rate": 5.0302067831996975e-08, + "loss": 3.7667, + "step": 2690500 + }, + { + "epoch": 29.88, + "learning_rate": 5.028818603824712e-08, + "loss": 3.759, + "step": 2691000 + }, + { + "epoch": 29.89, + "learning_rate": 5.027430424449726e-08, + "loss": 3.7686, + "step": 2691500 + }, + { + "epoch": 29.9, + "learning_rate": 5.0260422450747395e-08, + "loss": 3.7747, + "step": 2692000 + }, + { + "epoch": 29.9, + "learning_rate": 5.0246540656997524e-08, + "loss": 3.7592, + "step": 2692500 + }, + { + "epoch": 29.91, + "learning_rate": 5.0232658863247667e-08, + "loss": 3.7826, + "step": 2693000 + }, + { + "epoch": 29.91, + "learning_rate": 5.021877706949781e-08, + "loss": 3.7786, + "step": 2693500 + }, + { + "epoch": 29.92, + "learning_rate": 5.020489527574795e-08, + "loss": 3.7856, + "step": 2694000 + }, + { + "epoch": 29.92, + "learning_rate": 5.0191013481998093e-08, + "loss": 3.7665, + "step": 2694500 + }, + { + "epoch": 29.93, + "learning_rate": 5.017713168824823e-08, + "loss": 3.7503, + "step": 2695000 + }, + { + "epoch": 29.93, + "learning_rate": 5.016324989449836e-08, + "loss": 3.77, + "step": 2695500 + }, + { + "epoch": 29.94, + "learning_rate": 5.01493681007485e-08, + "loss": 3.7702, + "step": 2696000 + }, + { + "epoch": 29.95, + "learning_rate": 5.013548630699864e-08, + "loss": 3.7729, + "step": 2696500 + }, + { + "epoch": 29.95, + "learning_rate": 5.0121604513248785e-08, + "loss": 3.7511, + "step": 2697000 + }, + { + "epoch": 29.96, + "learning_rate": 5.010772271949893e-08, + "loss": 3.7647, + "step": 2697500 + }, + { + "epoch": 29.96, + "learning_rate": 5.009384092574906e-08, + "loss": 3.7785, + "step": 2698000 + }, + { + "epoch": 29.97, + "learning_rate": 5.007995913199919e-08, + "loss": 3.7635, + "step": 2698500 + }, + { + "epoch": 29.97, + "learning_rate": 5.0066077338249334e-08, + "loss": 3.7817, + "step": 2699000 + }, + { + "epoch": 29.98, + "learning_rate": 5.0052195544499476e-08, + "loss": 3.7431, + "step": 2699500 + }, + { + "epoch": 29.98, + "learning_rate": 5.003831375074962e-08, + "loss": 3.751, + "step": 2700000 + }, + { + "epoch": 29.99, + "learning_rate": 5.0024431956999754e-08, + "loss": 3.7688, + "step": 2700500 + }, + { + "epoch": 30.0, + "learning_rate": 5.001055016324989e-08, + "loss": 3.7721, + "step": 2701000 + }, + { + "epoch": 30.0, + "eval_loss": 3.8352878093719482, + "eval_runtime": 6.305, + "eval_samples_per_second": 246.469, + "step": 2701380 + }, + { + "epoch": 30.0, + "learning_rate": 4.999666836950003e-08, + "loss": 3.7702, + "step": 2701500 + }, + { + "epoch": 30.01, + "learning_rate": 4.998278657575017e-08, + "loss": 3.7899, + "step": 2702000 + }, + { + "epoch": 30.01, + "learning_rate": 4.996890478200031e-08, + "loss": 3.7609, + "step": 2702500 + }, + { + "epoch": 30.02, + "learning_rate": 4.9955022988250446e-08, + "loss": 3.7615, + "step": 2703000 + }, + { + "epoch": 30.02, + "learning_rate": 4.994114119450059e-08, + "loss": 3.7746, + "step": 2703500 + }, + { + "epoch": 30.03, + "learning_rate": 4.9927259400750724e-08, + "loss": 3.7662, + "step": 2704000 + }, + { + "epoch": 30.03, + "learning_rate": 4.991337760700086e-08, + "loss": 3.7601, + "step": 2704500 + }, + { + "epoch": 30.04, + "learning_rate": 4.9899495813251e-08, + "loss": 3.7351, + "step": 2705000 + }, + { + "epoch": 30.05, + "learning_rate": 4.9885614019501144e-08, + "loss": 3.77, + "step": 2705500 + }, + { + "epoch": 30.05, + "learning_rate": 4.987173222575128e-08, + "loss": 3.7664, + "step": 2706000 + }, + { + "epoch": 30.06, + "learning_rate": 4.985785043200142e-08, + "loss": 3.7691, + "step": 2706500 + }, + { + "epoch": 30.06, + "learning_rate": 4.984396863825156e-08, + "loss": 3.7685, + "step": 2707000 + }, + { + "epoch": 30.07, + "learning_rate": 4.983008684450169e-08, + "loss": 3.7541, + "step": 2707500 + }, + { + "epoch": 30.07, + "learning_rate": 4.9816205050751835e-08, + "loss": 3.7622, + "step": 2708000 + }, + { + "epoch": 30.08, + "learning_rate": 4.980232325700198e-08, + "loss": 3.7779, + "step": 2708500 + }, + { + "epoch": 30.08, + "learning_rate": 4.978844146325211e-08, + "loss": 3.7647, + "step": 2709000 + }, + { + "epoch": 30.09, + "learning_rate": 4.9774559669502255e-08, + "loss": 3.7736, + "step": 2709500 + }, + { + "epoch": 30.1, + "learning_rate": 4.976067787575239e-08, + "loss": 3.7728, + "step": 2710000 + }, + { + "epoch": 30.1, + "learning_rate": 4.974679608200253e-08, + "loss": 3.7479, + "step": 2710500 + }, + { + "epoch": 30.11, + "learning_rate": 4.973291428825267e-08, + "loss": 3.7517, + "step": 2711000 + }, + { + "epoch": 30.11, + "learning_rate": 4.9719032494502805e-08, + "loss": 3.7607, + "step": 2711500 + }, + { + "epoch": 30.12, + "learning_rate": 4.970515070075295e-08, + "loss": 3.7558, + "step": 2712000 + }, + { + "epoch": 30.12, + "learning_rate": 4.969126890700309e-08, + "loss": 3.7703, + "step": 2712500 + }, + { + "epoch": 30.13, + "learning_rate": 4.9677387113253225e-08, + "loss": 3.7723, + "step": 2713000 + }, + { + "epoch": 30.13, + "learning_rate": 4.966350531950336e-08, + "loss": 3.7644, + "step": 2713500 + }, + { + "epoch": 30.14, + "learning_rate": 4.96496235257535e-08, + "loss": 3.7544, + "step": 2714000 + }, + { + "epoch": 30.15, + "learning_rate": 4.963574173200364e-08, + "loss": 3.7594, + "step": 2714500 + }, + { + "epoch": 30.15, + "learning_rate": 4.962185993825378e-08, + "loss": 3.769, + "step": 2715000 + }, + { + "epoch": 30.16, + "learning_rate": 4.960797814450392e-08, + "loss": 3.7682, + "step": 2715500 + }, + { + "epoch": 30.16, + "learning_rate": 4.959409635075406e-08, + "loss": 3.7493, + "step": 2716000 + }, + { + "epoch": 30.17, + "learning_rate": 4.9580214557004194e-08, + "loss": 3.761, + "step": 2716500 + }, + { + "epoch": 30.17, + "learning_rate": 4.9566332763254336e-08, + "loss": 3.7622, + "step": 2717000 + }, + { + "epoch": 30.18, + "learning_rate": 4.955245096950447e-08, + "loss": 3.7678, + "step": 2717500 + }, + { + "epoch": 30.18, + "learning_rate": 4.9538569175754614e-08, + "loss": 3.7566, + "step": 2718000 + }, + { + "epoch": 30.19, + "learning_rate": 4.952468738200475e-08, + "loss": 3.773, + "step": 2718500 + }, + { + "epoch": 30.2, + "learning_rate": 4.951080558825489e-08, + "loss": 3.7724, + "step": 2719000 + }, + { + "epoch": 30.2, + "learning_rate": 4.949692379450503e-08, + "loss": 3.7571, + "step": 2719500 + }, + { + "epoch": 30.21, + "learning_rate": 4.9483042000755164e-08, + "loss": 3.787, + "step": 2720000 + }, + { + "epoch": 30.21, + "learning_rate": 4.9469160207005306e-08, + "loss": 3.7637, + "step": 2720500 + }, + { + "epoch": 30.22, + "learning_rate": 4.945527841325545e-08, + "loss": 3.7551, + "step": 2721000 + }, + { + "epoch": 30.22, + "learning_rate": 4.9441396619505584e-08, + "loss": 3.7619, + "step": 2721500 + }, + { + "epoch": 30.23, + "learning_rate": 4.9427514825755726e-08, + "loss": 3.7712, + "step": 2722000 + }, + { + "epoch": 30.23, + "learning_rate": 4.941363303200586e-08, + "loss": 3.751, + "step": 2722500 + }, + { + "epoch": 30.24, + "learning_rate": 4.9399751238256e-08, + "loss": 3.7662, + "step": 2723000 + }, + { + "epoch": 30.25, + "learning_rate": 4.938586944450614e-08, + "loss": 3.7664, + "step": 2723500 + }, + { + "epoch": 30.25, + "learning_rate": 4.937198765075628e-08, + "loss": 3.7866, + "step": 2724000 + }, + { + "epoch": 30.26, + "learning_rate": 4.935810585700642e-08, + "loss": 3.771, + "step": 2724500 + }, + { + "epoch": 30.26, + "learning_rate": 4.934422406325656e-08, + "loss": 3.7731, + "step": 2725000 + }, + { + "epoch": 30.27, + "learning_rate": 4.933034226950669e-08, + "loss": 3.7862, + "step": 2725500 + }, + { + "epoch": 30.27, + "learning_rate": 4.931646047575683e-08, + "loss": 3.7815, + "step": 2726000 + }, + { + "epoch": 30.28, + "learning_rate": 4.930257868200697e-08, + "loss": 3.7546, + "step": 2726500 + }, + { + "epoch": 30.28, + "learning_rate": 4.928869688825711e-08, + "loss": 3.7842, + "step": 2727000 + }, + { + "epoch": 30.29, + "learning_rate": 4.927481509450725e-08, + "loss": 3.7531, + "step": 2727500 + }, + { + "epoch": 30.3, + "learning_rate": 4.9260933300757393e-08, + "loss": 3.7476, + "step": 2728000 + }, + { + "epoch": 30.3, + "learning_rate": 4.924705150700752e-08, + "loss": 3.7653, + "step": 2728500 + }, + { + "epoch": 30.31, + "learning_rate": 4.9233169713257665e-08, + "loss": 3.7563, + "step": 2729000 + }, + { + "epoch": 30.31, + "learning_rate": 4.921928791950781e-08, + "loss": 3.7709, + "step": 2729500 + }, + { + "epoch": 30.32, + "learning_rate": 4.920540612575794e-08, + "loss": 3.7798, + "step": 2730000 + }, + { + "epoch": 30.32, + "learning_rate": 4.9191524332008085e-08, + "loss": 3.7673, + "step": 2730500 + }, + { + "epoch": 30.33, + "learning_rate": 4.917764253825822e-08, + "loss": 3.7492, + "step": 2731000 + }, + { + "epoch": 30.33, + "learning_rate": 4.9163760744508356e-08, + "loss": 3.7526, + "step": 2731500 + }, + { + "epoch": 30.34, + "learning_rate": 4.91498789507585e-08, + "loss": 3.763, + "step": 2732000 + }, + { + "epoch": 30.35, + "learning_rate": 4.9135997157008634e-08, + "loss": 3.7607, + "step": 2732500 + }, + { + "epoch": 30.35, + "learning_rate": 4.9122115363258776e-08, + "loss": 3.7733, + "step": 2733000 + }, + { + "epoch": 30.36, + "learning_rate": 4.910823356950892e-08, + "loss": 3.7695, + "step": 2733500 + }, + { + "epoch": 30.36, + "learning_rate": 4.9094351775759054e-08, + "loss": 3.7745, + "step": 2734000 + }, + { + "epoch": 30.37, + "learning_rate": 4.908046998200919e-08, + "loss": 3.7843, + "step": 2734500 + }, + { + "epoch": 30.37, + "learning_rate": 4.906658818825933e-08, + "loss": 3.7719, + "step": 2735000 + }, + { + "epoch": 30.38, + "learning_rate": 4.905270639450947e-08, + "loss": 3.7753, + "step": 2735500 + }, + { + "epoch": 30.38, + "learning_rate": 4.903882460075961e-08, + "loss": 3.7879, + "step": 2736000 + }, + { + "epoch": 30.39, + "learning_rate": 4.902494280700975e-08, + "loss": 3.7513, + "step": 2736500 + }, + { + "epoch": 30.4, + "learning_rate": 4.901106101325989e-08, + "loss": 3.7607, + "step": 2737000 + }, + { + "epoch": 30.4, + "learning_rate": 4.8997179219510024e-08, + "loss": 3.7629, + "step": 2737500 + }, + { + "epoch": 30.41, + "learning_rate": 4.8983297425760166e-08, + "loss": 3.7629, + "step": 2738000 + }, + { + "epoch": 30.41, + "learning_rate": 4.89694156320103e-08, + "loss": 3.7572, + "step": 2738500 + }, + { + "epoch": 30.42, + "learning_rate": 4.8955533838260444e-08, + "loss": 3.7711, + "step": 2739000 + }, + { + "epoch": 30.42, + "learning_rate": 4.894165204451058e-08, + "loss": 3.7762, + "step": 2739500 + }, + { + "epoch": 30.43, + "learning_rate": 4.892777025076072e-08, + "loss": 3.7607, + "step": 2740000 + }, + { + "epoch": 30.43, + "learning_rate": 4.891388845701086e-08, + "loss": 3.7783, + "step": 2740500 + }, + { + "epoch": 30.44, + "learning_rate": 4.890000666326099e-08, + "loss": 3.7566, + "step": 2741000 + }, + { + "epoch": 30.45, + "learning_rate": 4.8886124869511135e-08, + "loss": 3.7483, + "step": 2741500 + }, + { + "epoch": 30.45, + "learning_rate": 4.887224307576128e-08, + "loss": 3.765, + "step": 2742000 + }, + { + "epoch": 30.46, + "learning_rate": 4.885836128201141e-08, + "loss": 3.7673, + "step": 2742500 + }, + { + "epoch": 30.46, + "learning_rate": 4.8844479488261556e-08, + "loss": 3.7414, + "step": 2743000 + }, + { + "epoch": 30.47, + "learning_rate": 4.883059769451169e-08, + "loss": 3.7608, + "step": 2743500 + }, + { + "epoch": 30.47, + "learning_rate": 4.881671590076183e-08, + "loss": 3.7728, + "step": 2744000 + }, + { + "epoch": 30.48, + "learning_rate": 4.880283410701197e-08, + "loss": 3.7762, + "step": 2744500 + }, + { + "epoch": 30.48, + "learning_rate": 4.878895231326211e-08, + "loss": 3.7671, + "step": 2745000 + }, + { + "epoch": 30.49, + "learning_rate": 4.877507051951225e-08, + "loss": 3.7777, + "step": 2745500 + }, + { + "epoch": 30.5, + "learning_rate": 4.876118872576239e-08, + "loss": 3.7601, + "step": 2746000 + }, + { + "epoch": 30.5, + "learning_rate": 4.8747306932012525e-08, + "loss": 3.7615, + "step": 2746500 + }, + { + "epoch": 30.51, + "learning_rate": 4.873342513826266e-08, + "loss": 3.7783, + "step": 2747000 + }, + { + "epoch": 30.51, + "learning_rate": 4.87195433445128e-08, + "loss": 3.7406, + "step": 2747500 + }, + { + "epoch": 30.52, + "learning_rate": 4.870566155076294e-08, + "loss": 3.7574, + "step": 2748000 + }, + { + "epoch": 30.52, + "learning_rate": 4.869177975701308e-08, + "loss": 3.7689, + "step": 2748500 + }, + { + "epoch": 30.53, + "learning_rate": 4.867789796326322e-08, + "loss": 3.7399, + "step": 2749000 + }, + { + "epoch": 30.53, + "learning_rate": 4.866401616951336e-08, + "loss": 3.7592, + "step": 2749500 + }, + { + "epoch": 30.54, + "learning_rate": 4.8650134375763494e-08, + "loss": 3.7522, + "step": 2750000 + }, + { + "epoch": 30.55, + "learning_rate": 4.8636252582013637e-08, + "loss": 3.7614, + "step": 2750500 + }, + { + "epoch": 30.55, + "learning_rate": 4.862237078826377e-08, + "loss": 3.7633, + "step": 2751000 + }, + { + "epoch": 30.56, + "learning_rate": 4.8608488994513914e-08, + "loss": 3.7892, + "step": 2751500 + }, + { + "epoch": 30.56, + "learning_rate": 4.859460720076406e-08, + "loss": 3.753, + "step": 2752000 + }, + { + "epoch": 30.57, + "learning_rate": 4.858072540701419e-08, + "loss": 3.7537, + "step": 2752500 + }, + { + "epoch": 30.57, + "learning_rate": 4.856684361326433e-08, + "loss": 3.7605, + "step": 2753000 + }, + { + "epoch": 30.58, + "learning_rate": 4.855296181951447e-08, + "loss": 3.7797, + "step": 2753500 + }, + { + "epoch": 30.58, + "learning_rate": 4.8539080025764606e-08, + "loss": 3.7838, + "step": 2754000 + }, + { + "epoch": 30.59, + "learning_rate": 4.852519823201475e-08, + "loss": 3.7578, + "step": 2754500 + }, + { + "epoch": 30.6, + "learning_rate": 4.8511316438264884e-08, + "loss": 3.7689, + "step": 2755000 + }, + { + "epoch": 30.6, + "learning_rate": 4.8497434644515026e-08, + "loss": 3.7545, + "step": 2755500 + }, + { + "epoch": 30.61, + "learning_rate": 4.848355285076516e-08, + "loss": 3.7466, + "step": 2756000 + }, + { + "epoch": 30.61, + "learning_rate": 4.84696710570153e-08, + "loss": 3.7481, + "step": 2756500 + }, + { + "epoch": 30.62, + "learning_rate": 4.845578926326544e-08, + "loss": 3.7532, + "step": 2757000 + }, + { + "epoch": 30.62, + "learning_rate": 4.844190746951558e-08, + "loss": 3.764, + "step": 2757500 + }, + { + "epoch": 30.63, + "learning_rate": 4.842802567576572e-08, + "loss": 3.7788, + "step": 2758000 + }, + { + "epoch": 30.63, + "learning_rate": 4.841414388201586e-08, + "loss": 3.7984, + "step": 2758500 + }, + { + "epoch": 30.64, + "learning_rate": 4.8400262088265995e-08, + "loss": 3.7503, + "step": 2759000 + }, + { + "epoch": 30.65, + "learning_rate": 4.838638029451613e-08, + "loss": 3.7653, + "step": 2759500 + }, + { + "epoch": 30.65, + "learning_rate": 4.8372498500766273e-08, + "loss": 3.745, + "step": 2760000 + }, + { + "epoch": 30.66, + "learning_rate": 4.8358616707016416e-08, + "loss": 3.7655, + "step": 2760500 + }, + { + "epoch": 30.66, + "learning_rate": 4.834473491326655e-08, + "loss": 3.7547, + "step": 2761000 + }, + { + "epoch": 30.67, + "learning_rate": 4.833085311951669e-08, + "loss": 3.7802, + "step": 2761500 + }, + { + "epoch": 30.67, + "learning_rate": 4.831697132576682e-08, + "loss": 3.7676, + "step": 2762000 + }, + { + "epoch": 30.68, + "learning_rate": 4.8303089532016965e-08, + "loss": 3.7852, + "step": 2762500 + }, + { + "epoch": 30.68, + "learning_rate": 4.828920773826711e-08, + "loss": 3.7764, + "step": 2763000 + }, + { + "epoch": 30.69, + "learning_rate": 4.827532594451724e-08, + "loss": 3.7755, + "step": 2763500 + }, + { + "epoch": 30.7, + "learning_rate": 4.8261444150767385e-08, + "loss": 3.785, + "step": 2764000 + }, + { + "epoch": 30.7, + "learning_rate": 4.824756235701752e-08, + "loss": 3.7597, + "step": 2764500 + }, + { + "epoch": 30.71, + "learning_rate": 4.8233680563267656e-08, + "loss": 3.7547, + "step": 2765000 + }, + { + "epoch": 30.71, + "learning_rate": 4.82197987695178e-08, + "loss": 3.7777, + "step": 2765500 + }, + { + "epoch": 30.72, + "learning_rate": 4.820591697576794e-08, + "loss": 3.7606, + "step": 2766000 + }, + { + "epoch": 30.72, + "learning_rate": 4.8192035182018076e-08, + "loss": 3.7601, + "step": 2766500 + }, + { + "epoch": 30.73, + "learning_rate": 4.817815338826822e-08, + "loss": 3.7643, + "step": 2767000 + }, + { + "epoch": 30.73, + "learning_rate": 4.8164271594518354e-08, + "loss": 3.7669, + "step": 2767500 + }, + { + "epoch": 30.74, + "learning_rate": 4.815038980076849e-08, + "loss": 3.7455, + "step": 2768000 + }, + { + "epoch": 30.75, + "learning_rate": 4.813650800701863e-08, + "loss": 3.7513, + "step": 2768500 + }, + { + "epoch": 30.75, + "learning_rate": 4.812262621326877e-08, + "loss": 3.7447, + "step": 2769000 + }, + { + "epoch": 30.76, + "learning_rate": 4.810874441951891e-08, + "loss": 3.7422, + "step": 2769500 + }, + { + "epoch": 30.76, + "learning_rate": 4.809486262576905e-08, + "loss": 3.7514, + "step": 2770000 + }, + { + "epoch": 30.77, + "learning_rate": 4.808098083201919e-08, + "loss": 3.7671, + "step": 2770500 + }, + { + "epoch": 30.77, + "learning_rate": 4.8067099038269324e-08, + "loss": 3.7531, + "step": 2771000 + }, + { + "epoch": 30.78, + "learning_rate": 4.8053217244519466e-08, + "loss": 3.7625, + "step": 2771500 + }, + { + "epoch": 30.78, + "learning_rate": 4.80393354507696e-08, + "loss": 3.7783, + "step": 2772000 + }, + { + "epoch": 30.79, + "learning_rate": 4.8025453657019744e-08, + "loss": 3.7741, + "step": 2772500 + }, + { + "epoch": 30.8, + "learning_rate": 4.8011571863269886e-08, + "loss": 3.7691, + "step": 2773000 + }, + { + "epoch": 30.8, + "learning_rate": 4.799769006952002e-08, + "loss": 3.7629, + "step": 2773500 + }, + { + "epoch": 30.81, + "learning_rate": 4.798380827577016e-08, + "loss": 3.7543, + "step": 2774000 + }, + { + "epoch": 30.81, + "learning_rate": 4.79699264820203e-08, + "loss": 3.738, + "step": 2774500 + }, + { + "epoch": 30.82, + "learning_rate": 4.7956044688270435e-08, + "loss": 3.752, + "step": 2775000 + }, + { + "epoch": 30.82, + "learning_rate": 4.794216289452058e-08, + "loss": 3.7767, + "step": 2775500 + }, + { + "epoch": 30.83, + "learning_rate": 4.792828110077072e-08, + "loss": 3.7817, + "step": 2776000 + }, + { + "epoch": 30.83, + "learning_rate": 4.7914399307020856e-08, + "loss": 3.7611, + "step": 2776500 + }, + { + "epoch": 30.84, + "learning_rate": 4.790051751327099e-08, + "loss": 3.7582, + "step": 2777000 + }, + { + "epoch": 30.85, + "learning_rate": 4.788663571952113e-08, + "loss": 3.7605, + "step": 2777500 + }, + { + "epoch": 30.85, + "learning_rate": 4.787275392577127e-08, + "loss": 3.7772, + "step": 2778000 + }, + { + "epoch": 30.86, + "learning_rate": 4.785887213202141e-08, + "loss": 3.7713, + "step": 2778500 + }, + { + "epoch": 30.86, + "learning_rate": 4.784499033827155e-08, + "loss": 3.7601, + "step": 2779000 + }, + { + "epoch": 30.87, + "learning_rate": 4.783110854452169e-08, + "loss": 3.7735, + "step": 2779500 + }, + { + "epoch": 30.87, + "learning_rate": 4.7817226750771825e-08, + "loss": 3.7685, + "step": 2780000 + }, + { + "epoch": 30.88, + "learning_rate": 4.780334495702196e-08, + "loss": 3.7622, + "step": 2780500 + }, + { + "epoch": 30.88, + "learning_rate": 4.77894631632721e-08, + "loss": 3.7698, + "step": 2781000 + }, + { + "epoch": 30.89, + "learning_rate": 4.7775581369522245e-08, + "loss": 3.774, + "step": 2781500 + }, + { + "epoch": 30.9, + "learning_rate": 4.776169957577238e-08, + "loss": 3.7798, + "step": 2782000 + }, + { + "epoch": 30.9, + "learning_rate": 4.774781778202252e-08, + "loss": 3.7505, + "step": 2782500 + }, + { + "epoch": 30.91, + "learning_rate": 4.773393598827266e-08, + "loss": 3.7601, + "step": 2783000 + }, + { + "epoch": 30.91, + "learning_rate": 4.7720054194522794e-08, + "loss": 3.7691, + "step": 2783500 + }, + { + "epoch": 30.92, + "learning_rate": 4.7706172400772937e-08, + "loss": 3.7786, + "step": 2784000 + }, + { + "epoch": 30.92, + "learning_rate": 4.769229060702307e-08, + "loss": 3.7866, + "step": 2784500 + }, + { + "epoch": 30.93, + "learning_rate": 4.7678408813273215e-08, + "loss": 3.7668, + "step": 2785000 + }, + { + "epoch": 30.93, + "learning_rate": 4.766452701952336e-08, + "loss": 3.7619, + "step": 2785500 + }, + { + "epoch": 30.94, + "learning_rate": 4.765064522577349e-08, + "loss": 3.7586, + "step": 2786000 + }, + { + "epoch": 30.95, + "learning_rate": 4.763676343202363e-08, + "loss": 3.7724, + "step": 2786500 + }, + { + "epoch": 30.95, + "learning_rate": 4.762288163827377e-08, + "loss": 3.7729, + "step": 2787000 + }, + { + "epoch": 30.96, + "learning_rate": 4.7608999844523906e-08, + "loss": 3.762, + "step": 2787500 + }, + { + "epoch": 30.96, + "learning_rate": 4.759511805077405e-08, + "loss": 3.7745, + "step": 2788000 + }, + { + "epoch": 30.97, + "learning_rate": 4.758123625702419e-08, + "loss": 3.7544, + "step": 2788500 + }, + { + "epoch": 30.97, + "learning_rate": 4.7567354463274326e-08, + "loss": 3.7569, + "step": 2789000 + }, + { + "epoch": 30.98, + "learning_rate": 4.755347266952446e-08, + "loss": 3.7644, + "step": 2789500 + }, + { + "epoch": 30.98, + "learning_rate": 4.7539590875774604e-08, + "loss": 3.77, + "step": 2790000 + }, + { + "epoch": 30.99, + "learning_rate": 4.752570908202474e-08, + "loss": 3.7653, + "step": 2790500 + }, + { + "epoch": 31.0, + "learning_rate": 4.751182728827488e-08, + "loss": 3.7616, + "step": 2791000 + }, + { + "epoch": 31.0, + "eval_loss": 3.8338966369628906, + "eval_runtime": 6.3047, + "eval_samples_per_second": 246.482, + "step": 2791426 + }, + { + "epoch": 31.0, + "learning_rate": 4.749794549452502e-08, + "loss": 3.7581, + "step": 2791500 + }, + { + "epoch": 31.01, + "learning_rate": 4.748406370077515e-08, + "loss": 3.7536, + "step": 2792000 + }, + { + "epoch": 31.01, + "learning_rate": 4.7470181907025296e-08, + "loss": 3.7805, + "step": 2792500 + }, + { + "epoch": 31.02, + "learning_rate": 4.745630011327543e-08, + "loss": 3.7684, + "step": 2793000 + }, + { + "epoch": 31.02, + "learning_rate": 4.7442418319525573e-08, + "loss": 3.7698, + "step": 2793500 + }, + { + "epoch": 31.03, + "learning_rate": 4.7428536525775716e-08, + "loss": 3.7578, + "step": 2794000 + }, + { + "epoch": 31.03, + "learning_rate": 4.741465473202585e-08, + "loss": 3.7723, + "step": 2794500 + }, + { + "epoch": 31.04, + "learning_rate": 4.740077293827599e-08, + "loss": 3.7905, + "step": 2795000 + }, + { + "epoch": 31.05, + "learning_rate": 4.738689114452613e-08, + "loss": 3.7624, + "step": 2795500 + }, + { + "epoch": 31.05, + "learning_rate": 4.7373009350776265e-08, + "loss": 3.7682, + "step": 2796000 + }, + { + "epoch": 31.06, + "learning_rate": 4.735912755702641e-08, + "loss": 3.7636, + "step": 2796500 + }, + { + "epoch": 31.06, + "learning_rate": 4.734524576327655e-08, + "loss": 3.7786, + "step": 2797000 + }, + { + "epoch": 31.07, + "learning_rate": 4.7331363969526685e-08, + "loss": 3.7601, + "step": 2797500 + }, + { + "epoch": 31.07, + "learning_rate": 4.731748217577682e-08, + "loss": 3.7745, + "step": 2798000 + }, + { + "epoch": 31.08, + "learning_rate": 4.7303600382026956e-08, + "loss": 3.7621, + "step": 2798500 + }, + { + "epoch": 31.08, + "learning_rate": 4.72897185882771e-08, + "loss": 3.783, + "step": 2799000 + }, + { + "epoch": 31.09, + "learning_rate": 4.727583679452724e-08, + "loss": 3.7673, + "step": 2799500 + }, + { + "epoch": 31.1, + "learning_rate": 4.7261955000777377e-08, + "loss": 3.7703, + "step": 2800000 + }, + { + "epoch": 31.1, + "learning_rate": 4.724807320702752e-08, + "loss": 3.7615, + "step": 2800500 + }, + { + "epoch": 31.11, + "learning_rate": 4.7234191413277654e-08, + "loss": 3.751, + "step": 2801000 + }, + { + "epoch": 31.11, + "learning_rate": 4.722030961952779e-08, + "loss": 3.7532, + "step": 2801500 + }, + { + "epoch": 31.12, + "learning_rate": 4.720642782577793e-08, + "loss": 3.7535, + "step": 2802000 + }, + { + "epoch": 31.12, + "learning_rate": 4.7192546032028075e-08, + "loss": 3.7627, + "step": 2802500 + }, + { + "epoch": 31.13, + "learning_rate": 4.717866423827821e-08, + "loss": 3.755, + "step": 2803000 + }, + { + "epoch": 31.13, + "learning_rate": 4.716478244452835e-08, + "loss": 3.7459, + "step": 2803500 + }, + { + "epoch": 31.14, + "learning_rate": 4.715090065077849e-08, + "loss": 3.755, + "step": 2804000 + }, + { + "epoch": 31.15, + "learning_rate": 4.7137018857028624e-08, + "loss": 3.7662, + "step": 2804500 + }, + { + "epoch": 31.15, + "learning_rate": 4.7123137063278766e-08, + "loss": 3.7722, + "step": 2805000 + }, + { + "epoch": 31.16, + "learning_rate": 4.710925526952891e-08, + "loss": 3.767, + "step": 2805500 + }, + { + "epoch": 31.16, + "learning_rate": 4.7095373475779044e-08, + "loss": 3.7613, + "step": 2806000 + }, + { + "epoch": 31.17, + "learning_rate": 4.7081491682029186e-08, + "loss": 3.7614, + "step": 2806500 + }, + { + "epoch": 31.17, + "learning_rate": 4.706760988827932e-08, + "loss": 3.7504, + "step": 2807000 + }, + { + "epoch": 31.18, + "learning_rate": 4.705372809452946e-08, + "loss": 3.7697, + "step": 2807500 + }, + { + "epoch": 31.18, + "learning_rate": 4.70398463007796e-08, + "loss": 3.7538, + "step": 2808000 + }, + { + "epoch": 31.19, + "learning_rate": 4.7025964507029735e-08, + "loss": 3.7673, + "step": 2808500 + }, + { + "epoch": 31.2, + "learning_rate": 4.701208271327988e-08, + "loss": 3.7548, + "step": 2809000 + }, + { + "epoch": 31.2, + "learning_rate": 4.699820091953002e-08, + "loss": 3.7716, + "step": 2809500 + }, + { + "epoch": 31.21, + "learning_rate": 4.6984319125780156e-08, + "loss": 3.7516, + "step": 2810000 + }, + { + "epoch": 31.21, + "learning_rate": 4.697043733203029e-08, + "loss": 3.7762, + "step": 2810500 + }, + { + "epoch": 31.22, + "learning_rate": 4.6956555538280434e-08, + "loss": 3.7706, + "step": 2811000 + }, + { + "epoch": 31.22, + "learning_rate": 4.694267374453057e-08, + "loss": 3.778, + "step": 2811500 + }, + { + "epoch": 31.23, + "learning_rate": 4.692879195078071e-08, + "loss": 3.7708, + "step": 2812000 + }, + { + "epoch": 31.23, + "learning_rate": 4.6914910157030854e-08, + "loss": 3.7671, + "step": 2812500 + }, + { + "epoch": 31.24, + "learning_rate": 4.690102836328099e-08, + "loss": 3.7641, + "step": 2813000 + }, + { + "epoch": 31.25, + "learning_rate": 4.6887146569531125e-08, + "loss": 3.7745, + "step": 2813500 + }, + { + "epoch": 31.25, + "learning_rate": 4.687326477578126e-08, + "loss": 3.7439, + "step": 2814000 + }, + { + "epoch": 31.26, + "learning_rate": 4.68593829820314e-08, + "loss": 3.7455, + "step": 2814500 + }, + { + "epoch": 31.26, + "learning_rate": 4.6845501188281545e-08, + "loss": 3.7705, + "step": 2815000 + }, + { + "epoch": 31.27, + "learning_rate": 4.683161939453168e-08, + "loss": 3.7672, + "step": 2815500 + }, + { + "epoch": 31.27, + "learning_rate": 4.681773760078182e-08, + "loss": 3.7728, + "step": 2816000 + }, + { + "epoch": 31.28, + "learning_rate": 4.680385580703196e-08, + "loss": 3.7733, + "step": 2816500 + }, + { + "epoch": 31.28, + "learning_rate": 4.6789974013282094e-08, + "loss": 3.7375, + "step": 2817000 + }, + { + "epoch": 31.29, + "learning_rate": 4.677609221953224e-08, + "loss": 3.7623, + "step": 2817500 + }, + { + "epoch": 31.3, + "learning_rate": 4.676221042578238e-08, + "loss": 3.7534, + "step": 2818000 + }, + { + "epoch": 31.3, + "learning_rate": 4.6748328632032515e-08, + "loss": 3.7799, + "step": 2818500 + }, + { + "epoch": 31.31, + "learning_rate": 4.673444683828266e-08, + "loss": 3.748, + "step": 2819000 + }, + { + "epoch": 31.31, + "learning_rate": 4.672056504453279e-08, + "loss": 3.7637, + "step": 2819500 + }, + { + "epoch": 31.32, + "learning_rate": 4.670668325078293e-08, + "loss": 3.7581, + "step": 2820000 + }, + { + "epoch": 31.32, + "learning_rate": 4.669280145703307e-08, + "loss": 3.7697, + "step": 2820500 + }, + { + "epoch": 31.33, + "learning_rate": 4.6678919663283206e-08, + "loss": 3.7616, + "step": 2821000 + }, + { + "epoch": 31.33, + "learning_rate": 4.666503786953335e-08, + "loss": 3.7594, + "step": 2821500 + }, + { + "epoch": 31.34, + "learning_rate": 4.665115607578349e-08, + "loss": 3.7485, + "step": 2822000 + }, + { + "epoch": 31.35, + "learning_rate": 4.663727428203362e-08, + "loss": 3.7614, + "step": 2822500 + }, + { + "epoch": 31.35, + "learning_rate": 4.662339248828376e-08, + "loss": 3.7604, + "step": 2823000 + }, + { + "epoch": 31.36, + "learning_rate": 4.6609510694533904e-08, + "loss": 3.7758, + "step": 2823500 + }, + { + "epoch": 31.36, + "learning_rate": 4.659562890078404e-08, + "loss": 3.7505, + "step": 2824000 + }, + { + "epoch": 31.37, + "learning_rate": 4.658174710703418e-08, + "loss": 3.7504, + "step": 2824500 + }, + { + "epoch": 31.37, + "learning_rate": 4.6567865313284324e-08, + "loss": 3.7532, + "step": 2825000 + }, + { + "epoch": 31.38, + "learning_rate": 4.6553983519534453e-08, + "loss": 3.7561, + "step": 2825500 + }, + { + "epoch": 31.38, + "learning_rate": 4.6540101725784596e-08, + "loss": 3.7645, + "step": 2826000 + }, + { + "epoch": 31.39, + "learning_rate": 4.652621993203474e-08, + "loss": 3.7773, + "step": 2826500 + }, + { + "epoch": 31.4, + "learning_rate": 4.6512338138284874e-08, + "loss": 3.769, + "step": 2827000 + }, + { + "epoch": 31.4, + "learning_rate": 4.6498456344535016e-08, + "loss": 3.7582, + "step": 2827500 + }, + { + "epoch": 31.41, + "learning_rate": 4.648457455078515e-08, + "loss": 3.7702, + "step": 2828000 + }, + { + "epoch": 31.41, + "learning_rate": 4.647069275703529e-08, + "loss": 3.7762, + "step": 2828500 + }, + { + "epoch": 31.42, + "learning_rate": 4.645681096328543e-08, + "loss": 3.7649, + "step": 2829000 + }, + { + "epoch": 31.42, + "learning_rate": 4.6442929169535565e-08, + "loss": 3.7741, + "step": 2829500 + }, + { + "epoch": 31.43, + "learning_rate": 4.642904737578571e-08, + "loss": 3.781, + "step": 2830000 + }, + { + "epoch": 31.43, + "learning_rate": 4.641516558203585e-08, + "loss": 3.7713, + "step": 2830500 + }, + { + "epoch": 31.44, + "learning_rate": 4.6401283788285985e-08, + "loss": 3.763, + "step": 2831000 + }, + { + "epoch": 31.45, + "learning_rate": 4.638740199453612e-08, + "loss": 3.7363, + "step": 2831500 + }, + { + "epoch": 31.45, + "learning_rate": 4.637352020078626e-08, + "loss": 3.773, + "step": 2832000 + }, + { + "epoch": 31.46, + "learning_rate": 4.63596384070364e-08, + "loss": 3.761, + "step": 2832500 + }, + { + "epoch": 31.46, + "learning_rate": 4.634575661328654e-08, + "loss": 3.7713, + "step": 2833000 + }, + { + "epoch": 31.47, + "learning_rate": 4.633187481953668e-08, + "loss": 3.7553, + "step": 2833500 + }, + { + "epoch": 31.47, + "learning_rate": 4.631799302578682e-08, + "loss": 3.7568, + "step": 2834000 + }, + { + "epoch": 31.48, + "learning_rate": 4.6304111232036955e-08, + "loss": 3.7464, + "step": 2834500 + }, + { + "epoch": 31.48, + "learning_rate": 4.62902294382871e-08, + "loss": 3.7585, + "step": 2835000 + }, + { + "epoch": 31.49, + "learning_rate": 4.627634764453723e-08, + "loss": 3.751, + "step": 2835500 + }, + { + "epoch": 31.5, + "learning_rate": 4.6262465850787375e-08, + "loss": 3.7632, + "step": 2836000 + }, + { + "epoch": 31.5, + "learning_rate": 4.624858405703751e-08, + "loss": 3.7599, + "step": 2836500 + }, + { + "epoch": 31.51, + "learning_rate": 4.623470226328765e-08, + "loss": 3.7642, + "step": 2837000 + }, + { + "epoch": 31.51, + "learning_rate": 4.622082046953779e-08, + "loss": 3.7762, + "step": 2837500 + }, + { + "epoch": 31.52, + "learning_rate": 4.6206938675787924e-08, + "loss": 3.7565, + "step": 2838000 + }, + { + "epoch": 31.52, + "learning_rate": 4.6193056882038066e-08, + "loss": 3.7649, + "step": 2838500 + }, + { + "epoch": 31.53, + "learning_rate": 4.617917508828821e-08, + "loss": 3.7484, + "step": 2839000 + }, + { + "epoch": 31.53, + "learning_rate": 4.6165293294538344e-08, + "loss": 3.7614, + "step": 2839500 + }, + { + "epoch": 31.54, + "learning_rate": 4.6151411500788486e-08, + "loss": 3.7485, + "step": 2840000 + }, + { + "epoch": 31.54, + "learning_rate": 4.613752970703862e-08, + "loss": 3.7689, + "step": 2840500 + }, + { + "epoch": 31.55, + "learning_rate": 4.612364791328876e-08, + "loss": 3.7763, + "step": 2841000 + }, + { + "epoch": 31.56, + "learning_rate": 4.61097661195389e-08, + "loss": 3.7628, + "step": 2841500 + }, + { + "epoch": 31.56, + "learning_rate": 4.609588432578904e-08, + "loss": 3.7629, + "step": 2842000 + }, + { + "epoch": 31.57, + "learning_rate": 4.608200253203918e-08, + "loss": 3.7501, + "step": 2842500 + }, + { + "epoch": 31.57, + "learning_rate": 4.606812073828932e-08, + "loss": 3.7719, + "step": 2843000 + }, + { + "epoch": 31.58, + "learning_rate": 4.6054238944539456e-08, + "loss": 3.7434, + "step": 2843500 + }, + { + "epoch": 31.58, + "learning_rate": 4.604035715078959e-08, + "loss": 3.7637, + "step": 2844000 + }, + { + "epoch": 31.59, + "learning_rate": 4.6026475357039734e-08, + "loss": 3.761, + "step": 2844500 + }, + { + "epoch": 31.59, + "learning_rate": 4.601259356328987e-08, + "loss": 3.7501, + "step": 2845000 + }, + { + "epoch": 31.6, + "learning_rate": 4.599871176954001e-08, + "loss": 3.7439, + "step": 2845500 + }, + { + "epoch": 31.61, + "learning_rate": 4.5984829975790154e-08, + "loss": 3.7883, + "step": 2846000 + }, + { + "epoch": 31.61, + "learning_rate": 4.597094818204029e-08, + "loss": 3.7409, + "step": 2846500 + }, + { + "epoch": 31.62, + "learning_rate": 4.5957066388290425e-08, + "loss": 3.76, + "step": 2847000 + }, + { + "epoch": 31.62, + "learning_rate": 4.594318459454057e-08, + "loss": 3.7451, + "step": 2847500 + }, + { + "epoch": 31.63, + "learning_rate": 4.59293028007907e-08, + "loss": 3.7725, + "step": 2848000 + }, + { + "epoch": 31.63, + "learning_rate": 4.5915421007040845e-08, + "loss": 3.7908, + "step": 2848500 + }, + { + "epoch": 31.64, + "learning_rate": 4.590153921329099e-08, + "loss": 3.7723, + "step": 2849000 + }, + { + "epoch": 31.64, + "learning_rate": 4.588765741954112e-08, + "loss": 3.7503, + "step": 2849500 + }, + { + "epoch": 31.65, + "learning_rate": 4.587377562579126e-08, + "loss": 3.7759, + "step": 2850000 + }, + { + "epoch": 31.66, + "learning_rate": 4.5859893832041395e-08, + "loss": 3.7487, + "step": 2850500 + }, + { + "epoch": 31.66, + "learning_rate": 4.584601203829154e-08, + "loss": 3.7704, + "step": 2851000 + }, + { + "epoch": 31.67, + "learning_rate": 4.583213024454168e-08, + "loss": 3.7585, + "step": 2851500 + }, + { + "epoch": 31.67, + "learning_rate": 4.5818248450791815e-08, + "loss": 3.7587, + "step": 2852000 + }, + { + "epoch": 31.68, + "learning_rate": 4.580436665704196e-08, + "loss": 3.7733, + "step": 2852500 + }, + { + "epoch": 31.68, + "learning_rate": 4.579048486329209e-08, + "loss": 3.77, + "step": 2853000 + }, + { + "epoch": 31.69, + "learning_rate": 4.577660306954223e-08, + "loss": 3.7599, + "step": 2853500 + }, + { + "epoch": 31.69, + "learning_rate": 4.576272127579237e-08, + "loss": 3.7651, + "step": 2854000 + }, + { + "epoch": 31.7, + "learning_rate": 4.574883948204251e-08, + "loss": 3.7514, + "step": 2854500 + }, + { + "epoch": 31.71, + "learning_rate": 4.573495768829265e-08, + "loss": 3.7541, + "step": 2855000 + }, + { + "epoch": 31.71, + "learning_rate": 4.572107589454279e-08, + "loss": 3.7595, + "step": 2855500 + }, + { + "epoch": 31.72, + "learning_rate": 4.5707194100792926e-08, + "loss": 3.7714, + "step": 2856000 + }, + { + "epoch": 31.72, + "learning_rate": 4.569331230704306e-08, + "loss": 3.7711, + "step": 2856500 + }, + { + "epoch": 31.73, + "learning_rate": 4.5679430513293204e-08, + "loss": 3.7599, + "step": 2857000 + }, + { + "epoch": 31.73, + "learning_rate": 4.566554871954334e-08, + "loss": 3.7643, + "step": 2857500 + }, + { + "epoch": 31.74, + "learning_rate": 4.565166692579348e-08, + "loss": 3.7623, + "step": 2858000 + }, + { + "epoch": 31.74, + "learning_rate": 4.5637785132043624e-08, + "loss": 3.7573, + "step": 2858500 + }, + { + "epoch": 31.75, + "learning_rate": 4.5623903338293753e-08, + "loss": 3.7769, + "step": 2859000 + }, + { + "epoch": 31.76, + "learning_rate": 4.5610021544543896e-08, + "loss": 3.7722, + "step": 2859500 + }, + { + "epoch": 31.76, + "learning_rate": 4.559613975079404e-08, + "loss": 3.7551, + "step": 2860000 + }, + { + "epoch": 31.77, + "learning_rate": 4.5582257957044174e-08, + "loss": 3.7694, + "step": 2860500 + }, + { + "epoch": 31.77, + "learning_rate": 4.5568376163294316e-08, + "loss": 3.7382, + "step": 2861000 + }, + { + "epoch": 31.78, + "learning_rate": 4.555449436954446e-08, + "loss": 3.7636, + "step": 2861500 + }, + { + "epoch": 31.78, + "learning_rate": 4.554061257579459e-08, + "loss": 3.7406, + "step": 2862000 + }, + { + "epoch": 31.79, + "learning_rate": 4.552673078204473e-08, + "loss": 3.7686, + "step": 2862500 + }, + { + "epoch": 31.79, + "learning_rate": 4.551284898829487e-08, + "loss": 3.7664, + "step": 2863000 + }, + { + "epoch": 31.8, + "learning_rate": 4.549896719454501e-08, + "loss": 3.7655, + "step": 2863500 + }, + { + "epoch": 31.81, + "learning_rate": 4.548508540079515e-08, + "loss": 3.7701, + "step": 2864000 + }, + { + "epoch": 31.81, + "learning_rate": 4.5471203607045285e-08, + "loss": 3.7878, + "step": 2864500 + }, + { + "epoch": 31.82, + "learning_rate": 4.545732181329542e-08, + "loss": 3.7815, + "step": 2865000 + }, + { + "epoch": 31.82, + "learning_rate": 4.544344001954556e-08, + "loss": 3.7646, + "step": 2865500 + }, + { + "epoch": 31.83, + "learning_rate": 4.54295582257957e-08, + "loss": 3.7722, + "step": 2866000 + }, + { + "epoch": 31.83, + "learning_rate": 4.541567643204584e-08, + "loss": 3.7622, + "step": 2866500 + }, + { + "epoch": 31.84, + "learning_rate": 4.5401794638295983e-08, + "loss": 3.7374, + "step": 2867000 + }, + { + "epoch": 31.84, + "learning_rate": 4.538791284454612e-08, + "loss": 3.778, + "step": 2867500 + }, + { + "epoch": 31.85, + "learning_rate": 4.5374031050796255e-08, + "loss": 3.7671, + "step": 2868000 + }, + { + "epoch": 31.86, + "learning_rate": 4.53601492570464e-08, + "loss": 3.7698, + "step": 2868500 + }, + { + "epoch": 31.86, + "learning_rate": 4.534626746329653e-08, + "loss": 3.7464, + "step": 2869000 + }, + { + "epoch": 31.87, + "learning_rate": 4.5332385669546675e-08, + "loss": 3.7634, + "step": 2869500 + }, + { + "epoch": 31.87, + "learning_rate": 4.531850387579682e-08, + "loss": 3.7616, + "step": 2870000 + }, + { + "epoch": 31.88, + "learning_rate": 4.530462208204695e-08, + "loss": 3.7455, + "step": 2870500 + }, + { + "epoch": 31.88, + "learning_rate": 4.529074028829709e-08, + "loss": 3.7615, + "step": 2871000 + }, + { + "epoch": 31.89, + "learning_rate": 4.527685849454723e-08, + "loss": 3.7356, + "step": 2871500 + }, + { + "epoch": 31.89, + "learning_rate": 4.5262976700797366e-08, + "loss": 3.7739, + "step": 2872000 + }, + { + "epoch": 31.9, + "learning_rate": 4.524909490704751e-08, + "loss": 3.7543, + "step": 2872500 + }, + { + "epoch": 31.91, + "learning_rate": 4.5235213113297644e-08, + "loss": 3.7721, + "step": 2873000 + }, + { + "epoch": 31.91, + "learning_rate": 4.5221331319547786e-08, + "loss": 3.7474, + "step": 2873500 + }, + { + "epoch": 31.92, + "learning_rate": 4.520744952579792e-08, + "loss": 3.7654, + "step": 2874000 + }, + { + "epoch": 31.92, + "learning_rate": 4.519356773204806e-08, + "loss": 3.7854, + "step": 2874500 + }, + { + "epoch": 31.93, + "learning_rate": 4.51796859382982e-08, + "loss": 3.7678, + "step": 2875000 + }, + { + "epoch": 31.93, + "learning_rate": 4.516580414454834e-08, + "loss": 3.7674, + "step": 2875500 + }, + { + "epoch": 31.94, + "learning_rate": 4.515192235079848e-08, + "loss": 3.7666, + "step": 2876000 + }, + { + "epoch": 31.94, + "learning_rate": 4.513804055704862e-08, + "loss": 3.7623, + "step": 2876500 + }, + { + "epoch": 31.95, + "learning_rate": 4.5124158763298756e-08, + "loss": 3.7535, + "step": 2877000 + }, + { + "epoch": 31.96, + "learning_rate": 4.511027696954889e-08, + "loss": 3.7513, + "step": 2877500 + }, + { + "epoch": 31.96, + "learning_rate": 4.5096395175799034e-08, + "loss": 3.7576, + "step": 2878000 + }, + { + "epoch": 31.97, + "learning_rate": 4.5082513382049176e-08, + "loss": 3.7583, + "step": 2878500 + }, + { + "epoch": 31.97, + "learning_rate": 4.506863158829931e-08, + "loss": 3.7719, + "step": 2879000 + }, + { + "epoch": 31.98, + "learning_rate": 4.5054749794549454e-08, + "loss": 3.7655, + "step": 2879500 + }, + { + "epoch": 31.98, + "learning_rate": 4.504086800079959e-08, + "loss": 3.7603, + "step": 2880000 + }, + { + "epoch": 31.99, + "learning_rate": 4.5026986207049725e-08, + "loss": 3.761, + "step": 2880500 + }, + { + "epoch": 31.99, + "learning_rate": 4.501310441329987e-08, + "loss": 3.7671, + "step": 2881000 + }, + { + "epoch": 32.0, + "eval_loss": 3.8329031467437744, + "eval_runtime": 6.2965, + "eval_samples_per_second": 246.805, + "step": 2881472 + }, + { + "epoch": 32.0, + "learning_rate": 4.499922261955e-08, + "loss": 3.7718, + "step": 2881500 + }, + { + "epoch": 32.01, + "learning_rate": 4.4985340825800145e-08, + "loss": 3.7558, + "step": 2882000 + }, + { + "epoch": 32.01, + "learning_rate": 4.497145903205029e-08, + "loss": 3.7661, + "step": 2882500 + }, + { + "epoch": 32.02, + "learning_rate": 4.495757723830042e-08, + "loss": 3.7756, + "step": 2883000 + }, + { + "epoch": 32.02, + "learning_rate": 4.494369544455056e-08, + "loss": 3.7572, + "step": 2883500 + }, + { + "epoch": 32.03, + "learning_rate": 4.49298136508007e-08, + "loss": 3.7516, + "step": 2884000 + }, + { + "epoch": 32.03, + "learning_rate": 4.491593185705084e-08, + "loss": 3.7526, + "step": 2884500 + }, + { + "epoch": 32.04, + "learning_rate": 4.490205006330098e-08, + "loss": 3.7636, + "step": 2885000 + }, + { + "epoch": 32.04, + "learning_rate": 4.488816826955112e-08, + "loss": 3.7707, + "step": 2885500 + }, + { + "epoch": 32.05, + "learning_rate": 4.487428647580126e-08, + "loss": 3.7604, + "step": 2886000 + }, + { + "epoch": 32.06, + "learning_rate": 4.486040468205139e-08, + "loss": 3.7571, + "step": 2886500 + }, + { + "epoch": 32.06, + "learning_rate": 4.484652288830153e-08, + "loss": 3.7739, + "step": 2887000 + }, + { + "epoch": 32.07, + "learning_rate": 4.483264109455167e-08, + "loss": 3.7697, + "step": 2887500 + }, + { + "epoch": 32.07, + "learning_rate": 4.481875930080181e-08, + "loss": 3.7586, + "step": 2888000 + }, + { + "epoch": 32.08, + "learning_rate": 4.480487750705195e-08, + "loss": 3.7649, + "step": 2888500 + }, + { + "epoch": 32.08, + "learning_rate": 4.479099571330209e-08, + "loss": 3.7708, + "step": 2889000 + }, + { + "epoch": 32.09, + "learning_rate": 4.4777113919552226e-08, + "loss": 3.7477, + "step": 2889500 + }, + { + "epoch": 32.09, + "learning_rate": 4.476323212580236e-08, + "loss": 3.7805, + "step": 2890000 + }, + { + "epoch": 32.1, + "learning_rate": 4.4749350332052504e-08, + "loss": 3.7628, + "step": 2890500 + }, + { + "epoch": 32.11, + "learning_rate": 4.4735468538302647e-08, + "loss": 3.7618, + "step": 2891000 + }, + { + "epoch": 32.11, + "learning_rate": 4.472158674455278e-08, + "loss": 3.7862, + "step": 2891500 + }, + { + "epoch": 32.12, + "learning_rate": 4.4707704950802924e-08, + "loss": 3.7491, + "step": 2892000 + }, + { + "epoch": 32.12, + "learning_rate": 4.469382315705306e-08, + "loss": 3.755, + "step": 2892500 + }, + { + "epoch": 32.13, + "learning_rate": 4.4679941363303196e-08, + "loss": 3.757, + "step": 2893000 + }, + { + "epoch": 32.13, + "learning_rate": 4.466605956955334e-08, + "loss": 3.771, + "step": 2893500 + }, + { + "epoch": 32.14, + "learning_rate": 4.4652177775803474e-08, + "loss": 3.7449, + "step": 2894000 + }, + { + "epoch": 32.14, + "learning_rate": 4.4638295982053616e-08, + "loss": 3.7597, + "step": 2894500 + }, + { + "epoch": 32.15, + "learning_rate": 4.462441418830376e-08, + "loss": 3.7508, + "step": 2895000 + }, + { + "epoch": 32.16, + "learning_rate": 4.461053239455389e-08, + "loss": 3.7661, + "step": 2895500 + }, + { + "epoch": 32.16, + "learning_rate": 4.459665060080403e-08, + "loss": 3.758, + "step": 2896000 + }, + { + "epoch": 32.17, + "learning_rate": 4.458276880705417e-08, + "loss": 3.7647, + "step": 2896500 + }, + { + "epoch": 32.17, + "learning_rate": 4.456888701330431e-08, + "loss": 3.7464, + "step": 2897000 + }, + { + "epoch": 32.18, + "learning_rate": 4.455500521955445e-08, + "loss": 3.7596, + "step": 2897500 + }, + { + "epoch": 32.18, + "learning_rate": 4.454112342580459e-08, + "loss": 3.756, + "step": 2898000 + }, + { + "epoch": 32.19, + "learning_rate": 4.452724163205472e-08, + "loss": 3.764, + "step": 2898500 + }, + { + "epoch": 32.19, + "learning_rate": 4.451335983830486e-08, + "loss": 3.7585, + "step": 2899000 + }, + { + "epoch": 32.2, + "learning_rate": 4.4499478044555006e-08, + "loss": 3.7627, + "step": 2899500 + }, + { + "epoch": 32.21, + "learning_rate": 4.448559625080514e-08, + "loss": 3.7478, + "step": 2900000 + }, + { + "epoch": 32.21, + "learning_rate": 4.4471714457055283e-08, + "loss": 3.7645, + "step": 2900500 + }, + { + "epoch": 32.22, + "learning_rate": 4.445783266330542e-08, + "loss": 3.7541, + "step": 2901000 + }, + { + "epoch": 32.22, + "learning_rate": 4.4443950869555555e-08, + "loss": 3.7643, + "step": 2901500 + }, + { + "epoch": 32.23, + "learning_rate": 4.44300690758057e-08, + "loss": 3.7763, + "step": 2902000 + }, + { + "epoch": 32.23, + "learning_rate": 4.441618728205583e-08, + "loss": 3.7658, + "step": 2902500 + }, + { + "epoch": 32.24, + "learning_rate": 4.4402305488305975e-08, + "loss": 3.7637, + "step": 2903000 + }, + { + "epoch": 32.24, + "learning_rate": 4.438842369455612e-08, + "loss": 3.7652, + "step": 2903500 + }, + { + "epoch": 32.25, + "learning_rate": 4.437454190080625e-08, + "loss": 3.7585, + "step": 2904000 + }, + { + "epoch": 32.26, + "learning_rate": 4.436066010705639e-08, + "loss": 3.7384, + "step": 2904500 + }, + { + "epoch": 32.26, + "learning_rate": 4.434677831330653e-08, + "loss": 3.7683, + "step": 2905000 + }, + { + "epoch": 32.27, + "learning_rate": 4.4332896519556666e-08, + "loss": 3.7554, + "step": 2905500 + }, + { + "epoch": 32.27, + "learning_rate": 4.431901472580681e-08, + "loss": 3.7596, + "step": 2906000 + }, + { + "epoch": 32.28, + "learning_rate": 4.430513293205695e-08, + "loss": 3.7407, + "step": 2906500 + }, + { + "epoch": 32.28, + "learning_rate": 4.4291251138307087e-08, + "loss": 3.7728, + "step": 2907000 + }, + { + "epoch": 32.29, + "learning_rate": 4.427736934455722e-08, + "loss": 3.7572, + "step": 2907500 + }, + { + "epoch": 32.29, + "learning_rate": 4.4263487550807364e-08, + "loss": 3.737, + "step": 2908000 + }, + { + "epoch": 32.3, + "learning_rate": 4.42496057570575e-08, + "loss": 3.7615, + "step": 2908500 + }, + { + "epoch": 32.31, + "learning_rate": 4.423572396330764e-08, + "loss": 3.7604, + "step": 2909000 + }, + { + "epoch": 32.31, + "learning_rate": 4.422184216955778e-08, + "loss": 3.7491, + "step": 2909500 + }, + { + "epoch": 32.32, + "learning_rate": 4.420796037580792e-08, + "loss": 3.7687, + "step": 2910000 + }, + { + "epoch": 32.32, + "learning_rate": 4.4194078582058056e-08, + "loss": 3.7661, + "step": 2910500 + }, + { + "epoch": 32.33, + "learning_rate": 4.418019678830819e-08, + "loss": 3.7766, + "step": 2911000 + }, + { + "epoch": 32.33, + "learning_rate": 4.4166314994558334e-08, + "loss": 3.765, + "step": 2911500 + }, + { + "epoch": 32.34, + "learning_rate": 4.4152433200808476e-08, + "loss": 3.7821, + "step": 2912000 + }, + { + "epoch": 32.34, + "learning_rate": 4.413855140705861e-08, + "loss": 3.7942, + "step": 2912500 + }, + { + "epoch": 32.35, + "learning_rate": 4.4124669613308754e-08, + "loss": 3.767, + "step": 2913000 + }, + { + "epoch": 32.36, + "learning_rate": 4.411078781955889e-08, + "loss": 3.7776, + "step": 2913500 + }, + { + "epoch": 32.36, + "learning_rate": 4.4096906025809025e-08, + "loss": 3.7549, + "step": 2914000 + }, + { + "epoch": 32.37, + "learning_rate": 4.408302423205917e-08, + "loss": 3.7848, + "step": 2914500 + }, + { + "epoch": 32.37, + "learning_rate": 4.406914243830931e-08, + "loss": 3.7703, + "step": 2915000 + }, + { + "epoch": 32.38, + "learning_rate": 4.4055260644559445e-08, + "loss": 3.7769, + "step": 2915500 + }, + { + "epoch": 32.38, + "learning_rate": 4.404137885080959e-08, + "loss": 3.7403, + "step": 2916000 + }, + { + "epoch": 32.39, + "learning_rate": 4.4027497057059723e-08, + "loss": 3.7579, + "step": 2916500 + }, + { + "epoch": 32.39, + "learning_rate": 4.401361526330986e-08, + "loss": 3.7853, + "step": 2917000 + }, + { + "epoch": 32.4, + "learning_rate": 4.399973346956e-08, + "loss": 3.7661, + "step": 2917500 + }, + { + "epoch": 32.41, + "learning_rate": 4.398585167581014e-08, + "loss": 3.7605, + "step": 2918000 + }, + { + "epoch": 32.41, + "learning_rate": 4.397196988206028e-08, + "loss": 3.7604, + "step": 2918500 + }, + { + "epoch": 32.42, + "learning_rate": 4.395808808831042e-08, + "loss": 3.7866, + "step": 2919000 + }, + { + "epoch": 32.42, + "learning_rate": 4.394420629456056e-08, + "loss": 3.7694, + "step": 2919500 + }, + { + "epoch": 32.43, + "learning_rate": 4.393032450081069e-08, + "loss": 3.7735, + "step": 2920000 + }, + { + "epoch": 32.43, + "learning_rate": 4.3916442707060835e-08, + "loss": 3.7436, + "step": 2920500 + }, + { + "epoch": 32.44, + "learning_rate": 4.390256091331097e-08, + "loss": 3.7456, + "step": 2921000 + }, + { + "epoch": 32.44, + "learning_rate": 4.388867911956111e-08, + "loss": 3.7682, + "step": 2921500 + }, + { + "epoch": 32.45, + "learning_rate": 4.3874797325811255e-08, + "loss": 3.7458, + "step": 2922000 + }, + { + "epoch": 32.46, + "learning_rate": 4.386091553206139e-08, + "loss": 3.7691, + "step": 2922500 + }, + { + "epoch": 32.46, + "learning_rate": 4.3847033738311526e-08, + "loss": 3.7418, + "step": 2923000 + }, + { + "epoch": 32.47, + "learning_rate": 4.383315194456166e-08, + "loss": 3.7662, + "step": 2923500 + }, + { + "epoch": 32.47, + "learning_rate": 4.3819270150811804e-08, + "loss": 3.7551, + "step": 2924000 + }, + { + "epoch": 32.48, + "learning_rate": 4.3805388357061947e-08, + "loss": 3.7528, + "step": 2924500 + }, + { + "epoch": 32.48, + "learning_rate": 4.379150656331208e-08, + "loss": 3.7577, + "step": 2925000 + }, + { + "epoch": 32.49, + "learning_rate": 4.3777624769562225e-08, + "loss": 3.7692, + "step": 2925500 + }, + { + "epoch": 32.49, + "learning_rate": 4.376374297581236e-08, + "loss": 3.7541, + "step": 2926000 + }, + { + "epoch": 32.5, + "learning_rate": 4.3749861182062496e-08, + "loss": 3.7732, + "step": 2926500 + }, + { + "epoch": 32.51, + "learning_rate": 4.373597938831264e-08, + "loss": 3.7502, + "step": 2927000 + }, + { + "epoch": 32.51, + "learning_rate": 4.372209759456278e-08, + "loss": 3.7676, + "step": 2927500 + }, + { + "epoch": 32.52, + "learning_rate": 4.3708215800812916e-08, + "loss": 3.7692, + "step": 2928000 + }, + { + "epoch": 32.52, + "learning_rate": 4.369433400706306e-08, + "loss": 3.7638, + "step": 2928500 + }, + { + "epoch": 32.53, + "learning_rate": 4.3680452213313194e-08, + "loss": 3.7326, + "step": 2929000 + }, + { + "epoch": 32.53, + "learning_rate": 4.366657041956333e-08, + "loss": 3.7494, + "step": 2929500 + }, + { + "epoch": 32.54, + "learning_rate": 4.365268862581347e-08, + "loss": 3.7645, + "step": 2930000 + }, + { + "epoch": 32.54, + "learning_rate": 4.3638806832063614e-08, + "loss": 3.7549, + "step": 2930500 + }, + { + "epoch": 32.55, + "learning_rate": 4.362492503831375e-08, + "loss": 3.7675, + "step": 2931000 + }, + { + "epoch": 32.56, + "learning_rate": 4.3611043244563885e-08, + "loss": 3.7538, + "step": 2931500 + }, + { + "epoch": 32.56, + "learning_rate": 4.359716145081402e-08, + "loss": 3.78, + "step": 2932000 + }, + { + "epoch": 32.57, + "learning_rate": 4.3583279657064163e-08, + "loss": 3.7514, + "step": 2932500 + }, + { + "epoch": 32.57, + "learning_rate": 4.3569397863314306e-08, + "loss": 3.7748, + "step": 2933000 + }, + { + "epoch": 32.58, + "learning_rate": 4.355551606956444e-08, + "loss": 3.7552, + "step": 2933500 + }, + { + "epoch": 32.58, + "learning_rate": 4.3541634275814584e-08, + "loss": 3.7586, + "step": 2934000 + }, + { + "epoch": 32.59, + "learning_rate": 4.352775248206472e-08, + "loss": 3.7554, + "step": 2934500 + }, + { + "epoch": 32.59, + "learning_rate": 4.3513870688314855e-08, + "loss": 3.7595, + "step": 2935000 + }, + { + "epoch": 32.6, + "learning_rate": 4.3499988894565e-08, + "loss": 3.7534, + "step": 2935500 + }, + { + "epoch": 32.61, + "learning_rate": 4.348610710081514e-08, + "loss": 3.7545, + "step": 2936000 + }, + { + "epoch": 32.61, + "learning_rate": 4.3472225307065275e-08, + "loss": 3.7649, + "step": 2936500 + }, + { + "epoch": 32.62, + "learning_rate": 4.345834351331542e-08, + "loss": 3.748, + "step": 2937000 + }, + { + "epoch": 32.62, + "learning_rate": 4.344446171956555e-08, + "loss": 3.7764, + "step": 2937500 + }, + { + "epoch": 32.63, + "learning_rate": 4.343057992581569e-08, + "loss": 3.7488, + "step": 2938000 + }, + { + "epoch": 32.63, + "learning_rate": 4.341669813206583e-08, + "loss": 3.7843, + "step": 2938500 + }, + { + "epoch": 32.64, + "learning_rate": 4.3402816338315966e-08, + "loss": 3.7544, + "step": 2939000 + }, + { + "epoch": 32.64, + "learning_rate": 4.338893454456611e-08, + "loss": 3.7571, + "step": 2939500 + }, + { + "epoch": 32.65, + "learning_rate": 4.337505275081625e-08, + "loss": 3.7696, + "step": 2940000 + }, + { + "epoch": 32.66, + "learning_rate": 4.3361170957066387e-08, + "loss": 3.7596, + "step": 2940500 + }, + { + "epoch": 32.66, + "learning_rate": 4.334728916331652e-08, + "loss": 3.7791, + "step": 2941000 + }, + { + "epoch": 32.67, + "learning_rate": 4.3333407369566665e-08, + "loss": 3.7632, + "step": 2941500 + }, + { + "epoch": 32.67, + "learning_rate": 4.33195255758168e-08, + "loss": 3.7645, + "step": 2942000 + }, + { + "epoch": 32.68, + "learning_rate": 4.330564378206694e-08, + "loss": 3.769, + "step": 2942500 + }, + { + "epoch": 32.68, + "learning_rate": 4.3291761988317085e-08, + "loss": 3.763, + "step": 2943000 + }, + { + "epoch": 32.69, + "learning_rate": 4.327788019456722e-08, + "loss": 3.763, + "step": 2943500 + }, + { + "epoch": 32.69, + "learning_rate": 4.3263998400817356e-08, + "loss": 3.7635, + "step": 2944000 + }, + { + "epoch": 32.7, + "learning_rate": 4.32501166070675e-08, + "loss": 3.7683, + "step": 2944500 + }, + { + "epoch": 32.71, + "learning_rate": 4.3236234813317634e-08, + "loss": 3.7428, + "step": 2945000 + }, + { + "epoch": 32.71, + "learning_rate": 4.3222353019567776e-08, + "loss": 3.7381, + "step": 2945500 + }, + { + "epoch": 32.72, + "learning_rate": 4.320847122581791e-08, + "loss": 3.7751, + "step": 2946000 + }, + { + "epoch": 32.72, + "learning_rate": 4.3194589432068054e-08, + "loss": 3.7728, + "step": 2946500 + }, + { + "epoch": 32.73, + "learning_rate": 4.318070763831819e-08, + "loss": 3.7699, + "step": 2947000 + }, + { + "epoch": 32.73, + "learning_rate": 4.3166825844568325e-08, + "loss": 3.7628, + "step": 2947500 + }, + { + "epoch": 32.74, + "learning_rate": 4.315294405081847e-08, + "loss": 3.757, + "step": 2948000 + }, + { + "epoch": 32.74, + "learning_rate": 4.313906225706861e-08, + "loss": 3.7636, + "step": 2948500 + }, + { + "epoch": 32.75, + "learning_rate": 4.3125180463318746e-08, + "loss": 3.7597, + "step": 2949000 + }, + { + "epoch": 32.76, + "learning_rate": 4.311129866956889e-08, + "loss": 3.7664, + "step": 2949500 + }, + { + "epoch": 32.76, + "learning_rate": 4.3097416875819023e-08, + "loss": 3.7609, + "step": 2950000 + }, + { + "epoch": 32.77, + "learning_rate": 4.308353508206916e-08, + "loss": 3.7506, + "step": 2950500 + }, + { + "epoch": 32.77, + "learning_rate": 4.30696532883193e-08, + "loss": 3.7589, + "step": 2951000 + }, + { + "epoch": 32.78, + "learning_rate": 4.3055771494569444e-08, + "loss": 3.7634, + "step": 2951500 + }, + { + "epoch": 32.78, + "learning_rate": 4.304188970081958e-08, + "loss": 3.757, + "step": 2952000 + }, + { + "epoch": 32.79, + "learning_rate": 4.302800790706972e-08, + "loss": 3.7416, + "step": 2952500 + }, + { + "epoch": 32.79, + "learning_rate": 4.301412611331986e-08, + "loss": 3.7597, + "step": 2953000 + }, + { + "epoch": 32.8, + "learning_rate": 4.300024431956999e-08, + "loss": 3.7543, + "step": 2953500 + }, + { + "epoch": 32.81, + "learning_rate": 4.2986362525820135e-08, + "loss": 3.7661, + "step": 2954000 + }, + { + "epoch": 32.81, + "learning_rate": 4.297248073207027e-08, + "loss": 3.7557, + "step": 2954500 + }, + { + "epoch": 32.82, + "learning_rate": 4.295859893832041e-08, + "loss": 3.7564, + "step": 2955000 + }, + { + "epoch": 32.82, + "learning_rate": 4.2944717144570555e-08, + "loss": 3.7538, + "step": 2955500 + }, + { + "epoch": 32.83, + "learning_rate": 4.293083535082069e-08, + "loss": 3.7671, + "step": 2956000 + }, + { + "epoch": 32.83, + "learning_rate": 4.2916953557070827e-08, + "loss": 3.7622, + "step": 2956500 + }, + { + "epoch": 32.84, + "learning_rate": 4.290307176332097e-08, + "loss": 3.7508, + "step": 2957000 + }, + { + "epoch": 32.84, + "learning_rate": 4.2889189969571104e-08, + "loss": 3.7583, + "step": 2957500 + }, + { + "epoch": 32.85, + "learning_rate": 4.287530817582125e-08, + "loss": 3.7672, + "step": 2958000 + }, + { + "epoch": 32.86, + "learning_rate": 4.286142638207139e-08, + "loss": 3.7523, + "step": 2958500 + }, + { + "epoch": 32.86, + "learning_rate": 4.2847544588321525e-08, + "loss": 3.7561, + "step": 2959000 + }, + { + "epoch": 32.87, + "learning_rate": 4.283366279457166e-08, + "loss": 3.7496, + "step": 2959500 + }, + { + "epoch": 32.87, + "learning_rate": 4.28197810008218e-08, + "loss": 3.7694, + "step": 2960000 + }, + { + "epoch": 32.88, + "learning_rate": 4.280589920707194e-08, + "loss": 3.7495, + "step": 2960500 + }, + { + "epoch": 32.88, + "learning_rate": 4.279201741332208e-08, + "loss": 3.7545, + "step": 2961000 + }, + { + "epoch": 32.89, + "learning_rate": 4.2778135619572216e-08, + "loss": 3.7625, + "step": 2961500 + }, + { + "epoch": 32.89, + "learning_rate": 4.276425382582235e-08, + "loss": 3.7661, + "step": 2962000 + }, + { + "epoch": 32.9, + "learning_rate": 4.2750372032072494e-08, + "loss": 3.7644, + "step": 2962500 + }, + { + "epoch": 32.91, + "learning_rate": 4.273649023832263e-08, + "loss": 3.7606, + "step": 2963000 + }, + { + "epoch": 32.91, + "learning_rate": 4.272260844457277e-08, + "loss": 3.7532, + "step": 2963500 + }, + { + "epoch": 32.92, + "learning_rate": 4.2708726650822914e-08, + "loss": 3.7812, + "step": 2964000 + }, + { + "epoch": 32.92, + "learning_rate": 4.269484485707305e-08, + "loss": 3.7498, + "step": 2964500 + }, + { + "epoch": 32.93, + "learning_rate": 4.2680963063323186e-08, + "loss": 3.7682, + "step": 2965000 + }, + { + "epoch": 32.93, + "learning_rate": 4.266708126957333e-08, + "loss": 3.7603, + "step": 2965500 + }, + { + "epoch": 32.94, + "learning_rate": 4.2653199475823463e-08, + "loss": 3.7575, + "step": 2966000 + }, + { + "epoch": 32.94, + "learning_rate": 4.2639317682073606e-08, + "loss": 3.7574, + "step": 2966500 + }, + { + "epoch": 32.95, + "learning_rate": 4.262543588832375e-08, + "loss": 3.7667, + "step": 2967000 + }, + { + "epoch": 32.96, + "learning_rate": 4.2611554094573884e-08, + "loss": 3.767, + "step": 2967500 + }, + { + "epoch": 32.96, + "learning_rate": 4.259767230082402e-08, + "loss": 3.7593, + "step": 2968000 + }, + { + "epoch": 32.97, + "learning_rate": 4.2583790507074155e-08, + "loss": 3.7385, + "step": 2968500 + }, + { + "epoch": 32.97, + "learning_rate": 4.25699087133243e-08, + "loss": 3.7377, + "step": 2969000 + }, + { + "epoch": 32.98, + "learning_rate": 4.255602691957444e-08, + "loss": 3.7575, + "step": 2969500 + }, + { + "epoch": 32.98, + "learning_rate": 4.2542145125824575e-08, + "loss": 3.7583, + "step": 2970000 + }, + { + "epoch": 32.99, + "learning_rate": 4.252826333207472e-08, + "loss": 3.7637, + "step": 2970500 + }, + { + "epoch": 32.99, + "learning_rate": 4.251438153832485e-08, + "loss": 3.7625, + "step": 2971000 + }, + { + "epoch": 33.0, + "learning_rate": 4.250049974457499e-08, + "loss": 3.7628, + "step": 2971500 + }, + { + "epoch": 33.0, + "eval_loss": 3.8318238258361816, + "eval_runtime": 6.2995, + "eval_samples_per_second": 246.685, + "step": 2971518 + }, + { + "epoch": 33.01, + "learning_rate": 4.248661795082513e-08, + "loss": 3.7672, + "step": 2972000 + }, + { + "epoch": 33.01, + "learning_rate": 4.247273615707527e-08, + "loss": 3.7588, + "step": 2972500 + }, + { + "epoch": 33.02, + "learning_rate": 4.245885436332541e-08, + "loss": 3.7506, + "step": 2973000 + }, + { + "epoch": 33.02, + "learning_rate": 4.244497256957555e-08, + "loss": 3.7592, + "step": 2973500 + }, + { + "epoch": 33.03, + "learning_rate": 4.243109077582569e-08, + "loss": 3.7514, + "step": 2974000 + }, + { + "epoch": 33.03, + "learning_rate": 4.241720898207582e-08, + "loss": 3.7465, + "step": 2974500 + }, + { + "epoch": 33.04, + "learning_rate": 4.2403327188325965e-08, + "loss": 3.7638, + "step": 2975000 + }, + { + "epoch": 33.04, + "learning_rate": 4.23894453945761e-08, + "loss": 3.7594, + "step": 2975500 + }, + { + "epoch": 33.05, + "learning_rate": 4.237556360082624e-08, + "loss": 3.751, + "step": 2976000 + }, + { + "epoch": 33.06, + "learning_rate": 4.2361681807076385e-08, + "loss": 3.7603, + "step": 2976500 + }, + { + "epoch": 33.06, + "learning_rate": 4.234780001332652e-08, + "loss": 3.7406, + "step": 2977000 + }, + { + "epoch": 33.07, + "learning_rate": 4.2333918219576656e-08, + "loss": 3.7563, + "step": 2977500 + }, + { + "epoch": 33.07, + "learning_rate": 4.23200364258268e-08, + "loss": 3.7728, + "step": 2978000 + }, + { + "epoch": 33.08, + "learning_rate": 4.2306154632076934e-08, + "loss": 3.7653, + "step": 2978500 + }, + { + "epoch": 33.08, + "learning_rate": 4.2292272838327076e-08, + "loss": 3.7594, + "step": 2979000 + }, + { + "epoch": 33.09, + "learning_rate": 4.227839104457722e-08, + "loss": 3.7651, + "step": 2979500 + }, + { + "epoch": 33.09, + "learning_rate": 4.2264509250827354e-08, + "loss": 3.7488, + "step": 2980000 + }, + { + "epoch": 33.1, + "learning_rate": 4.225062745707749e-08, + "loss": 3.7471, + "step": 2980500 + }, + { + "epoch": 33.11, + "learning_rate": 4.223674566332763e-08, + "loss": 3.7588, + "step": 2981000 + }, + { + "epoch": 33.11, + "learning_rate": 4.222286386957777e-08, + "loss": 3.7564, + "step": 2981500 + }, + { + "epoch": 33.12, + "learning_rate": 4.220898207582791e-08, + "loss": 3.7624, + "step": 2982000 + }, + { + "epoch": 33.12, + "learning_rate": 4.2195100282078046e-08, + "loss": 3.7513, + "step": 2982500 + }, + { + "epoch": 33.13, + "learning_rate": 4.218121848832819e-08, + "loss": 3.7534, + "step": 2983000 + }, + { + "epoch": 33.13, + "learning_rate": 4.2167336694578324e-08, + "loss": 3.7573, + "step": 2983500 + }, + { + "epoch": 33.14, + "learning_rate": 4.215345490082846e-08, + "loss": 3.746, + "step": 2984000 + }, + { + "epoch": 33.14, + "learning_rate": 4.21395731070786e-08, + "loss": 3.7505, + "step": 2984500 + }, + { + "epoch": 33.15, + "learning_rate": 4.2125691313328744e-08, + "loss": 3.7718, + "step": 2985000 + }, + { + "epoch": 33.16, + "learning_rate": 4.211180951957888e-08, + "loss": 3.7519, + "step": 2985500 + }, + { + "epoch": 33.16, + "learning_rate": 4.209792772582902e-08, + "loss": 3.7223, + "step": 2986000 + }, + { + "epoch": 33.17, + "learning_rate": 4.208404593207916e-08, + "loss": 3.762, + "step": 2986500 + }, + { + "epoch": 33.17, + "learning_rate": 4.207016413832929e-08, + "loss": 3.7671, + "step": 2987000 + }, + { + "epoch": 33.18, + "learning_rate": 4.2056282344579435e-08, + "loss": 3.7652, + "step": 2987500 + }, + { + "epoch": 33.18, + "learning_rate": 4.204240055082958e-08, + "loss": 3.7673, + "step": 2988000 + }, + { + "epoch": 33.19, + "learning_rate": 4.202851875707971e-08, + "loss": 3.7631, + "step": 2988500 + }, + { + "epoch": 33.19, + "learning_rate": 4.2014636963329855e-08, + "loss": 3.7392, + "step": 2989000 + }, + { + "epoch": 33.2, + "learning_rate": 4.200075516957999e-08, + "loss": 3.7444, + "step": 2989500 + }, + { + "epoch": 33.21, + "learning_rate": 4.1986873375830127e-08, + "loss": 3.7564, + "step": 2990000 + }, + { + "epoch": 33.21, + "learning_rate": 4.197299158208027e-08, + "loss": 3.7541, + "step": 2990500 + }, + { + "epoch": 33.22, + "learning_rate": 4.1959109788330405e-08, + "loss": 3.7734, + "step": 2991000 + }, + { + "epoch": 33.22, + "learning_rate": 4.194522799458055e-08, + "loss": 3.7489, + "step": 2991500 + }, + { + "epoch": 33.23, + "learning_rate": 4.193134620083069e-08, + "loss": 3.7701, + "step": 2992000 + }, + { + "epoch": 33.23, + "learning_rate": 4.1917464407080825e-08, + "loss": 3.7663, + "step": 2992500 + }, + { + "epoch": 33.24, + "learning_rate": 4.190358261333096e-08, + "loss": 3.7604, + "step": 2993000 + }, + { + "epoch": 33.24, + "learning_rate": 4.18897008195811e-08, + "loss": 3.7661, + "step": 2993500 + }, + { + "epoch": 33.25, + "learning_rate": 4.187581902583124e-08, + "loss": 3.7687, + "step": 2994000 + }, + { + "epoch": 33.26, + "learning_rate": 4.186193723208138e-08, + "loss": 3.7297, + "step": 2994500 + }, + { + "epoch": 33.26, + "learning_rate": 4.184805543833152e-08, + "loss": 3.7729, + "step": 2995000 + }, + { + "epoch": 33.27, + "learning_rate": 4.183417364458165e-08, + "loss": 3.7532, + "step": 2995500 + }, + { + "epoch": 33.27, + "learning_rate": 4.1820291850831794e-08, + "loss": 3.7651, + "step": 2996000 + }, + { + "epoch": 33.28, + "learning_rate": 4.1806410057081936e-08, + "loss": 3.7615, + "step": 2996500 + }, + { + "epoch": 33.28, + "learning_rate": 4.179252826333207e-08, + "loss": 3.7717, + "step": 2997000 + }, + { + "epoch": 33.29, + "learning_rate": 4.1778646469582214e-08, + "loss": 3.7716, + "step": 2997500 + }, + { + "epoch": 33.29, + "learning_rate": 4.176476467583235e-08, + "loss": 3.7686, + "step": 2998000 + }, + { + "epoch": 33.3, + "learning_rate": 4.1750882882082486e-08, + "loss": 3.7659, + "step": 2998500 + }, + { + "epoch": 33.31, + "learning_rate": 4.173700108833263e-08, + "loss": 3.765, + "step": 2999000 + }, + { + "epoch": 33.31, + "learning_rate": 4.1723119294582763e-08, + "loss": 3.759, + "step": 2999500 + }, + { + "epoch": 33.32, + "learning_rate": 4.1709237500832906e-08, + "loss": 3.7735, + "step": 3000000 + }, + { + "epoch": 33.32, + "learning_rate": 4.169535570708305e-08, + "loss": 3.7671, + "step": 3000500 + }, + { + "epoch": 33.33, + "learning_rate": 4.1681473913333184e-08, + "loss": 3.7485, + "step": 3001000 + }, + { + "epoch": 33.33, + "learning_rate": 4.166759211958332e-08, + "loss": 3.7685, + "step": 3001500 + }, + { + "epoch": 33.34, + "learning_rate": 4.165371032583346e-08, + "loss": 3.7545, + "step": 3002000 + }, + { + "epoch": 33.34, + "learning_rate": 4.16398285320836e-08, + "loss": 3.7649, + "step": 3002500 + }, + { + "epoch": 33.35, + "learning_rate": 4.162594673833374e-08, + "loss": 3.7882, + "step": 3003000 + }, + { + "epoch": 33.36, + "learning_rate": 4.161206494458388e-08, + "loss": 3.7508, + "step": 3003500 + }, + { + "epoch": 33.36, + "learning_rate": 4.159818315083402e-08, + "loss": 3.7391, + "step": 3004000 + }, + { + "epoch": 33.37, + "learning_rate": 4.158430135708415e-08, + "loss": 3.7653, + "step": 3004500 + }, + { + "epoch": 33.37, + "learning_rate": 4.157041956333429e-08, + "loss": 3.769, + "step": 3005000 + }, + { + "epoch": 33.38, + "learning_rate": 4.155653776958443e-08, + "loss": 3.7645, + "step": 3005500 + }, + { + "epoch": 33.38, + "learning_rate": 4.154265597583457e-08, + "loss": 3.7727, + "step": 3006000 + }, + { + "epoch": 33.39, + "learning_rate": 4.152877418208471e-08, + "loss": 3.7668, + "step": 3006500 + }, + { + "epoch": 33.39, + "learning_rate": 4.151489238833485e-08, + "loss": 3.747, + "step": 3007000 + }, + { + "epoch": 33.4, + "learning_rate": 4.150101059458499e-08, + "loss": 3.7493, + "step": 3007500 + }, + { + "epoch": 33.41, + "learning_rate": 4.148712880083512e-08, + "loss": 3.7632, + "step": 3008000 + }, + { + "epoch": 33.41, + "learning_rate": 4.1473247007085265e-08, + "loss": 3.7494, + "step": 3008500 + }, + { + "epoch": 33.42, + "learning_rate": 4.145936521333541e-08, + "loss": 3.7536, + "step": 3009000 + }, + { + "epoch": 33.42, + "learning_rate": 4.144548341958554e-08, + "loss": 3.7584, + "step": 3009500 + }, + { + "epoch": 33.43, + "learning_rate": 4.1431601625835685e-08, + "loss": 3.7868, + "step": 3010000 + }, + { + "epoch": 33.43, + "learning_rate": 4.141771983208582e-08, + "loss": 3.7545, + "step": 3010500 + }, + { + "epoch": 33.44, + "learning_rate": 4.1403838038335956e-08, + "loss": 3.7691, + "step": 3011000 + }, + { + "epoch": 33.44, + "learning_rate": 4.13899562445861e-08, + "loss": 3.7371, + "step": 3011500 + }, + { + "epoch": 33.45, + "learning_rate": 4.1376074450836234e-08, + "loss": 3.7668, + "step": 3012000 + }, + { + "epoch": 33.46, + "learning_rate": 4.1362192657086376e-08, + "loss": 3.7444, + "step": 3012500 + }, + { + "epoch": 33.46, + "learning_rate": 4.134831086333652e-08, + "loss": 3.747, + "step": 3013000 + }, + { + "epoch": 33.47, + "learning_rate": 4.1334429069586654e-08, + "loss": 3.7682, + "step": 3013500 + }, + { + "epoch": 33.47, + "learning_rate": 4.132054727583679e-08, + "loss": 3.7794, + "step": 3014000 + }, + { + "epoch": 33.48, + "learning_rate": 4.130666548208693e-08, + "loss": 3.7429, + "step": 3014500 + }, + { + "epoch": 33.48, + "learning_rate": 4.129278368833707e-08, + "loss": 3.7653, + "step": 3015000 + }, + { + "epoch": 33.49, + "learning_rate": 4.127890189458721e-08, + "loss": 3.7317, + "step": 3015500 + }, + { + "epoch": 33.49, + "learning_rate": 4.126502010083735e-08, + "loss": 3.7666, + "step": 3016000 + }, + { + "epoch": 33.5, + "learning_rate": 4.125113830708749e-08, + "loss": 3.7334, + "step": 3016500 + }, + { + "epoch": 33.51, + "learning_rate": 4.1237256513337624e-08, + "loss": 3.7695, + "step": 3017000 + }, + { + "epoch": 33.51, + "learning_rate": 4.1223374719587766e-08, + "loss": 3.7699, + "step": 3017500 + }, + { + "epoch": 33.52, + "learning_rate": 4.12094929258379e-08, + "loss": 3.7669, + "step": 3018000 + }, + { + "epoch": 33.52, + "learning_rate": 4.1195611132088044e-08, + "loss": 3.7645, + "step": 3018500 + }, + { + "epoch": 33.53, + "learning_rate": 4.118172933833818e-08, + "loss": 3.7424, + "step": 3019000 + }, + { + "epoch": 33.53, + "learning_rate": 4.116784754458832e-08, + "loss": 3.7601, + "step": 3019500 + }, + { + "epoch": 33.54, + "learning_rate": 4.115396575083846e-08, + "loss": 3.7823, + "step": 3020000 + }, + { + "epoch": 33.54, + "learning_rate": 4.114008395708859e-08, + "loss": 3.7645, + "step": 3020500 + }, + { + "epoch": 33.55, + "learning_rate": 4.1126202163338735e-08, + "loss": 3.7502, + "step": 3021000 + }, + { + "epoch": 33.56, + "learning_rate": 4.111232036958888e-08, + "loss": 3.7572, + "step": 3021500 + }, + { + "epoch": 33.56, + "learning_rate": 4.109843857583901e-08, + "loss": 3.7665, + "step": 3022000 + }, + { + "epoch": 33.57, + "learning_rate": 4.1084556782089155e-08, + "loss": 3.7527, + "step": 3022500 + }, + { + "epoch": 33.57, + "learning_rate": 4.107067498833929e-08, + "loss": 3.758, + "step": 3023000 + }, + { + "epoch": 33.58, + "learning_rate": 4.105679319458943e-08, + "loss": 3.7579, + "step": 3023500 + }, + { + "epoch": 33.58, + "learning_rate": 4.104291140083957e-08, + "loss": 3.7492, + "step": 3024000 + }, + { + "epoch": 33.59, + "learning_rate": 4.102902960708971e-08, + "loss": 3.7726, + "step": 3024500 + }, + { + "epoch": 33.59, + "learning_rate": 4.101514781333985e-08, + "loss": 3.7638, + "step": 3025000 + }, + { + "epoch": 33.6, + "learning_rate": 4.100126601958999e-08, + "loss": 3.7738, + "step": 3025500 + }, + { + "epoch": 33.61, + "learning_rate": 4.0987384225840125e-08, + "loss": 3.7373, + "step": 3026000 + }, + { + "epoch": 33.61, + "learning_rate": 4.097350243209026e-08, + "loss": 3.7331, + "step": 3026500 + }, + { + "epoch": 33.62, + "learning_rate": 4.09596206383404e-08, + "loss": 3.7807, + "step": 3027000 + }, + { + "epoch": 33.62, + "learning_rate": 4.094573884459054e-08, + "loss": 3.7439, + "step": 3027500 + }, + { + "epoch": 33.63, + "learning_rate": 4.093185705084068e-08, + "loss": 3.7509, + "step": 3028000 + }, + { + "epoch": 33.63, + "learning_rate": 4.091797525709082e-08, + "loss": 3.7572, + "step": 3028500 + }, + { + "epoch": 33.64, + "learning_rate": 4.090409346334095e-08, + "loss": 3.7641, + "step": 3029000 + }, + { + "epoch": 33.64, + "learning_rate": 4.0890211669591094e-08, + "loss": 3.7662, + "step": 3029500 + }, + { + "epoch": 33.65, + "learning_rate": 4.0876329875841236e-08, + "loss": 3.7678, + "step": 3030000 + }, + { + "epoch": 33.66, + "learning_rate": 4.086244808209137e-08, + "loss": 3.7716, + "step": 3030500 + }, + { + "epoch": 33.66, + "learning_rate": 4.0848566288341514e-08, + "loss": 3.7802, + "step": 3031000 + }, + { + "epoch": 33.67, + "learning_rate": 4.0834684494591657e-08, + "loss": 3.7705, + "step": 3031500 + }, + { + "epoch": 33.67, + "learning_rate": 4.0820802700841786e-08, + "loss": 3.7669, + "step": 3032000 + }, + { + "epoch": 33.68, + "learning_rate": 4.080692090709193e-08, + "loss": 3.7599, + "step": 3032500 + }, + { + "epoch": 33.68, + "learning_rate": 4.079303911334207e-08, + "loss": 3.7531, + "step": 3033000 + }, + { + "epoch": 33.69, + "learning_rate": 4.0779157319592206e-08, + "loss": 3.767, + "step": 3033500 + }, + { + "epoch": 33.69, + "learning_rate": 4.076527552584235e-08, + "loss": 3.7454, + "step": 3034000 + }, + { + "epoch": 33.7, + "learning_rate": 4.0751393732092484e-08, + "loss": 3.7582, + "step": 3034500 + }, + { + "epoch": 33.7, + "learning_rate": 4.073751193834262e-08, + "loss": 3.7813, + "step": 3035000 + }, + { + "epoch": 33.71, + "learning_rate": 4.072363014459276e-08, + "loss": 3.778, + "step": 3035500 + }, + { + "epoch": 33.72, + "learning_rate": 4.07097483508429e-08, + "loss": 3.7557, + "step": 3036000 + }, + { + "epoch": 33.72, + "learning_rate": 4.069586655709304e-08, + "loss": 3.7444, + "step": 3036500 + }, + { + "epoch": 33.73, + "learning_rate": 4.068198476334318e-08, + "loss": 3.7545, + "step": 3037000 + }, + { + "epoch": 33.73, + "learning_rate": 4.066810296959332e-08, + "loss": 3.7755, + "step": 3037500 + }, + { + "epoch": 33.74, + "learning_rate": 4.065422117584345e-08, + "loss": 3.7734, + "step": 3038000 + }, + { + "epoch": 33.74, + "learning_rate": 4.0640339382093595e-08, + "loss": 3.7723, + "step": 3038500 + }, + { + "epoch": 33.75, + "learning_rate": 4.062645758834373e-08, + "loss": 3.7541, + "step": 3039000 + }, + { + "epoch": 33.75, + "learning_rate": 4.061257579459387e-08, + "loss": 3.7627, + "step": 3039500 + }, + { + "epoch": 33.76, + "learning_rate": 4.0598694000844016e-08, + "loss": 3.7728, + "step": 3040000 + }, + { + "epoch": 33.77, + "learning_rate": 4.058481220709415e-08, + "loss": 3.7335, + "step": 3040500 + }, + { + "epoch": 33.77, + "learning_rate": 4.057093041334429e-08, + "loss": 3.7653, + "step": 3041000 + }, + { + "epoch": 33.78, + "learning_rate": 4.055704861959442e-08, + "loss": 3.767, + "step": 3041500 + }, + { + "epoch": 33.78, + "learning_rate": 4.0543166825844565e-08, + "loss": 3.782, + "step": 3042000 + }, + { + "epoch": 33.79, + "learning_rate": 4.052928503209471e-08, + "loss": 3.7644, + "step": 3042500 + }, + { + "epoch": 33.79, + "learning_rate": 4.051540323834484e-08, + "loss": 3.7563, + "step": 3043000 + }, + { + "epoch": 33.8, + "learning_rate": 4.0501521444594985e-08, + "loss": 3.7428, + "step": 3043500 + }, + { + "epoch": 33.8, + "learning_rate": 4.048763965084512e-08, + "loss": 3.7448, + "step": 3044000 + }, + { + "epoch": 33.81, + "learning_rate": 4.0473757857095256e-08, + "loss": 3.758, + "step": 3044500 + }, + { + "epoch": 33.82, + "learning_rate": 4.04598760633454e-08, + "loss": 3.7472, + "step": 3045000 + }, + { + "epoch": 33.82, + "learning_rate": 4.044599426959554e-08, + "loss": 3.751, + "step": 3045500 + }, + { + "epoch": 33.83, + "learning_rate": 4.0432112475845676e-08, + "loss": 3.755, + "step": 3046000 + }, + { + "epoch": 33.83, + "learning_rate": 4.041823068209582e-08, + "loss": 3.7539, + "step": 3046500 + }, + { + "epoch": 33.84, + "learning_rate": 4.0404348888345954e-08, + "loss": 3.7658, + "step": 3047000 + }, + { + "epoch": 33.84, + "learning_rate": 4.039046709459609e-08, + "loss": 3.7604, + "step": 3047500 + }, + { + "epoch": 33.85, + "learning_rate": 4.037658530084623e-08, + "loss": 3.7448, + "step": 3048000 + }, + { + "epoch": 33.85, + "learning_rate": 4.036270350709637e-08, + "loss": 3.7496, + "step": 3048500 + }, + { + "epoch": 33.86, + "learning_rate": 4.034882171334651e-08, + "loss": 3.7755, + "step": 3049000 + }, + { + "epoch": 33.87, + "learning_rate": 4.033493991959665e-08, + "loss": 3.7601, + "step": 3049500 + }, + { + "epoch": 33.87, + "learning_rate": 4.032105812584679e-08, + "loss": 3.7645, + "step": 3050000 + }, + { + "epoch": 33.88, + "learning_rate": 4.0307176332096924e-08, + "loss": 3.7533, + "step": 3050500 + }, + { + "epoch": 33.88, + "learning_rate": 4.0293294538347066e-08, + "loss": 3.7591, + "step": 3051000 + }, + { + "epoch": 33.89, + "learning_rate": 4.02794127445972e-08, + "loss": 3.7464, + "step": 3051500 + }, + { + "epoch": 33.89, + "learning_rate": 4.0265530950847344e-08, + "loss": 3.7609, + "step": 3052000 + }, + { + "epoch": 33.9, + "learning_rate": 4.0251649157097486e-08, + "loss": 3.7481, + "step": 3052500 + }, + { + "epoch": 33.9, + "learning_rate": 4.023776736334762e-08, + "loss": 3.7715, + "step": 3053000 + }, + { + "epoch": 33.91, + "learning_rate": 4.022388556959776e-08, + "loss": 3.7559, + "step": 3053500 + }, + { + "epoch": 33.92, + "learning_rate": 4.02100037758479e-08, + "loss": 3.7524, + "step": 3054000 + }, + { + "epoch": 33.92, + "learning_rate": 4.0196121982098035e-08, + "loss": 3.7324, + "step": 3054500 + }, + { + "epoch": 33.93, + "learning_rate": 4.018224018834818e-08, + "loss": 3.7638, + "step": 3055000 + }, + { + "epoch": 33.93, + "learning_rate": 4.016835839459832e-08, + "loss": 3.7416, + "step": 3055500 + }, + { + "epoch": 33.94, + "learning_rate": 4.0154476600848456e-08, + "loss": 3.7695, + "step": 3056000 + }, + { + "epoch": 33.94, + "learning_rate": 4.014059480709859e-08, + "loss": 3.7618, + "step": 3056500 + }, + { + "epoch": 33.95, + "learning_rate": 4.012671301334873e-08, + "loss": 3.7486, + "step": 3057000 + }, + { + "epoch": 33.95, + "learning_rate": 4.011283121959887e-08, + "loss": 3.7653, + "step": 3057500 + }, + { + "epoch": 33.96, + "learning_rate": 4.009894942584901e-08, + "loss": 3.7647, + "step": 3058000 + }, + { + "epoch": 33.97, + "learning_rate": 4.008506763209915e-08, + "loss": 3.7524, + "step": 3058500 + }, + { + "epoch": 33.97, + "learning_rate": 4.007118583834929e-08, + "loss": 3.7609, + "step": 3059000 + }, + { + "epoch": 33.98, + "learning_rate": 4.0057304044599425e-08, + "loss": 3.7714, + "step": 3059500 + }, + { + "epoch": 33.98, + "learning_rate": 4.004342225084956e-08, + "loss": 3.7555, + "step": 3060000 + }, + { + "epoch": 33.99, + "learning_rate": 4.00295404570997e-08, + "loss": 3.792, + "step": 3060500 + }, + { + "epoch": 33.99, + "learning_rate": 4.0015658663349845e-08, + "loss": 3.762, + "step": 3061000 + }, + { + "epoch": 34.0, + "learning_rate": 4.000177686959998e-08, + "loss": 3.7541, + "step": 3061500 + }, + { + "epoch": 34.0, + "eval_loss": 3.8307807445526123, + "eval_runtime": 6.3013, + "eval_samples_per_second": 246.615, + "step": 3061564 + }, + { + "epoch": 34.0, + "learning_rate": 3.998789507585012e-08, + "loss": 3.7586, + "step": 3062000 + }, + { + "epoch": 34.01, + "learning_rate": 3.997401328210026e-08, + "loss": 3.7673, + "step": 3062500 + }, + { + "epoch": 34.02, + "learning_rate": 3.9960131488350394e-08, + "loss": 3.7677, + "step": 3063000 + }, + { + "epoch": 34.02, + "learning_rate": 3.9946249694600537e-08, + "loss": 3.7803, + "step": 3063500 + }, + { + "epoch": 34.03, + "learning_rate": 3.993236790085067e-08, + "loss": 3.7327, + "step": 3064000 + }, + { + "epoch": 34.03, + "learning_rate": 3.9918486107100814e-08, + "loss": 3.7614, + "step": 3064500 + }, + { + "epoch": 34.04, + "learning_rate": 3.990460431335096e-08, + "loss": 3.7546, + "step": 3065000 + }, + { + "epoch": 34.04, + "learning_rate": 3.9890722519601086e-08, + "loss": 3.7374, + "step": 3065500 + }, + { + "epoch": 34.05, + "learning_rate": 3.987684072585123e-08, + "loss": 3.7578, + "step": 3066000 + }, + { + "epoch": 34.05, + "learning_rate": 3.986295893210137e-08, + "loss": 3.7645, + "step": 3066500 + }, + { + "epoch": 34.06, + "learning_rate": 3.9849077138351506e-08, + "loss": 3.7533, + "step": 3067000 + }, + { + "epoch": 34.07, + "learning_rate": 3.983519534460165e-08, + "loss": 3.7707, + "step": 3067500 + }, + { + "epoch": 34.07, + "learning_rate": 3.982131355085179e-08, + "loss": 3.77, + "step": 3068000 + }, + { + "epoch": 34.08, + "learning_rate": 3.980743175710192e-08, + "loss": 3.7595, + "step": 3068500 + }, + { + "epoch": 34.08, + "learning_rate": 3.979354996335206e-08, + "loss": 3.7671, + "step": 3069000 + }, + { + "epoch": 34.09, + "learning_rate": 3.9779668169602204e-08, + "loss": 3.7681, + "step": 3069500 + }, + { + "epoch": 34.09, + "learning_rate": 3.976578637585234e-08, + "loss": 3.7587, + "step": 3070000 + }, + { + "epoch": 34.1, + "learning_rate": 3.975190458210248e-08, + "loss": 3.7568, + "step": 3070500 + }, + { + "epoch": 34.1, + "learning_rate": 3.973802278835262e-08, + "loss": 3.7506, + "step": 3071000 + }, + { + "epoch": 34.11, + "learning_rate": 3.972414099460275e-08, + "loss": 3.7685, + "step": 3071500 + }, + { + "epoch": 34.12, + "learning_rate": 3.9710259200852895e-08, + "loss": 3.7535, + "step": 3072000 + }, + { + "epoch": 34.12, + "learning_rate": 3.969637740710303e-08, + "loss": 3.7549, + "step": 3072500 + }, + { + "epoch": 34.13, + "learning_rate": 3.9682495613353173e-08, + "loss": 3.738, + "step": 3073000 + }, + { + "epoch": 34.13, + "learning_rate": 3.9668613819603316e-08, + "loss": 3.7708, + "step": 3073500 + }, + { + "epoch": 34.14, + "learning_rate": 3.965473202585345e-08, + "loss": 3.76, + "step": 3074000 + }, + { + "epoch": 34.14, + "learning_rate": 3.964085023210359e-08, + "loss": 3.7726, + "step": 3074500 + }, + { + "epoch": 34.15, + "learning_rate": 3.962696843835373e-08, + "loss": 3.7713, + "step": 3075000 + }, + { + "epoch": 34.15, + "learning_rate": 3.9613086644603865e-08, + "loss": 3.7556, + "step": 3075500 + }, + { + "epoch": 34.16, + "learning_rate": 3.959920485085401e-08, + "loss": 3.7419, + "step": 3076000 + }, + { + "epoch": 34.17, + "learning_rate": 3.958532305710415e-08, + "loss": 3.7425, + "step": 3076500 + }, + { + "epoch": 34.17, + "learning_rate": 3.9571441263354285e-08, + "loss": 3.7538, + "step": 3077000 + }, + { + "epoch": 34.18, + "learning_rate": 3.955755946960442e-08, + "loss": 3.7672, + "step": 3077500 + }, + { + "epoch": 34.18, + "learning_rate": 3.9543677675854556e-08, + "loss": 3.7443, + "step": 3078000 + }, + { + "epoch": 34.19, + "learning_rate": 3.95297958821047e-08, + "loss": 3.7455, + "step": 3078500 + }, + { + "epoch": 34.19, + "learning_rate": 3.951591408835484e-08, + "loss": 3.7496, + "step": 3079000 + }, + { + "epoch": 34.2, + "learning_rate": 3.9502032294604976e-08, + "loss": 3.7551, + "step": 3079500 + }, + { + "epoch": 34.2, + "learning_rate": 3.948815050085512e-08, + "loss": 3.7451, + "step": 3080000 + }, + { + "epoch": 34.21, + "learning_rate": 3.9474268707105254e-08, + "loss": 3.746, + "step": 3080500 + }, + { + "epoch": 34.22, + "learning_rate": 3.946038691335539e-08, + "loss": 3.7701, + "step": 3081000 + }, + { + "epoch": 34.22, + "learning_rate": 3.944650511960553e-08, + "loss": 3.7606, + "step": 3081500 + }, + { + "epoch": 34.23, + "learning_rate": 3.9432623325855675e-08, + "loss": 3.7756, + "step": 3082000 + }, + { + "epoch": 34.23, + "learning_rate": 3.941874153210581e-08, + "loss": 3.7615, + "step": 3082500 + }, + { + "epoch": 34.24, + "learning_rate": 3.940485973835595e-08, + "loss": 3.7339, + "step": 3083000 + }, + { + "epoch": 34.24, + "learning_rate": 3.939097794460609e-08, + "loss": 3.7747, + "step": 3083500 + }, + { + "epoch": 34.25, + "learning_rate": 3.9377096150856224e-08, + "loss": 3.7732, + "step": 3084000 + }, + { + "epoch": 34.25, + "learning_rate": 3.9363214357106366e-08, + "loss": 3.7539, + "step": 3084500 + }, + { + "epoch": 34.26, + "learning_rate": 3.934933256335651e-08, + "loss": 3.7646, + "step": 3085000 + }, + { + "epoch": 34.27, + "learning_rate": 3.9335450769606644e-08, + "loss": 3.7511, + "step": 3085500 + }, + { + "epoch": 34.27, + "learning_rate": 3.9321568975856786e-08, + "loss": 3.751, + "step": 3086000 + }, + { + "epoch": 34.28, + "learning_rate": 3.930768718210692e-08, + "loss": 3.7548, + "step": 3086500 + }, + { + "epoch": 34.28, + "learning_rate": 3.929380538835706e-08, + "loss": 3.7585, + "step": 3087000 + }, + { + "epoch": 34.29, + "learning_rate": 3.92799235946072e-08, + "loss": 3.7645, + "step": 3087500 + }, + { + "epoch": 34.29, + "learning_rate": 3.9266041800857335e-08, + "loss": 3.7591, + "step": 3088000 + }, + { + "epoch": 34.3, + "learning_rate": 3.925216000710748e-08, + "loss": 3.7538, + "step": 3088500 + }, + { + "epoch": 34.3, + "learning_rate": 3.923827821335762e-08, + "loss": 3.7505, + "step": 3089000 + }, + { + "epoch": 34.31, + "learning_rate": 3.9224396419607756e-08, + "loss": 3.7544, + "step": 3089500 + }, + { + "epoch": 34.32, + "learning_rate": 3.921051462585789e-08, + "loss": 3.765, + "step": 3090000 + }, + { + "epoch": 34.32, + "learning_rate": 3.9196632832108034e-08, + "loss": 3.7463, + "step": 3090500 + }, + { + "epoch": 34.33, + "learning_rate": 3.918275103835817e-08, + "loss": 3.7681, + "step": 3091000 + }, + { + "epoch": 34.33, + "learning_rate": 3.916886924460831e-08, + "loss": 3.7592, + "step": 3091500 + }, + { + "epoch": 34.34, + "learning_rate": 3.9154987450858454e-08, + "loss": 3.7626, + "step": 3092000 + }, + { + "epoch": 34.34, + "learning_rate": 3.914110565710859e-08, + "loss": 3.7506, + "step": 3092500 + }, + { + "epoch": 34.35, + "learning_rate": 3.9127223863358725e-08, + "loss": 3.7755, + "step": 3093000 + }, + { + "epoch": 34.35, + "learning_rate": 3.911334206960886e-08, + "loss": 3.7635, + "step": 3093500 + }, + { + "epoch": 34.36, + "learning_rate": 3.9099460275859e-08, + "loss": 3.7514, + "step": 3094000 + }, + { + "epoch": 34.37, + "learning_rate": 3.9085578482109145e-08, + "loss": 3.7677, + "step": 3094500 + }, + { + "epoch": 34.37, + "learning_rate": 3.907169668835928e-08, + "loss": 3.7773, + "step": 3095000 + }, + { + "epoch": 34.38, + "learning_rate": 3.905781489460942e-08, + "loss": 3.7555, + "step": 3095500 + }, + { + "epoch": 34.38, + "learning_rate": 3.904393310085956e-08, + "loss": 3.735, + "step": 3096000 + }, + { + "epoch": 34.39, + "learning_rate": 3.9030051307109694e-08, + "loss": 3.7562, + "step": 3096500 + }, + { + "epoch": 34.39, + "learning_rate": 3.9016169513359837e-08, + "loss": 3.7827, + "step": 3097000 + }, + { + "epoch": 34.4, + "learning_rate": 3.900228771960998e-08, + "loss": 3.7614, + "step": 3097500 + }, + { + "epoch": 34.4, + "learning_rate": 3.8988405925860115e-08, + "loss": 3.759, + "step": 3098000 + }, + { + "epoch": 34.41, + "learning_rate": 3.897452413211026e-08, + "loss": 3.7534, + "step": 3098500 + }, + { + "epoch": 34.42, + "learning_rate": 3.896064233836039e-08, + "loss": 3.7772, + "step": 3099000 + }, + { + "epoch": 34.42, + "learning_rate": 3.894676054461053e-08, + "loss": 3.7426, + "step": 3099500 + }, + { + "epoch": 34.43, + "learning_rate": 3.893287875086067e-08, + "loss": 3.7605, + "step": 3100000 + }, + { + "epoch": 34.43, + "learning_rate": 3.8918996957110806e-08, + "loss": 3.7555, + "step": 3100500 + }, + { + "epoch": 34.44, + "learning_rate": 3.890511516336095e-08, + "loss": 3.7471, + "step": 3101000 + }, + { + "epoch": 34.44, + "learning_rate": 3.889123336961109e-08, + "loss": 3.7554, + "step": 3101500 + }, + { + "epoch": 34.45, + "learning_rate": 3.887735157586122e-08, + "loss": 3.7529, + "step": 3102000 + }, + { + "epoch": 34.45, + "learning_rate": 3.886346978211136e-08, + "loss": 3.767, + "step": 3102500 + }, + { + "epoch": 34.46, + "learning_rate": 3.8849587988361504e-08, + "loss": 3.7566, + "step": 3103000 + }, + { + "epoch": 34.47, + "learning_rate": 3.883570619461164e-08, + "loss": 3.7764, + "step": 3103500 + }, + { + "epoch": 34.47, + "learning_rate": 3.882182440086178e-08, + "loss": 3.7602, + "step": 3104000 + }, + { + "epoch": 34.48, + "learning_rate": 3.880794260711192e-08, + "loss": 3.7342, + "step": 3104500 + }, + { + "epoch": 34.48, + "learning_rate": 3.879406081336205e-08, + "loss": 3.7563, + "step": 3105000 + }, + { + "epoch": 34.49, + "learning_rate": 3.8780179019612196e-08, + "loss": 3.755, + "step": 3105500 + }, + { + "epoch": 34.49, + "learning_rate": 3.876629722586234e-08, + "loss": 3.7632, + "step": 3106000 + }, + { + "epoch": 34.5, + "learning_rate": 3.8752415432112473e-08, + "loss": 3.7747, + "step": 3106500 + }, + { + "epoch": 34.5, + "learning_rate": 3.8738533638362616e-08, + "loss": 3.7706, + "step": 3107000 + }, + { + "epoch": 34.51, + "learning_rate": 3.872465184461275e-08, + "loss": 3.7522, + "step": 3107500 + }, + { + "epoch": 34.52, + "learning_rate": 3.871077005086289e-08, + "loss": 3.7508, + "step": 3108000 + }, + { + "epoch": 34.52, + "learning_rate": 3.869688825711303e-08, + "loss": 3.7621, + "step": 3108500 + }, + { + "epoch": 34.53, + "learning_rate": 3.8683006463363165e-08, + "loss": 3.7503, + "step": 3109000 + }, + { + "epoch": 34.53, + "learning_rate": 3.866912466961331e-08, + "loss": 3.7479, + "step": 3109500 + }, + { + "epoch": 34.54, + "learning_rate": 3.865524287586345e-08, + "loss": 3.7465, + "step": 3110000 + }, + { + "epoch": 34.54, + "learning_rate": 3.8641361082113585e-08, + "loss": 3.7474, + "step": 3110500 + }, + { + "epoch": 34.55, + "learning_rate": 3.862747928836372e-08, + "loss": 3.7562, + "step": 3111000 + }, + { + "epoch": 34.55, + "learning_rate": 3.861359749461386e-08, + "loss": 3.7551, + "step": 3111500 + }, + { + "epoch": 34.56, + "learning_rate": 3.8599715700864e-08, + "loss": 3.7683, + "step": 3112000 + }, + { + "epoch": 34.57, + "learning_rate": 3.858583390711414e-08, + "loss": 3.7422, + "step": 3112500 + }, + { + "epoch": 34.57, + "learning_rate": 3.857195211336428e-08, + "loss": 3.7582, + "step": 3113000 + }, + { + "epoch": 34.58, + "learning_rate": 3.855807031961442e-08, + "loss": 3.7583, + "step": 3113500 + }, + { + "epoch": 34.58, + "learning_rate": 3.8544188525864554e-08, + "loss": 3.764, + "step": 3114000 + }, + { + "epoch": 34.59, + "learning_rate": 3.85303067321147e-08, + "loss": 3.7664, + "step": 3114500 + }, + { + "epoch": 34.59, + "learning_rate": 3.851642493836483e-08, + "loss": 3.7628, + "step": 3115000 + }, + { + "epoch": 34.6, + "learning_rate": 3.8502543144614975e-08, + "loss": 3.7654, + "step": 3115500 + }, + { + "epoch": 34.6, + "learning_rate": 3.848866135086511e-08, + "loss": 3.7717, + "step": 3116000 + }, + { + "epoch": 34.61, + "learning_rate": 3.847477955711525e-08, + "loss": 3.755, + "step": 3116500 + }, + { + "epoch": 34.62, + "learning_rate": 3.846089776336539e-08, + "loss": 3.768, + "step": 3117000 + }, + { + "epoch": 34.62, + "learning_rate": 3.8447015969615524e-08, + "loss": 3.7646, + "step": 3117500 + }, + { + "epoch": 34.63, + "learning_rate": 3.8433134175865666e-08, + "loss": 3.7688, + "step": 3118000 + }, + { + "epoch": 34.63, + "learning_rate": 3.841925238211581e-08, + "loss": 3.7484, + "step": 3118500 + }, + { + "epoch": 34.64, + "learning_rate": 3.8405370588365944e-08, + "loss": 3.7559, + "step": 3119000 + }, + { + "epoch": 34.64, + "learning_rate": 3.8391488794616086e-08, + "loss": 3.7616, + "step": 3119500 + }, + { + "epoch": 34.65, + "learning_rate": 3.837760700086622e-08, + "loss": 3.763, + "step": 3120000 + }, + { + "epoch": 34.65, + "learning_rate": 3.836372520711636e-08, + "loss": 3.7592, + "step": 3120500 + }, + { + "epoch": 34.66, + "learning_rate": 3.83498434133665e-08, + "loss": 3.7625, + "step": 3121000 + }, + { + "epoch": 34.67, + "learning_rate": 3.833596161961664e-08, + "loss": 3.7424, + "step": 3121500 + }, + { + "epoch": 34.67, + "learning_rate": 3.832207982586678e-08, + "loss": 3.7649, + "step": 3122000 + }, + { + "epoch": 34.68, + "learning_rate": 3.830819803211692e-08, + "loss": 3.7609, + "step": 3122500 + }, + { + "epoch": 34.68, + "learning_rate": 3.8294316238367056e-08, + "loss": 3.767, + "step": 3123000 + }, + { + "epoch": 34.69, + "learning_rate": 3.828043444461719e-08, + "loss": 3.752, + "step": 3123500 + }, + { + "epoch": 34.69, + "learning_rate": 3.8266552650867334e-08, + "loss": 3.7593, + "step": 3124000 + }, + { + "epoch": 34.7, + "learning_rate": 3.825267085711747e-08, + "loss": 3.7542, + "step": 3124500 + }, + { + "epoch": 34.7, + "learning_rate": 3.823878906336761e-08, + "loss": 3.7822, + "step": 3125000 + }, + { + "epoch": 34.71, + "learning_rate": 3.8224907269617754e-08, + "loss": 3.7662, + "step": 3125500 + }, + { + "epoch": 34.72, + "learning_rate": 3.821102547586789e-08, + "loss": 3.7438, + "step": 3126000 + }, + { + "epoch": 34.72, + "learning_rate": 3.8197143682118025e-08, + "loss": 3.7587, + "step": 3126500 + }, + { + "epoch": 34.73, + "learning_rate": 3.818326188836817e-08, + "loss": 3.7748, + "step": 3127000 + }, + { + "epoch": 34.73, + "learning_rate": 3.81693800946183e-08, + "loss": 3.7873, + "step": 3127500 + }, + { + "epoch": 34.74, + "learning_rate": 3.8155498300868445e-08, + "loss": 3.7582, + "step": 3128000 + }, + { + "epoch": 34.74, + "learning_rate": 3.814161650711859e-08, + "loss": 3.7499, + "step": 3128500 + }, + { + "epoch": 34.75, + "learning_rate": 3.812773471336872e-08, + "loss": 3.7235, + "step": 3129000 + }, + { + "epoch": 34.75, + "learning_rate": 3.811385291961886e-08, + "loss": 3.7482, + "step": 3129500 + }, + { + "epoch": 34.76, + "learning_rate": 3.8099971125868994e-08, + "loss": 3.7629, + "step": 3130000 + }, + { + "epoch": 34.77, + "learning_rate": 3.808608933211914e-08, + "loss": 3.7643, + "step": 3130500 + }, + { + "epoch": 34.77, + "learning_rate": 3.807220753836928e-08, + "loss": 3.751, + "step": 3131000 + }, + { + "epoch": 34.78, + "learning_rate": 3.8058325744619415e-08, + "loss": 3.7472, + "step": 3131500 + }, + { + "epoch": 34.78, + "learning_rate": 3.804444395086956e-08, + "loss": 3.7704, + "step": 3132000 + }, + { + "epoch": 34.79, + "learning_rate": 3.803056215711969e-08, + "loss": 3.7546, + "step": 3132500 + }, + { + "epoch": 34.79, + "learning_rate": 3.801668036336983e-08, + "loss": 3.7607, + "step": 3133000 + }, + { + "epoch": 34.8, + "learning_rate": 3.800279856961997e-08, + "loss": 3.7579, + "step": 3133500 + }, + { + "epoch": 34.8, + "learning_rate": 3.798891677587011e-08, + "loss": 3.7418, + "step": 3134000 + }, + { + "epoch": 34.81, + "learning_rate": 3.797503498212025e-08, + "loss": 3.765, + "step": 3134500 + }, + { + "epoch": 34.82, + "learning_rate": 3.7961153188370384e-08, + "loss": 3.7546, + "step": 3135000 + }, + { + "epoch": 34.82, + "learning_rate": 3.7947271394620526e-08, + "loss": 3.7518, + "step": 3135500 + }, + { + "epoch": 34.83, + "learning_rate": 3.793338960087066e-08, + "loss": 3.7348, + "step": 3136000 + }, + { + "epoch": 34.83, + "learning_rate": 3.7919507807120804e-08, + "loss": 3.7574, + "step": 3136500 + }, + { + "epoch": 34.84, + "learning_rate": 3.790562601337094e-08, + "loss": 3.7515, + "step": 3137000 + }, + { + "epoch": 34.84, + "learning_rate": 3.789174421962108e-08, + "loss": 3.7448, + "step": 3137500 + }, + { + "epoch": 34.85, + "learning_rate": 3.787786242587122e-08, + "loss": 3.7546, + "step": 3138000 + }, + { + "epoch": 34.85, + "learning_rate": 3.7863980632121353e-08, + "loss": 3.7513, + "step": 3138500 + }, + { + "epoch": 34.86, + "learning_rate": 3.7850098838371496e-08, + "loss": 3.7429, + "step": 3139000 + }, + { + "epoch": 34.87, + "learning_rate": 3.783621704462164e-08, + "loss": 3.7597, + "step": 3139500 + }, + { + "epoch": 34.87, + "learning_rate": 3.7822335250871774e-08, + "loss": 3.7675, + "step": 3140000 + }, + { + "epoch": 34.88, + "learning_rate": 3.7808453457121916e-08, + "loss": 3.7708, + "step": 3140500 + }, + { + "epoch": 34.88, + "learning_rate": 3.779457166337205e-08, + "loss": 3.7486, + "step": 3141000 + }, + { + "epoch": 34.89, + "learning_rate": 3.778068986962219e-08, + "loss": 3.7719, + "step": 3141500 + }, + { + "epoch": 34.89, + "learning_rate": 3.776680807587233e-08, + "loss": 3.7507, + "step": 3142000 + }, + { + "epoch": 34.9, + "learning_rate": 3.775292628212247e-08, + "loss": 3.7551, + "step": 3142500 + }, + { + "epoch": 34.9, + "learning_rate": 3.773904448837261e-08, + "loss": 3.7521, + "step": 3143000 + }, + { + "epoch": 34.91, + "learning_rate": 3.772516269462275e-08, + "loss": 3.7683, + "step": 3143500 + }, + { + "epoch": 34.92, + "learning_rate": 3.7711280900872885e-08, + "loss": 3.7665, + "step": 3144000 + }, + { + "epoch": 34.92, + "learning_rate": 3.769739910712302e-08, + "loss": 3.7494, + "step": 3144500 + }, + { + "epoch": 34.93, + "learning_rate": 3.768351731337316e-08, + "loss": 3.7558, + "step": 3145000 + }, + { + "epoch": 34.93, + "learning_rate": 3.76696355196233e-08, + "loss": 3.7423, + "step": 3145500 + }, + { + "epoch": 34.94, + "learning_rate": 3.765575372587344e-08, + "loss": 3.7395, + "step": 3146000 + }, + { + "epoch": 34.94, + "learning_rate": 3.764187193212358e-08, + "loss": 3.7369, + "step": 3146500 + }, + { + "epoch": 34.95, + "learning_rate": 3.762799013837372e-08, + "loss": 3.7865, + "step": 3147000 + }, + { + "epoch": 34.95, + "learning_rate": 3.7614108344623855e-08, + "loss": 3.7693, + "step": 3147500 + }, + { + "epoch": 34.96, + "learning_rate": 3.7600226550874e-08, + "loss": 3.7596, + "step": 3148000 + }, + { + "epoch": 34.97, + "learning_rate": 3.758634475712413e-08, + "loss": 3.7511, + "step": 3148500 + }, + { + "epoch": 34.97, + "learning_rate": 3.7572462963374275e-08, + "loss": 3.7729, + "step": 3149000 + }, + { + "epoch": 34.98, + "learning_rate": 3.755858116962442e-08, + "loss": 3.7677, + "step": 3149500 + }, + { + "epoch": 34.98, + "learning_rate": 3.754469937587455e-08, + "loss": 3.7457, + "step": 3150000 + }, + { + "epoch": 34.99, + "learning_rate": 3.753081758212469e-08, + "loss": 3.7405, + "step": 3150500 + }, + { + "epoch": 34.99, + "learning_rate": 3.751693578837483e-08, + "loss": 3.7475, + "step": 3151000 + }, + { + "epoch": 35.0, + "learning_rate": 3.7503053994624966e-08, + "loss": 3.7551, + "step": 3151500 + }, + { + "epoch": 35.0, + "eval_loss": 3.830213785171509, + "eval_runtime": 6.3055, + "eval_samples_per_second": 246.453, + "step": 3151610 + }, + { + "epoch": 35.0, + "learning_rate": 3.748917220087511e-08, + "loss": 3.7463, + "step": 3152000 + }, + { + "epoch": 35.01, + "learning_rate": 3.7475290407125244e-08, + "loss": 3.7487, + "step": 3152500 + }, + { + "epoch": 35.02, + "learning_rate": 3.7461408613375386e-08, + "loss": 3.7578, + "step": 3153000 + }, + { + "epoch": 35.02, + "learning_rate": 3.744752681962552e-08, + "loss": 3.7618, + "step": 3153500 + }, + { + "epoch": 35.03, + "learning_rate": 3.743364502587566e-08, + "loss": 3.7607, + "step": 3154000 + }, + { + "epoch": 35.03, + "learning_rate": 3.74197632321258e-08, + "loss": 3.7737, + "step": 3154500 + }, + { + "epoch": 35.04, + "learning_rate": 3.740588143837594e-08, + "loss": 3.7488, + "step": 3155000 + }, + { + "epoch": 35.04, + "learning_rate": 3.739199964462608e-08, + "loss": 3.7399, + "step": 3155500 + }, + { + "epoch": 35.05, + "learning_rate": 3.737811785087622e-08, + "loss": 3.7491, + "step": 3156000 + }, + { + "epoch": 35.05, + "learning_rate": 3.7364236057126356e-08, + "loss": 3.7565, + "step": 3156500 + }, + { + "epoch": 35.06, + "learning_rate": 3.735035426337649e-08, + "loss": 3.756, + "step": 3157000 + }, + { + "epoch": 35.07, + "learning_rate": 3.7336472469626634e-08, + "loss": 3.7755, + "step": 3157500 + }, + { + "epoch": 35.07, + "learning_rate": 3.7322590675876776e-08, + "loss": 3.7493, + "step": 3158000 + }, + { + "epoch": 35.08, + "learning_rate": 3.730870888212691e-08, + "loss": 3.7618, + "step": 3158500 + }, + { + "epoch": 35.08, + "learning_rate": 3.7294827088377054e-08, + "loss": 3.7549, + "step": 3159000 + }, + { + "epoch": 35.09, + "learning_rate": 3.728094529462719e-08, + "loss": 3.7502, + "step": 3159500 + }, + { + "epoch": 35.09, + "learning_rate": 3.7267063500877325e-08, + "loss": 3.7477, + "step": 3160000 + }, + { + "epoch": 35.1, + "learning_rate": 3.725318170712747e-08, + "loss": 3.734, + "step": 3160500 + }, + { + "epoch": 35.1, + "learning_rate": 3.72392999133776e-08, + "loss": 3.741, + "step": 3161000 + }, + { + "epoch": 35.11, + "learning_rate": 3.7225418119627745e-08, + "loss": 3.7469, + "step": 3161500 + }, + { + "epoch": 35.12, + "learning_rate": 3.721153632587789e-08, + "loss": 3.7429, + "step": 3162000 + }, + { + "epoch": 35.12, + "learning_rate": 3.719765453212802e-08, + "loss": 3.7518, + "step": 3162500 + }, + { + "epoch": 35.13, + "learning_rate": 3.718377273837816e-08, + "loss": 3.7553, + "step": 3163000 + }, + { + "epoch": 35.13, + "learning_rate": 3.71698909446283e-08, + "loss": 3.7537, + "step": 3163500 + }, + { + "epoch": 35.14, + "learning_rate": 3.715600915087844e-08, + "loss": 3.7698, + "step": 3164000 + }, + { + "epoch": 35.14, + "learning_rate": 3.714212735712858e-08, + "loss": 3.7763, + "step": 3164500 + }, + { + "epoch": 35.15, + "learning_rate": 3.712824556337872e-08, + "loss": 3.7581, + "step": 3165000 + }, + { + "epoch": 35.15, + "learning_rate": 3.711436376962885e-08, + "loss": 3.773, + "step": 3165500 + }, + { + "epoch": 35.16, + "learning_rate": 3.710048197587899e-08, + "loss": 3.7381, + "step": 3166000 + }, + { + "epoch": 35.17, + "learning_rate": 3.708660018212913e-08, + "loss": 3.7466, + "step": 3166500 + }, + { + "epoch": 35.17, + "learning_rate": 3.707271838837927e-08, + "loss": 3.7495, + "step": 3167000 + }, + { + "epoch": 35.18, + "learning_rate": 3.705883659462941e-08, + "loss": 3.7502, + "step": 3167500 + }, + { + "epoch": 35.18, + "learning_rate": 3.704495480087955e-08, + "loss": 3.7641, + "step": 3168000 + }, + { + "epoch": 35.19, + "learning_rate": 3.7031073007129684e-08, + "loss": 3.7522, + "step": 3168500 + }, + { + "epoch": 35.19, + "learning_rate": 3.7017191213379826e-08, + "loss": 3.759, + "step": 3169000 + }, + { + "epoch": 35.2, + "learning_rate": 3.700330941962996e-08, + "loss": 3.7438, + "step": 3169500 + }, + { + "epoch": 35.2, + "learning_rate": 3.6989427625880104e-08, + "loss": 3.7538, + "step": 3170000 + }, + { + "epoch": 35.21, + "learning_rate": 3.6975545832130247e-08, + "loss": 3.7502, + "step": 3170500 + }, + { + "epoch": 35.22, + "learning_rate": 3.696166403838038e-08, + "loss": 3.7547, + "step": 3171000 + }, + { + "epoch": 35.22, + "learning_rate": 3.694778224463052e-08, + "loss": 3.7578, + "step": 3171500 + }, + { + "epoch": 35.23, + "learning_rate": 3.693390045088066e-08, + "loss": 3.7497, + "step": 3172000 + }, + { + "epoch": 35.23, + "learning_rate": 3.6920018657130796e-08, + "loss": 3.7629, + "step": 3172500 + }, + { + "epoch": 35.24, + "learning_rate": 3.690613686338094e-08, + "loss": 3.7435, + "step": 3173000 + }, + { + "epoch": 35.24, + "learning_rate": 3.6892255069631074e-08, + "loss": 3.7534, + "step": 3173500 + }, + { + "epoch": 35.25, + "learning_rate": 3.6878373275881216e-08, + "loss": 3.7687, + "step": 3174000 + }, + { + "epoch": 35.25, + "learning_rate": 3.686449148213135e-08, + "loss": 3.7672, + "step": 3174500 + }, + { + "epoch": 35.26, + "learning_rate": 3.685060968838149e-08, + "loss": 3.7835, + "step": 3175000 + }, + { + "epoch": 35.27, + "learning_rate": 3.683672789463163e-08, + "loss": 3.7439, + "step": 3175500 + }, + { + "epoch": 35.27, + "learning_rate": 3.682284610088177e-08, + "loss": 3.744, + "step": 3176000 + }, + { + "epoch": 35.28, + "learning_rate": 3.680896430713191e-08, + "loss": 3.7513, + "step": 3176500 + }, + { + "epoch": 35.28, + "learning_rate": 3.679508251338205e-08, + "loss": 3.7625, + "step": 3177000 + }, + { + "epoch": 35.29, + "learning_rate": 3.6781200719632185e-08, + "loss": 3.7385, + "step": 3177500 + }, + { + "epoch": 35.29, + "learning_rate": 3.676731892588232e-08, + "loss": 3.7522, + "step": 3178000 + }, + { + "epoch": 35.3, + "learning_rate": 3.675343713213246e-08, + "loss": 3.7561, + "step": 3178500 + }, + { + "epoch": 35.3, + "learning_rate": 3.6739555338382605e-08, + "loss": 3.7456, + "step": 3179000 + }, + { + "epoch": 35.31, + "learning_rate": 3.672567354463274e-08, + "loss": 3.7454, + "step": 3179500 + }, + { + "epoch": 35.32, + "learning_rate": 3.6711791750882883e-08, + "loss": 3.7511, + "step": 3180000 + }, + { + "epoch": 35.32, + "learning_rate": 3.669790995713302e-08, + "loss": 3.7618, + "step": 3180500 + }, + { + "epoch": 35.33, + "learning_rate": 3.6684028163383155e-08, + "loss": 3.7713, + "step": 3181000 + }, + { + "epoch": 35.33, + "learning_rate": 3.66701463696333e-08, + "loss": 3.7798, + "step": 3181500 + }, + { + "epoch": 35.34, + "learning_rate": 3.665626457588343e-08, + "loss": 3.7554, + "step": 3182000 + }, + { + "epoch": 35.34, + "learning_rate": 3.6642382782133575e-08, + "loss": 3.7655, + "step": 3182500 + }, + { + "epoch": 35.35, + "learning_rate": 3.662850098838372e-08, + "loss": 3.7565, + "step": 3183000 + }, + { + "epoch": 35.35, + "learning_rate": 3.661461919463385e-08, + "loss": 3.7595, + "step": 3183500 + }, + { + "epoch": 35.36, + "learning_rate": 3.660073740088399e-08, + "loss": 3.764, + "step": 3184000 + }, + { + "epoch": 35.37, + "learning_rate": 3.658685560713413e-08, + "loss": 3.7733, + "step": 3184500 + }, + { + "epoch": 35.37, + "learning_rate": 3.6572973813384266e-08, + "loss": 3.7582, + "step": 3185000 + }, + { + "epoch": 35.38, + "learning_rate": 3.655909201963441e-08, + "loss": 3.7737, + "step": 3185500 + }, + { + "epoch": 35.38, + "learning_rate": 3.654521022588455e-08, + "loss": 3.7383, + "step": 3186000 + }, + { + "epoch": 35.39, + "learning_rate": 3.6531328432134686e-08, + "loss": 3.7705, + "step": 3186500 + }, + { + "epoch": 35.39, + "learning_rate": 3.651744663838482e-08, + "loss": 3.7517, + "step": 3187000 + }, + { + "epoch": 35.4, + "learning_rate": 3.6503564844634964e-08, + "loss": 3.7594, + "step": 3187500 + }, + { + "epoch": 35.4, + "learning_rate": 3.64896830508851e-08, + "loss": 3.7564, + "step": 3188000 + }, + { + "epoch": 35.41, + "learning_rate": 3.647580125713524e-08, + "loss": 3.7401, + "step": 3188500 + }, + { + "epoch": 35.42, + "learning_rate": 3.646191946338538e-08, + "loss": 3.7408, + "step": 3189000 + }, + { + "epoch": 35.42, + "learning_rate": 3.644803766963552e-08, + "loss": 3.7641, + "step": 3189500 + }, + { + "epoch": 35.43, + "learning_rate": 3.6434155875885656e-08, + "loss": 3.7475, + "step": 3190000 + }, + { + "epoch": 35.43, + "learning_rate": 3.642027408213579e-08, + "loss": 3.7772, + "step": 3190500 + }, + { + "epoch": 35.44, + "learning_rate": 3.6406392288385934e-08, + "loss": 3.7566, + "step": 3191000 + }, + { + "epoch": 35.44, + "learning_rate": 3.6392510494636076e-08, + "loss": 3.7662, + "step": 3191500 + }, + { + "epoch": 35.45, + "learning_rate": 3.637862870088621e-08, + "loss": 3.7541, + "step": 3192000 + }, + { + "epoch": 35.45, + "learning_rate": 3.6364746907136354e-08, + "loss": 3.77, + "step": 3192500 + }, + { + "epoch": 35.46, + "learning_rate": 3.635086511338649e-08, + "loss": 3.7598, + "step": 3193000 + }, + { + "epoch": 35.47, + "learning_rate": 3.6336983319636625e-08, + "loss": 3.761, + "step": 3193500 + }, + { + "epoch": 35.47, + "learning_rate": 3.632310152588677e-08, + "loss": 3.7398, + "step": 3194000 + }, + { + "epoch": 35.48, + "learning_rate": 3.630921973213691e-08, + "loss": 3.748, + "step": 3194500 + }, + { + "epoch": 35.48, + "learning_rate": 3.6295337938387045e-08, + "loss": 3.753, + "step": 3195000 + }, + { + "epoch": 35.49, + "learning_rate": 3.628145614463719e-08, + "loss": 3.769, + "step": 3195500 + }, + { + "epoch": 35.49, + "learning_rate": 3.626757435088732e-08, + "loss": 3.7737, + "step": 3196000 + }, + { + "epoch": 35.5, + "learning_rate": 3.625369255713746e-08, + "loss": 3.7464, + "step": 3196500 + }, + { + "epoch": 35.5, + "learning_rate": 3.62398107633876e-08, + "loss": 3.7455, + "step": 3197000 + }, + { + "epoch": 35.51, + "learning_rate": 3.622592896963774e-08, + "loss": 3.7634, + "step": 3197500 + }, + { + "epoch": 35.52, + "learning_rate": 3.621204717588788e-08, + "loss": 3.7662, + "step": 3198000 + }, + { + "epoch": 35.52, + "learning_rate": 3.619816538213802e-08, + "loss": 3.7411, + "step": 3198500 + }, + { + "epoch": 35.53, + "learning_rate": 3.618428358838815e-08, + "loss": 3.7484, + "step": 3199000 + }, + { + "epoch": 35.53, + "learning_rate": 3.617040179463829e-08, + "loss": 3.7596, + "step": 3199500 + }, + { + "epoch": 35.54, + "learning_rate": 3.6156520000888435e-08, + "loss": 3.748, + "step": 3200000 + }, + { + "epoch": 35.54, + "learning_rate": 3.614263820713857e-08, + "loss": 3.7569, + "step": 3200500 + }, + { + "epoch": 35.55, + "learning_rate": 3.612875641338871e-08, + "loss": 3.7558, + "step": 3201000 + }, + { + "epoch": 35.55, + "learning_rate": 3.6114874619638855e-08, + "loss": 3.7606, + "step": 3201500 + }, + { + "epoch": 35.56, + "learning_rate": 3.6100992825888984e-08, + "loss": 3.7583, + "step": 3202000 + }, + { + "epoch": 35.57, + "learning_rate": 3.6087111032139126e-08, + "loss": 3.7683, + "step": 3202500 + }, + { + "epoch": 35.57, + "learning_rate": 3.607322923838926e-08, + "loss": 3.7568, + "step": 3203000 + }, + { + "epoch": 35.58, + "learning_rate": 3.6059347444639404e-08, + "loss": 3.769, + "step": 3203500 + }, + { + "epoch": 35.58, + "learning_rate": 3.6045465650889547e-08, + "loss": 3.7725, + "step": 3204000 + }, + { + "epoch": 35.59, + "learning_rate": 3.603158385713968e-08, + "loss": 3.757, + "step": 3204500 + }, + { + "epoch": 35.59, + "learning_rate": 3.601770206338982e-08, + "loss": 3.7577, + "step": 3205000 + }, + { + "epoch": 35.6, + "learning_rate": 3.600382026963996e-08, + "loss": 3.76, + "step": 3205500 + }, + { + "epoch": 35.6, + "learning_rate": 3.5989938475890096e-08, + "loss": 3.7528, + "step": 3206000 + }, + { + "epoch": 35.61, + "learning_rate": 3.597605668214024e-08, + "loss": 3.7507, + "step": 3206500 + }, + { + "epoch": 35.62, + "learning_rate": 3.596217488839038e-08, + "loss": 3.7656, + "step": 3207000 + }, + { + "epoch": 35.62, + "learning_rate": 3.5948293094640516e-08, + "loss": 3.7352, + "step": 3207500 + }, + { + "epoch": 35.63, + "learning_rate": 3.593441130089065e-08, + "loss": 3.759, + "step": 3208000 + }, + { + "epoch": 35.63, + "learning_rate": 3.5920529507140794e-08, + "loss": 3.7655, + "step": 3208500 + }, + { + "epoch": 35.64, + "learning_rate": 3.590664771339093e-08, + "loss": 3.754, + "step": 3209000 + }, + { + "epoch": 35.64, + "learning_rate": 3.589276591964107e-08, + "loss": 3.73, + "step": 3209500 + }, + { + "epoch": 35.65, + "learning_rate": 3.5878884125891214e-08, + "loss": 3.747, + "step": 3210000 + }, + { + "epoch": 35.65, + "learning_rate": 3.586500233214135e-08, + "loss": 3.7393, + "step": 3210500 + }, + { + "epoch": 35.66, + "learning_rate": 3.5851120538391485e-08, + "loss": 3.7643, + "step": 3211000 + }, + { + "epoch": 35.67, + "learning_rate": 3.583723874464162e-08, + "loss": 3.7619, + "step": 3211500 + }, + { + "epoch": 35.67, + "learning_rate": 3.582335695089176e-08, + "loss": 3.7634, + "step": 3212000 + }, + { + "epoch": 35.68, + "learning_rate": 3.5809475157141906e-08, + "loss": 3.7449, + "step": 3212500 + }, + { + "epoch": 35.68, + "learning_rate": 3.579559336339204e-08, + "loss": 3.7526, + "step": 3213000 + }, + { + "epoch": 35.69, + "learning_rate": 3.5781711569642183e-08, + "loss": 3.7451, + "step": 3213500 + }, + { + "epoch": 35.69, + "learning_rate": 3.576782977589232e-08, + "loss": 3.7564, + "step": 3214000 + }, + { + "epoch": 35.7, + "learning_rate": 3.5753947982142455e-08, + "loss": 3.7436, + "step": 3214500 + }, + { + "epoch": 35.7, + "learning_rate": 3.57400661883926e-08, + "loss": 3.7375, + "step": 3215000 + }, + { + "epoch": 35.71, + "learning_rate": 3.572618439464274e-08, + "loss": 3.7577, + "step": 3215500 + }, + { + "epoch": 35.72, + "learning_rate": 3.5712302600892875e-08, + "loss": 3.7375, + "step": 3216000 + }, + { + "epoch": 35.72, + "learning_rate": 3.569842080714302e-08, + "loss": 3.7672, + "step": 3216500 + }, + { + "epoch": 35.73, + "learning_rate": 3.568453901339315e-08, + "loss": 3.7586, + "step": 3217000 + }, + { + "epoch": 35.73, + "learning_rate": 3.567065721964329e-08, + "loss": 3.7487, + "step": 3217500 + }, + { + "epoch": 35.74, + "learning_rate": 3.565677542589343e-08, + "loss": 3.7439, + "step": 3218000 + }, + { + "epoch": 35.74, + "learning_rate": 3.5642893632143566e-08, + "loss": 3.7588, + "step": 3218500 + }, + { + "epoch": 35.75, + "learning_rate": 3.562901183839371e-08, + "loss": 3.7543, + "step": 3219000 + }, + { + "epoch": 35.75, + "learning_rate": 3.561513004464385e-08, + "loss": 3.7582, + "step": 3219500 + }, + { + "epoch": 35.76, + "learning_rate": 3.5601248250893987e-08, + "loss": 3.7641, + "step": 3220000 + }, + { + "epoch": 35.77, + "learning_rate": 3.558736645714412e-08, + "loss": 3.7602, + "step": 3220500 + }, + { + "epoch": 35.77, + "learning_rate": 3.5573484663394264e-08, + "loss": 3.7685, + "step": 3221000 + }, + { + "epoch": 35.78, + "learning_rate": 3.55596028696444e-08, + "loss": 3.7328, + "step": 3221500 + }, + { + "epoch": 35.78, + "learning_rate": 3.554572107589454e-08, + "loss": 3.7602, + "step": 3222000 + }, + { + "epoch": 35.79, + "learning_rate": 3.5531839282144685e-08, + "loss": 3.742, + "step": 3222500 + }, + { + "epoch": 35.79, + "learning_rate": 3.551795748839482e-08, + "loss": 3.7656, + "step": 3223000 + }, + { + "epoch": 35.8, + "learning_rate": 3.5504075694644956e-08, + "loss": 3.7798, + "step": 3223500 + }, + { + "epoch": 35.8, + "learning_rate": 3.54901939008951e-08, + "loss": 3.7635, + "step": 3224000 + }, + { + "epoch": 35.81, + "learning_rate": 3.5476312107145234e-08, + "loss": 3.7718, + "step": 3224500 + }, + { + "epoch": 35.82, + "learning_rate": 3.5462430313395376e-08, + "loss": 3.7622, + "step": 3225000 + }, + { + "epoch": 35.82, + "learning_rate": 3.544854851964551e-08, + "loss": 3.7606, + "step": 3225500 + }, + { + "epoch": 35.83, + "learning_rate": 3.5434666725895654e-08, + "loss": 3.7641, + "step": 3226000 + }, + { + "epoch": 35.83, + "learning_rate": 3.542078493214579e-08, + "loss": 3.7644, + "step": 3226500 + }, + { + "epoch": 35.84, + "learning_rate": 3.5406903138395925e-08, + "loss": 3.7601, + "step": 3227000 + }, + { + "epoch": 35.84, + "learning_rate": 3.539302134464607e-08, + "loss": 3.7502, + "step": 3227500 + }, + { + "epoch": 35.85, + "learning_rate": 3.537913955089621e-08, + "loss": 3.7508, + "step": 3228000 + }, + { + "epoch": 35.85, + "learning_rate": 3.5365257757146345e-08, + "loss": 3.7565, + "step": 3228500 + }, + { + "epoch": 35.86, + "learning_rate": 3.535137596339649e-08, + "loss": 3.76, + "step": 3229000 + }, + { + "epoch": 35.87, + "learning_rate": 3.5337494169646623e-08, + "loss": 3.7603, + "step": 3229500 + }, + { + "epoch": 35.87, + "learning_rate": 3.532361237589676e-08, + "loss": 3.7698, + "step": 3230000 + }, + { + "epoch": 35.88, + "learning_rate": 3.53097305821469e-08, + "loss": 3.7701, + "step": 3230500 + }, + { + "epoch": 35.88, + "learning_rate": 3.5295848788397044e-08, + "loss": 3.7565, + "step": 3231000 + }, + { + "epoch": 35.89, + "learning_rate": 3.528196699464718e-08, + "loss": 3.7539, + "step": 3231500 + }, + { + "epoch": 35.89, + "learning_rate": 3.526808520089732e-08, + "loss": 3.7697, + "step": 3232000 + }, + { + "epoch": 35.9, + "learning_rate": 3.525420340714745e-08, + "loss": 3.7601, + "step": 3232500 + }, + { + "epoch": 35.9, + "learning_rate": 3.524032161339759e-08, + "loss": 3.7525, + "step": 3233000 + }, + { + "epoch": 35.91, + "learning_rate": 3.5226439819647735e-08, + "loss": 3.7544, + "step": 3233500 + }, + { + "epoch": 35.91, + "learning_rate": 3.521255802589787e-08, + "loss": 3.773, + "step": 3234000 + }, + { + "epoch": 35.92, + "learning_rate": 3.519867623214801e-08, + "loss": 3.7572, + "step": 3234500 + }, + { + "epoch": 35.93, + "learning_rate": 3.5184794438398155e-08, + "loss": 3.7558, + "step": 3235000 + }, + { + "epoch": 35.93, + "learning_rate": 3.5170912644648284e-08, + "loss": 3.756, + "step": 3235500 + }, + { + "epoch": 35.94, + "learning_rate": 3.5157030850898427e-08, + "loss": 3.7628, + "step": 3236000 + }, + { + "epoch": 35.94, + "learning_rate": 3.514314905714857e-08, + "loss": 3.7487, + "step": 3236500 + }, + { + "epoch": 35.95, + "learning_rate": 3.5129267263398704e-08, + "loss": 3.7499, + "step": 3237000 + }, + { + "epoch": 35.95, + "learning_rate": 3.5115385469648847e-08, + "loss": 3.7341, + "step": 3237500 + }, + { + "epoch": 35.96, + "learning_rate": 3.510150367589899e-08, + "loss": 3.7722, + "step": 3238000 + }, + { + "epoch": 35.96, + "learning_rate": 3.508762188214912e-08, + "loss": 3.7618, + "step": 3238500 + }, + { + "epoch": 35.97, + "learning_rate": 3.507374008839926e-08, + "loss": 3.7518, + "step": 3239000 + }, + { + "epoch": 35.98, + "learning_rate": 3.50598582946494e-08, + "loss": 3.7574, + "step": 3239500 + }, + { + "epoch": 35.98, + "learning_rate": 3.504597650089954e-08, + "loss": 3.7626, + "step": 3240000 + }, + { + "epoch": 35.99, + "learning_rate": 3.503209470714968e-08, + "loss": 3.7609, + "step": 3240500 + }, + { + "epoch": 35.99, + "learning_rate": 3.5018212913399816e-08, + "loss": 3.7792, + "step": 3241000 + }, + { + "epoch": 36.0, + "learning_rate": 3.500433111964995e-08, + "loss": 3.7314, + "step": 3241500 + }, + { + "epoch": 36.0, + "eval_loss": 3.829056978225708, + "eval_runtime": 6.3058, + "eval_samples_per_second": 246.44, + "step": 3241656 + }, + { + "epoch": 36.0, + "learning_rate": 3.4990449325900094e-08, + "loss": 3.7589, + "step": 3242000 + }, + { + "epoch": 36.01, + "learning_rate": 3.497656753215023e-08, + "loss": 3.7573, + "step": 3242500 + }, + { + "epoch": 36.01, + "learning_rate": 3.496268573840037e-08, + "loss": 3.755, + "step": 3243000 + }, + { + "epoch": 36.02, + "learning_rate": 3.4948803944650514e-08, + "loss": 3.7551, + "step": 3243500 + }, + { + "epoch": 36.03, + "learning_rate": 3.493492215090065e-08, + "loss": 3.7473, + "step": 3244000 + }, + { + "epoch": 36.03, + "learning_rate": 3.4921040357150785e-08, + "loss": 3.7427, + "step": 3244500 + }, + { + "epoch": 36.04, + "learning_rate": 3.490715856340093e-08, + "loss": 3.7479, + "step": 3245000 + }, + { + "epoch": 36.04, + "learning_rate": 3.4893276769651063e-08, + "loss": 3.7498, + "step": 3245500 + }, + { + "epoch": 36.05, + "learning_rate": 3.4879394975901206e-08, + "loss": 3.76, + "step": 3246000 + }, + { + "epoch": 36.05, + "learning_rate": 3.486551318215135e-08, + "loss": 3.7631, + "step": 3246500 + }, + { + "epoch": 36.06, + "learning_rate": 3.4851631388401484e-08, + "loss": 3.7717, + "step": 3247000 + }, + { + "epoch": 36.06, + "learning_rate": 3.483774959465162e-08, + "loss": 3.7592, + "step": 3247500 + }, + { + "epoch": 36.07, + "learning_rate": 3.4823867800901755e-08, + "loss": 3.7684, + "step": 3248000 + }, + { + "epoch": 36.08, + "learning_rate": 3.48099860071519e-08, + "loss": 3.7549, + "step": 3248500 + }, + { + "epoch": 36.08, + "learning_rate": 3.479610421340204e-08, + "loss": 3.7557, + "step": 3249000 + }, + { + "epoch": 36.09, + "learning_rate": 3.4782222419652175e-08, + "loss": 3.7463, + "step": 3249500 + }, + { + "epoch": 36.09, + "learning_rate": 3.476834062590232e-08, + "loss": 3.7789, + "step": 3250000 + }, + { + "epoch": 36.1, + "learning_rate": 3.475445883215245e-08, + "loss": 3.7622, + "step": 3250500 + }, + { + "epoch": 36.1, + "learning_rate": 3.474057703840259e-08, + "loss": 3.7508, + "step": 3251000 + }, + { + "epoch": 36.11, + "learning_rate": 3.472669524465273e-08, + "loss": 3.764, + "step": 3251500 + }, + { + "epoch": 36.11, + "learning_rate": 3.471281345090287e-08, + "loss": 3.7582, + "step": 3252000 + }, + { + "epoch": 36.12, + "learning_rate": 3.469893165715301e-08, + "loss": 3.747, + "step": 3252500 + }, + { + "epoch": 36.13, + "learning_rate": 3.468504986340315e-08, + "loss": 3.7286, + "step": 3253000 + }, + { + "epoch": 36.13, + "learning_rate": 3.4671168069653287e-08, + "loss": 3.7528, + "step": 3253500 + }, + { + "epoch": 36.14, + "learning_rate": 3.465728627590342e-08, + "loss": 3.7389, + "step": 3254000 + }, + { + "epoch": 36.14, + "learning_rate": 3.4643404482153565e-08, + "loss": 3.7494, + "step": 3254500 + }, + { + "epoch": 36.15, + "learning_rate": 3.46295226884037e-08, + "loss": 3.7599, + "step": 3255000 + }, + { + "epoch": 36.15, + "learning_rate": 3.461564089465384e-08, + "loss": 3.7641, + "step": 3255500 + }, + { + "epoch": 36.16, + "learning_rate": 3.4601759100903985e-08, + "loss": 3.7531, + "step": 3256000 + }, + { + "epoch": 36.16, + "learning_rate": 3.458787730715412e-08, + "loss": 3.7652, + "step": 3256500 + }, + { + "epoch": 36.17, + "learning_rate": 3.4573995513404256e-08, + "loss": 3.76, + "step": 3257000 + }, + { + "epoch": 36.18, + "learning_rate": 3.45601137196544e-08, + "loss": 3.7339, + "step": 3257500 + }, + { + "epoch": 36.18, + "learning_rate": 3.4546231925904534e-08, + "loss": 3.7297, + "step": 3258000 + }, + { + "epoch": 36.19, + "learning_rate": 3.4532350132154676e-08, + "loss": 3.7563, + "step": 3258500 + }, + { + "epoch": 36.19, + "learning_rate": 3.451846833840482e-08, + "loss": 3.7446, + "step": 3259000 + }, + { + "epoch": 36.2, + "learning_rate": 3.4504586544654954e-08, + "loss": 3.761, + "step": 3259500 + }, + { + "epoch": 36.2, + "learning_rate": 3.449070475090509e-08, + "loss": 3.7579, + "step": 3260000 + }, + { + "epoch": 36.21, + "learning_rate": 3.447682295715523e-08, + "loss": 3.7581, + "step": 3260500 + }, + { + "epoch": 36.21, + "learning_rate": 3.446294116340537e-08, + "loss": 3.7704, + "step": 3261000 + }, + { + "epoch": 36.22, + "learning_rate": 3.444905936965551e-08, + "loss": 3.754, + "step": 3261500 + }, + { + "epoch": 36.23, + "learning_rate": 3.4435177575905646e-08, + "loss": 3.7573, + "step": 3262000 + }, + { + "epoch": 36.23, + "learning_rate": 3.442129578215579e-08, + "loss": 3.7276, + "step": 3262500 + }, + { + "epoch": 36.24, + "learning_rate": 3.4407413988405923e-08, + "loss": 3.7663, + "step": 3263000 + }, + { + "epoch": 36.24, + "learning_rate": 3.439353219465606e-08, + "loss": 3.7356, + "step": 3263500 + }, + { + "epoch": 36.25, + "learning_rate": 3.43796504009062e-08, + "loss": 3.7452, + "step": 3264000 + }, + { + "epoch": 36.25, + "learning_rate": 3.4365768607156344e-08, + "loss": 3.756, + "step": 3264500 + }, + { + "epoch": 36.26, + "learning_rate": 3.435188681340648e-08, + "loss": 3.7943, + "step": 3265000 + }, + { + "epoch": 36.26, + "learning_rate": 3.433800501965662e-08, + "loss": 3.7502, + "step": 3265500 + }, + { + "epoch": 36.27, + "learning_rate": 3.432412322590676e-08, + "loss": 3.7672, + "step": 3266000 + }, + { + "epoch": 36.28, + "learning_rate": 3.431024143215689e-08, + "loss": 3.7641, + "step": 3266500 + }, + { + "epoch": 36.28, + "learning_rate": 3.4296359638407035e-08, + "loss": 3.7484, + "step": 3267000 + }, + { + "epoch": 36.29, + "learning_rate": 3.428247784465718e-08, + "loss": 3.7545, + "step": 3267500 + }, + { + "epoch": 36.29, + "learning_rate": 3.426859605090731e-08, + "loss": 3.7515, + "step": 3268000 + }, + { + "epoch": 36.3, + "learning_rate": 3.4254714257157455e-08, + "loss": 3.7541, + "step": 3268500 + }, + { + "epoch": 36.3, + "learning_rate": 3.4240832463407584e-08, + "loss": 3.7685, + "step": 3269000 + }, + { + "epoch": 36.31, + "learning_rate": 3.4226950669657727e-08, + "loss": 3.7552, + "step": 3269500 + }, + { + "epoch": 36.31, + "learning_rate": 3.421306887590787e-08, + "loss": 3.7673, + "step": 3270000 + }, + { + "epoch": 36.32, + "learning_rate": 3.4199187082158004e-08, + "loss": 3.7421, + "step": 3270500 + }, + { + "epoch": 36.33, + "learning_rate": 3.418530528840815e-08, + "loss": 3.7581, + "step": 3271000 + }, + { + "epoch": 36.33, + "learning_rate": 3.417142349465829e-08, + "loss": 3.7389, + "step": 3271500 + }, + { + "epoch": 36.34, + "learning_rate": 3.415754170090842e-08, + "loss": 3.7568, + "step": 3272000 + }, + { + "epoch": 36.34, + "learning_rate": 3.414365990715856e-08, + "loss": 3.7645, + "step": 3272500 + }, + { + "epoch": 36.35, + "learning_rate": 3.41297781134087e-08, + "loss": 3.7609, + "step": 3273000 + }, + { + "epoch": 36.35, + "learning_rate": 3.411589631965884e-08, + "loss": 3.7816, + "step": 3273500 + }, + { + "epoch": 36.36, + "learning_rate": 3.410201452590898e-08, + "loss": 3.7398, + "step": 3274000 + }, + { + "epoch": 36.36, + "learning_rate": 3.4088132732159116e-08, + "loss": 3.7734, + "step": 3274500 + }, + { + "epoch": 36.37, + "learning_rate": 3.407425093840925e-08, + "loss": 3.76, + "step": 3275000 + }, + { + "epoch": 36.38, + "learning_rate": 3.4060369144659394e-08, + "loss": 3.7396, + "step": 3275500 + }, + { + "epoch": 36.38, + "learning_rate": 3.4046487350909536e-08, + "loss": 3.7485, + "step": 3276000 + }, + { + "epoch": 36.39, + "learning_rate": 3.403260555715967e-08, + "loss": 3.7517, + "step": 3276500 + }, + { + "epoch": 36.39, + "learning_rate": 3.4018723763409814e-08, + "loss": 3.7601, + "step": 3277000 + }, + { + "epoch": 36.4, + "learning_rate": 3.400484196965995e-08, + "loss": 3.7581, + "step": 3277500 + }, + { + "epoch": 36.4, + "learning_rate": 3.3990960175910086e-08, + "loss": 3.7576, + "step": 3278000 + }, + { + "epoch": 36.41, + "learning_rate": 3.397707838216023e-08, + "loss": 3.7629, + "step": 3278500 + }, + { + "epoch": 36.41, + "learning_rate": 3.3963196588410363e-08, + "loss": 3.7686, + "step": 3279000 + }, + { + "epoch": 36.42, + "learning_rate": 3.3949314794660506e-08, + "loss": 3.757, + "step": 3279500 + }, + { + "epoch": 36.43, + "learning_rate": 3.393543300091065e-08, + "loss": 3.7583, + "step": 3280000 + }, + { + "epoch": 36.43, + "learning_rate": 3.3921551207160784e-08, + "loss": 3.7525, + "step": 3280500 + }, + { + "epoch": 36.44, + "learning_rate": 3.390766941341092e-08, + "loss": 3.7442, + "step": 3281000 + }, + { + "epoch": 36.44, + "learning_rate": 3.389378761966106e-08, + "loss": 3.7442, + "step": 3281500 + }, + { + "epoch": 36.45, + "learning_rate": 3.38799058259112e-08, + "loss": 3.777, + "step": 3282000 + }, + { + "epoch": 36.45, + "learning_rate": 3.386602403216134e-08, + "loss": 3.7565, + "step": 3282500 + }, + { + "epoch": 36.46, + "learning_rate": 3.385214223841148e-08, + "loss": 3.7799, + "step": 3283000 + }, + { + "epoch": 36.46, + "learning_rate": 3.383826044466162e-08, + "loss": 3.7727, + "step": 3283500 + }, + { + "epoch": 36.47, + "learning_rate": 3.382437865091175e-08, + "loss": 3.7478, + "step": 3284000 + }, + { + "epoch": 36.48, + "learning_rate": 3.381049685716189e-08, + "loss": 3.749, + "step": 3284500 + }, + { + "epoch": 36.48, + "learning_rate": 3.379661506341203e-08, + "loss": 3.7545, + "step": 3285000 + }, + { + "epoch": 36.49, + "learning_rate": 3.378273326966217e-08, + "loss": 3.7678, + "step": 3285500 + }, + { + "epoch": 36.49, + "learning_rate": 3.376885147591231e-08, + "loss": 3.7347, + "step": 3286000 + }, + { + "epoch": 36.5, + "learning_rate": 3.375496968216245e-08, + "loss": 3.7648, + "step": 3286500 + }, + { + "epoch": 36.5, + "learning_rate": 3.374108788841259e-08, + "loss": 3.7561, + "step": 3287000 + }, + { + "epoch": 36.51, + "learning_rate": 3.372720609466272e-08, + "loss": 3.7523, + "step": 3287500 + }, + { + "epoch": 36.51, + "learning_rate": 3.3713324300912865e-08, + "loss": 3.7595, + "step": 3288000 + }, + { + "epoch": 36.52, + "learning_rate": 3.369944250716301e-08, + "loss": 3.7514, + "step": 3288500 + }, + { + "epoch": 36.53, + "learning_rate": 3.368556071341314e-08, + "loss": 3.7581, + "step": 3289000 + }, + { + "epoch": 36.53, + "learning_rate": 3.3671678919663285e-08, + "loss": 3.7511, + "step": 3289500 + }, + { + "epoch": 36.54, + "learning_rate": 3.365779712591342e-08, + "loss": 3.7545, + "step": 3290000 + }, + { + "epoch": 36.54, + "learning_rate": 3.3643915332163556e-08, + "loss": 3.7466, + "step": 3290500 + }, + { + "epoch": 36.55, + "learning_rate": 3.36300335384137e-08, + "loss": 3.7731, + "step": 3291000 + }, + { + "epoch": 36.55, + "learning_rate": 3.3616151744663834e-08, + "loss": 3.7558, + "step": 3291500 + }, + { + "epoch": 36.56, + "learning_rate": 3.3602269950913976e-08, + "loss": 3.7731, + "step": 3292000 + }, + { + "epoch": 36.56, + "learning_rate": 3.358838815716412e-08, + "loss": 3.761, + "step": 3292500 + }, + { + "epoch": 36.57, + "learning_rate": 3.3574506363414254e-08, + "loss": 3.7406, + "step": 3293000 + }, + { + "epoch": 36.58, + "learning_rate": 3.356062456966439e-08, + "loss": 3.7477, + "step": 3293500 + }, + { + "epoch": 36.58, + "learning_rate": 3.354674277591453e-08, + "loss": 3.7609, + "step": 3294000 + }, + { + "epoch": 36.59, + "learning_rate": 3.353286098216467e-08, + "loss": 3.7491, + "step": 3294500 + }, + { + "epoch": 36.59, + "learning_rate": 3.351897918841481e-08, + "loss": 3.7619, + "step": 3295000 + }, + { + "epoch": 36.6, + "learning_rate": 3.350509739466495e-08, + "loss": 3.7434, + "step": 3295500 + }, + { + "epoch": 36.6, + "learning_rate": 3.349121560091509e-08, + "loss": 3.7527, + "step": 3296000 + }, + { + "epoch": 36.61, + "learning_rate": 3.3477333807165224e-08, + "loss": 3.75, + "step": 3296500 + }, + { + "epoch": 36.61, + "learning_rate": 3.3463452013415366e-08, + "loss": 3.7352, + "step": 3297000 + }, + { + "epoch": 36.62, + "learning_rate": 3.34495702196655e-08, + "loss": 3.7685, + "step": 3297500 + }, + { + "epoch": 36.63, + "learning_rate": 3.3435688425915644e-08, + "loss": 3.759, + "step": 3298000 + }, + { + "epoch": 36.63, + "learning_rate": 3.342180663216578e-08, + "loss": 3.758, + "step": 3298500 + }, + { + "epoch": 36.64, + "learning_rate": 3.340792483841592e-08, + "loss": 3.7538, + "step": 3299000 + }, + { + "epoch": 36.64, + "learning_rate": 3.339404304466606e-08, + "loss": 3.7657, + "step": 3299500 + }, + { + "epoch": 36.65, + "learning_rate": 3.338016125091619e-08, + "loss": 3.7775, + "step": 3300000 + }, + { + "epoch": 36.65, + "learning_rate": 3.3366279457166335e-08, + "loss": 3.7829, + "step": 3300500 + }, + { + "epoch": 36.66, + "learning_rate": 3.335239766341648e-08, + "loss": 3.7693, + "step": 3301000 + }, + { + "epoch": 36.66, + "learning_rate": 3.333851586966661e-08, + "loss": 3.7751, + "step": 3301500 + }, + { + "epoch": 36.67, + "learning_rate": 3.3324634075916755e-08, + "loss": 3.741, + "step": 3302000 + }, + { + "epoch": 36.68, + "learning_rate": 3.331075228216689e-08, + "loss": 3.7541, + "step": 3302500 + }, + { + "epoch": 36.68, + "learning_rate": 3.3296870488417027e-08, + "loss": 3.7384, + "step": 3303000 + }, + { + "epoch": 36.69, + "learning_rate": 3.328298869466717e-08, + "loss": 3.7503, + "step": 3303500 + }, + { + "epoch": 36.69, + "learning_rate": 3.326910690091731e-08, + "loss": 3.7439, + "step": 3304000 + }, + { + "epoch": 36.7, + "learning_rate": 3.325522510716745e-08, + "loss": 3.7484, + "step": 3304500 + }, + { + "epoch": 36.7, + "learning_rate": 3.324134331341759e-08, + "loss": 3.7363, + "step": 3305000 + }, + { + "epoch": 36.71, + "learning_rate": 3.3227461519667725e-08, + "loss": 3.7508, + "step": 3305500 + }, + { + "epoch": 36.71, + "learning_rate": 3.321357972591786e-08, + "loss": 3.7536, + "step": 3306000 + }, + { + "epoch": 36.72, + "learning_rate": 3.3199697932168e-08, + "loss": 3.7773, + "step": 3306500 + }, + { + "epoch": 36.73, + "learning_rate": 3.318581613841814e-08, + "loss": 3.7575, + "step": 3307000 + }, + { + "epoch": 36.73, + "learning_rate": 3.317193434466828e-08, + "loss": 3.7656, + "step": 3307500 + }, + { + "epoch": 36.74, + "learning_rate": 3.3158052550918416e-08, + "loss": 3.7407, + "step": 3308000 + }, + { + "epoch": 36.74, + "learning_rate": 3.314417075716855e-08, + "loss": 3.7642, + "step": 3308500 + }, + { + "epoch": 36.75, + "learning_rate": 3.3130288963418694e-08, + "loss": 3.749, + "step": 3309000 + }, + { + "epoch": 36.75, + "learning_rate": 3.3116407169668836e-08, + "loss": 3.7476, + "step": 3309500 + }, + { + "epoch": 36.76, + "learning_rate": 3.310252537591897e-08, + "loss": 3.74, + "step": 3310000 + }, + { + "epoch": 36.76, + "learning_rate": 3.3088643582169114e-08, + "loss": 3.7618, + "step": 3310500 + }, + { + "epoch": 36.77, + "learning_rate": 3.307476178841925e-08, + "loss": 3.7678, + "step": 3311000 + }, + { + "epoch": 36.78, + "learning_rate": 3.3060879994669386e-08, + "loss": 3.7612, + "step": 3311500 + }, + { + "epoch": 36.78, + "learning_rate": 3.304699820091953e-08, + "loss": 3.7596, + "step": 3312000 + }, + { + "epoch": 36.79, + "learning_rate": 3.303311640716967e-08, + "loss": 3.7429, + "step": 3312500 + }, + { + "epoch": 36.79, + "learning_rate": 3.3019234613419806e-08, + "loss": 3.7553, + "step": 3313000 + }, + { + "epoch": 36.8, + "learning_rate": 3.300535281966995e-08, + "loss": 3.748, + "step": 3313500 + }, + { + "epoch": 36.8, + "learning_rate": 3.2991471025920084e-08, + "loss": 3.7643, + "step": 3314000 + }, + { + "epoch": 36.81, + "learning_rate": 3.297758923217022e-08, + "loss": 3.7431, + "step": 3314500 + }, + { + "epoch": 36.81, + "learning_rate": 3.296370743842036e-08, + "loss": 3.7361, + "step": 3315000 + }, + { + "epoch": 36.82, + "learning_rate": 3.29498256446705e-08, + "loss": 3.729, + "step": 3315500 + }, + { + "epoch": 36.83, + "learning_rate": 3.293594385092064e-08, + "loss": 3.7562, + "step": 3316000 + }, + { + "epoch": 36.83, + "learning_rate": 3.292206205717078e-08, + "loss": 3.7377, + "step": 3316500 + }, + { + "epoch": 36.84, + "learning_rate": 3.290818026342092e-08, + "loss": 3.7573, + "step": 3317000 + }, + { + "epoch": 36.84, + "learning_rate": 3.289429846967105e-08, + "loss": 3.7563, + "step": 3317500 + }, + { + "epoch": 36.85, + "learning_rate": 3.2880416675921195e-08, + "loss": 3.7548, + "step": 3318000 + }, + { + "epoch": 36.85, + "learning_rate": 3.286653488217133e-08, + "loss": 3.757, + "step": 3318500 + }, + { + "epoch": 36.86, + "learning_rate": 3.285265308842147e-08, + "loss": 3.7806, + "step": 3319000 + }, + { + "epoch": 36.86, + "learning_rate": 3.2838771294671615e-08, + "loss": 3.7635, + "step": 3319500 + }, + { + "epoch": 36.87, + "learning_rate": 3.282488950092175e-08, + "loss": 3.7515, + "step": 3320000 + }, + { + "epoch": 36.88, + "learning_rate": 3.281100770717189e-08, + "loss": 3.7389, + "step": 3320500 + }, + { + "epoch": 36.88, + "learning_rate": 3.279712591342202e-08, + "loss": 3.7553, + "step": 3321000 + }, + { + "epoch": 36.89, + "learning_rate": 3.2783244119672165e-08, + "loss": 3.7461, + "step": 3321500 + }, + { + "epoch": 36.89, + "learning_rate": 3.276936232592231e-08, + "loss": 3.7716, + "step": 3322000 + }, + { + "epoch": 36.9, + "learning_rate": 3.275548053217244e-08, + "loss": 3.7601, + "step": 3322500 + }, + { + "epoch": 36.9, + "learning_rate": 3.2741598738422585e-08, + "loss": 3.7518, + "step": 3323000 + }, + { + "epoch": 36.91, + "learning_rate": 3.272771694467272e-08, + "loss": 3.7527, + "step": 3323500 + }, + { + "epoch": 36.91, + "learning_rate": 3.2713835150922856e-08, + "loss": 3.7434, + "step": 3324000 + }, + { + "epoch": 36.92, + "learning_rate": 3.2699953357173e-08, + "loss": 3.7581, + "step": 3324500 + }, + { + "epoch": 36.93, + "learning_rate": 3.268607156342314e-08, + "loss": 3.7519, + "step": 3325000 + }, + { + "epoch": 36.93, + "learning_rate": 3.2672189769673276e-08, + "loss": 3.753, + "step": 3325500 + }, + { + "epoch": 36.94, + "learning_rate": 3.265830797592342e-08, + "loss": 3.7534, + "step": 3326000 + }, + { + "epoch": 36.94, + "learning_rate": 3.2644426182173554e-08, + "loss": 3.7557, + "step": 3326500 + }, + { + "epoch": 36.95, + "learning_rate": 3.263054438842369e-08, + "loss": 3.7656, + "step": 3327000 + }, + { + "epoch": 36.95, + "learning_rate": 3.261666259467383e-08, + "loss": 3.7492, + "step": 3327500 + }, + { + "epoch": 36.96, + "learning_rate": 3.260278080092397e-08, + "loss": 3.7355, + "step": 3328000 + }, + { + "epoch": 36.96, + "learning_rate": 3.258889900717411e-08, + "loss": 3.7599, + "step": 3328500 + }, + { + "epoch": 36.97, + "learning_rate": 3.257501721342425e-08, + "loss": 3.761, + "step": 3329000 + }, + { + "epoch": 36.98, + "learning_rate": 3.256113541967439e-08, + "loss": 3.7553, + "step": 3329500 + }, + { + "epoch": 36.98, + "learning_rate": 3.2547253625924524e-08, + "loss": 3.7394, + "step": 3330000 + }, + { + "epoch": 36.99, + "learning_rate": 3.2533371832174666e-08, + "loss": 3.7374, + "step": 3330500 + }, + { + "epoch": 36.99, + "learning_rate": 3.25194900384248e-08, + "loss": 3.7448, + "step": 3331000 + }, + { + "epoch": 37.0, + "learning_rate": 3.2505608244674944e-08, + "loss": 3.7357, + "step": 3331500 + }, + { + "epoch": 37.0, + "eval_loss": 3.8285868167877197, + "eval_runtime": 6.3016, + "eval_samples_per_second": 246.605, + "step": 3331702 + }, + { + "epoch": 37.0, + "learning_rate": 3.2491726450925086e-08, + "loss": 3.7674, + "step": 3332000 + }, + { + "epoch": 37.01, + "learning_rate": 3.247784465717522e-08, + "loss": 3.7627, + "step": 3332500 + }, + { + "epoch": 37.01, + "learning_rate": 3.246396286342536e-08, + "loss": 3.7528, + "step": 3333000 + }, + { + "epoch": 37.02, + "learning_rate": 3.24500810696755e-08, + "loss": 3.758, + "step": 3333500 + }, + { + "epoch": 37.03, + "learning_rate": 3.2436199275925635e-08, + "loss": 3.7433, + "step": 3334000 + }, + { + "epoch": 37.03, + "learning_rate": 3.242231748217578e-08, + "loss": 3.7402, + "step": 3334500 + }, + { + "epoch": 37.04, + "learning_rate": 3.240843568842592e-08, + "loss": 3.7674, + "step": 3335000 + }, + { + "epoch": 37.04, + "learning_rate": 3.2394553894676055e-08, + "loss": 3.7388, + "step": 3335500 + }, + { + "epoch": 37.05, + "learning_rate": 3.238067210092619e-08, + "loss": 3.7443, + "step": 3336000 + }, + { + "epoch": 37.05, + "learning_rate": 3.236679030717633e-08, + "loss": 3.7599, + "step": 3336500 + }, + { + "epoch": 37.06, + "learning_rate": 3.235290851342647e-08, + "loss": 3.7855, + "step": 3337000 + }, + { + "epoch": 37.06, + "learning_rate": 3.233902671967661e-08, + "loss": 3.7608, + "step": 3337500 + }, + { + "epoch": 37.07, + "learning_rate": 3.232514492592675e-08, + "loss": 3.7549, + "step": 3338000 + }, + { + "epoch": 37.08, + "learning_rate": 3.231126313217688e-08, + "loss": 3.748, + "step": 3338500 + }, + { + "epoch": 37.08, + "learning_rate": 3.2297381338427025e-08, + "loss": 3.7428, + "step": 3339000 + }, + { + "epoch": 37.09, + "learning_rate": 3.228349954467716e-08, + "loss": 3.7516, + "step": 3339500 + }, + { + "epoch": 37.09, + "learning_rate": 3.22696177509273e-08, + "loss": 3.7639, + "step": 3340000 + }, + { + "epoch": 37.1, + "learning_rate": 3.2255735957177445e-08, + "loss": 3.752, + "step": 3340500 + }, + { + "epoch": 37.1, + "learning_rate": 3.224185416342758e-08, + "loss": 3.7565, + "step": 3341000 + }, + { + "epoch": 37.11, + "learning_rate": 3.2227972369677716e-08, + "loss": 3.7648, + "step": 3341500 + }, + { + "epoch": 37.11, + "learning_rate": 3.221409057592786e-08, + "loss": 3.7706, + "step": 3342000 + }, + { + "epoch": 37.12, + "learning_rate": 3.2200208782177994e-08, + "loss": 3.7527, + "step": 3342500 + }, + { + "epoch": 37.13, + "learning_rate": 3.2186326988428136e-08, + "loss": 3.7359, + "step": 3343000 + }, + { + "epoch": 37.13, + "learning_rate": 3.217244519467827e-08, + "loss": 3.7478, + "step": 3343500 + }, + { + "epoch": 37.14, + "learning_rate": 3.2158563400928414e-08, + "loss": 3.7665, + "step": 3344000 + }, + { + "epoch": 37.14, + "learning_rate": 3.214468160717855e-08, + "loss": 3.7741, + "step": 3344500 + }, + { + "epoch": 37.15, + "learning_rate": 3.2130799813428686e-08, + "loss": 3.7599, + "step": 3345000 + }, + { + "epoch": 37.15, + "learning_rate": 3.211691801967883e-08, + "loss": 3.7647, + "step": 3345500 + }, + { + "epoch": 37.16, + "learning_rate": 3.210303622592897e-08, + "loss": 3.7407, + "step": 3346000 + }, + { + "epoch": 37.16, + "learning_rate": 3.2089154432179106e-08, + "loss": 3.7566, + "step": 3346500 + }, + { + "epoch": 37.17, + "learning_rate": 3.207527263842925e-08, + "loss": 3.7688, + "step": 3347000 + }, + { + "epoch": 37.18, + "learning_rate": 3.2061390844679384e-08, + "loss": 3.7577, + "step": 3347500 + }, + { + "epoch": 37.18, + "learning_rate": 3.204750905092952e-08, + "loss": 3.7662, + "step": 3348000 + }, + { + "epoch": 37.19, + "learning_rate": 3.203362725717966e-08, + "loss": 3.759, + "step": 3348500 + }, + { + "epoch": 37.19, + "learning_rate": 3.2019745463429804e-08, + "loss": 3.749, + "step": 3349000 + }, + { + "epoch": 37.2, + "learning_rate": 3.200586366967994e-08, + "loss": 3.7605, + "step": 3349500 + }, + { + "epoch": 37.2, + "learning_rate": 3.199198187593008e-08, + "loss": 3.7458, + "step": 3350000 + }, + { + "epoch": 37.21, + "learning_rate": 3.197810008218022e-08, + "loss": 3.7652, + "step": 3350500 + }, + { + "epoch": 37.21, + "learning_rate": 3.196421828843035e-08, + "loss": 3.7664, + "step": 3351000 + }, + { + "epoch": 37.22, + "learning_rate": 3.1950336494680495e-08, + "loss": 3.759, + "step": 3351500 + }, + { + "epoch": 37.23, + "learning_rate": 3.193645470093063e-08, + "loss": 3.7555, + "step": 3352000 + }, + { + "epoch": 37.23, + "learning_rate": 3.1922572907180773e-08, + "loss": 3.7478, + "step": 3352500 + }, + { + "epoch": 37.24, + "learning_rate": 3.1908691113430916e-08, + "loss": 3.7581, + "step": 3353000 + }, + { + "epoch": 37.24, + "learning_rate": 3.189480931968105e-08, + "loss": 3.7562, + "step": 3353500 + }, + { + "epoch": 37.25, + "learning_rate": 3.188092752593119e-08, + "loss": 3.7527, + "step": 3354000 + }, + { + "epoch": 37.25, + "learning_rate": 3.186704573218133e-08, + "loss": 3.762, + "step": 3354500 + }, + { + "epoch": 37.26, + "learning_rate": 3.1853163938431465e-08, + "loss": 3.7496, + "step": 3355000 + }, + { + "epoch": 37.26, + "learning_rate": 3.183928214468161e-08, + "loss": 3.7467, + "step": 3355500 + }, + { + "epoch": 37.27, + "learning_rate": 3.182540035093175e-08, + "loss": 3.7363, + "step": 3356000 + }, + { + "epoch": 37.28, + "learning_rate": 3.1811518557181885e-08, + "loss": 3.7561, + "step": 3356500 + }, + { + "epoch": 37.28, + "learning_rate": 3.179763676343202e-08, + "loss": 3.7629, + "step": 3357000 + }, + { + "epoch": 37.29, + "learning_rate": 3.1783754969682156e-08, + "loss": 3.7565, + "step": 3357500 + }, + { + "epoch": 37.29, + "learning_rate": 3.17698731759323e-08, + "loss": 3.7675, + "step": 3358000 + }, + { + "epoch": 37.3, + "learning_rate": 3.175599138218244e-08, + "loss": 3.7512, + "step": 3358500 + }, + { + "epoch": 37.3, + "learning_rate": 3.1742109588432576e-08, + "loss": 3.755, + "step": 3359000 + }, + { + "epoch": 37.31, + "learning_rate": 3.172822779468272e-08, + "loss": 3.7664, + "step": 3359500 + }, + { + "epoch": 37.31, + "learning_rate": 3.1714346000932854e-08, + "loss": 3.7526, + "step": 3360000 + }, + { + "epoch": 37.32, + "learning_rate": 3.170046420718299e-08, + "loss": 3.7588, + "step": 3360500 + }, + { + "epoch": 37.33, + "learning_rate": 3.168658241343313e-08, + "loss": 3.7548, + "step": 3361000 + }, + { + "epoch": 37.33, + "learning_rate": 3.1672700619683275e-08, + "loss": 3.7603, + "step": 3361500 + }, + { + "epoch": 37.34, + "learning_rate": 3.165881882593341e-08, + "loss": 3.7507, + "step": 3362000 + }, + { + "epoch": 37.34, + "learning_rate": 3.164493703218355e-08, + "loss": 3.7634, + "step": 3362500 + }, + { + "epoch": 37.35, + "learning_rate": 3.163105523843369e-08, + "loss": 3.7463, + "step": 3363000 + }, + { + "epoch": 37.35, + "learning_rate": 3.1617173444683824e-08, + "loss": 3.7564, + "step": 3363500 + }, + { + "epoch": 37.36, + "learning_rate": 3.1603291650933966e-08, + "loss": 3.7643, + "step": 3364000 + }, + { + "epoch": 37.36, + "learning_rate": 3.158940985718411e-08, + "loss": 3.7553, + "step": 3364500 + }, + { + "epoch": 37.37, + "learning_rate": 3.1575528063434244e-08, + "loss": 3.7384, + "step": 3365000 + }, + { + "epoch": 37.38, + "learning_rate": 3.1561646269684386e-08, + "loss": 3.7471, + "step": 3365500 + }, + { + "epoch": 37.38, + "learning_rate": 3.154776447593452e-08, + "loss": 3.7494, + "step": 3366000 + }, + { + "epoch": 37.39, + "learning_rate": 3.153388268218466e-08, + "loss": 3.7442, + "step": 3366500 + }, + { + "epoch": 37.39, + "learning_rate": 3.15200008884348e-08, + "loss": 3.7769, + "step": 3367000 + }, + { + "epoch": 37.4, + "learning_rate": 3.1506119094684935e-08, + "loss": 3.7337, + "step": 3367500 + }, + { + "epoch": 37.4, + "learning_rate": 3.149223730093508e-08, + "loss": 3.7813, + "step": 3368000 + }, + { + "epoch": 37.41, + "learning_rate": 3.147835550718522e-08, + "loss": 3.7316, + "step": 3368500 + }, + { + "epoch": 37.41, + "learning_rate": 3.146447371343535e-08, + "loss": 3.7523, + "step": 3369000 + }, + { + "epoch": 37.42, + "learning_rate": 3.145059191968549e-08, + "loss": 3.7453, + "step": 3369500 + }, + { + "epoch": 37.43, + "learning_rate": 3.1436710125935633e-08, + "loss": 3.7436, + "step": 3370000 + }, + { + "epoch": 37.43, + "learning_rate": 3.142282833218577e-08, + "loss": 3.7641, + "step": 3370500 + }, + { + "epoch": 37.44, + "learning_rate": 3.140894653843591e-08, + "loss": 3.7519, + "step": 3371000 + }, + { + "epoch": 37.44, + "learning_rate": 3.1395064744686054e-08, + "loss": 3.7592, + "step": 3371500 + }, + { + "epoch": 37.45, + "learning_rate": 3.138118295093618e-08, + "loss": 3.7397, + "step": 3372000 + }, + { + "epoch": 37.45, + "learning_rate": 3.1367301157186325e-08, + "loss": 3.7388, + "step": 3372500 + }, + { + "epoch": 37.46, + "learning_rate": 3.135341936343646e-08, + "loss": 3.7509, + "step": 3373000 + }, + { + "epoch": 37.46, + "learning_rate": 3.13395375696866e-08, + "loss": 3.7596, + "step": 3373500 + }, + { + "epoch": 37.47, + "learning_rate": 3.1325655775936745e-08, + "loss": 3.7439, + "step": 3374000 + }, + { + "epoch": 37.48, + "learning_rate": 3.131177398218688e-08, + "loss": 3.7576, + "step": 3374500 + }, + { + "epoch": 37.48, + "learning_rate": 3.1297892188437016e-08, + "loss": 3.7385, + "step": 3375000 + }, + { + "epoch": 37.49, + "learning_rate": 3.128401039468716e-08, + "loss": 3.7527, + "step": 3375500 + }, + { + "epoch": 37.49, + "learning_rate": 3.1270128600937294e-08, + "loss": 3.7459, + "step": 3376000 + }, + { + "epoch": 37.5, + "learning_rate": 3.1256246807187437e-08, + "loss": 3.7583, + "step": 3376500 + }, + { + "epoch": 37.5, + "learning_rate": 3.124236501343758e-08, + "loss": 3.7548, + "step": 3377000 + }, + { + "epoch": 37.51, + "learning_rate": 3.1228483219687714e-08, + "loss": 3.7378, + "step": 3377500 + }, + { + "epoch": 37.51, + "learning_rate": 3.121460142593785e-08, + "loss": 3.7376, + "step": 3378000 + }, + { + "epoch": 37.52, + "learning_rate": 3.120071963218799e-08, + "loss": 3.7718, + "step": 3378500 + }, + { + "epoch": 37.53, + "learning_rate": 3.118683783843813e-08, + "loss": 3.746, + "step": 3379000 + }, + { + "epoch": 37.53, + "learning_rate": 3.117295604468827e-08, + "loss": 3.7418, + "step": 3379500 + }, + { + "epoch": 37.54, + "learning_rate": 3.1159074250938406e-08, + "loss": 3.745, + "step": 3380000 + }, + { + "epoch": 37.54, + "learning_rate": 3.114519245718855e-08, + "loss": 3.7314, + "step": 3380500 + }, + { + "epoch": 37.55, + "learning_rate": 3.1131310663438684e-08, + "loss": 3.7621, + "step": 3381000 + }, + { + "epoch": 37.55, + "learning_rate": 3.111742886968882e-08, + "loss": 3.7492, + "step": 3381500 + }, + { + "epoch": 37.56, + "learning_rate": 3.110354707593896e-08, + "loss": 3.7492, + "step": 3382000 + }, + { + "epoch": 37.56, + "learning_rate": 3.1089665282189104e-08, + "loss": 3.7376, + "step": 3382500 + }, + { + "epoch": 37.57, + "learning_rate": 3.107578348843924e-08, + "loss": 3.7621, + "step": 3383000 + }, + { + "epoch": 37.58, + "learning_rate": 3.106190169468938e-08, + "loss": 3.7644, + "step": 3383500 + }, + { + "epoch": 37.58, + "learning_rate": 3.104801990093952e-08, + "loss": 3.7444, + "step": 3384000 + }, + { + "epoch": 37.59, + "learning_rate": 3.103413810718965e-08, + "loss": 3.7507, + "step": 3384500 + }, + { + "epoch": 37.59, + "learning_rate": 3.1020256313439795e-08, + "loss": 3.7446, + "step": 3385000 + }, + { + "epoch": 37.6, + "learning_rate": 3.100637451968994e-08, + "loss": 3.7623, + "step": 3385500 + }, + { + "epoch": 37.6, + "learning_rate": 3.0992492725940073e-08, + "loss": 3.7418, + "step": 3386000 + }, + { + "epoch": 37.61, + "learning_rate": 3.0978610932190216e-08, + "loss": 3.7487, + "step": 3386500 + }, + { + "epoch": 37.61, + "learning_rate": 3.096472913844035e-08, + "loss": 3.7393, + "step": 3387000 + }, + { + "epoch": 37.62, + "learning_rate": 3.095084734469049e-08, + "loss": 3.7549, + "step": 3387500 + }, + { + "epoch": 37.63, + "learning_rate": 3.093696555094063e-08, + "loss": 3.7511, + "step": 3388000 + }, + { + "epoch": 37.63, + "learning_rate": 3.0923083757190765e-08, + "loss": 3.754, + "step": 3388500 + }, + { + "epoch": 37.64, + "learning_rate": 3.090920196344091e-08, + "loss": 3.7522, + "step": 3389000 + }, + { + "epoch": 37.64, + "learning_rate": 3.089532016969105e-08, + "loss": 3.7507, + "step": 3389500 + }, + { + "epoch": 37.65, + "learning_rate": 3.0881438375941185e-08, + "loss": 3.757, + "step": 3390000 + }, + { + "epoch": 37.65, + "learning_rate": 3.086755658219132e-08, + "loss": 3.7612, + "step": 3390500 + }, + { + "epoch": 37.66, + "learning_rate": 3.085367478844146e-08, + "loss": 3.7552, + "step": 3391000 + }, + { + "epoch": 37.66, + "learning_rate": 3.08397929946916e-08, + "loss": 3.7644, + "step": 3391500 + }, + { + "epoch": 37.67, + "learning_rate": 3.082591120094174e-08, + "loss": 3.7576, + "step": 3392000 + }, + { + "epoch": 37.68, + "learning_rate": 3.081202940719188e-08, + "loss": 3.7415, + "step": 3392500 + }, + { + "epoch": 37.68, + "learning_rate": 3.079814761344202e-08, + "loss": 3.7595, + "step": 3393000 + }, + { + "epoch": 37.69, + "learning_rate": 3.0784265819692154e-08, + "loss": 3.7499, + "step": 3393500 + }, + { + "epoch": 37.69, + "learning_rate": 3.0770384025942297e-08, + "loss": 3.7425, + "step": 3394000 + }, + { + "epoch": 37.7, + "learning_rate": 3.075650223219243e-08, + "loss": 3.7547, + "step": 3394500 + }, + { + "epoch": 37.7, + "learning_rate": 3.0742620438442575e-08, + "loss": 3.7531, + "step": 3395000 + }, + { + "epoch": 37.71, + "learning_rate": 3.072873864469271e-08, + "loss": 3.7303, + "step": 3395500 + }, + { + "epoch": 37.71, + "learning_rate": 3.071485685094285e-08, + "loss": 3.7471, + "step": 3396000 + }, + { + "epoch": 37.72, + "learning_rate": 3.070097505719299e-08, + "loss": 3.7319, + "step": 3396500 + }, + { + "epoch": 37.73, + "learning_rate": 3.0687093263443124e-08, + "loss": 3.7544, + "step": 3397000 + }, + { + "epoch": 37.73, + "learning_rate": 3.0673211469693266e-08, + "loss": 3.7547, + "step": 3397500 + }, + { + "epoch": 37.74, + "learning_rate": 3.065932967594341e-08, + "loss": 3.7591, + "step": 3398000 + }, + { + "epoch": 37.74, + "learning_rate": 3.0645447882193544e-08, + "loss": 3.7565, + "step": 3398500 + }, + { + "epoch": 37.75, + "learning_rate": 3.0631566088443686e-08, + "loss": 3.7447, + "step": 3399000 + }, + { + "epoch": 37.75, + "learning_rate": 3.061768429469382e-08, + "loss": 3.744, + "step": 3399500 + }, + { + "epoch": 37.76, + "learning_rate": 3.060380250094396e-08, + "loss": 3.7565, + "step": 3400000 + }, + { + "epoch": 37.76, + "learning_rate": 3.05899207071941e-08, + "loss": 3.7659, + "step": 3400500 + }, + { + "epoch": 37.77, + "learning_rate": 3.057603891344424e-08, + "loss": 3.7541, + "step": 3401000 + }, + { + "epoch": 37.78, + "learning_rate": 3.056215711969438e-08, + "loss": 3.769, + "step": 3401500 + }, + { + "epoch": 37.78, + "learning_rate": 3.054827532594452e-08, + "loss": 3.7546, + "step": 3402000 + }, + { + "epoch": 37.79, + "learning_rate": 3.053439353219465e-08, + "loss": 3.7453, + "step": 3402500 + }, + { + "epoch": 37.79, + "learning_rate": 3.052051173844479e-08, + "loss": 3.743, + "step": 3403000 + }, + { + "epoch": 37.8, + "learning_rate": 3.0506629944694934e-08, + "loss": 3.745, + "step": 3403500 + }, + { + "epoch": 37.8, + "learning_rate": 3.049274815094507e-08, + "loss": 3.7554, + "step": 3404000 + }, + { + "epoch": 37.81, + "learning_rate": 3.047886635719521e-08, + "loss": 3.7689, + "step": 3404500 + }, + { + "epoch": 37.81, + "learning_rate": 3.0464984563445354e-08, + "loss": 3.7539, + "step": 3405000 + }, + { + "epoch": 37.82, + "learning_rate": 3.045110276969548e-08, + "loss": 3.734, + "step": 3405500 + }, + { + "epoch": 37.83, + "learning_rate": 3.0437220975945625e-08, + "loss": 3.7547, + "step": 3406000 + }, + { + "epoch": 37.83, + "learning_rate": 3.042333918219577e-08, + "loss": 3.76, + "step": 3406500 + }, + { + "epoch": 37.84, + "learning_rate": 3.04094573884459e-08, + "loss": 3.7463, + "step": 3407000 + }, + { + "epoch": 37.84, + "learning_rate": 3.0395575594696045e-08, + "loss": 3.7683, + "step": 3407500 + }, + { + "epoch": 37.85, + "learning_rate": 3.038169380094619e-08, + "loss": 3.7528, + "step": 3408000 + }, + { + "epoch": 37.85, + "learning_rate": 3.0367812007196316e-08, + "loss": 3.7601, + "step": 3408500 + }, + { + "epoch": 37.86, + "learning_rate": 3.035393021344646e-08, + "loss": 3.7699, + "step": 3409000 + }, + { + "epoch": 37.86, + "learning_rate": 3.0340048419696594e-08, + "loss": 3.7559, + "step": 3409500 + }, + { + "epoch": 37.87, + "learning_rate": 3.0326166625946737e-08, + "loss": 3.7554, + "step": 3410000 + }, + { + "epoch": 37.88, + "learning_rate": 3.031228483219688e-08, + "loss": 3.7509, + "step": 3410500 + }, + { + "epoch": 37.88, + "learning_rate": 3.0298403038447015e-08, + "loss": 3.7593, + "step": 3411000 + }, + { + "epoch": 37.89, + "learning_rate": 3.028452124469715e-08, + "loss": 3.7492, + "step": 3411500 + }, + { + "epoch": 37.89, + "learning_rate": 3.027063945094729e-08, + "loss": 3.7598, + "step": 3412000 + }, + { + "epoch": 37.9, + "learning_rate": 3.025675765719743e-08, + "loss": 3.7339, + "step": 3412500 + }, + { + "epoch": 37.9, + "learning_rate": 3.024287586344757e-08, + "loss": 3.7586, + "step": 3413000 + }, + { + "epoch": 37.91, + "learning_rate": 3.022899406969771e-08, + "loss": 3.7677, + "step": 3413500 + }, + { + "epoch": 37.91, + "learning_rate": 3.021511227594785e-08, + "loss": 3.754, + "step": 3414000 + }, + { + "epoch": 37.92, + "learning_rate": 3.0201230482197984e-08, + "loss": 3.7471, + "step": 3414500 + }, + { + "epoch": 37.93, + "learning_rate": 3.0187348688448126e-08, + "loss": 3.7551, + "step": 3415000 + }, + { + "epoch": 37.93, + "learning_rate": 3.017346689469826e-08, + "loss": 3.7552, + "step": 3415500 + }, + { + "epoch": 37.94, + "learning_rate": 3.0159585100948404e-08, + "loss": 3.7701, + "step": 3416000 + }, + { + "epoch": 37.94, + "learning_rate": 3.014570330719854e-08, + "loss": 3.7689, + "step": 3416500 + }, + { + "epoch": 37.95, + "learning_rate": 3.013182151344868e-08, + "loss": 3.7536, + "step": 3417000 + }, + { + "epoch": 37.95, + "learning_rate": 3.011793971969882e-08, + "loss": 3.7631, + "step": 3417500 + }, + { + "epoch": 37.96, + "learning_rate": 3.010405792594895e-08, + "loss": 3.7546, + "step": 3418000 + }, + { + "epoch": 37.96, + "learning_rate": 3.0090176132199096e-08, + "loss": 3.7525, + "step": 3418500 + }, + { + "epoch": 37.97, + "learning_rate": 3.007629433844924e-08, + "loss": 3.7527, + "step": 3419000 + }, + { + "epoch": 37.98, + "learning_rate": 3.0062412544699373e-08, + "loss": 3.731, + "step": 3419500 + }, + { + "epoch": 37.98, + "learning_rate": 3.0048530750949516e-08, + "loss": 3.7615, + "step": 3420000 + }, + { + "epoch": 37.99, + "learning_rate": 3.003464895719965e-08, + "loss": 3.7465, + "step": 3420500 + }, + { + "epoch": 37.99, + "learning_rate": 3.002076716344979e-08, + "loss": 3.7562, + "step": 3421000 + }, + { + "epoch": 38.0, + "learning_rate": 3.000688536969993e-08, + "loss": 3.7605, + "step": 3421500 + }, + { + "epoch": 38.0, + "eval_loss": 3.8280832767486572, + "eval_runtime": 6.3072, + "eval_samples_per_second": 246.384, + "step": 3421748 + }, + { + "epoch": 38.0, + "learning_rate": 2.999300357595007e-08, + "loss": 3.7658, + "step": 3422000 + }, + { + "epoch": 38.01, + "learning_rate": 2.997912178220021e-08, + "loss": 3.7584, + "step": 3422500 + }, + { + "epoch": 38.01, + "learning_rate": 2.996523998845035e-08, + "loss": 3.7427, + "step": 3423000 + }, + { + "epoch": 38.02, + "learning_rate": 2.9951358194700485e-08, + "loss": 3.735, + "step": 3423500 + }, + { + "epoch": 38.03, + "learning_rate": 2.993747640095062e-08, + "loss": 3.735, + "step": 3424000 + }, + { + "epoch": 38.03, + "learning_rate": 2.992359460720076e-08, + "loss": 3.739, + "step": 3424500 + }, + { + "epoch": 38.04, + "learning_rate": 2.99097128134509e-08, + "loss": 3.7649, + "step": 3425000 + }, + { + "epoch": 38.04, + "learning_rate": 2.989583101970104e-08, + "loss": 3.7447, + "step": 3425500 + }, + { + "epoch": 38.05, + "learning_rate": 2.988194922595118e-08, + "loss": 3.7576, + "step": 3426000 + }, + { + "epoch": 38.05, + "learning_rate": 2.986806743220132e-08, + "loss": 3.7477, + "step": 3426500 + }, + { + "epoch": 38.06, + "learning_rate": 2.9854185638451454e-08, + "loss": 3.7629, + "step": 3427000 + }, + { + "epoch": 38.06, + "learning_rate": 2.98403038447016e-08, + "loss": 3.7544, + "step": 3427500 + }, + { + "epoch": 38.07, + "learning_rate": 2.982642205095173e-08, + "loss": 3.7341, + "step": 3428000 + }, + { + "epoch": 38.07, + "learning_rate": 2.9812540257201875e-08, + "loss": 3.7559, + "step": 3428500 + }, + { + "epoch": 38.08, + "learning_rate": 2.9798658463452014e-08, + "loss": 3.7492, + "step": 3429000 + }, + { + "epoch": 38.09, + "learning_rate": 2.978477666970215e-08, + "loss": 3.7555, + "step": 3429500 + }, + { + "epoch": 38.09, + "learning_rate": 2.977089487595229e-08, + "loss": 3.7478, + "step": 3430000 + }, + { + "epoch": 38.1, + "learning_rate": 2.975701308220243e-08, + "loss": 3.764, + "step": 3430500 + }, + { + "epoch": 38.1, + "learning_rate": 2.9743131288452566e-08, + "loss": 3.749, + "step": 3431000 + }, + { + "epoch": 38.11, + "learning_rate": 2.972924949470271e-08, + "loss": 3.7642, + "step": 3431500 + }, + { + "epoch": 38.11, + "learning_rate": 2.9715367700952844e-08, + "loss": 3.7605, + "step": 3432000 + }, + { + "epoch": 38.12, + "learning_rate": 2.9701485907202983e-08, + "loss": 3.7785, + "step": 3432500 + }, + { + "epoch": 38.12, + "learning_rate": 2.9687604113453125e-08, + "loss": 3.7455, + "step": 3433000 + }, + { + "epoch": 38.13, + "learning_rate": 2.967372231970326e-08, + "loss": 3.7649, + "step": 3433500 + }, + { + "epoch": 38.14, + "learning_rate": 2.96598405259534e-08, + "loss": 3.7552, + "step": 3434000 + }, + { + "epoch": 38.14, + "learning_rate": 2.9645958732203542e-08, + "loss": 3.7456, + "step": 3434500 + }, + { + "epoch": 38.15, + "learning_rate": 2.9632076938453678e-08, + "loss": 3.7597, + "step": 3435000 + }, + { + "epoch": 38.15, + "learning_rate": 2.9618195144703817e-08, + "loss": 3.7324, + "step": 3435500 + }, + { + "epoch": 38.16, + "learning_rate": 2.960431335095396e-08, + "loss": 3.7703, + "step": 3436000 + }, + { + "epoch": 38.16, + "learning_rate": 2.9590431557204095e-08, + "loss": 3.7595, + "step": 3436500 + }, + { + "epoch": 38.17, + "learning_rate": 2.9576549763454234e-08, + "loss": 3.733, + "step": 3437000 + }, + { + "epoch": 38.17, + "learning_rate": 2.9562667969704376e-08, + "loss": 3.7789, + "step": 3437500 + }, + { + "epoch": 38.18, + "learning_rate": 2.9548786175954508e-08, + "loss": 3.7548, + "step": 3438000 + }, + { + "epoch": 38.19, + "learning_rate": 2.953490438220465e-08, + "loss": 3.7639, + "step": 3438500 + }, + { + "epoch": 38.19, + "learning_rate": 2.9521022588454786e-08, + "loss": 3.773, + "step": 3439000 + }, + { + "epoch": 38.2, + "learning_rate": 2.9507140794704925e-08, + "loss": 3.7394, + "step": 3439500 + }, + { + "epoch": 38.2, + "learning_rate": 2.9493259000955067e-08, + "loss": 3.7502, + "step": 3440000 + }, + { + "epoch": 38.21, + "learning_rate": 2.9479377207205203e-08, + "loss": 3.7541, + "step": 3440500 + }, + { + "epoch": 38.21, + "learning_rate": 2.9465495413455342e-08, + "loss": 3.7506, + "step": 3441000 + }, + { + "epoch": 38.22, + "learning_rate": 2.9451613619705484e-08, + "loss": 3.7602, + "step": 3441500 + }, + { + "epoch": 38.22, + "learning_rate": 2.943773182595562e-08, + "loss": 3.7532, + "step": 3442000 + }, + { + "epoch": 38.23, + "learning_rate": 2.942385003220576e-08, + "loss": 3.7475, + "step": 3442500 + }, + { + "epoch": 38.24, + "learning_rate": 2.94099682384559e-08, + "loss": 3.7576, + "step": 3443000 + }, + { + "epoch": 38.24, + "learning_rate": 2.9396086444706037e-08, + "loss": 3.749, + "step": 3443500 + }, + { + "epoch": 38.25, + "learning_rate": 2.9382204650956176e-08, + "loss": 3.7464, + "step": 3444000 + }, + { + "epoch": 38.25, + "learning_rate": 2.9368322857206318e-08, + "loss": 3.7677, + "step": 3444500 + }, + { + "epoch": 38.26, + "learning_rate": 2.9354441063456454e-08, + "loss": 3.7481, + "step": 3445000 + }, + { + "epoch": 38.26, + "learning_rate": 2.9340559269706593e-08, + "loss": 3.7454, + "step": 3445500 + }, + { + "epoch": 38.27, + "learning_rate": 2.9326677475956728e-08, + "loss": 3.766, + "step": 3446000 + }, + { + "epoch": 38.27, + "learning_rate": 2.931279568220687e-08, + "loss": 3.746, + "step": 3446500 + }, + { + "epoch": 38.28, + "learning_rate": 2.929891388845701e-08, + "loss": 3.7496, + "step": 3447000 + }, + { + "epoch": 38.29, + "learning_rate": 2.9285032094707145e-08, + "loss": 3.7606, + "step": 3447500 + }, + { + "epoch": 38.29, + "learning_rate": 2.9271150300957287e-08, + "loss": 3.7598, + "step": 3448000 + }, + { + "epoch": 38.3, + "learning_rate": 2.9257268507207426e-08, + "loss": 3.7538, + "step": 3448500 + }, + { + "epoch": 38.3, + "learning_rate": 2.9243386713457562e-08, + "loss": 3.7618, + "step": 3449000 + }, + { + "epoch": 38.31, + "learning_rate": 2.9229504919707704e-08, + "loss": 3.7546, + "step": 3449500 + }, + { + "epoch": 38.31, + "learning_rate": 2.9215623125957843e-08, + "loss": 3.7478, + "step": 3450000 + }, + { + "epoch": 38.32, + "learning_rate": 2.920174133220798e-08, + "loss": 3.7631, + "step": 3450500 + }, + { + "epoch": 38.32, + "learning_rate": 2.918785953845812e-08, + "loss": 3.76, + "step": 3451000 + }, + { + "epoch": 38.33, + "learning_rate": 2.917397774470826e-08, + "loss": 3.7539, + "step": 3451500 + }, + { + "epoch": 38.34, + "learning_rate": 2.9160095950958396e-08, + "loss": 3.745, + "step": 3452000 + }, + { + "epoch": 38.34, + "learning_rate": 2.9146214157208538e-08, + "loss": 3.7284, + "step": 3452500 + }, + { + "epoch": 38.35, + "learning_rate": 2.9132332363458674e-08, + "loss": 3.7579, + "step": 3453000 + }, + { + "epoch": 38.35, + "learning_rate": 2.9118450569708813e-08, + "loss": 3.7519, + "step": 3453500 + }, + { + "epoch": 38.36, + "learning_rate": 2.9104568775958955e-08, + "loss": 3.7567, + "step": 3454000 + }, + { + "epoch": 38.36, + "learning_rate": 2.909068698220909e-08, + "loss": 3.7407, + "step": 3454500 + }, + { + "epoch": 38.37, + "learning_rate": 2.907680518845923e-08, + "loss": 3.7542, + "step": 3455000 + }, + { + "epoch": 38.37, + "learning_rate": 2.906292339470937e-08, + "loss": 3.7651, + "step": 3455500 + }, + { + "epoch": 38.38, + "learning_rate": 2.9049041600959507e-08, + "loss": 3.7661, + "step": 3456000 + }, + { + "epoch": 38.39, + "learning_rate": 2.9035159807209646e-08, + "loss": 3.7559, + "step": 3456500 + }, + { + "epoch": 38.39, + "learning_rate": 2.902127801345979e-08, + "loss": 3.7596, + "step": 3457000 + }, + { + "epoch": 38.4, + "learning_rate": 2.9007396219709924e-08, + "loss": 3.753, + "step": 3457500 + }, + { + "epoch": 38.4, + "learning_rate": 2.8993514425960063e-08, + "loss": 3.7527, + "step": 3458000 + }, + { + "epoch": 38.41, + "learning_rate": 2.8979632632210205e-08, + "loss": 3.7503, + "step": 3458500 + }, + { + "epoch": 38.41, + "learning_rate": 2.896575083846034e-08, + "loss": 3.7453, + "step": 3459000 + }, + { + "epoch": 38.42, + "learning_rate": 2.895186904471048e-08, + "loss": 3.7524, + "step": 3459500 + }, + { + "epoch": 38.42, + "learning_rate": 2.8937987250960622e-08, + "loss": 3.7489, + "step": 3460000 + }, + { + "epoch": 38.43, + "learning_rate": 2.8924105457210758e-08, + "loss": 3.745, + "step": 3460500 + }, + { + "epoch": 38.44, + "learning_rate": 2.8910223663460897e-08, + "loss": 3.765, + "step": 3461000 + }, + { + "epoch": 38.44, + "learning_rate": 2.8896341869711032e-08, + "loss": 3.7431, + "step": 3461500 + }, + { + "epoch": 38.45, + "learning_rate": 2.8882460075961175e-08, + "loss": 3.7287, + "step": 3462000 + }, + { + "epoch": 38.45, + "learning_rate": 2.8868578282211314e-08, + "loss": 3.7557, + "step": 3462500 + }, + { + "epoch": 38.46, + "learning_rate": 2.885469648846145e-08, + "loss": 3.7507, + "step": 3463000 + }, + { + "epoch": 38.46, + "learning_rate": 2.884081469471159e-08, + "loss": 3.7603, + "step": 3463500 + }, + { + "epoch": 38.47, + "learning_rate": 2.882693290096173e-08, + "loss": 3.7438, + "step": 3464000 + }, + { + "epoch": 38.47, + "learning_rate": 2.8813051107211866e-08, + "loss": 3.7376, + "step": 3464500 + }, + { + "epoch": 38.48, + "learning_rate": 2.879916931346201e-08, + "loss": 3.7416, + "step": 3465000 + }, + { + "epoch": 38.49, + "learning_rate": 2.8785287519712147e-08, + "loss": 3.7641, + "step": 3465500 + }, + { + "epoch": 38.49, + "learning_rate": 2.8771405725962283e-08, + "loss": 3.7652, + "step": 3466000 + }, + { + "epoch": 38.5, + "learning_rate": 2.8757523932212425e-08, + "loss": 3.7758, + "step": 3466500 + }, + { + "epoch": 38.5, + "learning_rate": 2.8743642138462564e-08, + "loss": 3.7463, + "step": 3467000 + }, + { + "epoch": 38.51, + "learning_rate": 2.87297603447127e-08, + "loss": 3.7515, + "step": 3467500 + }, + { + "epoch": 38.51, + "learning_rate": 2.8715878550962842e-08, + "loss": 3.7448, + "step": 3468000 + }, + { + "epoch": 38.52, + "learning_rate": 2.8701996757212975e-08, + "loss": 3.7526, + "step": 3468500 + }, + { + "epoch": 38.52, + "learning_rate": 2.8688114963463117e-08, + "loss": 3.7561, + "step": 3469000 + }, + { + "epoch": 38.53, + "learning_rate": 2.867423316971326e-08, + "loss": 3.7667, + "step": 3469500 + }, + { + "epoch": 38.54, + "learning_rate": 2.866035137596339e-08, + "loss": 3.7514, + "step": 3470000 + }, + { + "epoch": 38.54, + "learning_rate": 2.8646469582213534e-08, + "loss": 3.7546, + "step": 3470500 + }, + { + "epoch": 38.55, + "learning_rate": 2.8632587788463676e-08, + "loss": 3.7418, + "step": 3471000 + }, + { + "epoch": 38.55, + "learning_rate": 2.8618705994713808e-08, + "loss": 3.7401, + "step": 3471500 + }, + { + "epoch": 38.56, + "learning_rate": 2.860482420096395e-08, + "loss": 3.7646, + "step": 3472000 + }, + { + "epoch": 38.56, + "learning_rate": 2.8590942407214093e-08, + "loss": 3.751, + "step": 3472500 + }, + { + "epoch": 38.57, + "learning_rate": 2.8577060613464225e-08, + "loss": 3.7566, + "step": 3473000 + }, + { + "epoch": 38.57, + "learning_rate": 2.8563178819714367e-08, + "loss": 3.7523, + "step": 3473500 + }, + { + "epoch": 38.58, + "learning_rate": 2.854929702596451e-08, + "loss": 3.7539, + "step": 3474000 + }, + { + "epoch": 38.59, + "learning_rate": 2.8535415232214642e-08, + "loss": 3.7471, + "step": 3474500 + }, + { + "epoch": 38.59, + "learning_rate": 2.8521533438464784e-08, + "loss": 3.744, + "step": 3475000 + }, + { + "epoch": 38.6, + "learning_rate": 2.850765164471492e-08, + "loss": 3.7605, + "step": 3475500 + }, + { + "epoch": 38.6, + "learning_rate": 2.849376985096506e-08, + "loss": 3.7393, + "step": 3476000 + }, + { + "epoch": 38.61, + "learning_rate": 2.84798880572152e-08, + "loss": 3.7382, + "step": 3476500 + }, + { + "epoch": 38.61, + "learning_rate": 2.8466006263465337e-08, + "loss": 3.736, + "step": 3477000 + }, + { + "epoch": 38.62, + "learning_rate": 2.8452124469715476e-08, + "loss": 3.7554, + "step": 3477500 + }, + { + "epoch": 38.62, + "learning_rate": 2.8438242675965618e-08, + "loss": 3.7476, + "step": 3478000 + }, + { + "epoch": 38.63, + "learning_rate": 2.8424360882215754e-08, + "loss": 3.7556, + "step": 3478500 + }, + { + "epoch": 38.64, + "learning_rate": 2.8410479088465893e-08, + "loss": 3.7514, + "step": 3479000 + }, + { + "epoch": 38.64, + "learning_rate": 2.8396597294716035e-08, + "loss": 3.7473, + "step": 3479500 + }, + { + "epoch": 38.65, + "learning_rate": 2.838271550096617e-08, + "loss": 3.7356, + "step": 3480000 + }, + { + "epoch": 38.65, + "learning_rate": 2.836883370721631e-08, + "loss": 3.7608, + "step": 3480500 + }, + { + "epoch": 38.66, + "learning_rate": 2.8354951913466452e-08, + "loss": 3.7694, + "step": 3481000 + }, + { + "epoch": 38.66, + "learning_rate": 2.8341070119716587e-08, + "loss": 3.7454, + "step": 3481500 + }, + { + "epoch": 38.67, + "learning_rate": 2.8327188325966726e-08, + "loss": 3.7566, + "step": 3482000 + }, + { + "epoch": 38.67, + "learning_rate": 2.8313306532216862e-08, + "loss": 3.743, + "step": 3482500 + }, + { + "epoch": 38.68, + "learning_rate": 2.8299424738467004e-08, + "loss": 3.745, + "step": 3483000 + }, + { + "epoch": 38.69, + "learning_rate": 2.8285542944717143e-08, + "loss": 3.7596, + "step": 3483500 + }, + { + "epoch": 38.69, + "learning_rate": 2.827166115096728e-08, + "loss": 3.7651, + "step": 3484000 + }, + { + "epoch": 38.7, + "learning_rate": 2.825777935721742e-08, + "loss": 3.752, + "step": 3484500 + }, + { + "epoch": 38.7, + "learning_rate": 2.824389756346756e-08, + "loss": 3.746, + "step": 3485000 + }, + { + "epoch": 38.71, + "learning_rate": 2.8230015769717696e-08, + "loss": 3.7606, + "step": 3485500 + }, + { + "epoch": 38.71, + "learning_rate": 2.8216133975967838e-08, + "loss": 3.7707, + "step": 3486000 + }, + { + "epoch": 38.72, + "learning_rate": 2.8202252182217977e-08, + "loss": 3.742, + "step": 3486500 + }, + { + "epoch": 38.72, + "learning_rate": 2.8188370388468113e-08, + "loss": 3.745, + "step": 3487000 + }, + { + "epoch": 38.73, + "learning_rate": 2.8174488594718255e-08, + "loss": 3.7541, + "step": 3487500 + }, + { + "epoch": 38.74, + "learning_rate": 2.8160606800968394e-08, + "loss": 3.7565, + "step": 3488000 + }, + { + "epoch": 38.74, + "learning_rate": 2.814672500721853e-08, + "loss": 3.7212, + "step": 3488500 + }, + { + "epoch": 38.75, + "learning_rate": 2.8132843213468672e-08, + "loss": 3.7444, + "step": 3489000 + }, + { + "epoch": 38.75, + "learning_rate": 2.811896141971881e-08, + "loss": 3.75, + "step": 3489500 + }, + { + "epoch": 38.76, + "learning_rate": 2.8105079625968946e-08, + "loss": 3.7609, + "step": 3490000 + }, + { + "epoch": 38.76, + "learning_rate": 2.809119783221909e-08, + "loss": 3.7626, + "step": 3490500 + }, + { + "epoch": 38.77, + "learning_rate": 2.8077316038469224e-08, + "loss": 3.7413, + "step": 3491000 + }, + { + "epoch": 38.77, + "learning_rate": 2.8063434244719363e-08, + "loss": 3.739, + "step": 3491500 + }, + { + "epoch": 38.78, + "learning_rate": 2.8049552450969505e-08, + "loss": 3.7489, + "step": 3492000 + }, + { + "epoch": 38.79, + "learning_rate": 2.803567065721964e-08, + "loss": 3.7706, + "step": 3492500 + }, + { + "epoch": 38.79, + "learning_rate": 2.802178886346978e-08, + "loss": 3.7609, + "step": 3493000 + }, + { + "epoch": 38.8, + "learning_rate": 2.8007907069719922e-08, + "loss": 3.7575, + "step": 3493500 + }, + { + "epoch": 38.8, + "learning_rate": 2.7994025275970058e-08, + "loss": 3.7854, + "step": 3494000 + }, + { + "epoch": 38.81, + "learning_rate": 2.7980143482220197e-08, + "loss": 3.7427, + "step": 3494500 + }, + { + "epoch": 38.81, + "learning_rate": 2.796626168847034e-08, + "loss": 3.7524, + "step": 3495000 + }, + { + "epoch": 38.82, + "learning_rate": 2.7952379894720475e-08, + "loss": 3.7357, + "step": 3495500 + }, + { + "epoch": 38.82, + "learning_rate": 2.7938498100970614e-08, + "loss": 3.7485, + "step": 3496000 + }, + { + "epoch": 38.83, + "learning_rate": 2.7924616307220756e-08, + "loss": 3.7661, + "step": 3496500 + }, + { + "epoch": 38.84, + "learning_rate": 2.7910734513470892e-08, + "loss": 3.7673, + "step": 3497000 + }, + { + "epoch": 38.84, + "learning_rate": 2.789685271972103e-08, + "loss": 3.7539, + "step": 3497500 + }, + { + "epoch": 38.85, + "learning_rate": 2.7882970925971166e-08, + "loss": 3.7616, + "step": 3498000 + }, + { + "epoch": 38.85, + "learning_rate": 2.786908913222131e-08, + "loss": 3.7651, + "step": 3498500 + }, + { + "epoch": 38.86, + "learning_rate": 2.7855207338471448e-08, + "loss": 3.7554, + "step": 3499000 + }, + { + "epoch": 38.86, + "learning_rate": 2.7841325544721583e-08, + "loss": 3.7668, + "step": 3499500 + }, + { + "epoch": 38.87, + "learning_rate": 2.7827443750971725e-08, + "loss": 3.7682, + "step": 3500000 + }, + { + "epoch": 38.87, + "learning_rate": 2.7813561957221864e-08, + "loss": 3.7428, + "step": 3500500 + }, + { + "epoch": 38.88, + "learning_rate": 2.7799680163472e-08, + "loss": 3.7513, + "step": 3501000 + }, + { + "epoch": 38.89, + "learning_rate": 2.7785798369722142e-08, + "loss": 3.7702, + "step": 3501500 + }, + { + "epoch": 38.89, + "learning_rate": 2.777191657597228e-08, + "loss": 3.7647, + "step": 3502000 + }, + { + "epoch": 38.9, + "learning_rate": 2.7758034782222417e-08, + "loss": 3.7353, + "step": 3502500 + }, + { + "epoch": 38.9, + "learning_rate": 2.774415298847256e-08, + "loss": 3.7527, + "step": 3503000 + }, + { + "epoch": 38.91, + "learning_rate": 2.7730271194722698e-08, + "loss": 3.7537, + "step": 3503500 + }, + { + "epoch": 38.91, + "learning_rate": 2.7716389400972834e-08, + "loss": 3.7481, + "step": 3504000 + }, + { + "epoch": 38.92, + "learning_rate": 2.7702507607222976e-08, + "loss": 3.7445, + "step": 3504500 + }, + { + "epoch": 38.92, + "learning_rate": 2.768862581347311e-08, + "loss": 3.7493, + "step": 3505000 + }, + { + "epoch": 38.93, + "learning_rate": 2.767474401972325e-08, + "loss": 3.7441, + "step": 3505500 + }, + { + "epoch": 38.94, + "learning_rate": 2.7660862225973393e-08, + "loss": 3.7656, + "step": 3506000 + }, + { + "epoch": 38.94, + "learning_rate": 2.7646980432223525e-08, + "loss": 3.7527, + "step": 3506500 + }, + { + "epoch": 38.95, + "learning_rate": 2.7633098638473667e-08, + "loss": 3.7352, + "step": 3507000 + }, + { + "epoch": 38.95, + "learning_rate": 2.761921684472381e-08, + "loss": 3.7693, + "step": 3507500 + }, + { + "epoch": 38.96, + "learning_rate": 2.7605335050973942e-08, + "loss": 3.7429, + "step": 3508000 + }, + { + "epoch": 38.96, + "learning_rate": 2.7591453257224084e-08, + "loss": 3.7384, + "step": 3508500 + }, + { + "epoch": 38.97, + "learning_rate": 2.7577571463474227e-08, + "loss": 3.748, + "step": 3509000 + }, + { + "epoch": 38.97, + "learning_rate": 2.756368966972436e-08, + "loss": 3.7727, + "step": 3509500 + }, + { + "epoch": 38.98, + "learning_rate": 2.75498078759745e-08, + "loss": 3.7509, + "step": 3510000 + }, + { + "epoch": 38.99, + "learning_rate": 2.7535926082224643e-08, + "loss": 3.7379, + "step": 3510500 + }, + { + "epoch": 38.99, + "learning_rate": 2.7522044288474776e-08, + "loss": 3.7386, + "step": 3511000 + }, + { + "epoch": 39.0, + "learning_rate": 2.7508162494724918e-08, + "loss": 3.7521, + "step": 3511500 + }, + { + "epoch": 39.0, + "eval_loss": 3.82729172706604, + "eval_runtime": 6.3024, + "eval_samples_per_second": 246.573, + "step": 3511794 + }, + { + "epoch": 39.0, + "learning_rate": 2.7494280700975054e-08, + "loss": 3.7364, + "step": 3512000 + }, + { + "epoch": 39.01, + "learning_rate": 2.7480398907225193e-08, + "loss": 3.7451, + "step": 3512500 + }, + { + "epoch": 39.01, + "learning_rate": 2.7466517113475335e-08, + "loss": 3.7426, + "step": 3513000 + }, + { + "epoch": 39.02, + "learning_rate": 2.745263531972547e-08, + "loss": 3.764, + "step": 3513500 + }, + { + "epoch": 39.02, + "learning_rate": 2.743875352597561e-08, + "loss": 3.7632, + "step": 3514000 + }, + { + "epoch": 39.03, + "learning_rate": 2.7424871732225752e-08, + "loss": 3.7508, + "step": 3514500 + }, + { + "epoch": 39.04, + "learning_rate": 2.7410989938475887e-08, + "loss": 3.7539, + "step": 3515000 + }, + { + "epoch": 39.04, + "learning_rate": 2.7397108144726026e-08, + "loss": 3.7615, + "step": 3515500 + }, + { + "epoch": 39.05, + "learning_rate": 2.738322635097617e-08, + "loss": 3.7502, + "step": 3516000 + }, + { + "epoch": 39.05, + "learning_rate": 2.7369344557226304e-08, + "loss": 3.7465, + "step": 3516500 + }, + { + "epoch": 39.06, + "learning_rate": 2.7355462763476443e-08, + "loss": 3.7516, + "step": 3517000 + }, + { + "epoch": 39.06, + "learning_rate": 2.7341580969726586e-08, + "loss": 3.7589, + "step": 3517500 + }, + { + "epoch": 39.07, + "learning_rate": 2.732769917597672e-08, + "loss": 3.7642, + "step": 3518000 + }, + { + "epoch": 39.07, + "learning_rate": 2.731381738222686e-08, + "loss": 3.7521, + "step": 3518500 + }, + { + "epoch": 39.08, + "learning_rate": 2.7299935588477002e-08, + "loss": 3.7461, + "step": 3519000 + }, + { + "epoch": 39.09, + "learning_rate": 2.7286053794727138e-08, + "loss": 3.7551, + "step": 3519500 + }, + { + "epoch": 39.09, + "learning_rate": 2.7272172000977277e-08, + "loss": 3.7707, + "step": 3520000 + }, + { + "epoch": 39.1, + "learning_rate": 2.7258290207227413e-08, + "loss": 3.7479, + "step": 3520500 + }, + { + "epoch": 39.1, + "learning_rate": 2.7244408413477555e-08, + "loss": 3.7665, + "step": 3521000 + }, + { + "epoch": 39.11, + "learning_rate": 2.7230526619727694e-08, + "loss": 3.7669, + "step": 3521500 + }, + { + "epoch": 39.11, + "learning_rate": 2.721664482597783e-08, + "loss": 3.731, + "step": 3522000 + }, + { + "epoch": 39.12, + "learning_rate": 2.7202763032227972e-08, + "loss": 3.7581, + "step": 3522500 + }, + { + "epoch": 39.12, + "learning_rate": 2.718888123847811e-08, + "loss": 3.7739, + "step": 3523000 + }, + { + "epoch": 39.13, + "learning_rate": 2.7174999444728246e-08, + "loss": 3.7391, + "step": 3523500 + }, + { + "epoch": 39.14, + "learning_rate": 2.716111765097839e-08, + "loss": 3.7496, + "step": 3524000 + }, + { + "epoch": 39.14, + "learning_rate": 2.7147235857228528e-08, + "loss": 3.7448, + "step": 3524500 + }, + { + "epoch": 39.15, + "learning_rate": 2.7133354063478663e-08, + "loss": 3.7557, + "step": 3525000 + }, + { + "epoch": 39.15, + "learning_rate": 2.7119472269728806e-08, + "loss": 3.734, + "step": 3525500 + }, + { + "epoch": 39.16, + "learning_rate": 2.7105590475978944e-08, + "loss": 3.7516, + "step": 3526000 + }, + { + "epoch": 39.16, + "learning_rate": 2.709170868222908e-08, + "loss": 3.7597, + "step": 3526500 + }, + { + "epoch": 39.17, + "learning_rate": 2.7077826888479222e-08, + "loss": 3.7613, + "step": 3527000 + }, + { + "epoch": 39.17, + "learning_rate": 2.7063945094729358e-08, + "loss": 3.7499, + "step": 3527500 + }, + { + "epoch": 39.18, + "learning_rate": 2.7050063300979497e-08, + "loss": 3.7534, + "step": 3528000 + }, + { + "epoch": 39.19, + "learning_rate": 2.703618150722964e-08, + "loss": 3.7728, + "step": 3528500 + }, + { + "epoch": 39.19, + "learning_rate": 2.7022299713479775e-08, + "loss": 3.7418, + "step": 3529000 + }, + { + "epoch": 39.2, + "learning_rate": 2.7008417919729914e-08, + "loss": 3.7534, + "step": 3529500 + }, + { + "epoch": 39.2, + "learning_rate": 2.6994536125980056e-08, + "loss": 3.7543, + "step": 3530000 + }, + { + "epoch": 39.21, + "learning_rate": 2.6980654332230192e-08, + "loss": 3.7337, + "step": 3530500 + }, + { + "epoch": 39.21, + "learning_rate": 2.696677253848033e-08, + "loss": 3.7574, + "step": 3531000 + }, + { + "epoch": 39.22, + "learning_rate": 2.6952890744730473e-08, + "loss": 3.7431, + "step": 3531500 + }, + { + "epoch": 39.22, + "learning_rate": 2.693900895098061e-08, + "loss": 3.7423, + "step": 3532000 + }, + { + "epoch": 39.23, + "learning_rate": 2.6925127157230748e-08, + "loss": 3.7417, + "step": 3532500 + }, + { + "epoch": 39.24, + "learning_rate": 2.691124536348089e-08, + "loss": 3.7629, + "step": 3533000 + }, + { + "epoch": 39.24, + "learning_rate": 2.6897363569731026e-08, + "loss": 3.7343, + "step": 3533500 + }, + { + "epoch": 39.25, + "learning_rate": 2.6883481775981164e-08, + "loss": 3.7511, + "step": 3534000 + }, + { + "epoch": 39.25, + "learning_rate": 2.68695999822313e-08, + "loss": 3.7429, + "step": 3534500 + }, + { + "epoch": 39.26, + "learning_rate": 2.6855718188481442e-08, + "loss": 3.7562, + "step": 3535000 + }, + { + "epoch": 39.26, + "learning_rate": 2.684183639473158e-08, + "loss": 3.752, + "step": 3535500 + }, + { + "epoch": 39.27, + "learning_rate": 2.6827954600981717e-08, + "loss": 3.7493, + "step": 3536000 + }, + { + "epoch": 39.27, + "learning_rate": 2.681407280723186e-08, + "loss": 3.7525, + "step": 3536500 + }, + { + "epoch": 39.28, + "learning_rate": 2.6800191013481998e-08, + "loss": 3.7383, + "step": 3537000 + }, + { + "epoch": 39.29, + "learning_rate": 2.6786309219732134e-08, + "loss": 3.7495, + "step": 3537500 + }, + { + "epoch": 39.29, + "learning_rate": 2.6772427425982276e-08, + "loss": 3.7643, + "step": 3538000 + }, + { + "epoch": 39.3, + "learning_rate": 2.6758545632232415e-08, + "loss": 3.7354, + "step": 3538500 + }, + { + "epoch": 39.3, + "learning_rate": 2.674466383848255e-08, + "loss": 3.7526, + "step": 3539000 + }, + { + "epoch": 39.31, + "learning_rate": 2.6730782044732693e-08, + "loss": 3.7405, + "step": 3539500 + }, + { + "epoch": 39.31, + "learning_rate": 2.6716900250982832e-08, + "loss": 3.7404, + "step": 3540000 + }, + { + "epoch": 39.32, + "learning_rate": 2.6703018457232968e-08, + "loss": 3.7521, + "step": 3540500 + }, + { + "epoch": 39.32, + "learning_rate": 2.668913666348311e-08, + "loss": 3.728, + "step": 3541000 + }, + { + "epoch": 39.33, + "learning_rate": 2.6675254869733242e-08, + "loss": 3.7332, + "step": 3541500 + }, + { + "epoch": 39.34, + "learning_rate": 2.6661373075983384e-08, + "loss": 3.7437, + "step": 3542000 + }, + { + "epoch": 39.34, + "learning_rate": 2.6647491282233527e-08, + "loss": 3.7499, + "step": 3542500 + }, + { + "epoch": 39.35, + "learning_rate": 2.663360948848366e-08, + "loss": 3.7398, + "step": 3543000 + }, + { + "epoch": 39.35, + "learning_rate": 2.66197276947338e-08, + "loss": 3.7479, + "step": 3543500 + }, + { + "epoch": 39.36, + "learning_rate": 2.6605845900983944e-08, + "loss": 3.7543, + "step": 3544000 + }, + { + "epoch": 39.36, + "learning_rate": 2.6591964107234076e-08, + "loss": 3.7407, + "step": 3544500 + }, + { + "epoch": 39.37, + "learning_rate": 2.6578082313484218e-08, + "loss": 3.7523, + "step": 3545000 + }, + { + "epoch": 39.37, + "learning_rate": 2.656420051973436e-08, + "loss": 3.7489, + "step": 3545500 + }, + { + "epoch": 39.38, + "learning_rate": 2.6550318725984493e-08, + "loss": 3.7331, + "step": 3546000 + }, + { + "epoch": 39.39, + "learning_rate": 2.6536436932234635e-08, + "loss": 3.7488, + "step": 3546500 + }, + { + "epoch": 39.39, + "learning_rate": 2.6522555138484777e-08, + "loss": 3.7547, + "step": 3547000 + }, + { + "epoch": 39.4, + "learning_rate": 2.650867334473491e-08, + "loss": 3.7471, + "step": 3547500 + }, + { + "epoch": 39.4, + "learning_rate": 2.6494791550985052e-08, + "loss": 3.7585, + "step": 3548000 + }, + { + "epoch": 39.41, + "learning_rate": 2.6480909757235188e-08, + "loss": 3.7486, + "step": 3548500 + }, + { + "epoch": 39.41, + "learning_rate": 2.6467027963485327e-08, + "loss": 3.7359, + "step": 3549000 + }, + { + "epoch": 39.42, + "learning_rate": 2.645314616973547e-08, + "loss": 3.7748, + "step": 3549500 + }, + { + "epoch": 39.42, + "learning_rate": 2.6439264375985604e-08, + "loss": 3.7561, + "step": 3550000 + }, + { + "epoch": 39.43, + "learning_rate": 2.6425382582235743e-08, + "loss": 3.7611, + "step": 3550500 + }, + { + "epoch": 39.44, + "learning_rate": 2.6411500788485886e-08, + "loss": 3.7402, + "step": 3551000 + }, + { + "epoch": 39.44, + "learning_rate": 2.639761899473602e-08, + "loss": 3.7467, + "step": 3551500 + }, + { + "epoch": 39.45, + "learning_rate": 2.638373720098616e-08, + "loss": 3.7491, + "step": 3552000 + }, + { + "epoch": 39.45, + "learning_rate": 2.6369855407236303e-08, + "loss": 3.7595, + "step": 3552500 + }, + { + "epoch": 39.46, + "learning_rate": 2.6355973613486438e-08, + "loss": 3.7478, + "step": 3553000 + }, + { + "epoch": 39.46, + "learning_rate": 2.6342091819736577e-08, + "loss": 3.7376, + "step": 3553500 + }, + { + "epoch": 39.47, + "learning_rate": 2.632821002598672e-08, + "loss": 3.7532, + "step": 3554000 + }, + { + "epoch": 39.47, + "learning_rate": 2.6314328232236855e-08, + "loss": 3.7675, + "step": 3554500 + }, + { + "epoch": 39.48, + "learning_rate": 2.6300446438486994e-08, + "loss": 3.7457, + "step": 3555000 + }, + { + "epoch": 39.49, + "learning_rate": 2.6286564644737136e-08, + "loss": 3.7635, + "step": 3555500 + }, + { + "epoch": 39.49, + "learning_rate": 2.6272682850987272e-08, + "loss": 3.7608, + "step": 3556000 + }, + { + "epoch": 39.5, + "learning_rate": 2.625880105723741e-08, + "loss": 3.7744, + "step": 3556500 + }, + { + "epoch": 39.5, + "learning_rate": 2.6244919263487546e-08, + "loss": 3.752, + "step": 3557000 + }, + { + "epoch": 39.51, + "learning_rate": 2.623103746973769e-08, + "loss": 3.7555, + "step": 3557500 + }, + { + "epoch": 39.51, + "learning_rate": 2.6217155675987828e-08, + "loss": 3.7638, + "step": 3558000 + }, + { + "epoch": 39.52, + "learning_rate": 2.6203273882237963e-08, + "loss": 3.77, + "step": 3558500 + }, + { + "epoch": 39.52, + "learning_rate": 2.6189392088488106e-08, + "loss": 3.7632, + "step": 3559000 + }, + { + "epoch": 39.53, + "learning_rate": 2.6175510294738245e-08, + "loss": 3.7377, + "step": 3559500 + }, + { + "epoch": 39.54, + "learning_rate": 2.616162850098838e-08, + "loss": 3.7491, + "step": 3560000 + }, + { + "epoch": 39.54, + "learning_rate": 2.6147746707238522e-08, + "loss": 3.7468, + "step": 3560500 + }, + { + "epoch": 39.55, + "learning_rate": 2.613386491348866e-08, + "loss": 3.7298, + "step": 3561000 + }, + { + "epoch": 39.55, + "learning_rate": 2.6119983119738797e-08, + "loss": 3.7588, + "step": 3561500 + }, + { + "epoch": 39.56, + "learning_rate": 2.610610132598894e-08, + "loss": 3.7634, + "step": 3562000 + }, + { + "epoch": 39.56, + "learning_rate": 2.6092219532239078e-08, + "loss": 3.7626, + "step": 3562500 + }, + { + "epoch": 39.57, + "learning_rate": 2.6078337738489214e-08, + "loss": 3.7645, + "step": 3563000 + }, + { + "epoch": 39.57, + "learning_rate": 2.6064455944739356e-08, + "loss": 3.7373, + "step": 3563500 + }, + { + "epoch": 39.58, + "learning_rate": 2.6050574150989492e-08, + "loss": 3.7687, + "step": 3564000 + }, + { + "epoch": 39.59, + "learning_rate": 2.603669235723963e-08, + "loss": 3.7589, + "step": 3564500 + }, + { + "epoch": 39.59, + "learning_rate": 2.6022810563489773e-08, + "loss": 3.75, + "step": 3565000 + }, + { + "epoch": 39.6, + "learning_rate": 2.600892876973991e-08, + "loss": 3.7374, + "step": 3565500 + }, + { + "epoch": 39.6, + "learning_rate": 2.5995046975990048e-08, + "loss": 3.7585, + "step": 3566000 + }, + { + "epoch": 39.61, + "learning_rate": 2.598116518224019e-08, + "loss": 3.7565, + "step": 3566500 + }, + { + "epoch": 39.61, + "learning_rate": 2.5967283388490326e-08, + "loss": 3.7653, + "step": 3567000 + }, + { + "epoch": 39.62, + "learning_rate": 2.5953401594740465e-08, + "loss": 3.7511, + "step": 3567500 + }, + { + "epoch": 39.62, + "learning_rate": 2.5939519800990607e-08, + "loss": 3.7425, + "step": 3568000 + }, + { + "epoch": 39.63, + "learning_rate": 2.5925638007240742e-08, + "loss": 3.7602, + "step": 3568500 + }, + { + "epoch": 39.64, + "learning_rate": 2.591175621349088e-08, + "loss": 3.74, + "step": 3569000 + }, + { + "epoch": 39.64, + "learning_rate": 2.5897874419741024e-08, + "loss": 3.7633, + "step": 3569500 + }, + { + "epoch": 39.65, + "learning_rate": 2.588399262599116e-08, + "loss": 3.7459, + "step": 3570000 + }, + { + "epoch": 39.65, + "learning_rate": 2.5870110832241298e-08, + "loss": 3.7427, + "step": 3570500 + }, + { + "epoch": 39.66, + "learning_rate": 2.5856229038491434e-08, + "loss": 3.7378, + "step": 3571000 + }, + { + "epoch": 39.66, + "learning_rate": 2.5842347244741576e-08, + "loss": 3.7611, + "step": 3571500 + }, + { + "epoch": 39.67, + "learning_rate": 2.5828465450991715e-08, + "loss": 3.7641, + "step": 3572000 + }, + { + "epoch": 39.67, + "learning_rate": 2.581458365724185e-08, + "loss": 3.7389, + "step": 3572500 + }, + { + "epoch": 39.68, + "learning_rate": 2.5800701863491993e-08, + "loss": 3.7494, + "step": 3573000 + }, + { + "epoch": 39.69, + "learning_rate": 2.5786820069742132e-08, + "loss": 3.7645, + "step": 3573500 + }, + { + "epoch": 39.69, + "learning_rate": 2.5772938275992268e-08, + "loss": 3.7459, + "step": 3574000 + }, + { + "epoch": 39.7, + "learning_rate": 2.575905648224241e-08, + "loss": 3.7464, + "step": 3574500 + }, + { + "epoch": 39.7, + "learning_rate": 2.574517468849255e-08, + "loss": 3.7359, + "step": 3575000 + }, + { + "epoch": 39.71, + "learning_rate": 2.5731292894742685e-08, + "loss": 3.735, + "step": 3575500 + }, + { + "epoch": 39.71, + "learning_rate": 2.5717411100992827e-08, + "loss": 3.7456, + "step": 3576000 + }, + { + "epoch": 39.72, + "learning_rate": 2.5703529307242966e-08, + "loss": 3.7683, + "step": 3576500 + }, + { + "epoch": 39.72, + "learning_rate": 2.56896475134931e-08, + "loss": 3.7525, + "step": 3577000 + }, + { + "epoch": 39.73, + "learning_rate": 2.5675765719743244e-08, + "loss": 3.7698, + "step": 3577500 + }, + { + "epoch": 39.74, + "learning_rate": 2.5661883925993376e-08, + "loss": 3.7659, + "step": 3578000 + }, + { + "epoch": 39.74, + "learning_rate": 2.5648002132243518e-08, + "loss": 3.7428, + "step": 3578500 + }, + { + "epoch": 39.75, + "learning_rate": 2.5634120338493657e-08, + "loss": 3.7521, + "step": 3579000 + }, + { + "epoch": 39.75, + "learning_rate": 2.5620238544743793e-08, + "loss": 3.7483, + "step": 3579500 + }, + { + "epoch": 39.76, + "learning_rate": 2.5606356750993935e-08, + "loss": 3.762, + "step": 3580000 + }, + { + "epoch": 39.76, + "learning_rate": 2.5592474957244074e-08, + "loss": 3.7581, + "step": 3580500 + }, + { + "epoch": 39.77, + "learning_rate": 2.557859316349421e-08, + "loss": 3.7611, + "step": 3581000 + }, + { + "epoch": 39.77, + "learning_rate": 2.5564711369744352e-08, + "loss": 3.7358, + "step": 3581500 + }, + { + "epoch": 39.78, + "learning_rate": 2.555082957599449e-08, + "loss": 3.7613, + "step": 3582000 + }, + { + "epoch": 39.79, + "learning_rate": 2.5536947782244627e-08, + "loss": 3.7352, + "step": 3582500 + }, + { + "epoch": 39.79, + "learning_rate": 2.552306598849477e-08, + "loss": 3.7403, + "step": 3583000 + }, + { + "epoch": 39.8, + "learning_rate": 2.5509184194744908e-08, + "loss": 3.7634, + "step": 3583500 + }, + { + "epoch": 39.8, + "learning_rate": 2.5495302400995043e-08, + "loss": 3.7427, + "step": 3584000 + }, + { + "epoch": 39.81, + "learning_rate": 2.5481420607245186e-08, + "loss": 3.7565, + "step": 3584500 + }, + { + "epoch": 39.81, + "learning_rate": 2.5467538813495325e-08, + "loss": 3.7362, + "step": 3585000 + }, + { + "epoch": 39.82, + "learning_rate": 2.545365701974546e-08, + "loss": 3.7578, + "step": 3585500 + }, + { + "epoch": 39.82, + "learning_rate": 2.5439775225995603e-08, + "loss": 3.768, + "step": 3586000 + }, + { + "epoch": 39.83, + "learning_rate": 2.5425893432245738e-08, + "loss": 3.7456, + "step": 3586500 + }, + { + "epoch": 39.84, + "learning_rate": 2.5412011638495877e-08, + "loss": 3.7613, + "step": 3587000 + }, + { + "epoch": 39.84, + "learning_rate": 2.539812984474602e-08, + "loss": 3.7482, + "step": 3587500 + }, + { + "epoch": 39.85, + "learning_rate": 2.5384248050996155e-08, + "loss": 3.7567, + "step": 3588000 + }, + { + "epoch": 39.85, + "learning_rate": 2.5370366257246294e-08, + "loss": 3.7471, + "step": 3588500 + }, + { + "epoch": 39.86, + "learning_rate": 2.5356484463496436e-08, + "loss": 3.7614, + "step": 3589000 + }, + { + "epoch": 39.86, + "learning_rate": 2.5342602669746572e-08, + "loss": 3.7504, + "step": 3589500 + }, + { + "epoch": 39.87, + "learning_rate": 2.532872087599671e-08, + "loss": 3.7457, + "step": 3590000 + }, + { + "epoch": 39.87, + "learning_rate": 2.5314839082246853e-08, + "loss": 3.7433, + "step": 3590500 + }, + { + "epoch": 39.88, + "learning_rate": 2.530095728849699e-08, + "loss": 3.7718, + "step": 3591000 + }, + { + "epoch": 39.89, + "learning_rate": 2.5287075494747128e-08, + "loss": 3.749, + "step": 3591500 + }, + { + "epoch": 39.89, + "learning_rate": 2.527319370099727e-08, + "loss": 3.746, + "step": 3592000 + }, + { + "epoch": 39.9, + "learning_rate": 2.5259311907247406e-08, + "loss": 3.7566, + "step": 3592500 + }, + { + "epoch": 39.9, + "learning_rate": 2.5245430113497545e-08, + "loss": 3.7559, + "step": 3593000 + }, + { + "epoch": 39.91, + "learning_rate": 2.523154831974768e-08, + "loss": 3.7397, + "step": 3593500 + }, + { + "epoch": 39.91, + "learning_rate": 2.5217666525997823e-08, + "loss": 3.7608, + "step": 3594000 + }, + { + "epoch": 39.92, + "learning_rate": 2.520378473224796e-08, + "loss": 3.755, + "step": 3594500 + }, + { + "epoch": 39.92, + "learning_rate": 2.5189902938498097e-08, + "loss": 3.7598, + "step": 3595000 + }, + { + "epoch": 39.93, + "learning_rate": 2.517602114474824e-08, + "loss": 3.7525, + "step": 3595500 + }, + { + "epoch": 39.94, + "learning_rate": 2.516213935099838e-08, + "loss": 3.7196, + "step": 3596000 + }, + { + "epoch": 39.94, + "learning_rate": 2.5148257557248514e-08, + "loss": 3.7425, + "step": 3596500 + }, + { + "epoch": 39.95, + "learning_rate": 2.5134375763498656e-08, + "loss": 3.7719, + "step": 3597000 + }, + { + "epoch": 39.95, + "learning_rate": 2.5120493969748795e-08, + "loss": 3.7451, + "step": 3597500 + }, + { + "epoch": 39.96, + "learning_rate": 2.510661217599893e-08, + "loss": 3.7648, + "step": 3598000 + }, + { + "epoch": 39.96, + "learning_rate": 2.5092730382249073e-08, + "loss": 3.7464, + "step": 3598500 + }, + { + "epoch": 39.97, + "learning_rate": 2.5078848588499212e-08, + "loss": 3.7555, + "step": 3599000 + }, + { + "epoch": 39.97, + "learning_rate": 2.5064966794749348e-08, + "loss": 3.7586, + "step": 3599500 + }, + { + "epoch": 39.98, + "learning_rate": 2.505108500099949e-08, + "loss": 3.7495, + "step": 3600000 + }, + { + "epoch": 39.99, + "learning_rate": 2.5037203207249626e-08, + "loss": 3.767, + "step": 3600500 + }, + { + "epoch": 39.99, + "learning_rate": 2.5023321413499765e-08, + "loss": 3.7543, + "step": 3601000 + }, + { + "epoch": 40.0, + "learning_rate": 2.5009439619749907e-08, + "loss": 3.7413, + "step": 3601500 + }, + { + "epoch": 40.0, + "eval_loss": 3.8263349533081055, + "eval_runtime": 6.3013, + "eval_samples_per_second": 246.615, + "step": 3601840 + }, + { + "epoch": 40.0, + "learning_rate": 2.4995557826000043e-08, + "loss": 3.7471, + "step": 3602000 + }, + { + "epoch": 40.01, + "learning_rate": 2.498167603225018e-08, + "loss": 3.7482, + "step": 3602500 + }, + { + "epoch": 40.01, + "learning_rate": 2.496779423850032e-08, + "loss": 3.7522, + "step": 3603000 + }, + { + "epoch": 40.02, + "learning_rate": 2.495391244475046e-08, + "loss": 3.7459, + "step": 3603500 + }, + { + "epoch": 40.02, + "learning_rate": 2.49400306510006e-08, + "loss": 3.748, + "step": 3604000 + }, + { + "epoch": 40.03, + "learning_rate": 2.4926148857250737e-08, + "loss": 3.7795, + "step": 3604500 + }, + { + "epoch": 40.04, + "learning_rate": 2.4912267063500876e-08, + "loss": 3.7414, + "step": 3605000 + }, + { + "epoch": 40.04, + "learning_rate": 2.4898385269751015e-08, + "loss": 3.7623, + "step": 3605500 + }, + { + "epoch": 40.05, + "learning_rate": 2.4884503476001154e-08, + "loss": 3.7607, + "step": 3606000 + }, + { + "epoch": 40.05, + "learning_rate": 2.4870621682251293e-08, + "loss": 3.7598, + "step": 3606500 + }, + { + "epoch": 40.06, + "learning_rate": 2.4856739888501432e-08, + "loss": 3.7501, + "step": 3607000 + }, + { + "epoch": 40.06, + "learning_rate": 2.484285809475157e-08, + "loss": 3.7554, + "step": 3607500 + }, + { + "epoch": 40.07, + "learning_rate": 2.482897630100171e-08, + "loss": 3.7579, + "step": 3608000 + }, + { + "epoch": 40.07, + "learning_rate": 2.481509450725185e-08, + "loss": 3.7378, + "step": 3608500 + }, + { + "epoch": 40.08, + "learning_rate": 2.4801212713501988e-08, + "loss": 3.7683, + "step": 3609000 + }, + { + "epoch": 40.09, + "learning_rate": 2.4787330919752124e-08, + "loss": 3.7673, + "step": 3609500 + }, + { + "epoch": 40.09, + "learning_rate": 2.4773449126002263e-08, + "loss": 3.7278, + "step": 3610000 + }, + { + "epoch": 40.1, + "learning_rate": 2.4759567332252405e-08, + "loss": 3.7523, + "step": 3610500 + }, + { + "epoch": 40.1, + "learning_rate": 2.474568553850254e-08, + "loss": 3.7508, + "step": 3611000 + }, + { + "epoch": 40.11, + "learning_rate": 2.473180374475268e-08, + "loss": 3.7685, + "step": 3611500 + }, + { + "epoch": 40.11, + "learning_rate": 2.471792195100282e-08, + "loss": 3.7623, + "step": 3612000 + }, + { + "epoch": 40.12, + "learning_rate": 2.4704040157252957e-08, + "loss": 3.7518, + "step": 3612500 + }, + { + "epoch": 40.12, + "learning_rate": 2.4690158363503096e-08, + "loss": 3.7546, + "step": 3613000 + }, + { + "epoch": 40.13, + "learning_rate": 2.4676276569753235e-08, + "loss": 3.7676, + "step": 3613500 + }, + { + "epoch": 40.14, + "learning_rate": 2.4662394776003374e-08, + "loss": 3.7538, + "step": 3614000 + }, + { + "epoch": 40.14, + "learning_rate": 2.4648512982253513e-08, + "loss": 3.7411, + "step": 3614500 + }, + { + "epoch": 40.15, + "learning_rate": 2.4634631188503652e-08, + "loss": 3.761, + "step": 3615000 + }, + { + "epoch": 40.15, + "learning_rate": 2.462074939475379e-08, + "loss": 3.7577, + "step": 3615500 + }, + { + "epoch": 40.16, + "learning_rate": 2.460686760100393e-08, + "loss": 3.7587, + "step": 3616000 + }, + { + "epoch": 40.16, + "learning_rate": 2.459298580725407e-08, + "loss": 3.7592, + "step": 3616500 + }, + { + "epoch": 40.17, + "learning_rate": 2.4579104013504208e-08, + "loss": 3.7605, + "step": 3617000 + }, + { + "epoch": 40.17, + "learning_rate": 2.4565222219754347e-08, + "loss": 3.7463, + "step": 3617500 + }, + { + "epoch": 40.18, + "learning_rate": 2.4551340426004486e-08, + "loss": 3.7551, + "step": 3618000 + }, + { + "epoch": 40.19, + "learning_rate": 2.4537458632254625e-08, + "loss": 3.7524, + "step": 3618500 + }, + { + "epoch": 40.19, + "learning_rate": 2.4523576838504764e-08, + "loss": 3.7378, + "step": 3619000 + }, + { + "epoch": 40.2, + "learning_rate": 2.4509695044754903e-08, + "loss": 3.7618, + "step": 3619500 + }, + { + "epoch": 40.2, + "learning_rate": 2.449581325100504e-08, + "loss": 3.7568, + "step": 3620000 + }, + { + "epoch": 40.21, + "learning_rate": 2.4481931457255177e-08, + "loss": 3.7693, + "step": 3620500 + }, + { + "epoch": 40.21, + "learning_rate": 2.446804966350532e-08, + "loss": 3.743, + "step": 3621000 + }, + { + "epoch": 40.22, + "learning_rate": 2.445416786975546e-08, + "loss": 3.7329, + "step": 3621500 + }, + { + "epoch": 40.22, + "learning_rate": 2.4440286076005594e-08, + "loss": 3.7491, + "step": 3622000 + }, + { + "epoch": 40.23, + "learning_rate": 2.4426404282255736e-08, + "loss": 3.7615, + "step": 3622500 + }, + { + "epoch": 40.23, + "learning_rate": 2.4412522488505875e-08, + "loss": 3.7561, + "step": 3623000 + }, + { + "epoch": 40.24, + "learning_rate": 2.439864069475601e-08, + "loss": 3.7551, + "step": 3623500 + }, + { + "epoch": 40.25, + "learning_rate": 2.438475890100615e-08, + "loss": 3.7568, + "step": 3624000 + }, + { + "epoch": 40.25, + "learning_rate": 2.4370877107256292e-08, + "loss": 3.7457, + "step": 3624500 + }, + { + "epoch": 40.26, + "learning_rate": 2.4356995313506428e-08, + "loss": 3.7534, + "step": 3625000 + }, + { + "epoch": 40.26, + "learning_rate": 2.4343113519756567e-08, + "loss": 3.7401, + "step": 3625500 + }, + { + "epoch": 40.27, + "learning_rate": 2.432923172600671e-08, + "loss": 3.752, + "step": 3626000 + }, + { + "epoch": 40.27, + "learning_rate": 2.4315349932256845e-08, + "loss": 3.7561, + "step": 3626500 + }, + { + "epoch": 40.28, + "learning_rate": 2.4301468138506984e-08, + "loss": 3.7625, + "step": 3627000 + }, + { + "epoch": 40.28, + "learning_rate": 2.4287586344757126e-08, + "loss": 3.7336, + "step": 3627500 + }, + { + "epoch": 40.29, + "learning_rate": 2.427370455100726e-08, + "loss": 3.7718, + "step": 3628000 + }, + { + "epoch": 40.3, + "learning_rate": 2.42598227572574e-08, + "loss": 3.7393, + "step": 3628500 + }, + { + "epoch": 40.3, + "learning_rate": 2.424594096350754e-08, + "loss": 3.7511, + "step": 3629000 + }, + { + "epoch": 40.31, + "learning_rate": 2.423205916975768e-08, + "loss": 3.7438, + "step": 3629500 + }, + { + "epoch": 40.31, + "learning_rate": 2.4218177376007817e-08, + "loss": 3.7411, + "step": 3630000 + }, + { + "epoch": 40.32, + "learning_rate": 2.4204295582257956e-08, + "loss": 3.765, + "step": 3630500 + }, + { + "epoch": 40.32, + "learning_rate": 2.4190413788508095e-08, + "loss": 3.7321, + "step": 3631000 + }, + { + "epoch": 40.33, + "learning_rate": 2.4176531994758234e-08, + "loss": 3.7451, + "step": 3631500 + }, + { + "epoch": 40.33, + "learning_rate": 2.4162650201008373e-08, + "loss": 3.7369, + "step": 3632000 + }, + { + "epoch": 40.34, + "learning_rate": 2.414876840725851e-08, + "loss": 3.76, + "step": 3632500 + }, + { + "epoch": 40.35, + "learning_rate": 2.413488661350865e-08, + "loss": 3.7482, + "step": 3633000 + }, + { + "epoch": 40.35, + "learning_rate": 2.412100481975879e-08, + "loss": 3.7557, + "step": 3633500 + }, + { + "epoch": 40.36, + "learning_rate": 2.4107123026008926e-08, + "loss": 3.7295, + "step": 3634000 + }, + { + "epoch": 40.36, + "learning_rate": 2.4093241232259068e-08, + "loss": 3.7425, + "step": 3634500 + }, + { + "epoch": 40.37, + "learning_rate": 2.4079359438509207e-08, + "loss": 3.7603, + "step": 3635000 + }, + { + "epoch": 40.37, + "learning_rate": 2.4065477644759343e-08, + "loss": 3.7547, + "step": 3635500 + }, + { + "epoch": 40.38, + "learning_rate": 2.405159585100948e-08, + "loss": 3.7487, + "step": 3636000 + }, + { + "epoch": 40.38, + "learning_rate": 2.4037714057259624e-08, + "loss": 3.7447, + "step": 3636500 + }, + { + "epoch": 40.39, + "learning_rate": 2.402383226350976e-08, + "loss": 3.7536, + "step": 3637000 + }, + { + "epoch": 40.4, + "learning_rate": 2.40099504697599e-08, + "loss": 3.7613, + "step": 3637500 + }, + { + "epoch": 40.4, + "learning_rate": 2.399606867601004e-08, + "loss": 3.7338, + "step": 3638000 + }, + { + "epoch": 40.41, + "learning_rate": 2.3982186882260176e-08, + "loss": 3.752, + "step": 3638500 + }, + { + "epoch": 40.41, + "learning_rate": 2.3968305088510315e-08, + "loss": 3.7721, + "step": 3639000 + }, + { + "epoch": 40.42, + "learning_rate": 2.3954423294760454e-08, + "loss": 3.7477, + "step": 3639500 + }, + { + "epoch": 40.42, + "learning_rate": 2.3940541501010593e-08, + "loss": 3.751, + "step": 3640000 + }, + { + "epoch": 40.43, + "learning_rate": 2.3926659707260732e-08, + "loss": 3.7506, + "step": 3640500 + }, + { + "epoch": 40.43, + "learning_rate": 2.391277791351087e-08, + "loss": 3.76, + "step": 3641000 + }, + { + "epoch": 40.44, + "learning_rate": 2.389889611976101e-08, + "loss": 3.7506, + "step": 3641500 + }, + { + "epoch": 40.45, + "learning_rate": 2.388501432601115e-08, + "loss": 3.7783, + "step": 3642000 + }, + { + "epoch": 40.45, + "learning_rate": 2.3871132532261288e-08, + "loss": 3.7562, + "step": 3642500 + }, + { + "epoch": 40.46, + "learning_rate": 2.3857250738511424e-08, + "loss": 3.731, + "step": 3643000 + }, + { + "epoch": 40.46, + "learning_rate": 2.3843368944761566e-08, + "loss": 3.7573, + "step": 3643500 + }, + { + "epoch": 40.47, + "learning_rate": 2.3829487151011705e-08, + "loss": 3.7492, + "step": 3644000 + }, + { + "epoch": 40.47, + "learning_rate": 2.381560535726184e-08, + "loss": 3.7654, + "step": 3644500 + }, + { + "epoch": 40.48, + "learning_rate": 2.3801723563511983e-08, + "loss": 3.749, + "step": 3645000 + }, + { + "epoch": 40.48, + "learning_rate": 2.3787841769762122e-08, + "loss": 3.7503, + "step": 3645500 + }, + { + "epoch": 40.49, + "learning_rate": 2.3773959976012257e-08, + "loss": 3.7483, + "step": 3646000 + }, + { + "epoch": 40.5, + "learning_rate": 2.3760078182262396e-08, + "loss": 3.7522, + "step": 3646500 + }, + { + "epoch": 40.5, + "learning_rate": 2.374619638851254e-08, + "loss": 3.7469, + "step": 3647000 + }, + { + "epoch": 40.51, + "learning_rate": 2.3732314594762674e-08, + "loss": 3.7486, + "step": 3647500 + }, + { + "epoch": 40.51, + "learning_rate": 2.3718432801012813e-08, + "loss": 3.764, + "step": 3648000 + }, + { + "epoch": 40.52, + "learning_rate": 2.3704551007262955e-08, + "loss": 3.7301, + "step": 3648500 + }, + { + "epoch": 40.52, + "learning_rate": 2.369066921351309e-08, + "loss": 3.7509, + "step": 3649000 + }, + { + "epoch": 40.53, + "learning_rate": 2.367678741976323e-08, + "loss": 3.7427, + "step": 3649500 + }, + { + "epoch": 40.53, + "learning_rate": 2.366290562601337e-08, + "loss": 3.7599, + "step": 3650000 + }, + { + "epoch": 40.54, + "learning_rate": 2.3649023832263508e-08, + "loss": 3.7573, + "step": 3650500 + }, + { + "epoch": 40.55, + "learning_rate": 2.3635142038513647e-08, + "loss": 3.7548, + "step": 3651000 + }, + { + "epoch": 40.55, + "learning_rate": 2.3621260244763786e-08, + "loss": 3.725, + "step": 3651500 + }, + { + "epoch": 40.56, + "learning_rate": 2.3607378451013925e-08, + "loss": 3.7511, + "step": 3652000 + }, + { + "epoch": 40.56, + "learning_rate": 2.3593496657264064e-08, + "loss": 3.7554, + "step": 3652500 + }, + { + "epoch": 40.57, + "learning_rate": 2.3579614863514203e-08, + "loss": 3.7447, + "step": 3653000 + }, + { + "epoch": 40.57, + "learning_rate": 2.3565733069764342e-08, + "loss": 3.7581, + "step": 3653500 + }, + { + "epoch": 40.58, + "learning_rate": 2.355185127601448e-08, + "loss": 3.7376, + "step": 3654000 + }, + { + "epoch": 40.58, + "learning_rate": 2.353796948226462e-08, + "loss": 3.7443, + "step": 3654500 + }, + { + "epoch": 40.59, + "learning_rate": 2.352408768851476e-08, + "loss": 3.7425, + "step": 3655000 + }, + { + "epoch": 40.6, + "learning_rate": 2.3510205894764898e-08, + "loss": 3.7513, + "step": 3655500 + }, + { + "epoch": 40.6, + "learning_rate": 2.3496324101015036e-08, + "loss": 3.7469, + "step": 3656000 + }, + { + "epoch": 40.61, + "learning_rate": 2.3482442307265175e-08, + "loss": 3.7767, + "step": 3656500 + }, + { + "epoch": 40.61, + "learning_rate": 2.346856051351531e-08, + "loss": 3.7529, + "step": 3657000 + }, + { + "epoch": 40.62, + "learning_rate": 2.3454678719765453e-08, + "loss": 3.7441, + "step": 3657500 + }, + { + "epoch": 40.62, + "learning_rate": 2.3440796926015592e-08, + "loss": 3.7323, + "step": 3658000 + }, + { + "epoch": 40.63, + "learning_rate": 2.3426915132265728e-08, + "loss": 3.7337, + "step": 3658500 + }, + { + "epoch": 40.63, + "learning_rate": 2.341303333851587e-08, + "loss": 3.755, + "step": 3659000 + }, + { + "epoch": 40.64, + "learning_rate": 2.339915154476601e-08, + "loss": 3.7399, + "step": 3659500 + }, + { + "epoch": 40.65, + "learning_rate": 2.3385269751016145e-08, + "loss": 3.7402, + "step": 3660000 + }, + { + "epoch": 40.65, + "learning_rate": 2.3371387957266287e-08, + "loss": 3.7521, + "step": 3660500 + }, + { + "epoch": 40.66, + "learning_rate": 2.3357506163516426e-08, + "loss": 3.7618, + "step": 3661000 + }, + { + "epoch": 40.66, + "learning_rate": 2.3343624369766562e-08, + "loss": 3.747, + "step": 3661500 + }, + { + "epoch": 40.67, + "learning_rate": 2.33297425760167e-08, + "loss": 3.7524, + "step": 3662000 + }, + { + "epoch": 40.67, + "learning_rate": 2.3315860782266843e-08, + "loss": 3.7458, + "step": 3662500 + }, + { + "epoch": 40.68, + "learning_rate": 2.330197898851698e-08, + "loss": 3.7401, + "step": 3663000 + }, + { + "epoch": 40.68, + "learning_rate": 2.3288097194767118e-08, + "loss": 3.7383, + "step": 3663500 + }, + { + "epoch": 40.69, + "learning_rate": 2.3274215401017256e-08, + "loss": 3.7525, + "step": 3664000 + }, + { + "epoch": 40.7, + "learning_rate": 2.3260333607267395e-08, + "loss": 3.7486, + "step": 3664500 + }, + { + "epoch": 40.7, + "learning_rate": 2.3246451813517534e-08, + "loss": 3.7263, + "step": 3665000 + }, + { + "epoch": 40.71, + "learning_rate": 2.3232570019767673e-08, + "loss": 3.7393, + "step": 3665500 + }, + { + "epoch": 40.71, + "learning_rate": 2.3218688226017812e-08, + "loss": 3.7416, + "step": 3666000 + }, + { + "epoch": 40.72, + "learning_rate": 2.320480643226795e-08, + "loss": 3.7504, + "step": 3666500 + }, + { + "epoch": 40.72, + "learning_rate": 2.319092463851809e-08, + "loss": 3.7592, + "step": 3667000 + }, + { + "epoch": 40.73, + "learning_rate": 2.317704284476823e-08, + "loss": 3.7557, + "step": 3667500 + }, + { + "epoch": 40.73, + "learning_rate": 2.3163161051018368e-08, + "loss": 3.7418, + "step": 3668000 + }, + { + "epoch": 40.74, + "learning_rate": 2.3149279257268507e-08, + "loss": 3.7464, + "step": 3668500 + }, + { + "epoch": 40.75, + "learning_rate": 2.3135397463518643e-08, + "loss": 3.7624, + "step": 3669000 + }, + { + "epoch": 40.75, + "learning_rate": 2.3121515669768785e-08, + "loss": 3.7725, + "step": 3669500 + }, + { + "epoch": 40.76, + "learning_rate": 2.3107633876018924e-08, + "loss": 3.7491, + "step": 3670000 + }, + { + "epoch": 40.76, + "learning_rate": 2.309375208226906e-08, + "loss": 3.7542, + "step": 3670500 + }, + { + "epoch": 40.77, + "learning_rate": 2.3079870288519202e-08, + "loss": 3.7444, + "step": 3671000 + }, + { + "epoch": 40.77, + "learning_rate": 2.306598849476934e-08, + "loss": 3.7429, + "step": 3671500 + }, + { + "epoch": 40.78, + "learning_rate": 2.3052106701019476e-08, + "loss": 3.7363, + "step": 3672000 + }, + { + "epoch": 40.78, + "learning_rate": 2.3038224907269615e-08, + "loss": 3.7357, + "step": 3672500 + }, + { + "epoch": 40.79, + "learning_rate": 2.3024343113519758e-08, + "loss": 3.7491, + "step": 3673000 + }, + { + "epoch": 40.8, + "learning_rate": 2.3010461319769893e-08, + "loss": 3.7528, + "step": 3673500 + }, + { + "epoch": 40.8, + "learning_rate": 2.2996579526020032e-08, + "loss": 3.7521, + "step": 3674000 + }, + { + "epoch": 40.81, + "learning_rate": 2.2982697732270175e-08, + "loss": 3.7462, + "step": 3674500 + }, + { + "epoch": 40.81, + "learning_rate": 2.296881593852031e-08, + "loss": 3.7502, + "step": 3675000 + }, + { + "epoch": 40.82, + "learning_rate": 2.295493414477045e-08, + "loss": 3.7353, + "step": 3675500 + }, + { + "epoch": 40.82, + "learning_rate": 2.2941052351020588e-08, + "loss": 3.7356, + "step": 3676000 + }, + { + "epoch": 40.83, + "learning_rate": 2.2927170557270727e-08, + "loss": 3.7673, + "step": 3676500 + }, + { + "epoch": 40.83, + "learning_rate": 2.2913288763520866e-08, + "loss": 3.7283, + "step": 3677000 + }, + { + "epoch": 40.84, + "learning_rate": 2.2899406969771005e-08, + "loss": 3.7487, + "step": 3677500 + }, + { + "epoch": 40.85, + "learning_rate": 2.2885525176021144e-08, + "loss": 3.7555, + "step": 3678000 + }, + { + "epoch": 40.85, + "learning_rate": 2.2871643382271283e-08, + "loss": 3.7416, + "step": 3678500 + }, + { + "epoch": 40.86, + "learning_rate": 2.2857761588521422e-08, + "loss": 3.7408, + "step": 3679000 + }, + { + "epoch": 40.86, + "learning_rate": 2.2843879794771557e-08, + "loss": 3.7335, + "step": 3679500 + }, + { + "epoch": 40.87, + "learning_rate": 2.28299980010217e-08, + "loss": 3.7644, + "step": 3680000 + }, + { + "epoch": 40.87, + "learning_rate": 2.281611620727184e-08, + "loss": 3.7474, + "step": 3680500 + }, + { + "epoch": 40.88, + "learning_rate": 2.2802234413521974e-08, + "loss": 3.7358, + "step": 3681000 + }, + { + "epoch": 40.88, + "learning_rate": 2.2788352619772117e-08, + "loss": 3.7395, + "step": 3681500 + }, + { + "epoch": 40.89, + "learning_rate": 2.2774470826022256e-08, + "loss": 3.7353, + "step": 3682000 + }, + { + "epoch": 40.9, + "learning_rate": 2.276058903227239e-08, + "loss": 3.7454, + "step": 3682500 + }, + { + "epoch": 40.9, + "learning_rate": 2.274670723852253e-08, + "loss": 3.7577, + "step": 3683000 + }, + { + "epoch": 40.91, + "learning_rate": 2.2732825444772672e-08, + "loss": 3.7287, + "step": 3683500 + }, + { + "epoch": 40.91, + "learning_rate": 2.2718943651022808e-08, + "loss": 3.745, + "step": 3684000 + }, + { + "epoch": 40.92, + "learning_rate": 2.2705061857272947e-08, + "loss": 3.7586, + "step": 3684500 + }, + { + "epoch": 40.92, + "learning_rate": 2.269118006352309e-08, + "loss": 3.7679, + "step": 3685000 + }, + { + "epoch": 40.93, + "learning_rate": 2.2677298269773225e-08, + "loss": 3.7601, + "step": 3685500 + }, + { + "epoch": 40.93, + "learning_rate": 2.2663416476023364e-08, + "loss": 3.7607, + "step": 3686000 + }, + { + "epoch": 40.94, + "learning_rate": 2.2649534682273503e-08, + "loss": 3.7488, + "step": 3686500 + }, + { + "epoch": 40.95, + "learning_rate": 2.2635652888523642e-08, + "loss": 3.7738, + "step": 3687000 + }, + { + "epoch": 40.95, + "learning_rate": 2.262177109477378e-08, + "loss": 3.7532, + "step": 3687500 + }, + { + "epoch": 40.96, + "learning_rate": 2.260788930102392e-08, + "loss": 3.7496, + "step": 3688000 + }, + { + "epoch": 40.96, + "learning_rate": 2.259400750727406e-08, + "loss": 3.7555, + "step": 3688500 + }, + { + "epoch": 40.97, + "learning_rate": 2.2580125713524198e-08, + "loss": 3.7515, + "step": 3689000 + }, + { + "epoch": 40.97, + "learning_rate": 2.2566243919774337e-08, + "loss": 3.7609, + "step": 3689500 + }, + { + "epoch": 40.98, + "learning_rate": 2.2552362126024476e-08, + "loss": 3.7799, + "step": 3690000 + }, + { + "epoch": 40.98, + "learning_rate": 2.2538480332274614e-08, + "loss": 3.7663, + "step": 3690500 + }, + { + "epoch": 40.99, + "learning_rate": 2.2524598538524753e-08, + "loss": 3.7547, + "step": 3691000 + }, + { + "epoch": 41.0, + "learning_rate": 2.2510716744774892e-08, + "loss": 3.7379, + "step": 3691500 + }, + { + "epoch": 41.0, + "eval_loss": 3.826106548309326, + "eval_runtime": 6.3033, + "eval_samples_per_second": 246.537, + "step": 3691886 + }, + { + "epoch": 41.0, + "learning_rate": 2.249683495102503e-08, + "loss": 3.7603, + "step": 3692000 + }, + { + "epoch": 41.01, + "learning_rate": 2.248295315727517e-08, + "loss": 3.7643, + "step": 3692500 + }, + { + "epoch": 41.01, + "learning_rate": 2.246907136352531e-08, + "loss": 3.7455, + "step": 3693000 + }, + { + "epoch": 41.02, + "learning_rate": 2.2455189569775448e-08, + "loss": 3.7449, + "step": 3693500 + }, + { + "epoch": 41.02, + "learning_rate": 2.2441307776025587e-08, + "loss": 3.7448, + "step": 3694000 + }, + { + "epoch": 41.03, + "learning_rate": 2.2427425982275726e-08, + "loss": 3.7556, + "step": 3694500 + }, + { + "epoch": 41.03, + "learning_rate": 2.2413544188525862e-08, + "loss": 3.7418, + "step": 3695000 + }, + { + "epoch": 41.04, + "learning_rate": 2.2399662394776004e-08, + "loss": 3.7766, + "step": 3695500 + }, + { + "epoch": 41.05, + "learning_rate": 2.238578060102614e-08, + "loss": 3.7308, + "step": 3696000 + }, + { + "epoch": 41.05, + "learning_rate": 2.237189880727628e-08, + "loss": 3.7448, + "step": 3696500 + }, + { + "epoch": 41.06, + "learning_rate": 2.235801701352642e-08, + "loss": 3.7586, + "step": 3697000 + }, + { + "epoch": 41.06, + "learning_rate": 2.2344135219776557e-08, + "loss": 3.7438, + "step": 3697500 + }, + { + "epoch": 41.07, + "learning_rate": 2.2330253426026695e-08, + "loss": 3.7499, + "step": 3698000 + }, + { + "epoch": 41.07, + "learning_rate": 2.2316371632276834e-08, + "loss": 3.7575, + "step": 3698500 + }, + { + "epoch": 41.08, + "learning_rate": 2.2302489838526973e-08, + "loss": 3.7529, + "step": 3699000 + }, + { + "epoch": 41.08, + "learning_rate": 2.2288608044777112e-08, + "loss": 3.7491, + "step": 3699500 + }, + { + "epoch": 41.09, + "learning_rate": 2.227472625102725e-08, + "loss": 3.7335, + "step": 3700000 + }, + { + "epoch": 41.1, + "learning_rate": 2.226084445727739e-08, + "loss": 3.7453, + "step": 3700500 + }, + { + "epoch": 41.1, + "learning_rate": 2.224696266352753e-08, + "loss": 3.7685, + "step": 3701000 + }, + { + "epoch": 41.11, + "learning_rate": 2.2233080869777668e-08, + "loss": 3.7471, + "step": 3701500 + }, + { + "epoch": 41.11, + "learning_rate": 2.2219199076027807e-08, + "loss": 3.7441, + "step": 3702000 + }, + { + "epoch": 41.12, + "learning_rate": 2.2205317282277946e-08, + "loss": 3.7526, + "step": 3702500 + }, + { + "epoch": 41.12, + "learning_rate": 2.2191435488528085e-08, + "loss": 3.7518, + "step": 3703000 + }, + { + "epoch": 41.13, + "learning_rate": 2.2177553694778224e-08, + "loss": 3.7561, + "step": 3703500 + }, + { + "epoch": 41.13, + "learning_rate": 2.2163671901028363e-08, + "loss": 3.7603, + "step": 3704000 + }, + { + "epoch": 41.14, + "learning_rate": 2.2149790107278502e-08, + "loss": 3.7537, + "step": 3704500 + }, + { + "epoch": 41.15, + "learning_rate": 2.213590831352864e-08, + "loss": 3.7676, + "step": 3705000 + }, + { + "epoch": 41.15, + "learning_rate": 2.2122026519778777e-08, + "loss": 3.7581, + "step": 3705500 + }, + { + "epoch": 41.16, + "learning_rate": 2.210814472602892e-08, + "loss": 3.734, + "step": 3706000 + }, + { + "epoch": 41.16, + "learning_rate": 2.2094262932279058e-08, + "loss": 3.767, + "step": 3706500 + }, + { + "epoch": 41.17, + "learning_rate": 2.2080381138529193e-08, + "loss": 3.7578, + "step": 3707000 + }, + { + "epoch": 41.17, + "learning_rate": 2.2066499344779336e-08, + "loss": 3.74, + "step": 3707500 + }, + { + "epoch": 41.18, + "learning_rate": 2.2052617551029475e-08, + "loss": 3.7551, + "step": 3708000 + }, + { + "epoch": 41.18, + "learning_rate": 2.203873575727961e-08, + "loss": 3.7496, + "step": 3708500 + }, + { + "epoch": 41.19, + "learning_rate": 2.202485396352975e-08, + "loss": 3.7714, + "step": 3709000 + }, + { + "epoch": 41.2, + "learning_rate": 2.201097216977989e-08, + "loss": 3.7273, + "step": 3709500 + }, + { + "epoch": 41.2, + "learning_rate": 2.1997090376030027e-08, + "loss": 3.7518, + "step": 3710000 + }, + { + "epoch": 41.21, + "learning_rate": 2.1983208582280166e-08, + "loss": 3.7663, + "step": 3710500 + }, + { + "epoch": 41.21, + "learning_rate": 2.1969326788530308e-08, + "loss": 3.7453, + "step": 3711000 + }, + { + "epoch": 41.22, + "learning_rate": 2.1955444994780444e-08, + "loss": 3.7401, + "step": 3711500 + }, + { + "epoch": 41.22, + "learning_rate": 2.1941563201030583e-08, + "loss": 3.7375, + "step": 3712000 + }, + { + "epoch": 41.23, + "learning_rate": 2.1927681407280722e-08, + "loss": 3.7493, + "step": 3712500 + }, + { + "epoch": 41.23, + "learning_rate": 2.191379961353086e-08, + "loss": 3.7453, + "step": 3713000 + }, + { + "epoch": 41.24, + "learning_rate": 2.1899917819781e-08, + "loss": 3.756, + "step": 3713500 + }, + { + "epoch": 41.25, + "learning_rate": 2.188603602603114e-08, + "loss": 3.7488, + "step": 3714000 + }, + { + "epoch": 41.25, + "learning_rate": 2.1872154232281278e-08, + "loss": 3.7675, + "step": 3714500 + }, + { + "epoch": 41.26, + "learning_rate": 2.1858272438531417e-08, + "loss": 3.7388, + "step": 3715000 + }, + { + "epoch": 41.26, + "learning_rate": 2.1844390644781556e-08, + "loss": 3.733, + "step": 3715500 + }, + { + "epoch": 41.27, + "learning_rate": 2.183050885103169e-08, + "loss": 3.7553, + "step": 3716000 + }, + { + "epoch": 41.27, + "learning_rate": 2.1816627057281834e-08, + "loss": 3.7665, + "step": 3716500 + }, + { + "epoch": 41.28, + "learning_rate": 2.1802745263531972e-08, + "loss": 3.7475, + "step": 3717000 + }, + { + "epoch": 41.28, + "learning_rate": 2.1788863469782108e-08, + "loss": 3.7631, + "step": 3717500 + }, + { + "epoch": 41.29, + "learning_rate": 2.177498167603225e-08, + "loss": 3.7534, + "step": 3718000 + }, + { + "epoch": 41.3, + "learning_rate": 2.176109988228239e-08, + "loss": 3.7793, + "step": 3718500 + }, + { + "epoch": 41.3, + "learning_rate": 2.1747218088532525e-08, + "loss": 3.7399, + "step": 3719000 + }, + { + "epoch": 41.31, + "learning_rate": 2.1733336294782664e-08, + "loss": 3.7517, + "step": 3719500 + }, + { + "epoch": 41.31, + "learning_rate": 2.1719454501032806e-08, + "loss": 3.7603, + "step": 3720000 + }, + { + "epoch": 41.32, + "learning_rate": 2.1705572707282942e-08, + "loss": 3.7614, + "step": 3720500 + }, + { + "epoch": 41.32, + "learning_rate": 2.169169091353308e-08, + "loss": 3.7555, + "step": 3721000 + }, + { + "epoch": 41.33, + "learning_rate": 2.1677809119783223e-08, + "loss": 3.7324, + "step": 3721500 + }, + { + "epoch": 41.33, + "learning_rate": 2.166392732603336e-08, + "loss": 3.7434, + "step": 3722000 + }, + { + "epoch": 41.34, + "learning_rate": 2.1650045532283498e-08, + "loss": 3.744, + "step": 3722500 + }, + { + "epoch": 41.35, + "learning_rate": 2.163616373853364e-08, + "loss": 3.7342, + "step": 3723000 + }, + { + "epoch": 41.35, + "learning_rate": 2.1622281944783776e-08, + "loss": 3.7592, + "step": 3723500 + }, + { + "epoch": 41.36, + "learning_rate": 2.1608400151033915e-08, + "loss": 3.7443, + "step": 3724000 + }, + { + "epoch": 41.36, + "learning_rate": 2.1594518357284054e-08, + "loss": 3.7478, + "step": 3724500 + }, + { + "epoch": 41.37, + "learning_rate": 2.1580636563534192e-08, + "loss": 3.7618, + "step": 3725000 + }, + { + "epoch": 41.37, + "learning_rate": 2.156675476978433e-08, + "loss": 3.7597, + "step": 3725500 + }, + { + "epoch": 41.38, + "learning_rate": 2.155287297603447e-08, + "loss": 3.759, + "step": 3726000 + }, + { + "epoch": 41.38, + "learning_rate": 2.153899118228461e-08, + "loss": 3.7281, + "step": 3726500 + }, + { + "epoch": 41.39, + "learning_rate": 2.1525109388534748e-08, + "loss": 3.738, + "step": 3727000 + }, + { + "epoch": 41.4, + "learning_rate": 2.1511227594784887e-08, + "loss": 3.7371, + "step": 3727500 + }, + { + "epoch": 41.4, + "learning_rate": 2.1497345801035023e-08, + "loss": 3.7489, + "step": 3728000 + }, + { + "epoch": 41.41, + "learning_rate": 2.1483464007285165e-08, + "loss": 3.756, + "step": 3728500 + }, + { + "epoch": 41.41, + "learning_rate": 2.1469582213535304e-08, + "loss": 3.7448, + "step": 3729000 + }, + { + "epoch": 41.42, + "learning_rate": 2.145570041978544e-08, + "loss": 3.7548, + "step": 3729500 + }, + { + "epoch": 41.42, + "learning_rate": 2.1441818626035582e-08, + "loss": 3.7433, + "step": 3730000 + }, + { + "epoch": 41.43, + "learning_rate": 2.142793683228572e-08, + "loss": 3.7573, + "step": 3730500 + }, + { + "epoch": 41.43, + "learning_rate": 2.1414055038535857e-08, + "loss": 3.758, + "step": 3731000 + }, + { + "epoch": 41.44, + "learning_rate": 2.1400173244785996e-08, + "loss": 3.7446, + "step": 3731500 + }, + { + "epoch": 41.45, + "learning_rate": 2.1386291451036138e-08, + "loss": 3.7514, + "step": 3732000 + }, + { + "epoch": 41.45, + "learning_rate": 2.1372409657286273e-08, + "loss": 3.7488, + "step": 3732500 + }, + { + "epoch": 41.46, + "learning_rate": 2.1358527863536412e-08, + "loss": 3.7364, + "step": 3733000 + }, + { + "epoch": 41.46, + "learning_rate": 2.1344646069786555e-08, + "loss": 3.7523, + "step": 3733500 + }, + { + "epoch": 41.47, + "learning_rate": 2.133076427603669e-08, + "loss": 3.7517, + "step": 3734000 + }, + { + "epoch": 41.47, + "learning_rate": 2.131688248228683e-08, + "loss": 3.7579, + "step": 3734500 + }, + { + "epoch": 41.48, + "learning_rate": 2.1303000688536968e-08, + "loss": 3.7446, + "step": 3735000 + }, + { + "epoch": 41.48, + "learning_rate": 2.1289118894787107e-08, + "loss": 3.7363, + "step": 3735500 + }, + { + "epoch": 41.49, + "learning_rate": 2.1275237101037246e-08, + "loss": 3.752, + "step": 3736000 + }, + { + "epoch": 41.5, + "learning_rate": 2.1261355307287385e-08, + "loss": 3.7526, + "step": 3736500 + }, + { + "epoch": 41.5, + "learning_rate": 2.1247473513537524e-08, + "loss": 3.7606, + "step": 3737000 + }, + { + "epoch": 41.51, + "learning_rate": 2.1233591719787663e-08, + "loss": 3.7543, + "step": 3737500 + }, + { + "epoch": 41.51, + "learning_rate": 2.1219709926037802e-08, + "loss": 3.7508, + "step": 3738000 + }, + { + "epoch": 41.52, + "learning_rate": 2.120582813228794e-08, + "loss": 3.7646, + "step": 3738500 + }, + { + "epoch": 41.52, + "learning_rate": 2.119194633853808e-08, + "loss": 3.7461, + "step": 3739000 + }, + { + "epoch": 41.53, + "learning_rate": 2.117806454478822e-08, + "loss": 3.7575, + "step": 3739500 + }, + { + "epoch": 41.53, + "learning_rate": 2.1164182751038358e-08, + "loss": 3.7605, + "step": 3740000 + }, + { + "epoch": 41.54, + "learning_rate": 2.1150300957288497e-08, + "loss": 3.7633, + "step": 3740500 + }, + { + "epoch": 41.55, + "learning_rate": 2.1136419163538636e-08, + "loss": 3.7253, + "step": 3741000 + }, + { + "epoch": 41.55, + "learning_rate": 2.1122537369788775e-08, + "loss": 3.7594, + "step": 3741500 + }, + { + "epoch": 41.56, + "learning_rate": 2.110865557603891e-08, + "loss": 3.7596, + "step": 3742000 + }, + { + "epoch": 41.56, + "learning_rate": 2.1094773782289053e-08, + "loss": 3.7436, + "step": 3742500 + }, + { + "epoch": 41.57, + "learning_rate": 2.108089198853919e-08, + "loss": 3.7416, + "step": 3743000 + }, + { + "epoch": 41.57, + "learning_rate": 2.1067010194789327e-08, + "loss": 3.7358, + "step": 3743500 + }, + { + "epoch": 41.58, + "learning_rate": 2.105312840103947e-08, + "loss": 3.7479, + "step": 3744000 + }, + { + "epoch": 41.58, + "learning_rate": 2.103924660728961e-08, + "loss": 3.7513, + "step": 3744500 + }, + { + "epoch": 41.59, + "learning_rate": 2.1025364813539744e-08, + "loss": 3.7635, + "step": 3745000 + }, + { + "epoch": 41.6, + "learning_rate": 2.1011483019789883e-08, + "loss": 3.7684, + "step": 3745500 + }, + { + "epoch": 41.6, + "learning_rate": 2.0997601226040025e-08, + "loss": 3.7582, + "step": 3746000 + }, + { + "epoch": 41.61, + "learning_rate": 2.098371943229016e-08, + "loss": 3.7426, + "step": 3746500 + }, + { + "epoch": 41.61, + "learning_rate": 2.09698376385403e-08, + "loss": 3.752, + "step": 3747000 + }, + { + "epoch": 41.62, + "learning_rate": 2.0955955844790442e-08, + "loss": 3.7571, + "step": 3747500 + }, + { + "epoch": 41.62, + "learning_rate": 2.0942074051040578e-08, + "loss": 3.7346, + "step": 3748000 + }, + { + "epoch": 41.63, + "learning_rate": 2.0928192257290717e-08, + "loss": 3.7188, + "step": 3748500 + }, + { + "epoch": 41.63, + "learning_rate": 2.0914310463540856e-08, + "loss": 3.761, + "step": 3749000 + }, + { + "epoch": 41.64, + "learning_rate": 2.0900428669790995e-08, + "loss": 3.7587, + "step": 3749500 + }, + { + "epoch": 41.65, + "learning_rate": 2.0886546876041134e-08, + "loss": 3.7376, + "step": 3750000 + }, + { + "epoch": 41.65, + "learning_rate": 2.0872665082291273e-08, + "loss": 3.7643, + "step": 3750500 + }, + { + "epoch": 41.66, + "learning_rate": 2.085878328854141e-08, + "loss": 3.7413, + "step": 3751000 + }, + { + "epoch": 41.66, + "learning_rate": 2.084490149479155e-08, + "loss": 3.7477, + "step": 3751500 + }, + { + "epoch": 41.67, + "learning_rate": 2.083101970104169e-08, + "loss": 3.7441, + "step": 3752000 + }, + { + "epoch": 41.67, + "learning_rate": 2.081713790729183e-08, + "loss": 3.7458, + "step": 3752500 + }, + { + "epoch": 41.68, + "learning_rate": 2.0803256113541967e-08, + "loss": 3.748, + "step": 3753000 + }, + { + "epoch": 41.68, + "learning_rate": 2.0789374319792106e-08, + "loss": 3.7536, + "step": 3753500 + }, + { + "epoch": 41.69, + "learning_rate": 2.0775492526042242e-08, + "loss": 3.7538, + "step": 3754000 + }, + { + "epoch": 41.7, + "learning_rate": 2.0761610732292384e-08, + "loss": 3.7593, + "step": 3754500 + }, + { + "epoch": 41.7, + "learning_rate": 2.0747728938542523e-08, + "loss": 3.7641, + "step": 3755000 + }, + { + "epoch": 41.71, + "learning_rate": 2.073384714479266e-08, + "loss": 3.752, + "step": 3755500 + }, + { + "epoch": 41.71, + "learning_rate": 2.07199653510428e-08, + "loss": 3.7499, + "step": 3756000 + }, + { + "epoch": 41.72, + "learning_rate": 2.070608355729294e-08, + "loss": 3.7624, + "step": 3756500 + }, + { + "epoch": 41.72, + "learning_rate": 2.0692201763543076e-08, + "loss": 3.7275, + "step": 3757000 + }, + { + "epoch": 41.73, + "learning_rate": 2.0678319969793215e-08, + "loss": 3.7608, + "step": 3757500 + }, + { + "epoch": 41.73, + "learning_rate": 2.0664438176043357e-08, + "loss": 3.7511, + "step": 3758000 + }, + { + "epoch": 41.74, + "learning_rate": 2.0650556382293493e-08, + "loss": 3.7449, + "step": 3758500 + }, + { + "epoch": 41.75, + "learning_rate": 2.063667458854363e-08, + "loss": 3.7463, + "step": 3759000 + }, + { + "epoch": 41.75, + "learning_rate": 2.0622792794793774e-08, + "loss": 3.768, + "step": 3759500 + }, + { + "epoch": 41.76, + "learning_rate": 2.060891100104391e-08, + "loss": 3.752, + "step": 3760000 + }, + { + "epoch": 41.76, + "learning_rate": 2.059502920729405e-08, + "loss": 3.7528, + "step": 3760500 + }, + { + "epoch": 41.77, + "learning_rate": 2.0581147413544187e-08, + "loss": 3.7624, + "step": 3761000 + }, + { + "epoch": 41.77, + "learning_rate": 2.0567265619794326e-08, + "loss": 3.7313, + "step": 3761500 + }, + { + "epoch": 41.78, + "learning_rate": 2.0553383826044465e-08, + "loss": 3.7451, + "step": 3762000 + }, + { + "epoch": 41.78, + "learning_rate": 2.0539502032294604e-08, + "loss": 3.7545, + "step": 3762500 + }, + { + "epoch": 41.79, + "learning_rate": 2.0525620238544743e-08, + "loss": 3.7478, + "step": 3763000 + }, + { + "epoch": 41.8, + "learning_rate": 2.0511738444794882e-08, + "loss": 3.7376, + "step": 3763500 + }, + { + "epoch": 41.8, + "learning_rate": 2.049785665104502e-08, + "loss": 3.7487, + "step": 3764000 + }, + { + "epoch": 41.81, + "learning_rate": 2.0483974857295157e-08, + "loss": 3.7407, + "step": 3764500 + }, + { + "epoch": 41.81, + "learning_rate": 2.04700930635453e-08, + "loss": 3.758, + "step": 3765000 + }, + { + "epoch": 41.82, + "learning_rate": 2.0456211269795438e-08, + "loss": 3.7576, + "step": 3765500 + }, + { + "epoch": 41.82, + "learning_rate": 2.0442329476045574e-08, + "loss": 3.7412, + "step": 3766000 + }, + { + "epoch": 41.83, + "learning_rate": 2.0428447682295716e-08, + "loss": 3.7597, + "step": 3766500 + }, + { + "epoch": 41.83, + "learning_rate": 2.0414565888545855e-08, + "loss": 3.7439, + "step": 3767000 + }, + { + "epoch": 41.84, + "learning_rate": 2.040068409479599e-08, + "loss": 3.7394, + "step": 3767500 + }, + { + "epoch": 41.85, + "learning_rate": 2.038680230104613e-08, + "loss": 3.7137, + "step": 3768000 + }, + { + "epoch": 41.85, + "learning_rate": 2.037292050729627e-08, + "loss": 3.7631, + "step": 3768500 + }, + { + "epoch": 41.86, + "learning_rate": 2.0359038713546407e-08, + "loss": 3.7368, + "step": 3769000 + }, + { + "epoch": 41.86, + "learning_rate": 2.0345156919796546e-08, + "loss": 3.7518, + "step": 3769500 + }, + { + "epoch": 41.87, + "learning_rate": 2.033127512604669e-08, + "loss": 3.7503, + "step": 3770000 + }, + { + "epoch": 41.87, + "learning_rate": 2.0317393332296824e-08, + "loss": 3.7532, + "step": 3770500 + }, + { + "epoch": 41.88, + "learning_rate": 2.0303511538546963e-08, + "loss": 3.7434, + "step": 3771000 + }, + { + "epoch": 41.88, + "learning_rate": 2.0289629744797102e-08, + "loss": 3.7422, + "step": 3771500 + }, + { + "epoch": 41.89, + "learning_rate": 2.027574795104724e-08, + "loss": 3.7478, + "step": 3772000 + }, + { + "epoch": 41.9, + "learning_rate": 2.026186615729738e-08, + "loss": 3.7551, + "step": 3772500 + }, + { + "epoch": 41.9, + "learning_rate": 2.024798436354752e-08, + "loss": 3.7531, + "step": 3773000 + }, + { + "epoch": 41.91, + "learning_rate": 2.0234102569797658e-08, + "loss": 3.7409, + "step": 3773500 + }, + { + "epoch": 41.91, + "learning_rate": 2.0220220776047797e-08, + "loss": 3.757, + "step": 3774000 + }, + { + "epoch": 41.92, + "learning_rate": 2.0206338982297936e-08, + "loss": 3.7367, + "step": 3774500 + }, + { + "epoch": 41.92, + "learning_rate": 2.0192457188548075e-08, + "loss": 3.7459, + "step": 3775000 + }, + { + "epoch": 41.93, + "learning_rate": 2.0178575394798214e-08, + "loss": 3.7514, + "step": 3775500 + }, + { + "epoch": 41.93, + "learning_rate": 2.0164693601048353e-08, + "loss": 3.7525, + "step": 3776000 + }, + { + "epoch": 41.94, + "learning_rate": 2.015081180729849e-08, + "loss": 3.7296, + "step": 3776500 + }, + { + "epoch": 41.95, + "learning_rate": 2.013693001354863e-08, + "loss": 3.7335, + "step": 3777000 + }, + { + "epoch": 41.95, + "learning_rate": 2.012304821979877e-08, + "loss": 3.7467, + "step": 3777500 + }, + { + "epoch": 41.96, + "learning_rate": 2.010916642604891e-08, + "loss": 3.7427, + "step": 3778000 + }, + { + "epoch": 41.96, + "learning_rate": 2.0095284632299044e-08, + "loss": 3.7496, + "step": 3778500 + }, + { + "epoch": 41.97, + "learning_rate": 2.0081402838549186e-08, + "loss": 3.7426, + "step": 3779000 + }, + { + "epoch": 41.97, + "learning_rate": 2.0067521044799325e-08, + "loss": 3.7636, + "step": 3779500 + }, + { + "epoch": 41.98, + "learning_rate": 2.005363925104946e-08, + "loss": 3.7458, + "step": 3780000 + }, + { + "epoch": 41.98, + "learning_rate": 2.0039757457299603e-08, + "loss": 3.7414, + "step": 3780500 + }, + { + "epoch": 41.99, + "learning_rate": 2.002587566354974e-08, + "loss": 3.7337, + "step": 3781000 + }, + { + "epoch": 42.0, + "learning_rate": 2.0011993869799878e-08, + "loss": 3.7582, + "step": 3781500 + }, + { + "epoch": 42.0, + "eval_loss": 3.825657844543457, + "eval_runtime": 6.298, + "eval_samples_per_second": 246.746, + "step": 3781932 + }, + { + "epoch": 42.0, + "learning_rate": 1.9998112076050017e-08, + "loss": 3.7414, + "step": 3782000 + }, + { + "epoch": 42.01, + "learning_rate": 1.9984230282300156e-08, + "loss": 3.757, + "step": 3782500 + }, + { + "epoch": 42.01, + "learning_rate": 1.9970348488550295e-08, + "loss": 3.7462, + "step": 3783000 + }, + { + "epoch": 42.02, + "learning_rate": 1.9956466694800434e-08, + "loss": 3.7467, + "step": 3783500 + }, + { + "epoch": 42.02, + "learning_rate": 1.9942584901050573e-08, + "loss": 3.7842, + "step": 3784000 + }, + { + "epoch": 42.03, + "learning_rate": 1.992870310730071e-08, + "loss": 3.7375, + "step": 3784500 + }, + { + "epoch": 42.03, + "learning_rate": 1.991482131355085e-08, + "loss": 3.7367, + "step": 3785000 + }, + { + "epoch": 42.04, + "learning_rate": 1.990093951980099e-08, + "loss": 3.7641, + "step": 3785500 + }, + { + "epoch": 42.05, + "learning_rate": 1.988705772605113e-08, + "loss": 3.7621, + "step": 3786000 + }, + { + "epoch": 42.05, + "learning_rate": 1.9873175932301267e-08, + "loss": 3.7412, + "step": 3786500 + }, + { + "epoch": 42.06, + "learning_rate": 1.9859294138551406e-08, + "loss": 3.7382, + "step": 3787000 + }, + { + "epoch": 42.06, + "learning_rate": 1.9845412344801545e-08, + "loss": 3.7589, + "step": 3787500 + }, + { + "epoch": 42.07, + "learning_rate": 1.9831530551051684e-08, + "loss": 3.7544, + "step": 3788000 + }, + { + "epoch": 42.07, + "learning_rate": 1.9817648757301823e-08, + "loss": 3.7463, + "step": 3788500 + }, + { + "epoch": 42.08, + "learning_rate": 1.9803766963551962e-08, + "loss": 3.7774, + "step": 3789000 + }, + { + "epoch": 42.08, + "learning_rate": 1.97898851698021e-08, + "loss": 3.7423, + "step": 3789500 + }, + { + "epoch": 42.09, + "learning_rate": 1.977600337605224e-08, + "loss": 3.7436, + "step": 3790000 + }, + { + "epoch": 42.1, + "learning_rate": 1.9762121582302376e-08, + "loss": 3.7413, + "step": 3790500 + }, + { + "epoch": 42.1, + "learning_rate": 1.9748239788552518e-08, + "loss": 3.7564, + "step": 3791000 + }, + { + "epoch": 42.11, + "learning_rate": 1.9734357994802657e-08, + "loss": 3.7535, + "step": 3791500 + }, + { + "epoch": 42.11, + "learning_rate": 1.9720476201052793e-08, + "loss": 3.737, + "step": 3792000 + }, + { + "epoch": 42.12, + "learning_rate": 1.9706594407302935e-08, + "loss": 3.7388, + "step": 3792500 + }, + { + "epoch": 42.12, + "learning_rate": 1.9692712613553074e-08, + "loss": 3.7215, + "step": 3793000 + }, + { + "epoch": 42.13, + "learning_rate": 1.967883081980321e-08, + "loss": 3.7415, + "step": 3793500 + }, + { + "epoch": 42.13, + "learning_rate": 1.966494902605335e-08, + "loss": 3.7418, + "step": 3794000 + }, + { + "epoch": 42.14, + "learning_rate": 1.965106723230349e-08, + "loss": 3.7559, + "step": 3794500 + }, + { + "epoch": 42.15, + "learning_rate": 1.9637185438553626e-08, + "loss": 3.7462, + "step": 3795000 + }, + { + "epoch": 42.15, + "learning_rate": 1.9623303644803765e-08, + "loss": 3.754, + "step": 3795500 + }, + { + "epoch": 42.16, + "learning_rate": 1.9609421851053908e-08, + "loss": 3.7488, + "step": 3796000 + }, + { + "epoch": 42.16, + "learning_rate": 1.9595540057304043e-08, + "loss": 3.7534, + "step": 3796500 + }, + { + "epoch": 42.17, + "learning_rate": 1.9581658263554182e-08, + "loss": 3.7392, + "step": 3797000 + }, + { + "epoch": 42.17, + "learning_rate": 1.956777646980432e-08, + "loss": 3.744, + "step": 3797500 + }, + { + "epoch": 42.18, + "learning_rate": 1.955389467605446e-08, + "loss": 3.7621, + "step": 3798000 + }, + { + "epoch": 42.18, + "learning_rate": 1.95400128823046e-08, + "loss": 3.7517, + "step": 3798500 + }, + { + "epoch": 42.19, + "learning_rate": 1.9526131088554738e-08, + "loss": 3.7521, + "step": 3799000 + }, + { + "epoch": 42.2, + "learning_rate": 1.9512249294804877e-08, + "loss": 3.7508, + "step": 3799500 + }, + { + "epoch": 42.2, + "learning_rate": 1.9498367501055016e-08, + "loss": 3.7584, + "step": 3800000 + }, + { + "epoch": 42.21, + "learning_rate": 1.9484485707305155e-08, + "loss": 3.7598, + "step": 3800500 + }, + { + "epoch": 42.21, + "learning_rate": 1.947060391355529e-08, + "loss": 3.7573, + "step": 3801000 + }, + { + "epoch": 42.22, + "learning_rate": 1.9456722119805433e-08, + "loss": 3.7455, + "step": 3801500 + }, + { + "epoch": 42.22, + "learning_rate": 1.9442840326055572e-08, + "loss": 3.7331, + "step": 3802000 + }, + { + "epoch": 42.23, + "learning_rate": 1.9428958532305707e-08, + "loss": 3.7581, + "step": 3802500 + }, + { + "epoch": 42.23, + "learning_rate": 1.941507673855585e-08, + "loss": 3.7487, + "step": 3803000 + }, + { + "epoch": 42.24, + "learning_rate": 1.940119494480599e-08, + "loss": 3.7387, + "step": 3803500 + }, + { + "epoch": 42.25, + "learning_rate": 1.9387313151056124e-08, + "loss": 3.7291, + "step": 3804000 + }, + { + "epoch": 42.25, + "learning_rate": 1.9373431357306263e-08, + "loss": 3.7514, + "step": 3804500 + }, + { + "epoch": 42.26, + "learning_rate": 1.9359549563556405e-08, + "loss": 3.7562, + "step": 3805000 + }, + { + "epoch": 42.26, + "learning_rate": 1.934566776980654e-08, + "loss": 3.752, + "step": 3805500 + }, + { + "epoch": 42.27, + "learning_rate": 1.933178597605668e-08, + "loss": 3.7583, + "step": 3806000 + }, + { + "epoch": 42.27, + "learning_rate": 1.9317904182306822e-08, + "loss": 3.7646, + "step": 3806500 + }, + { + "epoch": 42.28, + "learning_rate": 1.9304022388556958e-08, + "loss": 3.7467, + "step": 3807000 + }, + { + "epoch": 42.28, + "learning_rate": 1.9290140594807097e-08, + "loss": 3.7651, + "step": 3807500 + }, + { + "epoch": 42.29, + "learning_rate": 1.9276258801057236e-08, + "loss": 3.7563, + "step": 3808000 + }, + { + "epoch": 42.3, + "learning_rate": 1.9262377007307375e-08, + "loss": 3.7503, + "step": 3808500 + }, + { + "epoch": 42.3, + "learning_rate": 1.9248495213557514e-08, + "loss": 3.7393, + "step": 3809000 + }, + { + "epoch": 42.31, + "learning_rate": 1.9234613419807653e-08, + "loss": 3.74, + "step": 3809500 + }, + { + "epoch": 42.31, + "learning_rate": 1.9220731626057792e-08, + "loss": 3.73, + "step": 3810000 + }, + { + "epoch": 42.32, + "learning_rate": 1.920684983230793e-08, + "loss": 3.7407, + "step": 3810500 + }, + { + "epoch": 42.32, + "learning_rate": 1.919296803855807e-08, + "loss": 3.719, + "step": 3811000 + }, + { + "epoch": 42.33, + "learning_rate": 1.9179086244808205e-08, + "loss": 3.751, + "step": 3811500 + }, + { + "epoch": 42.33, + "learning_rate": 1.9165204451058348e-08, + "loss": 3.7445, + "step": 3812000 + }, + { + "epoch": 42.34, + "learning_rate": 1.9151322657308486e-08, + "loss": 3.7381, + "step": 3812500 + }, + { + "epoch": 42.35, + "learning_rate": 1.9137440863558622e-08, + "loss": 3.7323, + "step": 3813000 + }, + { + "epoch": 42.35, + "learning_rate": 1.9123559069808764e-08, + "loss": 3.7512, + "step": 3813500 + }, + { + "epoch": 42.36, + "learning_rate": 1.9109677276058903e-08, + "loss": 3.7411, + "step": 3814000 + }, + { + "epoch": 42.36, + "learning_rate": 1.909579548230904e-08, + "loss": 3.7421, + "step": 3814500 + }, + { + "epoch": 42.37, + "learning_rate": 1.908191368855918e-08, + "loss": 3.7571, + "step": 3815000 + }, + { + "epoch": 42.37, + "learning_rate": 1.906803189480932e-08, + "loss": 3.7286, + "step": 3815500 + }, + { + "epoch": 42.38, + "learning_rate": 1.9054150101059456e-08, + "loss": 3.7598, + "step": 3816000 + }, + { + "epoch": 42.38, + "learning_rate": 1.9040268307309595e-08, + "loss": 3.7496, + "step": 3816500 + }, + { + "epoch": 42.39, + "learning_rate": 1.9026386513559737e-08, + "loss": 3.7444, + "step": 3817000 + }, + { + "epoch": 42.39, + "learning_rate": 1.9012504719809873e-08, + "loss": 3.7566, + "step": 3817500 + }, + { + "epoch": 42.4, + "learning_rate": 1.8998622926060012e-08, + "loss": 3.7505, + "step": 3818000 + }, + { + "epoch": 42.41, + "learning_rate": 1.8984741132310154e-08, + "loss": 3.7615, + "step": 3818500 + }, + { + "epoch": 42.41, + "learning_rate": 1.897085933856029e-08, + "loss": 3.7447, + "step": 3819000 + }, + { + "epoch": 42.42, + "learning_rate": 1.895697754481043e-08, + "loss": 3.7308, + "step": 3819500 + }, + { + "epoch": 42.42, + "learning_rate": 1.8943095751060568e-08, + "loss": 3.7757, + "step": 3820000 + }, + { + "epoch": 42.43, + "learning_rate": 1.8929213957310706e-08, + "loss": 3.757, + "step": 3820500 + }, + { + "epoch": 42.43, + "learning_rate": 1.8915332163560845e-08, + "loss": 3.7345, + "step": 3821000 + }, + { + "epoch": 42.44, + "learning_rate": 1.8901450369810984e-08, + "loss": 3.7321, + "step": 3821500 + }, + { + "epoch": 42.44, + "learning_rate": 1.8887568576061123e-08, + "loss": 3.7498, + "step": 3822000 + }, + { + "epoch": 42.45, + "learning_rate": 1.8873686782311262e-08, + "loss": 3.7938, + "step": 3822500 + }, + { + "epoch": 42.46, + "learning_rate": 1.88598049885614e-08, + "loss": 3.7506, + "step": 3823000 + }, + { + "epoch": 42.46, + "learning_rate": 1.884592319481154e-08, + "loss": 3.7895, + "step": 3823500 + }, + { + "epoch": 42.47, + "learning_rate": 1.883204140106168e-08, + "loss": 3.7406, + "step": 3824000 + }, + { + "epoch": 42.47, + "learning_rate": 1.8818159607311818e-08, + "loss": 3.7373, + "step": 3824500 + }, + { + "epoch": 42.48, + "learning_rate": 1.8804277813561957e-08, + "loss": 3.736, + "step": 3825000 + }, + { + "epoch": 42.48, + "learning_rate": 1.8790396019812096e-08, + "loss": 3.758, + "step": 3825500 + }, + { + "epoch": 42.49, + "learning_rate": 1.8776514226062235e-08, + "loss": 3.7689, + "step": 3826000 + }, + { + "epoch": 42.49, + "learning_rate": 1.8762632432312374e-08, + "loss": 3.7511, + "step": 3826500 + }, + { + "epoch": 42.5, + "learning_rate": 1.874875063856251e-08, + "loss": 3.7557, + "step": 3827000 + }, + { + "epoch": 42.51, + "learning_rate": 1.8734868844812652e-08, + "loss": 3.7505, + "step": 3827500 + }, + { + "epoch": 42.51, + "learning_rate": 1.872098705106279e-08, + "loss": 3.7355, + "step": 3828000 + }, + { + "epoch": 42.52, + "learning_rate": 1.8707105257312926e-08, + "loss": 3.7454, + "step": 3828500 + }, + { + "epoch": 42.52, + "learning_rate": 1.869322346356307e-08, + "loss": 3.7542, + "step": 3829000 + }, + { + "epoch": 42.53, + "learning_rate": 1.8679341669813208e-08, + "loss": 3.7695, + "step": 3829500 + }, + { + "epoch": 42.53, + "learning_rate": 1.8665459876063343e-08, + "loss": 3.7371, + "step": 3830000 + }, + { + "epoch": 42.54, + "learning_rate": 1.8651578082313482e-08, + "loss": 3.725, + "step": 3830500 + }, + { + "epoch": 42.54, + "learning_rate": 1.8637696288563625e-08, + "loss": 3.7504, + "step": 3831000 + }, + { + "epoch": 42.55, + "learning_rate": 1.862381449481376e-08, + "loss": 3.7388, + "step": 3831500 + }, + { + "epoch": 42.56, + "learning_rate": 1.86099327010639e-08, + "loss": 3.7387, + "step": 3832000 + }, + { + "epoch": 42.56, + "learning_rate": 1.859605090731404e-08, + "loss": 3.7515, + "step": 3832500 + }, + { + "epoch": 42.57, + "learning_rate": 1.8582169113564177e-08, + "loss": 3.7474, + "step": 3833000 + }, + { + "epoch": 42.57, + "learning_rate": 1.8568287319814316e-08, + "loss": 3.7727, + "step": 3833500 + }, + { + "epoch": 42.58, + "learning_rate": 1.8554405526064455e-08, + "loss": 3.7489, + "step": 3834000 + }, + { + "epoch": 42.58, + "learning_rate": 1.8540523732314594e-08, + "loss": 3.7557, + "step": 3834500 + }, + { + "epoch": 42.59, + "learning_rate": 1.8526641938564733e-08, + "loss": 3.7484, + "step": 3835000 + }, + { + "epoch": 42.59, + "learning_rate": 1.8512760144814872e-08, + "loss": 3.7547, + "step": 3835500 + }, + { + "epoch": 42.6, + "learning_rate": 1.849887835106501e-08, + "loss": 3.7546, + "step": 3836000 + }, + { + "epoch": 42.61, + "learning_rate": 1.848499655731515e-08, + "loss": 3.7361, + "step": 3836500 + }, + { + "epoch": 42.61, + "learning_rate": 1.847111476356529e-08, + "loss": 3.7453, + "step": 3837000 + }, + { + "epoch": 42.62, + "learning_rate": 1.8457232969815424e-08, + "loss": 3.7468, + "step": 3837500 + }, + { + "epoch": 42.62, + "learning_rate": 1.8443351176065567e-08, + "loss": 3.7656, + "step": 3838000 + }, + { + "epoch": 42.63, + "learning_rate": 1.8429469382315706e-08, + "loss": 3.7545, + "step": 3838500 + }, + { + "epoch": 42.63, + "learning_rate": 1.841558758856584e-08, + "loss": 3.7415, + "step": 3839000 + }, + { + "epoch": 42.64, + "learning_rate": 1.8401705794815983e-08, + "loss": 3.7599, + "step": 3839500 + }, + { + "epoch": 42.64, + "learning_rate": 1.8387824001066122e-08, + "loss": 3.754, + "step": 3840000 + }, + { + "epoch": 42.65, + "learning_rate": 1.8373942207316258e-08, + "loss": 3.7493, + "step": 3840500 + }, + { + "epoch": 42.66, + "learning_rate": 1.8360060413566397e-08, + "loss": 3.7377, + "step": 3841000 + }, + { + "epoch": 42.66, + "learning_rate": 1.834617861981654e-08, + "loss": 3.7589, + "step": 3841500 + }, + { + "epoch": 42.67, + "learning_rate": 1.8332296826066675e-08, + "loss": 3.7618, + "step": 3842000 + }, + { + "epoch": 42.67, + "learning_rate": 1.8318415032316814e-08, + "loss": 3.7473, + "step": 3842500 + }, + { + "epoch": 42.68, + "learning_rate": 1.8304533238566956e-08, + "loss": 3.7514, + "step": 3843000 + }, + { + "epoch": 42.68, + "learning_rate": 1.8290651444817092e-08, + "loss": 3.7404, + "step": 3843500 + }, + { + "epoch": 42.69, + "learning_rate": 1.827676965106723e-08, + "loss": 3.743, + "step": 3844000 + }, + { + "epoch": 42.69, + "learning_rate": 1.826288785731737e-08, + "loss": 3.7384, + "step": 3844500 + }, + { + "epoch": 42.7, + "learning_rate": 1.824900606356751e-08, + "loss": 3.7588, + "step": 3845000 + }, + { + "epoch": 42.71, + "learning_rate": 1.8235124269817648e-08, + "loss": 3.7412, + "step": 3845500 + }, + { + "epoch": 42.71, + "learning_rate": 1.8221242476067787e-08, + "loss": 3.7569, + "step": 3846000 + }, + { + "epoch": 42.72, + "learning_rate": 1.8207360682317926e-08, + "loss": 3.743, + "step": 3846500 + }, + { + "epoch": 42.72, + "learning_rate": 1.8193478888568064e-08, + "loss": 3.7522, + "step": 3847000 + }, + { + "epoch": 42.73, + "learning_rate": 1.8179597094818203e-08, + "loss": 3.7464, + "step": 3847500 + }, + { + "epoch": 42.73, + "learning_rate": 1.8165715301068342e-08, + "loss": 3.7456, + "step": 3848000 + }, + { + "epoch": 42.74, + "learning_rate": 1.815183350731848e-08, + "loss": 3.7551, + "step": 3848500 + }, + { + "epoch": 42.74, + "learning_rate": 1.813795171356862e-08, + "loss": 3.7489, + "step": 3849000 + }, + { + "epoch": 42.75, + "learning_rate": 1.8124069919818756e-08, + "loss": 3.7553, + "step": 3849500 + }, + { + "epoch": 42.76, + "learning_rate": 1.8110188126068898e-08, + "loss": 3.7448, + "step": 3850000 + }, + { + "epoch": 42.76, + "learning_rate": 1.8096306332319037e-08, + "loss": 3.7474, + "step": 3850500 + }, + { + "epoch": 42.77, + "learning_rate": 1.8082424538569173e-08, + "loss": 3.759, + "step": 3851000 + }, + { + "epoch": 42.77, + "learning_rate": 1.8068542744819315e-08, + "loss": 3.723, + "step": 3851500 + }, + { + "epoch": 42.78, + "learning_rate": 1.8054660951069454e-08, + "loss": 3.7386, + "step": 3852000 + }, + { + "epoch": 42.78, + "learning_rate": 1.804077915731959e-08, + "loss": 3.7521, + "step": 3852500 + }, + { + "epoch": 42.79, + "learning_rate": 1.802689736356973e-08, + "loss": 3.7472, + "step": 3853000 + }, + { + "epoch": 42.79, + "learning_rate": 1.801301556981987e-08, + "loss": 3.7389, + "step": 3853500 + }, + { + "epoch": 42.8, + "learning_rate": 1.7999133776070007e-08, + "loss": 3.736, + "step": 3854000 + }, + { + "epoch": 42.81, + "learning_rate": 1.7985251982320145e-08, + "loss": 3.7141, + "step": 3854500 + }, + { + "epoch": 42.81, + "learning_rate": 1.7971370188570288e-08, + "loss": 3.7617, + "step": 3855000 + }, + { + "epoch": 42.82, + "learning_rate": 1.7957488394820423e-08, + "loss": 3.7578, + "step": 3855500 + }, + { + "epoch": 42.82, + "learning_rate": 1.7943606601070562e-08, + "loss": 3.7573, + "step": 3856000 + }, + { + "epoch": 42.83, + "learning_rate": 1.79297248073207e-08, + "loss": 3.7686, + "step": 3856500 + }, + { + "epoch": 42.83, + "learning_rate": 1.791584301357084e-08, + "loss": 3.7626, + "step": 3857000 + }, + { + "epoch": 42.84, + "learning_rate": 1.790196121982098e-08, + "loss": 3.7464, + "step": 3857500 + }, + { + "epoch": 42.84, + "learning_rate": 1.7888079426071118e-08, + "loss": 3.7486, + "step": 3858000 + }, + { + "epoch": 42.85, + "learning_rate": 1.7874197632321257e-08, + "loss": 3.7533, + "step": 3858500 + }, + { + "epoch": 42.86, + "learning_rate": 1.7860315838571396e-08, + "loss": 3.7628, + "step": 3859000 + }, + { + "epoch": 42.86, + "learning_rate": 1.7846434044821535e-08, + "loss": 3.7502, + "step": 3859500 + }, + { + "epoch": 42.87, + "learning_rate": 1.7832552251071674e-08, + "loss": 3.7589, + "step": 3860000 + }, + { + "epoch": 42.87, + "learning_rate": 1.7818670457321813e-08, + "loss": 3.7314, + "step": 3860500 + }, + { + "epoch": 42.88, + "learning_rate": 1.7804788663571952e-08, + "loss": 3.7613, + "step": 3861000 + }, + { + "epoch": 42.88, + "learning_rate": 1.779090686982209e-08, + "loss": 3.7562, + "step": 3861500 + }, + { + "epoch": 42.89, + "learning_rate": 1.777702507607223e-08, + "loss": 3.7591, + "step": 3862000 + }, + { + "epoch": 42.89, + "learning_rate": 1.776314328232237e-08, + "loss": 3.7408, + "step": 3862500 + }, + { + "epoch": 42.9, + "learning_rate": 1.7749261488572508e-08, + "loss": 3.7382, + "step": 3863000 + }, + { + "epoch": 42.91, + "learning_rate": 1.7735379694822643e-08, + "loss": 3.7479, + "step": 3863500 + }, + { + "epoch": 42.91, + "learning_rate": 1.7721497901072786e-08, + "loss": 3.7333, + "step": 3864000 + }, + { + "epoch": 42.92, + "learning_rate": 1.7707616107322925e-08, + "loss": 3.7654, + "step": 3864500 + }, + { + "epoch": 42.92, + "learning_rate": 1.769373431357306e-08, + "loss": 3.75, + "step": 3865000 + }, + { + "epoch": 42.93, + "learning_rate": 1.7679852519823203e-08, + "loss": 3.7363, + "step": 3865500 + }, + { + "epoch": 42.93, + "learning_rate": 1.766597072607334e-08, + "loss": 3.7382, + "step": 3866000 + }, + { + "epoch": 42.94, + "learning_rate": 1.7652088932323477e-08, + "loss": 3.743, + "step": 3866500 + }, + { + "epoch": 42.94, + "learning_rate": 1.7638207138573616e-08, + "loss": 3.7381, + "step": 3867000 + }, + { + "epoch": 42.95, + "learning_rate": 1.7624325344823755e-08, + "loss": 3.7507, + "step": 3867500 + }, + { + "epoch": 42.96, + "learning_rate": 1.7610443551073894e-08, + "loss": 3.7458, + "step": 3868000 + }, + { + "epoch": 42.96, + "learning_rate": 1.7596561757324033e-08, + "loss": 3.7623, + "step": 3868500 + }, + { + "epoch": 42.97, + "learning_rate": 1.7582679963574172e-08, + "loss": 3.7407, + "step": 3869000 + }, + { + "epoch": 42.97, + "learning_rate": 1.756879816982431e-08, + "loss": 3.7582, + "step": 3869500 + }, + { + "epoch": 42.98, + "learning_rate": 1.755491637607445e-08, + "loss": 3.7495, + "step": 3870000 + }, + { + "epoch": 42.98, + "learning_rate": 1.754103458232459e-08, + "loss": 3.7433, + "step": 3870500 + }, + { + "epoch": 42.99, + "learning_rate": 1.7527152788574728e-08, + "loss": 3.751, + "step": 3871000 + }, + { + "epoch": 42.99, + "learning_rate": 1.7513270994824867e-08, + "loss": 3.7533, + "step": 3871500 + }, + { + "epoch": 43.0, + "eval_loss": 3.8255672454833984, + "eval_runtime": 6.3042, + "eval_samples_per_second": 246.502, + "step": 3871978 + }, + { + "epoch": 43.0, + "learning_rate": 1.7499389201075006e-08, + "loss": 3.7528, + "step": 3872000 + }, + { + "epoch": 43.01, + "learning_rate": 1.7485507407325145e-08, + "loss": 3.7406, + "step": 3872500 + }, + { + "epoch": 43.01, + "learning_rate": 1.7471625613575284e-08, + "loss": 3.7506, + "step": 3873000 + }, + { + "epoch": 43.02, + "learning_rate": 1.7457743819825422e-08, + "loss": 3.7445, + "step": 3873500 + }, + { + "epoch": 43.02, + "learning_rate": 1.7443862026075558e-08, + "loss": 3.7486, + "step": 3874000 + }, + { + "epoch": 43.03, + "learning_rate": 1.74299802323257e-08, + "loss": 3.7601, + "step": 3874500 + }, + { + "epoch": 43.03, + "learning_rate": 1.741609843857584e-08, + "loss": 3.7414, + "step": 3875000 + }, + { + "epoch": 43.04, + "learning_rate": 1.7402216644825975e-08, + "loss": 3.7485, + "step": 3875500 + }, + { + "epoch": 43.04, + "learning_rate": 1.7388334851076117e-08, + "loss": 3.7608, + "step": 3876000 + }, + { + "epoch": 43.05, + "learning_rate": 1.7374453057326256e-08, + "loss": 3.7693, + "step": 3876500 + }, + { + "epoch": 43.06, + "learning_rate": 1.7360571263576392e-08, + "loss": 3.7726, + "step": 3877000 + }, + { + "epoch": 43.06, + "learning_rate": 1.7346689469826534e-08, + "loss": 3.7592, + "step": 3877500 + }, + { + "epoch": 43.07, + "learning_rate": 1.7332807676076673e-08, + "loss": 3.7737, + "step": 3878000 + }, + { + "epoch": 43.07, + "learning_rate": 1.731892588232681e-08, + "loss": 3.7501, + "step": 3878500 + }, + { + "epoch": 43.08, + "learning_rate": 1.7305044088576948e-08, + "loss": 3.7322, + "step": 3879000 + }, + { + "epoch": 43.08, + "learning_rate": 1.729116229482709e-08, + "loss": 3.7318, + "step": 3879500 + }, + { + "epoch": 43.09, + "learning_rate": 1.7277280501077226e-08, + "loss": 3.7381, + "step": 3880000 + }, + { + "epoch": 43.09, + "learning_rate": 1.7263398707327365e-08, + "loss": 3.7397, + "step": 3880500 + }, + { + "epoch": 43.1, + "learning_rate": 1.7249516913577507e-08, + "loss": 3.7475, + "step": 3881000 + }, + { + "epoch": 43.11, + "learning_rate": 1.7235635119827642e-08, + "loss": 3.7414, + "step": 3881500 + }, + { + "epoch": 43.11, + "learning_rate": 1.722175332607778e-08, + "loss": 3.7565, + "step": 3882000 + }, + { + "epoch": 43.12, + "learning_rate": 1.720787153232792e-08, + "loss": 3.7487, + "step": 3882500 + }, + { + "epoch": 43.12, + "learning_rate": 1.719398973857806e-08, + "loss": 3.7308, + "step": 3883000 + }, + { + "epoch": 43.13, + "learning_rate": 1.7180107944828198e-08, + "loss": 3.7622, + "step": 3883500 + }, + { + "epoch": 43.13, + "learning_rate": 1.7166226151078337e-08, + "loss": 3.7475, + "step": 3884000 + }, + { + "epoch": 43.14, + "learning_rate": 1.7152344357328476e-08, + "loss": 3.7635, + "step": 3884500 + }, + { + "epoch": 43.14, + "learning_rate": 1.7138462563578615e-08, + "loss": 3.7495, + "step": 3885000 + }, + { + "epoch": 43.15, + "learning_rate": 1.7124580769828754e-08, + "loss": 3.7445, + "step": 3885500 + }, + { + "epoch": 43.16, + "learning_rate": 1.711069897607889e-08, + "loss": 3.7559, + "step": 3886000 + }, + { + "epoch": 43.16, + "learning_rate": 1.7096817182329032e-08, + "loss": 3.7689, + "step": 3886500 + }, + { + "epoch": 43.17, + "learning_rate": 1.708293538857917e-08, + "loss": 3.7366, + "step": 3887000 + }, + { + "epoch": 43.17, + "learning_rate": 1.7069053594829307e-08, + "loss": 3.7438, + "step": 3887500 + }, + { + "epoch": 43.18, + "learning_rate": 1.705517180107945e-08, + "loss": 3.7305, + "step": 3888000 + }, + { + "epoch": 43.18, + "learning_rate": 1.7041290007329588e-08, + "loss": 3.7471, + "step": 3888500 + }, + { + "epoch": 43.19, + "learning_rate": 1.7027408213579723e-08, + "loss": 3.7479, + "step": 3889000 + }, + { + "epoch": 43.19, + "learning_rate": 1.7013526419829862e-08, + "loss": 3.7439, + "step": 3889500 + }, + { + "epoch": 43.2, + "learning_rate": 1.6999644626080005e-08, + "loss": 3.7548, + "step": 3890000 + }, + { + "epoch": 43.21, + "learning_rate": 1.698576283233014e-08, + "loss": 3.7437, + "step": 3890500 + }, + { + "epoch": 43.21, + "learning_rate": 1.697188103858028e-08, + "loss": 3.7404, + "step": 3891000 + }, + { + "epoch": 43.22, + "learning_rate": 1.695799924483042e-08, + "loss": 3.7559, + "step": 3891500 + }, + { + "epoch": 43.22, + "learning_rate": 1.6944117451080557e-08, + "loss": 3.7526, + "step": 3892000 + }, + { + "epoch": 43.23, + "learning_rate": 1.6930235657330696e-08, + "loss": 3.7628, + "step": 3892500 + }, + { + "epoch": 43.23, + "learning_rate": 1.6916353863580835e-08, + "loss": 3.755, + "step": 3893000 + }, + { + "epoch": 43.24, + "learning_rate": 1.6902472069830974e-08, + "loss": 3.7483, + "step": 3893500 + }, + { + "epoch": 43.24, + "learning_rate": 1.6888590276081113e-08, + "loss": 3.732, + "step": 3894000 + }, + { + "epoch": 43.25, + "learning_rate": 1.6874708482331252e-08, + "loss": 3.7459, + "step": 3894500 + }, + { + "epoch": 43.26, + "learning_rate": 1.686082668858139e-08, + "loss": 3.7528, + "step": 3895000 + }, + { + "epoch": 43.26, + "learning_rate": 1.684694489483153e-08, + "loss": 3.7382, + "step": 3895500 + }, + { + "epoch": 43.27, + "learning_rate": 1.683306310108167e-08, + "loss": 3.7536, + "step": 3896000 + }, + { + "epoch": 43.27, + "learning_rate": 1.6819181307331808e-08, + "loss": 3.7462, + "step": 3896500 + }, + { + "epoch": 43.28, + "learning_rate": 1.6805299513581947e-08, + "loss": 3.7549, + "step": 3897000 + }, + { + "epoch": 43.28, + "learning_rate": 1.6791417719832086e-08, + "loss": 3.7586, + "step": 3897500 + }, + { + "epoch": 43.29, + "learning_rate": 1.677753592608222e-08, + "loss": 3.7454, + "step": 3898000 + }, + { + "epoch": 43.29, + "learning_rate": 1.6763654132332364e-08, + "loss": 3.7264, + "step": 3898500 + }, + { + "epoch": 43.3, + "learning_rate": 1.6749772338582503e-08, + "loss": 3.7493, + "step": 3899000 + }, + { + "epoch": 43.31, + "learning_rate": 1.6735890544832638e-08, + "loss": 3.7701, + "step": 3899500 + }, + { + "epoch": 43.31, + "learning_rate": 1.6722008751082777e-08, + "loss": 3.7509, + "step": 3900000 + }, + { + "epoch": 43.32, + "learning_rate": 1.670812695733292e-08, + "loss": 3.7465, + "step": 3900500 + }, + { + "epoch": 43.32, + "learning_rate": 1.6694245163583055e-08, + "loss": 3.7447, + "step": 3901000 + }, + { + "epoch": 43.33, + "learning_rate": 1.6680363369833194e-08, + "loss": 3.7437, + "step": 3901500 + }, + { + "epoch": 43.33, + "learning_rate": 1.6666481576083336e-08, + "loss": 3.7702, + "step": 3902000 + }, + { + "epoch": 43.34, + "learning_rate": 1.6652599782333472e-08, + "loss": 3.7513, + "step": 3902500 + }, + { + "epoch": 43.34, + "learning_rate": 1.663871798858361e-08, + "loss": 3.7491, + "step": 3903000 + }, + { + "epoch": 43.35, + "learning_rate": 1.662483619483375e-08, + "loss": 3.7572, + "step": 3903500 + }, + { + "epoch": 43.36, + "learning_rate": 1.661095440108389e-08, + "loss": 3.7428, + "step": 3904000 + }, + { + "epoch": 43.36, + "learning_rate": 1.6597072607334028e-08, + "loss": 3.735, + "step": 3904500 + }, + { + "epoch": 43.37, + "learning_rate": 1.6583190813584167e-08, + "loss": 3.7267, + "step": 3905000 + }, + { + "epoch": 43.37, + "learning_rate": 1.6569309019834306e-08, + "loss": 3.7736, + "step": 3905500 + }, + { + "epoch": 43.38, + "learning_rate": 1.6555427226084445e-08, + "loss": 3.749, + "step": 3906000 + }, + { + "epoch": 43.38, + "learning_rate": 1.6541545432334584e-08, + "loss": 3.7508, + "step": 3906500 + }, + { + "epoch": 43.39, + "learning_rate": 1.6527663638584723e-08, + "loss": 3.7404, + "step": 3907000 + }, + { + "epoch": 43.39, + "learning_rate": 1.651378184483486e-08, + "loss": 3.7419, + "step": 3907500 + }, + { + "epoch": 43.4, + "learning_rate": 1.6499900051085e-08, + "loss": 3.7409, + "step": 3908000 + }, + { + "epoch": 43.41, + "learning_rate": 1.648601825733514e-08, + "loss": 3.7618, + "step": 3908500 + }, + { + "epoch": 43.41, + "learning_rate": 1.647213646358528e-08, + "loss": 3.7564, + "step": 3909000 + }, + { + "epoch": 43.42, + "learning_rate": 1.6458254669835417e-08, + "loss": 3.755, + "step": 3909500 + }, + { + "epoch": 43.42, + "learning_rate": 1.6444372876085556e-08, + "loss": 3.744, + "step": 3910000 + }, + { + "epoch": 43.43, + "learning_rate": 1.6430491082335695e-08, + "loss": 3.7483, + "step": 3910500 + }, + { + "epoch": 43.43, + "learning_rate": 1.6416609288585834e-08, + "loss": 3.7437, + "step": 3911000 + }, + { + "epoch": 43.44, + "learning_rate": 1.6402727494835973e-08, + "loss": 3.7425, + "step": 3911500 + }, + { + "epoch": 43.44, + "learning_rate": 1.638884570108611e-08, + "loss": 3.7548, + "step": 3912000 + }, + { + "epoch": 43.45, + "learning_rate": 1.637496390733625e-08, + "loss": 3.7564, + "step": 3912500 + }, + { + "epoch": 43.46, + "learning_rate": 1.636108211358639e-08, + "loss": 3.7471, + "step": 3913000 + }, + { + "epoch": 43.46, + "learning_rate": 1.6347200319836526e-08, + "loss": 3.7405, + "step": 3913500 + }, + { + "epoch": 43.47, + "learning_rate": 1.6333318526086668e-08, + "loss": 3.7463, + "step": 3914000 + }, + { + "epoch": 43.47, + "learning_rate": 1.6319436732336807e-08, + "loss": 3.7577, + "step": 3914500 + }, + { + "epoch": 43.48, + "learning_rate": 1.6305554938586943e-08, + "loss": 3.7576, + "step": 3915000 + }, + { + "epoch": 43.48, + "learning_rate": 1.629167314483708e-08, + "loss": 3.7591, + "step": 3915500 + }, + { + "epoch": 43.49, + "learning_rate": 1.6277791351087224e-08, + "loss": 3.7517, + "step": 3916000 + }, + { + "epoch": 43.49, + "learning_rate": 1.626390955733736e-08, + "loss": 3.7541, + "step": 3916500 + }, + { + "epoch": 43.5, + "learning_rate": 1.62500277635875e-08, + "loss": 3.7497, + "step": 3917000 + }, + { + "epoch": 43.51, + "learning_rate": 1.623614596983764e-08, + "loss": 3.7563, + "step": 3917500 + }, + { + "epoch": 43.51, + "learning_rate": 1.6222264176087776e-08, + "loss": 3.7538, + "step": 3918000 + }, + { + "epoch": 43.52, + "learning_rate": 1.6208382382337915e-08, + "loss": 3.75, + "step": 3918500 + }, + { + "epoch": 43.52, + "learning_rate": 1.6194500588588054e-08, + "loss": 3.7583, + "step": 3919000 + }, + { + "epoch": 43.53, + "learning_rate": 1.6180618794838193e-08, + "loss": 3.7424, + "step": 3919500 + }, + { + "epoch": 43.53, + "learning_rate": 1.6166737001088332e-08, + "loss": 3.7493, + "step": 3920000 + }, + { + "epoch": 43.54, + "learning_rate": 1.615285520733847e-08, + "loss": 3.7348, + "step": 3920500 + }, + { + "epoch": 43.54, + "learning_rate": 1.613897341358861e-08, + "loss": 3.7469, + "step": 3921000 + }, + { + "epoch": 43.55, + "learning_rate": 1.612509161983875e-08, + "loss": 3.732, + "step": 3921500 + }, + { + "epoch": 43.56, + "learning_rate": 1.6111209826088888e-08, + "loss": 3.7543, + "step": 3922000 + }, + { + "epoch": 43.56, + "learning_rate": 1.6097328032339024e-08, + "loss": 3.7402, + "step": 3922500 + }, + { + "epoch": 43.57, + "learning_rate": 1.6083446238589166e-08, + "loss": 3.7561, + "step": 3923000 + }, + { + "epoch": 43.57, + "learning_rate": 1.6069564444839305e-08, + "loss": 3.7522, + "step": 3923500 + }, + { + "epoch": 43.58, + "learning_rate": 1.605568265108944e-08, + "loss": 3.7392, + "step": 3924000 + }, + { + "epoch": 43.58, + "learning_rate": 1.6041800857339583e-08, + "loss": 3.755, + "step": 3924500 + }, + { + "epoch": 43.59, + "learning_rate": 1.6027919063589722e-08, + "loss": 3.7657, + "step": 3925000 + }, + { + "epoch": 43.59, + "learning_rate": 1.6014037269839857e-08, + "loss": 3.7469, + "step": 3925500 + }, + { + "epoch": 43.6, + "learning_rate": 1.6000155476089996e-08, + "loss": 3.7536, + "step": 3926000 + }, + { + "epoch": 43.61, + "learning_rate": 1.598627368234014e-08, + "loss": 3.7645, + "step": 3926500 + }, + { + "epoch": 43.61, + "learning_rate": 1.5972391888590274e-08, + "loss": 3.761, + "step": 3927000 + }, + { + "epoch": 43.62, + "learning_rate": 1.5958510094840413e-08, + "loss": 3.7535, + "step": 3927500 + }, + { + "epoch": 43.62, + "learning_rate": 1.5944628301090555e-08, + "loss": 3.7536, + "step": 3928000 + }, + { + "epoch": 43.63, + "learning_rate": 1.593074650734069e-08, + "loss": 3.7379, + "step": 3928500 + }, + { + "epoch": 43.63, + "learning_rate": 1.591686471359083e-08, + "loss": 3.7404, + "step": 3929000 + }, + { + "epoch": 43.64, + "learning_rate": 1.590298291984097e-08, + "loss": 3.7342, + "step": 3929500 + }, + { + "epoch": 43.64, + "learning_rate": 1.5889101126091108e-08, + "loss": 3.7661, + "step": 3930000 + }, + { + "epoch": 43.65, + "learning_rate": 1.5875219332341247e-08, + "loss": 3.7399, + "step": 3930500 + }, + { + "epoch": 43.66, + "learning_rate": 1.5861337538591386e-08, + "loss": 3.7416, + "step": 3931000 + }, + { + "epoch": 43.66, + "learning_rate": 1.5847455744841525e-08, + "loss": 3.7442, + "step": 3931500 + }, + { + "epoch": 43.67, + "learning_rate": 1.5833573951091664e-08, + "loss": 3.7418, + "step": 3932000 + }, + { + "epoch": 43.67, + "learning_rate": 1.5819692157341803e-08, + "loss": 3.7529, + "step": 3932500 + }, + { + "epoch": 43.68, + "learning_rate": 1.5805810363591938e-08, + "loss": 3.7489, + "step": 3933000 + }, + { + "epoch": 43.68, + "learning_rate": 1.579192856984208e-08, + "loss": 3.7534, + "step": 3933500 + }, + { + "epoch": 43.69, + "learning_rate": 1.577804677609222e-08, + "loss": 3.7475, + "step": 3934000 + }, + { + "epoch": 43.69, + "learning_rate": 1.5764164982342355e-08, + "loss": 3.754, + "step": 3934500 + }, + { + "epoch": 43.7, + "learning_rate": 1.5750283188592497e-08, + "loss": 3.7412, + "step": 3935000 + }, + { + "epoch": 43.71, + "learning_rate": 1.5736401394842636e-08, + "loss": 3.7617, + "step": 3935500 + }, + { + "epoch": 43.71, + "learning_rate": 1.5722519601092772e-08, + "loss": 3.7397, + "step": 3936000 + }, + { + "epoch": 43.72, + "learning_rate": 1.570863780734291e-08, + "loss": 3.7407, + "step": 3936500 + }, + { + "epoch": 43.72, + "learning_rate": 1.5694756013593053e-08, + "loss": 3.7395, + "step": 3937000 + }, + { + "epoch": 43.73, + "learning_rate": 1.568087421984319e-08, + "loss": 3.7607, + "step": 3937500 + }, + { + "epoch": 43.73, + "learning_rate": 1.5666992426093328e-08, + "loss": 3.7512, + "step": 3938000 + }, + { + "epoch": 43.74, + "learning_rate": 1.565311063234347e-08, + "loss": 3.758, + "step": 3938500 + }, + { + "epoch": 43.74, + "learning_rate": 1.5639228838593606e-08, + "loss": 3.7398, + "step": 3939000 + }, + { + "epoch": 43.75, + "learning_rate": 1.5625347044843745e-08, + "loss": 3.7476, + "step": 3939500 + }, + { + "epoch": 43.76, + "learning_rate": 1.5611465251093887e-08, + "loss": 3.7398, + "step": 3940000 + }, + { + "epoch": 43.76, + "learning_rate": 1.5597583457344023e-08, + "loss": 3.7547, + "step": 3940500 + }, + { + "epoch": 43.77, + "learning_rate": 1.558370166359416e-08, + "loss": 3.7469, + "step": 3941000 + }, + { + "epoch": 43.77, + "learning_rate": 1.55698198698443e-08, + "loss": 3.746, + "step": 3941500 + }, + { + "epoch": 43.78, + "learning_rate": 1.555593807609444e-08, + "loss": 3.7542, + "step": 3942000 + }, + { + "epoch": 43.78, + "learning_rate": 1.554205628234458e-08, + "loss": 3.743, + "step": 3942500 + }, + { + "epoch": 43.79, + "learning_rate": 1.5528174488594717e-08, + "loss": 3.7359, + "step": 3943000 + }, + { + "epoch": 43.79, + "learning_rate": 1.5514292694844856e-08, + "loss": 3.7464, + "step": 3943500 + }, + { + "epoch": 43.8, + "learning_rate": 1.5500410901094995e-08, + "loss": 3.7674, + "step": 3944000 + }, + { + "epoch": 43.81, + "learning_rate": 1.5486529107345134e-08, + "loss": 3.7369, + "step": 3944500 + }, + { + "epoch": 43.81, + "learning_rate": 1.5472647313595273e-08, + "loss": 3.7428, + "step": 3945000 + }, + { + "epoch": 43.82, + "learning_rate": 1.5458765519845412e-08, + "loss": 3.7261, + "step": 3945500 + }, + { + "epoch": 43.82, + "learning_rate": 1.544488372609555e-08, + "loss": 3.7375, + "step": 3946000 + }, + { + "epoch": 43.83, + "learning_rate": 1.543100193234569e-08, + "loss": 3.7342, + "step": 3946500 + }, + { + "epoch": 43.83, + "learning_rate": 1.541712013859583e-08, + "loss": 3.7523, + "step": 3947000 + }, + { + "epoch": 43.84, + "learning_rate": 1.5403238344845968e-08, + "loss": 3.7373, + "step": 3947500 + }, + { + "epoch": 43.84, + "learning_rate": 1.5389356551096107e-08, + "loss": 3.7577, + "step": 3948000 + }, + { + "epoch": 43.85, + "learning_rate": 1.5375474757346243e-08, + "loss": 3.7551, + "step": 3948500 + }, + { + "epoch": 43.86, + "learning_rate": 1.5361592963596385e-08, + "loss": 3.7512, + "step": 3949000 + }, + { + "epoch": 43.86, + "learning_rate": 1.5347711169846524e-08, + "loss": 3.7569, + "step": 3949500 + }, + { + "epoch": 43.87, + "learning_rate": 1.533382937609666e-08, + "loss": 3.7532, + "step": 3950000 + }, + { + "epoch": 43.87, + "learning_rate": 1.5319947582346802e-08, + "loss": 3.7384, + "step": 3950500 + }, + { + "epoch": 43.88, + "learning_rate": 1.530606578859694e-08, + "loss": 3.7415, + "step": 3951000 + }, + { + "epoch": 43.88, + "learning_rate": 1.5292183994847076e-08, + "loss": 3.742, + "step": 3951500 + }, + { + "epoch": 43.89, + "learning_rate": 1.5278302201097215e-08, + "loss": 3.7574, + "step": 3952000 + }, + { + "epoch": 43.89, + "learning_rate": 1.5264420407347354e-08, + "loss": 3.7507, + "step": 3952500 + }, + { + "epoch": 43.9, + "learning_rate": 1.5250538613597493e-08, + "loss": 3.7542, + "step": 3953000 + }, + { + "epoch": 43.91, + "learning_rate": 1.5236656819847632e-08, + "loss": 3.7392, + "step": 3953500 + }, + { + "epoch": 43.91, + "learning_rate": 1.522277502609777e-08, + "loss": 3.7515, + "step": 3954000 + }, + { + "epoch": 43.92, + "learning_rate": 1.520889323234791e-08, + "loss": 3.7611, + "step": 3954500 + }, + { + "epoch": 43.92, + "learning_rate": 1.519501143859805e-08, + "loss": 3.77, + "step": 3955000 + }, + { + "epoch": 43.93, + "learning_rate": 1.5181129644848188e-08, + "loss": 3.743, + "step": 3955500 + }, + { + "epoch": 43.93, + "learning_rate": 1.5167247851098327e-08, + "loss": 3.7243, + "step": 3956000 + }, + { + "epoch": 43.94, + "learning_rate": 1.5153366057348466e-08, + "loss": 3.7485, + "step": 3956500 + }, + { + "epoch": 43.94, + "learning_rate": 1.5139484263598605e-08, + "loss": 3.7357, + "step": 3957000 + }, + { + "epoch": 43.95, + "learning_rate": 1.5125602469848744e-08, + "loss": 3.7501, + "step": 3957500 + }, + { + "epoch": 43.96, + "learning_rate": 1.5111720676098883e-08, + "loss": 3.7529, + "step": 3958000 + }, + { + "epoch": 43.96, + "learning_rate": 1.5097838882349022e-08, + "loss": 3.7492, + "step": 3958500 + }, + { + "epoch": 43.97, + "learning_rate": 1.5083957088599157e-08, + "loss": 3.7333, + "step": 3959000 + }, + { + "epoch": 43.97, + "learning_rate": 1.50700752948493e-08, + "loss": 3.7454, + "step": 3959500 + }, + { + "epoch": 43.98, + "learning_rate": 1.505619350109944e-08, + "loss": 3.7475, + "step": 3960000 + }, + { + "epoch": 43.98, + "learning_rate": 1.5042311707349574e-08, + "loss": 3.7323, + "step": 3960500 + }, + { + "epoch": 43.99, + "learning_rate": 1.5028429913599717e-08, + "loss": 3.7384, + "step": 3961000 + }, + { + "epoch": 43.99, + "learning_rate": 1.5014548119849855e-08, + "loss": 3.7494, + "step": 3961500 + }, + { + "epoch": 44.0, + "learning_rate": 1.500066632609999e-08, + "loss": 3.7449, + "step": 3962000 + }, + { + "epoch": 44.0, + "eval_loss": 3.8248701095581055, + "eval_runtime": 6.3081, + "eval_samples_per_second": 246.351, + "step": 3962024 + }, + { + "epoch": 44.01, + "learning_rate": 1.498678453235013e-08, + "loss": 3.7435, + "step": 3962500 + }, + { + "epoch": 44.01, + "learning_rate": 1.4972902738600272e-08, + "loss": 3.7607, + "step": 3963000 + }, + { + "epoch": 44.02, + "learning_rate": 1.4959020944850408e-08, + "loss": 3.7453, + "step": 3963500 + }, + { + "epoch": 44.02, + "learning_rate": 1.4945139151100547e-08, + "loss": 3.7395, + "step": 3964000 + }, + { + "epoch": 44.03, + "learning_rate": 1.493125735735069e-08, + "loss": 3.73, + "step": 3964500 + }, + { + "epoch": 44.03, + "learning_rate": 1.4917375563600825e-08, + "loss": 3.7628, + "step": 3965000 + }, + { + "epoch": 44.04, + "learning_rate": 1.4903493769850964e-08, + "loss": 3.7502, + "step": 3965500 + }, + { + "epoch": 44.04, + "learning_rate": 1.4889611976101101e-08, + "loss": 3.7414, + "step": 3966000 + }, + { + "epoch": 44.05, + "learning_rate": 1.4875730182351243e-08, + "loss": 3.7374, + "step": 3966500 + }, + { + "epoch": 44.06, + "learning_rate": 1.486184838860138e-08, + "loss": 3.7627, + "step": 3967000 + }, + { + "epoch": 44.06, + "learning_rate": 1.4847966594851518e-08, + "loss": 3.7364, + "step": 3967500 + }, + { + "epoch": 44.07, + "learning_rate": 1.483408480110166e-08, + "loss": 3.7583, + "step": 3968000 + }, + { + "epoch": 44.07, + "learning_rate": 1.4820203007351798e-08, + "loss": 3.7499, + "step": 3968500 + }, + { + "epoch": 44.08, + "learning_rate": 1.4806321213601935e-08, + "loss": 3.7442, + "step": 3969000 + }, + { + "epoch": 44.08, + "learning_rate": 1.4792439419852074e-08, + "loss": 3.7424, + "step": 3969500 + }, + { + "epoch": 44.09, + "learning_rate": 1.4778557626102214e-08, + "loss": 3.7549, + "step": 3970000 + }, + { + "epoch": 44.09, + "learning_rate": 1.4764675832352352e-08, + "loss": 3.7386, + "step": 3970500 + }, + { + "epoch": 44.1, + "learning_rate": 1.475079403860249e-08, + "loss": 3.7489, + "step": 3971000 + }, + { + "epoch": 44.11, + "learning_rate": 1.4736912244852631e-08, + "loss": 3.7499, + "step": 3971500 + }, + { + "epoch": 44.11, + "learning_rate": 1.4723030451102769e-08, + "loss": 3.7492, + "step": 3972000 + }, + { + "epoch": 44.12, + "learning_rate": 1.4709148657352908e-08, + "loss": 3.7406, + "step": 3972500 + }, + { + "epoch": 44.12, + "learning_rate": 1.4695266863603048e-08, + "loss": 3.7284, + "step": 3973000 + }, + { + "epoch": 44.13, + "learning_rate": 1.4681385069853185e-08, + "loss": 3.7371, + "step": 3973500 + }, + { + "epoch": 44.13, + "learning_rate": 1.4667503276103324e-08, + "loss": 3.7484, + "step": 3974000 + }, + { + "epoch": 44.14, + "learning_rate": 1.4653621482353462e-08, + "loss": 3.762, + "step": 3974500 + }, + { + "epoch": 44.14, + "learning_rate": 1.4639739688603602e-08, + "loss": 3.7443, + "step": 3975000 + }, + { + "epoch": 44.15, + "learning_rate": 1.4625857894853741e-08, + "loss": 3.7604, + "step": 3975500 + }, + { + "epoch": 44.16, + "learning_rate": 1.4611976101103879e-08, + "loss": 3.7367, + "step": 3976000 + }, + { + "epoch": 44.16, + "learning_rate": 1.4598094307354019e-08, + "loss": 3.7637, + "step": 3976500 + }, + { + "epoch": 44.17, + "learning_rate": 1.4584212513604158e-08, + "loss": 3.7452, + "step": 3977000 + }, + { + "epoch": 44.17, + "learning_rate": 1.4570330719854295e-08, + "loss": 3.7246, + "step": 3977500 + }, + { + "epoch": 44.18, + "learning_rate": 1.4556448926104434e-08, + "loss": 3.7642, + "step": 3978000 + }, + { + "epoch": 44.18, + "learning_rate": 1.4542567132354575e-08, + "loss": 3.7721, + "step": 3978500 + }, + { + "epoch": 44.19, + "learning_rate": 1.4528685338604712e-08, + "loss": 3.7495, + "step": 3979000 + }, + { + "epoch": 44.19, + "learning_rate": 1.4514803544854851e-08, + "loss": 3.7444, + "step": 3979500 + }, + { + "epoch": 44.2, + "learning_rate": 1.4500921751104992e-08, + "loss": 3.7519, + "step": 3980000 + }, + { + "epoch": 44.21, + "learning_rate": 1.4487039957355129e-08, + "loss": 3.7442, + "step": 3980500 + }, + { + "epoch": 44.21, + "learning_rate": 1.4473158163605268e-08, + "loss": 3.768, + "step": 3981000 + }, + { + "epoch": 44.22, + "learning_rate": 1.4459276369855405e-08, + "loss": 3.7554, + "step": 3981500 + }, + { + "epoch": 44.22, + "learning_rate": 1.4445394576105546e-08, + "loss": 3.7301, + "step": 3982000 + }, + { + "epoch": 44.23, + "learning_rate": 1.4431512782355685e-08, + "loss": 3.7521, + "step": 3982500 + }, + { + "epoch": 44.23, + "learning_rate": 1.4417630988605822e-08, + "loss": 3.7522, + "step": 3983000 + }, + { + "epoch": 44.24, + "learning_rate": 1.4403749194855963e-08, + "loss": 3.7554, + "step": 3983500 + }, + { + "epoch": 44.24, + "learning_rate": 1.4389867401106102e-08, + "loss": 3.7347, + "step": 3984000 + }, + { + "epoch": 44.25, + "learning_rate": 1.4375985607356239e-08, + "loss": 3.7266, + "step": 3984500 + }, + { + "epoch": 44.26, + "learning_rate": 1.4362103813606376e-08, + "loss": 3.741, + "step": 3985000 + }, + { + "epoch": 44.26, + "learning_rate": 1.4348222019856519e-08, + "loss": 3.7482, + "step": 3985500 + }, + { + "epoch": 44.27, + "learning_rate": 1.4334340226106656e-08, + "loss": 3.7459, + "step": 3986000 + }, + { + "epoch": 44.27, + "learning_rate": 1.4320458432356793e-08, + "loss": 3.7621, + "step": 3986500 + }, + { + "epoch": 44.28, + "learning_rate": 1.4306576638606936e-08, + "loss": 3.7382, + "step": 3987000 + }, + { + "epoch": 44.28, + "learning_rate": 1.4292694844857073e-08, + "loss": 3.748, + "step": 3987500 + }, + { + "epoch": 44.29, + "learning_rate": 1.427881305110721e-08, + "loss": 3.7637, + "step": 3988000 + }, + { + "epoch": 44.29, + "learning_rate": 1.4264931257357349e-08, + "loss": 3.7428, + "step": 3988500 + }, + { + "epoch": 44.3, + "learning_rate": 1.425104946360749e-08, + "loss": 3.7289, + "step": 3989000 + }, + { + "epoch": 44.31, + "learning_rate": 1.4237167669857627e-08, + "loss": 3.7704, + "step": 3989500 + }, + { + "epoch": 44.31, + "learning_rate": 1.4223285876107766e-08, + "loss": 3.7569, + "step": 3990000 + }, + { + "epoch": 44.32, + "learning_rate": 1.4209404082357907e-08, + "loss": 3.771, + "step": 3990500 + }, + { + "epoch": 44.32, + "learning_rate": 1.4195522288608044e-08, + "loss": 3.7529, + "step": 3991000 + }, + { + "epoch": 44.33, + "learning_rate": 1.4181640494858183e-08, + "loss": 3.7669, + "step": 3991500 + }, + { + "epoch": 44.33, + "learning_rate": 1.416775870110832e-08, + "loss": 3.7479, + "step": 3992000 + }, + { + "epoch": 44.34, + "learning_rate": 1.415387690735846e-08, + "loss": 3.7628, + "step": 3992500 + }, + { + "epoch": 44.34, + "learning_rate": 1.41399951136086e-08, + "loss": 3.7516, + "step": 3993000 + }, + { + "epoch": 44.35, + "learning_rate": 1.4126113319858737e-08, + "loss": 3.7432, + "step": 3993500 + }, + { + "epoch": 44.36, + "learning_rate": 1.4112231526108878e-08, + "loss": 3.7659, + "step": 3994000 + }, + { + "epoch": 44.36, + "learning_rate": 1.4098349732359017e-08, + "loss": 3.7552, + "step": 3994500 + }, + { + "epoch": 44.37, + "learning_rate": 1.4084467938609154e-08, + "loss": 3.7322, + "step": 3995000 + }, + { + "epoch": 44.37, + "learning_rate": 1.4070586144859293e-08, + "loss": 3.7449, + "step": 3995500 + }, + { + "epoch": 44.38, + "learning_rate": 1.4056704351109433e-08, + "loss": 3.7483, + "step": 3996000 + }, + { + "epoch": 44.38, + "learning_rate": 1.404282255735957e-08, + "loss": 3.7239, + "step": 3996500 + }, + { + "epoch": 44.39, + "learning_rate": 1.402894076360971e-08, + "loss": 3.7373, + "step": 3997000 + }, + { + "epoch": 44.39, + "learning_rate": 1.401505896985985e-08, + "loss": 3.7341, + "step": 3997500 + }, + { + "epoch": 44.4, + "learning_rate": 1.4001177176109988e-08, + "loss": 3.7541, + "step": 3998000 + }, + { + "epoch": 44.41, + "learning_rate": 1.3987295382360127e-08, + "loss": 3.7219, + "step": 3998500 + }, + { + "epoch": 44.41, + "learning_rate": 1.3973413588610264e-08, + "loss": 3.7479, + "step": 3999000 + }, + { + "epoch": 44.42, + "learning_rate": 1.3959531794860404e-08, + "loss": 3.7457, + "step": 3999500 + }, + { + "epoch": 44.42, + "learning_rate": 1.3945650001110543e-08, + "loss": 3.7491, + "step": 4000000 + }, + { + "epoch": 44.43, + "learning_rate": 1.393176820736068e-08, + "loss": 3.7566, + "step": 4000500 + }, + { + "epoch": 44.43, + "learning_rate": 1.3917886413610821e-08, + "loss": 3.7669, + "step": 4001000 + }, + { + "epoch": 44.44, + "learning_rate": 1.390400461986096e-08, + "loss": 3.7531, + "step": 4001500 + }, + { + "epoch": 44.44, + "learning_rate": 1.3890122826111098e-08, + "loss": 3.7343, + "step": 4002000 + }, + { + "epoch": 44.45, + "learning_rate": 1.3876241032361238e-08, + "loss": 3.7539, + "step": 4002500 + }, + { + "epoch": 44.46, + "learning_rate": 1.3862359238611376e-08, + "loss": 3.7508, + "step": 4003000 + }, + { + "epoch": 44.46, + "learning_rate": 1.3848477444861514e-08, + "loss": 3.7296, + "step": 4003500 + }, + { + "epoch": 44.47, + "learning_rate": 1.3834595651111652e-08, + "loss": 3.7681, + "step": 4004000 + }, + { + "epoch": 44.47, + "learning_rate": 1.3820713857361792e-08, + "loss": 3.7451, + "step": 4004500 + }, + { + "epoch": 44.48, + "learning_rate": 1.3806832063611931e-08, + "loss": 3.7555, + "step": 4005000 + }, + { + "epoch": 44.48, + "learning_rate": 1.3792950269862069e-08, + "loss": 3.745, + "step": 4005500 + }, + { + "epoch": 44.49, + "learning_rate": 1.377906847611221e-08, + "loss": 3.7486, + "step": 4006000 + }, + { + "epoch": 44.49, + "learning_rate": 1.3765186682362348e-08, + "loss": 3.7372, + "step": 4006500 + }, + { + "epoch": 44.5, + "learning_rate": 1.3751304888612486e-08, + "loss": 3.7507, + "step": 4007000 + }, + { + "epoch": 44.51, + "learning_rate": 1.3737423094862624e-08, + "loss": 3.7391, + "step": 4007500 + }, + { + "epoch": 44.51, + "learning_rate": 1.3723541301112765e-08, + "loss": 3.7495, + "step": 4008000 + }, + { + "epoch": 44.52, + "learning_rate": 1.3709659507362902e-08, + "loss": 3.7727, + "step": 4008500 + }, + { + "epoch": 44.52, + "learning_rate": 1.3695777713613041e-08, + "loss": 3.756, + "step": 4009000 + }, + { + "epoch": 44.53, + "learning_rate": 1.3681895919863182e-08, + "loss": 3.7414, + "step": 4009500 + }, + { + "epoch": 44.53, + "learning_rate": 1.366801412611332e-08, + "loss": 3.7522, + "step": 4010000 + }, + { + "epoch": 44.54, + "learning_rate": 1.3654132332363458e-08, + "loss": 3.7396, + "step": 4010500 + }, + { + "epoch": 44.54, + "learning_rate": 1.3640250538613596e-08, + "loss": 3.7509, + "step": 4011000 + }, + { + "epoch": 44.55, + "learning_rate": 1.3626368744863736e-08, + "loss": 3.7472, + "step": 4011500 + }, + { + "epoch": 44.56, + "learning_rate": 1.3612486951113875e-08, + "loss": 3.759, + "step": 4012000 + }, + { + "epoch": 44.56, + "learning_rate": 1.3598605157364012e-08, + "loss": 3.7635, + "step": 4012500 + }, + { + "epoch": 44.57, + "learning_rate": 1.3584723363614153e-08, + "loss": 3.7449, + "step": 4013000 + }, + { + "epoch": 44.57, + "learning_rate": 1.3570841569864292e-08, + "loss": 3.7508, + "step": 4013500 + }, + { + "epoch": 44.58, + "learning_rate": 1.355695977611443e-08, + "loss": 3.7459, + "step": 4014000 + }, + { + "epoch": 44.58, + "learning_rate": 1.3543077982364568e-08, + "loss": 3.7684, + "step": 4014500 + }, + { + "epoch": 44.59, + "learning_rate": 1.3529196188614709e-08, + "loss": 3.7569, + "step": 4015000 + }, + { + "epoch": 44.59, + "learning_rate": 1.3515314394864846e-08, + "loss": 3.7226, + "step": 4015500 + }, + { + "epoch": 44.6, + "learning_rate": 1.3501432601114985e-08, + "loss": 3.758, + "step": 4016000 + }, + { + "epoch": 44.6, + "learning_rate": 1.3487550807365126e-08, + "loss": 3.758, + "step": 4016500 + }, + { + "epoch": 44.61, + "learning_rate": 1.3473669013615263e-08, + "loss": 3.7382, + "step": 4017000 + }, + { + "epoch": 44.62, + "learning_rate": 1.3459787219865402e-08, + "loss": 3.7436, + "step": 4017500 + }, + { + "epoch": 44.62, + "learning_rate": 1.344590542611554e-08, + "loss": 3.7335, + "step": 4018000 + }, + { + "epoch": 44.63, + "learning_rate": 1.343202363236568e-08, + "loss": 3.7361, + "step": 4018500 + }, + { + "epoch": 44.63, + "learning_rate": 1.3418141838615817e-08, + "loss": 3.7572, + "step": 4019000 + }, + { + "epoch": 44.64, + "learning_rate": 1.3404260044865956e-08, + "loss": 3.7661, + "step": 4019500 + }, + { + "epoch": 44.64, + "learning_rate": 1.3390378251116097e-08, + "loss": 3.7308, + "step": 4020000 + }, + { + "epoch": 44.65, + "learning_rate": 1.3376496457366234e-08, + "loss": 3.7391, + "step": 4020500 + }, + { + "epoch": 44.65, + "learning_rate": 1.3362614663616373e-08, + "loss": 3.7574, + "step": 4021000 + }, + { + "epoch": 44.66, + "learning_rate": 1.334873286986651e-08, + "loss": 3.7558, + "step": 4021500 + }, + { + "epoch": 44.67, + "learning_rate": 1.3334851076116651e-08, + "loss": 3.7232, + "step": 4022000 + }, + { + "epoch": 44.67, + "learning_rate": 1.332096928236679e-08, + "loss": 3.7447, + "step": 4022500 + }, + { + "epoch": 44.68, + "learning_rate": 1.3307087488616927e-08, + "loss": 3.7503, + "step": 4023000 + }, + { + "epoch": 44.68, + "learning_rate": 1.3293205694867068e-08, + "loss": 3.756, + "step": 4023500 + }, + { + "epoch": 44.69, + "learning_rate": 1.3279323901117207e-08, + "loss": 3.7406, + "step": 4024000 + }, + { + "epoch": 44.69, + "learning_rate": 1.3265442107367344e-08, + "loss": 3.7481, + "step": 4024500 + }, + { + "epoch": 44.7, + "learning_rate": 1.3251560313617483e-08, + "loss": 3.7262, + "step": 4025000 + }, + { + "epoch": 44.7, + "learning_rate": 1.3237678519867624e-08, + "loss": 3.7111, + "step": 4025500 + }, + { + "epoch": 44.71, + "learning_rate": 1.3223796726117761e-08, + "loss": 3.7257, + "step": 4026000 + }, + { + "epoch": 44.72, + "learning_rate": 1.32099149323679e-08, + "loss": 3.7578, + "step": 4026500 + }, + { + "epoch": 44.72, + "learning_rate": 1.319603313861804e-08, + "loss": 3.7541, + "step": 4027000 + }, + { + "epoch": 44.73, + "learning_rate": 1.3182151344868178e-08, + "loss": 3.7401, + "step": 4027500 + }, + { + "epoch": 44.73, + "learning_rate": 1.3168269551118317e-08, + "loss": 3.7298, + "step": 4028000 + }, + { + "epoch": 44.74, + "learning_rate": 1.3154387757368454e-08, + "loss": 3.7379, + "step": 4028500 + }, + { + "epoch": 44.74, + "learning_rate": 1.3140505963618595e-08, + "loss": 3.7431, + "step": 4029000 + }, + { + "epoch": 44.75, + "learning_rate": 1.3126624169868734e-08, + "loss": 3.7572, + "step": 4029500 + }, + { + "epoch": 44.75, + "learning_rate": 1.3112742376118871e-08, + "loss": 3.744, + "step": 4030000 + }, + { + "epoch": 44.76, + "learning_rate": 1.3098860582369011e-08, + "loss": 3.7559, + "step": 4030500 + }, + { + "epoch": 44.77, + "learning_rate": 1.308497878861915e-08, + "loss": 3.7692, + "step": 4031000 + }, + { + "epoch": 44.77, + "learning_rate": 1.3071096994869288e-08, + "loss": 3.7629, + "step": 4031500 + }, + { + "epoch": 44.78, + "learning_rate": 1.3057215201119427e-08, + "loss": 3.7448, + "step": 4032000 + }, + { + "epoch": 44.78, + "learning_rate": 1.3043333407369567e-08, + "loss": 3.7355, + "step": 4032500 + }, + { + "epoch": 44.79, + "learning_rate": 1.3029451613619705e-08, + "loss": 3.754, + "step": 4033000 + }, + { + "epoch": 44.79, + "learning_rate": 1.3015569819869844e-08, + "loss": 3.7549, + "step": 4033500 + }, + { + "epoch": 44.8, + "learning_rate": 1.3001688026119984e-08, + "loss": 3.7674, + "step": 4034000 + }, + { + "epoch": 44.8, + "learning_rate": 1.2987806232370121e-08, + "loss": 3.7338, + "step": 4034500 + }, + { + "epoch": 44.81, + "learning_rate": 1.2973924438620259e-08, + "loss": 3.7445, + "step": 4035000 + }, + { + "epoch": 44.82, + "learning_rate": 1.2960042644870401e-08, + "loss": 3.7565, + "step": 4035500 + }, + { + "epoch": 44.82, + "learning_rate": 1.2946160851120538e-08, + "loss": 3.7377, + "step": 4036000 + }, + { + "epoch": 44.83, + "learning_rate": 1.2932279057370676e-08, + "loss": 3.7462, + "step": 4036500 + }, + { + "epoch": 44.83, + "learning_rate": 1.2918397263620815e-08, + "loss": 3.7426, + "step": 4037000 + }, + { + "epoch": 44.84, + "learning_rate": 1.2904515469870955e-08, + "loss": 3.7397, + "step": 4037500 + }, + { + "epoch": 44.84, + "learning_rate": 1.2890633676121092e-08, + "loss": 3.7572, + "step": 4038000 + }, + { + "epoch": 44.85, + "learning_rate": 1.2876751882371231e-08, + "loss": 3.7421, + "step": 4038500 + }, + { + "epoch": 44.85, + "learning_rate": 1.2862870088621372e-08, + "loss": 3.7649, + "step": 4039000 + }, + { + "epoch": 44.86, + "learning_rate": 1.284898829487151e-08, + "loss": 3.7551, + "step": 4039500 + }, + { + "epoch": 44.87, + "learning_rate": 1.2835106501121648e-08, + "loss": 3.7483, + "step": 4040000 + }, + { + "epoch": 44.87, + "learning_rate": 1.2821224707371786e-08, + "loss": 3.7292, + "step": 4040500 + }, + { + "epoch": 44.88, + "learning_rate": 1.2807342913621926e-08, + "loss": 3.7425, + "step": 4041000 + }, + { + "epoch": 44.88, + "learning_rate": 1.2793461119872065e-08, + "loss": 3.7643, + "step": 4041500 + }, + { + "epoch": 44.89, + "learning_rate": 1.2779579326122202e-08, + "loss": 3.751, + "step": 4042000 + }, + { + "epoch": 44.89, + "learning_rate": 1.2765697532372343e-08, + "loss": 3.7423, + "step": 4042500 + }, + { + "epoch": 44.9, + "learning_rate": 1.2751815738622482e-08, + "loss": 3.7656, + "step": 4043000 + }, + { + "epoch": 44.9, + "learning_rate": 1.273793394487262e-08, + "loss": 3.7598, + "step": 4043500 + }, + { + "epoch": 44.91, + "learning_rate": 1.2724052151122758e-08, + "loss": 3.7552, + "step": 4044000 + }, + { + "epoch": 44.92, + "learning_rate": 1.2710170357372899e-08, + "loss": 3.7349, + "step": 4044500 + }, + { + "epoch": 44.92, + "learning_rate": 1.2696288563623036e-08, + "loss": 3.743, + "step": 4045000 + }, + { + "epoch": 44.93, + "learning_rate": 1.2682406769873175e-08, + "loss": 3.7793, + "step": 4045500 + }, + { + "epoch": 44.93, + "learning_rate": 1.2668524976123316e-08, + "loss": 3.7613, + "step": 4046000 + }, + { + "epoch": 44.94, + "learning_rate": 1.2654643182373453e-08, + "loss": 3.7471, + "step": 4046500 + }, + { + "epoch": 44.94, + "learning_rate": 1.2640761388623592e-08, + "loss": 3.7377, + "step": 4047000 + }, + { + "epoch": 44.95, + "learning_rate": 1.262687959487373e-08, + "loss": 3.747, + "step": 4047500 + }, + { + "epoch": 44.95, + "learning_rate": 1.261299780112387e-08, + "loss": 3.7668, + "step": 4048000 + }, + { + "epoch": 44.96, + "learning_rate": 1.2599116007374009e-08, + "loss": 3.7604, + "step": 4048500 + }, + { + "epoch": 44.97, + "learning_rate": 1.2585234213624146e-08, + "loss": 3.7462, + "step": 4049000 + }, + { + "epoch": 44.97, + "learning_rate": 1.2571352419874287e-08, + "loss": 3.7346, + "step": 4049500 + }, + { + "epoch": 44.98, + "learning_rate": 1.2557470626124426e-08, + "loss": 3.7358, + "step": 4050000 + }, + { + "epoch": 44.98, + "learning_rate": 1.2543588832374563e-08, + "loss": 3.7457, + "step": 4050500 + }, + { + "epoch": 44.99, + "learning_rate": 1.25297070386247e-08, + "loss": 3.738, + "step": 4051000 + }, + { + "epoch": 44.99, + "learning_rate": 1.2515825244874843e-08, + "loss": 3.7374, + "step": 4051500 + }, + { + "epoch": 45.0, + "learning_rate": 1.250194345112498e-08, + "loss": 3.7573, + "step": 4052000 + }, + { + "epoch": 45.0, + "eval_loss": 3.8247101306915283, + "eval_runtime": 6.3052, + "eval_samples_per_second": 246.462, + "step": 4052070 + }, + { + "epoch": 45.0, + "learning_rate": 1.2488061657375119e-08, + "loss": 3.7472, + "step": 4052500 + }, + { + "epoch": 45.01, + "learning_rate": 1.2474179863625258e-08, + "loss": 3.7597, + "step": 4053000 + }, + { + "epoch": 45.02, + "learning_rate": 1.2460298069875397e-08, + "loss": 3.7478, + "step": 4053500 + }, + { + "epoch": 45.02, + "learning_rate": 1.2446416276125534e-08, + "loss": 3.7377, + "step": 4054000 + }, + { + "epoch": 45.03, + "learning_rate": 1.2432534482375675e-08, + "loss": 3.75, + "step": 4054500 + }, + { + "epoch": 45.03, + "learning_rate": 1.2418652688625812e-08, + "loss": 3.7509, + "step": 4055000 + }, + { + "epoch": 45.04, + "learning_rate": 1.2404770894875951e-08, + "loss": 3.7481, + "step": 4055500 + }, + { + "epoch": 45.04, + "learning_rate": 1.2390889101126092e-08, + "loss": 3.7611, + "step": 4056000 + }, + { + "epoch": 45.05, + "learning_rate": 1.2377007307376229e-08, + "loss": 3.7565, + "step": 4056500 + }, + { + "epoch": 45.05, + "learning_rate": 1.2363125513626368e-08, + "loss": 3.7633, + "step": 4057000 + }, + { + "epoch": 45.06, + "learning_rate": 1.2349243719876507e-08, + "loss": 3.7424, + "step": 4057500 + }, + { + "epoch": 45.07, + "learning_rate": 1.2335361926126646e-08, + "loss": 3.7399, + "step": 4058000 + }, + { + "epoch": 45.07, + "learning_rate": 1.2321480132376785e-08, + "loss": 3.7541, + "step": 4058500 + }, + { + "epoch": 45.08, + "learning_rate": 1.2307598338626924e-08, + "loss": 3.7407, + "step": 4059000 + }, + { + "epoch": 45.08, + "learning_rate": 1.2293716544877063e-08, + "loss": 3.7414, + "step": 4059500 + }, + { + "epoch": 45.09, + "learning_rate": 1.2279834751127202e-08, + "loss": 3.7469, + "step": 4060000 + }, + { + "epoch": 45.09, + "learning_rate": 1.226595295737734e-08, + "loss": 3.7551, + "step": 4060500 + }, + { + "epoch": 45.1, + "learning_rate": 1.2252071163627478e-08, + "loss": 3.739, + "step": 4061000 + }, + { + "epoch": 45.1, + "learning_rate": 1.2238189369877617e-08, + "loss": 3.7541, + "step": 4061500 + }, + { + "epoch": 45.11, + "learning_rate": 1.2224307576127757e-08, + "loss": 3.7308, + "step": 4062000 + }, + { + "epoch": 45.12, + "learning_rate": 1.2210425782377895e-08, + "loss": 3.7442, + "step": 4062500 + }, + { + "epoch": 45.12, + "learning_rate": 1.2196543988628034e-08, + "loss": 3.7178, + "step": 4063000 + }, + { + "epoch": 45.13, + "learning_rate": 1.2182662194878173e-08, + "loss": 3.752, + "step": 4063500 + }, + { + "epoch": 45.13, + "learning_rate": 1.2168780401128312e-08, + "loss": 3.7237, + "step": 4064000 + }, + { + "epoch": 45.14, + "learning_rate": 1.215489860737845e-08, + "loss": 3.7518, + "step": 4064500 + }, + { + "epoch": 45.14, + "learning_rate": 1.214101681362859e-08, + "loss": 3.7263, + "step": 4065000 + }, + { + "epoch": 45.15, + "learning_rate": 1.2127135019878728e-08, + "loss": 3.7328, + "step": 4065500 + }, + { + "epoch": 45.15, + "learning_rate": 1.2113253226128867e-08, + "loss": 3.7408, + "step": 4066000 + }, + { + "epoch": 45.16, + "learning_rate": 1.2099371432379006e-08, + "loss": 3.7489, + "step": 4066500 + }, + { + "epoch": 45.17, + "learning_rate": 1.2085489638629144e-08, + "loss": 3.7408, + "step": 4067000 + }, + { + "epoch": 45.17, + "learning_rate": 1.2071607844879284e-08, + "loss": 3.753, + "step": 4067500 + }, + { + "epoch": 45.18, + "learning_rate": 1.2057726051129422e-08, + "loss": 3.7521, + "step": 4068000 + }, + { + "epoch": 45.18, + "learning_rate": 1.204384425737956e-08, + "loss": 3.74, + "step": 4068500 + }, + { + "epoch": 45.19, + "learning_rate": 1.2029962463629701e-08, + "loss": 3.7246, + "step": 4069000 + }, + { + "epoch": 45.19, + "learning_rate": 1.2016080669879838e-08, + "loss": 3.7426, + "step": 4069500 + }, + { + "epoch": 45.2, + "learning_rate": 1.2002198876129977e-08, + "loss": 3.7464, + "step": 4070000 + }, + { + "epoch": 45.2, + "learning_rate": 1.1988317082380116e-08, + "loss": 3.7602, + "step": 4070500 + }, + { + "epoch": 45.21, + "learning_rate": 1.1974435288630255e-08, + "loss": 3.7413, + "step": 4071000 + }, + { + "epoch": 45.22, + "learning_rate": 1.1960553494880394e-08, + "loss": 3.7405, + "step": 4071500 + }, + { + "epoch": 45.22, + "learning_rate": 1.1946671701130533e-08, + "loss": 3.7508, + "step": 4072000 + }, + { + "epoch": 45.23, + "learning_rate": 1.1932789907380672e-08, + "loss": 3.7437, + "step": 4072500 + }, + { + "epoch": 45.23, + "learning_rate": 1.191890811363081e-08, + "loss": 3.7679, + "step": 4073000 + }, + { + "epoch": 45.24, + "learning_rate": 1.190502631988095e-08, + "loss": 3.7467, + "step": 4073500 + }, + { + "epoch": 45.24, + "learning_rate": 1.1891144526131087e-08, + "loss": 3.7599, + "step": 4074000 + }, + { + "epoch": 45.25, + "learning_rate": 1.1877262732381226e-08, + "loss": 3.7611, + "step": 4074500 + }, + { + "epoch": 45.25, + "learning_rate": 1.1863380938631367e-08, + "loss": 3.7522, + "step": 4075000 + }, + { + "epoch": 45.26, + "learning_rate": 1.1849499144881504e-08, + "loss": 3.756, + "step": 4075500 + }, + { + "epoch": 45.27, + "learning_rate": 1.1835617351131643e-08, + "loss": 3.7582, + "step": 4076000 + }, + { + "epoch": 45.27, + "learning_rate": 1.1821735557381782e-08, + "loss": 3.7371, + "step": 4076500 + }, + { + "epoch": 45.28, + "learning_rate": 1.1807853763631921e-08, + "loss": 3.7583, + "step": 4077000 + }, + { + "epoch": 45.28, + "learning_rate": 1.1793971969882058e-08, + "loss": 3.7402, + "step": 4077500 + }, + { + "epoch": 45.29, + "learning_rate": 1.1780090176132199e-08, + "loss": 3.7629, + "step": 4078000 + }, + { + "epoch": 45.29, + "learning_rate": 1.1766208382382338e-08, + "loss": 3.7404, + "step": 4078500 + }, + { + "epoch": 45.3, + "learning_rate": 1.1752326588632475e-08, + "loss": 3.7602, + "step": 4079000 + }, + { + "epoch": 45.3, + "learning_rate": 1.1738444794882616e-08, + "loss": 3.74, + "step": 4079500 + }, + { + "epoch": 45.31, + "learning_rate": 1.1724563001132753e-08, + "loss": 3.7437, + "step": 4080000 + }, + { + "epoch": 45.32, + "learning_rate": 1.1710681207382892e-08, + "loss": 3.7595, + "step": 4080500 + }, + { + "epoch": 45.32, + "learning_rate": 1.1696799413633031e-08, + "loss": 3.7593, + "step": 4081000 + }, + { + "epoch": 45.33, + "learning_rate": 1.168291761988317e-08, + "loss": 3.7379, + "step": 4081500 + }, + { + "epoch": 45.33, + "learning_rate": 1.1669035826133309e-08, + "loss": 3.7428, + "step": 4082000 + }, + { + "epoch": 45.34, + "learning_rate": 1.1655154032383448e-08, + "loss": 3.7346, + "step": 4082500 + }, + { + "epoch": 45.34, + "learning_rate": 1.1641272238633587e-08, + "loss": 3.7299, + "step": 4083000 + }, + { + "epoch": 45.35, + "learning_rate": 1.1627390444883726e-08, + "loss": 3.744, + "step": 4083500 + }, + { + "epoch": 45.35, + "learning_rate": 1.1613508651133865e-08, + "loss": 3.7515, + "step": 4084000 + }, + { + "epoch": 45.36, + "learning_rate": 1.1599626857384002e-08, + "loss": 3.7353, + "step": 4084500 + }, + { + "epoch": 45.37, + "learning_rate": 1.1585745063634143e-08, + "loss": 3.7435, + "step": 4085000 + }, + { + "epoch": 45.37, + "learning_rate": 1.1571863269884282e-08, + "loss": 3.7552, + "step": 4085500 + }, + { + "epoch": 45.38, + "learning_rate": 1.1557981476134419e-08, + "loss": 3.749, + "step": 4086000 + }, + { + "epoch": 45.38, + "learning_rate": 1.154409968238456e-08, + "loss": 3.7577, + "step": 4086500 + }, + { + "epoch": 45.39, + "learning_rate": 1.1530217888634697e-08, + "loss": 3.7472, + "step": 4087000 + }, + { + "epoch": 45.39, + "learning_rate": 1.1516336094884836e-08, + "loss": 3.7482, + "step": 4087500 + }, + { + "epoch": 45.4, + "learning_rate": 1.1502454301134976e-08, + "loss": 3.7297, + "step": 4088000 + }, + { + "epoch": 45.4, + "learning_rate": 1.1488572507385114e-08, + "loss": 3.7501, + "step": 4088500 + }, + { + "epoch": 45.41, + "learning_rate": 1.1474690713635253e-08, + "loss": 3.7697, + "step": 4089000 + }, + { + "epoch": 45.42, + "learning_rate": 1.1460808919885392e-08, + "loss": 3.7517, + "step": 4089500 + }, + { + "epoch": 45.42, + "learning_rate": 1.144692712613553e-08, + "loss": 3.744, + "step": 4090000 + }, + { + "epoch": 45.43, + "learning_rate": 1.1433045332385668e-08, + "loss": 3.7476, + "step": 4090500 + }, + { + "epoch": 45.43, + "learning_rate": 1.1419163538635809e-08, + "loss": 3.7611, + "step": 4091000 + }, + { + "epoch": 45.44, + "learning_rate": 1.1405281744885947e-08, + "loss": 3.7593, + "step": 4091500 + }, + { + "epoch": 45.44, + "learning_rate": 1.1391399951136085e-08, + "loss": 3.7525, + "step": 4092000 + }, + { + "epoch": 45.45, + "learning_rate": 1.1377518157386225e-08, + "loss": 3.7613, + "step": 4092500 + }, + { + "epoch": 45.45, + "learning_rate": 1.1363636363636363e-08, + "loss": 3.757, + "step": 4093000 + }, + { + "epoch": 45.46, + "learning_rate": 1.1349754569886502e-08, + "loss": 3.7462, + "step": 4093500 + }, + { + "epoch": 45.47, + "learning_rate": 1.133587277613664e-08, + "loss": 3.7684, + "step": 4094000 + }, + { + "epoch": 45.47, + "learning_rate": 1.132199098238678e-08, + "loss": 3.7631, + "step": 4094500 + }, + { + "epoch": 45.48, + "learning_rate": 1.1308109188636918e-08, + "loss": 3.7457, + "step": 4095000 + }, + { + "epoch": 45.48, + "learning_rate": 1.1294227394887057e-08, + "loss": 3.7521, + "step": 4095500 + }, + { + "epoch": 45.49, + "learning_rate": 1.1280345601137196e-08, + "loss": 3.7518, + "step": 4096000 + }, + { + "epoch": 45.49, + "learning_rate": 1.1266463807387334e-08, + "loss": 3.7475, + "step": 4096500 + }, + { + "epoch": 45.5, + "learning_rate": 1.1252582013637474e-08, + "loss": 3.7414, + "step": 4097000 + }, + { + "epoch": 45.5, + "learning_rate": 1.1238700219887612e-08, + "loss": 3.7509, + "step": 4097500 + }, + { + "epoch": 45.51, + "learning_rate": 1.122481842613775e-08, + "loss": 3.7135, + "step": 4098000 + }, + { + "epoch": 45.52, + "learning_rate": 1.1210936632387891e-08, + "loss": 3.7353, + "step": 4098500 + }, + { + "epoch": 45.52, + "learning_rate": 1.1197054838638028e-08, + "loss": 3.7351, + "step": 4099000 + }, + { + "epoch": 45.53, + "learning_rate": 1.1183173044888167e-08, + "loss": 3.7446, + "step": 4099500 + }, + { + "epoch": 45.53, + "learning_rate": 1.1169291251138306e-08, + "loss": 3.7458, + "step": 4100000 + }, + { + "epoch": 45.54, + "learning_rate": 1.1155409457388445e-08, + "loss": 3.7647, + "step": 4100500 + }, + { + "epoch": 45.54, + "learning_rate": 1.1141527663638584e-08, + "loss": 3.7459, + "step": 4101000 + }, + { + "epoch": 45.55, + "learning_rate": 1.1127645869888723e-08, + "loss": 3.7482, + "step": 4101500 + }, + { + "epoch": 45.55, + "learning_rate": 1.1113764076138862e-08, + "loss": 3.7484, + "step": 4102000 + }, + { + "epoch": 45.56, + "learning_rate": 1.1099882282389001e-08, + "loss": 3.7419, + "step": 4102500 + }, + { + "epoch": 45.57, + "learning_rate": 1.108600048863914e-08, + "loss": 3.7481, + "step": 4103000 + }, + { + "epoch": 45.57, + "learning_rate": 1.1072118694889277e-08, + "loss": 3.7291, + "step": 4103500 + }, + { + "epoch": 45.58, + "learning_rate": 1.1058236901139418e-08, + "loss": 3.7579, + "step": 4104000 + }, + { + "epoch": 45.58, + "learning_rate": 1.1044355107389557e-08, + "loss": 3.754, + "step": 4104500 + }, + { + "epoch": 45.59, + "learning_rate": 1.1030473313639694e-08, + "loss": 3.7544, + "step": 4105000 + }, + { + "epoch": 45.59, + "learning_rate": 1.1016591519889833e-08, + "loss": 3.7459, + "step": 4105500 + }, + { + "epoch": 45.6, + "learning_rate": 1.1002709726139972e-08, + "loss": 3.7469, + "step": 4106000 + }, + { + "epoch": 45.6, + "learning_rate": 1.0988827932390111e-08, + "loss": 3.7461, + "step": 4106500 + }, + { + "epoch": 45.61, + "learning_rate": 1.097494613864025e-08, + "loss": 3.7401, + "step": 4107000 + }, + { + "epoch": 45.62, + "learning_rate": 1.0961064344890389e-08, + "loss": 3.756, + "step": 4107500 + }, + { + "epoch": 45.62, + "learning_rate": 1.0947182551140528e-08, + "loss": 3.7323, + "step": 4108000 + }, + { + "epoch": 45.63, + "learning_rate": 1.0933300757390667e-08, + "loss": 3.7526, + "step": 4108500 + }, + { + "epoch": 45.63, + "learning_rate": 1.0919418963640806e-08, + "loss": 3.7568, + "step": 4109000 + }, + { + "epoch": 45.64, + "learning_rate": 1.0905537169890943e-08, + "loss": 3.7228, + "step": 4109500 + }, + { + "epoch": 45.64, + "learning_rate": 1.0891655376141084e-08, + "loss": 3.7769, + "step": 4110000 + }, + { + "epoch": 45.65, + "learning_rate": 1.0877773582391221e-08, + "loss": 3.7644, + "step": 4110500 + }, + { + "epoch": 45.65, + "learning_rate": 1.086389178864136e-08, + "loss": 3.7292, + "step": 4111000 + }, + { + "epoch": 45.66, + "learning_rate": 1.08500099948915e-08, + "loss": 3.7508, + "step": 4111500 + }, + { + "epoch": 45.67, + "learning_rate": 1.0836128201141638e-08, + "loss": 3.7341, + "step": 4112000 + }, + { + "epoch": 45.67, + "learning_rate": 1.0822246407391777e-08, + "loss": 3.7353, + "step": 4112500 + }, + { + "epoch": 45.68, + "learning_rate": 1.0808364613641916e-08, + "loss": 3.7593, + "step": 4113000 + }, + { + "epoch": 45.68, + "learning_rate": 1.0794482819892055e-08, + "loss": 3.7551, + "step": 4113500 + }, + { + "epoch": 45.69, + "learning_rate": 1.0780601026142192e-08, + "loss": 3.7555, + "step": 4114000 + }, + { + "epoch": 45.69, + "learning_rate": 1.0766719232392333e-08, + "loss": 3.7646, + "step": 4114500 + }, + { + "epoch": 45.7, + "learning_rate": 1.0752837438642472e-08, + "loss": 3.7526, + "step": 4115000 + }, + { + "epoch": 45.7, + "learning_rate": 1.0738955644892609e-08, + "loss": 3.7397, + "step": 4115500 + }, + { + "epoch": 45.71, + "learning_rate": 1.072507385114275e-08, + "loss": 3.7402, + "step": 4116000 + }, + { + "epoch": 45.72, + "learning_rate": 1.0711192057392887e-08, + "loss": 3.7589, + "step": 4116500 + }, + { + "epoch": 45.72, + "learning_rate": 1.0697310263643026e-08, + "loss": 3.7486, + "step": 4117000 + }, + { + "epoch": 45.73, + "learning_rate": 1.0683428469893165e-08, + "loss": 3.7497, + "step": 4117500 + }, + { + "epoch": 45.73, + "learning_rate": 1.0669546676143304e-08, + "loss": 3.7544, + "step": 4118000 + }, + { + "epoch": 45.74, + "learning_rate": 1.0655664882393443e-08, + "loss": 3.7395, + "step": 4118500 + }, + { + "epoch": 45.74, + "learning_rate": 1.0641783088643582e-08, + "loss": 3.7352, + "step": 4119000 + }, + { + "epoch": 45.75, + "learning_rate": 1.062790129489372e-08, + "loss": 3.7522, + "step": 4119500 + }, + { + "epoch": 45.75, + "learning_rate": 1.0614019501143858e-08, + "loss": 3.7602, + "step": 4120000 + }, + { + "epoch": 45.76, + "learning_rate": 1.0600137707393999e-08, + "loss": 3.7724, + "step": 4120500 + }, + { + "epoch": 45.77, + "learning_rate": 1.0586255913644138e-08, + "loss": 3.7349, + "step": 4121000 + }, + { + "epoch": 45.77, + "learning_rate": 1.0572374119894275e-08, + "loss": 3.7577, + "step": 4121500 + }, + { + "epoch": 45.78, + "learning_rate": 1.0558492326144415e-08, + "loss": 3.7606, + "step": 4122000 + }, + { + "epoch": 45.78, + "learning_rate": 1.0544610532394553e-08, + "loss": 3.7307, + "step": 4122500 + }, + { + "epoch": 45.79, + "learning_rate": 1.0530728738644692e-08, + "loss": 3.7421, + "step": 4123000 + }, + { + "epoch": 45.79, + "learning_rate": 1.051684694489483e-08, + "loss": 3.7552, + "step": 4123500 + }, + { + "epoch": 45.8, + "learning_rate": 1.050296515114497e-08, + "loss": 3.7399, + "step": 4124000 + }, + { + "epoch": 45.8, + "learning_rate": 1.0489083357395109e-08, + "loss": 3.7693, + "step": 4124500 + }, + { + "epoch": 45.81, + "learning_rate": 1.0475201563645248e-08, + "loss": 3.7475, + "step": 4125000 + }, + { + "epoch": 45.82, + "learning_rate": 1.0461319769895386e-08, + "loss": 3.7391, + "step": 4125500 + }, + { + "epoch": 45.82, + "learning_rate": 1.0447437976145525e-08, + "loss": 3.7574, + "step": 4126000 + }, + { + "epoch": 45.83, + "learning_rate": 1.0433556182395664e-08, + "loss": 3.7356, + "step": 4126500 + }, + { + "epoch": 45.83, + "learning_rate": 1.0419674388645802e-08, + "loss": 3.7367, + "step": 4127000 + }, + { + "epoch": 45.84, + "learning_rate": 1.0405792594895942e-08, + "loss": 3.7585, + "step": 4127500 + }, + { + "epoch": 45.84, + "learning_rate": 1.0391910801146081e-08, + "loss": 3.7518, + "step": 4128000 + }, + { + "epoch": 45.85, + "learning_rate": 1.0378029007396219e-08, + "loss": 3.7324, + "step": 4128500 + }, + { + "epoch": 45.85, + "learning_rate": 1.036414721364636e-08, + "loss": 3.7376, + "step": 4129000 + }, + { + "epoch": 45.86, + "learning_rate": 1.0350265419896496e-08, + "loss": 3.7528, + "step": 4129500 + }, + { + "epoch": 45.87, + "learning_rate": 1.0336383626146635e-08, + "loss": 3.7487, + "step": 4130000 + }, + { + "epoch": 45.87, + "learning_rate": 1.0322501832396774e-08, + "loss": 3.7465, + "step": 4130500 + }, + { + "epoch": 45.88, + "learning_rate": 1.0308620038646913e-08, + "loss": 3.7546, + "step": 4131000 + }, + { + "epoch": 45.88, + "learning_rate": 1.0294738244897052e-08, + "loss": 3.7473, + "step": 4131500 + }, + { + "epoch": 45.89, + "learning_rate": 1.0280856451147191e-08, + "loss": 3.7433, + "step": 4132000 + }, + { + "epoch": 45.89, + "learning_rate": 1.026697465739733e-08, + "loss": 3.7562, + "step": 4132500 + }, + { + "epoch": 45.9, + "learning_rate": 1.0253092863647468e-08, + "loss": 3.7568, + "step": 4133000 + }, + { + "epoch": 45.9, + "learning_rate": 1.0239211069897608e-08, + "loss": 3.7314, + "step": 4133500 + }, + { + "epoch": 45.91, + "learning_rate": 1.0225329276147747e-08, + "loss": 3.7356, + "step": 4134000 + }, + { + "epoch": 45.92, + "learning_rate": 1.0211447482397884e-08, + "loss": 3.7454, + "step": 4134500 + }, + { + "epoch": 45.92, + "learning_rate": 1.0197565688648025e-08, + "loss": 3.7406, + "step": 4135000 + }, + { + "epoch": 45.93, + "learning_rate": 1.0183683894898162e-08, + "loss": 3.7525, + "step": 4135500 + }, + { + "epoch": 45.93, + "learning_rate": 1.0169802101148301e-08, + "loss": 3.7258, + "step": 4136000 + }, + { + "epoch": 45.94, + "learning_rate": 1.015592030739844e-08, + "loss": 3.7477, + "step": 4136500 + }, + { + "epoch": 45.94, + "learning_rate": 1.0142038513648579e-08, + "loss": 3.7516, + "step": 4137000 + }, + { + "epoch": 45.95, + "learning_rate": 1.0128156719898718e-08, + "loss": 3.7278, + "step": 4137500 + }, + { + "epoch": 45.95, + "learning_rate": 1.0114274926148857e-08, + "loss": 3.7652, + "step": 4138000 + }, + { + "epoch": 45.96, + "learning_rate": 1.0100393132398996e-08, + "loss": 3.719, + "step": 4138500 + }, + { + "epoch": 45.97, + "learning_rate": 1.0086511338649133e-08, + "loss": 3.7351, + "step": 4139000 + }, + { + "epoch": 45.97, + "learning_rate": 1.0072629544899274e-08, + "loss": 3.7496, + "step": 4139500 + }, + { + "epoch": 45.98, + "learning_rate": 1.0058747751149411e-08, + "loss": 3.7471, + "step": 4140000 + }, + { + "epoch": 45.98, + "learning_rate": 1.004486595739955e-08, + "loss": 3.7638, + "step": 4140500 + }, + { + "epoch": 45.99, + "learning_rate": 1.003098416364969e-08, + "loss": 3.7404, + "step": 4141000 + }, + { + "epoch": 45.99, + "learning_rate": 1.0017102369899828e-08, + "loss": 3.7646, + "step": 4141500 + }, + { + "epoch": 46.0, + "learning_rate": 1.0003220576149967e-08, + "loss": 3.7462, + "step": 4142000 + }, + { + "epoch": 46.0, + "eval_loss": 3.824589490890503, + "eval_runtime": 6.307, + "eval_samples_per_second": 246.393, + "step": 4142116 + }, + { + "epoch": 46.0, + "learning_rate": 9.989338782400106e-09, + "loss": 3.7482, + "step": 4142500 + }, + { + "epoch": 46.01, + "learning_rate": 9.975456988650245e-09, + "loss": 3.7465, + "step": 4143000 + }, + { + "epoch": 46.02, + "learning_rate": 9.961575194900384e-09, + "loss": 3.7503, + "step": 4143500 + }, + { + "epoch": 46.02, + "learning_rate": 9.947693401150523e-09, + "loss": 3.7474, + "step": 4144000 + }, + { + "epoch": 46.03, + "learning_rate": 9.933811607400662e-09, + "loss": 3.7468, + "step": 4144500 + }, + { + "epoch": 46.03, + "learning_rate": 9.9199298136508e-09, + "loss": 3.7563, + "step": 4145000 + }, + { + "epoch": 46.04, + "learning_rate": 9.90604801990094e-09, + "loss": 3.7407, + "step": 4145500 + }, + { + "epoch": 46.04, + "learning_rate": 9.892166226151077e-09, + "loss": 3.7697, + "step": 4146000 + }, + { + "epoch": 46.05, + "learning_rate": 9.878284432401218e-09, + "loss": 3.7523, + "step": 4146500 + }, + { + "epoch": 46.05, + "learning_rate": 9.864402638651355e-09, + "loss": 3.7531, + "step": 4147000 + }, + { + "epoch": 46.06, + "learning_rate": 9.850520844901494e-09, + "loss": 3.7775, + "step": 4147500 + }, + { + "epoch": 46.07, + "learning_rate": 9.836639051151633e-09, + "loss": 3.7518, + "step": 4148000 + }, + { + "epoch": 46.07, + "learning_rate": 9.822757257401772e-09, + "loss": 3.7412, + "step": 4148500 + }, + { + "epoch": 46.08, + "learning_rate": 9.80887546365191e-09, + "loss": 3.7648, + "step": 4149000 + }, + { + "epoch": 46.08, + "learning_rate": 9.79499366990205e-09, + "loss": 3.7634, + "step": 4149500 + }, + { + "epoch": 46.09, + "learning_rate": 9.781111876152189e-09, + "loss": 3.755, + "step": 4150000 + }, + { + "epoch": 46.09, + "learning_rate": 9.767230082402328e-09, + "loss": 3.7599, + "step": 4150500 + }, + { + "epoch": 46.1, + "learning_rate": 9.753348288652467e-09, + "loss": 3.7446, + "step": 4151000 + }, + { + "epoch": 46.1, + "learning_rate": 9.739466494902606e-09, + "loss": 3.7538, + "step": 4151500 + }, + { + "epoch": 46.11, + "learning_rate": 9.725584701152743e-09, + "loss": 3.7337, + "step": 4152000 + }, + { + "epoch": 46.12, + "learning_rate": 9.711702907402883e-09, + "loss": 3.7634, + "step": 4152500 + }, + { + "epoch": 46.12, + "learning_rate": 9.69782111365302e-09, + "loss": 3.7337, + "step": 4153000 + }, + { + "epoch": 46.13, + "learning_rate": 9.68393931990316e-09, + "loss": 3.7546, + "step": 4153500 + }, + { + "epoch": 46.13, + "learning_rate": 9.6700575261533e-09, + "loss": 3.7528, + "step": 4154000 + }, + { + "epoch": 46.14, + "learning_rate": 9.656175732403438e-09, + "loss": 3.7457, + "step": 4154500 + }, + { + "epoch": 46.14, + "learning_rate": 9.642293938653577e-09, + "loss": 3.7325, + "step": 4155000 + }, + { + "epoch": 46.15, + "learning_rate": 9.628412144903716e-09, + "loss": 3.7416, + "step": 4155500 + }, + { + "epoch": 46.15, + "learning_rate": 9.614530351153854e-09, + "loss": 3.7404, + "step": 4156000 + }, + { + "epoch": 46.16, + "learning_rate": 9.600648557403992e-09, + "loss": 3.7493, + "step": 4156500 + }, + { + "epoch": 46.17, + "learning_rate": 9.586766763654132e-09, + "loss": 3.7475, + "step": 4157000 + }, + { + "epoch": 46.17, + "learning_rate": 9.572884969904271e-09, + "loss": 3.7655, + "step": 4157500 + }, + { + "epoch": 46.18, + "learning_rate": 9.559003176154409e-09, + "loss": 3.7527, + "step": 4158000 + }, + { + "epoch": 46.18, + "learning_rate": 9.54512138240455e-09, + "loss": 3.7587, + "step": 4158500 + }, + { + "epoch": 46.19, + "learning_rate": 9.531239588654687e-09, + "loss": 3.7539, + "step": 4159000 + }, + { + "epoch": 46.19, + "learning_rate": 9.517357794904826e-09, + "loss": 3.7548, + "step": 4159500 + }, + { + "epoch": 46.2, + "learning_rate": 9.503476001154964e-09, + "loss": 3.7661, + "step": 4160000 + }, + { + "epoch": 46.2, + "learning_rate": 9.489594207405103e-09, + "loss": 3.7509, + "step": 4160500 + }, + { + "epoch": 46.21, + "learning_rate": 9.475712413655242e-09, + "loss": 3.7366, + "step": 4161000 + }, + { + "epoch": 46.22, + "learning_rate": 9.461830619905381e-09, + "loss": 3.7553, + "step": 4161500 + }, + { + "epoch": 46.22, + "learning_rate": 9.44794882615552e-09, + "loss": 3.7445, + "step": 4162000 + }, + { + "epoch": 46.23, + "learning_rate": 9.434067032405658e-09, + "loss": 3.7339, + "step": 4162500 + }, + { + "epoch": 46.23, + "learning_rate": 9.420185238655798e-09, + "loss": 3.765, + "step": 4163000 + }, + { + "epoch": 46.24, + "learning_rate": 9.406303444905936e-09, + "loss": 3.7396, + "step": 4163500 + }, + { + "epoch": 46.24, + "learning_rate": 9.392421651156074e-09, + "loss": 3.7764, + "step": 4164000 + }, + { + "epoch": 46.25, + "learning_rate": 9.378539857406215e-09, + "loss": 3.7443, + "step": 4164500 + }, + { + "epoch": 46.25, + "learning_rate": 9.364658063656352e-09, + "loss": 3.7498, + "step": 4165000 + }, + { + "epoch": 46.26, + "learning_rate": 9.350776269906491e-09, + "loss": 3.7494, + "step": 4165500 + }, + { + "epoch": 46.27, + "learning_rate": 9.33689447615663e-09, + "loss": 3.749, + "step": 4166000 + }, + { + "epoch": 46.27, + "learning_rate": 9.32301268240677e-09, + "loss": 3.7586, + "step": 4166500 + }, + { + "epoch": 46.28, + "learning_rate": 9.309130888656908e-09, + "loss": 3.7361, + "step": 4167000 + }, + { + "epoch": 46.28, + "learning_rate": 9.295249094907047e-09, + "loss": 3.7269, + "step": 4167500 + }, + { + "epoch": 46.29, + "learning_rate": 9.281367301157186e-09, + "loss": 3.749, + "step": 4168000 + }, + { + "epoch": 46.29, + "learning_rate": 9.267485507407325e-09, + "loss": 3.7509, + "step": 4168500 + }, + { + "epoch": 46.3, + "learning_rate": 9.253603713657464e-09, + "loss": 3.7378, + "step": 4169000 + }, + { + "epoch": 46.3, + "learning_rate": 9.239721919907601e-09, + "loss": 3.7373, + "step": 4169500 + }, + { + "epoch": 46.31, + "learning_rate": 9.225840126157742e-09, + "loss": 3.748, + "step": 4170000 + }, + { + "epoch": 46.32, + "learning_rate": 9.211958332407881e-09, + "loss": 3.7466, + "step": 4170500 + }, + { + "epoch": 46.32, + "learning_rate": 9.198076538658018e-09, + "loss": 3.7457, + "step": 4171000 + }, + { + "epoch": 46.33, + "learning_rate": 9.184194744908159e-09, + "loss": 3.7494, + "step": 4171500 + }, + { + "epoch": 46.33, + "learning_rate": 9.170312951158296e-09, + "loss": 3.7453, + "step": 4172000 + }, + { + "epoch": 46.34, + "learning_rate": 9.156431157408435e-09, + "loss": 3.7418, + "step": 4172500 + }, + { + "epoch": 46.34, + "learning_rate": 9.142549363658574e-09, + "loss": 3.7366, + "step": 4173000 + }, + { + "epoch": 46.35, + "learning_rate": 9.128667569908713e-09, + "loss": 3.7323, + "step": 4173500 + }, + { + "epoch": 46.35, + "learning_rate": 9.114785776158852e-09, + "loss": 3.7559, + "step": 4174000 + }, + { + "epoch": 46.36, + "learning_rate": 9.100903982408991e-09, + "loss": 3.7543, + "step": 4174500 + }, + { + "epoch": 46.37, + "learning_rate": 9.08702218865913e-09, + "loss": 3.7335, + "step": 4175000 + }, + { + "epoch": 46.37, + "learning_rate": 9.073140394909267e-09, + "loss": 3.7483, + "step": 4175500 + }, + { + "epoch": 46.38, + "learning_rate": 9.059258601159408e-09, + "loss": 3.7488, + "step": 4176000 + }, + { + "epoch": 46.38, + "learning_rate": 9.045376807409545e-09, + "loss": 3.7475, + "step": 4176500 + }, + { + "epoch": 46.39, + "learning_rate": 9.031495013659684e-09, + "loss": 3.7447, + "step": 4177000 + }, + { + "epoch": 46.39, + "learning_rate": 9.017613219909825e-09, + "loss": 3.7255, + "step": 4177500 + }, + { + "epoch": 46.4, + "learning_rate": 9.003731426159962e-09, + "loss": 3.7488, + "step": 4178000 + }, + { + "epoch": 46.4, + "learning_rate": 8.989849632410101e-09, + "loss": 3.7595, + "step": 4178500 + }, + { + "epoch": 46.41, + "learning_rate": 8.97596783866024e-09, + "loss": 3.7672, + "step": 4179000 + }, + { + "epoch": 46.42, + "learning_rate": 8.962086044910379e-09, + "loss": 3.7499, + "step": 4179500 + }, + { + "epoch": 46.42, + "learning_rate": 8.948204251160516e-09, + "loss": 3.7323, + "step": 4180000 + }, + { + "epoch": 46.43, + "learning_rate": 8.934322457410657e-09, + "loss": 3.7302, + "step": 4180500 + }, + { + "epoch": 46.43, + "learning_rate": 8.920440663660796e-09, + "loss": 3.7605, + "step": 4181000 + }, + { + "epoch": 46.44, + "learning_rate": 8.906558869910933e-09, + "loss": 3.7395, + "step": 4181500 + }, + { + "epoch": 46.44, + "learning_rate": 8.892677076161074e-09, + "loss": 3.7641, + "step": 4182000 + }, + { + "epoch": 46.45, + "learning_rate": 8.878795282411211e-09, + "loss": 3.7529, + "step": 4182500 + }, + { + "epoch": 46.45, + "learning_rate": 8.86491348866135e-09, + "loss": 3.7306, + "step": 4183000 + }, + { + "epoch": 46.46, + "learning_rate": 8.85103169491149e-09, + "loss": 3.7585, + "step": 4183500 + }, + { + "epoch": 46.47, + "learning_rate": 8.837149901161628e-09, + "loss": 3.7317, + "step": 4184000 + }, + { + "epoch": 46.47, + "learning_rate": 8.823268107411767e-09, + "loss": 3.7636, + "step": 4184500 + }, + { + "epoch": 46.48, + "learning_rate": 8.809386313661906e-09, + "loss": 3.7619, + "step": 4185000 + }, + { + "epoch": 46.48, + "learning_rate": 8.795504519912045e-09, + "loss": 3.7452, + "step": 4185500 + }, + { + "epoch": 46.49, + "learning_rate": 8.781622726162184e-09, + "loss": 3.7616, + "step": 4186000 + }, + { + "epoch": 46.49, + "learning_rate": 8.767740932412323e-09, + "loss": 3.7291, + "step": 4186500 + }, + { + "epoch": 46.5, + "learning_rate": 8.753859138662461e-09, + "loss": 3.74, + "step": 4187000 + }, + { + "epoch": 46.5, + "learning_rate": 8.7399773449126e-09, + "loss": 3.7648, + "step": 4187500 + }, + { + "epoch": 46.51, + "learning_rate": 8.72609555116274e-09, + "loss": 3.7428, + "step": 4188000 + }, + { + "epoch": 46.52, + "learning_rate": 8.712213757412877e-09, + "loss": 3.7448, + "step": 4188500 + }, + { + "epoch": 46.52, + "learning_rate": 8.698331963663017e-09, + "loss": 3.7228, + "step": 4189000 + }, + { + "epoch": 46.53, + "learning_rate": 8.684450169913155e-09, + "loss": 3.7527, + "step": 4189500 + }, + { + "epoch": 46.53, + "learning_rate": 8.670568376163294e-09, + "loss": 3.7418, + "step": 4190000 + }, + { + "epoch": 46.54, + "learning_rate": 8.656686582413432e-09, + "loss": 3.745, + "step": 4190500 + }, + { + "epoch": 46.54, + "learning_rate": 8.642804788663571e-09, + "loss": 3.7309, + "step": 4191000 + }, + { + "epoch": 46.55, + "learning_rate": 8.62892299491371e-09, + "loss": 3.7464, + "step": 4191500 + }, + { + "epoch": 46.55, + "learning_rate": 8.61504120116385e-09, + "loss": 3.7476, + "step": 4192000 + }, + { + "epoch": 46.56, + "learning_rate": 8.601159407413988e-09, + "loss": 3.737, + "step": 4192500 + }, + { + "epoch": 46.57, + "learning_rate": 8.587277613664126e-09, + "loss": 3.7437, + "step": 4193000 + }, + { + "epoch": 46.57, + "learning_rate": 8.573395819914266e-09, + "loss": 3.7578, + "step": 4193500 + }, + { + "epoch": 46.58, + "learning_rate": 8.559514026164405e-09, + "loss": 3.7389, + "step": 4194000 + }, + { + "epoch": 46.58, + "learning_rate": 8.545632232414542e-09, + "loss": 3.7572, + "step": 4194500 + }, + { + "epoch": 46.59, + "learning_rate": 8.531750438664683e-09, + "loss": 3.7411, + "step": 4195000 + }, + { + "epoch": 46.59, + "learning_rate": 8.51786864491482e-09, + "loss": 3.754, + "step": 4195500 + }, + { + "epoch": 46.6, + "learning_rate": 8.50398685116496e-09, + "loss": 3.746, + "step": 4196000 + }, + { + "epoch": 46.6, + "learning_rate": 8.4901050574151e-09, + "loss": 3.7338, + "step": 4196500 + }, + { + "epoch": 46.61, + "learning_rate": 8.476223263665237e-09, + "loss": 3.776, + "step": 4197000 + }, + { + "epoch": 46.62, + "learning_rate": 8.462341469915376e-09, + "loss": 3.7661, + "step": 4197500 + }, + { + "epoch": 46.62, + "learning_rate": 8.448459676165515e-09, + "loss": 3.7635, + "step": 4198000 + }, + { + "epoch": 46.63, + "learning_rate": 8.434577882415654e-09, + "loss": 3.7478, + "step": 4198500 + }, + { + "epoch": 46.63, + "learning_rate": 8.420696088665791e-09, + "loss": 3.7508, + "step": 4199000 + }, + { + "epoch": 46.64, + "learning_rate": 8.406814294915932e-09, + "loss": 3.7457, + "step": 4199500 + }, + { + "epoch": 46.64, + "learning_rate": 8.392932501166071e-09, + "loss": 3.7412, + "step": 4200000 + }, + { + "epoch": 46.65, + "learning_rate": 8.379050707416208e-09, + "loss": 3.7442, + "step": 4200500 + }, + { + "epoch": 46.65, + "learning_rate": 8.365168913666349e-09, + "loss": 3.7372, + "step": 4201000 + }, + { + "epoch": 46.66, + "learning_rate": 8.351287119916486e-09, + "loss": 3.7292, + "step": 4201500 + }, + { + "epoch": 46.67, + "learning_rate": 8.337405326166625e-09, + "loss": 3.751, + "step": 4202000 + }, + { + "epoch": 46.67, + "learning_rate": 8.323523532416764e-09, + "loss": 3.7346, + "step": 4202500 + }, + { + "epoch": 46.68, + "learning_rate": 8.309641738666903e-09, + "loss": 3.7454, + "step": 4203000 + }, + { + "epoch": 46.68, + "learning_rate": 8.295759944917042e-09, + "loss": 3.745, + "step": 4203500 + }, + { + "epoch": 46.69, + "learning_rate": 8.281878151167181e-09, + "loss": 3.7355, + "step": 4204000 + }, + { + "epoch": 46.69, + "learning_rate": 8.26799635741732e-09, + "loss": 3.738, + "step": 4204500 + }, + { + "epoch": 46.7, + "learning_rate": 8.254114563667459e-09, + "loss": 3.7401, + "step": 4205000 + }, + { + "epoch": 46.7, + "learning_rate": 8.240232769917598e-09, + "loss": 3.7503, + "step": 4205500 + }, + { + "epoch": 46.71, + "learning_rate": 8.226350976167735e-09, + "loss": 3.7402, + "step": 4206000 + }, + { + "epoch": 46.72, + "learning_rate": 8.212469182417874e-09, + "loss": 3.7453, + "step": 4206500 + }, + { + "epoch": 46.72, + "learning_rate": 8.198587388668015e-09, + "loss": 3.7628, + "step": 4207000 + }, + { + "epoch": 46.73, + "learning_rate": 8.184705594918152e-09, + "loss": 3.7436, + "step": 4207500 + }, + { + "epoch": 46.73, + "learning_rate": 8.170823801168291e-09, + "loss": 3.7496, + "step": 4208000 + }, + { + "epoch": 46.74, + "learning_rate": 8.15694200741843e-09, + "loss": 3.7505, + "step": 4208500 + }, + { + "epoch": 46.74, + "learning_rate": 8.143060213668569e-09, + "loss": 3.7419, + "step": 4209000 + }, + { + "epoch": 46.75, + "learning_rate": 8.129178419918708e-09, + "loss": 3.7672, + "step": 4209500 + }, + { + "epoch": 46.75, + "learning_rate": 8.115296626168847e-09, + "loss": 3.7119, + "step": 4210000 + }, + { + "epoch": 46.76, + "learning_rate": 8.101414832418986e-09, + "loss": 3.7372, + "step": 4210500 + }, + { + "epoch": 46.76, + "learning_rate": 8.087533038669125e-09, + "loss": 3.7472, + "step": 4211000 + }, + { + "epoch": 46.77, + "learning_rate": 8.073651244919264e-09, + "loss": 3.7616, + "step": 4211500 + }, + { + "epoch": 46.78, + "learning_rate": 8.059769451169401e-09, + "loss": 3.7497, + "step": 4212000 + }, + { + "epoch": 46.78, + "learning_rate": 8.045887657419542e-09, + "loss": 3.7366, + "step": 4212500 + }, + { + "epoch": 46.79, + "learning_rate": 8.03200586366968e-09, + "loss": 3.766, + "step": 4213000 + }, + { + "epoch": 46.79, + "learning_rate": 8.018124069919818e-09, + "loss": 3.7244, + "step": 4213500 + }, + { + "epoch": 46.8, + "learning_rate": 8.004242276169958e-09, + "loss": 3.7508, + "step": 4214000 + }, + { + "epoch": 46.8, + "learning_rate": 7.990360482420096e-09, + "loss": 3.74, + "step": 4214500 + }, + { + "epoch": 46.81, + "learning_rate": 7.976478688670235e-09, + "loss": 3.7578, + "step": 4215000 + }, + { + "epoch": 46.81, + "learning_rate": 7.962596894920374e-09, + "loss": 3.7258, + "step": 4215500 + }, + { + "epoch": 46.82, + "learning_rate": 7.948715101170513e-09, + "loss": 3.7509, + "step": 4216000 + }, + { + "epoch": 46.83, + "learning_rate": 7.934833307420652e-09, + "loss": 3.7406, + "step": 4216500 + }, + { + "epoch": 46.83, + "learning_rate": 7.92095151367079e-09, + "loss": 3.7494, + "step": 4217000 + }, + { + "epoch": 46.84, + "learning_rate": 7.90706971992093e-09, + "loss": 3.7761, + "step": 4217500 + }, + { + "epoch": 46.84, + "learning_rate": 7.893187926171067e-09, + "loss": 3.7286, + "step": 4218000 + }, + { + "epoch": 46.85, + "learning_rate": 7.879306132421207e-09, + "loss": 3.7617, + "step": 4218500 + }, + { + "epoch": 46.85, + "learning_rate": 7.865424338671345e-09, + "loss": 3.754, + "step": 4219000 + }, + { + "epoch": 46.86, + "learning_rate": 7.851542544921484e-09, + "loss": 3.7426, + "step": 4219500 + }, + { + "epoch": 46.86, + "learning_rate": 7.837660751171624e-09, + "loss": 3.7342, + "step": 4220000 + }, + { + "epoch": 46.87, + "learning_rate": 7.823778957421762e-09, + "loss": 3.7505, + "step": 4220500 + }, + { + "epoch": 46.88, + "learning_rate": 7.8098971636719e-09, + "loss": 3.7531, + "step": 4221000 + }, + { + "epoch": 46.88, + "learning_rate": 7.79601536992204e-09, + "loss": 3.7449, + "step": 4221500 + }, + { + "epoch": 46.89, + "learning_rate": 7.782133576172178e-09, + "loss": 3.7328, + "step": 4222000 + }, + { + "epoch": 46.89, + "learning_rate": 7.768251782422316e-09, + "loss": 3.7622, + "step": 4222500 + }, + { + "epoch": 46.9, + "learning_rate": 7.754369988672456e-09, + "loss": 3.7446, + "step": 4223000 + }, + { + "epoch": 46.9, + "learning_rate": 7.740488194922595e-09, + "loss": 3.7417, + "step": 4223500 + }, + { + "epoch": 46.91, + "learning_rate": 7.726606401172733e-09, + "loss": 3.741, + "step": 4224000 + }, + { + "epoch": 46.91, + "learning_rate": 7.712724607422873e-09, + "loss": 3.7444, + "step": 4224500 + }, + { + "epoch": 46.92, + "learning_rate": 7.69884281367301e-09, + "loss": 3.7523, + "step": 4225000 + }, + { + "epoch": 46.93, + "learning_rate": 7.68496101992315e-09, + "loss": 3.7361, + "step": 4225500 + }, + { + "epoch": 46.93, + "learning_rate": 7.671079226173288e-09, + "loss": 3.7277, + "step": 4226000 + }, + { + "epoch": 46.94, + "learning_rate": 7.657197432423427e-09, + "loss": 3.7439, + "step": 4226500 + }, + { + "epoch": 46.94, + "learning_rate": 7.643315638673566e-09, + "loss": 3.7499, + "step": 4227000 + }, + { + "epoch": 46.95, + "learning_rate": 7.629433844923705e-09, + "loss": 3.7438, + "step": 4227500 + }, + { + "epoch": 46.95, + "learning_rate": 7.615552051173844e-09, + "loss": 3.7331, + "step": 4228000 + }, + { + "epoch": 46.96, + "learning_rate": 7.601670257423983e-09, + "loss": 3.7447, + "step": 4228500 + }, + { + "epoch": 46.96, + "learning_rate": 7.587788463674122e-09, + "loss": 3.7508, + "step": 4229000 + }, + { + "epoch": 46.97, + "learning_rate": 7.573906669924261e-09, + "loss": 3.7655, + "step": 4229500 + }, + { + "epoch": 46.98, + "learning_rate": 7.5600248761744e-09, + "loss": 3.7447, + "step": 4230000 + }, + { + "epoch": 46.98, + "learning_rate": 7.546143082424539e-09, + "loss": 3.7397, + "step": 4230500 + }, + { + "epoch": 46.99, + "learning_rate": 7.532261288674676e-09, + "loss": 3.7532, + "step": 4231000 + }, + { + "epoch": 46.99, + "learning_rate": 7.518379494924817e-09, + "loss": 3.7338, + "step": 4231500 + }, + { + "epoch": 47.0, + "learning_rate": 7.504497701174954e-09, + "loss": 3.7446, + "step": 4232000 + }, + { + "epoch": 47.0, + "eval_loss": 3.8242743015289307, + "eval_runtime": 6.3073, + "eval_samples_per_second": 246.381, + "step": 4232162 + }, + { + "epoch": 47.0, + "learning_rate": 7.490615907425093e-09, + "loss": 3.7579, + "step": 4232500 + }, + { + "epoch": 47.01, + "learning_rate": 7.476734113675232e-09, + "loss": 3.7574, + "step": 4233000 + }, + { + "epoch": 47.01, + "learning_rate": 7.462852319925371e-09, + "loss": 3.7213, + "step": 4233500 + }, + { + "epoch": 47.02, + "learning_rate": 7.44897052617551e-09, + "loss": 3.7384, + "step": 4234000 + }, + { + "epoch": 47.03, + "learning_rate": 7.435088732425648e-09, + "loss": 3.7482, + "step": 4234500 + }, + { + "epoch": 47.03, + "learning_rate": 7.421206938675788e-09, + "loss": 3.7463, + "step": 4235000 + }, + { + "epoch": 47.04, + "learning_rate": 7.407325144925926e-09, + "loss": 3.7536, + "step": 4235500 + }, + { + "epoch": 47.04, + "learning_rate": 7.393443351176065e-09, + "loss": 3.7523, + "step": 4236000 + }, + { + "epoch": 47.05, + "learning_rate": 7.379561557426205e-09, + "loss": 3.752, + "step": 4236500 + }, + { + "epoch": 47.05, + "learning_rate": 7.365679763676343e-09, + "loss": 3.7335, + "step": 4237000 + }, + { + "epoch": 47.06, + "learning_rate": 7.351797969926482e-09, + "loss": 3.7644, + "step": 4237500 + }, + { + "epoch": 47.06, + "learning_rate": 7.33791617617662e-09, + "loss": 3.7377, + "step": 4238000 + }, + { + "epoch": 47.07, + "learning_rate": 7.32403438242676e-09, + "loss": 3.7583, + "step": 4238500 + }, + { + "epoch": 47.08, + "learning_rate": 7.310152588676897e-09, + "loss": 3.748, + "step": 4239000 + }, + { + "epoch": 47.08, + "learning_rate": 7.296270794927037e-09, + "loss": 3.7442, + "step": 4239500 + }, + { + "epoch": 47.09, + "learning_rate": 7.282389001177177e-09, + "loss": 3.7548, + "step": 4240000 + }, + { + "epoch": 47.09, + "learning_rate": 7.268507207427314e-09, + "loss": 3.7585, + "step": 4240500 + }, + { + "epoch": 47.1, + "learning_rate": 7.254625413677454e-09, + "loss": 3.7421, + "step": 4241000 + }, + { + "epoch": 47.1, + "learning_rate": 7.240743619927592e-09, + "loss": 3.753, + "step": 4241500 + }, + { + "epoch": 47.11, + "learning_rate": 7.226861826177731e-09, + "loss": 3.7334, + "step": 4242000 + }, + { + "epoch": 47.11, + "learning_rate": 7.212980032427871e-09, + "loss": 3.7381, + "step": 4242500 + }, + { + "epoch": 47.12, + "learning_rate": 7.199098238678009e-09, + "loss": 3.7534, + "step": 4243000 + }, + { + "epoch": 47.13, + "learning_rate": 7.185216444928148e-09, + "loss": 3.7541, + "step": 4243500 + }, + { + "epoch": 47.13, + "learning_rate": 7.171334651178286e-09, + "loss": 3.7599, + "step": 4244000 + }, + { + "epoch": 47.14, + "learning_rate": 7.157452857428426e-09, + "loss": 3.757, + "step": 4244500 + }, + { + "epoch": 47.14, + "learning_rate": 7.143571063678564e-09, + "loss": 3.7498, + "step": 4245000 + }, + { + "epoch": 47.15, + "learning_rate": 7.129689269928703e-09, + "loss": 3.7329, + "step": 4245500 + }, + { + "epoch": 47.15, + "learning_rate": 7.1158074761788425e-09, + "loss": 3.7626, + "step": 4246000 + }, + { + "epoch": 47.16, + "learning_rate": 7.101925682428981e-09, + "loss": 3.7494, + "step": 4246500 + }, + { + "epoch": 47.16, + "learning_rate": 7.0880438886791196e-09, + "loss": 3.7384, + "step": 4247000 + }, + { + "epoch": 47.17, + "learning_rate": 7.074162094929258e-09, + "loss": 3.7444, + "step": 4247500 + }, + { + "epoch": 47.18, + "learning_rate": 7.0602803011793975e-09, + "loss": 3.7411, + "step": 4248000 + }, + { + "epoch": 47.18, + "learning_rate": 7.046398507429535e-09, + "loss": 3.754, + "step": 4248500 + }, + { + "epoch": 47.19, + "learning_rate": 7.0325167136796745e-09, + "loss": 3.7453, + "step": 4249000 + }, + { + "epoch": 47.19, + "learning_rate": 7.018634919929814e-09, + "loss": 3.7349, + "step": 4249500 + }, + { + "epoch": 47.2, + "learning_rate": 7.004753126179952e-09, + "loss": 3.759, + "step": 4250000 + }, + { + "epoch": 47.2, + "learning_rate": 6.9908713324300914e-09, + "loss": 3.7364, + "step": 4250500 + }, + { + "epoch": 47.21, + "learning_rate": 6.9769895386802295e-09, + "loss": 3.7578, + "step": 4251000 + }, + { + "epoch": 47.21, + "learning_rate": 6.9631077449303685e-09, + "loss": 3.7372, + "step": 4251500 + }, + { + "epoch": 47.22, + "learning_rate": 6.949225951180507e-09, + "loss": 3.7757, + "step": 4252000 + }, + { + "epoch": 47.23, + "learning_rate": 6.935344157430646e-09, + "loss": 3.7667, + "step": 4252500 + }, + { + "epoch": 47.23, + "learning_rate": 6.921462363680785e-09, + "loss": 3.7323, + "step": 4253000 + }, + { + "epoch": 47.24, + "learning_rate": 6.9075805699309235e-09, + "loss": 3.7586, + "step": 4253500 + }, + { + "epoch": 47.24, + "learning_rate": 6.893698776181063e-09, + "loss": 3.7294, + "step": 4254000 + }, + { + "epoch": 47.25, + "learning_rate": 6.879816982431201e-09, + "loss": 3.7665, + "step": 4254500 + }, + { + "epoch": 47.25, + "learning_rate": 6.86593518868134e-09, + "loss": 3.7453, + "step": 4255000 + }, + { + "epoch": 47.26, + "learning_rate": 6.8520533949314785e-09, + "loss": 3.7365, + "step": 4255500 + }, + { + "epoch": 47.26, + "learning_rate": 6.838171601181618e-09, + "loss": 3.7523, + "step": 4256000 + }, + { + "epoch": 47.27, + "learning_rate": 6.824289807431757e-09, + "loss": 3.74, + "step": 4256500 + }, + { + "epoch": 47.28, + "learning_rate": 6.810408013681895e-09, + "loss": 3.7408, + "step": 4257000 + }, + { + "epoch": 47.28, + "learning_rate": 6.796526219932035e-09, + "loss": 3.7457, + "step": 4257500 + }, + { + "epoch": 47.29, + "learning_rate": 6.7826444261821724e-09, + "loss": 3.7513, + "step": 4258000 + }, + { + "epoch": 47.29, + "learning_rate": 6.768762632432312e-09, + "loss": 3.7356, + "step": 4258500 + }, + { + "epoch": 47.3, + "learning_rate": 6.754880838682451e-09, + "loss": 3.7465, + "step": 4259000 + }, + { + "epoch": 47.3, + "learning_rate": 6.740999044932589e-09, + "loss": 3.7641, + "step": 4259500 + }, + { + "epoch": 47.31, + "learning_rate": 6.727117251182729e-09, + "loss": 3.7496, + "step": 4260000 + }, + { + "epoch": 47.31, + "learning_rate": 6.713235457432867e-09, + "loss": 3.7393, + "step": 4260500 + }, + { + "epoch": 47.32, + "learning_rate": 6.699353663683006e-09, + "loss": 3.7531, + "step": 4261000 + }, + { + "epoch": 47.33, + "learning_rate": 6.685471869933144e-09, + "loss": 3.7458, + "step": 4261500 + }, + { + "epoch": 47.33, + "learning_rate": 6.671590076183284e-09, + "loss": 3.7333, + "step": 4262000 + }, + { + "epoch": 47.34, + "learning_rate": 6.657708282433423e-09, + "loss": 3.7629, + "step": 4262500 + }, + { + "epoch": 47.34, + "learning_rate": 6.643826488683561e-09, + "loss": 3.7322, + "step": 4263000 + }, + { + "epoch": 47.35, + "learning_rate": 6.629944694933701e-09, + "loss": 3.7509, + "step": 4263500 + }, + { + "epoch": 47.35, + "learning_rate": 6.616062901183839e-09, + "loss": 3.7388, + "step": 4264000 + }, + { + "epoch": 47.36, + "learning_rate": 6.602181107433978e-09, + "loss": 3.7486, + "step": 4264500 + }, + { + "epoch": 47.36, + "learning_rate": 6.588299313684116e-09, + "loss": 3.7601, + "step": 4265000 + }, + { + "epoch": 47.37, + "learning_rate": 6.574417519934256e-09, + "loss": 3.7358, + "step": 4265500 + }, + { + "epoch": 47.38, + "learning_rate": 6.560535726184395e-09, + "loss": 3.7471, + "step": 4266000 + }, + { + "epoch": 47.38, + "learning_rate": 6.546653932434533e-09, + "loss": 3.762, + "step": 4266500 + }, + { + "epoch": 47.39, + "learning_rate": 6.532772138684672e-09, + "loss": 3.7427, + "step": 4267000 + }, + { + "epoch": 47.39, + "learning_rate": 6.51889034493481e-09, + "loss": 3.748, + "step": 4267500 + }, + { + "epoch": 47.4, + "learning_rate": 6.50500855118495e-09, + "loss": 3.7439, + "step": 4268000 + }, + { + "epoch": 47.4, + "learning_rate": 6.491126757435088e-09, + "loss": 3.7493, + "step": 4268500 + }, + { + "epoch": 47.41, + "learning_rate": 6.477244963685227e-09, + "loss": 3.7257, + "step": 4269000 + }, + { + "epoch": 47.41, + "learning_rate": 6.463363169935367e-09, + "loss": 3.7338, + "step": 4269500 + }, + { + "epoch": 47.42, + "learning_rate": 6.449481376185505e-09, + "loss": 3.7524, + "step": 4270000 + }, + { + "epoch": 47.43, + "learning_rate": 6.435599582435644e-09, + "loss": 3.7315, + "step": 4270500 + }, + { + "epoch": 47.43, + "learning_rate": 6.421717788685782e-09, + "loss": 3.7389, + "step": 4271000 + }, + { + "epoch": 47.44, + "learning_rate": 6.407835994935922e-09, + "loss": 3.7326, + "step": 4271500 + }, + { + "epoch": 47.44, + "learning_rate": 6.39395420118606e-09, + "loss": 3.7458, + "step": 4272000 + }, + { + "epoch": 47.45, + "learning_rate": 6.380072407436199e-09, + "loss": 3.7548, + "step": 4272500 + }, + { + "epoch": 47.45, + "learning_rate": 6.366190613686339e-09, + "loss": 3.7648, + "step": 4273000 + }, + { + "epoch": 47.46, + "learning_rate": 6.352308819936476e-09, + "loss": 3.7574, + "step": 4273500 + }, + { + "epoch": 47.46, + "learning_rate": 6.338427026186616e-09, + "loss": 3.7652, + "step": 4274000 + }, + { + "epoch": 47.47, + "learning_rate": 6.324545232436754e-09, + "loss": 3.7503, + "step": 4274500 + }, + { + "epoch": 47.48, + "learning_rate": 6.310663438686893e-09, + "loss": 3.748, + "step": 4275000 + }, + { + "epoch": 47.48, + "learning_rate": 6.2967816449370326e-09, + "loss": 3.7334, + "step": 4275500 + }, + { + "epoch": 47.49, + "learning_rate": 6.282899851187171e-09, + "loss": 3.7136, + "step": 4276000 + }, + { + "epoch": 47.49, + "learning_rate": 6.26901805743731e-09, + "loss": 3.7517, + "step": 4276500 + }, + { + "epoch": 47.5, + "learning_rate": 6.255136263687448e-09, + "loss": 3.7631, + "step": 4277000 + }, + { + "epoch": 47.5, + "learning_rate": 6.2412544699375876e-09, + "loss": 3.7392, + "step": 4277500 + }, + { + "epoch": 47.51, + "learning_rate": 6.2273726761877265e-09, + "loss": 3.7512, + "step": 4278000 + }, + { + "epoch": 47.51, + "learning_rate": 6.213490882437865e-09, + "loss": 3.7409, + "step": 4278500 + }, + { + "epoch": 47.52, + "learning_rate": 6.199609088688004e-09, + "loss": 3.7504, + "step": 4279000 + }, + { + "epoch": 47.53, + "learning_rate": 6.1857272949381426e-09, + "loss": 3.7566, + "step": 4279500 + }, + { + "epoch": 47.53, + "learning_rate": 6.171845501188281e-09, + "loss": 3.7324, + "step": 4280000 + }, + { + "epoch": 47.54, + "learning_rate": 6.1579637074384205e-09, + "loss": 3.7633, + "step": 4280500 + }, + { + "epoch": 47.54, + "learning_rate": 6.1440819136885594e-09, + "loss": 3.7647, + "step": 4281000 + }, + { + "epoch": 47.55, + "learning_rate": 6.1302001199386975e-09, + "loss": 3.7463, + "step": 4281500 + }, + { + "epoch": 47.55, + "learning_rate": 6.1163183261888365e-09, + "loss": 3.7659, + "step": 4282000 + }, + { + "epoch": 47.56, + "learning_rate": 6.1024365324389755e-09, + "loss": 3.7252, + "step": 4282500 + }, + { + "epoch": 47.56, + "learning_rate": 6.0885547386891136e-09, + "loss": 3.7308, + "step": 4283000 + }, + { + "epoch": 47.57, + "learning_rate": 6.0746729449392525e-09, + "loss": 3.7476, + "step": 4283500 + }, + { + "epoch": 47.58, + "learning_rate": 6.060791151189392e-09, + "loss": 3.7416, + "step": 4284000 + }, + { + "epoch": 47.58, + "learning_rate": 6.0469093574395304e-09, + "loss": 3.7505, + "step": 4284500 + }, + { + "epoch": 47.59, + "learning_rate": 6.033027563689669e-09, + "loss": 3.7479, + "step": 4285000 + }, + { + "epoch": 47.59, + "learning_rate": 6.019145769939808e-09, + "loss": 3.7348, + "step": 4285500 + }, + { + "epoch": 47.6, + "learning_rate": 6.005263976189947e-09, + "loss": 3.7442, + "step": 4286000 + }, + { + "epoch": 47.6, + "learning_rate": 5.9913821824400854e-09, + "loss": 3.749, + "step": 4286500 + }, + { + "epoch": 47.61, + "learning_rate": 5.977500388690225e-09, + "loss": 3.7436, + "step": 4287000 + }, + { + "epoch": 47.61, + "learning_rate": 5.963618594940363e-09, + "loss": 3.7438, + "step": 4287500 + }, + { + "epoch": 47.62, + "learning_rate": 5.949736801190502e-09, + "loss": 3.7576, + "step": 4288000 + }, + { + "epoch": 47.63, + "learning_rate": 5.935855007440641e-09, + "loss": 3.7379, + "step": 4288500 + }, + { + "epoch": 47.63, + "learning_rate": 5.92197321369078e-09, + "loss": 3.7452, + "step": 4289000 + }, + { + "epoch": 47.64, + "learning_rate": 5.908091419940918e-09, + "loss": 3.7453, + "step": 4289500 + }, + { + "epoch": 47.64, + "learning_rate": 5.894209626191057e-09, + "loss": 3.731, + "step": 4290000 + }, + { + "epoch": 47.65, + "learning_rate": 5.880327832441197e-09, + "loss": 3.7422, + "step": 4290500 + }, + { + "epoch": 47.65, + "learning_rate": 5.866446038691335e-09, + "loss": 3.7458, + "step": 4291000 + }, + { + "epoch": 47.66, + "learning_rate": 5.852564244941474e-09, + "loss": 3.7311, + "step": 4291500 + }, + { + "epoch": 47.66, + "learning_rate": 5.838682451191613e-09, + "loss": 3.7443, + "step": 4292000 + }, + { + "epoch": 47.67, + "learning_rate": 5.824800657441751e-09, + "loss": 3.751, + "step": 4292500 + }, + { + "epoch": 47.68, + "learning_rate": 5.81091886369189e-09, + "loss": 3.7512, + "step": 4293000 + }, + { + "epoch": 47.68, + "learning_rate": 5.79703706994203e-09, + "loss": 3.7437, + "step": 4293500 + }, + { + "epoch": 47.69, + "learning_rate": 5.783155276192168e-09, + "loss": 3.7528, + "step": 4294000 + }, + { + "epoch": 47.69, + "learning_rate": 5.769273482442307e-09, + "loss": 3.7476, + "step": 4294500 + }, + { + "epoch": 47.7, + "learning_rate": 5.755391688692446e-09, + "loss": 3.7665, + "step": 4295000 + }, + { + "epoch": 47.7, + "learning_rate": 5.741509894942584e-09, + "loss": 3.7433, + "step": 4295500 + }, + { + "epoch": 47.71, + "learning_rate": 5.727628101192723e-09, + "loss": 3.7335, + "step": 4296000 + }, + { + "epoch": 47.71, + "learning_rate": 5.713746307442862e-09, + "loss": 3.7512, + "step": 4296500 + }, + { + "epoch": 47.72, + "learning_rate": 5.699864513693001e-09, + "loss": 3.7378, + "step": 4297000 + }, + { + "epoch": 47.73, + "learning_rate": 5.68598271994314e-09, + "loss": 3.7372, + "step": 4297500 + }, + { + "epoch": 47.73, + "learning_rate": 5.672100926193279e-09, + "loss": 3.759, + "step": 4298000 + }, + { + "epoch": 47.74, + "learning_rate": 5.658219132443418e-09, + "loss": 3.7443, + "step": 4298500 + }, + { + "epoch": 47.74, + "learning_rate": 5.644337338693556e-09, + "loss": 3.7466, + "step": 4299000 + }, + { + "epoch": 47.75, + "learning_rate": 5.630455544943695e-09, + "loss": 3.7543, + "step": 4299500 + }, + { + "epoch": 47.75, + "learning_rate": 5.616573751193834e-09, + "loss": 3.7359, + "step": 4300000 + }, + { + "epoch": 47.76, + "learning_rate": 5.602691957443973e-09, + "loss": 3.7403, + "step": 4300500 + }, + { + "epoch": 47.76, + "learning_rate": 5.588810163694112e-09, + "loss": 3.7444, + "step": 4301000 + }, + { + "epoch": 47.77, + "learning_rate": 5.574928369944251e-09, + "loss": 3.7319, + "step": 4301500 + }, + { + "epoch": 47.78, + "learning_rate": 5.561046576194389e-09, + "loss": 3.7551, + "step": 4302000 + }, + { + "epoch": 47.78, + "learning_rate": 5.547164782444528e-09, + "loss": 3.7523, + "step": 4302500 + }, + { + "epoch": 47.79, + "learning_rate": 5.533282988694667e-09, + "loss": 3.7417, + "step": 4303000 + }, + { + "epoch": 47.79, + "learning_rate": 5.519401194944806e-09, + "loss": 3.7365, + "step": 4303500 + }, + { + "epoch": 47.8, + "learning_rate": 5.505519401194945e-09, + "loss": 3.7219, + "step": 4304000 + }, + { + "epoch": 47.8, + "learning_rate": 5.491637607445084e-09, + "loss": 3.7592, + "step": 4304500 + }, + { + "epoch": 47.81, + "learning_rate": 5.477755813695222e-09, + "loss": 3.7389, + "step": 4305000 + }, + { + "epoch": 47.81, + "learning_rate": 5.463874019945361e-09, + "loss": 3.7461, + "step": 4305500 + }, + { + "epoch": 47.82, + "learning_rate": 5.4499922261955e-09, + "loss": 3.758, + "step": 4306000 + }, + { + "epoch": 47.83, + "learning_rate": 5.436110432445639e-09, + "loss": 3.7574, + "step": 4306500 + }, + { + "epoch": 47.83, + "learning_rate": 5.422228638695778e-09, + "loss": 3.7653, + "step": 4307000 + }, + { + "epoch": 47.84, + "learning_rate": 5.408346844945917e-09, + "loss": 3.7657, + "step": 4307500 + }, + { + "epoch": 47.84, + "learning_rate": 5.3944650511960556e-09, + "loss": 3.7303, + "step": 4308000 + }, + { + "epoch": 47.85, + "learning_rate": 5.380583257446194e-09, + "loss": 3.751, + "step": 4308500 + }, + { + "epoch": 47.85, + "learning_rate": 5.366701463696333e-09, + "loss": 3.742, + "step": 4309000 + }, + { + "epoch": 47.86, + "learning_rate": 5.352819669946472e-09, + "loss": 3.748, + "step": 4309500 + }, + { + "epoch": 47.86, + "learning_rate": 5.3389378761966106e-09, + "loss": 3.7503, + "step": 4310000 + }, + { + "epoch": 47.87, + "learning_rate": 5.3250560824467495e-09, + "loss": 3.7446, + "step": 4310500 + }, + { + "epoch": 47.88, + "learning_rate": 5.3111742886968885e-09, + "loss": 3.7476, + "step": 4311000 + }, + { + "epoch": 47.88, + "learning_rate": 5.297292494947027e-09, + "loss": 3.7416, + "step": 4311500 + }, + { + "epoch": 47.89, + "learning_rate": 5.2834107011971655e-09, + "loss": 3.7469, + "step": 4312000 + }, + { + "epoch": 47.89, + "learning_rate": 5.2695289074473045e-09, + "loss": 3.7595, + "step": 4312500 + }, + { + "epoch": 47.9, + "learning_rate": 5.255647113697443e-09, + "loss": 3.741, + "step": 4313000 + }, + { + "epoch": 47.9, + "learning_rate": 5.2417653199475824e-09, + "loss": 3.7428, + "step": 4313500 + }, + { + "epoch": 47.91, + "learning_rate": 5.227883526197721e-09, + "loss": 3.7363, + "step": 4314000 + }, + { + "epoch": 47.91, + "learning_rate": 5.2140017324478595e-09, + "loss": 3.7287, + "step": 4314500 + }, + { + "epoch": 47.92, + "learning_rate": 5.2001199386979985e-09, + "loss": 3.7548, + "step": 4315000 + }, + { + "epoch": 47.93, + "learning_rate": 5.186238144948137e-09, + "loss": 3.7676, + "step": 4315500 + }, + { + "epoch": 47.93, + "learning_rate": 5.172356351198276e-09, + "loss": 3.7503, + "step": 4316000 + }, + { + "epoch": 47.94, + "learning_rate": 5.158474557448415e-09, + "loss": 3.7547, + "step": 4316500 + }, + { + "epoch": 47.94, + "learning_rate": 5.144592763698554e-09, + "loss": 3.7436, + "step": 4317000 + }, + { + "epoch": 47.95, + "learning_rate": 5.130710969948692e-09, + "loss": 3.7506, + "step": 4317500 + }, + { + "epoch": 47.95, + "learning_rate": 5.116829176198831e-09, + "loss": 3.7512, + "step": 4318000 + }, + { + "epoch": 47.96, + "learning_rate": 5.10294738244897e-09, + "loss": 3.7576, + "step": 4318500 + }, + { + "epoch": 47.96, + "learning_rate": 5.089065588699109e-09, + "loss": 3.752, + "step": 4319000 + }, + { + "epoch": 47.97, + "learning_rate": 5.075183794949247e-09, + "loss": 3.7454, + "step": 4319500 + }, + { + "epoch": 47.98, + "learning_rate": 5.061302001199387e-09, + "loss": 3.7301, + "step": 4320000 + }, + { + "epoch": 47.98, + "learning_rate": 5.047420207449526e-09, + "loss": 3.7606, + "step": 4320500 + }, + { + "epoch": 47.99, + "learning_rate": 5.033538413699664e-09, + "loss": 3.7354, + "step": 4321000 + }, + { + "epoch": 47.99, + "learning_rate": 5.019656619949803e-09, + "loss": 3.7535, + "step": 4321500 + }, + { + "epoch": 48.0, + "learning_rate": 5.005774826199942e-09, + "loss": 3.7427, + "step": 4322000 + }, + { + "epoch": 48.0, + "eval_loss": 3.824249267578125, + "eval_runtime": 6.2995, + "eval_samples_per_second": 246.684, + "step": 4322208 + }, + { + "epoch": 48.0, + "learning_rate": 4.99189303245008e-09, + "loss": 3.7413, + "step": 4322500 + }, + { + "epoch": 48.01, + "learning_rate": 4.978011238700219e-09, + "loss": 3.761, + "step": 4323000 + }, + { + "epoch": 48.01, + "learning_rate": 4.964129444950359e-09, + "loss": 3.7313, + "step": 4323500 + }, + { + "epoch": 48.02, + "learning_rate": 4.950247651200497e-09, + "loss": 3.7562, + "step": 4324000 + }, + { + "epoch": 48.03, + "learning_rate": 4.936365857450636e-09, + "loss": 3.7567, + "step": 4324500 + }, + { + "epoch": 48.03, + "learning_rate": 4.922484063700775e-09, + "loss": 3.7307, + "step": 4325000 + }, + { + "epoch": 48.04, + "learning_rate": 4.908602269950913e-09, + "loss": 3.7446, + "step": 4325500 + }, + { + "epoch": 48.04, + "learning_rate": 4.894720476201052e-09, + "loss": 3.7502, + "step": 4326000 + }, + { + "epoch": 48.05, + "learning_rate": 4.880838682451192e-09, + "loss": 3.7549, + "step": 4326500 + }, + { + "epoch": 48.05, + "learning_rate": 4.86695688870133e-09, + "loss": 3.7369, + "step": 4327000 + }, + { + "epoch": 48.06, + "learning_rate": 4.853075094951469e-09, + "loss": 3.7435, + "step": 4327500 + }, + { + "epoch": 48.06, + "learning_rate": 4.839193301201608e-09, + "loss": 3.7563, + "step": 4328000 + }, + { + "epoch": 48.07, + "learning_rate": 4.825311507451747e-09, + "loss": 3.7402, + "step": 4328500 + }, + { + "epoch": 48.08, + "learning_rate": 4.811429713701885e-09, + "loss": 3.736, + "step": 4329000 + }, + { + "epoch": 48.08, + "learning_rate": 4.797547919952024e-09, + "loss": 3.734, + "step": 4329500 + }, + { + "epoch": 48.09, + "learning_rate": 4.783666126202164e-09, + "loss": 3.7503, + "step": 4330000 + }, + { + "epoch": 48.09, + "learning_rate": 4.769784332452302e-09, + "loss": 3.762, + "step": 4330500 + }, + { + "epoch": 48.1, + "learning_rate": 4.755902538702441e-09, + "loss": 3.76, + "step": 4331000 + }, + { + "epoch": 48.1, + "learning_rate": 4.74202074495258e-09, + "loss": 3.7506, + "step": 4331500 + }, + { + "epoch": 48.11, + "learning_rate": 4.728138951202718e-09, + "loss": 3.7372, + "step": 4332000 + }, + { + "epoch": 48.11, + "learning_rate": 4.714257157452857e-09, + "loss": 3.7512, + "step": 4332500 + }, + { + "epoch": 48.12, + "learning_rate": 4.700375363702997e-09, + "loss": 3.7584, + "step": 4333000 + }, + { + "epoch": 48.13, + "learning_rate": 4.686493569953135e-09, + "loss": 3.7337, + "step": 4333500 + }, + { + "epoch": 48.13, + "learning_rate": 4.672611776203274e-09, + "loss": 3.7273, + "step": 4334000 + }, + { + "epoch": 48.14, + "learning_rate": 4.658729982453413e-09, + "loss": 3.7547, + "step": 4334500 + }, + { + "epoch": 48.14, + "learning_rate": 4.644848188703551e-09, + "loss": 3.7307, + "step": 4335000 + }, + { + "epoch": 48.15, + "learning_rate": 4.63096639495369e-09, + "loss": 3.7276, + "step": 4335500 + }, + { + "epoch": 48.15, + "learning_rate": 4.617084601203829e-09, + "loss": 3.7379, + "step": 4336000 + }, + { + "epoch": 48.16, + "learning_rate": 4.603202807453968e-09, + "loss": 3.7674, + "step": 4336500 + }, + { + "epoch": 48.16, + "learning_rate": 4.589321013704107e-09, + "loss": 3.76, + "step": 4337000 + }, + { + "epoch": 48.17, + "learning_rate": 4.575439219954246e-09, + "loss": 3.7587, + "step": 4337500 + }, + { + "epoch": 48.18, + "learning_rate": 4.561557426204384e-09, + "loss": 3.7482, + "step": 4338000 + }, + { + "epoch": 48.18, + "learning_rate": 4.547675632454523e-09, + "loss": 3.7387, + "step": 4338500 + }, + { + "epoch": 48.19, + "learning_rate": 4.533793838704662e-09, + "loss": 3.7455, + "step": 4339000 + }, + { + "epoch": 48.19, + "learning_rate": 4.519912044954801e-09, + "loss": 3.7585, + "step": 4339500 + }, + { + "epoch": 48.2, + "learning_rate": 4.50603025120494e-09, + "loss": 3.7477, + "step": 4340000 + }, + { + "epoch": 48.2, + "learning_rate": 4.4921484574550786e-09, + "loss": 3.7481, + "step": 4340500 + }, + { + "epoch": 48.21, + "learning_rate": 4.4782666637052175e-09, + "loss": 3.7569, + "step": 4341000 + }, + { + "epoch": 48.21, + "learning_rate": 4.464384869955356e-09, + "loss": 3.7658, + "step": 4341500 + }, + { + "epoch": 48.22, + "learning_rate": 4.450503076205495e-09, + "loss": 3.7601, + "step": 4342000 + }, + { + "epoch": 48.23, + "learning_rate": 4.4366212824556336e-09, + "loss": 3.7424, + "step": 4342500 + }, + { + "epoch": 48.23, + "learning_rate": 4.4227394887057725e-09, + "loss": 3.7402, + "step": 4343000 + }, + { + "epoch": 48.24, + "learning_rate": 4.4088576949559115e-09, + "loss": 3.7434, + "step": 4343500 + }, + { + "epoch": 48.24, + "learning_rate": 4.3949759012060504e-09, + "loss": 3.7456, + "step": 4344000 + }, + { + "epoch": 48.25, + "learning_rate": 4.3810941074561885e-09, + "loss": 3.7526, + "step": 4344500 + }, + { + "epoch": 48.25, + "learning_rate": 4.3672123137063275e-09, + "loss": 3.7507, + "step": 4345000 + }, + { + "epoch": 48.26, + "learning_rate": 4.3533305199564665e-09, + "loss": 3.7498, + "step": 4345500 + }, + { + "epoch": 48.26, + "learning_rate": 4.3394487262066046e-09, + "loss": 3.7383, + "step": 4346000 + }, + { + "epoch": 48.27, + "learning_rate": 4.325566932456744e-09, + "loss": 3.774, + "step": 4346500 + }, + { + "epoch": 48.28, + "learning_rate": 4.311685138706883e-09, + "loss": 3.7397, + "step": 4347000 + }, + { + "epoch": 48.28, + "learning_rate": 4.2978033449570215e-09, + "loss": 3.7565, + "step": 4347500 + }, + { + "epoch": 48.29, + "learning_rate": 4.28392155120716e-09, + "loss": 3.7412, + "step": 4348000 + }, + { + "epoch": 48.29, + "learning_rate": 4.270039757457299e-09, + "loss": 3.7558, + "step": 4348500 + }, + { + "epoch": 48.3, + "learning_rate": 4.256157963707438e-09, + "loss": 3.739, + "step": 4349000 + }, + { + "epoch": 48.3, + "learning_rate": 4.242276169957577e-09, + "loss": 3.7347, + "step": 4349500 + }, + { + "epoch": 48.31, + "learning_rate": 4.228394376207716e-09, + "loss": 3.7446, + "step": 4350000 + }, + { + "epoch": 48.31, + "learning_rate": 4.214512582457855e-09, + "loss": 3.7396, + "step": 4350500 + }, + { + "epoch": 48.32, + "learning_rate": 4.200630788707993e-09, + "loss": 3.7384, + "step": 4351000 + }, + { + "epoch": 48.33, + "learning_rate": 4.186748994958132e-09, + "loss": 3.7331, + "step": 4351500 + }, + { + "epoch": 48.33, + "learning_rate": 4.172867201208271e-09, + "loss": 3.7452, + "step": 4352000 + }, + { + "epoch": 48.34, + "learning_rate": 4.158985407458409e-09, + "loss": 3.7511, + "step": 4352500 + }, + { + "epoch": 48.34, + "learning_rate": 4.145103613708549e-09, + "loss": 3.7357, + "step": 4353000 + }, + { + "epoch": 48.35, + "learning_rate": 4.131221819958688e-09, + "loss": 3.7459, + "step": 4353500 + }, + { + "epoch": 48.35, + "learning_rate": 4.117340026208826e-09, + "loss": 3.7424, + "step": 4354000 + }, + { + "epoch": 48.36, + "learning_rate": 4.103458232458965e-09, + "loss": 3.7428, + "step": 4354500 + }, + { + "epoch": 48.36, + "learning_rate": 4.089576438709104e-09, + "loss": 3.7326, + "step": 4355000 + }, + { + "epoch": 48.37, + "learning_rate": 4.075694644959242e-09, + "loss": 3.7446, + "step": 4355500 + }, + { + "epoch": 48.38, + "learning_rate": 4.061812851209382e-09, + "loss": 3.7469, + "step": 4356000 + }, + { + "epoch": 48.38, + "learning_rate": 4.047931057459521e-09, + "loss": 3.7358, + "step": 4356500 + }, + { + "epoch": 48.39, + "learning_rate": 4.034049263709659e-09, + "loss": 3.724, + "step": 4357000 + }, + { + "epoch": 48.39, + "learning_rate": 4.020167469959798e-09, + "loss": 3.7405, + "step": 4357500 + }, + { + "epoch": 48.4, + "learning_rate": 4.006285676209937e-09, + "loss": 3.7326, + "step": 4358000 + }, + { + "epoch": 48.4, + "learning_rate": 3.992403882460076e-09, + "loss": 3.76, + "step": 4358500 + }, + { + "epoch": 48.41, + "learning_rate": 3.978522088710214e-09, + "loss": 3.7301, + "step": 4359000 + }, + { + "epoch": 48.41, + "learning_rate": 3.964640294960354e-09, + "loss": 3.774, + "step": 4359500 + }, + { + "epoch": 48.42, + "learning_rate": 3.950758501210492e-09, + "loss": 3.7366, + "step": 4360000 + }, + { + "epoch": 48.43, + "learning_rate": 3.936876707460631e-09, + "loss": 3.7594, + "step": 4360500 + }, + { + "epoch": 48.43, + "learning_rate": 3.92299491371077e-09, + "loss": 3.7262, + "step": 4361000 + }, + { + "epoch": 48.44, + "learning_rate": 3.909113119960909e-09, + "loss": 3.7613, + "step": 4361500 + }, + { + "epoch": 48.44, + "learning_rate": 3.895231326211047e-09, + "loss": 3.7586, + "step": 4362000 + }, + { + "epoch": 48.45, + "learning_rate": 3.881349532461186e-09, + "loss": 3.7425, + "step": 4362500 + }, + { + "epoch": 48.45, + "learning_rate": 3.867467738711326e-09, + "loss": 3.7375, + "step": 4363000 + }, + { + "epoch": 48.46, + "learning_rate": 3.853585944961464e-09, + "loss": 3.7531, + "step": 4363500 + }, + { + "epoch": 48.46, + "learning_rate": 3.839704151211603e-09, + "loss": 3.7472, + "step": 4364000 + }, + { + "epoch": 48.47, + "learning_rate": 3.825822357461742e-09, + "loss": 3.736, + "step": 4364500 + }, + { + "epoch": 48.48, + "learning_rate": 3.81194056371188e-09, + "loss": 3.7336, + "step": 4365000 + }, + { + "epoch": 48.48, + "learning_rate": 3.798058769962019e-09, + "loss": 3.7498, + "step": 4365500 + }, + { + "epoch": 48.49, + "learning_rate": 3.784176976212159e-09, + "loss": 3.7462, + "step": 4366000 + }, + { + "epoch": 48.49, + "learning_rate": 3.770295182462297e-09, + "loss": 3.7505, + "step": 4366500 + }, + { + "epoch": 48.5, + "learning_rate": 3.756413388712436e-09, + "loss": 3.7319, + "step": 4367000 + }, + { + "epoch": 48.5, + "learning_rate": 3.742531594962575e-09, + "loss": 3.7197, + "step": 4367500 + }, + { + "epoch": 48.51, + "learning_rate": 3.728649801212713e-09, + "loss": 3.7403, + "step": 4368000 + }, + { + "epoch": 48.51, + "learning_rate": 3.7147680074628518e-09, + "loss": 3.7683, + "step": 4368500 + }, + { + "epoch": 48.52, + "learning_rate": 3.7008862137129907e-09, + "loss": 3.7513, + "step": 4369000 + }, + { + "epoch": 48.53, + "learning_rate": 3.68700441996313e-09, + "loss": 3.7484, + "step": 4369500 + }, + { + "epoch": 48.53, + "learning_rate": 3.6731226262132687e-09, + "loss": 3.7541, + "step": 4370000 + }, + { + "epoch": 48.54, + "learning_rate": 3.6592408324634076e-09, + "loss": 3.7547, + "step": 4370500 + }, + { + "epoch": 48.54, + "learning_rate": 3.645359038713546e-09, + "loss": 3.7576, + "step": 4371000 + }, + { + "epoch": 48.55, + "learning_rate": 3.631477244963685e-09, + "loss": 3.7592, + "step": 4371500 + }, + { + "epoch": 48.55, + "learning_rate": 3.6175954512138236e-09, + "loss": 3.7575, + "step": 4372000 + }, + { + "epoch": 48.56, + "learning_rate": 3.603713657463963e-09, + "loss": 3.7601, + "step": 4372500 + }, + { + "epoch": 48.56, + "learning_rate": 3.5898318637141016e-09, + "loss": 3.7476, + "step": 4373000 + }, + { + "epoch": 48.57, + "learning_rate": 3.5759500699642405e-09, + "loss": 3.7387, + "step": 4373500 + }, + { + "epoch": 48.58, + "learning_rate": 3.562068276214379e-09, + "loss": 3.7594, + "step": 4374000 + }, + { + "epoch": 48.58, + "learning_rate": 3.548186482464518e-09, + "loss": 3.7584, + "step": 4374500 + }, + { + "epoch": 48.59, + "learning_rate": 3.5343046887146565e-09, + "loss": 3.7279, + "step": 4375000 + }, + { + "epoch": 48.59, + "learning_rate": 3.5204228949647955e-09, + "loss": 3.7369, + "step": 4375500 + }, + { + "epoch": 48.6, + "learning_rate": 3.506541101214935e-09, + "loss": 3.7355, + "step": 4376000 + }, + { + "epoch": 48.6, + "learning_rate": 3.4926593074650734e-09, + "loss": 3.7466, + "step": 4376500 + }, + { + "epoch": 48.61, + "learning_rate": 3.478777513715212e-09, + "loss": 3.7471, + "step": 4377000 + }, + { + "epoch": 48.61, + "learning_rate": 3.464895719965351e-09, + "loss": 3.7259, + "step": 4377500 + }, + { + "epoch": 48.62, + "learning_rate": 3.4510139262154895e-09, + "loss": 3.737, + "step": 4378000 + }, + { + "epoch": 48.63, + "learning_rate": 3.4371321324656284e-09, + "loss": 3.74, + "step": 4378500 + }, + { + "epoch": 48.63, + "learning_rate": 3.4232503387157678e-09, + "loss": 3.7592, + "step": 4379000 + }, + { + "epoch": 48.64, + "learning_rate": 3.4093685449659063e-09, + "loss": 3.7585, + "step": 4379500 + }, + { + "epoch": 48.64, + "learning_rate": 3.3954867512160453e-09, + "loss": 3.7596, + "step": 4380000 + }, + { + "epoch": 48.65, + "learning_rate": 3.381604957466184e-09, + "loss": 3.7442, + "step": 4380500 + }, + { + "epoch": 48.65, + "learning_rate": 3.3677231637163224e-09, + "loss": 3.7309, + "step": 4381000 + }, + { + "epoch": 48.66, + "learning_rate": 3.3538413699664613e-09, + "loss": 3.751, + "step": 4381500 + }, + { + "epoch": 48.66, + "learning_rate": 3.3399595762166e-09, + "loss": 3.7478, + "step": 4382000 + }, + { + "epoch": 48.67, + "learning_rate": 3.3260777824667392e-09, + "loss": 3.7532, + "step": 4382500 + }, + { + "epoch": 48.68, + "learning_rate": 3.312195988716878e-09, + "loss": 3.7336, + "step": 4383000 + }, + { + "epoch": 48.68, + "learning_rate": 3.2983141949670167e-09, + "loss": 3.7752, + "step": 4383500 + }, + { + "epoch": 48.69, + "learning_rate": 3.2844324012171557e-09, + "loss": 3.7333, + "step": 4384000 + }, + { + "epoch": 48.69, + "learning_rate": 3.2705506074672942e-09, + "loss": 3.7346, + "step": 4384500 + }, + { + "epoch": 48.7, + "learning_rate": 3.2566688137174328e-09, + "loss": 3.7482, + "step": 4385000 + }, + { + "epoch": 48.7, + "learning_rate": 3.2427870199675717e-09, + "loss": 3.7437, + "step": 4385500 + }, + { + "epoch": 48.71, + "learning_rate": 3.228905226217711e-09, + "loss": 3.7538, + "step": 4386000 + }, + { + "epoch": 48.71, + "learning_rate": 3.2150234324678496e-09, + "loss": 3.7316, + "step": 4386500 + }, + { + "epoch": 48.72, + "learning_rate": 3.2011416387179886e-09, + "loss": 3.7489, + "step": 4387000 + }, + { + "epoch": 48.73, + "learning_rate": 3.187259844968127e-09, + "loss": 3.7448, + "step": 4387500 + }, + { + "epoch": 48.73, + "learning_rate": 3.173378051218266e-09, + "loss": 3.7478, + "step": 4388000 + }, + { + "epoch": 48.74, + "learning_rate": 3.1594962574684046e-09, + "loss": 3.7706, + "step": 4388500 + }, + { + "epoch": 48.74, + "learning_rate": 3.145614463718544e-09, + "loss": 3.7455, + "step": 4389000 + }, + { + "epoch": 48.75, + "learning_rate": 3.131732669968683e-09, + "loss": 3.7359, + "step": 4389500 + }, + { + "epoch": 48.75, + "learning_rate": 3.1178508762188215e-09, + "loss": 3.7488, + "step": 4390000 + }, + { + "epoch": 48.76, + "learning_rate": 3.10396908246896e-09, + "loss": 3.7531, + "step": 4390500 + }, + { + "epoch": 48.76, + "learning_rate": 3.090087288719099e-09, + "loss": 3.7573, + "step": 4391000 + }, + { + "epoch": 48.77, + "learning_rate": 3.076205494969238e-09, + "loss": 3.7429, + "step": 4391500 + }, + { + "epoch": 48.78, + "learning_rate": 3.0623237012193765e-09, + "loss": 3.7476, + "step": 4392000 + }, + { + "epoch": 48.78, + "learning_rate": 3.0484419074695154e-09, + "loss": 3.7535, + "step": 4392500 + }, + { + "epoch": 48.79, + "learning_rate": 3.0345601137196544e-09, + "loss": 3.7491, + "step": 4393000 + }, + { + "epoch": 48.79, + "learning_rate": 3.020678319969793e-09, + "loss": 3.7546, + "step": 4393500 + }, + { + "epoch": 48.8, + "learning_rate": 3.006796526219932e-09, + "loss": 3.7614, + "step": 4394000 + }, + { + "epoch": 48.8, + "learning_rate": 2.9929147324700704e-09, + "loss": 3.743, + "step": 4394500 + }, + { + "epoch": 48.81, + "learning_rate": 2.97903293872021e-09, + "loss": 3.7431, + "step": 4395000 + }, + { + "epoch": 48.81, + "learning_rate": 2.9651511449703483e-09, + "loss": 3.7512, + "step": 4395500 + }, + { + "epoch": 48.82, + "learning_rate": 2.951269351220487e-09, + "loss": 3.7339, + "step": 4396000 + }, + { + "epoch": 48.83, + "learning_rate": 2.9373875574706263e-09, + "loss": 3.7485, + "step": 4396500 + }, + { + "epoch": 48.83, + "learning_rate": 2.923505763720765e-09, + "loss": 3.7315, + "step": 4397000 + }, + { + "epoch": 48.84, + "learning_rate": 2.9096239699709033e-09, + "loss": 3.7569, + "step": 4397500 + }, + { + "epoch": 48.84, + "learning_rate": 2.8957421762210427e-09, + "loss": 3.7529, + "step": 4398000 + }, + { + "epoch": 48.85, + "learning_rate": 2.8818603824711812e-09, + "loss": 3.7447, + "step": 4398500 + }, + { + "epoch": 48.85, + "learning_rate": 2.86797858872132e-09, + "loss": 3.7557, + "step": 4399000 + }, + { + "epoch": 48.86, + "learning_rate": 2.8540967949714587e-09, + "loss": 3.7354, + "step": 4399500 + }, + { + "epoch": 48.86, + "learning_rate": 2.8402150012215977e-09, + "loss": 3.7433, + "step": 4400000 + }, + { + "epoch": 48.87, + "learning_rate": 2.8263332074717367e-09, + "loss": 3.745, + "step": 4400500 + }, + { + "epoch": 48.88, + "learning_rate": 2.812451413721875e-09, + "loss": 3.7475, + "step": 4401000 + }, + { + "epoch": 48.88, + "learning_rate": 2.798569619972014e-09, + "loss": 3.7395, + "step": 4401500 + }, + { + "epoch": 48.89, + "learning_rate": 2.784687826222153e-09, + "loss": 3.746, + "step": 4402000 + }, + { + "epoch": 48.89, + "learning_rate": 2.7708060324722916e-09, + "loss": 3.7468, + "step": 4402500 + }, + { + "epoch": 48.9, + "learning_rate": 2.7569242387224306e-09, + "loss": 3.7423, + "step": 4403000 + }, + { + "epoch": 48.9, + "learning_rate": 2.7430424449725696e-09, + "loss": 3.7408, + "step": 4403500 + }, + { + "epoch": 48.91, + "learning_rate": 2.729160651222708e-09, + "loss": 3.7488, + "step": 4404000 + }, + { + "epoch": 48.91, + "learning_rate": 2.715278857472847e-09, + "loss": 3.7566, + "step": 4404500 + }, + { + "epoch": 48.92, + "learning_rate": 2.701397063722986e-09, + "loss": 3.7398, + "step": 4405000 + }, + { + "epoch": 48.92, + "learning_rate": 2.6875152699731246e-09, + "loss": 3.7627, + "step": 4405500 + }, + { + "epoch": 48.93, + "learning_rate": 2.6736334762232635e-09, + "loss": 3.7559, + "step": 4406000 + }, + { + "epoch": 48.94, + "learning_rate": 2.6597516824734025e-09, + "loss": 3.757, + "step": 4406500 + }, + { + "epoch": 48.94, + "learning_rate": 2.645869888723541e-09, + "loss": 3.7432, + "step": 4407000 + }, + { + "epoch": 48.95, + "learning_rate": 2.63198809497368e-09, + "loss": 3.747, + "step": 4407500 + }, + { + "epoch": 48.95, + "learning_rate": 2.618106301223819e-09, + "loss": 3.741, + "step": 4408000 + }, + { + "epoch": 48.96, + "learning_rate": 2.6042245074739575e-09, + "loss": 3.766, + "step": 4408500 + }, + { + "epoch": 48.96, + "learning_rate": 2.5903427137240964e-09, + "loss": 3.7385, + "step": 4409000 + }, + { + "epoch": 48.97, + "learning_rate": 2.5764609199742354e-09, + "loss": 3.7701, + "step": 4409500 + }, + { + "epoch": 48.97, + "learning_rate": 2.5625791262243743e-09, + "loss": 3.7594, + "step": 4410000 + }, + { + "epoch": 48.98, + "learning_rate": 2.548697332474513e-09, + "loss": 3.7521, + "step": 4410500 + }, + { + "epoch": 48.99, + "learning_rate": 2.5348155387246514e-09, + "loss": 3.7384, + "step": 4411000 + }, + { + "epoch": 48.99, + "learning_rate": 2.5209337449747908e-09, + "loss": 3.7661, + "step": 4411500 + }, + { + "epoch": 49.0, + "learning_rate": 2.5070519512249293e-09, + "loss": 3.7427, + "step": 4412000 + }, + { + "epoch": 49.0, + "eval_loss": 3.824198007583618, + "eval_runtime": 6.3065, + "eval_samples_per_second": 246.412, + "step": 4412254 + }, + { + "epoch": 49.0, + "learning_rate": 2.493170157475068e-09, + "loss": 3.7514, + "step": 4412500 + }, + { + "epoch": 49.01, + "learning_rate": 2.4792883637252072e-09, + "loss": 3.7573, + "step": 4413000 + }, + { + "epoch": 49.01, + "learning_rate": 2.4654065699753458e-09, + "loss": 3.7368, + "step": 4413500 + }, + { + "epoch": 49.02, + "learning_rate": 2.4515247762254847e-09, + "loss": 3.7344, + "step": 4414000 + }, + { + "epoch": 49.02, + "learning_rate": 2.4376429824756237e-09, + "loss": 3.7523, + "step": 4414500 + }, + { + "epoch": 49.03, + "learning_rate": 2.4237611887257622e-09, + "loss": 3.7547, + "step": 4415000 + }, + { + "epoch": 49.04, + "learning_rate": 2.409879394975901e-09, + "loss": 3.7464, + "step": 4415500 + }, + { + "epoch": 49.04, + "learning_rate": 2.39599760122604e-09, + "loss": 3.7586, + "step": 4416000 + }, + { + "epoch": 49.05, + "learning_rate": 2.3821158074761787e-09, + "loss": 3.7505, + "step": 4416500 + }, + { + "epoch": 49.05, + "learning_rate": 2.3682340137263176e-09, + "loss": 3.7578, + "step": 4417000 + }, + { + "epoch": 49.06, + "learning_rate": 2.354352219976456e-09, + "loss": 3.7678, + "step": 4417500 + }, + { + "epoch": 49.06, + "learning_rate": 2.340470426226595e-09, + "loss": 3.7323, + "step": 4418000 + }, + { + "epoch": 49.07, + "learning_rate": 2.326588632476734e-09, + "loss": 3.7412, + "step": 4418500 + }, + { + "epoch": 49.07, + "learning_rate": 2.3127068387268726e-09, + "loss": 3.7396, + "step": 4419000 + }, + { + "epoch": 49.08, + "learning_rate": 2.2988250449770116e-09, + "loss": 3.7359, + "step": 4419500 + }, + { + "epoch": 49.09, + "learning_rate": 2.2849432512271505e-09, + "loss": 3.7364, + "step": 4420000 + }, + { + "epoch": 49.09, + "learning_rate": 2.271061457477289e-09, + "loss": 3.7276, + "step": 4420500 + }, + { + "epoch": 49.1, + "learning_rate": 2.2571796637274284e-09, + "loss": 3.7448, + "step": 4421000 + }, + { + "epoch": 49.1, + "learning_rate": 2.243297869977567e-09, + "loss": 3.7429, + "step": 4421500 + }, + { + "epoch": 49.11, + "learning_rate": 2.2294160762277055e-09, + "loss": 3.7547, + "step": 4422000 + }, + { + "epoch": 49.11, + "learning_rate": 2.2155342824778445e-09, + "loss": 3.7279, + "step": 4422500 + }, + { + "epoch": 49.12, + "learning_rate": 2.2016524887279834e-09, + "loss": 3.735, + "step": 4423000 + }, + { + "epoch": 49.12, + "learning_rate": 2.187770694978122e-09, + "loss": 3.7509, + "step": 4423500 + }, + { + "epoch": 49.13, + "learning_rate": 2.173888901228261e-09, + "loss": 3.7432, + "step": 4424000 + }, + { + "epoch": 49.14, + "learning_rate": 2.1600071074784e-09, + "loss": 3.757, + "step": 4424500 + }, + { + "epoch": 49.14, + "learning_rate": 2.146125313728539e-09, + "loss": 3.7542, + "step": 4425000 + }, + { + "epoch": 49.15, + "learning_rate": 2.1322435199786774e-09, + "loss": 3.7613, + "step": 4425500 + }, + { + "epoch": 49.15, + "learning_rate": 2.1183617262288163e-09, + "loss": 3.7442, + "step": 4426000 + }, + { + "epoch": 49.16, + "learning_rate": 2.1044799324789553e-09, + "loss": 3.7572, + "step": 4426500 + }, + { + "epoch": 49.16, + "learning_rate": 2.090598138729094e-09, + "loss": 3.7369, + "step": 4427000 + }, + { + "epoch": 49.17, + "learning_rate": 2.076716344979233e-09, + "loss": 3.7495, + "step": 4427500 + }, + { + "epoch": 49.17, + "learning_rate": 2.0628345512293718e-09, + "loss": 3.7577, + "step": 4428000 + }, + { + "epoch": 49.18, + "learning_rate": 2.0489527574795103e-09, + "loss": 3.7458, + "step": 4428500 + }, + { + "epoch": 49.19, + "learning_rate": 2.0350709637296492e-09, + "loss": 3.7673, + "step": 4429000 + }, + { + "epoch": 49.19, + "learning_rate": 2.021189169979788e-09, + "loss": 3.7531, + "step": 4429500 + }, + { + "epoch": 49.2, + "learning_rate": 2.0073073762299267e-09, + "loss": 3.7386, + "step": 4430000 + }, + { + "epoch": 49.2, + "learning_rate": 1.9934255824800657e-09, + "loss": 3.7456, + "step": 4430500 + }, + { + "epoch": 49.21, + "learning_rate": 1.9795437887302047e-09, + "loss": 3.7621, + "step": 4431000 + }, + { + "epoch": 49.21, + "learning_rate": 1.965661994980343e-09, + "loss": 3.7583, + "step": 4431500 + }, + { + "epoch": 49.22, + "learning_rate": 1.951780201230482e-09, + "loss": 3.7574, + "step": 4432000 + }, + { + "epoch": 49.22, + "learning_rate": 1.937898407480621e-09, + "loss": 3.7487, + "step": 4432500 + }, + { + "epoch": 49.23, + "learning_rate": 1.9240166137307597e-09, + "loss": 3.7506, + "step": 4433000 + }, + { + "epoch": 49.24, + "learning_rate": 1.9101348199808986e-09, + "loss": 3.7421, + "step": 4433500 + }, + { + "epoch": 49.24, + "learning_rate": 1.896253026231037e-09, + "loss": 3.722, + "step": 4434000 + }, + { + "epoch": 49.25, + "learning_rate": 1.882371232481176e-09, + "loss": 3.757, + "step": 4434500 + }, + { + "epoch": 49.25, + "learning_rate": 1.868489438731315e-09, + "loss": 3.7406, + "step": 4435000 + }, + { + "epoch": 49.26, + "learning_rate": 1.8546076449814538e-09, + "loss": 3.7292, + "step": 4435500 + }, + { + "epoch": 49.26, + "learning_rate": 1.8407258512315928e-09, + "loss": 3.766, + "step": 4436000 + }, + { + "epoch": 49.27, + "learning_rate": 1.8268440574817315e-09, + "loss": 3.7409, + "step": 4436500 + }, + { + "epoch": 49.27, + "learning_rate": 1.8129622637318703e-09, + "loss": 3.7467, + "step": 4437000 + }, + { + "epoch": 49.28, + "learning_rate": 1.7990804699820092e-09, + "loss": 3.7699, + "step": 4437500 + }, + { + "epoch": 49.29, + "learning_rate": 1.785198676232148e-09, + "loss": 3.7499, + "step": 4438000 + }, + { + "epoch": 49.29, + "learning_rate": 1.7713168824822867e-09, + "loss": 3.7527, + "step": 4438500 + }, + { + "epoch": 49.3, + "learning_rate": 1.7574350887324257e-09, + "loss": 3.7476, + "step": 4439000 + }, + { + "epoch": 49.3, + "learning_rate": 1.7435532949825644e-09, + "loss": 3.731, + "step": 4439500 + }, + { + "epoch": 49.31, + "learning_rate": 1.7296715012327032e-09, + "loss": 3.7352, + "step": 4440000 + }, + { + "epoch": 49.31, + "learning_rate": 1.715789707482842e-09, + "loss": 3.7597, + "step": 4440500 + }, + { + "epoch": 49.32, + "learning_rate": 1.7019079137329809e-09, + "loss": 3.7482, + "step": 4441000 + }, + { + "epoch": 49.32, + "learning_rate": 1.6880261199831196e-09, + "loss": 3.7368, + "step": 4441500 + }, + { + "epoch": 49.33, + "learning_rate": 1.6741443262332584e-09, + "loss": 3.7453, + "step": 4442000 + }, + { + "epoch": 49.34, + "learning_rate": 1.6602625324833975e-09, + "loss": 3.7434, + "step": 4442500 + }, + { + "epoch": 49.34, + "learning_rate": 1.646380738733536e-09, + "loss": 3.7398, + "step": 4443000 + }, + { + "epoch": 49.35, + "learning_rate": 1.6324989449836748e-09, + "loss": 3.7241, + "step": 4443500 + }, + { + "epoch": 49.35, + "learning_rate": 1.618617151233814e-09, + "loss": 3.7415, + "step": 4444000 + }, + { + "epoch": 49.36, + "learning_rate": 1.6047353574839527e-09, + "loss": 3.7426, + "step": 4444500 + }, + { + "epoch": 49.36, + "learning_rate": 1.5908535637340913e-09, + "loss": 3.7419, + "step": 4445000 + }, + { + "epoch": 49.37, + "learning_rate": 1.57697176998423e-09, + "loss": 3.7611, + "step": 4445500 + }, + { + "epoch": 49.37, + "learning_rate": 1.5630899762343692e-09, + "loss": 3.7504, + "step": 4446000 + }, + { + "epoch": 49.38, + "learning_rate": 1.5492081824845077e-09, + "loss": 3.7541, + "step": 4446500 + }, + { + "epoch": 49.39, + "learning_rate": 1.5353263887346467e-09, + "loss": 3.7546, + "step": 4447000 + }, + { + "epoch": 49.39, + "learning_rate": 1.5214445949847854e-09, + "loss": 3.7475, + "step": 4447500 + }, + { + "epoch": 49.4, + "learning_rate": 1.5075628012349244e-09, + "loss": 3.7358, + "step": 4448000 + }, + { + "epoch": 49.4, + "learning_rate": 1.4936810074850631e-09, + "loss": 3.7506, + "step": 4448500 + }, + { + "epoch": 49.41, + "learning_rate": 1.4797992137352019e-09, + "loss": 3.7492, + "step": 4449000 + }, + { + "epoch": 49.41, + "learning_rate": 1.4659174199853408e-09, + "loss": 3.7515, + "step": 4449500 + }, + { + "epoch": 49.42, + "learning_rate": 1.4520356262354796e-09, + "loss": 3.736, + "step": 4450000 + }, + { + "epoch": 49.42, + "learning_rate": 1.4381538324856183e-09, + "loss": 3.7339, + "step": 4450500 + }, + { + "epoch": 49.43, + "learning_rate": 1.4242720387357573e-09, + "loss": 3.7638, + "step": 4451000 + }, + { + "epoch": 49.44, + "learning_rate": 1.410390244985896e-09, + "loss": 3.75, + "step": 4451500 + }, + { + "epoch": 49.44, + "learning_rate": 1.3965084512360348e-09, + "loss": 3.7474, + "step": 4452000 + }, + { + "epoch": 49.45, + "learning_rate": 1.3826266574861735e-09, + "loss": 3.7398, + "step": 4452500 + }, + { + "epoch": 49.45, + "learning_rate": 1.3687448637363125e-09, + "loss": 3.7462, + "step": 4453000 + }, + { + "epoch": 49.46, + "learning_rate": 1.3548630699864514e-09, + "loss": 3.7402, + "step": 4453500 + }, + { + "epoch": 49.46, + "learning_rate": 1.34098127623659e-09, + "loss": 3.7444, + "step": 4454000 + }, + { + "epoch": 49.47, + "learning_rate": 1.327099482486729e-09, + "loss": 3.743, + "step": 4454500 + }, + { + "epoch": 49.47, + "learning_rate": 1.3132176887368677e-09, + "loss": 3.7348, + "step": 4455000 + }, + { + "epoch": 49.48, + "learning_rate": 1.2993358949870066e-09, + "loss": 3.7489, + "step": 4455500 + }, + { + "epoch": 49.49, + "learning_rate": 1.2854541012371454e-09, + "loss": 3.7387, + "step": 4456000 + }, + { + "epoch": 49.49, + "learning_rate": 1.2715723074872841e-09, + "loss": 3.7473, + "step": 4456500 + }, + { + "epoch": 49.5, + "learning_rate": 1.257690513737423e-09, + "loss": 3.7576, + "step": 4457000 + }, + { + "epoch": 49.5, + "learning_rate": 1.2438087199875618e-09, + "loss": 3.7461, + "step": 4457500 + }, + { + "epoch": 49.51, + "learning_rate": 1.2299269262377006e-09, + "loss": 3.7505, + "step": 4458000 + }, + { + "epoch": 49.51, + "learning_rate": 1.2160451324878395e-09, + "loss": 3.7514, + "step": 4458500 + }, + { + "epoch": 49.52, + "learning_rate": 1.2021633387379783e-09, + "loss": 3.7436, + "step": 4459000 + }, + { + "epoch": 49.52, + "learning_rate": 1.188281544988117e-09, + "loss": 3.7425, + "step": 4459500 + }, + { + "epoch": 49.53, + "learning_rate": 1.174399751238256e-09, + "loss": 3.7549, + "step": 4460000 + }, + { + "epoch": 49.54, + "learning_rate": 1.1605179574883947e-09, + "loss": 3.7402, + "step": 4460500 + }, + { + "epoch": 49.54, + "learning_rate": 1.1466361637385337e-09, + "loss": 3.7342, + "step": 4461000 + }, + { + "epoch": 49.55, + "learning_rate": 1.1327543699886722e-09, + "loss": 3.7424, + "step": 4461500 + }, + { + "epoch": 49.55, + "learning_rate": 1.1188725762388112e-09, + "loss": 3.7396, + "step": 4462000 + }, + { + "epoch": 49.56, + "learning_rate": 1.1049907824889502e-09, + "loss": 3.7593, + "step": 4462500 + }, + { + "epoch": 49.56, + "learning_rate": 1.091108988739089e-09, + "loss": 3.7552, + "step": 4463000 + }, + { + "epoch": 49.57, + "learning_rate": 1.0772271949892277e-09, + "loss": 3.7461, + "step": 4463500 + }, + { + "epoch": 49.57, + "learning_rate": 1.0633454012393664e-09, + "loss": 3.7367, + "step": 4464000 + }, + { + "epoch": 49.58, + "learning_rate": 1.0494636074895054e-09, + "loss": 3.7456, + "step": 4464500 + }, + { + "epoch": 49.59, + "learning_rate": 1.0355818137396441e-09, + "loss": 3.7521, + "step": 4465000 + }, + { + "epoch": 49.59, + "learning_rate": 1.0217000199897829e-09, + "loss": 3.7379, + "step": 4465500 + }, + { + "epoch": 49.6, + "learning_rate": 1.0078182262399218e-09, + "loss": 3.7427, + "step": 4466000 + }, + { + "epoch": 49.6, + "learning_rate": 9.939364324900606e-10, + "loss": 3.756, + "step": 4466500 + }, + { + "epoch": 49.61, + "learning_rate": 9.800546387401993e-10, + "loss": 3.7639, + "step": 4467000 + }, + { + "epoch": 49.61, + "learning_rate": 9.661728449903383e-10, + "loss": 3.7395, + "step": 4467500 + }, + { + "epoch": 49.62, + "learning_rate": 9.52291051240477e-10, + "loss": 3.7458, + "step": 4468000 + }, + { + "epoch": 49.62, + "learning_rate": 9.38409257490616e-10, + "loss": 3.749, + "step": 4468500 + }, + { + "epoch": 49.63, + "learning_rate": 9.245274637407546e-10, + "loss": 3.7483, + "step": 4469000 + }, + { + "epoch": 49.64, + "learning_rate": 9.106456699908935e-10, + "loss": 3.7516, + "step": 4469500 + }, + { + "epoch": 49.64, + "learning_rate": 8.967638762410324e-10, + "loss": 3.7545, + "step": 4470000 + }, + { + "epoch": 49.65, + "learning_rate": 8.828820824911711e-10, + "loss": 3.7323, + "step": 4470500 + }, + { + "epoch": 49.65, + "learning_rate": 8.6900028874131e-10, + "loss": 3.7421, + "step": 4471000 + }, + { + "epoch": 49.66, + "learning_rate": 8.551184949914489e-10, + "loss": 3.727, + "step": 4471500 + }, + { + "epoch": 49.66, + "learning_rate": 8.412367012415876e-10, + "loss": 3.7473, + "step": 4472000 + }, + { + "epoch": 49.67, + "learning_rate": 8.273549074917265e-10, + "loss": 3.7502, + "step": 4472500 + }, + { + "epoch": 49.67, + "learning_rate": 8.134731137418652e-10, + "loss": 3.7533, + "step": 4473000 + }, + { + "epoch": 49.68, + "learning_rate": 7.995913199920041e-10, + "loss": 3.737, + "step": 4473500 + }, + { + "epoch": 49.69, + "learning_rate": 7.857095262421429e-10, + "loss": 3.7678, + "step": 4474000 + }, + { + "epoch": 49.69, + "learning_rate": 7.718277324922817e-10, + "loss": 3.7366, + "step": 4474500 + }, + { + "epoch": 49.7, + "learning_rate": 7.579459387424205e-10, + "loss": 3.7476, + "step": 4475000 + }, + { + "epoch": 49.7, + "learning_rate": 7.440641449925594e-10, + "loss": 3.7469, + "step": 4475500 + }, + { + "epoch": 49.71, + "learning_rate": 7.301823512426981e-10, + "loss": 3.7447, + "step": 4476000 + }, + { + "epoch": 49.71, + "learning_rate": 7.16300557492837e-10, + "loss": 3.7465, + "step": 4476500 + }, + { + "epoch": 49.72, + "learning_rate": 7.024187637429757e-10, + "loss": 3.7516, + "step": 4477000 + }, + { + "epoch": 49.72, + "learning_rate": 6.885369699931146e-10, + "loss": 3.765, + "step": 4477500 + }, + { + "epoch": 49.73, + "learning_rate": 6.746551762432534e-10, + "loss": 3.7353, + "step": 4478000 + }, + { + "epoch": 49.74, + "learning_rate": 6.607733824933922e-10, + "loss": 3.7609, + "step": 4478500 + }, + { + "epoch": 49.74, + "learning_rate": 6.46891588743531e-10, + "loss": 3.7521, + "step": 4479000 + }, + { + "epoch": 49.75, + "learning_rate": 6.330097949936699e-10, + "loss": 3.7518, + "step": 4479500 + }, + { + "epoch": 49.75, + "learning_rate": 6.191280012438087e-10, + "loss": 3.7477, + "step": 4480000 + }, + { + "epoch": 49.76, + "learning_rate": 6.052462074939475e-10, + "loss": 3.7675, + "step": 4480500 + }, + { + "epoch": 49.76, + "learning_rate": 5.913644137440863e-10, + "loss": 3.7429, + "step": 4481000 + }, + { + "epoch": 49.77, + "learning_rate": 5.774826199942251e-10, + "loss": 3.7585, + "step": 4481500 + }, + { + "epoch": 49.77, + "learning_rate": 5.63600826244364e-10, + "loss": 3.7566, + "step": 4482000 + }, + { + "epoch": 49.78, + "learning_rate": 5.497190324945028e-10, + "loss": 3.7519, + "step": 4482500 + }, + { + "epoch": 49.79, + "learning_rate": 5.358372387446416e-10, + "loss": 3.7361, + "step": 4483000 + }, + { + "epoch": 49.79, + "learning_rate": 5.219554449947804e-10, + "loss": 3.7402, + "step": 4483500 + }, + { + "epoch": 49.8, + "learning_rate": 5.080736512449192e-10, + "loss": 3.7263, + "step": 4484000 + }, + { + "epoch": 49.8, + "learning_rate": 4.941918574950581e-10, + "loss": 3.7414, + "step": 4484500 + }, + { + "epoch": 49.81, + "learning_rate": 4.803100637451968e-10, + "loss": 3.7291, + "step": 4485000 + }, + { + "epoch": 49.81, + "learning_rate": 4.664282699953357e-10, + "loss": 3.7426, + "step": 4485500 + }, + { + "epoch": 49.82, + "learning_rate": 4.525464762454745e-10, + "loss": 3.7538, + "step": 4486000 + }, + { + "epoch": 49.82, + "learning_rate": 4.386646824956134e-10, + "loss": 3.7286, + "step": 4486500 + }, + { + "epoch": 49.83, + "learning_rate": 4.247828887457522e-10, + "loss": 3.7406, + "step": 4487000 + }, + { + "epoch": 49.84, + "learning_rate": 4.10901094995891e-10, + "loss": 3.7444, + "step": 4487500 + }, + { + "epoch": 49.84, + "learning_rate": 3.9701930124602974e-10, + "loss": 3.7448, + "step": 4488000 + }, + { + "epoch": 49.85, + "learning_rate": 3.831375074961686e-10, + "loss": 3.7188, + "step": 4488500 + }, + { + "epoch": 49.85, + "learning_rate": 3.692557137463074e-10, + "loss": 3.7447, + "step": 4489000 + }, + { + "epoch": 49.86, + "learning_rate": 3.5537391999644625e-10, + "loss": 3.7544, + "step": 4489500 + }, + { + "epoch": 49.86, + "learning_rate": 3.4149212624658505e-10, + "loss": 3.7333, + "step": 4490000 + }, + { + "epoch": 49.87, + "learning_rate": 3.276103324967239e-10, + "loss": 3.7406, + "step": 4490500 + }, + { + "epoch": 49.87, + "learning_rate": 3.137285387468627e-10, + "loss": 3.7704, + "step": 4491000 + }, + { + "epoch": 49.88, + "learning_rate": 2.998467449970015e-10, + "loss": 3.7435, + "step": 4491500 + }, + { + "epoch": 49.89, + "learning_rate": 2.8596495124714035e-10, + "loss": 3.7519, + "step": 4492000 + }, + { + "epoch": 49.89, + "learning_rate": 2.7208315749727915e-10, + "loss": 3.7512, + "step": 4492500 + }, + { + "epoch": 49.9, + "learning_rate": 2.58201363747418e-10, + "loss": 3.7613, + "step": 4493000 + }, + { + "epoch": 49.9, + "learning_rate": 2.443195699975568e-10, + "loss": 3.7335, + "step": 4493500 + }, + { + "epoch": 49.91, + "learning_rate": 2.3043777624769563e-10, + "loss": 3.7446, + "step": 4494000 + }, + { + "epoch": 49.91, + "learning_rate": 2.1655598249783443e-10, + "loss": 3.7563, + "step": 4494500 + }, + { + "epoch": 49.92, + "learning_rate": 2.0267418874797323e-10, + "loss": 3.7504, + "step": 4495000 + }, + { + "epoch": 49.92, + "learning_rate": 1.8879239499811206e-10, + "loss": 3.7485, + "step": 4495500 + }, + { + "epoch": 49.93, + "learning_rate": 1.7491060124825088e-10, + "loss": 3.7562, + "step": 4496000 + }, + { + "epoch": 49.94, + "learning_rate": 1.610288074983897e-10, + "loss": 3.7513, + "step": 4496500 + }, + { + "epoch": 49.94, + "learning_rate": 1.471470137485285e-10, + "loss": 3.7387, + "step": 4497000 + }, + { + "epoch": 49.95, + "learning_rate": 1.3326521999866734e-10, + "loss": 3.7599, + "step": 4497500 + }, + { + "epoch": 49.95, + "learning_rate": 1.1938342624880616e-10, + "loss": 3.7616, + "step": 4498000 + }, + { + "epoch": 49.96, + "learning_rate": 1.0550163249894498e-10, + "loss": 3.7502, + "step": 4498500 + }, + { + "epoch": 49.96, + "learning_rate": 9.16198387490838e-11, + "loss": 3.7519, + "step": 4499000 + }, + { + "epoch": 49.97, + "learning_rate": 7.773804499922262e-11, + "loss": 3.74, + "step": 4499500 + }, + { + "epoch": 49.97, + "learning_rate": 6.385625124936143e-11, + "loss": 3.741, + "step": 4500000 + }, + { + "epoch": 49.98, + "learning_rate": 4.9974457499500255e-11, + "loss": 3.7423, + "step": 4500500 + }, + { + "epoch": 49.99, + "learning_rate": 3.609266374963907e-11, + "loss": 3.7529, + "step": 4501000 + }, + { + "epoch": 49.99, + "learning_rate": 2.221086999977789e-11, + "loss": 3.7412, + "step": 4501500 + }, + { + "epoch": 50.0, + "learning_rate": 8.329076249916709e-12, + "loss": 3.7532, + "step": 4502000 + }, + { + "epoch": 50.0, + "eval_loss": 3.824197769165039, + "eval_runtime": 6.3023, + "eval_samples_per_second": 246.578, + "step": 4502300 + } + ], + "max_steps": 4502300, + "num_train_epochs": 50, + "total_flos": 2.1542227070270976e+17, + "trial_name": null, + "trial_params": null +}