diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,110518 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 15784, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "grad_norm": 9.532355308532715, + "learning_rate": 2.109704641350211e-08, + "loss": 1.3252, + "step": 1 + }, + { + "epoch": 0.0, + "grad_norm": 9.528098106384277, + "learning_rate": 4.219409282700422e-08, + "loss": 1.3375, + "step": 2 + }, + { + "epoch": 0.0, + "grad_norm": 9.426416397094727, + "learning_rate": 6.329113924050633e-08, + "loss": 1.3024, + "step": 3 + }, + { + "epoch": 0.0, + "grad_norm": 8.792394638061523, + "learning_rate": 8.438818565400844e-08, + "loss": 1.2546, + "step": 4 + }, + { + "epoch": 0.0, + "grad_norm": 9.00940227508545, + "learning_rate": 1.0548523206751055e-07, + "loss": 1.2509, + "step": 5 + }, + { + "epoch": 0.0, + "grad_norm": 9.64754867553711, + "learning_rate": 1.2658227848101266e-07, + "loss": 1.3443, + "step": 6 + }, + { + "epoch": 0.0, + "grad_norm": 8.994702339172363, + "learning_rate": 1.4767932489451477e-07, + "loss": 1.3474, + "step": 7 + }, + { + "epoch": 0.0, + "grad_norm": 9.668071746826172, + "learning_rate": 1.6877637130801689e-07, + "loss": 1.2868, + "step": 8 + }, + { + "epoch": 0.0, + "grad_norm": 10.72716999053955, + "learning_rate": 1.89873417721519e-07, + "loss": 1.3352, + "step": 9 + }, + { + "epoch": 0.0, + "grad_norm": 9.297769546508789, + "learning_rate": 2.109704641350211e-07, + "loss": 1.3187, + "step": 10 + }, + { + "epoch": 0.0, + "grad_norm": 10.302685737609863, + "learning_rate": 2.3206751054852324e-07, + "loss": 1.2814, + "step": 11 + }, + { + "epoch": 0.0, + "grad_norm": 9.82099437713623, + "learning_rate": 2.5316455696202533e-07, + "loss": 1.3105, + "step": 12 + }, + { + "epoch": 0.0, + "grad_norm": 9.666701316833496, + "learning_rate": 2.7426160337552746e-07, + "loss": 1.3054, + "step": 13 + }, + { + "epoch": 0.0, + "grad_norm": 8.828336715698242, + "learning_rate": 2.9535864978902955e-07, + "loss": 1.2671, + "step": 14 + }, + { + "epoch": 0.0, + "grad_norm": 9.0466890335083, + "learning_rate": 3.164556962025317e-07, + "loss": 1.3082, + "step": 15 + }, + { + "epoch": 0.0, + "grad_norm": 10.455029487609863, + "learning_rate": 3.3755274261603377e-07, + "loss": 1.3541, + "step": 16 + }, + { + "epoch": 0.0, + "grad_norm": 9.330451965332031, + "learning_rate": 3.586497890295359e-07, + "loss": 1.3523, + "step": 17 + }, + { + "epoch": 0.0, + "grad_norm": 9.279951095581055, + "learning_rate": 3.79746835443038e-07, + "loss": 1.3471, + "step": 18 + }, + { + "epoch": 0.0, + "grad_norm": 9.216424942016602, + "learning_rate": 4.0084388185654013e-07, + "loss": 1.2502, + "step": 19 + }, + { + "epoch": 0.0, + "grad_norm": 9.830771446228027, + "learning_rate": 4.219409282700422e-07, + "loss": 1.3262, + "step": 20 + }, + { + "epoch": 0.0, + "grad_norm": 8.596817970275879, + "learning_rate": 4.4303797468354435e-07, + "loss": 1.2861, + "step": 21 + }, + { + "epoch": 0.0, + "grad_norm": 7.879246711730957, + "learning_rate": 4.641350210970465e-07, + "loss": 1.3028, + "step": 22 + }, + { + "epoch": 0.0, + "grad_norm": 7.26005744934082, + "learning_rate": 4.852320675105486e-07, + "loss": 1.1705, + "step": 23 + }, + { + "epoch": 0.0, + "grad_norm": 8.682889938354492, + "learning_rate": 5.063291139240507e-07, + "loss": 1.2799, + "step": 24 + }, + { + "epoch": 0.0, + "grad_norm": 7.49697732925415, + "learning_rate": 5.274261603375528e-07, + "loss": 1.2697, + "step": 25 + }, + { + "epoch": 0.0, + "grad_norm": 8.21834945678711, + "learning_rate": 5.485232067510549e-07, + "loss": 1.3346, + "step": 26 + }, + { + "epoch": 0.0, + "grad_norm": 7.122488021850586, + "learning_rate": 5.69620253164557e-07, + "loss": 1.2485, + "step": 27 + }, + { + "epoch": 0.0, + "grad_norm": 7.122411727905273, + "learning_rate": 5.907172995780591e-07, + "loss": 1.2263, + "step": 28 + }, + { + "epoch": 0.0, + "grad_norm": 7.260400295257568, + "learning_rate": 6.118143459915613e-07, + "loss": 1.2086, + "step": 29 + }, + { + "epoch": 0.0, + "grad_norm": 6.424813270568848, + "learning_rate": 6.329113924050634e-07, + "loss": 1.216, + "step": 30 + }, + { + "epoch": 0.0, + "grad_norm": 6.159492492675781, + "learning_rate": 6.540084388185656e-07, + "loss": 1.1744, + "step": 31 + }, + { + "epoch": 0.0, + "grad_norm": 6.250075817108154, + "learning_rate": 6.751054852320675e-07, + "loss": 1.2486, + "step": 32 + }, + { + "epoch": 0.0, + "grad_norm": 6.240837097167969, + "learning_rate": 6.962025316455696e-07, + "loss": 1.1966, + "step": 33 + }, + { + "epoch": 0.0, + "grad_norm": 6.257637023925781, + "learning_rate": 7.172995780590718e-07, + "loss": 1.2206, + "step": 34 + }, + { + "epoch": 0.0, + "grad_norm": 6.090736389160156, + "learning_rate": 7.383966244725739e-07, + "loss": 1.1555, + "step": 35 + }, + { + "epoch": 0.0, + "grad_norm": 6.092897891998291, + "learning_rate": 7.59493670886076e-07, + "loss": 1.2119, + "step": 36 + }, + { + "epoch": 0.0, + "grad_norm": 5.601813793182373, + "learning_rate": 7.805907172995782e-07, + "loss": 1.2041, + "step": 37 + }, + { + "epoch": 0.0, + "grad_norm": 5.4696221351623535, + "learning_rate": 8.016877637130803e-07, + "loss": 1.0983, + "step": 38 + }, + { + "epoch": 0.0, + "grad_norm": 5.418956756591797, + "learning_rate": 8.227848101265823e-07, + "loss": 1.1278, + "step": 39 + }, + { + "epoch": 0.0, + "grad_norm": 4.913137912750244, + "learning_rate": 8.438818565400844e-07, + "loss": 1.0996, + "step": 40 + }, + { + "epoch": 0.0, + "grad_norm": 3.8683886528015137, + "learning_rate": 8.649789029535865e-07, + "loss": 1.1214, + "step": 41 + }, + { + "epoch": 0.0, + "grad_norm": 3.4708452224731445, + "learning_rate": 8.860759493670887e-07, + "loss": 1.0712, + "step": 42 + }, + { + "epoch": 0.0, + "grad_norm": 3.7991111278533936, + "learning_rate": 9.071729957805908e-07, + "loss": 1.0548, + "step": 43 + }, + { + "epoch": 0.0, + "grad_norm": 3.196794033050537, + "learning_rate": 9.28270042194093e-07, + "loss": 1.0285, + "step": 44 + }, + { + "epoch": 0.0, + "grad_norm": 3.9760518074035645, + "learning_rate": 9.493670886075951e-07, + "loss": 1.0126, + "step": 45 + }, + { + "epoch": 0.0, + "grad_norm": 3.435934066772461, + "learning_rate": 9.704641350210971e-07, + "loss": 1.0275, + "step": 46 + }, + { + "epoch": 0.0, + "grad_norm": 2.9900312423706055, + "learning_rate": 9.915611814345991e-07, + "loss": 0.9641, + "step": 47 + }, + { + "epoch": 0.0, + "grad_norm": 3.0222251415252686, + "learning_rate": 1.0126582278481013e-06, + "loss": 0.9919, + "step": 48 + }, + { + "epoch": 0.0, + "grad_norm": 2.916668653488159, + "learning_rate": 1.0337552742616035e-06, + "loss": 1.0351, + "step": 49 + }, + { + "epoch": 0.0, + "grad_norm": 3.182502508163452, + "learning_rate": 1.0548523206751057e-06, + "loss": 1.0153, + "step": 50 + }, + { + "epoch": 0.0, + "grad_norm": 2.9524974822998047, + "learning_rate": 1.0759493670886077e-06, + "loss": 0.9676, + "step": 51 + }, + { + "epoch": 0.0, + "grad_norm": 2.7479350566864014, + "learning_rate": 1.0970464135021099e-06, + "loss": 1.0005, + "step": 52 + }, + { + "epoch": 0.0, + "grad_norm": 1.9719606637954712, + "learning_rate": 1.1181434599156118e-06, + "loss": 1.0069, + "step": 53 + }, + { + "epoch": 0.0, + "grad_norm": 2.0742597579956055, + "learning_rate": 1.139240506329114e-06, + "loss": 0.9551, + "step": 54 + }, + { + "epoch": 0.0, + "grad_norm": 2.005128860473633, + "learning_rate": 1.1603375527426162e-06, + "loss": 0.9297, + "step": 55 + }, + { + "epoch": 0.0, + "grad_norm": 1.920249342918396, + "learning_rate": 1.1814345991561182e-06, + "loss": 0.878, + "step": 56 + }, + { + "epoch": 0.0, + "grad_norm": 1.9409500360488892, + "learning_rate": 1.2025316455696204e-06, + "loss": 0.9166, + "step": 57 + }, + { + "epoch": 0.0, + "grad_norm": 1.9121850728988647, + "learning_rate": 1.2236286919831226e-06, + "loss": 0.9379, + "step": 58 + }, + { + "epoch": 0.0, + "grad_norm": 1.7869422435760498, + "learning_rate": 1.2447257383966246e-06, + "loss": 0.9852, + "step": 59 + }, + { + "epoch": 0.0, + "grad_norm": 1.9101088047027588, + "learning_rate": 1.2658227848101267e-06, + "loss": 0.918, + "step": 60 + }, + { + "epoch": 0.0, + "grad_norm": 1.8155966997146606, + "learning_rate": 1.286919831223629e-06, + "loss": 0.8639, + "step": 61 + }, + { + "epoch": 0.0, + "grad_norm": 1.7884961366653442, + "learning_rate": 1.3080168776371311e-06, + "loss": 0.8911, + "step": 62 + }, + { + "epoch": 0.0, + "grad_norm": 1.5779091119766235, + "learning_rate": 1.3291139240506329e-06, + "loss": 0.8864, + "step": 63 + }, + { + "epoch": 0.0, + "grad_norm": 1.529002070426941, + "learning_rate": 1.350210970464135e-06, + "loss": 0.8961, + "step": 64 + }, + { + "epoch": 0.0, + "grad_norm": 1.548957109451294, + "learning_rate": 1.371308016877637e-06, + "loss": 0.8673, + "step": 65 + }, + { + "epoch": 0.0, + "grad_norm": 1.5970193147659302, + "learning_rate": 1.3924050632911392e-06, + "loss": 0.8815, + "step": 66 + }, + { + "epoch": 0.0, + "grad_norm": 1.5011041164398193, + "learning_rate": 1.4135021097046414e-06, + "loss": 0.889, + "step": 67 + }, + { + "epoch": 0.0, + "grad_norm": 1.4582399129867554, + "learning_rate": 1.4345991561181436e-06, + "loss": 0.878, + "step": 68 + }, + { + "epoch": 0.0, + "grad_norm": 1.4272208213806152, + "learning_rate": 1.4556962025316456e-06, + "loss": 0.8489, + "step": 69 + }, + { + "epoch": 0.0, + "grad_norm": 1.435996651649475, + "learning_rate": 1.4767932489451478e-06, + "loss": 0.9408, + "step": 70 + }, + { + "epoch": 0.0, + "grad_norm": 1.4072030782699585, + "learning_rate": 1.49789029535865e-06, + "loss": 0.8805, + "step": 71 + }, + { + "epoch": 0.0, + "grad_norm": 1.4391857385635376, + "learning_rate": 1.518987341772152e-06, + "loss": 0.881, + "step": 72 + }, + { + "epoch": 0.0, + "grad_norm": 1.229433298110962, + "learning_rate": 1.5400843881856542e-06, + "loss": 0.8213, + "step": 73 + }, + { + "epoch": 0.0, + "grad_norm": 1.2908027172088623, + "learning_rate": 1.5611814345991563e-06, + "loss": 0.8103, + "step": 74 + }, + { + "epoch": 0.0, + "grad_norm": 1.3904271125793457, + "learning_rate": 1.5822784810126585e-06, + "loss": 0.8272, + "step": 75 + }, + { + "epoch": 0.0, + "grad_norm": 1.211230754852295, + "learning_rate": 1.6033755274261605e-06, + "loss": 0.8327, + "step": 76 + }, + { + "epoch": 0.0, + "grad_norm": 1.340195655822754, + "learning_rate": 1.6244725738396625e-06, + "loss": 0.8251, + "step": 77 + }, + { + "epoch": 0.0, + "grad_norm": 1.2882392406463623, + "learning_rate": 1.6455696202531647e-06, + "loss": 0.8505, + "step": 78 + }, + { + "epoch": 0.01, + "grad_norm": 1.3191945552825928, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.92, + "step": 79 + }, + { + "epoch": 0.01, + "grad_norm": 1.2318845987319946, + "learning_rate": 1.6877637130801689e-06, + "loss": 0.854, + "step": 80 + }, + { + "epoch": 0.01, + "grad_norm": 1.2841240167617798, + "learning_rate": 1.708860759493671e-06, + "loss": 0.8995, + "step": 81 + }, + { + "epoch": 0.01, + "grad_norm": 1.2077780961990356, + "learning_rate": 1.729957805907173e-06, + "loss": 0.8117, + "step": 82 + }, + { + "epoch": 0.01, + "grad_norm": 1.1782020330429077, + "learning_rate": 1.7510548523206752e-06, + "loss": 0.8833, + "step": 83 + }, + { + "epoch": 0.01, + "grad_norm": 1.2451756000518799, + "learning_rate": 1.7721518987341774e-06, + "loss": 0.7938, + "step": 84 + }, + { + "epoch": 0.01, + "grad_norm": 1.2025009393692017, + "learning_rate": 1.7932489451476796e-06, + "loss": 0.8146, + "step": 85 + }, + { + "epoch": 0.01, + "grad_norm": 1.2381807565689087, + "learning_rate": 1.8143459915611816e-06, + "loss": 0.8662, + "step": 86 + }, + { + "epoch": 0.01, + "grad_norm": 1.2321275472640991, + "learning_rate": 1.8354430379746838e-06, + "loss": 0.8094, + "step": 87 + }, + { + "epoch": 0.01, + "grad_norm": 1.1794078350067139, + "learning_rate": 1.856540084388186e-06, + "loss": 0.8173, + "step": 88 + }, + { + "epoch": 0.01, + "grad_norm": 1.293404459953308, + "learning_rate": 1.877637130801688e-06, + "loss": 0.8645, + "step": 89 + }, + { + "epoch": 0.01, + "grad_norm": 1.269100308418274, + "learning_rate": 1.8987341772151901e-06, + "loss": 0.8404, + "step": 90 + }, + { + "epoch": 0.01, + "grad_norm": 1.262161374092102, + "learning_rate": 1.919831223628692e-06, + "loss": 0.8175, + "step": 91 + }, + { + "epoch": 0.01, + "grad_norm": 1.1058001518249512, + "learning_rate": 1.9409282700421943e-06, + "loss": 0.8211, + "step": 92 + }, + { + "epoch": 0.01, + "grad_norm": 1.223036289215088, + "learning_rate": 1.9620253164556965e-06, + "loss": 0.834, + "step": 93 + }, + { + "epoch": 0.01, + "grad_norm": 1.1103086471557617, + "learning_rate": 1.9831223628691982e-06, + "loss": 0.764, + "step": 94 + }, + { + "epoch": 0.01, + "grad_norm": 1.1673771142959595, + "learning_rate": 2.0042194092827004e-06, + "loss": 0.8599, + "step": 95 + }, + { + "epoch": 0.01, + "grad_norm": 1.1508725881576538, + "learning_rate": 2.0253164556962026e-06, + "loss": 0.8131, + "step": 96 + }, + { + "epoch": 0.01, + "grad_norm": 1.1243406534194946, + "learning_rate": 2.046413502109705e-06, + "loss": 0.695, + "step": 97 + }, + { + "epoch": 0.01, + "grad_norm": 1.088311791419983, + "learning_rate": 2.067510548523207e-06, + "loss": 0.7975, + "step": 98 + }, + { + "epoch": 0.01, + "grad_norm": 1.161037564277649, + "learning_rate": 2.088607594936709e-06, + "loss": 0.76, + "step": 99 + }, + { + "epoch": 0.01, + "grad_norm": 1.1575514078140259, + "learning_rate": 2.1097046413502114e-06, + "loss": 0.8673, + "step": 100 + }, + { + "epoch": 0.01, + "grad_norm": 1.1434293985366821, + "learning_rate": 2.130801687763713e-06, + "loss": 0.7514, + "step": 101 + }, + { + "epoch": 0.01, + "grad_norm": 1.1016374826431274, + "learning_rate": 2.1518987341772153e-06, + "loss": 0.812, + "step": 102 + }, + { + "epoch": 0.01, + "grad_norm": 1.1565126180648804, + "learning_rate": 2.1729957805907175e-06, + "loss": 0.8014, + "step": 103 + }, + { + "epoch": 0.01, + "grad_norm": 1.1164343357086182, + "learning_rate": 2.1940928270042197e-06, + "loss": 0.7952, + "step": 104 + }, + { + "epoch": 0.01, + "grad_norm": 1.1574078798294067, + "learning_rate": 2.2151898734177215e-06, + "loss": 0.8017, + "step": 105 + }, + { + "epoch": 0.01, + "grad_norm": 1.1797399520874023, + "learning_rate": 2.2362869198312237e-06, + "loss": 0.7733, + "step": 106 + }, + { + "epoch": 0.01, + "grad_norm": 1.1039958000183105, + "learning_rate": 2.257383966244726e-06, + "loss": 0.77, + "step": 107 + }, + { + "epoch": 0.01, + "grad_norm": 1.1885851621627808, + "learning_rate": 2.278481012658228e-06, + "loss": 0.8172, + "step": 108 + }, + { + "epoch": 0.01, + "grad_norm": 1.1390385627746582, + "learning_rate": 2.2995780590717302e-06, + "loss": 0.7927, + "step": 109 + }, + { + "epoch": 0.01, + "grad_norm": 1.1092506647109985, + "learning_rate": 2.3206751054852324e-06, + "loss": 0.7741, + "step": 110 + }, + { + "epoch": 0.01, + "grad_norm": 1.2666289806365967, + "learning_rate": 2.341772151898734e-06, + "loss": 0.7964, + "step": 111 + }, + { + "epoch": 0.01, + "grad_norm": 1.0005278587341309, + "learning_rate": 2.3628691983122364e-06, + "loss": 0.7241, + "step": 112 + }, + { + "epoch": 0.01, + "grad_norm": 1.1468347311019897, + "learning_rate": 2.3839662447257386e-06, + "loss": 0.7699, + "step": 113 + }, + { + "epoch": 0.01, + "grad_norm": 1.0486509799957275, + "learning_rate": 2.4050632911392408e-06, + "loss": 0.7698, + "step": 114 + }, + { + "epoch": 0.01, + "grad_norm": 1.0754250288009644, + "learning_rate": 2.426160337552743e-06, + "loss": 0.7577, + "step": 115 + }, + { + "epoch": 0.01, + "grad_norm": 1.0635970830917358, + "learning_rate": 2.447257383966245e-06, + "loss": 0.7201, + "step": 116 + }, + { + "epoch": 0.01, + "grad_norm": 1.1205726861953735, + "learning_rate": 2.4683544303797473e-06, + "loss": 0.8335, + "step": 117 + }, + { + "epoch": 0.01, + "grad_norm": 1.070901870727539, + "learning_rate": 2.489451476793249e-06, + "loss": 0.7949, + "step": 118 + }, + { + "epoch": 0.01, + "grad_norm": 1.0840576887130737, + "learning_rate": 2.5105485232067513e-06, + "loss": 0.7587, + "step": 119 + }, + { + "epoch": 0.01, + "grad_norm": 1.1758044958114624, + "learning_rate": 2.5316455696202535e-06, + "loss": 0.8069, + "step": 120 + }, + { + "epoch": 0.01, + "grad_norm": 1.0169506072998047, + "learning_rate": 2.5527426160337553e-06, + "loss": 0.7996, + "step": 121 + }, + { + "epoch": 0.01, + "grad_norm": 1.0591812133789062, + "learning_rate": 2.573839662447258e-06, + "loss": 0.7231, + "step": 122 + }, + { + "epoch": 0.01, + "grad_norm": 1.1686660051345825, + "learning_rate": 2.5949367088607596e-06, + "loss": 0.7476, + "step": 123 + }, + { + "epoch": 0.01, + "grad_norm": 1.0610604286193848, + "learning_rate": 2.6160337552742622e-06, + "loss": 0.7395, + "step": 124 + }, + { + "epoch": 0.01, + "grad_norm": 1.0829620361328125, + "learning_rate": 2.637130801687764e-06, + "loss": 0.8376, + "step": 125 + }, + { + "epoch": 0.01, + "grad_norm": 1.088221788406372, + "learning_rate": 2.6582278481012658e-06, + "loss": 0.7102, + "step": 126 + }, + { + "epoch": 0.01, + "grad_norm": 1.0730963945388794, + "learning_rate": 2.679324894514768e-06, + "loss": 0.7688, + "step": 127 + }, + { + "epoch": 0.01, + "grad_norm": 1.0567976236343384, + "learning_rate": 2.70042194092827e-06, + "loss": 0.7138, + "step": 128 + }, + { + "epoch": 0.01, + "grad_norm": 1.079959511756897, + "learning_rate": 2.7215189873417724e-06, + "loss": 0.7664, + "step": 129 + }, + { + "epoch": 0.01, + "grad_norm": 1.0153189897537231, + "learning_rate": 2.742616033755274e-06, + "loss": 0.7066, + "step": 130 + }, + { + "epoch": 0.01, + "grad_norm": 1.1893813610076904, + "learning_rate": 2.7637130801687767e-06, + "loss": 0.6818, + "step": 131 + }, + { + "epoch": 0.01, + "grad_norm": 1.1453039646148682, + "learning_rate": 2.7848101265822785e-06, + "loss": 0.8184, + "step": 132 + }, + { + "epoch": 0.01, + "grad_norm": 1.1458662748336792, + "learning_rate": 2.805907172995781e-06, + "loss": 0.7722, + "step": 133 + }, + { + "epoch": 0.01, + "grad_norm": 1.098507046699524, + "learning_rate": 2.827004219409283e-06, + "loss": 0.7963, + "step": 134 + }, + { + "epoch": 0.01, + "grad_norm": 1.0800371170043945, + "learning_rate": 2.848101265822785e-06, + "loss": 0.7168, + "step": 135 + }, + { + "epoch": 0.01, + "grad_norm": 1.0287774801254272, + "learning_rate": 2.8691983122362873e-06, + "loss": 0.7627, + "step": 136 + }, + { + "epoch": 0.01, + "grad_norm": 1.0763013362884521, + "learning_rate": 2.8902953586497895e-06, + "loss": 0.8135, + "step": 137 + }, + { + "epoch": 0.01, + "grad_norm": 1.2011860609054565, + "learning_rate": 2.9113924050632912e-06, + "loss": 0.7457, + "step": 138 + }, + { + "epoch": 0.01, + "grad_norm": 1.1453096866607666, + "learning_rate": 2.932489451476794e-06, + "loss": 0.7488, + "step": 139 + }, + { + "epoch": 0.01, + "grad_norm": 1.077102541923523, + "learning_rate": 2.9535864978902956e-06, + "loss": 0.7586, + "step": 140 + }, + { + "epoch": 0.01, + "grad_norm": 1.0777740478515625, + "learning_rate": 2.9746835443037974e-06, + "loss": 0.7453, + "step": 141 + }, + { + "epoch": 0.01, + "grad_norm": 1.0961055755615234, + "learning_rate": 2.9957805907173e-06, + "loss": 0.7509, + "step": 142 + }, + { + "epoch": 0.01, + "grad_norm": 1.0943870544433594, + "learning_rate": 3.0168776371308017e-06, + "loss": 0.7719, + "step": 143 + }, + { + "epoch": 0.01, + "grad_norm": 0.9995068907737732, + "learning_rate": 3.037974683544304e-06, + "loss": 0.7562, + "step": 144 + }, + { + "epoch": 0.01, + "grad_norm": 1.0800971984863281, + "learning_rate": 3.059071729957806e-06, + "loss": 0.8042, + "step": 145 + }, + { + "epoch": 0.01, + "grad_norm": 1.1074168682098389, + "learning_rate": 3.0801687763713083e-06, + "loss": 0.7368, + "step": 146 + }, + { + "epoch": 0.01, + "grad_norm": 1.1656383275985718, + "learning_rate": 3.10126582278481e-06, + "loss": 0.7654, + "step": 147 + }, + { + "epoch": 0.01, + "grad_norm": 1.1436306238174438, + "learning_rate": 3.1223628691983127e-06, + "loss": 0.7259, + "step": 148 + }, + { + "epoch": 0.01, + "grad_norm": 1.070346474647522, + "learning_rate": 3.1434599156118145e-06, + "loss": 0.7054, + "step": 149 + }, + { + "epoch": 0.01, + "grad_norm": 1.1207927465438843, + "learning_rate": 3.164556962025317e-06, + "loss": 0.7837, + "step": 150 + }, + { + "epoch": 0.01, + "grad_norm": 1.1490936279296875, + "learning_rate": 3.185654008438819e-06, + "loss": 0.7669, + "step": 151 + }, + { + "epoch": 0.01, + "grad_norm": 1.1072289943695068, + "learning_rate": 3.206751054852321e-06, + "loss": 0.7523, + "step": 152 + }, + { + "epoch": 0.01, + "grad_norm": 1.0876787900924683, + "learning_rate": 3.2278481012658232e-06, + "loss": 0.7732, + "step": 153 + }, + { + "epoch": 0.01, + "grad_norm": 1.1508756875991821, + "learning_rate": 3.248945147679325e-06, + "loss": 0.7658, + "step": 154 + }, + { + "epoch": 0.01, + "grad_norm": 1.2632142305374146, + "learning_rate": 3.270042194092827e-06, + "loss": 0.7805, + "step": 155 + }, + { + "epoch": 0.01, + "grad_norm": 1.0064738988876343, + "learning_rate": 3.2911392405063294e-06, + "loss": 0.7025, + "step": 156 + }, + { + "epoch": 0.01, + "grad_norm": 1.0723457336425781, + "learning_rate": 3.3122362869198316e-06, + "loss": 0.8056, + "step": 157 + }, + { + "epoch": 0.01, + "grad_norm": 1.0933613777160645, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.7526, + "step": 158 + }, + { + "epoch": 0.01, + "grad_norm": 1.164417028427124, + "learning_rate": 3.354430379746836e-06, + "loss": 0.7202, + "step": 159 + }, + { + "epoch": 0.01, + "grad_norm": 1.1357107162475586, + "learning_rate": 3.3755274261603377e-06, + "loss": 0.6892, + "step": 160 + }, + { + "epoch": 0.01, + "grad_norm": 1.1236298084259033, + "learning_rate": 3.39662447257384e-06, + "loss": 0.8033, + "step": 161 + }, + { + "epoch": 0.01, + "grad_norm": 1.0229138135910034, + "learning_rate": 3.417721518987342e-06, + "loss": 0.7655, + "step": 162 + }, + { + "epoch": 0.01, + "grad_norm": 1.1775723695755005, + "learning_rate": 3.4388185654008443e-06, + "loss": 0.7653, + "step": 163 + }, + { + "epoch": 0.01, + "grad_norm": 1.0776041746139526, + "learning_rate": 3.459915611814346e-06, + "loss": 0.7152, + "step": 164 + }, + { + "epoch": 0.01, + "grad_norm": 1.0283170938491821, + "learning_rate": 3.4810126582278487e-06, + "loss": 0.7518, + "step": 165 + }, + { + "epoch": 0.01, + "grad_norm": 1.0690875053405762, + "learning_rate": 3.5021097046413504e-06, + "loss": 0.8014, + "step": 166 + }, + { + "epoch": 0.01, + "grad_norm": 1.068129301071167, + "learning_rate": 3.523206751054853e-06, + "loss": 0.7015, + "step": 167 + }, + { + "epoch": 0.01, + "grad_norm": 1.0456384420394897, + "learning_rate": 3.544303797468355e-06, + "loss": 0.7503, + "step": 168 + }, + { + "epoch": 0.01, + "grad_norm": 1.148189663887024, + "learning_rate": 3.5654008438818566e-06, + "loss": 0.7158, + "step": 169 + }, + { + "epoch": 0.01, + "grad_norm": 1.050423264503479, + "learning_rate": 3.586497890295359e-06, + "loss": 0.7687, + "step": 170 + }, + { + "epoch": 0.01, + "grad_norm": 1.0767706632614136, + "learning_rate": 3.607594936708861e-06, + "loss": 0.7401, + "step": 171 + }, + { + "epoch": 0.01, + "grad_norm": 1.0276440382003784, + "learning_rate": 3.628691983122363e-06, + "loss": 0.798, + "step": 172 + }, + { + "epoch": 0.01, + "grad_norm": 1.1560982465744019, + "learning_rate": 3.649789029535865e-06, + "loss": 0.7576, + "step": 173 + }, + { + "epoch": 0.01, + "grad_norm": 1.0696228742599487, + "learning_rate": 3.6708860759493675e-06, + "loss": 0.7687, + "step": 174 + }, + { + "epoch": 0.01, + "grad_norm": 1.0968499183654785, + "learning_rate": 3.6919831223628693e-06, + "loss": 0.7254, + "step": 175 + }, + { + "epoch": 0.01, + "grad_norm": 1.0895642042160034, + "learning_rate": 3.713080168776372e-06, + "loss": 0.7755, + "step": 176 + }, + { + "epoch": 0.01, + "grad_norm": 1.0401157140731812, + "learning_rate": 3.7341772151898737e-06, + "loss": 0.729, + "step": 177 + }, + { + "epoch": 0.01, + "grad_norm": 1.1051784753799438, + "learning_rate": 3.755274261603376e-06, + "loss": 0.7117, + "step": 178 + }, + { + "epoch": 0.01, + "grad_norm": 1.12349271774292, + "learning_rate": 3.776371308016878e-06, + "loss": 0.7652, + "step": 179 + }, + { + "epoch": 0.01, + "grad_norm": 1.0855416059494019, + "learning_rate": 3.7974683544303802e-06, + "loss": 0.6883, + "step": 180 + }, + { + "epoch": 0.01, + "grad_norm": 1.1745630502700806, + "learning_rate": 3.818565400843882e-06, + "loss": 0.7449, + "step": 181 + }, + { + "epoch": 0.01, + "grad_norm": 1.0661110877990723, + "learning_rate": 3.839662447257384e-06, + "loss": 0.7232, + "step": 182 + }, + { + "epoch": 0.01, + "grad_norm": 1.0649524927139282, + "learning_rate": 3.860759493670886e-06, + "loss": 0.7632, + "step": 183 + }, + { + "epoch": 0.01, + "grad_norm": 1.128460168838501, + "learning_rate": 3.8818565400843886e-06, + "loss": 0.7516, + "step": 184 + }, + { + "epoch": 0.01, + "grad_norm": 1.1216446161270142, + "learning_rate": 3.902953586497891e-06, + "loss": 0.673, + "step": 185 + }, + { + "epoch": 0.01, + "grad_norm": 1.1039458513259888, + "learning_rate": 3.924050632911393e-06, + "loss": 0.7259, + "step": 186 + }, + { + "epoch": 0.01, + "grad_norm": 1.0785599946975708, + "learning_rate": 3.945147679324895e-06, + "loss": 0.7689, + "step": 187 + }, + { + "epoch": 0.01, + "grad_norm": 1.1407884359359741, + "learning_rate": 3.9662447257383965e-06, + "loss": 0.7762, + "step": 188 + }, + { + "epoch": 0.01, + "grad_norm": 1.2207587957382202, + "learning_rate": 3.9873417721518995e-06, + "loss": 0.7593, + "step": 189 + }, + { + "epoch": 0.01, + "grad_norm": 1.126558542251587, + "learning_rate": 4.008438818565401e-06, + "loss": 0.7983, + "step": 190 + }, + { + "epoch": 0.01, + "grad_norm": 1.0701903104782104, + "learning_rate": 4.029535864978903e-06, + "loss": 0.7166, + "step": 191 + }, + { + "epoch": 0.01, + "grad_norm": 1.0827971696853638, + "learning_rate": 4.050632911392405e-06, + "loss": 0.7339, + "step": 192 + }, + { + "epoch": 0.01, + "grad_norm": 0.9973072409629822, + "learning_rate": 4.0717299578059074e-06, + "loss": 0.6799, + "step": 193 + }, + { + "epoch": 0.01, + "grad_norm": 1.05364990234375, + "learning_rate": 4.09282700421941e-06, + "loss": 0.7106, + "step": 194 + }, + { + "epoch": 0.01, + "grad_norm": 1.0435612201690674, + "learning_rate": 4.113924050632912e-06, + "loss": 0.8139, + "step": 195 + }, + { + "epoch": 0.01, + "grad_norm": 1.0995101928710938, + "learning_rate": 4.135021097046414e-06, + "loss": 0.7309, + "step": 196 + }, + { + "epoch": 0.01, + "grad_norm": 1.0613794326782227, + "learning_rate": 4.156118143459915e-06, + "loss": 0.7698, + "step": 197 + }, + { + "epoch": 0.01, + "grad_norm": 1.1413551568984985, + "learning_rate": 4.177215189873418e-06, + "loss": 0.7912, + "step": 198 + }, + { + "epoch": 0.01, + "grad_norm": 1.0940371751785278, + "learning_rate": 4.19831223628692e-06, + "loss": 0.7316, + "step": 199 + }, + { + "epoch": 0.01, + "grad_norm": 1.1012215614318848, + "learning_rate": 4.219409282700423e-06, + "loss": 0.7178, + "step": 200 + }, + { + "epoch": 0.01, + "grad_norm": 1.1062922477722168, + "learning_rate": 4.240506329113924e-06, + "loss": 0.7409, + "step": 201 + }, + { + "epoch": 0.01, + "grad_norm": 1.1517467498779297, + "learning_rate": 4.261603375527426e-06, + "loss": 0.8392, + "step": 202 + }, + { + "epoch": 0.01, + "grad_norm": 0.9980434775352478, + "learning_rate": 4.2827004219409285e-06, + "loss": 0.7081, + "step": 203 + }, + { + "epoch": 0.01, + "grad_norm": 1.190421223640442, + "learning_rate": 4.303797468354431e-06, + "loss": 0.7573, + "step": 204 + }, + { + "epoch": 0.01, + "grad_norm": 1.1169999837875366, + "learning_rate": 4.324894514767933e-06, + "loss": 0.7444, + "step": 205 + }, + { + "epoch": 0.01, + "grad_norm": 1.1159812211990356, + "learning_rate": 4.345991561181435e-06, + "loss": 0.7336, + "step": 206 + }, + { + "epoch": 0.01, + "grad_norm": 1.016315221786499, + "learning_rate": 4.367088607594937e-06, + "loss": 0.7329, + "step": 207 + }, + { + "epoch": 0.01, + "grad_norm": 1.1287728548049927, + "learning_rate": 4.3881856540084394e-06, + "loss": 0.7638, + "step": 208 + }, + { + "epoch": 0.01, + "grad_norm": 1.0038844347000122, + "learning_rate": 4.409282700421942e-06, + "loss": 0.6823, + "step": 209 + }, + { + "epoch": 0.01, + "grad_norm": 1.0368938446044922, + "learning_rate": 4.430379746835443e-06, + "loss": 0.7563, + "step": 210 + }, + { + "epoch": 0.01, + "grad_norm": 1.215239405632019, + "learning_rate": 4.451476793248945e-06, + "loss": 0.7747, + "step": 211 + }, + { + "epoch": 0.01, + "grad_norm": 0.9736345410346985, + "learning_rate": 4.472573839662447e-06, + "loss": 0.6811, + "step": 212 + }, + { + "epoch": 0.01, + "grad_norm": 1.133034110069275, + "learning_rate": 4.4936708860759495e-06, + "loss": 0.8181, + "step": 213 + }, + { + "epoch": 0.01, + "grad_norm": 1.1740864515304565, + "learning_rate": 4.514767932489452e-06, + "loss": 0.7113, + "step": 214 + }, + { + "epoch": 0.01, + "grad_norm": 1.0824915170669556, + "learning_rate": 4.535864978902954e-06, + "loss": 0.7522, + "step": 215 + }, + { + "epoch": 0.01, + "grad_norm": 1.0215483903884888, + "learning_rate": 4.556962025316456e-06, + "loss": 0.7099, + "step": 216 + }, + { + "epoch": 0.01, + "grad_norm": 1.1021614074707031, + "learning_rate": 4.578059071729958e-06, + "loss": 0.6993, + "step": 217 + }, + { + "epoch": 0.01, + "grad_norm": 1.0713529586791992, + "learning_rate": 4.5991561181434605e-06, + "loss": 0.7765, + "step": 218 + }, + { + "epoch": 0.01, + "grad_norm": 1.0062373876571655, + "learning_rate": 4.620253164556963e-06, + "loss": 0.7104, + "step": 219 + }, + { + "epoch": 0.01, + "grad_norm": 1.0480597019195557, + "learning_rate": 4.641350210970465e-06, + "loss": 0.6808, + "step": 220 + }, + { + "epoch": 0.01, + "grad_norm": 1.0954842567443848, + "learning_rate": 4.662447257383967e-06, + "loss": 0.7508, + "step": 221 + }, + { + "epoch": 0.01, + "grad_norm": 1.1099358797073364, + "learning_rate": 4.683544303797468e-06, + "loss": 0.7075, + "step": 222 + }, + { + "epoch": 0.01, + "grad_norm": 1.0071532726287842, + "learning_rate": 4.7046413502109714e-06, + "loss": 0.6977, + "step": 223 + }, + { + "epoch": 0.01, + "grad_norm": 1.2051150798797607, + "learning_rate": 4.725738396624473e-06, + "loss": 0.7822, + "step": 224 + }, + { + "epoch": 0.01, + "grad_norm": 1.0721451044082642, + "learning_rate": 4.746835443037975e-06, + "loss": 0.7405, + "step": 225 + }, + { + "epoch": 0.01, + "grad_norm": 1.093891978263855, + "learning_rate": 4.767932489451477e-06, + "loss": 0.696, + "step": 226 + }, + { + "epoch": 0.01, + "grad_norm": 1.030121088027954, + "learning_rate": 4.789029535864979e-06, + "loss": 0.6639, + "step": 227 + }, + { + "epoch": 0.01, + "grad_norm": 1.029396653175354, + "learning_rate": 4.8101265822784815e-06, + "loss": 0.6721, + "step": 228 + }, + { + "epoch": 0.01, + "grad_norm": 1.075605034828186, + "learning_rate": 4.831223628691984e-06, + "loss": 0.767, + "step": 229 + }, + { + "epoch": 0.01, + "grad_norm": 1.1397629976272583, + "learning_rate": 4.852320675105486e-06, + "loss": 0.7567, + "step": 230 + }, + { + "epoch": 0.01, + "grad_norm": 0.9947602152824402, + "learning_rate": 4.873417721518987e-06, + "loss": 0.6579, + "step": 231 + }, + { + "epoch": 0.01, + "grad_norm": 1.1511805057525635, + "learning_rate": 4.89451476793249e-06, + "loss": 0.775, + "step": 232 + }, + { + "epoch": 0.01, + "grad_norm": 1.1157087087631226, + "learning_rate": 4.915611814345992e-06, + "loss": 0.7197, + "step": 233 + }, + { + "epoch": 0.01, + "grad_norm": 1.041972041130066, + "learning_rate": 4.936708860759495e-06, + "loss": 0.6443, + "step": 234 + }, + { + "epoch": 0.01, + "grad_norm": 1.1057621240615845, + "learning_rate": 4.957805907172996e-06, + "loss": 0.7262, + "step": 235 + }, + { + "epoch": 0.01, + "grad_norm": 1.0165050029754639, + "learning_rate": 4.978902953586498e-06, + "loss": 0.6992, + "step": 236 + }, + { + "epoch": 0.02, + "grad_norm": 1.093250036239624, + "learning_rate": 5e-06, + "loss": 0.7268, + "step": 237 + }, + { + "epoch": 0.02, + "grad_norm": 1.0492795705795288, + "learning_rate": 5.021097046413503e-06, + "loss": 0.7542, + "step": 238 + }, + { + "epoch": 0.02, + "grad_norm": 1.102603793144226, + "learning_rate": 5.042194092827004e-06, + "loss": 0.7308, + "step": 239 + }, + { + "epoch": 0.02, + "grad_norm": 1.078536033630371, + "learning_rate": 5.063291139240507e-06, + "loss": 0.6885, + "step": 240 + }, + { + "epoch": 0.02, + "grad_norm": 1.0672438144683838, + "learning_rate": 5.084388185654009e-06, + "loss": 0.7401, + "step": 241 + }, + { + "epoch": 0.02, + "grad_norm": 1.0409996509552002, + "learning_rate": 5.1054852320675105e-06, + "loss": 0.6583, + "step": 242 + }, + { + "epoch": 0.02, + "grad_norm": 1.0213676691055298, + "learning_rate": 5.126582278481013e-06, + "loss": 0.6894, + "step": 243 + }, + { + "epoch": 0.02, + "grad_norm": 1.00473952293396, + "learning_rate": 5.147679324894516e-06, + "loss": 0.6331, + "step": 244 + }, + { + "epoch": 0.02, + "grad_norm": 1.0353198051452637, + "learning_rate": 5.168776371308017e-06, + "loss": 0.6998, + "step": 245 + }, + { + "epoch": 0.02, + "grad_norm": 1.0606788396835327, + "learning_rate": 5.189873417721519e-06, + "loss": 0.7192, + "step": 246 + }, + { + "epoch": 0.02, + "grad_norm": 1.0762742757797241, + "learning_rate": 5.2109704641350215e-06, + "loss": 0.7234, + "step": 247 + }, + { + "epoch": 0.02, + "grad_norm": 1.0901657342910767, + "learning_rate": 5.2320675105485245e-06, + "loss": 0.7279, + "step": 248 + }, + { + "epoch": 0.02, + "grad_norm": 1.0092108249664307, + "learning_rate": 5.253164556962026e-06, + "loss": 0.7626, + "step": 249 + }, + { + "epoch": 0.02, + "grad_norm": 1.0560524463653564, + "learning_rate": 5.274261603375528e-06, + "loss": 0.6912, + "step": 250 + }, + { + "epoch": 0.02, + "grad_norm": 1.0830129384994507, + "learning_rate": 5.295358649789029e-06, + "loss": 0.7379, + "step": 251 + }, + { + "epoch": 0.02, + "grad_norm": 1.0292439460754395, + "learning_rate": 5.3164556962025316e-06, + "loss": 0.7024, + "step": 252 + }, + { + "epoch": 0.02, + "grad_norm": 1.0570290088653564, + "learning_rate": 5.337552742616035e-06, + "loss": 0.7485, + "step": 253 + }, + { + "epoch": 0.02, + "grad_norm": 1.098766565322876, + "learning_rate": 5.358649789029536e-06, + "loss": 0.7215, + "step": 254 + }, + { + "epoch": 0.02, + "grad_norm": 1.038459062576294, + "learning_rate": 5.379746835443038e-06, + "loss": 0.7304, + "step": 255 + }, + { + "epoch": 0.02, + "grad_norm": 1.0401676893234253, + "learning_rate": 5.40084388185654e-06, + "loss": 0.711, + "step": 256 + }, + { + "epoch": 0.02, + "grad_norm": 1.1514108180999756, + "learning_rate": 5.421940928270043e-06, + "loss": 0.7406, + "step": 257 + }, + { + "epoch": 0.02, + "grad_norm": 1.0562361478805542, + "learning_rate": 5.443037974683545e-06, + "loss": 0.6916, + "step": 258 + }, + { + "epoch": 0.02, + "grad_norm": 1.0437403917312622, + "learning_rate": 5.464135021097047e-06, + "loss": 0.7645, + "step": 259 + }, + { + "epoch": 0.02, + "grad_norm": 1.0529121160507202, + "learning_rate": 5.485232067510548e-06, + "loss": 0.7039, + "step": 260 + }, + { + "epoch": 0.02, + "grad_norm": 0.9928483963012695, + "learning_rate": 5.506329113924051e-06, + "loss": 0.7255, + "step": 261 + }, + { + "epoch": 0.02, + "grad_norm": 1.149880290031433, + "learning_rate": 5.5274261603375535e-06, + "loss": 0.6802, + "step": 262 + }, + { + "epoch": 0.02, + "grad_norm": 1.0575579404830933, + "learning_rate": 5.548523206751056e-06, + "loss": 0.6778, + "step": 263 + }, + { + "epoch": 0.02, + "grad_norm": 1.055155634880066, + "learning_rate": 5.569620253164557e-06, + "loss": 0.7686, + "step": 264 + }, + { + "epoch": 0.02, + "grad_norm": 1.0442339181900024, + "learning_rate": 5.590717299578059e-06, + "loss": 0.7301, + "step": 265 + }, + { + "epoch": 0.02, + "grad_norm": 1.128800868988037, + "learning_rate": 5.611814345991562e-06, + "loss": 0.7405, + "step": 266 + }, + { + "epoch": 0.02, + "grad_norm": 1.045422077178955, + "learning_rate": 5.6329113924050636e-06, + "loss": 0.7035, + "step": 267 + }, + { + "epoch": 0.02, + "grad_norm": 1.1714004278182983, + "learning_rate": 5.654008438818566e-06, + "loss": 0.6549, + "step": 268 + }, + { + "epoch": 0.02, + "grad_norm": 1.024259328842163, + "learning_rate": 5.675105485232067e-06, + "loss": 0.7305, + "step": 269 + }, + { + "epoch": 0.02, + "grad_norm": 0.9970294237136841, + "learning_rate": 5.69620253164557e-06, + "loss": 0.7004, + "step": 270 + }, + { + "epoch": 0.02, + "grad_norm": 1.1413666009902954, + "learning_rate": 5.717299578059072e-06, + "loss": 0.7865, + "step": 271 + }, + { + "epoch": 0.02, + "grad_norm": 1.0278419256210327, + "learning_rate": 5.7383966244725745e-06, + "loss": 0.7116, + "step": 272 + }, + { + "epoch": 0.02, + "grad_norm": 1.1053383350372314, + "learning_rate": 5.759493670886076e-06, + "loss": 0.7703, + "step": 273 + }, + { + "epoch": 0.02, + "grad_norm": 1.0635850429534912, + "learning_rate": 5.780590717299579e-06, + "loss": 0.7128, + "step": 274 + }, + { + "epoch": 0.02, + "grad_norm": 1.2253458499908447, + "learning_rate": 5.801687763713081e-06, + "loss": 0.7689, + "step": 275 + }, + { + "epoch": 0.02, + "grad_norm": 1.152657151222229, + "learning_rate": 5.8227848101265824e-06, + "loss": 0.7243, + "step": 276 + }, + { + "epoch": 0.02, + "grad_norm": 1.0314563512802124, + "learning_rate": 5.843881856540085e-06, + "loss": 0.702, + "step": 277 + }, + { + "epoch": 0.02, + "grad_norm": 0.9639416933059692, + "learning_rate": 5.864978902953588e-06, + "loss": 0.6504, + "step": 278 + }, + { + "epoch": 0.02, + "grad_norm": 1.0700966119766235, + "learning_rate": 5.886075949367089e-06, + "loss": 0.7275, + "step": 279 + }, + { + "epoch": 0.02, + "grad_norm": 1.0939685106277466, + "learning_rate": 5.907172995780591e-06, + "loss": 0.7193, + "step": 280 + }, + { + "epoch": 0.02, + "grad_norm": 1.0693538188934326, + "learning_rate": 5.928270042194093e-06, + "loss": 0.7746, + "step": 281 + }, + { + "epoch": 0.02, + "grad_norm": 1.057306170463562, + "learning_rate": 5.949367088607595e-06, + "loss": 0.7504, + "step": 282 + }, + { + "epoch": 0.02, + "grad_norm": 1.1040732860565186, + "learning_rate": 5.970464135021098e-06, + "loss": 0.7458, + "step": 283 + }, + { + "epoch": 0.02, + "grad_norm": 1.031706690788269, + "learning_rate": 5.9915611814346e-06, + "loss": 0.7131, + "step": 284 + }, + { + "epoch": 0.02, + "grad_norm": 1.0323573350906372, + "learning_rate": 6.012658227848101e-06, + "loss": 0.6868, + "step": 285 + }, + { + "epoch": 0.02, + "grad_norm": 1.0617097616195679, + "learning_rate": 6.0337552742616035e-06, + "loss": 0.63, + "step": 286 + }, + { + "epoch": 0.02, + "grad_norm": 1.11605703830719, + "learning_rate": 6.0548523206751065e-06, + "loss": 0.6858, + "step": 287 + }, + { + "epoch": 0.02, + "grad_norm": 1.0047672986984253, + "learning_rate": 6.075949367088608e-06, + "loss": 0.6639, + "step": 288 + }, + { + "epoch": 0.02, + "grad_norm": 1.131504774093628, + "learning_rate": 6.09704641350211e-06, + "loss": 0.7063, + "step": 289 + }, + { + "epoch": 0.02, + "grad_norm": 1.063057780265808, + "learning_rate": 6.118143459915612e-06, + "loss": 0.741, + "step": 290 + }, + { + "epoch": 0.02, + "grad_norm": 1.0435850620269775, + "learning_rate": 6.139240506329115e-06, + "loss": 0.6867, + "step": 291 + }, + { + "epoch": 0.02, + "grad_norm": 1.102751612663269, + "learning_rate": 6.160337552742617e-06, + "loss": 0.7544, + "step": 292 + }, + { + "epoch": 0.02, + "grad_norm": 1.1484432220458984, + "learning_rate": 6.181434599156119e-06, + "loss": 0.7241, + "step": 293 + }, + { + "epoch": 0.02, + "grad_norm": 1.1322616338729858, + "learning_rate": 6.20253164556962e-06, + "loss": 0.7311, + "step": 294 + }, + { + "epoch": 0.02, + "grad_norm": 1.077012300491333, + "learning_rate": 6.223628691983122e-06, + "loss": 0.7061, + "step": 295 + }, + { + "epoch": 0.02, + "grad_norm": 1.0520391464233398, + "learning_rate": 6.244725738396625e-06, + "loss": 0.7375, + "step": 296 + }, + { + "epoch": 0.02, + "grad_norm": 1.0452332496643066, + "learning_rate": 6.265822784810128e-06, + "loss": 0.6791, + "step": 297 + }, + { + "epoch": 0.02, + "grad_norm": 1.037235975265503, + "learning_rate": 6.286919831223629e-06, + "loss": 0.7208, + "step": 298 + }, + { + "epoch": 0.02, + "grad_norm": 1.0935461521148682, + "learning_rate": 6.308016877637131e-06, + "loss": 0.679, + "step": 299 + }, + { + "epoch": 0.02, + "grad_norm": 1.1446540355682373, + "learning_rate": 6.329113924050634e-06, + "loss": 0.7607, + "step": 300 + }, + { + "epoch": 0.02, + "grad_norm": 1.0081703662872314, + "learning_rate": 6.3502109704641355e-06, + "loss": 0.6801, + "step": 301 + }, + { + "epoch": 0.02, + "grad_norm": 1.042680025100708, + "learning_rate": 6.371308016877638e-06, + "loss": 0.709, + "step": 302 + }, + { + "epoch": 0.02, + "grad_norm": 0.9972267150878906, + "learning_rate": 6.392405063291139e-06, + "loss": 0.7027, + "step": 303 + }, + { + "epoch": 0.02, + "grad_norm": 1.1144083738327026, + "learning_rate": 6.413502109704642e-06, + "loss": 0.7594, + "step": 304 + }, + { + "epoch": 0.02, + "grad_norm": 1.0191450119018555, + "learning_rate": 6.434599156118144e-06, + "loss": 0.6988, + "step": 305 + }, + { + "epoch": 0.02, + "grad_norm": 1.0765477418899536, + "learning_rate": 6.4556962025316464e-06, + "loss": 0.6887, + "step": 306 + }, + { + "epoch": 0.02, + "grad_norm": 1.0328569412231445, + "learning_rate": 6.476793248945148e-06, + "loss": 0.6941, + "step": 307 + }, + { + "epoch": 0.02, + "grad_norm": 1.060673475265503, + "learning_rate": 6.49789029535865e-06, + "loss": 0.7643, + "step": 308 + }, + { + "epoch": 0.02, + "grad_norm": 1.0403449535369873, + "learning_rate": 6.518987341772153e-06, + "loss": 0.7178, + "step": 309 + }, + { + "epoch": 0.02, + "grad_norm": 1.000282883644104, + "learning_rate": 6.540084388185654e-06, + "loss": 0.6678, + "step": 310 + }, + { + "epoch": 0.02, + "grad_norm": 0.988453209400177, + "learning_rate": 6.5611814345991565e-06, + "loss": 0.6962, + "step": 311 + }, + { + "epoch": 0.02, + "grad_norm": 1.0525801181793213, + "learning_rate": 6.582278481012659e-06, + "loss": 0.6947, + "step": 312 + }, + { + "epoch": 0.02, + "grad_norm": 1.1088162660598755, + "learning_rate": 6.603375527426161e-06, + "loss": 0.7701, + "step": 313 + }, + { + "epoch": 0.02, + "grad_norm": 1.0845990180969238, + "learning_rate": 6.624472573839663e-06, + "loss": 0.6622, + "step": 314 + }, + { + "epoch": 0.02, + "grad_norm": 1.1164056062698364, + "learning_rate": 6.645569620253165e-06, + "loss": 0.6881, + "step": 315 + }, + { + "epoch": 0.02, + "grad_norm": 1.112685203552246, + "learning_rate": 6.666666666666667e-06, + "loss": 0.6864, + "step": 316 + }, + { + "epoch": 0.02, + "grad_norm": 1.0409352779388428, + "learning_rate": 6.68776371308017e-06, + "loss": 0.6724, + "step": 317 + }, + { + "epoch": 0.02, + "grad_norm": 1.0470497608184814, + "learning_rate": 6.708860759493672e-06, + "loss": 0.7243, + "step": 318 + }, + { + "epoch": 0.02, + "grad_norm": 1.1536136865615845, + "learning_rate": 6.729957805907173e-06, + "loss": 0.7587, + "step": 319 + }, + { + "epoch": 0.02, + "grad_norm": 1.1283690929412842, + "learning_rate": 6.751054852320675e-06, + "loss": 0.7378, + "step": 320 + }, + { + "epoch": 0.02, + "grad_norm": 1.1128261089324951, + "learning_rate": 6.772151898734178e-06, + "loss": 0.7279, + "step": 321 + }, + { + "epoch": 0.02, + "grad_norm": 0.973404586315155, + "learning_rate": 6.79324894514768e-06, + "loss": 0.7067, + "step": 322 + }, + { + "epoch": 0.02, + "grad_norm": 1.0499427318572998, + "learning_rate": 6.814345991561182e-06, + "loss": 0.6869, + "step": 323 + }, + { + "epoch": 0.02, + "grad_norm": 1.047536849975586, + "learning_rate": 6.835443037974684e-06, + "loss": 0.6862, + "step": 324 + }, + { + "epoch": 0.02, + "grad_norm": 1.076842188835144, + "learning_rate": 6.8565400843881855e-06, + "loss": 0.6522, + "step": 325 + }, + { + "epoch": 0.02, + "grad_norm": 1.1328686475753784, + "learning_rate": 6.8776371308016885e-06, + "loss": 0.7842, + "step": 326 + }, + { + "epoch": 0.02, + "grad_norm": 1.05484938621521, + "learning_rate": 6.898734177215191e-06, + "loss": 0.7303, + "step": 327 + }, + { + "epoch": 0.02, + "grad_norm": 1.0914195775985718, + "learning_rate": 6.919831223628692e-06, + "loss": 0.7791, + "step": 328 + }, + { + "epoch": 0.02, + "grad_norm": 1.1371619701385498, + "learning_rate": 6.940928270042194e-06, + "loss": 0.7417, + "step": 329 + }, + { + "epoch": 0.02, + "grad_norm": 1.0946354866027832, + "learning_rate": 6.962025316455697e-06, + "loss": 0.7099, + "step": 330 + }, + { + "epoch": 0.02, + "grad_norm": 1.048621416091919, + "learning_rate": 6.9831223628691995e-06, + "loss": 0.7406, + "step": 331 + }, + { + "epoch": 0.02, + "grad_norm": 1.0043079853057861, + "learning_rate": 7.004219409282701e-06, + "loss": 0.6941, + "step": 332 + }, + { + "epoch": 0.02, + "grad_norm": 1.0117560625076294, + "learning_rate": 7.025316455696203e-06, + "loss": 0.7254, + "step": 333 + }, + { + "epoch": 0.02, + "grad_norm": 0.9732447862625122, + "learning_rate": 7.046413502109706e-06, + "loss": 0.6738, + "step": 334 + }, + { + "epoch": 0.02, + "grad_norm": 1.0345726013183594, + "learning_rate": 7.067510548523207e-06, + "loss": 0.7732, + "step": 335 + }, + { + "epoch": 0.02, + "grad_norm": 1.0557833909988403, + "learning_rate": 7.08860759493671e-06, + "loss": 0.6691, + "step": 336 + }, + { + "epoch": 0.02, + "grad_norm": 1.0420573949813843, + "learning_rate": 7.109704641350211e-06, + "loss": 0.6917, + "step": 337 + }, + { + "epoch": 0.02, + "grad_norm": 1.0846279859542847, + "learning_rate": 7.130801687763713e-06, + "loss": 0.7203, + "step": 338 + }, + { + "epoch": 0.02, + "grad_norm": 1.0644689798355103, + "learning_rate": 7.151898734177216e-06, + "loss": 0.7264, + "step": 339 + }, + { + "epoch": 0.02, + "grad_norm": 0.9768193960189819, + "learning_rate": 7.172995780590718e-06, + "loss": 0.635, + "step": 340 + }, + { + "epoch": 0.02, + "grad_norm": 1.0101126432418823, + "learning_rate": 7.19409282700422e-06, + "loss": 0.6507, + "step": 341 + }, + { + "epoch": 0.02, + "grad_norm": 1.208484411239624, + "learning_rate": 7.215189873417722e-06, + "loss": 0.7535, + "step": 342 + }, + { + "epoch": 0.02, + "grad_norm": 1.0944318771362305, + "learning_rate": 7.236286919831225e-06, + "loss": 0.7649, + "step": 343 + }, + { + "epoch": 0.02, + "grad_norm": 0.9345969557762146, + "learning_rate": 7.257383966244726e-06, + "loss": 0.6838, + "step": 344 + }, + { + "epoch": 0.02, + "grad_norm": 1.0136656761169434, + "learning_rate": 7.2784810126582285e-06, + "loss": 0.6863, + "step": 345 + }, + { + "epoch": 0.02, + "grad_norm": 1.0391440391540527, + "learning_rate": 7.29957805907173e-06, + "loss": 0.7484, + "step": 346 + }, + { + "epoch": 0.02, + "grad_norm": 1.0096555948257446, + "learning_rate": 7.320675105485233e-06, + "loss": 0.6641, + "step": 347 + }, + { + "epoch": 0.02, + "grad_norm": 1.0947874784469604, + "learning_rate": 7.341772151898735e-06, + "loss": 0.6892, + "step": 348 + }, + { + "epoch": 0.02, + "grad_norm": 1.0220927000045776, + "learning_rate": 7.362869198312237e-06, + "loss": 0.6493, + "step": 349 + }, + { + "epoch": 0.02, + "grad_norm": 1.0061572790145874, + "learning_rate": 7.3839662447257386e-06, + "loss": 0.6969, + "step": 350 + }, + { + "epoch": 0.02, + "grad_norm": 1.142774224281311, + "learning_rate": 7.405063291139241e-06, + "loss": 0.7036, + "step": 351 + }, + { + "epoch": 0.02, + "grad_norm": 1.1005827188491821, + "learning_rate": 7.426160337552744e-06, + "loss": 0.6813, + "step": 352 + }, + { + "epoch": 0.02, + "grad_norm": 0.9909974932670593, + "learning_rate": 7.447257383966245e-06, + "loss": 0.6848, + "step": 353 + }, + { + "epoch": 0.02, + "grad_norm": 1.0379297733306885, + "learning_rate": 7.468354430379747e-06, + "loss": 0.6472, + "step": 354 + }, + { + "epoch": 0.02, + "grad_norm": 1.0365216732025146, + "learning_rate": 7.4894514767932495e-06, + "loss": 0.7371, + "step": 355 + }, + { + "epoch": 0.02, + "grad_norm": 1.0643117427825928, + "learning_rate": 7.510548523206752e-06, + "loss": 0.7383, + "step": 356 + }, + { + "epoch": 0.02, + "grad_norm": 1.0581454038619995, + "learning_rate": 7.531645569620254e-06, + "loss": 0.6001, + "step": 357 + }, + { + "epoch": 0.02, + "grad_norm": 1.147464632987976, + "learning_rate": 7.552742616033756e-06, + "loss": 0.7362, + "step": 358 + }, + { + "epoch": 0.02, + "grad_norm": 1.1402760744094849, + "learning_rate": 7.5738396624472574e-06, + "loss": 0.6973, + "step": 359 + }, + { + "epoch": 0.02, + "grad_norm": 0.987370491027832, + "learning_rate": 7.5949367088607605e-06, + "loss": 0.6186, + "step": 360 + }, + { + "epoch": 0.02, + "grad_norm": 1.1057859659194946, + "learning_rate": 7.616033755274263e-06, + "loss": 0.758, + "step": 361 + }, + { + "epoch": 0.02, + "grad_norm": 1.1299041509628296, + "learning_rate": 7.637130801687764e-06, + "loss": 0.6524, + "step": 362 + }, + { + "epoch": 0.02, + "grad_norm": 1.0638086795806885, + "learning_rate": 7.658227848101265e-06, + "loss": 0.6748, + "step": 363 + }, + { + "epoch": 0.02, + "grad_norm": 1.1084306240081787, + "learning_rate": 7.679324894514768e-06, + "loss": 0.7294, + "step": 364 + }, + { + "epoch": 0.02, + "grad_norm": 1.0223629474639893, + "learning_rate": 7.700421940928271e-06, + "loss": 0.7385, + "step": 365 + }, + { + "epoch": 0.02, + "grad_norm": 0.9762625694274902, + "learning_rate": 7.721518987341773e-06, + "loss": 0.657, + "step": 366 + }, + { + "epoch": 0.02, + "grad_norm": 1.045155644416809, + "learning_rate": 7.742616033755274e-06, + "loss": 0.6697, + "step": 367 + }, + { + "epoch": 0.02, + "grad_norm": 1.101192831993103, + "learning_rate": 7.763713080168777e-06, + "loss": 0.7388, + "step": 368 + }, + { + "epoch": 0.02, + "grad_norm": 0.9734575152397156, + "learning_rate": 7.78481012658228e-06, + "loss": 0.7006, + "step": 369 + }, + { + "epoch": 0.02, + "grad_norm": 1.0786582231521606, + "learning_rate": 7.805907172995782e-06, + "loss": 0.7253, + "step": 370 + }, + { + "epoch": 0.02, + "grad_norm": 1.0659972429275513, + "learning_rate": 7.827004219409283e-06, + "loss": 0.6975, + "step": 371 + }, + { + "epoch": 0.02, + "grad_norm": 1.0065947771072388, + "learning_rate": 7.848101265822786e-06, + "loss": 0.6478, + "step": 372 + }, + { + "epoch": 0.02, + "grad_norm": 1.066577672958374, + "learning_rate": 7.869198312236287e-06, + "loss": 0.6615, + "step": 373 + }, + { + "epoch": 0.02, + "grad_norm": 0.9906947612762451, + "learning_rate": 7.89029535864979e-06, + "loss": 0.6473, + "step": 374 + }, + { + "epoch": 0.02, + "grad_norm": 1.1715826988220215, + "learning_rate": 7.911392405063292e-06, + "loss": 0.7237, + "step": 375 + }, + { + "epoch": 0.02, + "grad_norm": 0.943070650100708, + "learning_rate": 7.932489451476793e-06, + "loss": 0.6124, + "step": 376 + }, + { + "epoch": 0.02, + "grad_norm": 1.0601487159729004, + "learning_rate": 7.953586497890296e-06, + "loss": 0.7008, + "step": 377 + }, + { + "epoch": 0.02, + "grad_norm": 0.9950490593910217, + "learning_rate": 7.974683544303799e-06, + "loss": 0.676, + "step": 378 + }, + { + "epoch": 0.02, + "grad_norm": 1.107460856437683, + "learning_rate": 7.9957805907173e-06, + "loss": 0.6832, + "step": 379 + }, + { + "epoch": 0.02, + "grad_norm": 1.111391305923462, + "learning_rate": 8.016877637130802e-06, + "loss": 0.7273, + "step": 380 + }, + { + "epoch": 0.02, + "grad_norm": 0.9769160747528076, + "learning_rate": 8.037974683544305e-06, + "loss": 0.6351, + "step": 381 + }, + { + "epoch": 0.02, + "grad_norm": 1.040307879447937, + "learning_rate": 8.059071729957806e-06, + "loss": 0.6979, + "step": 382 + }, + { + "epoch": 0.02, + "grad_norm": 1.1144888401031494, + "learning_rate": 8.080168776371309e-06, + "loss": 0.733, + "step": 383 + }, + { + "epoch": 0.02, + "grad_norm": 0.9809250831604004, + "learning_rate": 8.10126582278481e-06, + "loss": 0.7225, + "step": 384 + }, + { + "epoch": 0.02, + "grad_norm": 1.014562964439392, + "learning_rate": 8.122362869198312e-06, + "loss": 0.6472, + "step": 385 + }, + { + "epoch": 0.02, + "grad_norm": 1.0310808420181274, + "learning_rate": 8.143459915611815e-06, + "loss": 0.7224, + "step": 386 + }, + { + "epoch": 0.02, + "grad_norm": 1.026007890701294, + "learning_rate": 8.164556962025318e-06, + "loss": 0.6932, + "step": 387 + }, + { + "epoch": 0.02, + "grad_norm": 1.062725305557251, + "learning_rate": 8.18565400843882e-06, + "loss": 0.7098, + "step": 388 + }, + { + "epoch": 0.02, + "grad_norm": 1.1098779439926147, + "learning_rate": 8.20675105485232e-06, + "loss": 0.6994, + "step": 389 + }, + { + "epoch": 0.02, + "grad_norm": 1.0307683944702148, + "learning_rate": 8.227848101265824e-06, + "loss": 0.784, + "step": 390 + }, + { + "epoch": 0.02, + "grad_norm": 1.0046746730804443, + "learning_rate": 8.248945147679327e-06, + "loss": 0.6689, + "step": 391 + }, + { + "epoch": 0.02, + "grad_norm": 1.090871810913086, + "learning_rate": 8.270042194092828e-06, + "loss": 0.6986, + "step": 392 + }, + { + "epoch": 0.02, + "grad_norm": 0.9817966222763062, + "learning_rate": 8.29113924050633e-06, + "loss": 0.6288, + "step": 393 + }, + { + "epoch": 0.02, + "grad_norm": 1.004731297492981, + "learning_rate": 8.31223628691983e-06, + "loss": 0.6876, + "step": 394 + }, + { + "epoch": 0.03, + "grad_norm": 0.9606344103813171, + "learning_rate": 8.333333333333334e-06, + "loss": 0.6722, + "step": 395 + }, + { + "epoch": 0.03, + "grad_norm": 0.8888660669326782, + "learning_rate": 8.354430379746837e-06, + "loss": 0.6257, + "step": 396 + }, + { + "epoch": 0.03, + "grad_norm": 1.0444602966308594, + "learning_rate": 8.375527426160338e-06, + "loss": 0.7283, + "step": 397 + }, + { + "epoch": 0.03, + "grad_norm": 1.0116087198257446, + "learning_rate": 8.39662447257384e-06, + "loss": 0.6951, + "step": 398 + }, + { + "epoch": 0.03, + "grad_norm": 0.9750226736068726, + "learning_rate": 8.417721518987342e-06, + "loss": 0.6172, + "step": 399 + }, + { + "epoch": 0.03, + "grad_norm": 0.9671033024787903, + "learning_rate": 8.438818565400846e-06, + "loss": 0.6415, + "step": 400 + }, + { + "epoch": 0.03, + "grad_norm": 1.0681933164596558, + "learning_rate": 8.459915611814347e-06, + "loss": 0.6908, + "step": 401 + }, + { + "epoch": 0.03, + "grad_norm": 1.0316442251205444, + "learning_rate": 8.481012658227848e-06, + "loss": 0.6733, + "step": 402 + }, + { + "epoch": 0.03, + "grad_norm": 1.077885389328003, + "learning_rate": 8.502109704641351e-06, + "loss": 0.7253, + "step": 403 + }, + { + "epoch": 0.03, + "grad_norm": 1.03955078125, + "learning_rate": 8.523206751054853e-06, + "loss": 0.6548, + "step": 404 + }, + { + "epoch": 0.03, + "grad_norm": 1.1187814474105835, + "learning_rate": 8.544303797468356e-06, + "loss": 0.6971, + "step": 405 + }, + { + "epoch": 0.03, + "grad_norm": 1.0932029485702515, + "learning_rate": 8.565400843881857e-06, + "loss": 0.7234, + "step": 406 + }, + { + "epoch": 0.03, + "grad_norm": 1.0664738416671753, + "learning_rate": 8.586497890295358e-06, + "loss": 0.7203, + "step": 407 + }, + { + "epoch": 0.03, + "grad_norm": 1.0771609544754028, + "learning_rate": 8.607594936708861e-06, + "loss": 0.6594, + "step": 408 + }, + { + "epoch": 0.03, + "grad_norm": 1.0817670822143555, + "learning_rate": 8.628691983122364e-06, + "loss": 0.7253, + "step": 409 + }, + { + "epoch": 0.03, + "grad_norm": 1.0627022981643677, + "learning_rate": 8.649789029535866e-06, + "loss": 0.7192, + "step": 410 + }, + { + "epoch": 0.03, + "grad_norm": 0.9740765690803528, + "learning_rate": 8.670886075949367e-06, + "loss": 0.7057, + "step": 411 + }, + { + "epoch": 0.03, + "grad_norm": 0.9944364428520203, + "learning_rate": 8.69198312236287e-06, + "loss": 0.7844, + "step": 412 + }, + { + "epoch": 0.03, + "grad_norm": 0.9714933633804321, + "learning_rate": 8.713080168776371e-06, + "loss": 0.7249, + "step": 413 + }, + { + "epoch": 0.03, + "grad_norm": 1.0184885263442993, + "learning_rate": 8.734177215189874e-06, + "loss": 0.647, + "step": 414 + }, + { + "epoch": 0.03, + "grad_norm": 1.0341577529907227, + "learning_rate": 8.755274261603376e-06, + "loss": 0.6934, + "step": 415 + }, + { + "epoch": 0.03, + "grad_norm": 1.0786633491516113, + "learning_rate": 8.776371308016879e-06, + "loss": 0.6879, + "step": 416 + }, + { + "epoch": 0.03, + "grad_norm": 1.0012767314910889, + "learning_rate": 8.79746835443038e-06, + "loss": 0.7064, + "step": 417 + }, + { + "epoch": 0.03, + "grad_norm": 1.0431938171386719, + "learning_rate": 8.818565400843883e-06, + "loss": 0.7309, + "step": 418 + }, + { + "epoch": 0.03, + "grad_norm": 0.9838118553161621, + "learning_rate": 8.839662447257385e-06, + "loss": 0.6781, + "step": 419 + }, + { + "epoch": 0.03, + "grad_norm": 1.0146209001541138, + "learning_rate": 8.860759493670886e-06, + "loss": 0.699, + "step": 420 + }, + { + "epoch": 0.03, + "grad_norm": 1.0076758861541748, + "learning_rate": 8.881856540084389e-06, + "loss": 0.7561, + "step": 421 + }, + { + "epoch": 0.03, + "grad_norm": 0.9734664559364319, + "learning_rate": 8.90295358649789e-06, + "loss": 0.6111, + "step": 422 + }, + { + "epoch": 0.03, + "grad_norm": 0.9639232158660889, + "learning_rate": 8.924050632911393e-06, + "loss": 0.6722, + "step": 423 + }, + { + "epoch": 0.03, + "grad_norm": 0.9442266225814819, + "learning_rate": 8.945147679324895e-06, + "loss": 0.6588, + "step": 424 + }, + { + "epoch": 0.03, + "grad_norm": 0.9883629083633423, + "learning_rate": 8.966244725738398e-06, + "loss": 0.6536, + "step": 425 + }, + { + "epoch": 0.03, + "grad_norm": 1.013269066810608, + "learning_rate": 8.987341772151899e-06, + "loss": 0.6572, + "step": 426 + }, + { + "epoch": 0.03, + "grad_norm": 1.1119914054870605, + "learning_rate": 9.008438818565402e-06, + "loss": 0.7522, + "step": 427 + }, + { + "epoch": 0.03, + "grad_norm": 1.1306815147399902, + "learning_rate": 9.029535864978903e-06, + "loss": 0.6473, + "step": 428 + }, + { + "epoch": 0.03, + "grad_norm": 1.0609233379364014, + "learning_rate": 9.050632911392407e-06, + "loss": 0.6881, + "step": 429 + }, + { + "epoch": 0.03, + "grad_norm": 1.033166527748108, + "learning_rate": 9.071729957805908e-06, + "loss": 0.7441, + "step": 430 + }, + { + "epoch": 0.03, + "grad_norm": 1.1041021347045898, + "learning_rate": 9.09282700421941e-06, + "loss": 0.7217, + "step": 431 + }, + { + "epoch": 0.03, + "grad_norm": 1.1988205909729004, + "learning_rate": 9.113924050632912e-06, + "loss": 0.7092, + "step": 432 + }, + { + "epoch": 0.03, + "grad_norm": 1.054067611694336, + "learning_rate": 9.135021097046414e-06, + "loss": 0.6541, + "step": 433 + }, + { + "epoch": 0.03, + "grad_norm": 0.967709481716156, + "learning_rate": 9.156118143459917e-06, + "loss": 0.6873, + "step": 434 + }, + { + "epoch": 0.03, + "grad_norm": 1.0264620780944824, + "learning_rate": 9.177215189873418e-06, + "loss": 0.6761, + "step": 435 + }, + { + "epoch": 0.03, + "grad_norm": 1.0361930131912231, + "learning_rate": 9.198312236286921e-06, + "loss": 0.646, + "step": 436 + }, + { + "epoch": 0.03, + "grad_norm": 0.9842200875282288, + "learning_rate": 9.219409282700422e-06, + "loss": 0.6717, + "step": 437 + }, + { + "epoch": 0.03, + "grad_norm": 1.0381004810333252, + "learning_rate": 9.240506329113925e-06, + "loss": 0.6861, + "step": 438 + }, + { + "epoch": 0.03, + "grad_norm": 1.0402230024337769, + "learning_rate": 9.261603375527427e-06, + "loss": 0.7462, + "step": 439 + }, + { + "epoch": 0.03, + "grad_norm": 1.0186004638671875, + "learning_rate": 9.28270042194093e-06, + "loss": 0.6941, + "step": 440 + }, + { + "epoch": 0.03, + "grad_norm": 0.9940155148506165, + "learning_rate": 9.303797468354431e-06, + "loss": 0.7359, + "step": 441 + }, + { + "epoch": 0.03, + "grad_norm": 1.0634286403656006, + "learning_rate": 9.324894514767934e-06, + "loss": 0.7119, + "step": 442 + }, + { + "epoch": 0.03, + "grad_norm": 1.0823959112167358, + "learning_rate": 9.345991561181435e-06, + "loss": 0.6938, + "step": 443 + }, + { + "epoch": 0.03, + "grad_norm": 1.054929256439209, + "learning_rate": 9.367088607594937e-06, + "loss": 0.6777, + "step": 444 + }, + { + "epoch": 0.03, + "grad_norm": 0.9768496751785278, + "learning_rate": 9.38818565400844e-06, + "loss": 0.6919, + "step": 445 + }, + { + "epoch": 0.03, + "grad_norm": 0.9983486533164978, + "learning_rate": 9.409282700421943e-06, + "loss": 0.6621, + "step": 446 + }, + { + "epoch": 0.03, + "grad_norm": 1.0734831094741821, + "learning_rate": 9.430379746835444e-06, + "loss": 0.7321, + "step": 447 + }, + { + "epoch": 0.03, + "grad_norm": 0.9543827772140503, + "learning_rate": 9.451476793248946e-06, + "loss": 0.68, + "step": 448 + }, + { + "epoch": 0.03, + "grad_norm": 0.9443538188934326, + "learning_rate": 9.472573839662449e-06, + "loss": 0.6456, + "step": 449 + }, + { + "epoch": 0.03, + "grad_norm": 1.0233805179595947, + "learning_rate": 9.49367088607595e-06, + "loss": 0.7291, + "step": 450 + }, + { + "epoch": 0.03, + "grad_norm": 0.9053292870521545, + "learning_rate": 9.514767932489453e-06, + "loss": 0.6394, + "step": 451 + }, + { + "epoch": 0.03, + "grad_norm": 0.9343140125274658, + "learning_rate": 9.535864978902954e-06, + "loss": 0.6988, + "step": 452 + }, + { + "epoch": 0.03, + "grad_norm": 1.014218807220459, + "learning_rate": 9.556962025316456e-06, + "loss": 0.6681, + "step": 453 + }, + { + "epoch": 0.03, + "grad_norm": 1.0326037406921387, + "learning_rate": 9.578059071729959e-06, + "loss": 0.7683, + "step": 454 + }, + { + "epoch": 0.03, + "grad_norm": 1.0093090534210205, + "learning_rate": 9.599156118143462e-06, + "loss": 0.7108, + "step": 455 + }, + { + "epoch": 0.03, + "grad_norm": 1.0127726793289185, + "learning_rate": 9.620253164556963e-06, + "loss": 0.7319, + "step": 456 + }, + { + "epoch": 0.03, + "grad_norm": 1.0509214401245117, + "learning_rate": 9.641350210970464e-06, + "loss": 0.7353, + "step": 457 + }, + { + "epoch": 0.03, + "grad_norm": 1.0279244184494019, + "learning_rate": 9.662447257383967e-06, + "loss": 0.675, + "step": 458 + }, + { + "epoch": 0.03, + "grad_norm": 1.001551866531372, + "learning_rate": 9.68354430379747e-06, + "loss": 0.619, + "step": 459 + }, + { + "epoch": 0.03, + "grad_norm": 1.1098026037216187, + "learning_rate": 9.704641350210972e-06, + "loss": 0.7643, + "step": 460 + }, + { + "epoch": 0.03, + "grad_norm": 1.007842779159546, + "learning_rate": 9.725738396624473e-06, + "loss": 0.6524, + "step": 461 + }, + { + "epoch": 0.03, + "grad_norm": 1.0313695669174194, + "learning_rate": 9.746835443037975e-06, + "loss": 0.6565, + "step": 462 + }, + { + "epoch": 0.03, + "grad_norm": 1.0242507457733154, + "learning_rate": 9.767932489451478e-06, + "loss": 0.7165, + "step": 463 + }, + { + "epoch": 0.03, + "grad_norm": 1.0414601564407349, + "learning_rate": 9.78902953586498e-06, + "loss": 0.6985, + "step": 464 + }, + { + "epoch": 0.03, + "grad_norm": 1.0553888082504272, + "learning_rate": 9.810126582278482e-06, + "loss": 0.7361, + "step": 465 + }, + { + "epoch": 0.03, + "grad_norm": 1.0666173696517944, + "learning_rate": 9.831223628691983e-06, + "loss": 0.681, + "step": 466 + }, + { + "epoch": 0.03, + "grad_norm": 0.92351233959198, + "learning_rate": 9.852320675105486e-06, + "loss": 0.6019, + "step": 467 + }, + { + "epoch": 0.03, + "grad_norm": 1.0312128067016602, + "learning_rate": 9.87341772151899e-06, + "loss": 0.7481, + "step": 468 + }, + { + "epoch": 0.03, + "grad_norm": 1.0440195798873901, + "learning_rate": 9.89451476793249e-06, + "loss": 0.6794, + "step": 469 + }, + { + "epoch": 0.03, + "grad_norm": 1.0302846431732178, + "learning_rate": 9.915611814345992e-06, + "loss": 0.6839, + "step": 470 + }, + { + "epoch": 0.03, + "grad_norm": 1.0816489458084106, + "learning_rate": 9.936708860759493e-06, + "loss": 0.7631, + "step": 471 + }, + { + "epoch": 0.03, + "grad_norm": 1.0711013078689575, + "learning_rate": 9.957805907172996e-06, + "loss": 0.7376, + "step": 472 + }, + { + "epoch": 0.03, + "grad_norm": 0.9822407960891724, + "learning_rate": 9.9789029535865e-06, + "loss": 0.7144, + "step": 473 + }, + { + "epoch": 0.03, + "grad_norm": 1.0089426040649414, + "learning_rate": 1e-05, + "loss": 0.7019, + "step": 474 + }, + { + "epoch": 0.03, + "grad_norm": 1.0129270553588867, + "learning_rate": 9.999999894733699e-06, + "loss": 0.7017, + "step": 475 + }, + { + "epoch": 0.03, + "grad_norm": 1.04535973072052, + "learning_rate": 9.999999578934793e-06, + "loss": 0.7501, + "step": 476 + }, + { + "epoch": 0.03, + "grad_norm": 0.9517323970794678, + "learning_rate": 9.9999990526033e-06, + "loss": 0.6579, + "step": 477 + }, + { + "epoch": 0.03, + "grad_norm": 1.0189645290374756, + "learning_rate": 9.99999831573924e-06, + "loss": 0.6476, + "step": 478 + }, + { + "epoch": 0.03, + "grad_norm": 1.0060005187988281, + "learning_rate": 9.999997368342644e-06, + "loss": 0.7177, + "step": 479 + }, + { + "epoch": 0.03, + "grad_norm": 1.0013384819030762, + "learning_rate": 9.999996210413553e-06, + "loss": 0.6951, + "step": 480 + }, + { + "epoch": 0.03, + "grad_norm": 1.0175272226333618, + "learning_rate": 9.999994841952016e-06, + "loss": 0.6826, + "step": 481 + }, + { + "epoch": 0.03, + "grad_norm": 1.017972707748413, + "learning_rate": 9.99999326295809e-06, + "loss": 0.7456, + "step": 482 + }, + { + "epoch": 0.03, + "grad_norm": 1.0452282428741455, + "learning_rate": 9.99999147343184e-06, + "loss": 0.6436, + "step": 483 + }, + { + "epoch": 0.03, + "grad_norm": 1.0681228637695312, + "learning_rate": 9.999989473373344e-06, + "loss": 0.6529, + "step": 484 + }, + { + "epoch": 0.03, + "grad_norm": 1.0214364528656006, + "learning_rate": 9.999987262782684e-06, + "loss": 0.6911, + "step": 485 + }, + { + "epoch": 0.03, + "grad_norm": 1.0358929634094238, + "learning_rate": 9.999984841659955e-06, + "loss": 0.7087, + "step": 486 + }, + { + "epoch": 0.03, + "grad_norm": 1.1352185010910034, + "learning_rate": 9.999982210005258e-06, + "loss": 0.674, + "step": 487 + }, + { + "epoch": 0.03, + "grad_norm": 0.992149293422699, + "learning_rate": 9.999979367818704e-06, + "loss": 0.6709, + "step": 488 + }, + { + "epoch": 0.03, + "grad_norm": 0.9382144808769226, + "learning_rate": 9.999976315100412e-06, + "loss": 0.6415, + "step": 489 + }, + { + "epoch": 0.03, + "grad_norm": 1.002973198890686, + "learning_rate": 9.99997305185051e-06, + "loss": 0.6603, + "step": 490 + }, + { + "epoch": 0.03, + "grad_norm": 1.0173821449279785, + "learning_rate": 9.999969578069137e-06, + "loss": 0.7378, + "step": 491 + }, + { + "epoch": 0.03, + "grad_norm": 1.0466855764389038, + "learning_rate": 9.99996589375644e-06, + "loss": 0.6665, + "step": 492 + }, + { + "epoch": 0.03, + "grad_norm": 1.0387037992477417, + "learning_rate": 9.999961998912573e-06, + "loss": 0.6855, + "step": 493 + }, + { + "epoch": 0.03, + "grad_norm": 1.0338493585586548, + "learning_rate": 9.999957893537697e-06, + "loss": 0.7131, + "step": 494 + }, + { + "epoch": 0.03, + "grad_norm": 1.038668155670166, + "learning_rate": 9.999953577631991e-06, + "loss": 0.725, + "step": 495 + }, + { + "epoch": 0.03, + "grad_norm": 1.1043860912322998, + "learning_rate": 9.999949051195631e-06, + "loss": 0.7042, + "step": 496 + }, + { + "epoch": 0.03, + "grad_norm": 0.9517979025840759, + "learning_rate": 9.999944314228811e-06, + "loss": 0.6591, + "step": 497 + }, + { + "epoch": 0.03, + "grad_norm": 0.9950364232063293, + "learning_rate": 9.99993936673173e-06, + "loss": 0.6464, + "step": 498 + }, + { + "epoch": 0.03, + "grad_norm": 0.9766575694084167, + "learning_rate": 9.999934208704595e-06, + "loss": 0.6531, + "step": 499 + }, + { + "epoch": 0.03, + "grad_norm": 0.9440507888793945, + "learning_rate": 9.999928840147624e-06, + "loss": 0.6836, + "step": 500 + }, + { + "epoch": 0.03, + "grad_norm": 0.9839968681335449, + "learning_rate": 9.999923261061043e-06, + "loss": 0.6737, + "step": 501 + }, + { + "epoch": 0.03, + "grad_norm": 0.9848559498786926, + "learning_rate": 9.999917471445086e-06, + "loss": 0.6518, + "step": 502 + }, + { + "epoch": 0.03, + "grad_norm": 1.0254359245300293, + "learning_rate": 9.999911471299998e-06, + "loss": 0.6916, + "step": 503 + }, + { + "epoch": 0.03, + "grad_norm": 1.0536845922470093, + "learning_rate": 9.999905260626033e-06, + "loss": 0.7099, + "step": 504 + }, + { + "epoch": 0.03, + "grad_norm": 1.1166943311691284, + "learning_rate": 9.99989883942345e-06, + "loss": 0.7506, + "step": 505 + }, + { + "epoch": 0.03, + "grad_norm": 1.0335949659347534, + "learning_rate": 9.999892207692521e-06, + "loss": 0.666, + "step": 506 + }, + { + "epoch": 0.03, + "grad_norm": 1.14674711227417, + "learning_rate": 9.999885365433523e-06, + "loss": 0.6926, + "step": 507 + }, + { + "epoch": 0.03, + "grad_norm": 1.104755163192749, + "learning_rate": 9.999878312646748e-06, + "loss": 0.7511, + "step": 508 + }, + { + "epoch": 0.03, + "grad_norm": 1.075617790222168, + "learning_rate": 9.999871049332488e-06, + "loss": 0.7566, + "step": 509 + }, + { + "epoch": 0.03, + "grad_norm": 1.0128673315048218, + "learning_rate": 9.999863575491053e-06, + "loss": 0.7372, + "step": 510 + }, + { + "epoch": 0.03, + "grad_norm": 1.0448622703552246, + "learning_rate": 9.999855891122754e-06, + "loss": 0.7545, + "step": 511 + }, + { + "epoch": 0.03, + "grad_norm": 1.0948630571365356, + "learning_rate": 9.999847996227918e-06, + "loss": 0.7373, + "step": 512 + }, + { + "epoch": 0.03, + "grad_norm": 1.0382819175720215, + "learning_rate": 9.999839890806877e-06, + "loss": 0.7146, + "step": 513 + }, + { + "epoch": 0.03, + "grad_norm": 1.0122886896133423, + "learning_rate": 9.99983157485997e-06, + "loss": 0.6998, + "step": 514 + }, + { + "epoch": 0.03, + "grad_norm": 1.016714334487915, + "learning_rate": 9.99982304838755e-06, + "loss": 0.685, + "step": 515 + }, + { + "epoch": 0.03, + "grad_norm": 1.0722309350967407, + "learning_rate": 9.999814311389973e-06, + "loss": 0.6384, + "step": 516 + }, + { + "epoch": 0.03, + "grad_norm": 1.0432019233703613, + "learning_rate": 9.99980536386761e-06, + "loss": 0.7179, + "step": 517 + }, + { + "epoch": 0.03, + "grad_norm": 0.9547827243804932, + "learning_rate": 9.999796205820835e-06, + "loss": 0.6755, + "step": 518 + }, + { + "epoch": 0.03, + "grad_norm": 1.041869878768921, + "learning_rate": 9.999786837250034e-06, + "loss": 0.6711, + "step": 519 + }, + { + "epoch": 0.03, + "grad_norm": 1.1629664897918701, + "learning_rate": 9.999777258155604e-06, + "loss": 0.7044, + "step": 520 + }, + { + "epoch": 0.03, + "grad_norm": 1.1089982986450195, + "learning_rate": 9.999767468537947e-06, + "loss": 0.6838, + "step": 521 + }, + { + "epoch": 0.03, + "grad_norm": 1.0850619077682495, + "learning_rate": 9.999757468397473e-06, + "loss": 0.703, + "step": 522 + }, + { + "epoch": 0.03, + "grad_norm": 0.9639663100242615, + "learning_rate": 9.999747257734605e-06, + "loss": 0.6591, + "step": 523 + }, + { + "epoch": 0.03, + "grad_norm": 1.1336872577667236, + "learning_rate": 9.999736836549773e-06, + "loss": 0.6886, + "step": 524 + }, + { + "epoch": 0.03, + "grad_norm": 0.9170753955841064, + "learning_rate": 9.999726204843417e-06, + "loss": 0.645, + "step": 525 + }, + { + "epoch": 0.03, + "grad_norm": 0.9949559569358826, + "learning_rate": 9.999715362615983e-06, + "loss": 0.6407, + "step": 526 + }, + { + "epoch": 0.03, + "grad_norm": 1.0204219818115234, + "learning_rate": 9.999704309867926e-06, + "loss": 0.7058, + "step": 527 + }, + { + "epoch": 0.03, + "grad_norm": 0.9866481423377991, + "learning_rate": 9.999693046599715e-06, + "loss": 0.635, + "step": 528 + }, + { + "epoch": 0.03, + "grad_norm": 1.0090922117233276, + "learning_rate": 9.99968157281182e-06, + "loss": 0.6621, + "step": 529 + }, + { + "epoch": 0.03, + "grad_norm": 1.0550429821014404, + "learning_rate": 9.999669888504731e-06, + "loss": 0.6731, + "step": 530 + }, + { + "epoch": 0.03, + "grad_norm": 0.9702327251434326, + "learning_rate": 9.999657993678932e-06, + "loss": 0.6619, + "step": 531 + }, + { + "epoch": 0.03, + "grad_norm": 0.999877393245697, + "learning_rate": 9.999645888334927e-06, + "loss": 0.7003, + "step": 532 + }, + { + "epoch": 0.03, + "grad_norm": 0.9928595423698425, + "learning_rate": 9.999633572473228e-06, + "loss": 0.7044, + "step": 533 + }, + { + "epoch": 0.03, + "grad_norm": 1.1135807037353516, + "learning_rate": 9.999621046094353e-06, + "loss": 0.7308, + "step": 534 + }, + { + "epoch": 0.03, + "grad_norm": 1.073427438735962, + "learning_rate": 9.999608309198827e-06, + "loss": 0.6319, + "step": 535 + }, + { + "epoch": 0.03, + "grad_norm": 1.006304144859314, + "learning_rate": 9.999595361787187e-06, + "loss": 0.6534, + "step": 536 + }, + { + "epoch": 0.03, + "grad_norm": 1.0457631349563599, + "learning_rate": 9.999582203859977e-06, + "loss": 0.6932, + "step": 537 + }, + { + "epoch": 0.03, + "grad_norm": 1.0090550184249878, + "learning_rate": 9.999568835417755e-06, + "loss": 0.6825, + "step": 538 + }, + { + "epoch": 0.03, + "grad_norm": 0.9906076192855835, + "learning_rate": 9.99955525646108e-06, + "loss": 0.7105, + "step": 539 + }, + { + "epoch": 0.03, + "grad_norm": 1.0822291374206543, + "learning_rate": 9.999541466990526e-06, + "loss": 0.6908, + "step": 540 + }, + { + "epoch": 0.03, + "grad_norm": 0.9858079552650452, + "learning_rate": 9.999527467006674e-06, + "loss": 0.6584, + "step": 541 + }, + { + "epoch": 0.03, + "grad_norm": 0.99437415599823, + "learning_rate": 9.999513256510112e-06, + "loss": 0.6743, + "step": 542 + }, + { + "epoch": 0.03, + "grad_norm": 1.1383122205734253, + "learning_rate": 9.999498835501438e-06, + "loss": 0.7183, + "step": 543 + }, + { + "epoch": 0.03, + "grad_norm": 1.0107368230819702, + "learning_rate": 9.99948420398126e-06, + "loss": 0.7656, + "step": 544 + }, + { + "epoch": 0.03, + "grad_norm": 1.0026836395263672, + "learning_rate": 9.999469361950195e-06, + "loss": 0.7228, + "step": 545 + }, + { + "epoch": 0.03, + "grad_norm": 0.9779771566390991, + "learning_rate": 9.999454309408868e-06, + "loss": 0.7003, + "step": 546 + }, + { + "epoch": 0.03, + "grad_norm": 1.0403019189834595, + "learning_rate": 9.999439046357908e-06, + "loss": 0.6832, + "step": 547 + }, + { + "epoch": 0.03, + "grad_norm": 0.9707220196723938, + "learning_rate": 9.999423572797964e-06, + "loss": 0.6621, + "step": 548 + }, + { + "epoch": 0.03, + "grad_norm": 1.0087053775787354, + "learning_rate": 9.999407888729686e-06, + "loss": 0.698, + "step": 549 + }, + { + "epoch": 0.03, + "grad_norm": 0.9814850091934204, + "learning_rate": 9.999391994153734e-06, + "loss": 0.6615, + "step": 550 + }, + { + "epoch": 0.03, + "grad_norm": 0.9962078332901001, + "learning_rate": 9.999375889070773e-06, + "loss": 0.6748, + "step": 551 + }, + { + "epoch": 0.03, + "grad_norm": 0.9213439226150513, + "learning_rate": 9.99935957348149e-06, + "loss": 0.6722, + "step": 552 + }, + { + "epoch": 0.04, + "grad_norm": 0.9168039560317993, + "learning_rate": 9.999343047386562e-06, + "loss": 0.6371, + "step": 553 + }, + { + "epoch": 0.04, + "grad_norm": 0.9854353666305542, + "learning_rate": 9.999326310786692e-06, + "loss": 0.6603, + "step": 554 + }, + { + "epoch": 0.04, + "grad_norm": 1.073123574256897, + "learning_rate": 9.999309363682582e-06, + "loss": 0.6385, + "step": 555 + }, + { + "epoch": 0.04, + "grad_norm": 1.007665753364563, + "learning_rate": 9.999292206074946e-06, + "loss": 0.6184, + "step": 556 + }, + { + "epoch": 0.04, + "grad_norm": 1.0501381158828735, + "learning_rate": 9.999274837964507e-06, + "loss": 0.6922, + "step": 557 + }, + { + "epoch": 0.04, + "grad_norm": 1.044360876083374, + "learning_rate": 9.999257259351995e-06, + "loss": 0.7034, + "step": 558 + }, + { + "epoch": 0.04, + "grad_norm": 0.970619261264801, + "learning_rate": 9.999239470238151e-06, + "loss": 0.6615, + "step": 559 + }, + { + "epoch": 0.04, + "grad_norm": 0.9886036515235901, + "learning_rate": 9.999221470623726e-06, + "loss": 0.6686, + "step": 560 + }, + { + "epoch": 0.04, + "grad_norm": 0.9825366735458374, + "learning_rate": 9.999203260509473e-06, + "loss": 0.6488, + "step": 561 + }, + { + "epoch": 0.04, + "grad_norm": 0.9640889167785645, + "learning_rate": 9.999184839896163e-06, + "loss": 0.7087, + "step": 562 + }, + { + "epoch": 0.04, + "grad_norm": 0.9821694493293762, + "learning_rate": 9.99916620878457e-06, + "loss": 0.6697, + "step": 563 + }, + { + "epoch": 0.04, + "grad_norm": 0.999758780002594, + "learning_rate": 9.99914736717548e-06, + "loss": 0.7016, + "step": 564 + }, + { + "epoch": 0.04, + "grad_norm": 0.9824835658073425, + "learning_rate": 9.999128315069684e-06, + "loss": 0.6894, + "step": 565 + }, + { + "epoch": 0.04, + "grad_norm": 1.0214180946350098, + "learning_rate": 9.999109052467986e-06, + "loss": 0.689, + "step": 566 + }, + { + "epoch": 0.04, + "grad_norm": 0.9945915341377258, + "learning_rate": 9.999089579371195e-06, + "loss": 0.673, + "step": 567 + }, + { + "epoch": 0.04, + "grad_norm": 1.036805272102356, + "learning_rate": 9.999069895780133e-06, + "loss": 0.6567, + "step": 568 + }, + { + "epoch": 0.04, + "grad_norm": 1.0524154901504517, + "learning_rate": 9.99905000169563e-06, + "loss": 0.6371, + "step": 569 + }, + { + "epoch": 0.04, + "grad_norm": 1.024978518486023, + "learning_rate": 9.99902989711852e-06, + "loss": 0.607, + "step": 570 + }, + { + "epoch": 0.04, + "grad_norm": 0.9848310351371765, + "learning_rate": 9.99900958204965e-06, + "loss": 0.6629, + "step": 571 + }, + { + "epoch": 0.04, + "grad_norm": 0.9815685749053955, + "learning_rate": 9.99898905648988e-06, + "loss": 0.66, + "step": 572 + }, + { + "epoch": 0.04, + "grad_norm": 1.0205934047698975, + "learning_rate": 9.998968320440068e-06, + "loss": 0.6797, + "step": 573 + }, + { + "epoch": 0.04, + "grad_norm": 0.999477207660675, + "learning_rate": 9.998947373901092e-06, + "loss": 0.6605, + "step": 574 + }, + { + "epoch": 0.04, + "grad_norm": 0.8807664513587952, + "learning_rate": 9.998926216873833e-06, + "loss": 0.6118, + "step": 575 + }, + { + "epoch": 0.04, + "grad_norm": 0.9839978218078613, + "learning_rate": 9.998904849359179e-06, + "loss": 0.7242, + "step": 576 + }, + { + "epoch": 0.04, + "grad_norm": 1.015552043914795, + "learning_rate": 9.998883271358033e-06, + "loss": 0.6737, + "step": 577 + }, + { + "epoch": 0.04, + "grad_norm": 0.9544459581375122, + "learning_rate": 9.998861482871303e-06, + "loss": 0.654, + "step": 578 + }, + { + "epoch": 0.04, + "grad_norm": 1.0526336431503296, + "learning_rate": 9.998839483899904e-06, + "loss": 0.6855, + "step": 579 + }, + { + "epoch": 0.04, + "grad_norm": 0.9297081828117371, + "learning_rate": 9.998817274444765e-06, + "loss": 0.671, + "step": 580 + }, + { + "epoch": 0.04, + "grad_norm": 0.9545259475708008, + "learning_rate": 9.998794854506819e-06, + "loss": 0.6331, + "step": 581 + }, + { + "epoch": 0.04, + "grad_norm": 0.94922935962677, + "learning_rate": 9.998772224087011e-06, + "loss": 0.6739, + "step": 582 + }, + { + "epoch": 0.04, + "grad_norm": 0.9858238101005554, + "learning_rate": 9.998749383186296e-06, + "loss": 0.6402, + "step": 583 + }, + { + "epoch": 0.04, + "grad_norm": 1.0650100708007812, + "learning_rate": 9.998726331805632e-06, + "loss": 0.646, + "step": 584 + }, + { + "epoch": 0.04, + "grad_norm": 1.0530078411102295, + "learning_rate": 9.998703069945995e-06, + "loss": 0.6362, + "step": 585 + }, + { + "epoch": 0.04, + "grad_norm": 1.036247968673706, + "learning_rate": 9.998679597608357e-06, + "loss": 0.7401, + "step": 586 + }, + { + "epoch": 0.04, + "grad_norm": 0.990145206451416, + "learning_rate": 9.998655914793711e-06, + "loss": 0.6178, + "step": 587 + }, + { + "epoch": 0.04, + "grad_norm": 0.988255500793457, + "learning_rate": 9.998632021503055e-06, + "loss": 0.6291, + "step": 588 + }, + { + "epoch": 0.04, + "grad_norm": 0.9244970083236694, + "learning_rate": 9.998607917737393e-06, + "loss": 0.6075, + "step": 589 + }, + { + "epoch": 0.04, + "grad_norm": 1.020477056503296, + "learning_rate": 9.99858360349774e-06, + "loss": 0.6414, + "step": 590 + }, + { + "epoch": 0.04, + "grad_norm": 1.138732671737671, + "learning_rate": 9.99855907878512e-06, + "loss": 0.6744, + "step": 591 + }, + { + "epoch": 0.04, + "grad_norm": 1.045698881149292, + "learning_rate": 9.998534343600567e-06, + "loss": 0.6219, + "step": 592 + }, + { + "epoch": 0.04, + "grad_norm": 0.9490424990653992, + "learning_rate": 9.99850939794512e-06, + "loss": 0.6726, + "step": 593 + }, + { + "epoch": 0.04, + "grad_norm": 0.944223165512085, + "learning_rate": 9.998484241819833e-06, + "loss": 0.6593, + "step": 594 + }, + { + "epoch": 0.04, + "grad_norm": 0.941078245639801, + "learning_rate": 9.99845887522576e-06, + "loss": 0.6168, + "step": 595 + }, + { + "epoch": 0.04, + "grad_norm": 0.9710420966148376, + "learning_rate": 9.998433298163974e-06, + "loss": 0.7174, + "step": 596 + }, + { + "epoch": 0.04, + "grad_norm": 0.958227276802063, + "learning_rate": 9.99840751063555e-06, + "loss": 0.6664, + "step": 597 + }, + { + "epoch": 0.04, + "grad_norm": 1.0510207414627075, + "learning_rate": 9.998381512641574e-06, + "loss": 0.6356, + "step": 598 + }, + { + "epoch": 0.04, + "grad_norm": 1.0442863702774048, + "learning_rate": 9.99835530418314e-06, + "loss": 0.6988, + "step": 599 + }, + { + "epoch": 0.04, + "grad_norm": 1.0224828720092773, + "learning_rate": 9.998328885261352e-06, + "loss": 0.7323, + "step": 600 + }, + { + "epoch": 0.04, + "grad_norm": 1.007456660270691, + "learning_rate": 9.998302255877323e-06, + "loss": 0.7505, + "step": 601 + }, + { + "epoch": 0.04, + "grad_norm": 1.0247341394424438, + "learning_rate": 9.998275416032176e-06, + "loss": 0.6373, + "step": 602 + }, + { + "epoch": 0.04, + "grad_norm": 0.9564207792282104, + "learning_rate": 9.998248365727037e-06, + "loss": 0.6353, + "step": 603 + }, + { + "epoch": 0.04, + "grad_norm": 1.015875220298767, + "learning_rate": 9.998221104963047e-06, + "loss": 0.6737, + "step": 604 + }, + { + "epoch": 0.04, + "grad_norm": 0.9237945079803467, + "learning_rate": 9.998193633741353e-06, + "loss": 0.63, + "step": 605 + }, + { + "epoch": 0.04, + "grad_norm": 0.961380660533905, + "learning_rate": 9.998165952063113e-06, + "loss": 0.6423, + "step": 606 + }, + { + "epoch": 0.04, + "grad_norm": 1.033104658126831, + "learning_rate": 9.998138059929493e-06, + "loss": 0.6457, + "step": 607 + }, + { + "epoch": 0.04, + "grad_norm": 0.9923886060714722, + "learning_rate": 9.998109957341665e-06, + "loss": 0.6845, + "step": 608 + }, + { + "epoch": 0.04, + "grad_norm": 1.0205433368682861, + "learning_rate": 9.998081644300815e-06, + "loss": 0.6861, + "step": 609 + }, + { + "epoch": 0.04, + "grad_norm": 1.0005221366882324, + "learning_rate": 9.998053120808133e-06, + "loss": 0.6066, + "step": 610 + }, + { + "epoch": 0.04, + "grad_norm": 1.06248140335083, + "learning_rate": 9.998024386864821e-06, + "loss": 0.6687, + "step": 611 + }, + { + "epoch": 0.04, + "grad_norm": 0.9576296806335449, + "learning_rate": 9.99799544247209e-06, + "loss": 0.6272, + "step": 612 + }, + { + "epoch": 0.04, + "grad_norm": 1.0306636095046997, + "learning_rate": 9.997966287631157e-06, + "loss": 0.6418, + "step": 613 + }, + { + "epoch": 0.04, + "grad_norm": 0.9254865050315857, + "learning_rate": 9.997936922343253e-06, + "loss": 0.6598, + "step": 614 + }, + { + "epoch": 0.04, + "grad_norm": 1.0606073141098022, + "learning_rate": 9.997907346609608e-06, + "loss": 0.6819, + "step": 615 + }, + { + "epoch": 0.04, + "grad_norm": 0.9714949727058411, + "learning_rate": 9.997877560431472e-06, + "loss": 0.6894, + "step": 616 + }, + { + "epoch": 0.04, + "grad_norm": 0.9910696148872375, + "learning_rate": 9.9978475638101e-06, + "loss": 0.6925, + "step": 617 + }, + { + "epoch": 0.04, + "grad_norm": 0.986289918422699, + "learning_rate": 9.997817356746751e-06, + "loss": 0.6504, + "step": 618 + }, + { + "epoch": 0.04, + "grad_norm": 0.9627223014831543, + "learning_rate": 9.9977869392427e-06, + "loss": 0.6327, + "step": 619 + }, + { + "epoch": 0.04, + "grad_norm": 1.0320593118667603, + "learning_rate": 9.997756311299229e-06, + "loss": 0.711, + "step": 620 + }, + { + "epoch": 0.04, + "grad_norm": 1.0085593461990356, + "learning_rate": 9.997725472917623e-06, + "loss": 0.6989, + "step": 621 + }, + { + "epoch": 0.04, + "grad_norm": 0.9825278520584106, + "learning_rate": 9.997694424099184e-06, + "loss": 0.6127, + "step": 622 + }, + { + "epoch": 0.04, + "grad_norm": 0.9646775126457214, + "learning_rate": 9.99766316484522e-06, + "loss": 0.6, + "step": 623 + }, + { + "epoch": 0.04, + "grad_norm": 0.963337242603302, + "learning_rate": 9.997631695157043e-06, + "loss": 0.668, + "step": 624 + }, + { + "epoch": 0.04, + "grad_norm": 0.9412251710891724, + "learning_rate": 9.997600015035982e-06, + "loss": 0.7373, + "step": 625 + }, + { + "epoch": 0.04, + "grad_norm": 0.9189504981040955, + "learning_rate": 9.99756812448337e-06, + "loss": 0.6178, + "step": 626 + }, + { + "epoch": 0.04, + "grad_norm": 1.0134235620498657, + "learning_rate": 9.99753602350055e-06, + "loss": 0.7062, + "step": 627 + }, + { + "epoch": 0.04, + "grad_norm": 0.9411819577217102, + "learning_rate": 9.997503712088873e-06, + "loss": 0.6587, + "step": 628 + }, + { + "epoch": 0.04, + "grad_norm": 1.1199616193771362, + "learning_rate": 9.9974711902497e-06, + "loss": 0.7078, + "step": 629 + }, + { + "epoch": 0.04, + "grad_norm": 1.0165560245513916, + "learning_rate": 9.997438457984398e-06, + "loss": 0.7244, + "step": 630 + }, + { + "epoch": 0.04, + "grad_norm": 0.9950897097587585, + "learning_rate": 9.997405515294349e-06, + "loss": 0.6421, + "step": 631 + }, + { + "epoch": 0.04, + "grad_norm": 1.0164552927017212, + "learning_rate": 9.99737236218094e-06, + "loss": 0.7025, + "step": 632 + }, + { + "epoch": 0.04, + "grad_norm": 0.993774950504303, + "learning_rate": 9.997338998645562e-06, + "loss": 0.6816, + "step": 633 + }, + { + "epoch": 0.04, + "grad_norm": 1.4380717277526855, + "learning_rate": 9.997305424689626e-06, + "loss": 0.6834, + "step": 634 + }, + { + "epoch": 0.04, + "grad_norm": 0.9469321966171265, + "learning_rate": 9.997271640314542e-06, + "loss": 0.6764, + "step": 635 + }, + { + "epoch": 0.04, + "grad_norm": 0.992761492729187, + "learning_rate": 9.997237645521733e-06, + "loss": 0.6855, + "step": 636 + }, + { + "epoch": 0.04, + "grad_norm": 0.998432993888855, + "learning_rate": 9.997203440312632e-06, + "loss": 0.6508, + "step": 637 + }, + { + "epoch": 0.04, + "grad_norm": 1.0937362909317017, + "learning_rate": 9.997169024688678e-06, + "loss": 0.6324, + "step": 638 + }, + { + "epoch": 0.04, + "grad_norm": 0.9640938639640808, + "learning_rate": 9.997134398651318e-06, + "loss": 0.644, + "step": 639 + }, + { + "epoch": 0.04, + "grad_norm": 0.98480224609375, + "learning_rate": 9.997099562202015e-06, + "loss": 0.6983, + "step": 640 + }, + { + "epoch": 0.04, + "grad_norm": 0.9848492741584778, + "learning_rate": 9.997064515342232e-06, + "loss": 0.6504, + "step": 641 + }, + { + "epoch": 0.04, + "grad_norm": 1.068900465965271, + "learning_rate": 9.997029258073445e-06, + "loss": 0.6797, + "step": 642 + }, + { + "epoch": 0.04, + "grad_norm": 0.9294676780700684, + "learning_rate": 9.99699379039714e-06, + "loss": 0.6919, + "step": 643 + }, + { + "epoch": 0.04, + "grad_norm": 1.054355263710022, + "learning_rate": 9.996958112314811e-06, + "loss": 0.6627, + "step": 644 + }, + { + "epoch": 0.04, + "grad_norm": 0.9877650737762451, + "learning_rate": 9.996922223827958e-06, + "loss": 0.6641, + "step": 645 + }, + { + "epoch": 0.04, + "grad_norm": 0.9646854400634766, + "learning_rate": 9.996886124938092e-06, + "loss": 0.689, + "step": 646 + }, + { + "epoch": 0.04, + "grad_norm": 0.9384362101554871, + "learning_rate": 9.996849815646736e-06, + "loss": 0.6542, + "step": 647 + }, + { + "epoch": 0.04, + "grad_norm": 0.9519203305244446, + "learning_rate": 9.996813295955417e-06, + "loss": 0.6862, + "step": 648 + }, + { + "epoch": 0.04, + "grad_norm": 0.9722259640693665, + "learning_rate": 9.996776565865671e-06, + "loss": 0.647, + "step": 649 + }, + { + "epoch": 0.04, + "grad_norm": 0.9623055458068848, + "learning_rate": 9.996739625379049e-06, + "loss": 0.66, + "step": 650 + }, + { + "epoch": 0.04, + "grad_norm": 1.0324175357818604, + "learning_rate": 9.9967024744971e-06, + "loss": 0.6562, + "step": 651 + }, + { + "epoch": 0.04, + "grad_norm": 0.9541144371032715, + "learning_rate": 9.996665113221396e-06, + "loss": 0.6685, + "step": 652 + }, + { + "epoch": 0.04, + "grad_norm": 0.9493923187255859, + "learning_rate": 9.996627541553504e-06, + "loss": 0.6545, + "step": 653 + }, + { + "epoch": 0.04, + "grad_norm": 0.8991278409957886, + "learning_rate": 9.996589759495008e-06, + "loss": 0.627, + "step": 654 + }, + { + "epoch": 0.04, + "grad_norm": 1.066519021987915, + "learning_rate": 9.9965517670475e-06, + "loss": 0.7353, + "step": 655 + }, + { + "epoch": 0.04, + "grad_norm": 1.0149874687194824, + "learning_rate": 9.996513564212577e-06, + "loss": 0.6611, + "step": 656 + }, + { + "epoch": 0.04, + "grad_norm": 0.9408120512962341, + "learning_rate": 9.996475150991852e-06, + "loss": 0.6596, + "step": 657 + }, + { + "epoch": 0.04, + "grad_norm": 1.0212035179138184, + "learning_rate": 9.99643652738694e-06, + "loss": 0.7073, + "step": 658 + }, + { + "epoch": 0.04, + "grad_norm": 1.066163182258606, + "learning_rate": 9.996397693399465e-06, + "loss": 0.6822, + "step": 659 + }, + { + "epoch": 0.04, + "grad_norm": 0.9859758615493774, + "learning_rate": 9.996358649031066e-06, + "loss": 0.6691, + "step": 660 + }, + { + "epoch": 0.04, + "grad_norm": 0.9595489501953125, + "learning_rate": 9.996319394283384e-06, + "loss": 0.6466, + "step": 661 + }, + { + "epoch": 0.04, + "grad_norm": 1.0166726112365723, + "learning_rate": 9.996279929158074e-06, + "loss": 0.7591, + "step": 662 + }, + { + "epoch": 0.04, + "grad_norm": 1.0264617204666138, + "learning_rate": 9.996240253656796e-06, + "loss": 0.6632, + "step": 663 + }, + { + "epoch": 0.04, + "grad_norm": 0.9634756445884705, + "learning_rate": 9.996200367781224e-06, + "loss": 0.6782, + "step": 664 + }, + { + "epoch": 0.04, + "grad_norm": 0.9677413105964661, + "learning_rate": 9.996160271533033e-06, + "loss": 0.6686, + "step": 665 + }, + { + "epoch": 0.04, + "grad_norm": 1.0524028539657593, + "learning_rate": 9.996119964913914e-06, + "loss": 0.6577, + "step": 666 + }, + { + "epoch": 0.04, + "grad_norm": 1.0887914896011353, + "learning_rate": 9.996079447925563e-06, + "loss": 0.7091, + "step": 667 + }, + { + "epoch": 0.04, + "grad_norm": 1.1000274419784546, + "learning_rate": 9.996038720569688e-06, + "loss": 0.6642, + "step": 668 + }, + { + "epoch": 0.04, + "grad_norm": 0.8987544775009155, + "learning_rate": 9.995997782848e-06, + "loss": 0.6831, + "step": 669 + }, + { + "epoch": 0.04, + "grad_norm": 0.9039768576622009, + "learning_rate": 9.995956634762227e-06, + "loss": 0.6322, + "step": 670 + }, + { + "epoch": 0.04, + "grad_norm": 0.9678673148155212, + "learning_rate": 9.995915276314099e-06, + "loss": 0.6755, + "step": 671 + }, + { + "epoch": 0.04, + "grad_norm": 1.0128499269485474, + "learning_rate": 9.995873707505358e-06, + "loss": 0.6625, + "step": 672 + }, + { + "epoch": 0.04, + "grad_norm": 0.9772645235061646, + "learning_rate": 9.995831928337756e-06, + "loss": 0.6798, + "step": 673 + }, + { + "epoch": 0.04, + "grad_norm": 0.9836506843566895, + "learning_rate": 9.99578993881305e-06, + "loss": 0.7197, + "step": 674 + }, + { + "epoch": 0.04, + "grad_norm": 1.006994605064392, + "learning_rate": 9.995747738933009e-06, + "loss": 0.6148, + "step": 675 + }, + { + "epoch": 0.04, + "grad_norm": 0.9389720559120178, + "learning_rate": 9.995705328699408e-06, + "loss": 0.7033, + "step": 676 + }, + { + "epoch": 0.04, + "grad_norm": 1.027573823928833, + "learning_rate": 9.995662708114036e-06, + "loss": 0.7308, + "step": 677 + }, + { + "epoch": 0.04, + "grad_norm": 1.0021096467971802, + "learning_rate": 9.995619877178685e-06, + "loss": 0.6952, + "step": 678 + }, + { + "epoch": 0.04, + "grad_norm": 0.9737790822982788, + "learning_rate": 9.99557683589516e-06, + "loss": 0.741, + "step": 679 + }, + { + "epoch": 0.04, + "grad_norm": 0.9685630202293396, + "learning_rate": 9.995533584265273e-06, + "loss": 0.6778, + "step": 680 + }, + { + "epoch": 0.04, + "grad_norm": 1.057919979095459, + "learning_rate": 9.995490122290845e-06, + "loss": 0.7612, + "step": 681 + }, + { + "epoch": 0.04, + "grad_norm": 0.9825607538223267, + "learning_rate": 9.995446449973705e-06, + "loss": 0.6765, + "step": 682 + }, + { + "epoch": 0.04, + "grad_norm": 0.9538717269897461, + "learning_rate": 9.995402567315695e-06, + "loss": 0.6369, + "step": 683 + }, + { + "epoch": 0.04, + "grad_norm": 0.9630232453346252, + "learning_rate": 9.99535847431866e-06, + "loss": 0.6993, + "step": 684 + }, + { + "epoch": 0.04, + "grad_norm": 0.9161614179611206, + "learning_rate": 9.995314170984457e-06, + "loss": 0.6651, + "step": 685 + }, + { + "epoch": 0.04, + "grad_norm": 1.0125501155853271, + "learning_rate": 9.99526965731495e-06, + "loss": 0.6547, + "step": 686 + }, + { + "epoch": 0.04, + "grad_norm": 0.9450063109397888, + "learning_rate": 9.995224933312016e-06, + "loss": 0.6821, + "step": 687 + }, + { + "epoch": 0.04, + "grad_norm": 0.9932665228843689, + "learning_rate": 9.995179998977537e-06, + "loss": 0.6668, + "step": 688 + }, + { + "epoch": 0.04, + "grad_norm": 1.0881638526916504, + "learning_rate": 9.995134854313407e-06, + "loss": 0.6555, + "step": 689 + }, + { + "epoch": 0.04, + "grad_norm": 1.0094410181045532, + "learning_rate": 9.995089499321521e-06, + "loss": 0.7013, + "step": 690 + }, + { + "epoch": 0.04, + "grad_norm": 0.9489861726760864, + "learning_rate": 9.995043934003796e-06, + "loss": 0.6614, + "step": 691 + }, + { + "epoch": 0.04, + "grad_norm": 0.9359292984008789, + "learning_rate": 9.994998158362148e-06, + "loss": 0.6638, + "step": 692 + }, + { + "epoch": 0.04, + "grad_norm": 1.031830906867981, + "learning_rate": 9.994952172398502e-06, + "loss": 0.7097, + "step": 693 + }, + { + "epoch": 0.04, + "grad_norm": 0.9993519186973572, + "learning_rate": 9.994905976114799e-06, + "loss": 0.7518, + "step": 694 + }, + { + "epoch": 0.04, + "grad_norm": 0.9693325757980347, + "learning_rate": 9.994859569512978e-06, + "loss": 0.6672, + "step": 695 + }, + { + "epoch": 0.04, + "grad_norm": 0.9676175117492676, + "learning_rate": 9.994812952594998e-06, + "loss": 0.6357, + "step": 696 + }, + { + "epoch": 0.04, + "grad_norm": 0.9201942682266235, + "learning_rate": 9.994766125362821e-06, + "loss": 0.6413, + "step": 697 + }, + { + "epoch": 0.04, + "grad_norm": 0.9743843674659729, + "learning_rate": 9.994719087818416e-06, + "loss": 0.6913, + "step": 698 + }, + { + "epoch": 0.04, + "grad_norm": 0.9933353066444397, + "learning_rate": 9.994671839963766e-06, + "loss": 0.645, + "step": 699 + }, + { + "epoch": 0.04, + "grad_norm": 0.9530336856842041, + "learning_rate": 9.994624381800861e-06, + "loss": 0.6292, + "step": 700 + }, + { + "epoch": 0.04, + "grad_norm": 0.9179254770278931, + "learning_rate": 9.994576713331699e-06, + "loss": 0.6347, + "step": 701 + }, + { + "epoch": 0.04, + "grad_norm": 1.0408787727355957, + "learning_rate": 9.994528834558285e-06, + "loss": 0.7041, + "step": 702 + }, + { + "epoch": 0.04, + "grad_norm": 0.9889572262763977, + "learning_rate": 9.994480745482636e-06, + "loss": 0.7333, + "step": 703 + }, + { + "epoch": 0.04, + "grad_norm": 0.9241334795951843, + "learning_rate": 9.99443244610678e-06, + "loss": 0.6409, + "step": 704 + }, + { + "epoch": 0.04, + "grad_norm": 0.9185009598731995, + "learning_rate": 9.994383936432745e-06, + "loss": 0.6136, + "step": 705 + }, + { + "epoch": 0.04, + "grad_norm": 1.019323468208313, + "learning_rate": 9.994335216462579e-06, + "loss": 0.6535, + "step": 706 + }, + { + "epoch": 0.04, + "grad_norm": 0.9755436182022095, + "learning_rate": 9.99428628619833e-06, + "loss": 0.666, + "step": 707 + }, + { + "epoch": 0.04, + "grad_norm": 1.0832858085632324, + "learning_rate": 9.994237145642058e-06, + "loss": 0.731, + "step": 708 + }, + { + "epoch": 0.04, + "grad_norm": 1.036895513534546, + "learning_rate": 9.994187794795835e-06, + "loss": 0.6424, + "step": 709 + }, + { + "epoch": 0.04, + "grad_norm": 1.0125101804733276, + "learning_rate": 9.994138233661737e-06, + "loss": 0.7033, + "step": 710 + }, + { + "epoch": 0.05, + "grad_norm": 0.9705720543861389, + "learning_rate": 9.994088462241851e-06, + "loss": 0.7013, + "step": 711 + }, + { + "epoch": 0.05, + "grad_norm": 0.9837139248847961, + "learning_rate": 9.994038480538274e-06, + "loss": 0.6626, + "step": 712 + }, + { + "epoch": 0.05, + "grad_norm": 1.0453715324401855, + "learning_rate": 9.993988288553109e-06, + "loss": 0.6823, + "step": 713 + }, + { + "epoch": 0.05, + "grad_norm": 0.9410306811332703, + "learning_rate": 9.993937886288471e-06, + "loss": 0.6091, + "step": 714 + }, + { + "epoch": 0.05, + "grad_norm": 1.074299693107605, + "learning_rate": 9.99388727374648e-06, + "loss": 0.7144, + "step": 715 + }, + { + "epoch": 0.05, + "grad_norm": 0.9971576929092407, + "learning_rate": 9.993836450929268e-06, + "loss": 0.6564, + "step": 716 + }, + { + "epoch": 0.05, + "grad_norm": 0.9908046126365662, + "learning_rate": 9.993785417838978e-06, + "loss": 0.5828, + "step": 717 + }, + { + "epoch": 0.05, + "grad_norm": 0.985888659954071, + "learning_rate": 9.993734174477752e-06, + "loss": 0.6428, + "step": 718 + }, + { + "epoch": 0.05, + "grad_norm": 0.9888742566108704, + "learning_rate": 9.993682720847755e-06, + "loss": 0.6407, + "step": 719 + }, + { + "epoch": 0.05, + "grad_norm": 1.1355136632919312, + "learning_rate": 9.99363105695115e-06, + "loss": 0.6762, + "step": 720 + }, + { + "epoch": 0.05, + "grad_norm": 1.0007946491241455, + "learning_rate": 9.993579182790111e-06, + "loss": 0.6832, + "step": 721 + }, + { + "epoch": 0.05, + "grad_norm": 0.9047017097473145, + "learning_rate": 9.993527098366826e-06, + "loss": 0.6187, + "step": 722 + }, + { + "epoch": 0.05, + "grad_norm": 1.020936369895935, + "learning_rate": 9.993474803683486e-06, + "loss": 0.7007, + "step": 723 + }, + { + "epoch": 0.05, + "grad_norm": 1.0052990913391113, + "learning_rate": 9.993422298742293e-06, + "loss": 0.6472, + "step": 724 + }, + { + "epoch": 0.05, + "grad_norm": 0.9748853445053101, + "learning_rate": 9.993369583545456e-06, + "loss": 0.6705, + "step": 725 + }, + { + "epoch": 0.05, + "grad_norm": 1.0031288862228394, + "learning_rate": 9.993316658095198e-06, + "loss": 0.6891, + "step": 726 + }, + { + "epoch": 0.05, + "grad_norm": 1.018814206123352, + "learning_rate": 9.993263522393745e-06, + "loss": 0.7134, + "step": 727 + }, + { + "epoch": 0.05, + "grad_norm": 0.9574296474456787, + "learning_rate": 9.993210176443338e-06, + "loss": 0.6798, + "step": 728 + }, + { + "epoch": 0.05, + "grad_norm": 1.0445259809494019, + "learning_rate": 9.993156620246219e-06, + "loss": 0.7278, + "step": 729 + }, + { + "epoch": 0.05, + "grad_norm": 0.9974734783172607, + "learning_rate": 9.993102853804643e-06, + "loss": 0.687, + "step": 730 + }, + { + "epoch": 0.05, + "grad_norm": 0.9887290000915527, + "learning_rate": 9.993048877120876e-06, + "loss": 0.6525, + "step": 731 + }, + { + "epoch": 0.05, + "grad_norm": 1.0144176483154297, + "learning_rate": 9.992994690197192e-06, + "loss": 0.6687, + "step": 732 + }, + { + "epoch": 0.05, + "grad_norm": 0.9862350225448608, + "learning_rate": 9.992940293035871e-06, + "loss": 0.6795, + "step": 733 + }, + { + "epoch": 0.05, + "grad_norm": 0.9350804090499878, + "learning_rate": 9.992885685639203e-06, + "loss": 0.6152, + "step": 734 + }, + { + "epoch": 0.05, + "grad_norm": 0.937683641910553, + "learning_rate": 9.992830868009487e-06, + "loss": 0.6963, + "step": 735 + }, + { + "epoch": 0.05, + "grad_norm": 0.9010510444641113, + "learning_rate": 9.992775840149031e-06, + "loss": 0.6196, + "step": 736 + }, + { + "epoch": 0.05, + "grad_norm": 0.9523539543151855, + "learning_rate": 9.992720602060155e-06, + "loss": 0.6837, + "step": 737 + }, + { + "epoch": 0.05, + "grad_norm": 1.0255656242370605, + "learning_rate": 9.992665153745182e-06, + "loss": 0.7196, + "step": 738 + }, + { + "epoch": 0.05, + "grad_norm": 0.9392181038856506, + "learning_rate": 9.992609495206448e-06, + "loss": 0.7169, + "step": 739 + }, + { + "epoch": 0.05, + "grad_norm": 0.9734467267990112, + "learning_rate": 9.992553626446296e-06, + "loss": 0.6623, + "step": 740 + }, + { + "epoch": 0.05, + "grad_norm": 1.0188223123550415, + "learning_rate": 9.992497547467079e-06, + "loss": 0.7326, + "step": 741 + }, + { + "epoch": 0.05, + "grad_norm": 0.9624093770980835, + "learning_rate": 9.992441258271157e-06, + "loss": 0.6977, + "step": 742 + }, + { + "epoch": 0.05, + "grad_norm": 1.082211971282959, + "learning_rate": 9.992384758860902e-06, + "loss": 0.6825, + "step": 743 + }, + { + "epoch": 0.05, + "grad_norm": 0.9943745732307434, + "learning_rate": 9.99232804923869e-06, + "loss": 0.6755, + "step": 744 + }, + { + "epoch": 0.05, + "grad_norm": 1.016251564025879, + "learning_rate": 9.992271129406914e-06, + "loss": 0.6997, + "step": 745 + }, + { + "epoch": 0.05, + "grad_norm": 0.9673280715942383, + "learning_rate": 9.992213999367965e-06, + "loss": 0.6589, + "step": 746 + }, + { + "epoch": 0.05, + "grad_norm": 0.984897255897522, + "learning_rate": 9.992156659124253e-06, + "loss": 0.6589, + "step": 747 + }, + { + "epoch": 0.05, + "grad_norm": 0.991965651512146, + "learning_rate": 9.99209910867819e-06, + "loss": 0.6864, + "step": 748 + }, + { + "epoch": 0.05, + "grad_norm": 0.8924134373664856, + "learning_rate": 9.9920413480322e-06, + "loss": 0.6364, + "step": 749 + }, + { + "epoch": 0.05, + "grad_norm": 0.9772643446922302, + "learning_rate": 9.991983377188715e-06, + "loss": 0.6503, + "step": 750 + }, + { + "epoch": 0.05, + "grad_norm": 0.9954730868339539, + "learning_rate": 9.991925196150174e-06, + "loss": 0.6672, + "step": 751 + }, + { + "epoch": 0.05, + "grad_norm": 1.0930321216583252, + "learning_rate": 9.99186680491903e-06, + "loss": 0.6267, + "step": 752 + }, + { + "epoch": 0.05, + "grad_norm": 0.9928365349769592, + "learning_rate": 9.99180820349774e-06, + "loss": 0.7189, + "step": 753 + }, + { + "epoch": 0.05, + "grad_norm": 0.9924033880233765, + "learning_rate": 9.991749391888772e-06, + "loss": 0.7041, + "step": 754 + }, + { + "epoch": 0.05, + "grad_norm": 1.0099812746047974, + "learning_rate": 9.991690370094603e-06, + "loss": 0.7206, + "step": 755 + }, + { + "epoch": 0.05, + "grad_norm": 1.0289372205734253, + "learning_rate": 9.991631138117715e-06, + "loss": 0.7392, + "step": 756 + }, + { + "epoch": 0.05, + "grad_norm": 1.0022187232971191, + "learning_rate": 9.991571695960606e-06, + "loss": 0.6903, + "step": 757 + }, + { + "epoch": 0.05, + "grad_norm": 0.8819312453269958, + "learning_rate": 9.991512043625777e-06, + "loss": 0.6078, + "step": 758 + }, + { + "epoch": 0.05, + "grad_norm": 0.9569171071052551, + "learning_rate": 9.991452181115739e-06, + "loss": 0.6521, + "step": 759 + }, + { + "epoch": 0.05, + "grad_norm": 0.9458112120628357, + "learning_rate": 9.991392108433016e-06, + "loss": 0.7201, + "step": 760 + }, + { + "epoch": 0.05, + "grad_norm": 0.9446436762809753, + "learning_rate": 9.991331825580132e-06, + "loss": 0.7091, + "step": 761 + }, + { + "epoch": 0.05, + "grad_norm": 1.0510190725326538, + "learning_rate": 9.99127133255963e-06, + "loss": 0.6936, + "step": 762 + }, + { + "epoch": 0.05, + "grad_norm": 0.9923396110534668, + "learning_rate": 9.991210629374058e-06, + "loss": 0.65, + "step": 763 + }, + { + "epoch": 0.05, + "grad_norm": 0.9962745904922485, + "learning_rate": 9.991149716025967e-06, + "loss": 0.6632, + "step": 764 + }, + { + "epoch": 0.05, + "grad_norm": 0.9799250960350037, + "learning_rate": 9.991088592517924e-06, + "loss": 0.6631, + "step": 765 + }, + { + "epoch": 0.05, + "grad_norm": 0.922505795955658, + "learning_rate": 9.991027258852505e-06, + "loss": 0.6594, + "step": 766 + }, + { + "epoch": 0.05, + "grad_norm": 0.919065535068512, + "learning_rate": 9.990965715032289e-06, + "loss": 0.5974, + "step": 767 + }, + { + "epoch": 0.05, + "grad_norm": 0.9268805384635925, + "learning_rate": 9.99090396105987e-06, + "loss": 0.6345, + "step": 768 + }, + { + "epoch": 0.05, + "grad_norm": 0.9739397168159485, + "learning_rate": 9.990841996937846e-06, + "loss": 0.6757, + "step": 769 + }, + { + "epoch": 0.05, + "grad_norm": 1.0037983655929565, + "learning_rate": 9.990779822668827e-06, + "loss": 0.714, + "step": 770 + }, + { + "epoch": 0.05, + "grad_norm": 0.9694761633872986, + "learning_rate": 9.990717438255435e-06, + "loss": 0.6614, + "step": 771 + }, + { + "epoch": 0.05, + "grad_norm": 0.962847113609314, + "learning_rate": 9.99065484370029e-06, + "loss": 0.7379, + "step": 772 + }, + { + "epoch": 0.05, + "grad_norm": 0.9772030115127563, + "learning_rate": 9.99059203900603e-06, + "loss": 0.651, + "step": 773 + }, + { + "epoch": 0.05, + "grad_norm": 0.9414916634559631, + "learning_rate": 9.990529024175303e-06, + "loss": 0.5766, + "step": 774 + }, + { + "epoch": 0.05, + "grad_norm": 0.970845103263855, + "learning_rate": 9.990465799210757e-06, + "loss": 0.6302, + "step": 775 + }, + { + "epoch": 0.05, + "grad_norm": 1.0166348218917847, + "learning_rate": 9.99040236411506e-06, + "loss": 0.745, + "step": 776 + }, + { + "epoch": 0.05, + "grad_norm": 1.0761529207229614, + "learning_rate": 9.990338718890878e-06, + "loss": 0.6747, + "step": 777 + }, + { + "epoch": 0.05, + "grad_norm": 0.9640312194824219, + "learning_rate": 9.990274863540891e-06, + "loss": 0.6303, + "step": 778 + }, + { + "epoch": 0.05, + "grad_norm": 1.0130929946899414, + "learning_rate": 9.990210798067792e-06, + "loss": 0.6996, + "step": 779 + }, + { + "epoch": 0.05, + "grad_norm": 1.0341368913650513, + "learning_rate": 9.990146522474273e-06, + "loss": 0.6914, + "step": 780 + }, + { + "epoch": 0.05, + "grad_norm": 0.9964221119880676, + "learning_rate": 9.990082036763046e-06, + "loss": 0.6798, + "step": 781 + }, + { + "epoch": 0.05, + "grad_norm": 0.9139377474784851, + "learning_rate": 9.990017340936823e-06, + "loss": 0.6492, + "step": 782 + }, + { + "epoch": 0.05, + "grad_norm": 0.9424077868461609, + "learning_rate": 9.989952434998328e-06, + "loss": 0.7023, + "step": 783 + }, + { + "epoch": 0.05, + "grad_norm": 0.9752070307731628, + "learning_rate": 9.989887318950295e-06, + "loss": 0.6834, + "step": 784 + }, + { + "epoch": 0.05, + "grad_norm": 0.9262385964393616, + "learning_rate": 9.989821992795467e-06, + "loss": 0.667, + "step": 785 + }, + { + "epoch": 0.05, + "grad_norm": 0.9452196955680847, + "learning_rate": 9.989756456536593e-06, + "loss": 0.6285, + "step": 786 + }, + { + "epoch": 0.05, + "grad_norm": 1.0110949277877808, + "learning_rate": 9.989690710176433e-06, + "loss": 0.6696, + "step": 787 + }, + { + "epoch": 0.05, + "grad_norm": 0.9881210327148438, + "learning_rate": 9.989624753717752e-06, + "loss": 0.6981, + "step": 788 + }, + { + "epoch": 0.05, + "grad_norm": 0.9513158798217773, + "learning_rate": 9.989558587163332e-06, + "loss": 0.6507, + "step": 789 + }, + { + "epoch": 0.05, + "grad_norm": 0.9474494457244873, + "learning_rate": 9.989492210515958e-06, + "loss": 0.6531, + "step": 790 + }, + { + "epoch": 0.05, + "grad_norm": 0.9730693101882935, + "learning_rate": 9.989425623778423e-06, + "loss": 0.6714, + "step": 791 + }, + { + "epoch": 0.05, + "grad_norm": 1.0155802965164185, + "learning_rate": 9.989358826953533e-06, + "loss": 0.6823, + "step": 792 + }, + { + "epoch": 0.05, + "grad_norm": 0.9369945526123047, + "learning_rate": 9.989291820044099e-06, + "loss": 0.6641, + "step": 793 + }, + { + "epoch": 0.05, + "grad_norm": 0.9431589245796204, + "learning_rate": 9.989224603052943e-06, + "loss": 0.6021, + "step": 794 + }, + { + "epoch": 0.05, + "grad_norm": 1.0468026399612427, + "learning_rate": 9.989157175982896e-06, + "loss": 0.7079, + "step": 795 + }, + { + "epoch": 0.05, + "grad_norm": 0.9889234900474548, + "learning_rate": 9.989089538836795e-06, + "loss": 0.6192, + "step": 796 + }, + { + "epoch": 0.05, + "grad_norm": 0.9767878651618958, + "learning_rate": 9.98902169161749e-06, + "loss": 0.6864, + "step": 797 + }, + { + "epoch": 0.05, + "grad_norm": 0.9971264004707336, + "learning_rate": 9.988953634327836e-06, + "loss": 0.7311, + "step": 798 + }, + { + "epoch": 0.05, + "grad_norm": 0.9408350586891174, + "learning_rate": 9.988885366970701e-06, + "loss": 0.7097, + "step": 799 + }, + { + "epoch": 0.05, + "grad_norm": 0.9743333458900452, + "learning_rate": 9.988816889548958e-06, + "loss": 0.6899, + "step": 800 + }, + { + "epoch": 0.05, + "grad_norm": 0.943428635597229, + "learning_rate": 9.98874820206549e-06, + "loss": 0.6598, + "step": 801 + }, + { + "epoch": 0.05, + "grad_norm": 0.9481057524681091, + "learning_rate": 9.988679304523192e-06, + "loss": 0.6171, + "step": 802 + }, + { + "epoch": 0.05, + "grad_norm": 0.9587128758430481, + "learning_rate": 9.988610196924962e-06, + "loss": 0.6731, + "step": 803 + }, + { + "epoch": 0.05, + "grad_norm": 0.9046414494514465, + "learning_rate": 9.98854087927371e-06, + "loss": 0.6688, + "step": 804 + }, + { + "epoch": 0.05, + "grad_norm": 0.9200078248977661, + "learning_rate": 9.988471351572355e-06, + "loss": 0.6444, + "step": 805 + }, + { + "epoch": 0.05, + "grad_norm": 1.0290337800979614, + "learning_rate": 9.988401613823825e-06, + "loss": 0.6902, + "step": 806 + }, + { + "epoch": 0.05, + "grad_norm": 0.9850117564201355, + "learning_rate": 9.988331666031056e-06, + "loss": 0.6844, + "step": 807 + }, + { + "epoch": 0.05, + "grad_norm": 1.0165541172027588, + "learning_rate": 9.988261508196994e-06, + "loss": 0.6588, + "step": 808 + }, + { + "epoch": 0.05, + "grad_norm": 0.9509625434875488, + "learning_rate": 9.988191140324595e-06, + "loss": 0.6395, + "step": 809 + }, + { + "epoch": 0.05, + "grad_norm": 0.8759293556213379, + "learning_rate": 9.988120562416817e-06, + "loss": 0.6624, + "step": 810 + }, + { + "epoch": 0.05, + "grad_norm": 1.0014184713363647, + "learning_rate": 9.988049774476636e-06, + "loss": 0.658, + "step": 811 + }, + { + "epoch": 0.05, + "grad_norm": 0.9247162938117981, + "learning_rate": 9.98797877650703e-06, + "loss": 0.6577, + "step": 812 + }, + { + "epoch": 0.05, + "grad_norm": 0.9179931879043579, + "learning_rate": 9.987907568510991e-06, + "loss": 0.6404, + "step": 813 + }, + { + "epoch": 0.05, + "grad_norm": 1.043821930885315, + "learning_rate": 9.987836150491515e-06, + "loss": 0.708, + "step": 814 + }, + { + "epoch": 0.05, + "grad_norm": 1.0630831718444824, + "learning_rate": 9.98776452245161e-06, + "loss": 0.6716, + "step": 815 + }, + { + "epoch": 0.05, + "grad_norm": 0.825951874256134, + "learning_rate": 9.987692684394294e-06, + "loss": 0.6198, + "step": 816 + }, + { + "epoch": 0.05, + "grad_norm": 1.03840172290802, + "learning_rate": 9.987620636322589e-06, + "loss": 0.6358, + "step": 817 + }, + { + "epoch": 0.05, + "grad_norm": 0.9848338961601257, + "learning_rate": 9.987548378239529e-06, + "loss": 0.7417, + "step": 818 + }, + { + "epoch": 0.05, + "grad_norm": 1.0349116325378418, + "learning_rate": 9.987475910148156e-06, + "loss": 0.6809, + "step": 819 + }, + { + "epoch": 0.05, + "grad_norm": 0.8631666898727417, + "learning_rate": 9.987403232051525e-06, + "loss": 0.6175, + "step": 820 + }, + { + "epoch": 0.05, + "grad_norm": 0.972474992275238, + "learning_rate": 9.987330343952692e-06, + "loss": 0.6336, + "step": 821 + }, + { + "epoch": 0.05, + "grad_norm": 0.9713488817214966, + "learning_rate": 9.987257245854729e-06, + "loss": 0.6455, + "step": 822 + }, + { + "epoch": 0.05, + "grad_norm": 0.9748914241790771, + "learning_rate": 9.987183937760713e-06, + "loss": 0.6871, + "step": 823 + }, + { + "epoch": 0.05, + "grad_norm": 0.9042195081710815, + "learning_rate": 9.98711041967373e-06, + "loss": 0.674, + "step": 824 + }, + { + "epoch": 0.05, + "grad_norm": 0.9888205528259277, + "learning_rate": 9.987036691596877e-06, + "loss": 0.6587, + "step": 825 + }, + { + "epoch": 0.05, + "grad_norm": 1.0192331075668335, + "learning_rate": 9.986962753533257e-06, + "loss": 0.6968, + "step": 826 + }, + { + "epoch": 0.05, + "grad_norm": 0.9436393976211548, + "learning_rate": 9.986888605485983e-06, + "loss": 0.615, + "step": 827 + }, + { + "epoch": 0.05, + "grad_norm": 0.9654821753501892, + "learning_rate": 9.986814247458177e-06, + "loss": 0.6473, + "step": 828 + }, + { + "epoch": 0.05, + "grad_norm": 1.0658471584320068, + "learning_rate": 9.986739679452973e-06, + "loss": 0.7004, + "step": 829 + }, + { + "epoch": 0.05, + "grad_norm": 0.9651502966880798, + "learning_rate": 9.986664901473508e-06, + "loss": 0.5893, + "step": 830 + }, + { + "epoch": 0.05, + "grad_norm": 0.9251554608345032, + "learning_rate": 9.98658991352293e-06, + "loss": 0.6431, + "step": 831 + }, + { + "epoch": 0.05, + "grad_norm": 0.973141610622406, + "learning_rate": 9.986514715604401e-06, + "loss": 0.6458, + "step": 832 + }, + { + "epoch": 0.05, + "grad_norm": 1.0335615873336792, + "learning_rate": 9.986439307721083e-06, + "loss": 0.6615, + "step": 833 + }, + { + "epoch": 0.05, + "grad_norm": 1.0050101280212402, + "learning_rate": 9.98636368987615e-06, + "loss": 0.6729, + "step": 834 + }, + { + "epoch": 0.05, + "grad_norm": 0.9446513652801514, + "learning_rate": 9.98628786207279e-06, + "loss": 0.6221, + "step": 835 + }, + { + "epoch": 0.05, + "grad_norm": 0.9160159230232239, + "learning_rate": 9.986211824314193e-06, + "loss": 0.6568, + "step": 836 + }, + { + "epoch": 0.05, + "grad_norm": 0.9735708236694336, + "learning_rate": 9.986135576603564e-06, + "loss": 0.7023, + "step": 837 + }, + { + "epoch": 0.05, + "grad_norm": 0.996498167514801, + "learning_rate": 9.98605911894411e-06, + "loss": 0.6713, + "step": 838 + }, + { + "epoch": 0.05, + "grad_norm": 1.0343009233474731, + "learning_rate": 9.985982451339054e-06, + "loss": 0.6776, + "step": 839 + }, + { + "epoch": 0.05, + "grad_norm": 0.9924929141998291, + "learning_rate": 9.985905573791619e-06, + "loss": 0.6904, + "step": 840 + }, + { + "epoch": 0.05, + "grad_norm": 0.9932686686515808, + "learning_rate": 9.985828486305046e-06, + "loss": 0.692, + "step": 841 + }, + { + "epoch": 0.05, + "grad_norm": 0.9492565989494324, + "learning_rate": 9.98575118888258e-06, + "loss": 0.6513, + "step": 842 + }, + { + "epoch": 0.05, + "grad_norm": 0.9628197550773621, + "learning_rate": 9.985673681527474e-06, + "loss": 0.6683, + "step": 843 + }, + { + "epoch": 0.05, + "grad_norm": 1.0421559810638428, + "learning_rate": 9.985595964242996e-06, + "loss": 0.6264, + "step": 844 + }, + { + "epoch": 0.05, + "grad_norm": 1.0385863780975342, + "learning_rate": 9.985518037032413e-06, + "loss": 0.7485, + "step": 845 + }, + { + "epoch": 0.05, + "grad_norm": 0.9698561429977417, + "learning_rate": 9.98543989989901e-06, + "loss": 0.6673, + "step": 846 + }, + { + "epoch": 0.05, + "grad_norm": 1.1064468622207642, + "learning_rate": 9.985361552846076e-06, + "loss": 0.679, + "step": 847 + }, + { + "epoch": 0.05, + "grad_norm": 0.9422234296798706, + "learning_rate": 9.98528299587691e-06, + "loss": 0.621, + "step": 848 + }, + { + "epoch": 0.05, + "grad_norm": 0.9520366787910461, + "learning_rate": 9.98520422899482e-06, + "loss": 0.6739, + "step": 849 + }, + { + "epoch": 0.05, + "grad_norm": 1.012834072113037, + "learning_rate": 9.985125252203122e-06, + "loss": 0.6486, + "step": 850 + }, + { + "epoch": 0.05, + "grad_norm": 0.9515576958656311, + "learning_rate": 9.985046065505141e-06, + "loss": 0.6631, + "step": 851 + }, + { + "epoch": 0.05, + "grad_norm": 0.9131986498832703, + "learning_rate": 9.984966668904211e-06, + "loss": 0.6183, + "step": 852 + }, + { + "epoch": 0.05, + "grad_norm": 1.0273786783218384, + "learning_rate": 9.984887062403678e-06, + "loss": 0.6952, + "step": 853 + }, + { + "epoch": 0.05, + "grad_norm": 1.0616415739059448, + "learning_rate": 9.984807246006891e-06, + "loss": 0.6604, + "step": 854 + }, + { + "epoch": 0.05, + "grad_norm": 1.0264229774475098, + "learning_rate": 9.984727219717212e-06, + "loss": 0.6836, + "step": 855 + }, + { + "epoch": 0.05, + "grad_norm": 0.9620240926742554, + "learning_rate": 9.984646983538009e-06, + "loss": 0.6725, + "step": 856 + }, + { + "epoch": 0.05, + "grad_norm": 1.026843786239624, + "learning_rate": 9.984566537472662e-06, + "loss": 0.6973, + "step": 857 + }, + { + "epoch": 0.05, + "grad_norm": 0.910517156124115, + "learning_rate": 9.98448588152456e-06, + "loss": 0.6826, + "step": 858 + }, + { + "epoch": 0.05, + "grad_norm": 0.8792157769203186, + "learning_rate": 9.984405015697097e-06, + "loss": 0.6207, + "step": 859 + }, + { + "epoch": 0.05, + "grad_norm": 1.026893973350525, + "learning_rate": 9.984323939993678e-06, + "loss": 0.6576, + "step": 860 + }, + { + "epoch": 0.05, + "grad_norm": 0.9753056168556213, + "learning_rate": 9.984242654417716e-06, + "loss": 0.7123, + "step": 861 + }, + { + "epoch": 0.05, + "grad_norm": 0.9340549111366272, + "learning_rate": 9.984161158972636e-06, + "loss": 0.6311, + "step": 862 + }, + { + "epoch": 0.05, + "grad_norm": 0.958814799785614, + "learning_rate": 9.984079453661869e-06, + "loss": 0.6848, + "step": 863 + }, + { + "epoch": 0.05, + "grad_norm": 1.02495539188385, + "learning_rate": 9.983997538488851e-06, + "loss": 0.6614, + "step": 864 + }, + { + "epoch": 0.05, + "grad_norm": 0.9181699156761169, + "learning_rate": 9.983915413457036e-06, + "loss": 0.6182, + "step": 865 + }, + { + "epoch": 0.05, + "grad_norm": 0.9128296375274658, + "learning_rate": 9.983833078569883e-06, + "loss": 0.6696, + "step": 866 + }, + { + "epoch": 0.05, + "grad_norm": 0.9677926301956177, + "learning_rate": 9.983750533830856e-06, + "loss": 0.7208, + "step": 867 + }, + { + "epoch": 0.05, + "grad_norm": 1.0382143259048462, + "learning_rate": 9.98366777924343e-06, + "loss": 0.7064, + "step": 868 + }, + { + "epoch": 0.06, + "grad_norm": 1.0093235969543457, + "learning_rate": 9.983584814811092e-06, + "loss": 0.6637, + "step": 869 + }, + { + "epoch": 0.06, + "grad_norm": 0.9793532490730286, + "learning_rate": 9.983501640537333e-06, + "loss": 0.7384, + "step": 870 + }, + { + "epoch": 0.06, + "grad_norm": 0.9115772247314453, + "learning_rate": 9.983418256425656e-06, + "loss": 0.6931, + "step": 871 + }, + { + "epoch": 0.06, + "grad_norm": 1.007819414138794, + "learning_rate": 9.983334662479572e-06, + "loss": 0.6364, + "step": 872 + }, + { + "epoch": 0.06, + "grad_norm": 0.9625717997550964, + "learning_rate": 9.983250858702603e-06, + "loss": 0.6219, + "step": 873 + }, + { + "epoch": 0.06, + "grad_norm": 0.9227074980735779, + "learning_rate": 9.983166845098275e-06, + "loss": 0.6472, + "step": 874 + }, + { + "epoch": 0.06, + "grad_norm": 1.024123191833496, + "learning_rate": 9.983082621670126e-06, + "loss": 0.6085, + "step": 875 + }, + { + "epoch": 0.06, + "grad_norm": 0.9824538826942444, + "learning_rate": 9.982998188421702e-06, + "loss": 0.7077, + "step": 876 + }, + { + "epoch": 0.06, + "grad_norm": 0.9731464982032776, + "learning_rate": 9.98291354535656e-06, + "loss": 0.6783, + "step": 877 + }, + { + "epoch": 0.06, + "grad_norm": 0.9325253367424011, + "learning_rate": 9.982828692478261e-06, + "loss": 0.6467, + "step": 878 + }, + { + "epoch": 0.06, + "grad_norm": 1.0967223644256592, + "learning_rate": 9.982743629790382e-06, + "loss": 0.6342, + "step": 879 + }, + { + "epoch": 0.06, + "grad_norm": 0.9630370140075684, + "learning_rate": 9.982658357296502e-06, + "loss": 0.6359, + "step": 880 + }, + { + "epoch": 0.06, + "grad_norm": 0.9850766062736511, + "learning_rate": 9.982572875000212e-06, + "loss": 0.6571, + "step": 881 + }, + { + "epoch": 0.06, + "grad_norm": 1.0202929973602295, + "learning_rate": 9.98248718290511e-06, + "loss": 0.7151, + "step": 882 + }, + { + "epoch": 0.06, + "grad_norm": 0.9093936085700989, + "learning_rate": 9.982401281014806e-06, + "loss": 0.6123, + "step": 883 + }, + { + "epoch": 0.06, + "grad_norm": 1.017791748046875, + "learning_rate": 9.982315169332918e-06, + "loss": 0.7209, + "step": 884 + }, + { + "epoch": 0.06, + "grad_norm": 1.0141305923461914, + "learning_rate": 9.982228847863069e-06, + "loss": 0.6597, + "step": 885 + }, + { + "epoch": 0.06, + "grad_norm": 0.9242052435874939, + "learning_rate": 9.982142316608897e-06, + "loss": 0.6469, + "step": 886 + }, + { + "epoch": 0.06, + "grad_norm": 0.9739558100700378, + "learning_rate": 9.982055575574042e-06, + "loss": 0.6735, + "step": 887 + }, + { + "epoch": 0.06, + "grad_norm": 1.027789831161499, + "learning_rate": 9.981968624762159e-06, + "loss": 0.7039, + "step": 888 + }, + { + "epoch": 0.06, + "grad_norm": 1.0447962284088135, + "learning_rate": 9.981881464176908e-06, + "loss": 0.6446, + "step": 889 + }, + { + "epoch": 0.06, + "grad_norm": 1.0435072183609009, + "learning_rate": 9.981794093821957e-06, + "loss": 0.6343, + "step": 890 + }, + { + "epoch": 0.06, + "grad_norm": 0.9777998924255371, + "learning_rate": 9.981706513700989e-06, + "loss": 0.6912, + "step": 891 + }, + { + "epoch": 0.06, + "grad_norm": 0.9571147561073303, + "learning_rate": 9.98161872381769e-06, + "loss": 0.6138, + "step": 892 + }, + { + "epoch": 0.06, + "grad_norm": 0.9109900593757629, + "learning_rate": 9.981530724175756e-06, + "loss": 0.5711, + "step": 893 + }, + { + "epoch": 0.06, + "grad_norm": 0.95176762342453, + "learning_rate": 9.981442514778892e-06, + "loss": 0.6553, + "step": 894 + }, + { + "epoch": 0.06, + "grad_norm": 0.890281081199646, + "learning_rate": 9.981354095630816e-06, + "loss": 0.6194, + "step": 895 + }, + { + "epoch": 0.06, + "grad_norm": 0.9268616437911987, + "learning_rate": 9.981265466735244e-06, + "loss": 0.6303, + "step": 896 + }, + { + "epoch": 0.06, + "grad_norm": 0.961388885974884, + "learning_rate": 9.981176628095913e-06, + "loss": 0.6764, + "step": 897 + }, + { + "epoch": 0.06, + "grad_norm": 1.0211546421051025, + "learning_rate": 9.981087579716564e-06, + "loss": 0.6545, + "step": 898 + }, + { + "epoch": 0.06, + "grad_norm": 0.9296051859855652, + "learning_rate": 9.980998321600944e-06, + "loss": 0.613, + "step": 899 + }, + { + "epoch": 0.06, + "grad_norm": 0.9536461234092712, + "learning_rate": 9.98090885375281e-06, + "loss": 0.7281, + "step": 900 + }, + { + "epoch": 0.06, + "grad_norm": 0.9516177773475647, + "learning_rate": 9.980819176175932e-06, + "loss": 0.6636, + "step": 901 + }, + { + "epoch": 0.06, + "grad_norm": 0.9591559767723083, + "learning_rate": 9.980729288874088e-06, + "loss": 0.6949, + "step": 902 + }, + { + "epoch": 0.06, + "grad_norm": 1.0467829704284668, + "learning_rate": 9.98063919185106e-06, + "loss": 0.6268, + "step": 903 + }, + { + "epoch": 0.06, + "grad_norm": 1.0166997909545898, + "learning_rate": 9.980548885110641e-06, + "loss": 0.6232, + "step": 904 + }, + { + "epoch": 0.06, + "grad_norm": 0.9611027240753174, + "learning_rate": 9.980458368656635e-06, + "loss": 0.7333, + "step": 905 + }, + { + "epoch": 0.06, + "grad_norm": 0.9568818807601929, + "learning_rate": 9.980367642492852e-06, + "loss": 0.7402, + "step": 906 + }, + { + "epoch": 0.06, + "grad_norm": 0.9771458506584167, + "learning_rate": 9.980276706623114e-06, + "loss": 0.6486, + "step": 907 + }, + { + "epoch": 0.06, + "grad_norm": 0.8996546268463135, + "learning_rate": 9.98018556105125e-06, + "loss": 0.6198, + "step": 908 + }, + { + "epoch": 0.06, + "grad_norm": 1.0799936056137085, + "learning_rate": 9.980094205781094e-06, + "loss": 0.6487, + "step": 909 + }, + { + "epoch": 0.06, + "grad_norm": 0.9570757150650024, + "learning_rate": 9.980002640816498e-06, + "loss": 0.6478, + "step": 910 + }, + { + "epoch": 0.06, + "grad_norm": 0.9597965478897095, + "learning_rate": 9.979910866161313e-06, + "loss": 0.7147, + "step": 911 + }, + { + "epoch": 0.06, + "grad_norm": 0.9495083689689636, + "learning_rate": 9.979818881819408e-06, + "loss": 0.6683, + "step": 912 + }, + { + "epoch": 0.06, + "grad_norm": 0.9899744987487793, + "learning_rate": 9.979726687794651e-06, + "loss": 0.752, + "step": 913 + }, + { + "epoch": 0.06, + "grad_norm": 0.9405885934829712, + "learning_rate": 9.979634284090927e-06, + "loss": 0.6088, + "step": 914 + }, + { + "epoch": 0.06, + "grad_norm": 0.9481942057609558, + "learning_rate": 9.979541670712125e-06, + "loss": 0.706, + "step": 915 + }, + { + "epoch": 0.06, + "grad_norm": 1.053983211517334, + "learning_rate": 9.979448847662148e-06, + "loss": 0.6401, + "step": 916 + }, + { + "epoch": 0.06, + "grad_norm": 0.963388204574585, + "learning_rate": 9.979355814944901e-06, + "loss": 0.7043, + "step": 917 + }, + { + "epoch": 0.06, + "grad_norm": 1.0153307914733887, + "learning_rate": 9.979262572564303e-06, + "loss": 0.62, + "step": 918 + }, + { + "epoch": 0.06, + "grad_norm": 0.9814804196357727, + "learning_rate": 9.979169120524279e-06, + "loss": 0.637, + "step": 919 + }, + { + "epoch": 0.06, + "grad_norm": 0.9698815941810608, + "learning_rate": 9.979075458828765e-06, + "loss": 0.6741, + "step": 920 + }, + { + "epoch": 0.06, + "grad_norm": 0.9134169816970825, + "learning_rate": 9.978981587481705e-06, + "loss": 0.6723, + "step": 921 + }, + { + "epoch": 0.06, + "grad_norm": 1.1175658702850342, + "learning_rate": 9.978887506487049e-06, + "loss": 0.744, + "step": 922 + }, + { + "epoch": 0.06, + "grad_norm": 0.9176881909370422, + "learning_rate": 9.978793215848763e-06, + "loss": 0.6377, + "step": 923 + }, + { + "epoch": 0.06, + "grad_norm": 1.0039703845977783, + "learning_rate": 9.978698715570814e-06, + "loss": 0.6938, + "step": 924 + }, + { + "epoch": 0.06, + "grad_norm": 0.969764232635498, + "learning_rate": 9.97860400565718e-06, + "loss": 0.6361, + "step": 925 + }, + { + "epoch": 0.06, + "grad_norm": 0.9281071424484253, + "learning_rate": 9.978509086111852e-06, + "loss": 0.6727, + "step": 926 + }, + { + "epoch": 0.06, + "grad_norm": 0.9164725542068481, + "learning_rate": 9.978413956938824e-06, + "loss": 0.6378, + "step": 927 + }, + { + "epoch": 0.06, + "grad_norm": 0.9597803354263306, + "learning_rate": 9.978318618142104e-06, + "loss": 0.6448, + "step": 928 + }, + { + "epoch": 0.06, + "grad_norm": 0.9781709313392639, + "learning_rate": 9.978223069725706e-06, + "loss": 0.626, + "step": 929 + }, + { + "epoch": 0.06, + "grad_norm": 0.9814243912696838, + "learning_rate": 9.978127311693653e-06, + "loss": 0.5937, + "step": 930 + }, + { + "epoch": 0.06, + "grad_norm": 0.9959214925765991, + "learning_rate": 9.978031344049975e-06, + "loss": 0.6537, + "step": 931 + }, + { + "epoch": 0.06, + "grad_norm": 1.018471121788025, + "learning_rate": 9.977935166798714e-06, + "loss": 0.6903, + "step": 932 + }, + { + "epoch": 0.06, + "grad_norm": 0.9168115854263306, + "learning_rate": 9.977838779943921e-06, + "loss": 0.6261, + "step": 933 + }, + { + "epoch": 0.06, + "grad_norm": 1.0428775548934937, + "learning_rate": 9.977742183489653e-06, + "loss": 0.7146, + "step": 934 + }, + { + "epoch": 0.06, + "grad_norm": 0.9918137192726135, + "learning_rate": 9.977645377439979e-06, + "loss": 0.6469, + "step": 935 + }, + { + "epoch": 0.06, + "grad_norm": 0.9870752096176147, + "learning_rate": 9.977548361798975e-06, + "loss": 0.6586, + "step": 936 + }, + { + "epoch": 0.06, + "grad_norm": 0.9916146993637085, + "learning_rate": 9.977451136570722e-06, + "loss": 0.6951, + "step": 937 + }, + { + "epoch": 0.06, + "grad_norm": 0.894088089466095, + "learning_rate": 9.97735370175932e-06, + "loss": 0.6327, + "step": 938 + }, + { + "epoch": 0.06, + "grad_norm": 0.9738256335258484, + "learning_rate": 9.977256057368865e-06, + "loss": 0.6731, + "step": 939 + }, + { + "epoch": 0.06, + "grad_norm": 0.9997884631156921, + "learning_rate": 9.977158203403475e-06, + "loss": 0.6635, + "step": 940 + }, + { + "epoch": 0.06, + "grad_norm": 0.9893252849578857, + "learning_rate": 9.977060139867268e-06, + "loss": 0.6278, + "step": 941 + }, + { + "epoch": 0.06, + "grad_norm": 0.8766104578971863, + "learning_rate": 9.97696186676437e-06, + "loss": 0.5792, + "step": 942 + }, + { + "epoch": 0.06, + "grad_norm": 0.9894992113113403, + "learning_rate": 9.976863384098923e-06, + "loss": 0.6855, + "step": 943 + }, + { + "epoch": 0.06, + "grad_norm": 1.0352369546890259, + "learning_rate": 9.976764691875072e-06, + "loss": 0.7075, + "step": 944 + }, + { + "epoch": 0.06, + "grad_norm": 0.996104896068573, + "learning_rate": 9.976665790096971e-06, + "loss": 0.6464, + "step": 945 + }, + { + "epoch": 0.06, + "grad_norm": 0.9106736779212952, + "learning_rate": 9.976566678768787e-06, + "loss": 0.5984, + "step": 946 + }, + { + "epoch": 0.06, + "grad_norm": 0.9899172186851501, + "learning_rate": 9.976467357894693e-06, + "loss": 0.6433, + "step": 947 + }, + { + "epoch": 0.06, + "grad_norm": 0.9473981261253357, + "learning_rate": 9.97636782747887e-06, + "loss": 0.7038, + "step": 948 + }, + { + "epoch": 0.06, + "grad_norm": 1.0071048736572266, + "learning_rate": 9.976268087525509e-06, + "loss": 0.7246, + "step": 949 + }, + { + "epoch": 0.06, + "grad_norm": 0.9809601902961731, + "learning_rate": 9.976168138038812e-06, + "loss": 0.6786, + "step": 950 + }, + { + "epoch": 0.06, + "grad_norm": 0.9237947463989258, + "learning_rate": 9.976067979022983e-06, + "loss": 0.685, + "step": 951 + }, + { + "epoch": 0.06, + "grad_norm": 0.9583842754364014, + "learning_rate": 9.975967610482243e-06, + "loss": 0.6639, + "step": 952 + }, + { + "epoch": 0.06, + "grad_norm": 0.9147073030471802, + "learning_rate": 9.975867032420816e-06, + "loss": 0.6512, + "step": 953 + }, + { + "epoch": 0.06, + "grad_norm": 0.9555047750473022, + "learning_rate": 9.97576624484294e-06, + "loss": 0.7145, + "step": 954 + }, + { + "epoch": 0.06, + "grad_norm": 0.9743271470069885, + "learning_rate": 9.975665247752855e-06, + "loss": 0.6743, + "step": 955 + }, + { + "epoch": 0.06, + "grad_norm": 0.9561975598335266, + "learning_rate": 9.975564041154817e-06, + "loss": 0.6491, + "step": 956 + }, + { + "epoch": 0.06, + "grad_norm": 0.971872091293335, + "learning_rate": 9.975462625053085e-06, + "loss": 0.696, + "step": 957 + }, + { + "epoch": 0.06, + "grad_norm": 0.9306091666221619, + "learning_rate": 9.97536099945193e-06, + "loss": 0.6438, + "step": 958 + }, + { + "epoch": 0.06, + "grad_norm": 0.9069042205810547, + "learning_rate": 9.975259164355632e-06, + "loss": 0.6829, + "step": 959 + }, + { + "epoch": 0.06, + "grad_norm": 1.0041426420211792, + "learning_rate": 9.97515711976848e-06, + "loss": 0.6783, + "step": 960 + }, + { + "epoch": 0.06, + "grad_norm": 1.1071757078170776, + "learning_rate": 9.975054865694767e-06, + "loss": 0.6785, + "step": 961 + }, + { + "epoch": 0.06, + "grad_norm": 0.9503558278083801, + "learning_rate": 9.9749524021388e-06, + "loss": 0.7091, + "step": 962 + }, + { + "epoch": 0.06, + "grad_norm": 0.9102316498756409, + "learning_rate": 9.974849729104894e-06, + "loss": 0.7051, + "step": 963 + }, + { + "epoch": 0.06, + "grad_norm": 1.003288984298706, + "learning_rate": 9.974746846597373e-06, + "loss": 0.7456, + "step": 964 + }, + { + "epoch": 0.06, + "grad_norm": 0.9375484585762024, + "learning_rate": 9.974643754620567e-06, + "loss": 0.6835, + "step": 965 + }, + { + "epoch": 0.06, + "grad_norm": 1.0092264413833618, + "learning_rate": 9.97454045317882e-06, + "loss": 0.672, + "step": 966 + }, + { + "epoch": 0.06, + "grad_norm": 0.9897353053092957, + "learning_rate": 9.974436942276477e-06, + "loss": 0.6498, + "step": 967 + }, + { + "epoch": 0.06, + "grad_norm": 0.9781389236450195, + "learning_rate": 9.974333221917903e-06, + "loss": 0.6988, + "step": 968 + }, + { + "epoch": 0.06, + "grad_norm": 0.8853163719177246, + "learning_rate": 9.974229292107458e-06, + "loss": 0.6391, + "step": 969 + }, + { + "epoch": 0.06, + "grad_norm": 1.3123575448989868, + "learning_rate": 9.974125152849523e-06, + "loss": 0.689, + "step": 970 + }, + { + "epoch": 0.06, + "grad_norm": 0.9711446166038513, + "learning_rate": 9.974020804148482e-06, + "loss": 0.645, + "step": 971 + }, + { + "epoch": 0.06, + "grad_norm": 0.9429543614387512, + "learning_rate": 9.973916246008727e-06, + "loss": 0.6561, + "step": 972 + }, + { + "epoch": 0.06, + "grad_norm": 0.9982245564460754, + "learning_rate": 9.973811478434662e-06, + "loss": 0.6915, + "step": 973 + }, + { + "epoch": 0.06, + "grad_norm": 0.9405156373977661, + "learning_rate": 9.9737065014307e-06, + "loss": 0.6852, + "step": 974 + }, + { + "epoch": 0.06, + "grad_norm": 0.9267737865447998, + "learning_rate": 9.973601315001258e-06, + "loss": 0.6724, + "step": 975 + }, + { + "epoch": 0.06, + "grad_norm": 0.9884217977523804, + "learning_rate": 9.973495919150766e-06, + "loss": 0.6892, + "step": 976 + }, + { + "epoch": 0.06, + "grad_norm": 0.8952025771141052, + "learning_rate": 9.973390313883664e-06, + "loss": 0.6149, + "step": 977 + }, + { + "epoch": 0.06, + "grad_norm": 0.916035532951355, + "learning_rate": 9.973284499204396e-06, + "loss": 0.6147, + "step": 978 + }, + { + "epoch": 0.06, + "grad_norm": 0.9775811433792114, + "learning_rate": 9.973178475117419e-06, + "loss": 0.6582, + "step": 979 + }, + { + "epoch": 0.06, + "grad_norm": 0.942755401134491, + "learning_rate": 9.973072241627196e-06, + "loss": 0.7021, + "step": 980 + }, + { + "epoch": 0.06, + "grad_norm": 1.0862394571304321, + "learning_rate": 9.972965798738202e-06, + "loss": 0.7022, + "step": 981 + }, + { + "epoch": 0.06, + "grad_norm": 0.8991437554359436, + "learning_rate": 9.972859146454917e-06, + "loss": 0.6008, + "step": 982 + }, + { + "epoch": 0.06, + "grad_norm": 0.9662036895751953, + "learning_rate": 9.972752284781832e-06, + "loss": 0.6783, + "step": 983 + }, + { + "epoch": 0.06, + "grad_norm": 0.9464378952980042, + "learning_rate": 9.97264521372345e-06, + "loss": 0.6452, + "step": 984 + }, + { + "epoch": 0.06, + "grad_norm": 0.9118345975875854, + "learning_rate": 9.972537933284274e-06, + "loss": 0.6253, + "step": 985 + }, + { + "epoch": 0.06, + "grad_norm": 0.9645686745643616, + "learning_rate": 9.972430443468826e-06, + "loss": 0.6812, + "step": 986 + }, + { + "epoch": 0.06, + "grad_norm": 0.8795304894447327, + "learning_rate": 9.972322744281628e-06, + "loss": 0.5986, + "step": 987 + }, + { + "epoch": 0.06, + "grad_norm": 0.9460269808769226, + "learning_rate": 9.972214835727218e-06, + "loss": 0.6976, + "step": 988 + }, + { + "epoch": 0.06, + "grad_norm": 0.9464765191078186, + "learning_rate": 9.972106717810137e-06, + "loss": 0.6538, + "step": 989 + }, + { + "epoch": 0.06, + "grad_norm": 0.9307100176811218, + "learning_rate": 9.97199839053494e-06, + "loss": 0.7183, + "step": 990 + }, + { + "epoch": 0.06, + "grad_norm": 0.978036105632782, + "learning_rate": 9.971889853906186e-06, + "loss": 0.6879, + "step": 991 + }, + { + "epoch": 0.06, + "grad_norm": 0.9477901458740234, + "learning_rate": 9.971781107928447e-06, + "loss": 0.6093, + "step": 992 + }, + { + "epoch": 0.06, + "grad_norm": 0.9999047517776489, + "learning_rate": 9.9716721526063e-06, + "loss": 0.6431, + "step": 993 + }, + { + "epoch": 0.06, + "grad_norm": 0.9746558666229248, + "learning_rate": 9.971562987944336e-06, + "loss": 0.6916, + "step": 994 + }, + { + "epoch": 0.06, + "grad_norm": 0.9560214281082153, + "learning_rate": 9.971453613947147e-06, + "loss": 0.6746, + "step": 995 + }, + { + "epoch": 0.06, + "grad_norm": 1.0287420749664307, + "learning_rate": 9.971344030619342e-06, + "loss": 0.6463, + "step": 996 + }, + { + "epoch": 0.06, + "grad_norm": 0.9547033309936523, + "learning_rate": 9.971234237965534e-06, + "loss": 0.6567, + "step": 997 + }, + { + "epoch": 0.06, + "grad_norm": 0.9555925130844116, + "learning_rate": 9.971124235990346e-06, + "loss": 0.6834, + "step": 998 + }, + { + "epoch": 0.06, + "grad_norm": 0.9444142580032349, + "learning_rate": 9.971014024698408e-06, + "loss": 0.6531, + "step": 999 + }, + { + "epoch": 0.06, + "grad_norm": 1.0372717380523682, + "learning_rate": 9.970903604094365e-06, + "loss": 0.6779, + "step": 1000 + }, + { + "epoch": 0.06, + "grad_norm": 0.9465329051017761, + "learning_rate": 9.970792974182863e-06, + "loss": 0.6849, + "step": 1001 + }, + { + "epoch": 0.06, + "grad_norm": 0.9567575454711914, + "learning_rate": 9.97068213496856e-06, + "loss": 0.6118, + "step": 1002 + }, + { + "epoch": 0.06, + "grad_norm": 0.9264045357704163, + "learning_rate": 9.970571086456124e-06, + "loss": 0.6617, + "step": 1003 + }, + { + "epoch": 0.06, + "grad_norm": 0.9825202226638794, + "learning_rate": 9.970459828650232e-06, + "loss": 0.6393, + "step": 1004 + }, + { + "epoch": 0.06, + "grad_norm": 0.9990862011909485, + "learning_rate": 9.970348361555566e-06, + "loss": 0.633, + "step": 1005 + }, + { + "epoch": 0.06, + "grad_norm": 0.9339586496353149, + "learning_rate": 9.970236685176821e-06, + "loss": 0.6845, + "step": 1006 + }, + { + "epoch": 0.06, + "grad_norm": 0.9837610125541687, + "learning_rate": 9.9701247995187e-06, + "loss": 0.6371, + "step": 1007 + }, + { + "epoch": 0.06, + "grad_norm": 1.0499521493911743, + "learning_rate": 9.970012704585916e-06, + "loss": 0.6754, + "step": 1008 + }, + { + "epoch": 0.06, + "grad_norm": 1.0463135242462158, + "learning_rate": 9.969900400383183e-06, + "loss": 0.6943, + "step": 1009 + }, + { + "epoch": 0.06, + "grad_norm": 0.9164577126502991, + "learning_rate": 9.969787886915236e-06, + "loss": 0.6393, + "step": 1010 + }, + { + "epoch": 0.06, + "grad_norm": 0.9350469708442688, + "learning_rate": 9.969675164186807e-06, + "loss": 0.6697, + "step": 1011 + }, + { + "epoch": 0.06, + "grad_norm": 0.9471501111984253, + "learning_rate": 9.969562232202647e-06, + "loss": 0.6617, + "step": 1012 + }, + { + "epoch": 0.06, + "grad_norm": 0.9739314913749695, + "learning_rate": 9.969449090967509e-06, + "loss": 0.6864, + "step": 1013 + }, + { + "epoch": 0.06, + "grad_norm": 0.9026748538017273, + "learning_rate": 9.969335740486157e-06, + "loss": 0.5679, + "step": 1014 + }, + { + "epoch": 0.06, + "grad_norm": 0.9329193830490112, + "learning_rate": 9.969222180763363e-06, + "loss": 0.6714, + "step": 1015 + }, + { + "epoch": 0.06, + "grad_norm": 1.0058557987213135, + "learning_rate": 9.96910841180391e-06, + "loss": 0.6131, + "step": 1016 + }, + { + "epoch": 0.06, + "grad_norm": 1.0142805576324463, + "learning_rate": 9.968994433612589e-06, + "loss": 0.7093, + "step": 1017 + }, + { + "epoch": 0.06, + "grad_norm": 0.9591155648231506, + "learning_rate": 9.968880246194198e-06, + "loss": 0.6828, + "step": 1018 + }, + { + "epoch": 0.06, + "grad_norm": 0.9221545457839966, + "learning_rate": 9.968765849553544e-06, + "loss": 0.6824, + "step": 1019 + }, + { + "epoch": 0.06, + "grad_norm": 0.8757246136665344, + "learning_rate": 9.968651243695446e-06, + "loss": 0.6282, + "step": 1020 + }, + { + "epoch": 0.06, + "grad_norm": 1.0177953243255615, + "learning_rate": 9.968536428624729e-06, + "loss": 0.6673, + "step": 1021 + }, + { + "epoch": 0.06, + "grad_norm": 0.9448785781860352, + "learning_rate": 9.968421404346228e-06, + "loss": 0.6768, + "step": 1022 + }, + { + "epoch": 0.06, + "grad_norm": 1.0076022148132324, + "learning_rate": 9.968306170864786e-06, + "loss": 0.7581, + "step": 1023 + }, + { + "epoch": 0.06, + "grad_norm": 1.0110529661178589, + "learning_rate": 9.968190728185251e-06, + "loss": 0.6518, + "step": 1024 + }, + { + "epoch": 0.06, + "grad_norm": 0.8694904446601868, + "learning_rate": 9.968075076312492e-06, + "loss": 0.6453, + "step": 1025 + }, + { + "epoch": 0.07, + "grad_norm": 0.9269656538963318, + "learning_rate": 9.96795921525137e-06, + "loss": 0.6668, + "step": 1026 + }, + { + "epoch": 0.07, + "grad_norm": 0.9647197723388672, + "learning_rate": 9.967843145006771e-06, + "loss": 0.6586, + "step": 1027 + }, + { + "epoch": 0.07, + "grad_norm": 1.0203245878219604, + "learning_rate": 9.967726865583578e-06, + "loss": 0.6558, + "step": 1028 + }, + { + "epoch": 0.07, + "grad_norm": 0.9874720573425293, + "learning_rate": 9.967610376986687e-06, + "loss": 0.651, + "step": 1029 + }, + { + "epoch": 0.07, + "grad_norm": 1.0147123336791992, + "learning_rate": 9.967493679221006e-06, + "loss": 0.7248, + "step": 1030 + }, + { + "epoch": 0.07, + "grad_norm": 0.9211717844009399, + "learning_rate": 9.967376772291446e-06, + "loss": 0.6315, + "step": 1031 + }, + { + "epoch": 0.07, + "grad_norm": 0.9259415864944458, + "learning_rate": 9.96725965620293e-06, + "loss": 0.6483, + "step": 1032 + }, + { + "epoch": 0.07, + "grad_norm": 0.9673779010772705, + "learning_rate": 9.96714233096039e-06, + "loss": 0.648, + "step": 1033 + }, + { + "epoch": 0.07, + "grad_norm": 0.9705022573471069, + "learning_rate": 9.967024796568766e-06, + "loss": 0.7079, + "step": 1034 + }, + { + "epoch": 0.07, + "grad_norm": 0.9708682894706726, + "learning_rate": 9.966907053033006e-06, + "loss": 0.6949, + "step": 1035 + }, + { + "epoch": 0.07, + "grad_norm": 0.9706398844718933, + "learning_rate": 9.96678910035807e-06, + "loss": 0.6657, + "step": 1036 + }, + { + "epoch": 0.07, + "grad_norm": 0.9681613445281982, + "learning_rate": 9.966670938548923e-06, + "loss": 0.6909, + "step": 1037 + }, + { + "epoch": 0.07, + "grad_norm": 1.0291481018066406, + "learning_rate": 9.96655256761054e-06, + "loss": 0.644, + "step": 1038 + }, + { + "epoch": 0.07, + "grad_norm": 0.9305548667907715, + "learning_rate": 9.966433987547906e-06, + "loss": 0.6166, + "step": 1039 + }, + { + "epoch": 0.07, + "grad_norm": 0.9347004890441895, + "learning_rate": 9.966315198366011e-06, + "loss": 0.6963, + "step": 1040 + }, + { + "epoch": 0.07, + "grad_norm": 1.0041406154632568, + "learning_rate": 9.966196200069863e-06, + "loss": 0.6962, + "step": 1041 + }, + { + "epoch": 0.07, + "grad_norm": 1.0330625772476196, + "learning_rate": 9.966076992664469e-06, + "loss": 0.6894, + "step": 1042 + }, + { + "epoch": 0.07, + "grad_norm": 1.0031425952911377, + "learning_rate": 9.965957576154848e-06, + "loss": 0.6555, + "step": 1043 + }, + { + "epoch": 0.07, + "grad_norm": 1.005267858505249, + "learning_rate": 9.96583795054603e-06, + "loss": 0.6799, + "step": 1044 + }, + { + "epoch": 0.07, + "grad_norm": 0.9837890267372131, + "learning_rate": 9.965718115843048e-06, + "loss": 0.6881, + "step": 1045 + }, + { + "epoch": 0.07, + "grad_norm": 0.983871340751648, + "learning_rate": 9.965598072050953e-06, + "loss": 0.6925, + "step": 1046 + }, + { + "epoch": 0.07, + "grad_norm": 0.9536935687065125, + "learning_rate": 9.965477819174796e-06, + "loss": 0.6616, + "step": 1047 + }, + { + "epoch": 0.07, + "grad_norm": 0.9697441458702087, + "learning_rate": 9.96535735721964e-06, + "loss": 0.6473, + "step": 1048 + }, + { + "epoch": 0.07, + "grad_norm": 0.9311846494674683, + "learning_rate": 9.965236686190563e-06, + "loss": 0.734, + "step": 1049 + }, + { + "epoch": 0.07, + "grad_norm": 0.9542213678359985, + "learning_rate": 9.965115806092638e-06, + "loss": 0.6183, + "step": 1050 + }, + { + "epoch": 0.07, + "grad_norm": 0.940539538860321, + "learning_rate": 9.96499471693096e-06, + "loss": 0.64, + "step": 1051 + }, + { + "epoch": 0.07, + "grad_norm": 0.9398403763771057, + "learning_rate": 9.964873418710628e-06, + "loss": 0.573, + "step": 1052 + }, + { + "epoch": 0.07, + "grad_norm": 0.9776535034179688, + "learning_rate": 9.964751911436748e-06, + "loss": 0.6572, + "step": 1053 + }, + { + "epoch": 0.07, + "grad_norm": 0.9758483171463013, + "learning_rate": 9.964630195114432e-06, + "loss": 0.6424, + "step": 1054 + }, + { + "epoch": 0.07, + "grad_norm": 0.9551203846931458, + "learning_rate": 9.964508269748814e-06, + "loss": 0.6773, + "step": 1055 + }, + { + "epoch": 0.07, + "grad_norm": 0.953066885471344, + "learning_rate": 9.96438613534502e-06, + "loss": 0.6551, + "step": 1056 + }, + { + "epoch": 0.07, + "grad_norm": 1.0139051675796509, + "learning_rate": 9.964263791908198e-06, + "loss": 0.654, + "step": 1057 + }, + { + "epoch": 0.07, + "grad_norm": 0.9728212952613831, + "learning_rate": 9.964141239443497e-06, + "loss": 0.612, + "step": 1058 + }, + { + "epoch": 0.07, + "grad_norm": 0.9843549132347107, + "learning_rate": 9.964018477956075e-06, + "loss": 0.6867, + "step": 1059 + }, + { + "epoch": 0.07, + "grad_norm": 0.9203978776931763, + "learning_rate": 9.963895507451104e-06, + "loss": 0.6567, + "step": 1060 + }, + { + "epoch": 0.07, + "grad_norm": 0.9760221242904663, + "learning_rate": 9.963772327933764e-06, + "loss": 0.6684, + "step": 1061 + }, + { + "epoch": 0.07, + "grad_norm": 0.9449279308319092, + "learning_rate": 9.963648939409236e-06, + "loss": 0.6811, + "step": 1062 + }, + { + "epoch": 0.07, + "grad_norm": 0.8834384083747864, + "learning_rate": 9.96352534188272e-06, + "loss": 0.6195, + "step": 1063 + }, + { + "epoch": 0.07, + "grad_norm": 0.9562472701072693, + "learning_rate": 9.963401535359418e-06, + "loss": 0.6353, + "step": 1064 + }, + { + "epoch": 0.07, + "grad_norm": 0.9446646571159363, + "learning_rate": 9.963277519844544e-06, + "loss": 0.617, + "step": 1065 + }, + { + "epoch": 0.07, + "grad_norm": 1.0598340034484863, + "learning_rate": 9.963153295343319e-06, + "loss": 0.7194, + "step": 1066 + }, + { + "epoch": 0.07, + "grad_norm": 0.9084300994873047, + "learning_rate": 9.963028861860975e-06, + "loss": 0.6881, + "step": 1067 + }, + { + "epoch": 0.07, + "grad_norm": 0.9703001976013184, + "learning_rate": 9.962904219402752e-06, + "loss": 0.7235, + "step": 1068 + }, + { + "epoch": 0.07, + "grad_norm": 0.9932600259780884, + "learning_rate": 9.962779367973896e-06, + "loss": 0.6662, + "step": 1069 + }, + { + "epoch": 0.07, + "grad_norm": 0.9670122265815735, + "learning_rate": 9.962654307579665e-06, + "loss": 0.6604, + "step": 1070 + }, + { + "epoch": 0.07, + "grad_norm": 0.9765552282333374, + "learning_rate": 9.962529038225324e-06, + "loss": 0.7055, + "step": 1071 + }, + { + "epoch": 0.07, + "grad_norm": 0.9492608904838562, + "learning_rate": 9.962403559916149e-06, + "loss": 0.6433, + "step": 1072 + }, + { + "epoch": 0.07, + "grad_norm": 0.9491517543792725, + "learning_rate": 9.962277872657422e-06, + "loss": 0.6836, + "step": 1073 + }, + { + "epoch": 0.07, + "grad_norm": 0.950981616973877, + "learning_rate": 9.962151976454439e-06, + "loss": 0.6404, + "step": 1074 + }, + { + "epoch": 0.07, + "grad_norm": 1.0310513973236084, + "learning_rate": 9.962025871312497e-06, + "loss": 0.7125, + "step": 1075 + }, + { + "epoch": 0.07, + "grad_norm": 0.9882835149765015, + "learning_rate": 9.961899557236907e-06, + "loss": 0.6861, + "step": 1076 + }, + { + "epoch": 0.07, + "grad_norm": 0.9338645935058594, + "learning_rate": 9.961773034232987e-06, + "loss": 0.6555, + "step": 1077 + }, + { + "epoch": 0.07, + "grad_norm": 0.9319179058074951, + "learning_rate": 9.961646302306066e-06, + "loss": 0.6467, + "step": 1078 + }, + { + "epoch": 0.07, + "grad_norm": 0.9263783097267151, + "learning_rate": 9.961519361461481e-06, + "loss": 0.6384, + "step": 1079 + }, + { + "epoch": 0.07, + "grad_norm": 0.9706323742866516, + "learning_rate": 9.961392211704573e-06, + "loss": 0.6704, + "step": 1080 + }, + { + "epoch": 0.07, + "grad_norm": 0.9975467920303345, + "learning_rate": 9.9612648530407e-06, + "loss": 0.6444, + "step": 1081 + }, + { + "epoch": 0.07, + "grad_norm": 0.9272286295890808, + "learning_rate": 9.961137285475223e-06, + "loss": 0.6558, + "step": 1082 + }, + { + "epoch": 0.07, + "grad_norm": 0.9486019611358643, + "learning_rate": 9.961009509013512e-06, + "loss": 0.677, + "step": 1083 + }, + { + "epoch": 0.07, + "grad_norm": 0.8935267329216003, + "learning_rate": 9.96088152366095e-06, + "loss": 0.6781, + "step": 1084 + }, + { + "epoch": 0.07, + "grad_norm": 0.9707071781158447, + "learning_rate": 9.960753329422925e-06, + "loss": 0.6578, + "step": 1085 + }, + { + "epoch": 0.07, + "grad_norm": 0.9859130382537842, + "learning_rate": 9.960624926304834e-06, + "loss": 0.6421, + "step": 1086 + }, + { + "epoch": 0.07, + "grad_norm": 0.9117816090583801, + "learning_rate": 9.960496314312085e-06, + "loss": 0.6667, + "step": 1087 + }, + { + "epoch": 0.07, + "grad_norm": 0.9339293837547302, + "learning_rate": 9.96036749345009e-06, + "loss": 0.6645, + "step": 1088 + }, + { + "epoch": 0.07, + "grad_norm": 0.9403766393661499, + "learning_rate": 9.960238463724278e-06, + "loss": 0.6389, + "step": 1089 + }, + { + "epoch": 0.07, + "grad_norm": 0.9280768632888794, + "learning_rate": 9.96010922514008e-06, + "loss": 0.6599, + "step": 1090 + }, + { + "epoch": 0.07, + "grad_norm": 0.9073335528373718, + "learning_rate": 9.959979777702935e-06, + "loss": 0.6426, + "step": 1091 + }, + { + "epoch": 0.07, + "grad_norm": 0.8961593508720398, + "learning_rate": 9.959850121418298e-06, + "loss": 0.6284, + "step": 1092 + }, + { + "epoch": 0.07, + "grad_norm": 0.9164572954177856, + "learning_rate": 9.959720256291626e-06, + "loss": 0.6326, + "step": 1093 + }, + { + "epoch": 0.07, + "grad_norm": 0.9504919052124023, + "learning_rate": 9.959590182328387e-06, + "loss": 0.6923, + "step": 1094 + }, + { + "epoch": 0.07, + "grad_norm": 0.9429534077644348, + "learning_rate": 9.95945989953406e-06, + "loss": 0.6815, + "step": 1095 + }, + { + "epoch": 0.07, + "grad_norm": 0.9838384389877319, + "learning_rate": 9.959329407914129e-06, + "loss": 0.6425, + "step": 1096 + }, + { + "epoch": 0.07, + "grad_norm": 0.9929714798927307, + "learning_rate": 9.959198707474087e-06, + "loss": 0.6323, + "step": 1097 + }, + { + "epoch": 0.07, + "grad_norm": 0.9384214878082275, + "learning_rate": 9.959067798219442e-06, + "loss": 0.6735, + "step": 1098 + }, + { + "epoch": 0.07, + "grad_norm": 0.9922656416893005, + "learning_rate": 9.958936680155702e-06, + "loss": 0.6697, + "step": 1099 + }, + { + "epoch": 0.07, + "grad_norm": 0.9068803787231445, + "learning_rate": 9.958805353288388e-06, + "loss": 0.6207, + "step": 1100 + }, + { + "epoch": 0.07, + "grad_norm": 0.9993635416030884, + "learning_rate": 9.958673817623033e-06, + "loss": 0.6229, + "step": 1101 + }, + { + "epoch": 0.07, + "grad_norm": 1.0061472654342651, + "learning_rate": 9.958542073165172e-06, + "loss": 0.6769, + "step": 1102 + }, + { + "epoch": 0.07, + "grad_norm": 0.9775166511535645, + "learning_rate": 9.958410119920355e-06, + "loss": 0.6552, + "step": 1103 + }, + { + "epoch": 0.07, + "grad_norm": 0.8982160091400146, + "learning_rate": 9.958277957894137e-06, + "loss": 0.635, + "step": 1104 + }, + { + "epoch": 0.07, + "grad_norm": 0.9272829294204712, + "learning_rate": 9.958145587092082e-06, + "loss": 0.586, + "step": 1105 + }, + { + "epoch": 0.07, + "grad_norm": 0.9862303137779236, + "learning_rate": 9.958013007519764e-06, + "loss": 0.7076, + "step": 1106 + }, + { + "epoch": 0.07, + "grad_norm": 0.8938865661621094, + "learning_rate": 9.957880219182767e-06, + "loss": 0.5773, + "step": 1107 + }, + { + "epoch": 0.07, + "grad_norm": 0.9800034761428833, + "learning_rate": 9.957747222086682e-06, + "loss": 0.7099, + "step": 1108 + }, + { + "epoch": 0.07, + "grad_norm": 1.0491564273834229, + "learning_rate": 9.957614016237106e-06, + "loss": 0.6926, + "step": 1109 + }, + { + "epoch": 0.07, + "grad_norm": 0.930266261100769, + "learning_rate": 9.957480601639652e-06, + "loss": 0.7086, + "step": 1110 + }, + { + "epoch": 0.07, + "grad_norm": 0.9674487113952637, + "learning_rate": 9.957346978299935e-06, + "loss": 0.6541, + "step": 1111 + }, + { + "epoch": 0.07, + "grad_norm": 1.0485286712646484, + "learning_rate": 9.957213146223581e-06, + "loss": 0.6749, + "step": 1112 + }, + { + "epoch": 0.07, + "grad_norm": 0.913090169429779, + "learning_rate": 9.957079105416228e-06, + "loss": 0.648, + "step": 1113 + }, + { + "epoch": 0.07, + "grad_norm": 0.9890965819358826, + "learning_rate": 9.956944855883516e-06, + "loss": 0.6896, + "step": 1114 + }, + { + "epoch": 0.07, + "grad_norm": 0.9297420978546143, + "learning_rate": 9.956810397631103e-06, + "loss": 0.7024, + "step": 1115 + }, + { + "epoch": 0.07, + "grad_norm": 1.0065919160842896, + "learning_rate": 9.956675730664646e-06, + "loss": 0.6224, + "step": 1116 + }, + { + "epoch": 0.07, + "grad_norm": 0.992179274559021, + "learning_rate": 9.956540854989817e-06, + "loss": 0.6667, + "step": 1117 + }, + { + "epoch": 0.07, + "grad_norm": 0.9736528992652893, + "learning_rate": 9.956405770612295e-06, + "loss": 0.6669, + "step": 1118 + }, + { + "epoch": 0.07, + "grad_norm": 0.9248270988464355, + "learning_rate": 9.956270477537768e-06, + "loss": 0.6868, + "step": 1119 + }, + { + "epoch": 0.07, + "grad_norm": 0.9713013172149658, + "learning_rate": 9.956134975771934e-06, + "loss": 0.6949, + "step": 1120 + }, + { + "epoch": 0.07, + "grad_norm": 1.0682833194732666, + "learning_rate": 9.955999265320495e-06, + "loss": 0.7197, + "step": 1121 + }, + { + "epoch": 0.07, + "grad_norm": 0.9445773363113403, + "learning_rate": 9.95586334618917e-06, + "loss": 0.6679, + "step": 1122 + }, + { + "epoch": 0.07, + "grad_norm": 0.9021993279457092, + "learning_rate": 9.95572721838368e-06, + "loss": 0.6233, + "step": 1123 + }, + { + "epoch": 0.07, + "grad_norm": 0.9834071397781372, + "learning_rate": 9.955590881909753e-06, + "loss": 0.6273, + "step": 1124 + }, + { + "epoch": 0.07, + "grad_norm": 0.9453941583633423, + "learning_rate": 9.955454336773136e-06, + "loss": 0.6062, + "step": 1125 + }, + { + "epoch": 0.07, + "grad_norm": 0.8219738602638245, + "learning_rate": 9.955317582979575e-06, + "loss": 0.5888, + "step": 1126 + }, + { + "epoch": 0.07, + "grad_norm": 0.9637846350669861, + "learning_rate": 9.95518062053483e-06, + "loss": 0.6445, + "step": 1127 + }, + { + "epoch": 0.07, + "grad_norm": 0.9680708050727844, + "learning_rate": 9.955043449444665e-06, + "loss": 0.6871, + "step": 1128 + }, + { + "epoch": 0.07, + "grad_norm": 0.8683537840843201, + "learning_rate": 9.95490606971486e-06, + "loss": 0.6406, + "step": 1129 + }, + { + "epoch": 0.07, + "grad_norm": 0.896604061126709, + "learning_rate": 9.954768481351196e-06, + "loss": 0.6409, + "step": 1130 + }, + { + "epoch": 0.07, + "grad_norm": 0.9782860279083252, + "learning_rate": 9.954630684359468e-06, + "loss": 0.6409, + "step": 1131 + }, + { + "epoch": 0.07, + "grad_norm": 1.0023294687271118, + "learning_rate": 9.954492678745477e-06, + "loss": 0.6668, + "step": 1132 + }, + { + "epoch": 0.07, + "grad_norm": 0.9098303914070129, + "learning_rate": 9.954354464515035e-06, + "loss": 0.6047, + "step": 1133 + }, + { + "epoch": 0.07, + "grad_norm": 0.9375096559524536, + "learning_rate": 9.95421604167396e-06, + "loss": 0.605, + "step": 1134 + }, + { + "epoch": 0.07, + "grad_norm": 1.070643663406372, + "learning_rate": 9.954077410228084e-06, + "loss": 0.7336, + "step": 1135 + }, + { + "epoch": 0.07, + "grad_norm": 0.941253125667572, + "learning_rate": 9.95393857018324e-06, + "loss": 0.6458, + "step": 1136 + }, + { + "epoch": 0.07, + "grad_norm": 0.9726730585098267, + "learning_rate": 9.95379952154528e-06, + "loss": 0.6416, + "step": 1137 + }, + { + "epoch": 0.07, + "grad_norm": 1.0929338932037354, + "learning_rate": 9.953660264320053e-06, + "loss": 0.7103, + "step": 1138 + }, + { + "epoch": 0.07, + "grad_norm": 0.98173588514328, + "learning_rate": 9.953520798513425e-06, + "loss": 0.7664, + "step": 1139 + }, + { + "epoch": 0.07, + "grad_norm": 0.9463081955909729, + "learning_rate": 9.953381124131269e-06, + "loss": 0.6432, + "step": 1140 + }, + { + "epoch": 0.07, + "grad_norm": 0.967021644115448, + "learning_rate": 9.953241241179462e-06, + "loss": 0.6412, + "step": 1141 + }, + { + "epoch": 0.07, + "grad_norm": 0.9214240312576294, + "learning_rate": 9.953101149663902e-06, + "loss": 0.6414, + "step": 1142 + }, + { + "epoch": 0.07, + "grad_norm": 1.2380410432815552, + "learning_rate": 9.95296084959048e-06, + "loss": 0.5983, + "step": 1143 + }, + { + "epoch": 0.07, + "grad_norm": 0.9872441291809082, + "learning_rate": 9.952820340965109e-06, + "loss": 0.653, + "step": 1144 + }, + { + "epoch": 0.07, + "grad_norm": 0.8805240392684937, + "learning_rate": 9.952679623793702e-06, + "loss": 0.6532, + "step": 1145 + }, + { + "epoch": 0.07, + "grad_norm": 0.9314813017845154, + "learning_rate": 9.952538698082185e-06, + "loss": 0.6649, + "step": 1146 + }, + { + "epoch": 0.07, + "grad_norm": 0.9278802871704102, + "learning_rate": 9.95239756383649e-06, + "loss": 0.6832, + "step": 1147 + }, + { + "epoch": 0.07, + "grad_norm": 0.9457370042800903, + "learning_rate": 9.952256221062566e-06, + "loss": 0.5822, + "step": 1148 + }, + { + "epoch": 0.07, + "grad_norm": 1.0056676864624023, + "learning_rate": 9.952114669766357e-06, + "loss": 0.6653, + "step": 1149 + }, + { + "epoch": 0.07, + "grad_norm": 0.877746045589447, + "learning_rate": 9.951972909953828e-06, + "loss": 0.5947, + "step": 1150 + }, + { + "epoch": 0.07, + "grad_norm": 0.949008047580719, + "learning_rate": 9.951830941630946e-06, + "loss": 0.6857, + "step": 1151 + }, + { + "epoch": 0.07, + "grad_norm": 1.0061525106430054, + "learning_rate": 9.951688764803689e-06, + "loss": 0.7064, + "step": 1152 + }, + { + "epoch": 0.07, + "grad_norm": 1.1196439266204834, + "learning_rate": 9.951546379478044e-06, + "loss": 0.7059, + "step": 1153 + }, + { + "epoch": 0.07, + "grad_norm": 1.003239393234253, + "learning_rate": 9.951403785660005e-06, + "loss": 0.7148, + "step": 1154 + }, + { + "epoch": 0.07, + "grad_norm": 0.9646631479263306, + "learning_rate": 9.95126098335558e-06, + "loss": 0.6991, + "step": 1155 + }, + { + "epoch": 0.07, + "grad_norm": 0.9327188730239868, + "learning_rate": 9.951117972570776e-06, + "loss": 0.6421, + "step": 1156 + }, + { + "epoch": 0.07, + "grad_norm": 0.9369673132896423, + "learning_rate": 9.95097475331162e-06, + "loss": 0.7018, + "step": 1157 + }, + { + "epoch": 0.07, + "grad_norm": 0.9307648539543152, + "learning_rate": 9.950831325584138e-06, + "loss": 0.6523, + "step": 1158 + }, + { + "epoch": 0.07, + "grad_norm": 0.9541182518005371, + "learning_rate": 9.950687689394373e-06, + "loss": 0.6665, + "step": 1159 + }, + { + "epoch": 0.07, + "grad_norm": 0.9604858756065369, + "learning_rate": 9.950543844748372e-06, + "loss": 0.6852, + "step": 1160 + }, + { + "epoch": 0.07, + "grad_norm": 0.9503093957901001, + "learning_rate": 9.950399791652193e-06, + "loss": 0.6652, + "step": 1161 + }, + { + "epoch": 0.07, + "grad_norm": 1.0395541191101074, + "learning_rate": 9.950255530111896e-06, + "loss": 0.7136, + "step": 1162 + }, + { + "epoch": 0.07, + "grad_norm": 0.8791881799697876, + "learning_rate": 9.950111060133562e-06, + "loss": 0.6324, + "step": 1163 + }, + { + "epoch": 0.07, + "grad_norm": 0.8195285201072693, + "learning_rate": 9.94996638172327e-06, + "loss": 0.6415, + "step": 1164 + }, + { + "epoch": 0.07, + "grad_norm": 1.003089427947998, + "learning_rate": 9.949821494887116e-06, + "loss": 0.6959, + "step": 1165 + }, + { + "epoch": 0.07, + "grad_norm": 0.9248343706130981, + "learning_rate": 9.949676399631197e-06, + "loss": 0.6429, + "step": 1166 + }, + { + "epoch": 0.07, + "grad_norm": 0.9678802490234375, + "learning_rate": 9.949531095961621e-06, + "loss": 0.6973, + "step": 1167 + }, + { + "epoch": 0.07, + "grad_norm": 0.968346118927002, + "learning_rate": 9.94938558388451e-06, + "loss": 0.6681, + "step": 1168 + }, + { + "epoch": 0.07, + "grad_norm": 0.9063887000083923, + "learning_rate": 9.94923986340599e-06, + "loss": 0.6448, + "step": 1169 + }, + { + "epoch": 0.07, + "grad_norm": 0.8939194083213806, + "learning_rate": 9.949093934532196e-06, + "loss": 0.6124, + "step": 1170 + }, + { + "epoch": 0.07, + "grad_norm": 0.9329174757003784, + "learning_rate": 9.948947797269275e-06, + "loss": 0.6408, + "step": 1171 + }, + { + "epoch": 0.07, + "grad_norm": 0.8747833967208862, + "learning_rate": 9.948801451623376e-06, + "loss": 0.6501, + "step": 1172 + }, + { + "epoch": 0.07, + "grad_norm": 0.9457252621650696, + "learning_rate": 9.948654897600664e-06, + "loss": 0.6845, + "step": 1173 + }, + { + "epoch": 0.07, + "grad_norm": 0.9753620028495789, + "learning_rate": 9.94850813520731e-06, + "loss": 0.6558, + "step": 1174 + }, + { + "epoch": 0.07, + "grad_norm": 0.9322195053100586, + "learning_rate": 9.948361164449493e-06, + "loss": 0.6277, + "step": 1175 + }, + { + "epoch": 0.07, + "grad_norm": 0.9404197335243225, + "learning_rate": 9.948213985333403e-06, + "loss": 0.6757, + "step": 1176 + }, + { + "epoch": 0.07, + "grad_norm": 1.0010050535202026, + "learning_rate": 9.948066597865234e-06, + "loss": 0.6567, + "step": 1177 + }, + { + "epoch": 0.07, + "grad_norm": 0.9383962750434875, + "learning_rate": 9.947919002051194e-06, + "loss": 0.6576, + "step": 1178 + }, + { + "epoch": 0.07, + "grad_norm": 0.9984327554702759, + "learning_rate": 9.947771197897495e-06, + "loss": 0.6455, + "step": 1179 + }, + { + "epoch": 0.07, + "grad_norm": 0.968433678150177, + "learning_rate": 9.947623185410366e-06, + "loss": 0.6596, + "step": 1180 + }, + { + "epoch": 0.07, + "grad_norm": 0.9923335313796997, + "learning_rate": 9.947474964596036e-06, + "loss": 0.7174, + "step": 1181 + }, + { + "epoch": 0.07, + "grad_norm": 0.9914311170578003, + "learning_rate": 9.947326535460744e-06, + "loss": 0.6712, + "step": 1182 + }, + { + "epoch": 0.07, + "grad_norm": 0.9265934824943542, + "learning_rate": 9.947177898010745e-06, + "loss": 0.6429, + "step": 1183 + }, + { + "epoch": 0.08, + "grad_norm": 0.9598346948623657, + "learning_rate": 9.947029052252293e-06, + "loss": 0.633, + "step": 1184 + }, + { + "epoch": 0.08, + "grad_norm": 0.907012939453125, + "learning_rate": 9.946879998191656e-06, + "loss": 0.6516, + "step": 1185 + }, + { + "epoch": 0.08, + "grad_norm": 0.9562612771987915, + "learning_rate": 9.946730735835112e-06, + "loss": 0.7378, + "step": 1186 + }, + { + "epoch": 0.08, + "grad_norm": 0.9579821228981018, + "learning_rate": 9.946581265188947e-06, + "loss": 0.6426, + "step": 1187 + }, + { + "epoch": 0.08, + "grad_norm": 0.9079206585884094, + "learning_rate": 9.946431586259451e-06, + "loss": 0.6513, + "step": 1188 + }, + { + "epoch": 0.08, + "grad_norm": 0.9543782472610474, + "learning_rate": 9.946281699052928e-06, + "loss": 0.6829, + "step": 1189 + }, + { + "epoch": 0.08, + "grad_norm": 0.9957901239395142, + "learning_rate": 9.946131603575691e-06, + "loss": 0.7518, + "step": 1190 + }, + { + "epoch": 0.08, + "grad_norm": 0.971076250076294, + "learning_rate": 9.945981299834058e-06, + "loss": 0.6697, + "step": 1191 + }, + { + "epoch": 0.08, + "grad_norm": 0.9861418008804321, + "learning_rate": 9.945830787834358e-06, + "loss": 0.6971, + "step": 1192 + }, + { + "epoch": 0.08, + "grad_norm": 0.9396786093711853, + "learning_rate": 9.945680067582928e-06, + "loss": 0.6282, + "step": 1193 + }, + { + "epoch": 0.08, + "grad_norm": 0.8945440053939819, + "learning_rate": 9.945529139086116e-06, + "loss": 0.6315, + "step": 1194 + }, + { + "epoch": 0.08, + "grad_norm": 1.0057705640792847, + "learning_rate": 9.945378002350277e-06, + "loss": 0.6682, + "step": 1195 + }, + { + "epoch": 0.08, + "grad_norm": 1.003580093383789, + "learning_rate": 9.945226657381773e-06, + "loss": 0.6859, + "step": 1196 + }, + { + "epoch": 0.08, + "grad_norm": 0.8602601289749146, + "learning_rate": 9.945075104186978e-06, + "loss": 0.6245, + "step": 1197 + }, + { + "epoch": 0.08, + "grad_norm": 0.8855172395706177, + "learning_rate": 9.944923342772272e-06, + "loss": 0.656, + "step": 1198 + }, + { + "epoch": 0.08, + "grad_norm": 0.916766881942749, + "learning_rate": 9.944771373144047e-06, + "loss": 0.635, + "step": 1199 + }, + { + "epoch": 0.08, + "grad_norm": 0.8637309670448303, + "learning_rate": 9.944619195308701e-06, + "loss": 0.6163, + "step": 1200 + }, + { + "epoch": 0.08, + "grad_norm": 0.958526611328125, + "learning_rate": 9.944466809272642e-06, + "loss": 0.6275, + "step": 1201 + }, + { + "epoch": 0.08, + "grad_norm": 0.900386393070221, + "learning_rate": 9.944314215042286e-06, + "loss": 0.6159, + "step": 1202 + }, + { + "epoch": 0.08, + "grad_norm": 0.9339030385017395, + "learning_rate": 9.944161412624059e-06, + "loss": 0.6542, + "step": 1203 + }, + { + "epoch": 0.08, + "grad_norm": 0.9049733281135559, + "learning_rate": 9.944008402024395e-06, + "loss": 0.688, + "step": 1204 + }, + { + "epoch": 0.08, + "grad_norm": 1.030328392982483, + "learning_rate": 9.943855183249734e-06, + "loss": 0.6951, + "step": 1205 + }, + { + "epoch": 0.08, + "grad_norm": 0.9944655299186707, + "learning_rate": 9.94370175630653e-06, + "loss": 0.7017, + "step": 1206 + }, + { + "epoch": 0.08, + "grad_norm": 0.9828429222106934, + "learning_rate": 9.943548121201243e-06, + "loss": 0.6717, + "step": 1207 + }, + { + "epoch": 0.08, + "grad_norm": 0.9837692975997925, + "learning_rate": 9.943394277940344e-06, + "loss": 0.6156, + "step": 1208 + }, + { + "epoch": 0.08, + "grad_norm": 1.0148766040802002, + "learning_rate": 9.943240226530306e-06, + "loss": 0.7246, + "step": 1209 + }, + { + "epoch": 0.08, + "grad_norm": 1.0045223236083984, + "learning_rate": 9.94308596697762e-06, + "loss": 0.6648, + "step": 1210 + }, + { + "epoch": 0.08, + "grad_norm": 0.9967672824859619, + "learning_rate": 9.942931499288779e-06, + "loss": 0.6908, + "step": 1211 + }, + { + "epoch": 0.08, + "grad_norm": 0.931037187576294, + "learning_rate": 9.942776823470288e-06, + "loss": 0.6155, + "step": 1212 + }, + { + "epoch": 0.08, + "grad_norm": 0.9639803171157837, + "learning_rate": 9.94262193952866e-06, + "loss": 0.6644, + "step": 1213 + }, + { + "epoch": 0.08, + "grad_norm": 0.9461570978164673, + "learning_rate": 9.942466847470415e-06, + "loss": 0.6588, + "step": 1214 + }, + { + "epoch": 0.08, + "grad_norm": 0.8313033580780029, + "learning_rate": 9.942311547302087e-06, + "loss": 0.5843, + "step": 1215 + }, + { + "epoch": 0.08, + "grad_norm": 0.9389255046844482, + "learning_rate": 9.94215603903021e-06, + "loss": 0.65, + "step": 1216 + }, + { + "epoch": 0.08, + "grad_norm": 0.9553146362304688, + "learning_rate": 9.942000322661339e-06, + "loss": 0.6667, + "step": 1217 + }, + { + "epoch": 0.08, + "grad_norm": 0.9988784193992615, + "learning_rate": 9.941844398202022e-06, + "loss": 0.6178, + "step": 1218 + }, + { + "epoch": 0.08, + "grad_norm": 0.9160767197608948, + "learning_rate": 9.941688265658832e-06, + "loss": 0.6256, + "step": 1219 + }, + { + "epoch": 0.08, + "grad_norm": 0.9464467167854309, + "learning_rate": 9.941531925038337e-06, + "loss": 0.6528, + "step": 1220 + }, + { + "epoch": 0.08, + "grad_norm": 0.9644220471382141, + "learning_rate": 9.941375376347124e-06, + "loss": 0.6812, + "step": 1221 + }, + { + "epoch": 0.08, + "grad_norm": 0.9486405849456787, + "learning_rate": 9.941218619591783e-06, + "loss": 0.6619, + "step": 1222 + }, + { + "epoch": 0.08, + "grad_norm": 1.004610538482666, + "learning_rate": 9.941061654778917e-06, + "loss": 0.6183, + "step": 1223 + }, + { + "epoch": 0.08, + "grad_norm": 0.991166889667511, + "learning_rate": 9.940904481915132e-06, + "loss": 0.6616, + "step": 1224 + }, + { + "epoch": 0.08, + "grad_norm": 0.913848876953125, + "learning_rate": 9.940747101007049e-06, + "loss": 0.6455, + "step": 1225 + }, + { + "epoch": 0.08, + "grad_norm": 0.8956865072250366, + "learning_rate": 9.940589512061292e-06, + "loss": 0.6446, + "step": 1226 + }, + { + "epoch": 0.08, + "grad_norm": 0.9563295841217041, + "learning_rate": 9.940431715084498e-06, + "loss": 0.6442, + "step": 1227 + }, + { + "epoch": 0.08, + "grad_norm": 0.8902249336242676, + "learning_rate": 9.94027371008331e-06, + "loss": 0.63, + "step": 1228 + }, + { + "epoch": 0.08, + "grad_norm": 0.964384138584137, + "learning_rate": 9.940115497064383e-06, + "loss": 0.679, + "step": 1229 + }, + { + "epoch": 0.08, + "grad_norm": 1.0036017894744873, + "learning_rate": 9.939957076034379e-06, + "loss": 0.6915, + "step": 1230 + }, + { + "epoch": 0.08, + "grad_norm": 1.0034871101379395, + "learning_rate": 9.939798446999965e-06, + "loss": 0.6486, + "step": 1231 + }, + { + "epoch": 0.08, + "grad_norm": 0.8999437689781189, + "learning_rate": 9.939639609967825e-06, + "loss": 0.6805, + "step": 1232 + }, + { + "epoch": 0.08, + "grad_norm": 0.9058635830879211, + "learning_rate": 9.939480564944642e-06, + "loss": 0.6467, + "step": 1233 + }, + { + "epoch": 0.08, + "grad_norm": 0.9270319938659668, + "learning_rate": 9.939321311937117e-06, + "loss": 0.636, + "step": 1234 + }, + { + "epoch": 0.08, + "grad_norm": 0.9657304883003235, + "learning_rate": 9.939161850951955e-06, + "loss": 0.6824, + "step": 1235 + }, + { + "epoch": 0.08, + "grad_norm": 0.9326258301734924, + "learning_rate": 9.939002181995869e-06, + "loss": 0.6247, + "step": 1236 + }, + { + "epoch": 0.08, + "grad_norm": 0.8920637965202332, + "learning_rate": 9.938842305075583e-06, + "loss": 0.6373, + "step": 1237 + }, + { + "epoch": 0.08, + "grad_norm": 0.9409562349319458, + "learning_rate": 9.938682220197828e-06, + "loss": 0.6077, + "step": 1238 + }, + { + "epoch": 0.08, + "grad_norm": 0.8801417946815491, + "learning_rate": 9.938521927369344e-06, + "loss": 0.6141, + "step": 1239 + }, + { + "epoch": 0.08, + "grad_norm": 0.9063442945480347, + "learning_rate": 9.938361426596883e-06, + "loss": 0.6628, + "step": 1240 + }, + { + "epoch": 0.08, + "grad_norm": 0.9930490851402283, + "learning_rate": 9.938200717887202e-06, + "loss": 0.6316, + "step": 1241 + }, + { + "epoch": 0.08, + "grad_norm": 0.969541609287262, + "learning_rate": 9.938039801247066e-06, + "loss": 0.6512, + "step": 1242 + }, + { + "epoch": 0.08, + "grad_norm": 1.0176066160202026, + "learning_rate": 9.937878676683254e-06, + "loss": 0.706, + "step": 1243 + }, + { + "epoch": 0.08, + "grad_norm": 0.9394564032554626, + "learning_rate": 9.937717344202548e-06, + "loss": 0.5894, + "step": 1244 + }, + { + "epoch": 0.08, + "grad_norm": 0.9627434611320496, + "learning_rate": 9.93755580381174e-06, + "loss": 0.7024, + "step": 1245 + }, + { + "epoch": 0.08, + "grad_norm": 0.8768373131752014, + "learning_rate": 9.937394055517635e-06, + "loss": 0.6532, + "step": 1246 + }, + { + "epoch": 0.08, + "grad_norm": 0.941260039806366, + "learning_rate": 9.937232099327044e-06, + "loss": 0.5845, + "step": 1247 + }, + { + "epoch": 0.08, + "grad_norm": 0.9536455869674683, + "learning_rate": 9.937069935246782e-06, + "loss": 0.6578, + "step": 1248 + }, + { + "epoch": 0.08, + "grad_norm": 1.0150847434997559, + "learning_rate": 9.93690756328368e-06, + "loss": 0.6162, + "step": 1249 + }, + { + "epoch": 0.08, + "grad_norm": 0.8984355926513672, + "learning_rate": 9.936744983444576e-06, + "loss": 0.6779, + "step": 1250 + }, + { + "epoch": 0.08, + "grad_norm": 0.9334084987640381, + "learning_rate": 9.936582195736314e-06, + "loss": 0.6434, + "step": 1251 + }, + { + "epoch": 0.08, + "grad_norm": 0.9719336628913879, + "learning_rate": 9.936419200165748e-06, + "loss": 0.608, + "step": 1252 + }, + { + "epoch": 0.08, + "grad_norm": 0.9293937087059021, + "learning_rate": 9.936255996739743e-06, + "loss": 0.6417, + "step": 1253 + }, + { + "epoch": 0.08, + "grad_norm": 0.9545564651489258, + "learning_rate": 9.93609258546517e-06, + "loss": 0.68, + "step": 1254 + }, + { + "epoch": 0.08, + "grad_norm": 0.9403777122497559, + "learning_rate": 9.93592896634891e-06, + "loss": 0.6945, + "step": 1255 + }, + { + "epoch": 0.08, + "grad_norm": 1.053970456123352, + "learning_rate": 9.93576513939785e-06, + "loss": 0.6557, + "step": 1256 + }, + { + "epoch": 0.08, + "grad_norm": 0.9495333433151245, + "learning_rate": 9.935601104618892e-06, + "loss": 0.7352, + "step": 1257 + }, + { + "epoch": 0.08, + "grad_norm": 0.9049481153488159, + "learning_rate": 9.93543686201894e-06, + "loss": 0.664, + "step": 1258 + }, + { + "epoch": 0.08, + "grad_norm": 0.96930330991745, + "learning_rate": 9.935272411604913e-06, + "loss": 0.6956, + "step": 1259 + }, + { + "epoch": 0.08, + "grad_norm": 0.8767880201339722, + "learning_rate": 9.935107753383733e-06, + "loss": 0.6015, + "step": 1260 + }, + { + "epoch": 0.08, + "grad_norm": 0.9508046507835388, + "learning_rate": 9.93494288736233e-06, + "loss": 0.6727, + "step": 1261 + }, + { + "epoch": 0.08, + "grad_norm": 0.9858577847480774, + "learning_rate": 9.934777813547653e-06, + "loss": 0.6565, + "step": 1262 + }, + { + "epoch": 0.08, + "grad_norm": 0.9373133778572083, + "learning_rate": 9.934612531946648e-06, + "loss": 0.6131, + "step": 1263 + }, + { + "epoch": 0.08, + "grad_norm": 0.9406293630599976, + "learning_rate": 9.934447042566275e-06, + "loss": 0.6567, + "step": 1264 + }, + { + "epoch": 0.08, + "grad_norm": 0.990612804889679, + "learning_rate": 9.934281345413504e-06, + "loss": 0.6956, + "step": 1265 + }, + { + "epoch": 0.08, + "grad_norm": 0.9480006694793701, + "learning_rate": 9.934115440495311e-06, + "loss": 0.6838, + "step": 1266 + }, + { + "epoch": 0.08, + "grad_norm": 0.9584172964096069, + "learning_rate": 9.93394932781868e-06, + "loss": 0.6654, + "step": 1267 + }, + { + "epoch": 0.08, + "grad_norm": 0.9526914954185486, + "learning_rate": 9.933783007390608e-06, + "loss": 0.6718, + "step": 1268 + }, + { + "epoch": 0.08, + "grad_norm": 0.9477076530456543, + "learning_rate": 9.933616479218095e-06, + "loss": 0.6368, + "step": 1269 + }, + { + "epoch": 0.08, + "grad_norm": 0.9243208765983582, + "learning_rate": 9.933449743308155e-06, + "loss": 0.6717, + "step": 1270 + }, + { + "epoch": 0.08, + "grad_norm": 0.9876498579978943, + "learning_rate": 9.93328279966781e-06, + "loss": 0.6841, + "step": 1271 + }, + { + "epoch": 0.08, + "grad_norm": 0.9659183025360107, + "learning_rate": 9.933115648304087e-06, + "loss": 0.6039, + "step": 1272 + }, + { + "epoch": 0.08, + "grad_norm": 0.8511553406715393, + "learning_rate": 9.932948289224025e-06, + "loss": 0.6388, + "step": 1273 + }, + { + "epoch": 0.08, + "grad_norm": 0.9214879274368286, + "learning_rate": 9.932780722434671e-06, + "loss": 0.6694, + "step": 1274 + }, + { + "epoch": 0.08, + "grad_norm": 0.8896194696426392, + "learning_rate": 9.932612947943084e-06, + "loss": 0.6285, + "step": 1275 + }, + { + "epoch": 0.08, + "grad_norm": 1.1229159832000732, + "learning_rate": 9.932444965756321e-06, + "loss": 0.6201, + "step": 1276 + }, + { + "epoch": 0.08, + "grad_norm": 0.9542286396026611, + "learning_rate": 9.93227677588146e-06, + "loss": 0.6795, + "step": 1277 + }, + { + "epoch": 0.08, + "grad_norm": 0.9124268293380737, + "learning_rate": 9.932108378325582e-06, + "loss": 0.6349, + "step": 1278 + }, + { + "epoch": 0.08, + "grad_norm": 0.8908014893531799, + "learning_rate": 9.931939773095779e-06, + "loss": 0.6293, + "step": 1279 + }, + { + "epoch": 0.08, + "grad_norm": 0.9545292258262634, + "learning_rate": 9.93177096019915e-06, + "loss": 0.6677, + "step": 1280 + }, + { + "epoch": 0.08, + "grad_norm": 0.9467611908912659, + "learning_rate": 9.9316019396428e-06, + "loss": 0.7047, + "step": 1281 + }, + { + "epoch": 0.08, + "grad_norm": 0.8944831490516663, + "learning_rate": 9.931432711433849e-06, + "loss": 0.6674, + "step": 1282 + }, + { + "epoch": 0.08, + "grad_norm": 0.9431514739990234, + "learning_rate": 9.93126327557942e-06, + "loss": 0.7063, + "step": 1283 + }, + { + "epoch": 0.08, + "grad_norm": 0.9257122874259949, + "learning_rate": 9.931093632086651e-06, + "loss": 0.6482, + "step": 1284 + }, + { + "epoch": 0.08, + "grad_norm": 0.916651725769043, + "learning_rate": 9.930923780962683e-06, + "loss": 0.6141, + "step": 1285 + }, + { + "epoch": 0.08, + "grad_norm": 0.9624120593070984, + "learning_rate": 9.930753722214668e-06, + "loss": 0.6743, + "step": 1286 + }, + { + "epoch": 0.08, + "grad_norm": 0.8737559914588928, + "learning_rate": 9.930583455849766e-06, + "loss": 0.5961, + "step": 1287 + }, + { + "epoch": 0.08, + "grad_norm": 1.0165629386901855, + "learning_rate": 9.930412981875148e-06, + "loss": 0.6855, + "step": 1288 + }, + { + "epoch": 0.08, + "grad_norm": 0.9609097242355347, + "learning_rate": 9.93024230029799e-06, + "loss": 0.7235, + "step": 1289 + }, + { + "epoch": 0.08, + "grad_norm": 0.9250974059104919, + "learning_rate": 9.93007141112548e-06, + "loss": 0.6568, + "step": 1290 + }, + { + "epoch": 0.08, + "grad_norm": 0.9859768748283386, + "learning_rate": 9.929900314364813e-06, + "loss": 0.6838, + "step": 1291 + }, + { + "epoch": 0.08, + "grad_norm": 0.9288014769554138, + "learning_rate": 9.929729010023195e-06, + "loss": 0.6676, + "step": 1292 + }, + { + "epoch": 0.08, + "grad_norm": 0.9711349010467529, + "learning_rate": 9.929557498107836e-06, + "loss": 0.6951, + "step": 1293 + }, + { + "epoch": 0.08, + "grad_norm": 0.9767155051231384, + "learning_rate": 9.929385778625959e-06, + "loss": 0.6707, + "step": 1294 + }, + { + "epoch": 0.08, + "grad_norm": 0.9217318296432495, + "learning_rate": 9.929213851584798e-06, + "loss": 0.6735, + "step": 1295 + }, + { + "epoch": 0.08, + "grad_norm": 0.9826382398605347, + "learning_rate": 9.929041716991587e-06, + "loss": 0.6452, + "step": 1296 + }, + { + "epoch": 0.08, + "grad_norm": 0.9821561574935913, + "learning_rate": 9.928869374853576e-06, + "loss": 0.6308, + "step": 1297 + }, + { + "epoch": 0.08, + "grad_norm": 0.9427945613861084, + "learning_rate": 9.928696825178021e-06, + "loss": 0.6526, + "step": 1298 + }, + { + "epoch": 0.08, + "grad_norm": 0.9248101711273193, + "learning_rate": 9.92852406797219e-06, + "loss": 0.6777, + "step": 1299 + }, + { + "epoch": 0.08, + "grad_norm": 1.0439354181289673, + "learning_rate": 9.928351103243356e-06, + "loss": 0.693, + "step": 1300 + }, + { + "epoch": 0.08, + "grad_norm": 0.8999105095863342, + "learning_rate": 9.928177930998801e-06, + "loss": 0.6325, + "step": 1301 + }, + { + "epoch": 0.08, + "grad_norm": 0.9729776382446289, + "learning_rate": 9.928004551245818e-06, + "loss": 0.6127, + "step": 1302 + }, + { + "epoch": 0.08, + "grad_norm": 0.9380604028701782, + "learning_rate": 9.927830963991704e-06, + "loss": 0.6486, + "step": 1303 + }, + { + "epoch": 0.08, + "grad_norm": 0.9806588888168335, + "learning_rate": 9.927657169243773e-06, + "loss": 0.7019, + "step": 1304 + }, + { + "epoch": 0.08, + "grad_norm": 0.9600833654403687, + "learning_rate": 9.92748316700934e-06, + "loss": 0.7007, + "step": 1305 + }, + { + "epoch": 0.08, + "grad_norm": 0.8898829817771912, + "learning_rate": 9.927308957295733e-06, + "loss": 0.6332, + "step": 1306 + }, + { + "epoch": 0.08, + "grad_norm": 0.9268112182617188, + "learning_rate": 9.927134540110286e-06, + "loss": 0.6576, + "step": 1307 + }, + { + "epoch": 0.08, + "grad_norm": 0.8421509861946106, + "learning_rate": 9.926959915460344e-06, + "loss": 0.6011, + "step": 1308 + }, + { + "epoch": 0.08, + "grad_norm": 0.8830382823944092, + "learning_rate": 9.926785083353258e-06, + "loss": 0.5837, + "step": 1309 + }, + { + "epoch": 0.08, + "grad_norm": 0.9646912217140198, + "learning_rate": 9.926610043796394e-06, + "loss": 0.6313, + "step": 1310 + }, + { + "epoch": 0.08, + "grad_norm": 0.9010607004165649, + "learning_rate": 9.926434796797117e-06, + "loss": 0.622, + "step": 1311 + }, + { + "epoch": 0.08, + "grad_norm": 0.976517379283905, + "learning_rate": 9.92625934236281e-06, + "loss": 0.6945, + "step": 1312 + }, + { + "epoch": 0.08, + "grad_norm": 1.0138803720474243, + "learning_rate": 9.92608368050086e-06, + "loss": 0.6774, + "step": 1313 + }, + { + "epoch": 0.08, + "grad_norm": 0.9496868848800659, + "learning_rate": 9.925907811218661e-06, + "loss": 0.6395, + "step": 1314 + }, + { + "epoch": 0.08, + "grad_norm": 0.9517762064933777, + "learning_rate": 9.925731734523621e-06, + "loss": 0.6851, + "step": 1315 + }, + { + "epoch": 0.08, + "grad_norm": 0.8854117393493652, + "learning_rate": 9.925555450423153e-06, + "loss": 0.6039, + "step": 1316 + }, + { + "epoch": 0.08, + "grad_norm": 0.975888729095459, + "learning_rate": 9.92537895892468e-06, + "loss": 0.6392, + "step": 1317 + }, + { + "epoch": 0.08, + "grad_norm": 0.9728052616119385, + "learning_rate": 9.925202260035632e-06, + "loss": 0.6934, + "step": 1318 + }, + { + "epoch": 0.08, + "grad_norm": 0.8676881194114685, + "learning_rate": 9.925025353763452e-06, + "loss": 0.608, + "step": 1319 + }, + { + "epoch": 0.08, + "grad_norm": 0.9743890762329102, + "learning_rate": 9.924848240115585e-06, + "loss": 0.6726, + "step": 1320 + }, + { + "epoch": 0.08, + "grad_norm": 0.8968915939331055, + "learning_rate": 9.924670919099493e-06, + "loss": 0.6049, + "step": 1321 + }, + { + "epoch": 0.08, + "grad_norm": 0.8923019766807556, + "learning_rate": 9.92449339072264e-06, + "loss": 0.6528, + "step": 1322 + }, + { + "epoch": 0.08, + "grad_norm": 0.9763730764389038, + "learning_rate": 9.924315654992501e-06, + "loss": 0.6355, + "step": 1323 + }, + { + "epoch": 0.08, + "grad_norm": 0.9337408542633057, + "learning_rate": 9.924137711916559e-06, + "loss": 0.6283, + "step": 1324 + }, + { + "epoch": 0.08, + "grad_norm": 0.9374693036079407, + "learning_rate": 9.92395956150231e-06, + "loss": 0.6538, + "step": 1325 + }, + { + "epoch": 0.08, + "grad_norm": 1.0116811990737915, + "learning_rate": 9.923781203757253e-06, + "loss": 0.6373, + "step": 1326 + }, + { + "epoch": 0.08, + "grad_norm": 0.9618993997573853, + "learning_rate": 9.923602638688897e-06, + "loss": 0.6516, + "step": 1327 + }, + { + "epoch": 0.08, + "grad_norm": 0.9200368523597717, + "learning_rate": 9.923423866304761e-06, + "loss": 0.6495, + "step": 1328 + }, + { + "epoch": 0.08, + "grad_norm": 0.9342839121818542, + "learning_rate": 9.923244886612375e-06, + "loss": 0.6653, + "step": 1329 + }, + { + "epoch": 0.08, + "grad_norm": 0.9107709527015686, + "learning_rate": 9.923065699619273e-06, + "loss": 0.6235, + "step": 1330 + }, + { + "epoch": 0.08, + "grad_norm": 0.9179040193557739, + "learning_rate": 9.922886305333e-06, + "loss": 0.6449, + "step": 1331 + }, + { + "epoch": 0.08, + "grad_norm": 0.9737808108329773, + "learning_rate": 9.922706703761111e-06, + "loss": 0.6355, + "step": 1332 + }, + { + "epoch": 0.08, + "grad_norm": 0.9478714466094971, + "learning_rate": 9.922526894911166e-06, + "loss": 0.6301, + "step": 1333 + }, + { + "epoch": 0.08, + "grad_norm": 0.9034336805343628, + "learning_rate": 9.922346878790739e-06, + "loss": 0.6711, + "step": 1334 + }, + { + "epoch": 0.08, + "grad_norm": 0.9243572354316711, + "learning_rate": 9.922166655407408e-06, + "loss": 0.6703, + "step": 1335 + }, + { + "epoch": 0.08, + "grad_norm": 0.9634230136871338, + "learning_rate": 9.921986224768762e-06, + "loss": 0.629, + "step": 1336 + }, + { + "epoch": 0.08, + "grad_norm": 0.975191593170166, + "learning_rate": 9.9218055868824e-06, + "loss": 0.6973, + "step": 1337 + }, + { + "epoch": 0.08, + "grad_norm": 0.9010855555534363, + "learning_rate": 9.921624741755924e-06, + "loss": 0.6279, + "step": 1338 + }, + { + "epoch": 0.08, + "grad_norm": 0.9838567972183228, + "learning_rate": 9.921443689396952e-06, + "loss": 0.6527, + "step": 1339 + }, + { + "epoch": 0.08, + "grad_norm": 0.9376393556594849, + "learning_rate": 9.921262429813107e-06, + "loss": 0.5999, + "step": 1340 + }, + { + "epoch": 0.08, + "grad_norm": 1.01847505569458, + "learning_rate": 9.921080963012021e-06, + "loss": 0.637, + "step": 1341 + }, + { + "epoch": 0.09, + "grad_norm": 0.9339948892593384, + "learning_rate": 9.920899289001335e-06, + "loss": 0.7109, + "step": 1342 + }, + { + "epoch": 0.09, + "grad_norm": 0.8605727553367615, + "learning_rate": 9.9207174077887e-06, + "loss": 0.6148, + "step": 1343 + }, + { + "epoch": 0.09, + "grad_norm": 0.9776952862739563, + "learning_rate": 9.92053531938177e-06, + "loss": 0.6101, + "step": 1344 + }, + { + "epoch": 0.09, + "grad_norm": 0.9384501576423645, + "learning_rate": 9.920353023788216e-06, + "loss": 0.6861, + "step": 1345 + }, + { + "epoch": 0.09, + "grad_norm": 1.0112212896347046, + "learning_rate": 9.920170521015714e-06, + "loss": 0.6201, + "step": 1346 + }, + { + "epoch": 0.09, + "grad_norm": 0.993434488773346, + "learning_rate": 9.919987811071946e-06, + "loss": 0.7198, + "step": 1347 + }, + { + "epoch": 0.09, + "grad_norm": 1.0135927200317383, + "learning_rate": 9.919804893964607e-06, + "loss": 0.7262, + "step": 1348 + }, + { + "epoch": 0.09, + "grad_norm": 0.9492275714874268, + "learning_rate": 9.9196217697014e-06, + "loss": 0.6953, + "step": 1349 + }, + { + "epoch": 0.09, + "grad_norm": 0.9331912398338318, + "learning_rate": 9.919438438290032e-06, + "loss": 0.6936, + "step": 1350 + }, + { + "epoch": 0.09, + "grad_norm": 0.9214736223220825, + "learning_rate": 9.919254899738227e-06, + "loss": 0.6333, + "step": 1351 + }, + { + "epoch": 0.09, + "grad_norm": 0.921393096446991, + "learning_rate": 9.91907115405371e-06, + "loss": 0.6357, + "step": 1352 + }, + { + "epoch": 0.09, + "grad_norm": 0.867919385433197, + "learning_rate": 9.918887201244219e-06, + "loss": 0.6465, + "step": 1353 + }, + { + "epoch": 0.09, + "grad_norm": 0.9736660718917847, + "learning_rate": 9.918703041317498e-06, + "loss": 0.6553, + "step": 1354 + }, + { + "epoch": 0.09, + "grad_norm": 0.9158203601837158, + "learning_rate": 9.918518674281305e-06, + "loss": 0.6443, + "step": 1355 + }, + { + "epoch": 0.09, + "grad_norm": 0.9227981567382812, + "learning_rate": 9.9183341001434e-06, + "loss": 0.6795, + "step": 1356 + }, + { + "epoch": 0.09, + "grad_norm": 0.9615574479103088, + "learning_rate": 9.918149318911557e-06, + "loss": 0.6565, + "step": 1357 + }, + { + "epoch": 0.09, + "grad_norm": 0.9969097375869751, + "learning_rate": 9.917964330593553e-06, + "loss": 0.653, + "step": 1358 + }, + { + "epoch": 0.09, + "grad_norm": 0.9658487439155579, + "learning_rate": 9.917779135197181e-06, + "loss": 0.6578, + "step": 1359 + }, + { + "epoch": 0.09, + "grad_norm": 0.9004530906677246, + "learning_rate": 9.917593732730236e-06, + "loss": 0.6623, + "step": 1360 + }, + { + "epoch": 0.09, + "grad_norm": 1.01588773727417, + "learning_rate": 9.917408123200527e-06, + "loss": 0.6957, + "step": 1361 + }, + { + "epoch": 0.09, + "grad_norm": 0.9513044357299805, + "learning_rate": 9.917222306615868e-06, + "loss": 0.5935, + "step": 1362 + }, + { + "epoch": 0.09, + "grad_norm": 0.9240687489509583, + "learning_rate": 9.917036282984084e-06, + "loss": 0.6705, + "step": 1363 + }, + { + "epoch": 0.09, + "grad_norm": 0.9190331697463989, + "learning_rate": 9.916850052313007e-06, + "loss": 0.6988, + "step": 1364 + }, + { + "epoch": 0.09, + "grad_norm": 0.9454796314239502, + "learning_rate": 9.916663614610478e-06, + "loss": 0.6126, + "step": 1365 + }, + { + "epoch": 0.09, + "grad_norm": 0.930030345916748, + "learning_rate": 9.916476969884348e-06, + "loss": 0.6659, + "step": 1366 + }, + { + "epoch": 0.09, + "grad_norm": 0.8999435901641846, + "learning_rate": 9.916290118142478e-06, + "loss": 0.6424, + "step": 1367 + }, + { + "epoch": 0.09, + "grad_norm": 0.9197795987129211, + "learning_rate": 9.91610305939273e-06, + "loss": 0.6463, + "step": 1368 + }, + { + "epoch": 0.09, + "grad_norm": 0.8855273127555847, + "learning_rate": 9.915915793642987e-06, + "loss": 0.6438, + "step": 1369 + }, + { + "epoch": 0.09, + "grad_norm": 0.992560863494873, + "learning_rate": 9.91572832090113e-06, + "loss": 0.6389, + "step": 1370 + }, + { + "epoch": 0.09, + "grad_norm": 0.9714159965515137, + "learning_rate": 9.915540641175055e-06, + "loss": 0.6747, + "step": 1371 + }, + { + "epoch": 0.09, + "grad_norm": 0.9169105291366577, + "learning_rate": 9.915352754472662e-06, + "loss": 0.6518, + "step": 1372 + }, + { + "epoch": 0.09, + "grad_norm": 0.9008778929710388, + "learning_rate": 9.915164660801865e-06, + "loss": 0.6317, + "step": 1373 + }, + { + "epoch": 0.09, + "grad_norm": 0.8850517272949219, + "learning_rate": 9.914976360170583e-06, + "loss": 0.6009, + "step": 1374 + }, + { + "epoch": 0.09, + "grad_norm": 0.9355595707893372, + "learning_rate": 9.914787852586744e-06, + "loss": 0.6217, + "step": 1375 + }, + { + "epoch": 0.09, + "grad_norm": 0.9078220129013062, + "learning_rate": 9.914599138058285e-06, + "loss": 0.6433, + "step": 1376 + }, + { + "epoch": 0.09, + "grad_norm": 0.9317836761474609, + "learning_rate": 9.914410216593154e-06, + "loss": 0.6114, + "step": 1377 + }, + { + "epoch": 0.09, + "grad_norm": 0.941766083240509, + "learning_rate": 9.914221088199304e-06, + "loss": 0.6318, + "step": 1378 + }, + { + "epoch": 0.09, + "grad_norm": 0.9302542805671692, + "learning_rate": 9.9140317528847e-06, + "loss": 0.6385, + "step": 1379 + }, + { + "epoch": 0.09, + "grad_norm": 0.9480794072151184, + "learning_rate": 9.913842210657314e-06, + "loss": 0.6457, + "step": 1380 + }, + { + "epoch": 0.09, + "grad_norm": 0.9591321349143982, + "learning_rate": 9.913652461525126e-06, + "loss": 0.6889, + "step": 1381 + }, + { + "epoch": 0.09, + "grad_norm": 0.9544585347175598, + "learning_rate": 9.913462505496126e-06, + "loss": 0.6533, + "step": 1382 + }, + { + "epoch": 0.09, + "grad_norm": 0.9712944030761719, + "learning_rate": 9.913272342578312e-06, + "loss": 0.6232, + "step": 1383 + }, + { + "epoch": 0.09, + "grad_norm": 0.9564849138259888, + "learning_rate": 9.913081972779692e-06, + "loss": 0.6481, + "step": 1384 + }, + { + "epoch": 0.09, + "grad_norm": 0.9358051419258118, + "learning_rate": 9.912891396108281e-06, + "loss": 0.6599, + "step": 1385 + }, + { + "epoch": 0.09, + "grad_norm": 0.9481112360954285, + "learning_rate": 9.912700612572106e-06, + "loss": 0.6148, + "step": 1386 + }, + { + "epoch": 0.09, + "grad_norm": 0.9976912140846252, + "learning_rate": 9.912509622179197e-06, + "loss": 0.6802, + "step": 1387 + }, + { + "epoch": 0.09, + "grad_norm": 0.9644153118133545, + "learning_rate": 9.912318424937596e-06, + "loss": 0.674, + "step": 1388 + }, + { + "epoch": 0.09, + "grad_norm": 0.9347633719444275, + "learning_rate": 9.912127020855356e-06, + "loss": 0.6715, + "step": 1389 + }, + { + "epoch": 0.09, + "grad_norm": 0.9877498149871826, + "learning_rate": 9.911935409940536e-06, + "loss": 0.6818, + "step": 1390 + }, + { + "epoch": 0.09, + "grad_norm": 0.999534010887146, + "learning_rate": 9.911743592201203e-06, + "loss": 0.6524, + "step": 1391 + }, + { + "epoch": 0.09, + "grad_norm": 0.9707819223403931, + "learning_rate": 9.911551567645433e-06, + "loss": 0.6186, + "step": 1392 + }, + { + "epoch": 0.09, + "grad_norm": 0.9409770965576172, + "learning_rate": 9.911359336281312e-06, + "loss": 0.6754, + "step": 1393 + }, + { + "epoch": 0.09, + "grad_norm": 0.9744927883148193, + "learning_rate": 9.911166898116935e-06, + "loss": 0.6842, + "step": 1394 + }, + { + "epoch": 0.09, + "grad_norm": 0.8977870941162109, + "learning_rate": 9.910974253160405e-06, + "loss": 0.6315, + "step": 1395 + }, + { + "epoch": 0.09, + "grad_norm": 0.9655022025108337, + "learning_rate": 9.910781401419835e-06, + "loss": 0.6493, + "step": 1396 + }, + { + "epoch": 0.09, + "grad_norm": 0.9334004521369934, + "learning_rate": 9.910588342903342e-06, + "loss": 0.6679, + "step": 1397 + }, + { + "epoch": 0.09, + "grad_norm": 0.9170674085617065, + "learning_rate": 9.910395077619057e-06, + "loss": 0.652, + "step": 1398 + }, + { + "epoch": 0.09, + "grad_norm": 0.8829054832458496, + "learning_rate": 9.910201605575116e-06, + "loss": 0.5469, + "step": 1399 + }, + { + "epoch": 0.09, + "grad_norm": 0.9742071032524109, + "learning_rate": 9.910007926779669e-06, + "loss": 0.6423, + "step": 1400 + }, + { + "epoch": 0.09, + "grad_norm": 0.9407263398170471, + "learning_rate": 9.909814041240867e-06, + "loss": 0.7067, + "step": 1401 + }, + { + "epoch": 0.09, + "grad_norm": 0.9770819544792175, + "learning_rate": 9.909619948966875e-06, + "loss": 0.6211, + "step": 1402 + }, + { + "epoch": 0.09, + "grad_norm": 0.8862954378128052, + "learning_rate": 9.909425649965869e-06, + "loss": 0.6222, + "step": 1403 + }, + { + "epoch": 0.09, + "grad_norm": 0.8932839035987854, + "learning_rate": 9.909231144246026e-06, + "loss": 0.6406, + "step": 1404 + }, + { + "epoch": 0.09, + "grad_norm": 0.972990870475769, + "learning_rate": 9.909036431815538e-06, + "loss": 0.6454, + "step": 1405 + }, + { + "epoch": 0.09, + "grad_norm": 0.9712089896202087, + "learning_rate": 9.908841512682602e-06, + "loss": 0.636, + "step": 1406 + }, + { + "epoch": 0.09, + "grad_norm": 0.9432918429374695, + "learning_rate": 9.908646386855427e-06, + "loss": 0.639, + "step": 1407 + }, + { + "epoch": 0.09, + "grad_norm": 0.896690845489502, + "learning_rate": 9.90845105434223e-06, + "loss": 0.6778, + "step": 1408 + }, + { + "epoch": 0.09, + "grad_norm": 0.8984845280647278, + "learning_rate": 9.908255515151232e-06, + "loss": 0.6641, + "step": 1409 + }, + { + "epoch": 0.09, + "grad_norm": 0.9165180921554565, + "learning_rate": 9.90805976929067e-06, + "loss": 0.6136, + "step": 1410 + }, + { + "epoch": 0.09, + "grad_norm": 0.9825322031974792, + "learning_rate": 9.907863816768786e-06, + "loss": 0.68, + "step": 1411 + }, + { + "epoch": 0.09, + "grad_norm": 0.9669419527053833, + "learning_rate": 9.907667657593828e-06, + "loss": 0.6393, + "step": 1412 + }, + { + "epoch": 0.09, + "grad_norm": 0.9365913271903992, + "learning_rate": 9.907471291774058e-06, + "loss": 0.6369, + "step": 1413 + }, + { + "epoch": 0.09, + "grad_norm": 0.9577059745788574, + "learning_rate": 9.907274719317746e-06, + "loss": 0.6428, + "step": 1414 + }, + { + "epoch": 0.09, + "grad_norm": 0.9230369329452515, + "learning_rate": 9.907077940233162e-06, + "loss": 0.6102, + "step": 1415 + }, + { + "epoch": 0.09, + "grad_norm": 1.0820754766464233, + "learning_rate": 9.906880954528601e-06, + "loss": 0.7506, + "step": 1416 + }, + { + "epoch": 0.09, + "grad_norm": 0.98191237449646, + "learning_rate": 9.90668376221235e-06, + "loss": 0.6625, + "step": 1417 + }, + { + "epoch": 0.09, + "grad_norm": 1.2556596994400024, + "learning_rate": 9.906486363292718e-06, + "loss": 0.6521, + "step": 1418 + }, + { + "epoch": 0.09, + "grad_norm": 0.9278602004051208, + "learning_rate": 9.906288757778012e-06, + "loss": 0.6897, + "step": 1419 + }, + { + "epoch": 0.09, + "grad_norm": 0.932048499584198, + "learning_rate": 9.906090945676552e-06, + "loss": 0.6475, + "step": 1420 + }, + { + "epoch": 0.09, + "grad_norm": 0.9188955426216125, + "learning_rate": 9.905892926996672e-06, + "loss": 0.6607, + "step": 1421 + }, + { + "epoch": 0.09, + "grad_norm": 0.8783012628555298, + "learning_rate": 9.905694701746706e-06, + "loss": 0.67, + "step": 1422 + }, + { + "epoch": 0.09, + "grad_norm": 0.9844834208488464, + "learning_rate": 9.905496269935002e-06, + "loss": 0.6815, + "step": 1423 + }, + { + "epoch": 0.09, + "grad_norm": 0.8767088055610657, + "learning_rate": 9.905297631569915e-06, + "loss": 0.6505, + "step": 1424 + }, + { + "epoch": 0.09, + "grad_norm": 0.9569482207298279, + "learning_rate": 9.905098786659809e-06, + "loss": 0.6456, + "step": 1425 + }, + { + "epoch": 0.09, + "grad_norm": 0.991873562335968, + "learning_rate": 9.904899735213058e-06, + "loss": 0.6747, + "step": 1426 + }, + { + "epoch": 0.09, + "grad_norm": 0.8848119974136353, + "learning_rate": 9.90470047723804e-06, + "loss": 0.6224, + "step": 1427 + }, + { + "epoch": 0.09, + "grad_norm": 0.8410341143608093, + "learning_rate": 9.904501012743149e-06, + "loss": 0.5621, + "step": 1428 + }, + { + "epoch": 0.09, + "grad_norm": 0.951511561870575, + "learning_rate": 9.90430134173678e-06, + "loss": 0.651, + "step": 1429 + }, + { + "epoch": 0.09, + "grad_norm": 0.93990159034729, + "learning_rate": 9.904101464227342e-06, + "loss": 0.6245, + "step": 1430 + }, + { + "epoch": 0.09, + "grad_norm": 0.9799292087554932, + "learning_rate": 9.903901380223254e-06, + "loss": 0.6582, + "step": 1431 + }, + { + "epoch": 0.09, + "grad_norm": 0.9411140084266663, + "learning_rate": 9.903701089732937e-06, + "loss": 0.6366, + "step": 1432 + }, + { + "epoch": 0.09, + "grad_norm": 0.8953483700752258, + "learning_rate": 9.903500592764825e-06, + "loss": 0.5984, + "step": 1433 + }, + { + "epoch": 0.09, + "grad_norm": 0.9429217576980591, + "learning_rate": 9.903299889327362e-06, + "loss": 0.6379, + "step": 1434 + }, + { + "epoch": 0.09, + "grad_norm": 0.9783498644828796, + "learning_rate": 9.903098979428998e-06, + "loss": 0.6302, + "step": 1435 + }, + { + "epoch": 0.09, + "grad_norm": 0.9483500719070435, + "learning_rate": 9.902897863078192e-06, + "loss": 0.5857, + "step": 1436 + }, + { + "epoch": 0.09, + "grad_norm": 0.9564317464828491, + "learning_rate": 9.902696540283414e-06, + "loss": 0.6902, + "step": 1437 + }, + { + "epoch": 0.09, + "grad_norm": 0.8706897497177124, + "learning_rate": 9.90249501105314e-06, + "loss": 0.5932, + "step": 1438 + }, + { + "epoch": 0.09, + "grad_norm": 0.913366436958313, + "learning_rate": 9.902293275395854e-06, + "loss": 0.6527, + "step": 1439 + }, + { + "epoch": 0.09, + "grad_norm": 0.9667909741401672, + "learning_rate": 9.902091333320053e-06, + "loss": 0.6133, + "step": 1440 + }, + { + "epoch": 0.09, + "grad_norm": 0.9342280626296997, + "learning_rate": 9.90188918483424e-06, + "loss": 0.6631, + "step": 1441 + }, + { + "epoch": 0.09, + "grad_norm": 0.9814968705177307, + "learning_rate": 9.901686829946924e-06, + "loss": 0.6715, + "step": 1442 + }, + { + "epoch": 0.09, + "grad_norm": 0.9427680373191833, + "learning_rate": 9.901484268666628e-06, + "loss": 0.6623, + "step": 1443 + }, + { + "epoch": 0.09, + "grad_norm": 0.9245345592498779, + "learning_rate": 9.90128150100188e-06, + "loss": 0.7061, + "step": 1444 + }, + { + "epoch": 0.09, + "grad_norm": 0.9452770948410034, + "learning_rate": 9.90107852696122e-06, + "loss": 0.6333, + "step": 1445 + }, + { + "epoch": 0.09, + "grad_norm": 1.0394012928009033, + "learning_rate": 9.900875346553192e-06, + "loss": 0.675, + "step": 1446 + }, + { + "epoch": 0.09, + "grad_norm": 0.9171515107154846, + "learning_rate": 9.900671959786352e-06, + "loss": 0.6535, + "step": 1447 + }, + { + "epoch": 0.09, + "grad_norm": 0.9841604828834534, + "learning_rate": 9.900468366669264e-06, + "loss": 0.6465, + "step": 1448 + }, + { + "epoch": 0.09, + "grad_norm": 0.9761607050895691, + "learning_rate": 9.900264567210501e-06, + "loss": 0.6161, + "step": 1449 + }, + { + "epoch": 0.09, + "grad_norm": 0.8922892808914185, + "learning_rate": 9.900060561418643e-06, + "loss": 0.6091, + "step": 1450 + }, + { + "epoch": 0.09, + "grad_norm": 0.984734833240509, + "learning_rate": 9.89985634930228e-06, + "loss": 0.6684, + "step": 1451 + }, + { + "epoch": 0.09, + "grad_norm": 0.9379397630691528, + "learning_rate": 9.899651930870014e-06, + "loss": 0.6442, + "step": 1452 + }, + { + "epoch": 0.09, + "grad_norm": 0.9366482496261597, + "learning_rate": 9.899447306130447e-06, + "loss": 0.6081, + "step": 1453 + }, + { + "epoch": 0.09, + "grad_norm": 1.0072358846664429, + "learning_rate": 9.8992424750922e-06, + "loss": 0.6793, + "step": 1454 + }, + { + "epoch": 0.09, + "grad_norm": 0.9230679869651794, + "learning_rate": 9.899037437763894e-06, + "loss": 0.6299, + "step": 1455 + }, + { + "epoch": 0.09, + "grad_norm": 0.9557128548622131, + "learning_rate": 9.898832194154165e-06, + "loss": 0.6412, + "step": 1456 + }, + { + "epoch": 0.09, + "grad_norm": 0.8996137976646423, + "learning_rate": 9.898626744271654e-06, + "loss": 0.5912, + "step": 1457 + }, + { + "epoch": 0.09, + "grad_norm": 0.9441683292388916, + "learning_rate": 9.898421088125012e-06, + "loss": 0.6139, + "step": 1458 + }, + { + "epoch": 0.09, + "grad_norm": 0.9505671858787537, + "learning_rate": 9.898215225722899e-06, + "loss": 0.6811, + "step": 1459 + }, + { + "epoch": 0.09, + "grad_norm": 1.0145865678787231, + "learning_rate": 9.898009157073982e-06, + "loss": 0.6746, + "step": 1460 + }, + { + "epoch": 0.09, + "grad_norm": 0.9370318651199341, + "learning_rate": 9.897802882186938e-06, + "loss": 0.6384, + "step": 1461 + }, + { + "epoch": 0.09, + "grad_norm": 0.890518307685852, + "learning_rate": 9.897596401070452e-06, + "loss": 0.6382, + "step": 1462 + }, + { + "epoch": 0.09, + "grad_norm": 0.8634839653968811, + "learning_rate": 9.89738971373322e-06, + "loss": 0.6107, + "step": 1463 + }, + { + "epoch": 0.09, + "grad_norm": 0.9820486307144165, + "learning_rate": 9.897182820183944e-06, + "loss": 0.6614, + "step": 1464 + }, + { + "epoch": 0.09, + "grad_norm": 0.9493029117584229, + "learning_rate": 9.896975720431334e-06, + "loss": 0.6184, + "step": 1465 + }, + { + "epoch": 0.09, + "grad_norm": 0.9800208210945129, + "learning_rate": 9.896768414484115e-06, + "loss": 0.6639, + "step": 1466 + }, + { + "epoch": 0.09, + "grad_norm": 0.9691864848136902, + "learning_rate": 9.896560902351009e-06, + "loss": 0.6536, + "step": 1467 + }, + { + "epoch": 0.09, + "grad_norm": 0.964766263961792, + "learning_rate": 9.89635318404076e-06, + "loss": 0.6796, + "step": 1468 + }, + { + "epoch": 0.09, + "grad_norm": 0.8742868900299072, + "learning_rate": 9.896145259562111e-06, + "loss": 0.6627, + "step": 1469 + }, + { + "epoch": 0.09, + "grad_norm": 0.9007435441017151, + "learning_rate": 9.895937128923816e-06, + "loss": 0.6388, + "step": 1470 + }, + { + "epoch": 0.09, + "grad_norm": 0.931812584400177, + "learning_rate": 9.895728792134642e-06, + "loss": 0.6514, + "step": 1471 + }, + { + "epoch": 0.09, + "grad_norm": 0.9516260027885437, + "learning_rate": 9.895520249203358e-06, + "loss": 0.6737, + "step": 1472 + }, + { + "epoch": 0.09, + "grad_norm": 0.9379490613937378, + "learning_rate": 9.895311500138749e-06, + "loss": 0.6273, + "step": 1473 + }, + { + "epoch": 0.09, + "grad_norm": 0.9107145667076111, + "learning_rate": 9.8951025449496e-06, + "loss": 0.6562, + "step": 1474 + }, + { + "epoch": 0.09, + "grad_norm": 0.8951176404953003, + "learning_rate": 9.894893383644713e-06, + "loss": 0.5977, + "step": 1475 + }, + { + "epoch": 0.09, + "grad_norm": 0.9197559356689453, + "learning_rate": 9.894684016232893e-06, + "loss": 0.6614, + "step": 1476 + }, + { + "epoch": 0.09, + "grad_norm": 0.9731332063674927, + "learning_rate": 9.894474442722956e-06, + "loss": 0.6992, + "step": 1477 + }, + { + "epoch": 0.09, + "grad_norm": 0.9306113719940186, + "learning_rate": 9.89426466312373e-06, + "loss": 0.6351, + "step": 1478 + }, + { + "epoch": 0.09, + "grad_norm": 0.9742302298545837, + "learning_rate": 9.89405467744404e-06, + "loss": 0.7297, + "step": 1479 + }, + { + "epoch": 0.09, + "grad_norm": 0.891633927822113, + "learning_rate": 9.893844485692736e-06, + "loss": 0.6004, + "step": 1480 + }, + { + "epoch": 0.09, + "grad_norm": 1.000712275505066, + "learning_rate": 9.893634087878665e-06, + "loss": 0.6486, + "step": 1481 + }, + { + "epoch": 0.09, + "grad_norm": 0.9697219729423523, + "learning_rate": 9.893423484010685e-06, + "loss": 0.6353, + "step": 1482 + }, + { + "epoch": 0.09, + "grad_norm": 0.9428305625915527, + "learning_rate": 9.893212674097666e-06, + "loss": 0.6327, + "step": 1483 + }, + { + "epoch": 0.09, + "grad_norm": 0.8775177001953125, + "learning_rate": 9.893001658148482e-06, + "loss": 0.5795, + "step": 1484 + }, + { + "epoch": 0.09, + "grad_norm": 0.8842799663543701, + "learning_rate": 9.892790436172022e-06, + "loss": 0.6095, + "step": 1485 + }, + { + "epoch": 0.09, + "grad_norm": 0.9339465498924255, + "learning_rate": 9.892579008177176e-06, + "loss": 0.6589, + "step": 1486 + }, + { + "epoch": 0.09, + "grad_norm": 0.940681517124176, + "learning_rate": 9.892367374172849e-06, + "loss": 0.7008, + "step": 1487 + }, + { + "epoch": 0.09, + "grad_norm": 0.942547619342804, + "learning_rate": 9.89215553416795e-06, + "loss": 0.6456, + "step": 1488 + }, + { + "epoch": 0.09, + "grad_norm": 0.9299596548080444, + "learning_rate": 9.8919434881714e-06, + "loss": 0.6347, + "step": 1489 + }, + { + "epoch": 0.09, + "grad_norm": 0.9081819653511047, + "learning_rate": 9.891731236192127e-06, + "loss": 0.6871, + "step": 1490 + }, + { + "epoch": 0.09, + "grad_norm": 0.903308093547821, + "learning_rate": 9.89151877823907e-06, + "loss": 0.644, + "step": 1491 + }, + { + "epoch": 0.09, + "grad_norm": 0.8742372989654541, + "learning_rate": 9.891306114321175e-06, + "loss": 0.6389, + "step": 1492 + }, + { + "epoch": 0.09, + "grad_norm": 0.9535795450210571, + "learning_rate": 9.891093244447393e-06, + "loss": 0.6408, + "step": 1493 + }, + { + "epoch": 0.09, + "grad_norm": 0.9566690921783447, + "learning_rate": 9.890880168626691e-06, + "loss": 0.6521, + "step": 1494 + }, + { + "epoch": 0.09, + "grad_norm": 0.9142457246780396, + "learning_rate": 9.890666886868038e-06, + "loss": 0.6411, + "step": 1495 + }, + { + "epoch": 0.09, + "grad_norm": 0.8510489463806152, + "learning_rate": 9.890453399180415e-06, + "loss": 0.6156, + "step": 1496 + }, + { + "epoch": 0.09, + "grad_norm": 0.9929180145263672, + "learning_rate": 9.890239705572815e-06, + "loss": 0.6782, + "step": 1497 + }, + { + "epoch": 0.09, + "grad_norm": 0.93791264295578, + "learning_rate": 9.89002580605423e-06, + "loss": 0.6499, + "step": 1498 + }, + { + "epoch": 0.09, + "grad_norm": 0.9413290619850159, + "learning_rate": 9.88981170063367e-06, + "loss": 0.6317, + "step": 1499 + }, + { + "epoch": 0.1, + "grad_norm": 0.9057697057723999, + "learning_rate": 9.88959738932015e-06, + "loss": 0.5684, + "step": 1500 + }, + { + "epoch": 0.1, + "grad_norm": 0.9506174921989441, + "learning_rate": 9.889382872122693e-06, + "loss": 0.7017, + "step": 1501 + }, + { + "epoch": 0.1, + "grad_norm": 0.9269284605979919, + "learning_rate": 9.889168149050334e-06, + "loss": 0.6496, + "step": 1502 + }, + { + "epoch": 0.1, + "grad_norm": 0.9708095192909241, + "learning_rate": 9.88895322011211e-06, + "loss": 0.7373, + "step": 1503 + }, + { + "epoch": 0.1, + "grad_norm": 0.9477187991142273, + "learning_rate": 9.888738085317075e-06, + "loss": 0.7015, + "step": 1504 + }, + { + "epoch": 0.1, + "grad_norm": 0.8957401514053345, + "learning_rate": 9.888522744674286e-06, + "loss": 0.6327, + "step": 1505 + }, + { + "epoch": 0.1, + "grad_norm": 0.9387091994285583, + "learning_rate": 9.888307198192808e-06, + "loss": 0.6296, + "step": 1506 + }, + { + "epoch": 0.1, + "grad_norm": 0.9464743733406067, + "learning_rate": 9.888091445881723e-06, + "loss": 0.6616, + "step": 1507 + }, + { + "epoch": 0.1, + "grad_norm": 0.944981575012207, + "learning_rate": 9.887875487750108e-06, + "loss": 0.637, + "step": 1508 + }, + { + "epoch": 0.1, + "grad_norm": 0.9454977512359619, + "learning_rate": 9.887659323807062e-06, + "loss": 0.6645, + "step": 1509 + }, + { + "epoch": 0.1, + "grad_norm": 0.9209526777267456, + "learning_rate": 9.887442954061684e-06, + "loss": 0.6978, + "step": 1510 + }, + { + "epoch": 0.1, + "grad_norm": 0.9140705466270447, + "learning_rate": 9.887226378523085e-06, + "loss": 0.6424, + "step": 1511 + }, + { + "epoch": 0.1, + "grad_norm": 0.9300777316093445, + "learning_rate": 9.887009597200385e-06, + "loss": 0.6293, + "step": 1512 + }, + { + "epoch": 0.1, + "grad_norm": 0.883039653301239, + "learning_rate": 9.88679261010271e-06, + "loss": 0.561, + "step": 1513 + }, + { + "epoch": 0.1, + "grad_norm": 0.9039274454116821, + "learning_rate": 9.886575417239202e-06, + "loss": 0.6245, + "step": 1514 + }, + { + "epoch": 0.1, + "grad_norm": 0.9318472743034363, + "learning_rate": 9.886358018619e-06, + "loss": 0.6637, + "step": 1515 + }, + { + "epoch": 0.1, + "grad_norm": 0.853915810585022, + "learning_rate": 9.886140414251259e-06, + "loss": 0.6292, + "step": 1516 + }, + { + "epoch": 0.1, + "grad_norm": 0.996114194393158, + "learning_rate": 9.885922604145143e-06, + "loss": 0.6856, + "step": 1517 + }, + { + "epoch": 0.1, + "grad_norm": 0.9068061113357544, + "learning_rate": 9.885704588309825e-06, + "loss": 0.6218, + "step": 1518 + }, + { + "epoch": 0.1, + "grad_norm": 0.9396615624427795, + "learning_rate": 9.885486366754482e-06, + "loss": 0.6889, + "step": 1519 + }, + { + "epoch": 0.1, + "grad_norm": 0.9767167568206787, + "learning_rate": 9.885267939488303e-06, + "loss": 0.669, + "step": 1520 + }, + { + "epoch": 0.1, + "grad_norm": 0.9243539571762085, + "learning_rate": 9.885049306520487e-06, + "loss": 0.571, + "step": 1521 + }, + { + "epoch": 0.1, + "grad_norm": 0.9698777794837952, + "learning_rate": 9.884830467860238e-06, + "loss": 0.6195, + "step": 1522 + }, + { + "epoch": 0.1, + "grad_norm": 0.9754754900932312, + "learning_rate": 9.88461142351677e-06, + "loss": 0.6712, + "step": 1523 + }, + { + "epoch": 0.1, + "grad_norm": 0.9276134371757507, + "learning_rate": 9.884392173499308e-06, + "loss": 0.6022, + "step": 1524 + }, + { + "epoch": 0.1, + "grad_norm": 0.8962921500205994, + "learning_rate": 9.884172717817085e-06, + "loss": 0.6694, + "step": 1525 + }, + { + "epoch": 0.1, + "grad_norm": 0.9050678610801697, + "learning_rate": 9.883953056479336e-06, + "loss": 0.6422, + "step": 1526 + }, + { + "epoch": 0.1, + "grad_norm": 1.006984829902649, + "learning_rate": 9.883733189495316e-06, + "loss": 0.6856, + "step": 1527 + }, + { + "epoch": 0.1, + "grad_norm": 0.9265629053115845, + "learning_rate": 9.88351311687428e-06, + "loss": 0.6231, + "step": 1528 + }, + { + "epoch": 0.1, + "grad_norm": 0.9328793883323669, + "learning_rate": 9.883292838625495e-06, + "loss": 0.6304, + "step": 1529 + }, + { + "epoch": 0.1, + "grad_norm": 0.9297760128974915, + "learning_rate": 9.883072354758237e-06, + "loss": 0.6102, + "step": 1530 + }, + { + "epoch": 0.1, + "grad_norm": 0.9190971851348877, + "learning_rate": 9.88285166528179e-06, + "loss": 0.6895, + "step": 1531 + }, + { + "epoch": 0.1, + "grad_norm": 0.9351559281349182, + "learning_rate": 9.882630770205444e-06, + "loss": 0.5951, + "step": 1532 + }, + { + "epoch": 0.1, + "grad_norm": 0.9502492547035217, + "learning_rate": 9.882409669538503e-06, + "loss": 0.6165, + "step": 1533 + }, + { + "epoch": 0.1, + "grad_norm": 0.9726475477218628, + "learning_rate": 9.882188363290273e-06, + "loss": 0.6672, + "step": 1534 + }, + { + "epoch": 0.1, + "grad_norm": 1.011664628982544, + "learning_rate": 9.881966851470077e-06, + "loss": 0.6367, + "step": 1535 + }, + { + "epoch": 0.1, + "grad_norm": 0.9154837727546692, + "learning_rate": 9.881745134087239e-06, + "loss": 0.6487, + "step": 1536 + }, + { + "epoch": 0.1, + "grad_norm": 0.9829793572425842, + "learning_rate": 9.881523211151097e-06, + "loss": 0.6535, + "step": 1537 + }, + { + "epoch": 0.1, + "grad_norm": 0.9159083366394043, + "learning_rate": 9.881301082670992e-06, + "loss": 0.6405, + "step": 1538 + }, + { + "epoch": 0.1, + "grad_norm": 0.9753561615943909, + "learning_rate": 9.881078748656282e-06, + "loss": 0.673, + "step": 1539 + }, + { + "epoch": 0.1, + "grad_norm": 0.961372971534729, + "learning_rate": 9.880856209116324e-06, + "loss": 0.6218, + "step": 1540 + }, + { + "epoch": 0.1, + "grad_norm": 0.9532050490379333, + "learning_rate": 9.880633464060492e-06, + "loss": 0.6335, + "step": 1541 + }, + { + "epoch": 0.1, + "grad_norm": 0.9294744729995728, + "learning_rate": 9.880410513498163e-06, + "loss": 0.6179, + "step": 1542 + }, + { + "epoch": 0.1, + "grad_norm": 0.979083240032196, + "learning_rate": 9.880187357438722e-06, + "loss": 0.6624, + "step": 1543 + }, + { + "epoch": 0.1, + "grad_norm": 0.9284359216690063, + "learning_rate": 9.87996399589157e-06, + "loss": 0.646, + "step": 1544 + }, + { + "epoch": 0.1, + "grad_norm": 0.9217939376831055, + "learning_rate": 9.87974042886611e-06, + "loss": 0.6153, + "step": 1545 + }, + { + "epoch": 0.1, + "grad_norm": 0.8446288704872131, + "learning_rate": 9.879516656371758e-06, + "loss": 0.5636, + "step": 1546 + }, + { + "epoch": 0.1, + "grad_norm": 1.0131950378417969, + "learning_rate": 9.879292678417934e-06, + "loss": 0.6842, + "step": 1547 + }, + { + "epoch": 0.1, + "grad_norm": 0.9436971545219421, + "learning_rate": 9.879068495014068e-06, + "loss": 0.6342, + "step": 1548 + }, + { + "epoch": 0.1, + "grad_norm": 0.9207556843757629, + "learning_rate": 9.878844106169601e-06, + "loss": 0.717, + "step": 1549 + }, + { + "epoch": 0.1, + "grad_norm": 0.9688981771469116, + "learning_rate": 9.87861951189398e-06, + "loss": 0.6763, + "step": 1550 + }, + { + "epoch": 0.1, + "grad_norm": 0.9991617798805237, + "learning_rate": 9.878394712196665e-06, + "loss": 0.6928, + "step": 1551 + }, + { + "epoch": 0.1, + "grad_norm": 0.9736862182617188, + "learning_rate": 9.878169707087116e-06, + "loss": 0.6552, + "step": 1552 + }, + { + "epoch": 0.1, + "grad_norm": 0.9318220019340515, + "learning_rate": 9.877944496574813e-06, + "loss": 0.5917, + "step": 1553 + }, + { + "epoch": 0.1, + "grad_norm": 0.8690041303634644, + "learning_rate": 9.877719080669235e-06, + "loss": 0.6064, + "step": 1554 + }, + { + "epoch": 0.1, + "grad_norm": 0.9481027126312256, + "learning_rate": 9.877493459379876e-06, + "loss": 0.6604, + "step": 1555 + }, + { + "epoch": 0.1, + "grad_norm": 1.0152469873428345, + "learning_rate": 9.877267632716235e-06, + "loss": 0.7071, + "step": 1556 + }, + { + "epoch": 0.1, + "grad_norm": 0.9626147150993347, + "learning_rate": 9.87704160068782e-06, + "loss": 0.623, + "step": 1557 + }, + { + "epoch": 0.1, + "grad_norm": 0.9506250619888306, + "learning_rate": 9.87681536330415e-06, + "loss": 0.6372, + "step": 1558 + }, + { + "epoch": 0.1, + "grad_norm": 0.9783592224121094, + "learning_rate": 9.87658892057475e-06, + "loss": 0.6661, + "step": 1559 + }, + { + "epoch": 0.1, + "grad_norm": 0.9387713670730591, + "learning_rate": 9.876362272509154e-06, + "loss": 0.7131, + "step": 1560 + }, + { + "epoch": 0.1, + "grad_norm": 0.9006531238555908, + "learning_rate": 9.876135419116908e-06, + "loss": 0.6329, + "step": 1561 + }, + { + "epoch": 0.1, + "grad_norm": 0.9375502467155457, + "learning_rate": 9.87590836040756e-06, + "loss": 0.6184, + "step": 1562 + }, + { + "epoch": 0.1, + "grad_norm": 0.9109377264976501, + "learning_rate": 9.875681096390676e-06, + "loss": 0.6317, + "step": 1563 + }, + { + "epoch": 0.1, + "grad_norm": 0.9256362915039062, + "learning_rate": 9.87545362707582e-06, + "loss": 0.6397, + "step": 1564 + }, + { + "epoch": 0.1, + "grad_norm": 0.9016781449317932, + "learning_rate": 9.875225952472574e-06, + "loss": 0.6329, + "step": 1565 + }, + { + "epoch": 0.1, + "grad_norm": 0.9019981026649475, + "learning_rate": 9.874998072590521e-06, + "loss": 0.6723, + "step": 1566 + }, + { + "epoch": 0.1, + "grad_norm": 0.9814824461936951, + "learning_rate": 9.874769987439259e-06, + "loss": 0.6784, + "step": 1567 + }, + { + "epoch": 0.1, + "grad_norm": 0.9205242395401001, + "learning_rate": 9.87454169702839e-06, + "loss": 0.6541, + "step": 1568 + }, + { + "epoch": 0.1, + "grad_norm": 1.0002273321151733, + "learning_rate": 9.87431320136753e-06, + "loss": 0.5924, + "step": 1569 + }, + { + "epoch": 0.1, + "grad_norm": 0.9376479983329773, + "learning_rate": 9.874084500466295e-06, + "loss": 0.6596, + "step": 1570 + }, + { + "epoch": 0.1, + "grad_norm": 0.872928261756897, + "learning_rate": 9.873855594334319e-06, + "loss": 0.6838, + "step": 1571 + }, + { + "epoch": 0.1, + "grad_norm": 0.902869701385498, + "learning_rate": 9.873626482981238e-06, + "loss": 0.6284, + "step": 1572 + }, + { + "epoch": 0.1, + "grad_norm": 0.9037356972694397, + "learning_rate": 9.873397166416698e-06, + "loss": 0.6083, + "step": 1573 + }, + { + "epoch": 0.1, + "grad_norm": 0.8765510320663452, + "learning_rate": 9.87316764465036e-06, + "loss": 0.6122, + "step": 1574 + }, + { + "epoch": 0.1, + "grad_norm": 0.9921714067459106, + "learning_rate": 9.872937917691883e-06, + "loss": 0.5799, + "step": 1575 + }, + { + "epoch": 0.1, + "grad_norm": 0.9323515295982361, + "learning_rate": 9.872707985550942e-06, + "loss": 0.5727, + "step": 1576 + }, + { + "epoch": 0.1, + "grad_norm": 0.9624417424201965, + "learning_rate": 9.872477848237221e-06, + "loss": 0.6477, + "step": 1577 + }, + { + "epoch": 0.1, + "grad_norm": 0.9209104180335999, + "learning_rate": 9.872247505760405e-06, + "loss": 0.6059, + "step": 1578 + }, + { + "epoch": 0.1, + "grad_norm": 0.9874113202095032, + "learning_rate": 9.872016958130197e-06, + "loss": 0.6308, + "step": 1579 + }, + { + "epoch": 0.1, + "grad_norm": 0.981163740158081, + "learning_rate": 9.871786205356303e-06, + "loss": 0.6446, + "step": 1580 + }, + { + "epoch": 0.1, + "grad_norm": 0.9238435626029968, + "learning_rate": 9.871555247448442e-06, + "loss": 0.6831, + "step": 1581 + }, + { + "epoch": 0.1, + "grad_norm": 1.0302647352218628, + "learning_rate": 9.871324084416332e-06, + "loss": 0.6095, + "step": 1582 + }, + { + "epoch": 0.1, + "grad_norm": 0.9587137699127197, + "learning_rate": 9.871092716269714e-06, + "loss": 0.6613, + "step": 1583 + }, + { + "epoch": 0.1, + "grad_norm": 0.9017694592475891, + "learning_rate": 9.870861143018327e-06, + "loss": 0.6368, + "step": 1584 + }, + { + "epoch": 0.1, + "grad_norm": 0.9232084155082703, + "learning_rate": 9.87062936467192e-06, + "loss": 0.6588, + "step": 1585 + }, + { + "epoch": 0.1, + "grad_norm": 0.9299889206886292, + "learning_rate": 9.870397381240256e-06, + "loss": 0.6222, + "step": 1586 + }, + { + "epoch": 0.1, + "grad_norm": 0.8801997900009155, + "learning_rate": 9.870165192733101e-06, + "loss": 0.6371, + "step": 1587 + }, + { + "epoch": 0.1, + "grad_norm": 0.9414759278297424, + "learning_rate": 9.869932799160232e-06, + "loss": 0.6735, + "step": 1588 + }, + { + "epoch": 0.1, + "grad_norm": 1.5504239797592163, + "learning_rate": 9.869700200531431e-06, + "loss": 0.6738, + "step": 1589 + }, + { + "epoch": 0.1, + "grad_norm": 0.9341895580291748, + "learning_rate": 9.869467396856499e-06, + "loss": 0.6024, + "step": 1590 + }, + { + "epoch": 0.1, + "grad_norm": 0.921317994594574, + "learning_rate": 9.869234388145232e-06, + "loss": 0.6963, + "step": 1591 + }, + { + "epoch": 0.1, + "grad_norm": 0.9685119986534119, + "learning_rate": 9.869001174407444e-06, + "loss": 0.5984, + "step": 1592 + }, + { + "epoch": 0.1, + "grad_norm": 0.8759018182754517, + "learning_rate": 9.868767755652955e-06, + "loss": 0.6223, + "step": 1593 + }, + { + "epoch": 0.1, + "grad_norm": 0.8878785371780396, + "learning_rate": 9.868534131891594e-06, + "loss": 0.6196, + "step": 1594 + }, + { + "epoch": 0.1, + "grad_norm": 0.9563702344894409, + "learning_rate": 9.868300303133195e-06, + "loss": 0.6902, + "step": 1595 + }, + { + "epoch": 0.1, + "grad_norm": 0.9496309757232666, + "learning_rate": 9.868066269387609e-06, + "loss": 0.6131, + "step": 1596 + }, + { + "epoch": 0.1, + "grad_norm": 0.9410830140113831, + "learning_rate": 9.867832030664685e-06, + "loss": 0.6433, + "step": 1597 + }, + { + "epoch": 0.1, + "grad_norm": 1.0077545642852783, + "learning_rate": 9.867597586974288e-06, + "loss": 0.6728, + "step": 1598 + }, + { + "epoch": 0.1, + "grad_norm": 0.9375026226043701, + "learning_rate": 9.86736293832629e-06, + "loss": 0.6316, + "step": 1599 + }, + { + "epoch": 0.1, + "grad_norm": 0.9416118264198303, + "learning_rate": 9.86712808473057e-06, + "loss": 0.662, + "step": 1600 + }, + { + "epoch": 0.1, + "grad_norm": 0.9092760682106018, + "learning_rate": 9.86689302619702e-06, + "loss": 0.6317, + "step": 1601 + }, + { + "epoch": 0.1, + "grad_norm": 0.9220471978187561, + "learning_rate": 9.866657762735534e-06, + "loss": 0.6576, + "step": 1602 + }, + { + "epoch": 0.1, + "grad_norm": 0.9349024295806885, + "learning_rate": 9.866422294356019e-06, + "loss": 0.6976, + "step": 1603 + }, + { + "epoch": 0.1, + "grad_norm": 0.8267975449562073, + "learning_rate": 9.866186621068391e-06, + "loss": 0.5685, + "step": 1604 + }, + { + "epoch": 0.1, + "grad_norm": 0.9613746404647827, + "learning_rate": 9.865950742882574e-06, + "loss": 0.6038, + "step": 1605 + }, + { + "epoch": 0.1, + "grad_norm": 0.9852586984634399, + "learning_rate": 9.865714659808497e-06, + "loss": 0.6429, + "step": 1606 + }, + { + "epoch": 0.1, + "grad_norm": 0.9281002283096313, + "learning_rate": 9.865478371856102e-06, + "loss": 0.6473, + "step": 1607 + }, + { + "epoch": 0.1, + "grad_norm": 0.8990695476531982, + "learning_rate": 9.86524187903534e-06, + "loss": 0.6667, + "step": 1608 + }, + { + "epoch": 0.1, + "grad_norm": 0.9048877358436584, + "learning_rate": 9.865005181356166e-06, + "loss": 0.6437, + "step": 1609 + }, + { + "epoch": 0.1, + "grad_norm": 0.957685649394989, + "learning_rate": 9.864768278828548e-06, + "loss": 0.6228, + "step": 1610 + }, + { + "epoch": 0.1, + "grad_norm": 0.8602043986320496, + "learning_rate": 9.864531171462462e-06, + "loss": 0.5928, + "step": 1611 + }, + { + "epoch": 0.1, + "grad_norm": 0.9182524085044861, + "learning_rate": 9.86429385926789e-06, + "loss": 0.6859, + "step": 1612 + }, + { + "epoch": 0.1, + "grad_norm": 0.9720632433891296, + "learning_rate": 9.864056342254827e-06, + "loss": 0.6562, + "step": 1613 + }, + { + "epoch": 0.1, + "grad_norm": 0.9607463479042053, + "learning_rate": 9.86381862043327e-06, + "loss": 0.6783, + "step": 1614 + }, + { + "epoch": 0.1, + "grad_norm": 0.9194375276565552, + "learning_rate": 9.863580693813232e-06, + "loss": 0.6433, + "step": 1615 + }, + { + "epoch": 0.1, + "grad_norm": 0.9061447381973267, + "learning_rate": 9.86334256240473e-06, + "loss": 0.6577, + "step": 1616 + }, + { + "epoch": 0.1, + "grad_norm": 0.911880373954773, + "learning_rate": 9.86310422621779e-06, + "loss": 0.6437, + "step": 1617 + }, + { + "epoch": 0.1, + "grad_norm": 0.9014673233032227, + "learning_rate": 9.86286568526245e-06, + "loss": 0.6688, + "step": 1618 + }, + { + "epoch": 0.1, + "grad_norm": 0.8759530782699585, + "learning_rate": 9.862626939548751e-06, + "loss": 0.6889, + "step": 1619 + }, + { + "epoch": 0.1, + "grad_norm": 0.8872689008712769, + "learning_rate": 9.862387989086749e-06, + "loss": 0.6351, + "step": 1620 + }, + { + "epoch": 0.1, + "grad_norm": 0.912520706653595, + "learning_rate": 9.862148833886504e-06, + "loss": 0.6573, + "step": 1621 + }, + { + "epoch": 0.1, + "grad_norm": 0.935406506061554, + "learning_rate": 9.861909473958084e-06, + "loss": 0.6349, + "step": 1622 + }, + { + "epoch": 0.1, + "grad_norm": 0.9377623796463013, + "learning_rate": 9.861669909311571e-06, + "loss": 0.6324, + "step": 1623 + }, + { + "epoch": 0.1, + "grad_norm": 0.8664435744285583, + "learning_rate": 9.861430139957052e-06, + "loss": 0.6517, + "step": 1624 + }, + { + "epoch": 0.1, + "grad_norm": 0.9497208595275879, + "learning_rate": 9.861190165904617e-06, + "loss": 0.6703, + "step": 1625 + }, + { + "epoch": 0.1, + "grad_norm": 0.9303921461105347, + "learning_rate": 9.860949987164379e-06, + "loss": 0.611, + "step": 1626 + }, + { + "epoch": 0.1, + "grad_norm": 0.944831371307373, + "learning_rate": 9.860709603746445e-06, + "loss": 0.6534, + "step": 1627 + }, + { + "epoch": 0.1, + "grad_norm": 0.9013164043426514, + "learning_rate": 9.86046901566094e-06, + "loss": 0.6457, + "step": 1628 + }, + { + "epoch": 0.1, + "grad_norm": 0.9437874555587769, + "learning_rate": 9.860228222917992e-06, + "loss": 0.6238, + "step": 1629 + }, + { + "epoch": 0.1, + "grad_norm": 0.901542067527771, + "learning_rate": 9.859987225527742e-06, + "loss": 0.6299, + "step": 1630 + }, + { + "epoch": 0.1, + "grad_norm": 0.963375449180603, + "learning_rate": 9.859746023500337e-06, + "loss": 0.6798, + "step": 1631 + }, + { + "epoch": 0.1, + "grad_norm": 0.9021002054214478, + "learning_rate": 9.85950461684593e-06, + "loss": 0.6386, + "step": 1632 + }, + { + "epoch": 0.1, + "grad_norm": 0.932859480381012, + "learning_rate": 9.85926300557469e-06, + "loss": 0.6516, + "step": 1633 + }, + { + "epoch": 0.1, + "grad_norm": 0.8896989822387695, + "learning_rate": 9.85902118969679e-06, + "loss": 0.6372, + "step": 1634 + }, + { + "epoch": 0.1, + "grad_norm": 0.9466985464096069, + "learning_rate": 9.85877916922241e-06, + "loss": 0.6244, + "step": 1635 + }, + { + "epoch": 0.1, + "grad_norm": 0.9208292961120605, + "learning_rate": 9.858536944161743e-06, + "loss": 0.6742, + "step": 1636 + }, + { + "epoch": 0.1, + "grad_norm": 0.9316291213035583, + "learning_rate": 9.858294514524987e-06, + "loss": 0.6306, + "step": 1637 + }, + { + "epoch": 0.1, + "grad_norm": 0.9085369110107422, + "learning_rate": 9.858051880322347e-06, + "loss": 0.5967, + "step": 1638 + }, + { + "epoch": 0.1, + "grad_norm": 0.9222848415374756, + "learning_rate": 9.857809041564044e-06, + "loss": 0.656, + "step": 1639 + }, + { + "epoch": 0.1, + "grad_norm": 0.9137614369392395, + "learning_rate": 9.857565998260302e-06, + "loss": 0.6778, + "step": 1640 + }, + { + "epoch": 0.1, + "grad_norm": 0.8836297392845154, + "learning_rate": 9.857322750421353e-06, + "loss": 0.6172, + "step": 1641 + }, + { + "epoch": 0.1, + "grad_norm": 0.9377101063728333, + "learning_rate": 9.857079298057442e-06, + "loss": 0.6562, + "step": 1642 + }, + { + "epoch": 0.1, + "grad_norm": 0.938580334186554, + "learning_rate": 9.856835641178816e-06, + "loss": 0.6937, + "step": 1643 + }, + { + "epoch": 0.1, + "grad_norm": 0.9680647253990173, + "learning_rate": 9.856591779795738e-06, + "loss": 0.6493, + "step": 1644 + }, + { + "epoch": 0.1, + "grad_norm": 0.9074171781539917, + "learning_rate": 9.856347713918475e-06, + "loss": 0.6752, + "step": 1645 + }, + { + "epoch": 0.1, + "grad_norm": 0.8547381162643433, + "learning_rate": 9.856103443557304e-06, + "loss": 0.623, + "step": 1646 + }, + { + "epoch": 0.1, + "grad_norm": 1.0403729677200317, + "learning_rate": 9.85585896872251e-06, + "loss": 0.6593, + "step": 1647 + }, + { + "epoch": 0.1, + "grad_norm": 0.9444959163665771, + "learning_rate": 9.855614289424386e-06, + "loss": 0.634, + "step": 1648 + }, + { + "epoch": 0.1, + "grad_norm": 0.9254828095436096, + "learning_rate": 9.855369405673236e-06, + "loss": 0.5757, + "step": 1649 + }, + { + "epoch": 0.1, + "grad_norm": 0.9265499711036682, + "learning_rate": 9.855124317479372e-06, + "loss": 0.6326, + "step": 1650 + }, + { + "epoch": 0.1, + "grad_norm": 0.9064761400222778, + "learning_rate": 9.854879024853113e-06, + "loss": 0.6488, + "step": 1651 + }, + { + "epoch": 0.1, + "grad_norm": 0.9563080072402954, + "learning_rate": 9.854633527804787e-06, + "loss": 0.642, + "step": 1652 + }, + { + "epoch": 0.1, + "grad_norm": 0.838525116443634, + "learning_rate": 9.85438782634473e-06, + "loss": 0.5696, + "step": 1653 + }, + { + "epoch": 0.1, + "grad_norm": 0.8792423009872437, + "learning_rate": 9.854141920483289e-06, + "loss": 0.6282, + "step": 1654 + }, + { + "epoch": 0.1, + "grad_norm": 0.9897140264511108, + "learning_rate": 9.853895810230818e-06, + "loss": 0.6317, + "step": 1655 + }, + { + "epoch": 0.1, + "grad_norm": 0.9357428550720215, + "learning_rate": 9.853649495597682e-06, + "loss": 0.691, + "step": 1656 + }, + { + "epoch": 0.1, + "grad_norm": 0.8924740552902222, + "learning_rate": 9.853402976594248e-06, + "loss": 0.6754, + "step": 1657 + }, + { + "epoch": 0.11, + "grad_norm": 0.9512656331062317, + "learning_rate": 9.8531562532309e-06, + "loss": 0.6218, + "step": 1658 + }, + { + "epoch": 0.11, + "grad_norm": 0.9587389826774597, + "learning_rate": 9.852909325518022e-06, + "loss": 0.6707, + "step": 1659 + }, + { + "epoch": 0.11, + "grad_norm": 0.9361017942428589, + "learning_rate": 9.852662193466019e-06, + "loss": 0.6475, + "step": 1660 + }, + { + "epoch": 0.11, + "grad_norm": 0.9150497913360596, + "learning_rate": 9.852414857085288e-06, + "loss": 0.7143, + "step": 1661 + }, + { + "epoch": 0.11, + "grad_norm": 0.9618809223175049, + "learning_rate": 9.85216731638625e-06, + "loss": 0.646, + "step": 1662 + }, + { + "epoch": 0.11, + "grad_norm": 0.8974446654319763, + "learning_rate": 9.851919571379326e-06, + "loss": 0.6958, + "step": 1663 + }, + { + "epoch": 0.11, + "grad_norm": 0.9085642099380493, + "learning_rate": 9.851671622074947e-06, + "loss": 0.6291, + "step": 1664 + }, + { + "epoch": 0.11, + "grad_norm": 0.9605396389961243, + "learning_rate": 9.851423468483554e-06, + "loss": 0.669, + "step": 1665 + }, + { + "epoch": 0.11, + "grad_norm": 1.0041121244430542, + "learning_rate": 9.851175110615594e-06, + "loss": 0.5982, + "step": 1666 + }, + { + "epoch": 0.11, + "grad_norm": 0.9205458164215088, + "learning_rate": 9.850926548481528e-06, + "loss": 0.6587, + "step": 1667 + }, + { + "epoch": 0.11, + "grad_norm": 0.9921442270278931, + "learning_rate": 9.850677782091818e-06, + "loss": 0.6505, + "step": 1668 + }, + { + "epoch": 0.11, + "grad_norm": 0.9092791080474854, + "learning_rate": 9.850428811456943e-06, + "loss": 0.5881, + "step": 1669 + }, + { + "epoch": 0.11, + "grad_norm": 1.0009846687316895, + "learning_rate": 9.850179636587383e-06, + "loss": 0.657, + "step": 1670 + }, + { + "epoch": 0.11, + "grad_norm": 0.9284378886222839, + "learning_rate": 9.849930257493632e-06, + "loss": 0.616, + "step": 1671 + }, + { + "epoch": 0.11, + "grad_norm": 1.0465761423110962, + "learning_rate": 9.849680674186188e-06, + "loss": 0.7, + "step": 1672 + }, + { + "epoch": 0.11, + "grad_norm": 0.9236946105957031, + "learning_rate": 9.849430886675564e-06, + "loss": 0.6498, + "step": 1673 + }, + { + "epoch": 0.11, + "grad_norm": 0.8840457201004028, + "learning_rate": 9.849180894972272e-06, + "loss": 0.6517, + "step": 1674 + }, + { + "epoch": 0.11, + "grad_norm": 0.8201990723609924, + "learning_rate": 9.848930699086846e-06, + "loss": 0.6403, + "step": 1675 + }, + { + "epoch": 0.11, + "grad_norm": 0.9330858588218689, + "learning_rate": 9.848680299029813e-06, + "loss": 0.6374, + "step": 1676 + }, + { + "epoch": 0.11, + "grad_norm": 0.9151015877723694, + "learning_rate": 9.848429694811721e-06, + "loss": 0.5886, + "step": 1677 + }, + { + "epoch": 0.11, + "grad_norm": 0.9654482007026672, + "learning_rate": 9.84817888644312e-06, + "loss": 0.6554, + "step": 1678 + }, + { + "epoch": 0.11, + "grad_norm": 0.9523151516914368, + "learning_rate": 9.847927873934573e-06, + "loss": 0.6361, + "step": 1679 + }, + { + "epoch": 0.11, + "grad_norm": 0.9912353157997131, + "learning_rate": 9.847676657296647e-06, + "loss": 0.6584, + "step": 1680 + }, + { + "epoch": 0.11, + "grad_norm": 0.937496542930603, + "learning_rate": 9.847425236539922e-06, + "loss": 0.6502, + "step": 1681 + }, + { + "epoch": 0.11, + "grad_norm": 0.8653977513313293, + "learning_rate": 9.847173611674982e-06, + "loss": 0.605, + "step": 1682 + }, + { + "epoch": 0.11, + "grad_norm": 0.9031038880348206, + "learning_rate": 9.846921782712424e-06, + "loss": 0.6144, + "step": 1683 + }, + { + "epoch": 0.11, + "grad_norm": 0.9280396699905396, + "learning_rate": 9.846669749662851e-06, + "loss": 0.615, + "step": 1684 + }, + { + "epoch": 0.11, + "grad_norm": 0.9092390537261963, + "learning_rate": 9.846417512536874e-06, + "loss": 0.6235, + "step": 1685 + }, + { + "epoch": 0.11, + "grad_norm": 0.9543402791023254, + "learning_rate": 9.846165071345118e-06, + "loss": 0.6555, + "step": 1686 + }, + { + "epoch": 0.11, + "grad_norm": 1.045227289199829, + "learning_rate": 9.845912426098206e-06, + "loss": 0.6761, + "step": 1687 + }, + { + "epoch": 0.11, + "grad_norm": 0.9226247072219849, + "learning_rate": 9.845659576806781e-06, + "loss": 0.6211, + "step": 1688 + }, + { + "epoch": 0.11, + "grad_norm": 0.9279161691665649, + "learning_rate": 9.845406523481488e-06, + "loss": 0.6579, + "step": 1689 + }, + { + "epoch": 0.11, + "grad_norm": 0.91354900598526, + "learning_rate": 9.845153266132981e-06, + "loss": 0.6024, + "step": 1690 + }, + { + "epoch": 0.11, + "grad_norm": 0.9093358516693115, + "learning_rate": 9.844899804771927e-06, + "loss": 0.6232, + "step": 1691 + }, + { + "epoch": 0.11, + "grad_norm": 0.9965054988861084, + "learning_rate": 9.844646139408995e-06, + "loss": 0.6429, + "step": 1692 + }, + { + "epoch": 0.11, + "grad_norm": 0.9603714346885681, + "learning_rate": 9.844392270054868e-06, + "loss": 0.622, + "step": 1693 + }, + { + "epoch": 0.11, + "grad_norm": 0.9581913948059082, + "learning_rate": 9.844138196720236e-06, + "loss": 0.6845, + "step": 1694 + }, + { + "epoch": 0.11, + "grad_norm": 0.911685585975647, + "learning_rate": 9.843883919415795e-06, + "loss": 0.685, + "step": 1695 + }, + { + "epoch": 0.11, + "grad_norm": 0.9244683980941772, + "learning_rate": 9.843629438152252e-06, + "loss": 0.6441, + "step": 1696 + }, + { + "epoch": 0.11, + "grad_norm": 0.9643012285232544, + "learning_rate": 9.843374752940323e-06, + "loss": 0.6346, + "step": 1697 + }, + { + "epoch": 0.11, + "grad_norm": 0.9334665536880493, + "learning_rate": 9.843119863790733e-06, + "loss": 0.7161, + "step": 1698 + }, + { + "epoch": 0.11, + "grad_norm": 0.9710047245025635, + "learning_rate": 9.842864770714213e-06, + "loss": 0.6233, + "step": 1699 + }, + { + "epoch": 0.11, + "grad_norm": 0.9540897607803345, + "learning_rate": 9.842609473721505e-06, + "loss": 0.6271, + "step": 1700 + }, + { + "epoch": 0.11, + "grad_norm": 0.9325253367424011, + "learning_rate": 9.842353972823358e-06, + "loss": 0.6153, + "step": 1701 + }, + { + "epoch": 0.11, + "grad_norm": 0.8942682147026062, + "learning_rate": 9.842098268030532e-06, + "loss": 0.5922, + "step": 1702 + }, + { + "epoch": 0.11, + "grad_norm": 0.930939793586731, + "learning_rate": 9.84184235935379e-06, + "loss": 0.6366, + "step": 1703 + }, + { + "epoch": 0.11, + "grad_norm": 0.9117228984832764, + "learning_rate": 9.84158624680391e-06, + "loss": 0.647, + "step": 1704 + }, + { + "epoch": 0.11, + "grad_norm": 0.9259521961212158, + "learning_rate": 9.841329930391678e-06, + "loss": 0.6384, + "step": 1705 + }, + { + "epoch": 0.11, + "grad_norm": 0.867400050163269, + "learning_rate": 9.841073410127884e-06, + "loss": 0.6741, + "step": 1706 + }, + { + "epoch": 0.11, + "grad_norm": 1.0280332565307617, + "learning_rate": 9.840816686023329e-06, + "loss": 0.64, + "step": 1707 + }, + { + "epoch": 0.11, + "grad_norm": 0.9076325297355652, + "learning_rate": 9.840559758088821e-06, + "loss": 0.5936, + "step": 1708 + }, + { + "epoch": 0.11, + "grad_norm": 0.9110800623893738, + "learning_rate": 9.840302626335182e-06, + "loss": 0.6145, + "step": 1709 + }, + { + "epoch": 0.11, + "grad_norm": 0.8760718107223511, + "learning_rate": 9.84004529077324e-06, + "loss": 0.6375, + "step": 1710 + }, + { + "epoch": 0.11, + "grad_norm": 0.9137043356895447, + "learning_rate": 9.839787751413825e-06, + "loss": 0.6016, + "step": 1711 + }, + { + "epoch": 0.11, + "grad_norm": 0.8688681125640869, + "learning_rate": 9.839530008267785e-06, + "loss": 0.6208, + "step": 1712 + }, + { + "epoch": 0.11, + "grad_norm": 0.9339778423309326, + "learning_rate": 9.839272061345974e-06, + "loss": 0.6514, + "step": 1713 + }, + { + "epoch": 0.11, + "grad_norm": 0.9327898025512695, + "learning_rate": 9.839013910659249e-06, + "loss": 0.6528, + "step": 1714 + }, + { + "epoch": 0.11, + "grad_norm": 0.9317489266395569, + "learning_rate": 9.838755556218483e-06, + "loss": 0.6433, + "step": 1715 + }, + { + "epoch": 0.11, + "grad_norm": 0.9479151368141174, + "learning_rate": 9.838496998034552e-06, + "loss": 0.692, + "step": 1716 + }, + { + "epoch": 0.11, + "grad_norm": 0.9237775802612305, + "learning_rate": 9.838238236118344e-06, + "loss": 0.621, + "step": 1717 + }, + { + "epoch": 0.11, + "grad_norm": 0.9310511946678162, + "learning_rate": 9.837979270480758e-06, + "loss": 0.6333, + "step": 1718 + }, + { + "epoch": 0.11, + "grad_norm": 0.877641499042511, + "learning_rate": 9.837720101132692e-06, + "loss": 0.6943, + "step": 1719 + }, + { + "epoch": 0.11, + "grad_norm": 0.9281149506568909, + "learning_rate": 9.837460728085062e-06, + "loss": 0.6446, + "step": 1720 + }, + { + "epoch": 0.11, + "grad_norm": 0.9767260551452637, + "learning_rate": 9.83720115134879e-06, + "loss": 0.7114, + "step": 1721 + }, + { + "epoch": 0.11, + "grad_norm": 0.9054911732673645, + "learning_rate": 9.836941370934806e-06, + "loss": 0.6143, + "step": 1722 + }, + { + "epoch": 0.11, + "grad_norm": 0.9925005435943604, + "learning_rate": 9.836681386854045e-06, + "loss": 0.6386, + "step": 1723 + }, + { + "epoch": 0.11, + "grad_norm": 0.9308101534843445, + "learning_rate": 9.836421199117456e-06, + "loss": 0.6501, + "step": 1724 + }, + { + "epoch": 0.11, + "grad_norm": 0.9074007868766785, + "learning_rate": 9.836160807735997e-06, + "loss": 0.6792, + "step": 1725 + }, + { + "epoch": 0.11, + "grad_norm": 0.9303346276283264, + "learning_rate": 9.83590021272063e-06, + "loss": 0.6218, + "step": 1726 + }, + { + "epoch": 0.11, + "grad_norm": 0.9566819667816162, + "learning_rate": 9.835639414082327e-06, + "loss": 0.6525, + "step": 1727 + }, + { + "epoch": 0.11, + "grad_norm": 0.9563994407653809, + "learning_rate": 9.83537841183207e-06, + "loss": 0.6473, + "step": 1728 + }, + { + "epoch": 0.11, + "grad_norm": 0.9133448600769043, + "learning_rate": 9.83511720598085e-06, + "loss": 0.61, + "step": 1729 + }, + { + "epoch": 0.11, + "grad_norm": 0.9543222784996033, + "learning_rate": 9.834855796539665e-06, + "loss": 0.614, + "step": 1730 + }, + { + "epoch": 0.11, + "grad_norm": 0.9356175661087036, + "learning_rate": 9.834594183519521e-06, + "loss": 0.6181, + "step": 1731 + }, + { + "epoch": 0.11, + "grad_norm": 0.9626755118370056, + "learning_rate": 9.834332366931435e-06, + "loss": 0.6355, + "step": 1732 + }, + { + "epoch": 0.11, + "grad_norm": 0.9340695142745972, + "learning_rate": 9.834070346786428e-06, + "loss": 0.6235, + "step": 1733 + }, + { + "epoch": 0.11, + "grad_norm": 0.916644811630249, + "learning_rate": 9.833808123095538e-06, + "loss": 0.6401, + "step": 1734 + }, + { + "epoch": 0.11, + "grad_norm": 0.9744462370872498, + "learning_rate": 9.833545695869802e-06, + "loss": 0.6916, + "step": 1735 + }, + { + "epoch": 0.11, + "grad_norm": 0.9321338534355164, + "learning_rate": 9.833283065120272e-06, + "loss": 0.6363, + "step": 1736 + }, + { + "epoch": 0.11, + "grad_norm": 0.9485877752304077, + "learning_rate": 9.833020230858005e-06, + "loss": 0.6865, + "step": 1737 + }, + { + "epoch": 0.11, + "grad_norm": 0.8846791982650757, + "learning_rate": 9.832757193094072e-06, + "loss": 0.6522, + "step": 1738 + }, + { + "epoch": 0.11, + "grad_norm": 0.8400406837463379, + "learning_rate": 9.832493951839541e-06, + "loss": 0.626, + "step": 1739 + }, + { + "epoch": 0.11, + "grad_norm": 0.9057971239089966, + "learning_rate": 9.832230507105504e-06, + "loss": 0.6248, + "step": 1740 + }, + { + "epoch": 0.11, + "grad_norm": 0.9419105052947998, + "learning_rate": 9.831966858903049e-06, + "loss": 0.6535, + "step": 1741 + }, + { + "epoch": 0.11, + "grad_norm": 0.8836336135864258, + "learning_rate": 9.83170300724328e-06, + "loss": 0.6482, + "step": 1742 + }, + { + "epoch": 0.11, + "grad_norm": 0.861971378326416, + "learning_rate": 9.831438952137304e-06, + "loss": 0.6039, + "step": 1743 + }, + { + "epoch": 0.11, + "grad_norm": 0.8943654298782349, + "learning_rate": 9.831174693596241e-06, + "loss": 0.6038, + "step": 1744 + }, + { + "epoch": 0.11, + "grad_norm": 0.9814664721488953, + "learning_rate": 9.83091023163122e-06, + "loss": 0.6729, + "step": 1745 + }, + { + "epoch": 0.11, + "grad_norm": 0.8936158418655396, + "learning_rate": 9.830645566253374e-06, + "loss": 0.6335, + "step": 1746 + }, + { + "epoch": 0.11, + "grad_norm": 0.9102863073348999, + "learning_rate": 9.830380697473848e-06, + "loss": 0.6611, + "step": 1747 + }, + { + "epoch": 0.11, + "grad_norm": 0.9278464913368225, + "learning_rate": 9.830115625303793e-06, + "loss": 0.6865, + "step": 1748 + }, + { + "epoch": 0.11, + "grad_norm": 0.921346127986908, + "learning_rate": 9.829850349754373e-06, + "loss": 0.6441, + "step": 1749 + }, + { + "epoch": 0.11, + "grad_norm": 0.9085983037948608, + "learning_rate": 9.829584870836756e-06, + "loss": 0.6905, + "step": 1750 + }, + { + "epoch": 0.11, + "grad_norm": 0.8647844195365906, + "learning_rate": 9.82931918856212e-06, + "loss": 0.6083, + "step": 1751 + }, + { + "epoch": 0.11, + "grad_norm": 0.9357777237892151, + "learning_rate": 9.829053302941656e-06, + "loss": 0.6546, + "step": 1752 + }, + { + "epoch": 0.11, + "grad_norm": 0.9371474981307983, + "learning_rate": 9.828787213986554e-06, + "loss": 0.6826, + "step": 1753 + }, + { + "epoch": 0.11, + "grad_norm": 0.8982768654823303, + "learning_rate": 9.82852092170802e-06, + "loss": 0.6184, + "step": 1754 + }, + { + "epoch": 0.11, + "grad_norm": 0.8816835284233093, + "learning_rate": 9.82825442611727e-06, + "loss": 0.6204, + "step": 1755 + }, + { + "epoch": 0.11, + "grad_norm": 1.0181394815444946, + "learning_rate": 9.82798772722552e-06, + "loss": 0.6937, + "step": 1756 + }, + { + "epoch": 0.11, + "grad_norm": 0.9692837595939636, + "learning_rate": 9.827720825044003e-06, + "loss": 0.6443, + "step": 1757 + }, + { + "epoch": 0.11, + "grad_norm": 0.9221457242965698, + "learning_rate": 9.827453719583957e-06, + "loss": 0.6109, + "step": 1758 + }, + { + "epoch": 0.11, + "grad_norm": 0.8878170847892761, + "learning_rate": 9.827186410856627e-06, + "loss": 0.5887, + "step": 1759 + }, + { + "epoch": 0.11, + "grad_norm": 0.9767280220985413, + "learning_rate": 9.82691889887327e-06, + "loss": 0.5916, + "step": 1760 + }, + { + "epoch": 0.11, + "grad_norm": 0.9061947464942932, + "learning_rate": 9.82665118364515e-06, + "loss": 0.6084, + "step": 1761 + }, + { + "epoch": 0.11, + "grad_norm": 0.9700713753700256, + "learning_rate": 9.82638326518354e-06, + "loss": 0.6003, + "step": 1762 + }, + { + "epoch": 0.11, + "grad_norm": 0.977722704410553, + "learning_rate": 9.826115143499721e-06, + "loss": 0.6788, + "step": 1763 + }, + { + "epoch": 0.11, + "grad_norm": 0.9503071904182434, + "learning_rate": 9.82584681860498e-06, + "loss": 0.6288, + "step": 1764 + }, + { + "epoch": 0.11, + "grad_norm": 0.9503450393676758, + "learning_rate": 9.82557829051062e-06, + "loss": 0.6407, + "step": 1765 + }, + { + "epoch": 0.11, + "grad_norm": 1.0136359930038452, + "learning_rate": 9.825309559227944e-06, + "loss": 0.7054, + "step": 1766 + }, + { + "epoch": 0.11, + "grad_norm": 1.0087224245071411, + "learning_rate": 9.825040624768267e-06, + "loss": 0.6528, + "step": 1767 + }, + { + "epoch": 0.11, + "grad_norm": 0.9373607039451599, + "learning_rate": 9.824771487142917e-06, + "loss": 0.6851, + "step": 1768 + }, + { + "epoch": 0.11, + "grad_norm": 0.863404393196106, + "learning_rate": 9.824502146363222e-06, + "loss": 0.6083, + "step": 1769 + }, + { + "epoch": 0.11, + "grad_norm": 0.9172773361206055, + "learning_rate": 9.824232602440524e-06, + "loss": 0.647, + "step": 1770 + }, + { + "epoch": 0.11, + "grad_norm": 0.8769250512123108, + "learning_rate": 9.823962855386175e-06, + "loss": 0.6657, + "step": 1771 + }, + { + "epoch": 0.11, + "grad_norm": 0.9721053838729858, + "learning_rate": 9.823692905211533e-06, + "loss": 0.5903, + "step": 1772 + }, + { + "epoch": 0.11, + "grad_norm": 0.9308158159255981, + "learning_rate": 9.823422751927961e-06, + "loss": 0.6218, + "step": 1773 + }, + { + "epoch": 0.11, + "grad_norm": 0.9058137536048889, + "learning_rate": 9.823152395546836e-06, + "loss": 0.6584, + "step": 1774 + }, + { + "epoch": 0.11, + "grad_norm": 0.9022964239120483, + "learning_rate": 9.822881836079543e-06, + "loss": 0.6114, + "step": 1775 + }, + { + "epoch": 0.11, + "grad_norm": 0.9127461910247803, + "learning_rate": 9.822611073537474e-06, + "loss": 0.653, + "step": 1776 + }, + { + "epoch": 0.11, + "grad_norm": 0.9295786023139954, + "learning_rate": 9.822340107932028e-06, + "loss": 0.569, + "step": 1777 + }, + { + "epoch": 0.11, + "grad_norm": 0.945152759552002, + "learning_rate": 9.822068939274616e-06, + "loss": 0.6499, + "step": 1778 + }, + { + "epoch": 0.11, + "grad_norm": 0.9397128224372864, + "learning_rate": 9.821797567576656e-06, + "loss": 0.6069, + "step": 1779 + }, + { + "epoch": 0.11, + "grad_norm": 1.0003842115402222, + "learning_rate": 9.821525992849575e-06, + "loss": 0.6875, + "step": 1780 + }, + { + "epoch": 0.11, + "grad_norm": 0.9174728393554688, + "learning_rate": 9.821254215104808e-06, + "loss": 0.673, + "step": 1781 + }, + { + "epoch": 0.11, + "grad_norm": 0.9309795498847961, + "learning_rate": 9.820982234353795e-06, + "loss": 0.6023, + "step": 1782 + }, + { + "epoch": 0.11, + "grad_norm": 0.9999585747718811, + "learning_rate": 9.820710050607994e-06, + "loss": 0.6542, + "step": 1783 + }, + { + "epoch": 0.11, + "grad_norm": 0.8771758675575256, + "learning_rate": 9.820437663878862e-06, + "loss": 0.632, + "step": 1784 + }, + { + "epoch": 0.11, + "grad_norm": 0.9545724391937256, + "learning_rate": 9.820165074177867e-06, + "loss": 0.673, + "step": 1785 + }, + { + "epoch": 0.11, + "grad_norm": 0.9740934371948242, + "learning_rate": 9.819892281516491e-06, + "loss": 0.6621, + "step": 1786 + }, + { + "epoch": 0.11, + "grad_norm": 0.9247666597366333, + "learning_rate": 9.819619285906217e-06, + "loss": 0.609, + "step": 1787 + }, + { + "epoch": 0.11, + "grad_norm": 0.9412689208984375, + "learning_rate": 9.819346087358542e-06, + "loss": 0.689, + "step": 1788 + }, + { + "epoch": 0.11, + "grad_norm": 1.0129718780517578, + "learning_rate": 9.819072685884969e-06, + "loss": 0.6117, + "step": 1789 + }, + { + "epoch": 0.11, + "grad_norm": 0.9594516754150391, + "learning_rate": 9.818799081497008e-06, + "loss": 0.6672, + "step": 1790 + }, + { + "epoch": 0.11, + "grad_norm": 0.9422503709793091, + "learning_rate": 9.818525274206184e-06, + "loss": 0.6601, + "step": 1791 + }, + { + "epoch": 0.11, + "grad_norm": 0.9016688466072083, + "learning_rate": 9.818251264024018e-06, + "loss": 0.6811, + "step": 1792 + }, + { + "epoch": 0.11, + "grad_norm": 0.9061679244041443, + "learning_rate": 9.817977050962058e-06, + "loss": 0.6095, + "step": 1793 + }, + { + "epoch": 0.11, + "grad_norm": 0.973602831363678, + "learning_rate": 9.817702635031842e-06, + "loss": 0.6499, + "step": 1794 + }, + { + "epoch": 0.11, + "grad_norm": 0.9249684810638428, + "learning_rate": 9.817428016244928e-06, + "loss": 0.6369, + "step": 1795 + }, + { + "epoch": 0.11, + "grad_norm": 0.9385564923286438, + "learning_rate": 9.81715319461288e-06, + "loss": 0.7354, + "step": 1796 + }, + { + "epoch": 0.11, + "grad_norm": 0.9195820093154907, + "learning_rate": 9.816878170147268e-06, + "loss": 0.6723, + "step": 1797 + }, + { + "epoch": 0.11, + "grad_norm": 1.0029053688049316, + "learning_rate": 9.816602942859672e-06, + "loss": 0.6807, + "step": 1798 + }, + { + "epoch": 0.11, + "grad_norm": 1.0146007537841797, + "learning_rate": 9.816327512761683e-06, + "loss": 0.6377, + "step": 1799 + }, + { + "epoch": 0.11, + "grad_norm": 0.9209313988685608, + "learning_rate": 9.816051879864896e-06, + "loss": 0.5904, + "step": 1800 + }, + { + "epoch": 0.11, + "grad_norm": 0.8723121881484985, + "learning_rate": 9.81577604418092e-06, + "loss": 0.5807, + "step": 1801 + }, + { + "epoch": 0.11, + "grad_norm": 0.9405813217163086, + "learning_rate": 9.815500005721365e-06, + "loss": 0.6401, + "step": 1802 + }, + { + "epoch": 0.11, + "grad_norm": 1.0136600732803345, + "learning_rate": 9.815223764497859e-06, + "loss": 0.6395, + "step": 1803 + }, + { + "epoch": 0.11, + "grad_norm": 0.8953354358673096, + "learning_rate": 9.814947320522031e-06, + "loss": 0.6236, + "step": 1804 + }, + { + "epoch": 0.11, + "grad_norm": 0.9782286882400513, + "learning_rate": 9.81467067380552e-06, + "loss": 0.6592, + "step": 1805 + }, + { + "epoch": 0.11, + "grad_norm": 0.8998913168907166, + "learning_rate": 9.814393824359975e-06, + "loss": 0.6448, + "step": 1806 + }, + { + "epoch": 0.11, + "grad_norm": 0.8747649788856506, + "learning_rate": 9.814116772197058e-06, + "loss": 0.6038, + "step": 1807 + }, + { + "epoch": 0.11, + "grad_norm": 0.980236828327179, + "learning_rate": 9.813839517328428e-06, + "loss": 0.6272, + "step": 1808 + }, + { + "epoch": 0.11, + "grad_norm": 0.9255844354629517, + "learning_rate": 9.813562059765762e-06, + "loss": 0.6626, + "step": 1809 + }, + { + "epoch": 0.11, + "grad_norm": 0.9551252722740173, + "learning_rate": 9.813284399520744e-06, + "loss": 0.6511, + "step": 1810 + }, + { + "epoch": 0.11, + "grad_norm": 0.9699724912643433, + "learning_rate": 9.813006536605063e-06, + "loss": 0.6487, + "step": 1811 + }, + { + "epoch": 0.11, + "grad_norm": 0.9553450345993042, + "learning_rate": 9.812728471030421e-06, + "loss": 0.6733, + "step": 1812 + }, + { + "epoch": 0.11, + "grad_norm": 0.9273084402084351, + "learning_rate": 9.812450202808525e-06, + "loss": 0.6379, + "step": 1813 + }, + { + "epoch": 0.11, + "grad_norm": 0.9686819911003113, + "learning_rate": 9.812171731951092e-06, + "loss": 0.6156, + "step": 1814 + }, + { + "epoch": 0.11, + "grad_norm": 0.9068811535835266, + "learning_rate": 9.811893058469848e-06, + "loss": 0.6301, + "step": 1815 + }, + { + "epoch": 0.12, + "grad_norm": 0.8845064043998718, + "learning_rate": 9.811614182376527e-06, + "loss": 0.6841, + "step": 1816 + }, + { + "epoch": 0.12, + "grad_norm": 0.9105836153030396, + "learning_rate": 9.811335103682872e-06, + "loss": 0.6024, + "step": 1817 + }, + { + "epoch": 0.12, + "grad_norm": 0.9044576287269592, + "learning_rate": 9.81105582240063e-06, + "loss": 0.6668, + "step": 1818 + }, + { + "epoch": 0.12, + "grad_norm": 0.9328505992889404, + "learning_rate": 9.810776338541566e-06, + "loss": 0.6684, + "step": 1819 + }, + { + "epoch": 0.12, + "grad_norm": 0.9516772031784058, + "learning_rate": 9.810496652117445e-06, + "loss": 0.6531, + "step": 1820 + }, + { + "epoch": 0.12, + "grad_norm": 0.8713773488998413, + "learning_rate": 9.810216763140046e-06, + "loss": 0.586, + "step": 1821 + }, + { + "epoch": 0.12, + "grad_norm": 0.9502965807914734, + "learning_rate": 9.809936671621151e-06, + "loss": 0.5754, + "step": 1822 + }, + { + "epoch": 0.12, + "grad_norm": 0.9351384043693542, + "learning_rate": 9.809656377572556e-06, + "loss": 0.6786, + "step": 1823 + }, + { + "epoch": 0.12, + "grad_norm": 0.8560097217559814, + "learning_rate": 9.809375881006063e-06, + "loss": 0.5677, + "step": 1824 + }, + { + "epoch": 0.12, + "grad_norm": 0.8590288162231445, + "learning_rate": 9.809095181933482e-06, + "loss": 0.6032, + "step": 1825 + }, + { + "epoch": 0.12, + "grad_norm": 1.0070056915283203, + "learning_rate": 9.808814280366632e-06, + "loss": 0.6919, + "step": 1826 + }, + { + "epoch": 0.12, + "grad_norm": 0.9655309915542603, + "learning_rate": 9.808533176317341e-06, + "loss": 0.6631, + "step": 1827 + }, + { + "epoch": 0.12, + "grad_norm": 1.0063858032226562, + "learning_rate": 9.808251869797445e-06, + "loss": 0.6876, + "step": 1828 + }, + { + "epoch": 0.12, + "grad_norm": 0.9091975092887878, + "learning_rate": 9.807970360818791e-06, + "loss": 0.6122, + "step": 1829 + }, + { + "epoch": 0.12, + "grad_norm": 1.0076450109481812, + "learning_rate": 9.80768864939323e-06, + "loss": 0.6995, + "step": 1830 + }, + { + "epoch": 0.12, + "grad_norm": 0.8727695345878601, + "learning_rate": 9.807406735532625e-06, + "loss": 0.6056, + "step": 1831 + }, + { + "epoch": 0.12, + "grad_norm": 0.9693520069122314, + "learning_rate": 9.807124619248847e-06, + "loss": 0.6708, + "step": 1832 + }, + { + "epoch": 0.12, + "grad_norm": 0.993155300617218, + "learning_rate": 9.806842300553772e-06, + "loss": 0.6415, + "step": 1833 + }, + { + "epoch": 0.12, + "grad_norm": 0.9352355599403381, + "learning_rate": 9.806559779459291e-06, + "loss": 0.6858, + "step": 1834 + }, + { + "epoch": 0.12, + "grad_norm": 0.8845545649528503, + "learning_rate": 9.806277055977299e-06, + "loss": 0.6022, + "step": 1835 + }, + { + "epoch": 0.12, + "grad_norm": 0.9431570768356323, + "learning_rate": 9.8059941301197e-06, + "loss": 0.6488, + "step": 1836 + }, + { + "epoch": 0.12, + "grad_norm": 0.9353639483451843, + "learning_rate": 9.805711001898406e-06, + "loss": 0.6399, + "step": 1837 + }, + { + "epoch": 0.12, + "grad_norm": 0.9036180973052979, + "learning_rate": 9.805427671325339e-06, + "loss": 0.6234, + "step": 1838 + }, + { + "epoch": 0.12, + "grad_norm": 0.8938383460044861, + "learning_rate": 9.80514413841243e-06, + "loss": 0.5884, + "step": 1839 + }, + { + "epoch": 0.12, + "grad_norm": 1.001819372177124, + "learning_rate": 9.804860403171617e-06, + "loss": 0.6657, + "step": 1840 + }, + { + "epoch": 0.12, + "grad_norm": 0.9125610589981079, + "learning_rate": 9.804576465614848e-06, + "loss": 0.6196, + "step": 1841 + }, + { + "epoch": 0.12, + "grad_norm": 0.9416166543960571, + "learning_rate": 9.804292325754079e-06, + "loss": 0.6596, + "step": 1842 + }, + { + "epoch": 0.12, + "grad_norm": 0.9415349960327148, + "learning_rate": 9.804007983601271e-06, + "loss": 0.6558, + "step": 1843 + }, + { + "epoch": 0.12, + "grad_norm": 0.8949640393257141, + "learning_rate": 9.8037234391684e-06, + "loss": 0.6629, + "step": 1844 + }, + { + "epoch": 0.12, + "grad_norm": 0.9415730237960815, + "learning_rate": 9.803438692467446e-06, + "loss": 0.6158, + "step": 1845 + }, + { + "epoch": 0.12, + "grad_norm": 0.8983997106552124, + "learning_rate": 9.8031537435104e-06, + "loss": 0.6057, + "step": 1846 + }, + { + "epoch": 0.12, + "grad_norm": 0.8986216187477112, + "learning_rate": 9.802868592309255e-06, + "loss": 0.6404, + "step": 1847 + }, + { + "epoch": 0.12, + "grad_norm": 0.9952399134635925, + "learning_rate": 9.802583238876024e-06, + "loss": 0.6655, + "step": 1848 + }, + { + "epoch": 0.12, + "grad_norm": 0.908902108669281, + "learning_rate": 9.80229768322272e-06, + "loss": 0.6155, + "step": 1849 + }, + { + "epoch": 0.12, + "grad_norm": 0.9122000932693481, + "learning_rate": 9.802011925361366e-06, + "loss": 0.6594, + "step": 1850 + }, + { + "epoch": 0.12, + "grad_norm": 0.970879077911377, + "learning_rate": 9.801725965303995e-06, + "loss": 0.5872, + "step": 1851 + }, + { + "epoch": 0.12, + "grad_norm": 0.9796939492225647, + "learning_rate": 9.801439803062646e-06, + "loss": 0.6749, + "step": 1852 + }, + { + "epoch": 0.12, + "grad_norm": 0.8634384274482727, + "learning_rate": 9.801153438649371e-06, + "loss": 0.6442, + "step": 1853 + }, + { + "epoch": 0.12, + "grad_norm": 0.9319069981575012, + "learning_rate": 9.800866872076227e-06, + "loss": 0.6265, + "step": 1854 + }, + { + "epoch": 0.12, + "grad_norm": 0.8886886239051819, + "learning_rate": 9.80058010335528e-06, + "loss": 0.6443, + "step": 1855 + }, + { + "epoch": 0.12, + "grad_norm": 0.885466992855072, + "learning_rate": 9.800293132498603e-06, + "loss": 0.6565, + "step": 1856 + }, + { + "epoch": 0.12, + "grad_norm": 0.9097492694854736, + "learning_rate": 9.800005959518284e-06, + "loss": 0.6244, + "step": 1857 + }, + { + "epoch": 0.12, + "grad_norm": 0.9563896059989929, + "learning_rate": 9.79971858442641e-06, + "loss": 0.6734, + "step": 1858 + }, + { + "epoch": 0.12, + "grad_norm": 0.9626286625862122, + "learning_rate": 9.799431007235086e-06, + "loss": 0.6489, + "step": 1859 + }, + { + "epoch": 0.12, + "grad_norm": 0.9317120313644409, + "learning_rate": 9.799143227956416e-06, + "loss": 0.6892, + "step": 1860 + }, + { + "epoch": 0.12, + "grad_norm": 0.9715713262557983, + "learning_rate": 9.798855246602522e-06, + "loss": 0.6715, + "step": 1861 + }, + { + "epoch": 0.12, + "grad_norm": 0.9262539148330688, + "learning_rate": 9.798567063185525e-06, + "loss": 0.6057, + "step": 1862 + }, + { + "epoch": 0.12, + "grad_norm": 0.9007180333137512, + "learning_rate": 9.798278677717562e-06, + "loss": 0.6343, + "step": 1863 + }, + { + "epoch": 0.12, + "grad_norm": 0.9322105646133423, + "learning_rate": 9.797990090210777e-06, + "loss": 0.6516, + "step": 1864 + }, + { + "epoch": 0.12, + "grad_norm": 0.8944317102432251, + "learning_rate": 9.79770130067732e-06, + "loss": 0.6814, + "step": 1865 + }, + { + "epoch": 0.12, + "grad_norm": 0.8687607049942017, + "learning_rate": 9.797412309129351e-06, + "loss": 0.6282, + "step": 1866 + }, + { + "epoch": 0.12, + "grad_norm": 0.9158695340156555, + "learning_rate": 9.79712311557904e-06, + "loss": 0.6178, + "step": 1867 + }, + { + "epoch": 0.12, + "grad_norm": 0.9163758754730225, + "learning_rate": 9.79683372003856e-06, + "loss": 0.6176, + "step": 1868 + }, + { + "epoch": 0.12, + "grad_norm": 0.9717338681221008, + "learning_rate": 9.796544122520101e-06, + "loss": 0.6533, + "step": 1869 + }, + { + "epoch": 0.12, + "grad_norm": 0.880803108215332, + "learning_rate": 9.796254323035854e-06, + "loss": 0.5912, + "step": 1870 + }, + { + "epoch": 0.12, + "grad_norm": 0.8988786935806274, + "learning_rate": 9.795964321598023e-06, + "loss": 0.6374, + "step": 1871 + }, + { + "epoch": 0.12, + "grad_norm": 0.870625376701355, + "learning_rate": 9.795674118218819e-06, + "loss": 0.5529, + "step": 1872 + }, + { + "epoch": 0.12, + "grad_norm": 0.8748095631599426, + "learning_rate": 9.795383712910458e-06, + "loss": 0.6148, + "step": 1873 + }, + { + "epoch": 0.12, + "grad_norm": 0.962794303894043, + "learning_rate": 9.795093105685175e-06, + "loss": 0.658, + "step": 1874 + }, + { + "epoch": 0.12, + "grad_norm": 0.9512926340103149, + "learning_rate": 9.794802296555198e-06, + "loss": 0.6414, + "step": 1875 + }, + { + "epoch": 0.12, + "grad_norm": 0.8706688284873962, + "learning_rate": 9.79451128553278e-06, + "loss": 0.575, + "step": 1876 + }, + { + "epoch": 0.12, + "grad_norm": 0.9835572838783264, + "learning_rate": 9.794220072630168e-06, + "loss": 0.6807, + "step": 1877 + }, + { + "epoch": 0.12, + "grad_norm": 0.9953154921531677, + "learning_rate": 9.793928657859627e-06, + "loss": 0.6794, + "step": 1878 + }, + { + "epoch": 0.12, + "grad_norm": 0.9724439382553101, + "learning_rate": 9.793637041233428e-06, + "loss": 0.6771, + "step": 1879 + }, + { + "epoch": 0.12, + "grad_norm": 0.9492095708847046, + "learning_rate": 9.793345222763847e-06, + "loss": 0.6477, + "step": 1880 + }, + { + "epoch": 0.12, + "grad_norm": 0.8991506099700928, + "learning_rate": 9.793053202463176e-06, + "loss": 0.6047, + "step": 1881 + }, + { + "epoch": 0.12, + "grad_norm": 1.0061862468719482, + "learning_rate": 9.792760980343708e-06, + "loss": 0.6526, + "step": 1882 + }, + { + "epoch": 0.12, + "grad_norm": 0.9687420725822449, + "learning_rate": 9.792468556417746e-06, + "loss": 0.593, + "step": 1883 + }, + { + "epoch": 0.12, + "grad_norm": 0.9339932203292847, + "learning_rate": 9.792175930697608e-06, + "loss": 0.683, + "step": 1884 + }, + { + "epoch": 0.12, + "grad_norm": 0.900341808795929, + "learning_rate": 9.79188310319561e-06, + "loss": 0.6468, + "step": 1885 + }, + { + "epoch": 0.12, + "grad_norm": 1.0057995319366455, + "learning_rate": 9.791590073924086e-06, + "loss": 0.7412, + "step": 1886 + }, + { + "epoch": 0.12, + "grad_norm": 0.9101889729499817, + "learning_rate": 9.79129684289537e-06, + "loss": 0.6269, + "step": 1887 + }, + { + "epoch": 0.12, + "grad_norm": 0.8851762413978577, + "learning_rate": 9.791003410121815e-06, + "loss": 0.6335, + "step": 1888 + }, + { + "epoch": 0.12, + "grad_norm": 0.9171597361564636, + "learning_rate": 9.79070977561577e-06, + "loss": 0.6272, + "step": 1889 + }, + { + "epoch": 0.12, + "grad_norm": 0.8824100494384766, + "learning_rate": 9.790415939389604e-06, + "loss": 0.6263, + "step": 1890 + }, + { + "epoch": 0.12, + "grad_norm": 0.8969504237174988, + "learning_rate": 9.790121901455687e-06, + "loss": 0.6059, + "step": 1891 + }, + { + "epoch": 0.12, + "grad_norm": 0.9351462125778198, + "learning_rate": 9.7898276618264e-06, + "loss": 0.5978, + "step": 1892 + }, + { + "epoch": 0.12, + "grad_norm": 0.8654520511627197, + "learning_rate": 9.789533220514132e-06, + "loss": 0.5934, + "step": 1893 + }, + { + "epoch": 0.12, + "grad_norm": 0.9118187427520752, + "learning_rate": 9.789238577531284e-06, + "loss": 0.6832, + "step": 1894 + }, + { + "epoch": 0.12, + "grad_norm": 0.9300076365470886, + "learning_rate": 9.788943732890258e-06, + "loss": 0.5968, + "step": 1895 + }, + { + "epoch": 0.12, + "grad_norm": 0.9657106995582581, + "learning_rate": 9.788648686603472e-06, + "loss": 0.6519, + "step": 1896 + }, + { + "epoch": 0.12, + "grad_norm": 0.9614534378051758, + "learning_rate": 9.788353438683346e-06, + "loss": 0.6579, + "step": 1897 + }, + { + "epoch": 0.12, + "grad_norm": 0.9790334701538086, + "learning_rate": 9.788057989142317e-06, + "loss": 0.6839, + "step": 1898 + }, + { + "epoch": 0.12, + "grad_norm": 0.8710220456123352, + "learning_rate": 9.787762337992821e-06, + "loss": 0.6316, + "step": 1899 + }, + { + "epoch": 0.12, + "grad_norm": 0.9904646873474121, + "learning_rate": 9.78746648524731e-06, + "loss": 0.6722, + "step": 1900 + }, + { + "epoch": 0.12, + "grad_norm": 0.858887255191803, + "learning_rate": 9.787170430918239e-06, + "loss": 0.6155, + "step": 1901 + }, + { + "epoch": 0.12, + "grad_norm": 0.9751510620117188, + "learning_rate": 9.786874175018073e-06, + "loss": 0.6484, + "step": 1902 + }, + { + "epoch": 0.12, + "grad_norm": 0.9207701086997986, + "learning_rate": 9.78657771755929e-06, + "loss": 0.6447, + "step": 1903 + }, + { + "epoch": 0.12, + "grad_norm": 0.8809829354286194, + "learning_rate": 9.786281058554369e-06, + "loss": 0.6035, + "step": 1904 + }, + { + "epoch": 0.12, + "grad_norm": 0.8563817143440247, + "learning_rate": 9.785984198015804e-06, + "loss": 0.5981, + "step": 1905 + }, + { + "epoch": 0.12, + "grad_norm": 0.9085856676101685, + "learning_rate": 9.785687135956092e-06, + "loss": 0.6318, + "step": 1906 + }, + { + "epoch": 0.12, + "grad_norm": 0.9032816290855408, + "learning_rate": 9.785389872387745e-06, + "loss": 0.5691, + "step": 1907 + }, + { + "epoch": 0.12, + "grad_norm": 0.9462535977363586, + "learning_rate": 9.785092407323276e-06, + "loss": 0.6807, + "step": 1908 + }, + { + "epoch": 0.12, + "grad_norm": 0.9293099641799927, + "learning_rate": 9.784794740775212e-06, + "loss": 0.663, + "step": 1909 + }, + { + "epoch": 0.12, + "grad_norm": 0.9306168556213379, + "learning_rate": 9.784496872756086e-06, + "loss": 0.6242, + "step": 1910 + }, + { + "epoch": 0.12, + "grad_norm": 0.9209849834442139, + "learning_rate": 9.784198803278442e-06, + "loss": 0.6387, + "step": 1911 + }, + { + "epoch": 0.12, + "grad_norm": 0.8757005929946899, + "learning_rate": 9.78390053235483e-06, + "loss": 0.6034, + "step": 1912 + }, + { + "epoch": 0.12, + "grad_norm": 0.9847443699836731, + "learning_rate": 9.783602059997808e-06, + "loss": 0.6675, + "step": 1913 + }, + { + "epoch": 0.12, + "grad_norm": 0.9457899928092957, + "learning_rate": 9.783303386219942e-06, + "loss": 0.6446, + "step": 1914 + }, + { + "epoch": 0.12, + "grad_norm": 0.9456826448440552, + "learning_rate": 9.783004511033814e-06, + "loss": 0.6877, + "step": 1915 + }, + { + "epoch": 0.12, + "grad_norm": 1.0723676681518555, + "learning_rate": 9.782705434452002e-06, + "loss": 0.6977, + "step": 1916 + }, + { + "epoch": 0.12, + "grad_norm": 0.9328003525733948, + "learning_rate": 9.782406156487104e-06, + "loss": 0.6618, + "step": 1917 + }, + { + "epoch": 0.12, + "grad_norm": 1.0295826196670532, + "learning_rate": 9.782106677151717e-06, + "loss": 0.674, + "step": 1918 + }, + { + "epoch": 0.12, + "grad_norm": 0.919144868850708, + "learning_rate": 9.781806996458456e-06, + "loss": 0.6598, + "step": 1919 + }, + { + "epoch": 0.12, + "grad_norm": 0.8945218324661255, + "learning_rate": 9.781507114419937e-06, + "loss": 0.6114, + "step": 1920 + }, + { + "epoch": 0.12, + "grad_norm": 0.9285868406295776, + "learning_rate": 9.781207031048785e-06, + "loss": 0.6274, + "step": 1921 + }, + { + "epoch": 0.12, + "grad_norm": 0.974398136138916, + "learning_rate": 9.78090674635764e-06, + "loss": 0.631, + "step": 1922 + }, + { + "epoch": 0.12, + "grad_norm": 0.8931386470794678, + "learning_rate": 9.780606260359141e-06, + "loss": 0.6597, + "step": 1923 + }, + { + "epoch": 0.12, + "grad_norm": 1.076026201248169, + "learning_rate": 9.780305573065945e-06, + "loss": 0.7182, + "step": 1924 + }, + { + "epoch": 0.12, + "grad_norm": 0.8796352744102478, + "learning_rate": 9.78000468449071e-06, + "loss": 0.6023, + "step": 1925 + }, + { + "epoch": 0.12, + "grad_norm": 0.9814146161079407, + "learning_rate": 9.779703594646106e-06, + "loss": 0.7202, + "step": 1926 + }, + { + "epoch": 0.12, + "grad_norm": 0.9146006107330322, + "learning_rate": 9.779402303544811e-06, + "loss": 0.6295, + "step": 1927 + }, + { + "epoch": 0.12, + "grad_norm": 0.9009500741958618, + "learning_rate": 9.77910081119951e-06, + "loss": 0.5973, + "step": 1928 + }, + { + "epoch": 0.12, + "grad_norm": 1.0086877346038818, + "learning_rate": 9.7787991176229e-06, + "loss": 0.6795, + "step": 1929 + }, + { + "epoch": 0.12, + "grad_norm": 0.8697808384895325, + "learning_rate": 9.778497222827685e-06, + "loss": 0.5967, + "step": 1930 + }, + { + "epoch": 0.12, + "grad_norm": 0.8777212500572205, + "learning_rate": 9.778195126826574e-06, + "loss": 0.6801, + "step": 1931 + }, + { + "epoch": 0.12, + "grad_norm": 0.9132078289985657, + "learning_rate": 9.777892829632288e-06, + "loss": 0.6482, + "step": 1932 + }, + { + "epoch": 0.12, + "grad_norm": 0.9318856000900269, + "learning_rate": 9.777590331257557e-06, + "loss": 0.7193, + "step": 1933 + }, + { + "epoch": 0.12, + "grad_norm": 0.9304954409599304, + "learning_rate": 9.777287631715117e-06, + "loss": 0.5924, + "step": 1934 + }, + { + "epoch": 0.12, + "grad_norm": 0.9359629154205322, + "learning_rate": 9.776984731017714e-06, + "loss": 0.7213, + "step": 1935 + }, + { + "epoch": 0.12, + "grad_norm": 0.8828993439674377, + "learning_rate": 9.7766816291781e-06, + "loss": 0.6008, + "step": 1936 + }, + { + "epoch": 0.12, + "grad_norm": 0.9245378375053406, + "learning_rate": 9.77637832620904e-06, + "loss": 0.6614, + "step": 1937 + }, + { + "epoch": 0.12, + "grad_norm": 0.9555390477180481, + "learning_rate": 9.776074822123306e-06, + "loss": 0.6417, + "step": 1938 + }, + { + "epoch": 0.12, + "grad_norm": 0.9190395474433899, + "learning_rate": 9.775771116933674e-06, + "loss": 0.6063, + "step": 1939 + }, + { + "epoch": 0.12, + "grad_norm": 0.9256815314292908, + "learning_rate": 9.775467210652936e-06, + "loss": 0.6503, + "step": 1940 + }, + { + "epoch": 0.12, + "grad_norm": 0.8923386335372925, + "learning_rate": 9.775163103293885e-06, + "loss": 0.6111, + "step": 1941 + }, + { + "epoch": 0.12, + "grad_norm": 0.9070592522621155, + "learning_rate": 9.774858794869328e-06, + "loss": 0.6668, + "step": 1942 + }, + { + "epoch": 0.12, + "grad_norm": 0.8983462452888489, + "learning_rate": 9.774554285392078e-06, + "loss": 0.6129, + "step": 1943 + }, + { + "epoch": 0.12, + "grad_norm": 0.8370616436004639, + "learning_rate": 9.774249574874957e-06, + "loss": 0.6213, + "step": 1944 + }, + { + "epoch": 0.12, + "grad_norm": 0.8787031769752502, + "learning_rate": 9.773944663330793e-06, + "loss": 0.6145, + "step": 1945 + }, + { + "epoch": 0.12, + "grad_norm": 0.9314898252487183, + "learning_rate": 9.773639550772428e-06, + "loss": 0.6159, + "step": 1946 + }, + { + "epoch": 0.12, + "grad_norm": 0.9421966671943665, + "learning_rate": 9.773334237212707e-06, + "loss": 0.6402, + "step": 1947 + }, + { + "epoch": 0.12, + "grad_norm": 0.9963151812553406, + "learning_rate": 9.773028722664486e-06, + "loss": 0.6342, + "step": 1948 + }, + { + "epoch": 0.12, + "grad_norm": 0.8582517504692078, + "learning_rate": 9.77272300714063e-06, + "loss": 0.6762, + "step": 1949 + }, + { + "epoch": 0.12, + "grad_norm": 0.905519425868988, + "learning_rate": 9.77241709065401e-06, + "loss": 0.6098, + "step": 1950 + }, + { + "epoch": 0.12, + "grad_norm": 0.9416316151618958, + "learning_rate": 9.772110973217512e-06, + "loss": 0.6413, + "step": 1951 + }, + { + "epoch": 0.12, + "grad_norm": 0.9484925270080566, + "learning_rate": 9.77180465484402e-06, + "loss": 0.6415, + "step": 1952 + }, + { + "epoch": 0.12, + "grad_norm": 0.8854299187660217, + "learning_rate": 9.771498135546433e-06, + "loss": 0.6387, + "step": 1953 + }, + { + "epoch": 0.12, + "grad_norm": 0.896232545375824, + "learning_rate": 9.77119141533766e-06, + "loss": 0.611, + "step": 1954 + }, + { + "epoch": 0.12, + "grad_norm": 0.9634320735931396, + "learning_rate": 9.770884494230614e-06, + "loss": 0.6216, + "step": 1955 + }, + { + "epoch": 0.12, + "grad_norm": 0.9145449995994568, + "learning_rate": 9.770577372238217e-06, + "loss": 0.5922, + "step": 1956 + }, + { + "epoch": 0.12, + "grad_norm": 0.9219470620155334, + "learning_rate": 9.770270049373403e-06, + "loss": 0.6517, + "step": 1957 + }, + { + "epoch": 0.12, + "grad_norm": 0.8602051734924316, + "learning_rate": 9.769962525649112e-06, + "loss": 0.57, + "step": 1958 + }, + { + "epoch": 0.12, + "grad_norm": 0.9885112643241882, + "learning_rate": 9.769654801078294e-06, + "loss": 0.6788, + "step": 1959 + }, + { + "epoch": 0.12, + "grad_norm": 0.8877094984054565, + "learning_rate": 9.769346875673903e-06, + "loss": 0.602, + "step": 1960 + }, + { + "epoch": 0.12, + "grad_norm": 0.9231418967247009, + "learning_rate": 9.769038749448907e-06, + "loss": 0.6285, + "step": 1961 + }, + { + "epoch": 0.12, + "grad_norm": 0.9032172560691833, + "learning_rate": 9.76873042241628e-06, + "loss": 0.6017, + "step": 1962 + }, + { + "epoch": 0.12, + "grad_norm": 0.9338173866271973, + "learning_rate": 9.768421894589003e-06, + "loss": 0.6577, + "step": 1963 + }, + { + "epoch": 0.12, + "grad_norm": 0.99520343542099, + "learning_rate": 9.76811316598007e-06, + "loss": 0.6539, + "step": 1964 + }, + { + "epoch": 0.12, + "grad_norm": 0.8935354351997375, + "learning_rate": 9.767804236602476e-06, + "loss": 0.6299, + "step": 1965 + }, + { + "epoch": 0.12, + "grad_norm": 0.8823718428611755, + "learning_rate": 9.767495106469233e-06, + "loss": 0.6356, + "step": 1966 + }, + { + "epoch": 0.12, + "grad_norm": 0.9498067498207092, + "learning_rate": 9.767185775593356e-06, + "loss": 0.6466, + "step": 1967 + }, + { + "epoch": 0.12, + "grad_norm": 0.9748334884643555, + "learning_rate": 9.76687624398787e-06, + "loss": 0.6011, + "step": 1968 + }, + { + "epoch": 0.12, + "grad_norm": 0.9265943765640259, + "learning_rate": 9.766566511665808e-06, + "loss": 0.6582, + "step": 1969 + }, + { + "epoch": 0.12, + "grad_norm": 0.8861657381057739, + "learning_rate": 9.766256578640212e-06, + "loss": 0.6416, + "step": 1970 + }, + { + "epoch": 0.12, + "grad_norm": 0.9129331707954407, + "learning_rate": 9.76594644492413e-06, + "loss": 0.6252, + "step": 1971 + }, + { + "epoch": 0.12, + "grad_norm": 0.9186064004898071, + "learning_rate": 9.765636110530626e-06, + "loss": 0.6536, + "step": 1972 + }, + { + "epoch": 0.12, + "grad_norm": 1.0016237497329712, + "learning_rate": 9.765325575472761e-06, + "loss": 0.6365, + "step": 1973 + }, + { + "epoch": 0.13, + "grad_norm": 0.9075548052787781, + "learning_rate": 9.765014839763616e-06, + "loss": 0.6497, + "step": 1974 + }, + { + "epoch": 0.13, + "grad_norm": 0.9337440729141235, + "learning_rate": 9.764703903416271e-06, + "loss": 0.6143, + "step": 1975 + }, + { + "epoch": 0.13, + "grad_norm": 0.9033395648002625, + "learning_rate": 9.76439276644382e-06, + "loss": 0.6251, + "step": 1976 + }, + { + "epoch": 0.13, + "grad_norm": 0.9360528588294983, + "learning_rate": 9.764081428859363e-06, + "loss": 0.6793, + "step": 1977 + }, + { + "epoch": 0.13, + "grad_norm": 0.9267653226852417, + "learning_rate": 9.763769890676011e-06, + "loss": 0.6137, + "step": 1978 + }, + { + "epoch": 0.13, + "grad_norm": 0.9287899136543274, + "learning_rate": 9.76345815190688e-06, + "loss": 0.6879, + "step": 1979 + }, + { + "epoch": 0.13, + "grad_norm": 0.9556732773780823, + "learning_rate": 9.763146212565097e-06, + "loss": 0.6544, + "step": 1980 + }, + { + "epoch": 0.13, + "grad_norm": 0.9532358646392822, + "learning_rate": 9.762834072663798e-06, + "loss": 0.629, + "step": 1981 + }, + { + "epoch": 0.13, + "grad_norm": 0.8710858821868896, + "learning_rate": 9.762521732216124e-06, + "loss": 0.6043, + "step": 1982 + }, + { + "epoch": 0.13, + "grad_norm": 0.9163749814033508, + "learning_rate": 9.762209191235227e-06, + "loss": 0.7024, + "step": 1983 + }, + { + "epoch": 0.13, + "grad_norm": 0.9370541572570801, + "learning_rate": 9.761896449734269e-06, + "loss": 0.6327, + "step": 1984 + }, + { + "epoch": 0.13, + "grad_norm": 0.9257699251174927, + "learning_rate": 9.761583507726416e-06, + "loss": 0.6479, + "step": 1985 + }, + { + "epoch": 0.13, + "grad_norm": 0.9155780673027039, + "learning_rate": 9.761270365224846e-06, + "loss": 0.6547, + "step": 1986 + }, + { + "epoch": 0.13, + "grad_norm": 0.8561526536941528, + "learning_rate": 9.760957022242746e-06, + "loss": 0.565, + "step": 1987 + }, + { + "epoch": 0.13, + "grad_norm": 1.0075304508209229, + "learning_rate": 9.760643478793305e-06, + "loss": 0.6894, + "step": 1988 + }, + { + "epoch": 0.13, + "grad_norm": 0.9076879620552063, + "learning_rate": 9.760329734889729e-06, + "loss": 0.6435, + "step": 1989 + }, + { + "epoch": 0.13, + "grad_norm": 0.9092305302619934, + "learning_rate": 9.760015790545227e-06, + "loss": 0.6086, + "step": 1990 + }, + { + "epoch": 0.13, + "grad_norm": 0.9335655570030212, + "learning_rate": 9.759701645773022e-06, + "loss": 0.6191, + "step": 1991 + }, + { + "epoch": 0.13, + "grad_norm": 0.9743184447288513, + "learning_rate": 9.759387300586336e-06, + "loss": 0.6072, + "step": 1992 + }, + { + "epoch": 0.13, + "grad_norm": 0.7978373169898987, + "learning_rate": 9.759072754998407e-06, + "loss": 0.558, + "step": 1993 + }, + { + "epoch": 0.13, + "grad_norm": 0.9291953444480896, + "learning_rate": 9.758758009022482e-06, + "loss": 0.6755, + "step": 1994 + }, + { + "epoch": 0.13, + "grad_norm": 0.8679872751235962, + "learning_rate": 9.758443062671809e-06, + "loss": 0.657, + "step": 1995 + }, + { + "epoch": 0.13, + "grad_norm": 0.9098535180091858, + "learning_rate": 9.758127915959655e-06, + "loss": 0.6372, + "step": 1996 + }, + { + "epoch": 0.13, + "grad_norm": 0.899311363697052, + "learning_rate": 9.757812568899285e-06, + "loss": 0.6732, + "step": 1997 + }, + { + "epoch": 0.13, + "grad_norm": 0.8860989809036255, + "learning_rate": 9.75749702150398e-06, + "loss": 0.6065, + "step": 1998 + }, + { + "epoch": 0.13, + "grad_norm": 0.9011684656143188, + "learning_rate": 9.757181273787024e-06, + "loss": 0.6352, + "step": 1999 + }, + { + "epoch": 0.13, + "grad_norm": 0.8687819242477417, + "learning_rate": 9.756865325761715e-06, + "loss": 0.6535, + "step": 2000 + }, + { + "epoch": 0.13, + "grad_norm": 0.9379962682723999, + "learning_rate": 9.756549177441354e-06, + "loss": 0.602, + "step": 2001 + }, + { + "epoch": 0.13, + "grad_norm": 0.9150758385658264, + "learning_rate": 9.756232828839256e-06, + "loss": 0.621, + "step": 2002 + }, + { + "epoch": 0.13, + "grad_norm": 0.9422044157981873, + "learning_rate": 9.755916279968738e-06, + "loss": 0.6499, + "step": 2003 + }, + { + "epoch": 0.13, + "grad_norm": 0.906806230545044, + "learning_rate": 9.75559953084313e-06, + "loss": 0.6025, + "step": 2004 + }, + { + "epoch": 0.13, + "grad_norm": 0.9322741627693176, + "learning_rate": 9.755282581475769e-06, + "loss": 0.6008, + "step": 2005 + }, + { + "epoch": 0.13, + "grad_norm": 0.8540508151054382, + "learning_rate": 9.75496543188e-06, + "loss": 0.6156, + "step": 2006 + }, + { + "epoch": 0.13, + "grad_norm": 0.8853635191917419, + "learning_rate": 9.754648082069181e-06, + "loss": 0.5934, + "step": 2007 + }, + { + "epoch": 0.13, + "grad_norm": 0.9560227990150452, + "learning_rate": 9.75433053205667e-06, + "loss": 0.6117, + "step": 2008 + }, + { + "epoch": 0.13, + "grad_norm": 0.934593915939331, + "learning_rate": 9.754012781855837e-06, + "loss": 0.6571, + "step": 2009 + }, + { + "epoch": 0.13, + "grad_norm": 0.9356120824813843, + "learning_rate": 9.753694831480067e-06, + "loss": 0.6051, + "step": 2010 + }, + { + "epoch": 0.13, + "grad_norm": 0.9165395498275757, + "learning_rate": 9.753376680942744e-06, + "loss": 0.6509, + "step": 2011 + }, + { + "epoch": 0.13, + "grad_norm": 0.907821536064148, + "learning_rate": 9.753058330257263e-06, + "loss": 0.6233, + "step": 2012 + }, + { + "epoch": 0.13, + "grad_norm": 0.924810528755188, + "learning_rate": 9.752739779437032e-06, + "loss": 0.6273, + "step": 2013 + }, + { + "epoch": 0.13, + "grad_norm": 0.9630839824676514, + "learning_rate": 9.752421028495461e-06, + "loss": 0.6812, + "step": 2014 + }, + { + "epoch": 0.13, + "grad_norm": 0.9571135640144348, + "learning_rate": 9.752102077445974e-06, + "loss": 0.6364, + "step": 2015 + }, + { + "epoch": 0.13, + "grad_norm": 0.9181431531906128, + "learning_rate": 9.751782926302e-06, + "loss": 0.6296, + "step": 2016 + }, + { + "epoch": 0.13, + "grad_norm": 0.9492517113685608, + "learning_rate": 9.751463575076977e-06, + "loss": 0.6416, + "step": 2017 + }, + { + "epoch": 0.13, + "grad_norm": 0.9193875789642334, + "learning_rate": 9.75114402378435e-06, + "loss": 0.6329, + "step": 2018 + }, + { + "epoch": 0.13, + "grad_norm": 1.0199710130691528, + "learning_rate": 9.75082427243758e-06, + "loss": 0.6989, + "step": 2019 + }, + { + "epoch": 0.13, + "grad_norm": 1.0100188255310059, + "learning_rate": 9.750504321050126e-06, + "loss": 0.6863, + "step": 2020 + }, + { + "epoch": 0.13, + "grad_norm": 0.957966685295105, + "learning_rate": 9.75018416963546e-06, + "loss": 0.6497, + "step": 2021 + }, + { + "epoch": 0.13, + "grad_norm": 0.9822169542312622, + "learning_rate": 9.749863818207061e-06, + "loss": 0.6548, + "step": 2022 + }, + { + "epoch": 0.13, + "grad_norm": 0.9881288409233093, + "learning_rate": 9.749543266778424e-06, + "loss": 0.6411, + "step": 2023 + }, + { + "epoch": 0.13, + "grad_norm": 0.9414603114128113, + "learning_rate": 9.749222515363041e-06, + "loss": 0.6502, + "step": 2024 + }, + { + "epoch": 0.13, + "grad_norm": 0.935268223285675, + "learning_rate": 9.748901563974418e-06, + "loss": 0.6292, + "step": 2025 + }, + { + "epoch": 0.13, + "grad_norm": 0.9612113833427429, + "learning_rate": 9.748580412626072e-06, + "loss": 0.6809, + "step": 2026 + }, + { + "epoch": 0.13, + "grad_norm": 0.9209766983985901, + "learning_rate": 9.748259061331524e-06, + "loss": 0.6361, + "step": 2027 + }, + { + "epoch": 0.13, + "grad_norm": 0.9090907573699951, + "learning_rate": 9.747937510104305e-06, + "loss": 0.6418, + "step": 2028 + }, + { + "epoch": 0.13, + "grad_norm": 1.0263921022415161, + "learning_rate": 9.74761575895795e-06, + "loss": 0.6523, + "step": 2029 + }, + { + "epoch": 0.13, + "grad_norm": 0.9746382236480713, + "learning_rate": 9.747293807906017e-06, + "loss": 0.6635, + "step": 2030 + }, + { + "epoch": 0.13, + "grad_norm": 0.8786625266075134, + "learning_rate": 9.746971656962053e-06, + "loss": 0.6328, + "step": 2031 + }, + { + "epoch": 0.13, + "grad_norm": 0.9323434233665466, + "learning_rate": 9.746649306139627e-06, + "loss": 0.6359, + "step": 2032 + }, + { + "epoch": 0.13, + "grad_norm": 0.9723234176635742, + "learning_rate": 9.74632675545231e-06, + "loss": 0.6284, + "step": 2033 + }, + { + "epoch": 0.13, + "grad_norm": 0.9053655862808228, + "learning_rate": 9.746004004913688e-06, + "loss": 0.6266, + "step": 2034 + }, + { + "epoch": 0.13, + "grad_norm": 0.835522472858429, + "learning_rate": 9.745681054537345e-06, + "loss": 0.5554, + "step": 2035 + }, + { + "epoch": 0.13, + "grad_norm": 0.9256971478462219, + "learning_rate": 9.745357904336882e-06, + "loss": 0.6404, + "step": 2036 + }, + { + "epoch": 0.13, + "grad_norm": 0.9099552035331726, + "learning_rate": 9.745034554325905e-06, + "loss": 0.6096, + "step": 2037 + }, + { + "epoch": 0.13, + "grad_norm": 0.9474760293960571, + "learning_rate": 9.74471100451803e-06, + "loss": 0.6383, + "step": 2038 + }, + { + "epoch": 0.13, + "grad_norm": 0.8943539261817932, + "learning_rate": 9.744387254926882e-06, + "loss": 0.6159, + "step": 2039 + }, + { + "epoch": 0.13, + "grad_norm": 0.9860721230506897, + "learning_rate": 9.74406330556609e-06, + "loss": 0.6816, + "step": 2040 + }, + { + "epoch": 0.13, + "grad_norm": 0.9628056883811951, + "learning_rate": 9.743739156449294e-06, + "loss": 0.6745, + "step": 2041 + }, + { + "epoch": 0.13, + "grad_norm": 0.8961864709854126, + "learning_rate": 9.743414807590145e-06, + "loss": 0.6283, + "step": 2042 + }, + { + "epoch": 0.13, + "grad_norm": 0.9150246381759644, + "learning_rate": 9.743090259002302e-06, + "loss": 0.6118, + "step": 2043 + }, + { + "epoch": 0.13, + "grad_norm": 0.9093335270881653, + "learning_rate": 9.742765510699425e-06, + "loss": 0.6072, + "step": 2044 + }, + { + "epoch": 0.13, + "grad_norm": 0.9687802195549011, + "learning_rate": 9.742440562695194e-06, + "loss": 0.6055, + "step": 2045 + }, + { + "epoch": 0.13, + "grad_norm": 0.9072078466415405, + "learning_rate": 9.742115415003288e-06, + "loss": 0.6052, + "step": 2046 + }, + { + "epoch": 0.13, + "grad_norm": 0.9311554431915283, + "learning_rate": 9.741790067637398e-06, + "loss": 0.7248, + "step": 2047 + }, + { + "epoch": 0.13, + "grad_norm": 0.9264607429504395, + "learning_rate": 9.741464520611223e-06, + "loss": 0.6605, + "step": 2048 + }, + { + "epoch": 0.13, + "grad_norm": 0.9030102491378784, + "learning_rate": 9.741138773938472e-06, + "loss": 0.605, + "step": 2049 + }, + { + "epoch": 0.13, + "grad_norm": 0.9147754907608032, + "learning_rate": 9.74081282763286e-06, + "loss": 0.6209, + "step": 2050 + }, + { + "epoch": 0.13, + "grad_norm": 0.9356055855751038, + "learning_rate": 9.740486681708114e-06, + "loss": 0.6877, + "step": 2051 + }, + { + "epoch": 0.13, + "grad_norm": 0.9533680081367493, + "learning_rate": 9.740160336177962e-06, + "loss": 0.677, + "step": 2052 + }, + { + "epoch": 0.13, + "grad_norm": 0.9570329189300537, + "learning_rate": 9.73983379105615e-06, + "loss": 0.6264, + "step": 2053 + }, + { + "epoch": 0.13, + "grad_norm": 0.8931095600128174, + "learning_rate": 9.739507046356424e-06, + "loss": 0.65, + "step": 2054 + }, + { + "epoch": 0.13, + "grad_norm": 0.9158161282539368, + "learning_rate": 9.739180102092544e-06, + "loss": 0.6391, + "step": 2055 + }, + { + "epoch": 0.13, + "grad_norm": 0.9462281465530396, + "learning_rate": 9.738852958278278e-06, + "loss": 0.6517, + "step": 2056 + }, + { + "epoch": 0.13, + "grad_norm": 0.9567736983299255, + "learning_rate": 9.738525614927399e-06, + "loss": 0.6521, + "step": 2057 + }, + { + "epoch": 0.13, + "grad_norm": 0.8848094344139099, + "learning_rate": 9.73819807205369e-06, + "loss": 0.5847, + "step": 2058 + }, + { + "epoch": 0.13, + "grad_norm": 0.9664223194122314, + "learning_rate": 9.737870329670942e-06, + "loss": 0.6546, + "step": 2059 + }, + { + "epoch": 0.13, + "grad_norm": 1.0148460865020752, + "learning_rate": 9.737542387792957e-06, + "loss": 0.6764, + "step": 2060 + }, + { + "epoch": 0.13, + "grad_norm": 0.9341017007827759, + "learning_rate": 9.737214246433544e-06, + "loss": 0.6472, + "step": 2061 + }, + { + "epoch": 0.13, + "grad_norm": 0.9515483975410461, + "learning_rate": 9.736885905606516e-06, + "loss": 0.6375, + "step": 2062 + }, + { + "epoch": 0.13, + "grad_norm": 0.914443850517273, + "learning_rate": 9.736557365325703e-06, + "loss": 0.6448, + "step": 2063 + }, + { + "epoch": 0.13, + "grad_norm": 0.8859198689460754, + "learning_rate": 9.736228625604938e-06, + "loss": 0.6072, + "step": 2064 + }, + { + "epoch": 0.13, + "grad_norm": 0.9599637985229492, + "learning_rate": 9.735899686458059e-06, + "loss": 0.6274, + "step": 2065 + }, + { + "epoch": 0.13, + "grad_norm": 0.890293538570404, + "learning_rate": 9.73557054789892e-06, + "loss": 0.6138, + "step": 2066 + }, + { + "epoch": 0.13, + "grad_norm": 0.9055455923080444, + "learning_rate": 9.73524120994138e-06, + "loss": 0.6443, + "step": 2067 + }, + { + "epoch": 0.13, + "grad_norm": 0.9043434262275696, + "learning_rate": 9.734911672599304e-06, + "loss": 0.6233, + "step": 2068 + }, + { + "epoch": 0.13, + "grad_norm": 0.9188245534896851, + "learning_rate": 9.73458193588657e-06, + "loss": 0.6832, + "step": 2069 + }, + { + "epoch": 0.13, + "grad_norm": 0.928259551525116, + "learning_rate": 9.734251999817061e-06, + "loss": 0.6329, + "step": 2070 + }, + { + "epoch": 0.13, + "grad_norm": 0.9406484365463257, + "learning_rate": 9.733921864404669e-06, + "loss": 0.5915, + "step": 2071 + }, + { + "epoch": 0.13, + "grad_norm": 1.0036033391952515, + "learning_rate": 9.733591529663295e-06, + "loss": 0.6564, + "step": 2072 + }, + { + "epoch": 0.13, + "grad_norm": 0.9120476245880127, + "learning_rate": 9.73326099560685e-06, + "loss": 0.5945, + "step": 2073 + }, + { + "epoch": 0.13, + "grad_norm": 0.8819807767868042, + "learning_rate": 9.732930262249249e-06, + "loss": 0.5899, + "step": 2074 + }, + { + "epoch": 0.13, + "grad_norm": 0.953350841999054, + "learning_rate": 9.73259932960442e-06, + "loss": 0.6316, + "step": 2075 + }, + { + "epoch": 0.13, + "grad_norm": 0.9032095074653625, + "learning_rate": 9.732268197686296e-06, + "loss": 0.6505, + "step": 2076 + }, + { + "epoch": 0.13, + "grad_norm": 0.9567223787307739, + "learning_rate": 9.731936866508822e-06, + "loss": 0.6194, + "step": 2077 + }, + { + "epoch": 0.13, + "grad_norm": 0.980812132358551, + "learning_rate": 9.731605336085947e-06, + "loss": 0.6625, + "step": 2078 + }, + { + "epoch": 0.13, + "grad_norm": 0.8719442486763, + "learning_rate": 9.73127360643163e-06, + "loss": 0.6017, + "step": 2079 + }, + { + "epoch": 0.13, + "grad_norm": 0.9464851021766663, + "learning_rate": 9.730941677559843e-06, + "loss": 0.6396, + "step": 2080 + }, + { + "epoch": 0.13, + "grad_norm": 0.9146105051040649, + "learning_rate": 9.730609549484558e-06, + "loss": 0.672, + "step": 2081 + }, + { + "epoch": 0.13, + "grad_norm": 0.8443781733512878, + "learning_rate": 9.730277222219762e-06, + "loss": 0.5547, + "step": 2082 + }, + { + "epoch": 0.13, + "grad_norm": 0.9289159774780273, + "learning_rate": 9.729944695779448e-06, + "loss": 0.6482, + "step": 2083 + }, + { + "epoch": 0.13, + "grad_norm": 0.9049432873725891, + "learning_rate": 9.729611970177615e-06, + "loss": 0.5846, + "step": 2084 + }, + { + "epoch": 0.13, + "grad_norm": 0.9440188407897949, + "learning_rate": 9.729279045428277e-06, + "loss": 0.6773, + "step": 2085 + }, + { + "epoch": 0.13, + "grad_norm": 0.8901217579841614, + "learning_rate": 9.72894592154545e-06, + "loss": 0.6139, + "step": 2086 + }, + { + "epoch": 0.13, + "grad_norm": 0.9457870125770569, + "learning_rate": 9.72861259854316e-06, + "loss": 0.5882, + "step": 2087 + }, + { + "epoch": 0.13, + "grad_norm": 0.8994535207748413, + "learning_rate": 9.728279076435446e-06, + "loss": 0.5914, + "step": 2088 + }, + { + "epoch": 0.13, + "grad_norm": 0.9447741508483887, + "learning_rate": 9.727945355236345e-06, + "loss": 0.5963, + "step": 2089 + }, + { + "epoch": 0.13, + "grad_norm": 0.9336423277854919, + "learning_rate": 9.727611434959914e-06, + "loss": 0.6375, + "step": 2090 + }, + { + "epoch": 0.13, + "grad_norm": 0.9569306969642639, + "learning_rate": 9.727277315620212e-06, + "loss": 0.6664, + "step": 2091 + }, + { + "epoch": 0.13, + "grad_norm": 0.8650494813919067, + "learning_rate": 9.726942997231308e-06, + "loss": 0.6182, + "step": 2092 + }, + { + "epoch": 0.13, + "grad_norm": 0.8966164588928223, + "learning_rate": 9.726608479807278e-06, + "loss": 0.6107, + "step": 2093 + }, + { + "epoch": 0.13, + "grad_norm": 0.9104940295219421, + "learning_rate": 9.726273763362206e-06, + "loss": 0.6679, + "step": 2094 + }, + { + "epoch": 0.13, + "grad_norm": 0.9482932686805725, + "learning_rate": 9.725938847910187e-06, + "loss": 0.65, + "step": 2095 + }, + { + "epoch": 0.13, + "grad_norm": 0.8956882357597351, + "learning_rate": 9.725603733465325e-06, + "loss": 0.6163, + "step": 2096 + }, + { + "epoch": 0.13, + "grad_norm": 0.9436829686164856, + "learning_rate": 9.725268420041728e-06, + "loss": 0.6822, + "step": 2097 + }, + { + "epoch": 0.13, + "grad_norm": 0.9164643883705139, + "learning_rate": 9.724932907653516e-06, + "loss": 0.6358, + "step": 2098 + }, + { + "epoch": 0.13, + "grad_norm": 0.8321818113327026, + "learning_rate": 9.724597196314817e-06, + "loss": 0.5925, + "step": 2099 + }, + { + "epoch": 0.13, + "grad_norm": 0.8644357919692993, + "learning_rate": 9.724261286039766e-06, + "loss": 0.6181, + "step": 2100 + }, + { + "epoch": 0.13, + "grad_norm": 0.9723076820373535, + "learning_rate": 9.723925176842506e-06, + "loss": 0.6353, + "step": 2101 + }, + { + "epoch": 0.13, + "grad_norm": 0.9631821513175964, + "learning_rate": 9.72358886873719e-06, + "loss": 0.6319, + "step": 2102 + }, + { + "epoch": 0.13, + "grad_norm": 0.8844379782676697, + "learning_rate": 9.723252361737977e-06, + "loss": 0.6011, + "step": 2103 + }, + { + "epoch": 0.13, + "grad_norm": 0.8965840935707092, + "learning_rate": 9.722915655859042e-06, + "loss": 0.6657, + "step": 2104 + }, + { + "epoch": 0.13, + "grad_norm": 0.878998875617981, + "learning_rate": 9.722578751114556e-06, + "loss": 0.609, + "step": 2105 + }, + { + "epoch": 0.13, + "grad_norm": 0.9274184107780457, + "learning_rate": 9.72224164751871e-06, + "loss": 0.6772, + "step": 2106 + }, + { + "epoch": 0.13, + "grad_norm": 0.9545007944107056, + "learning_rate": 9.721904345085692e-06, + "loss": 0.7068, + "step": 2107 + }, + { + "epoch": 0.13, + "grad_norm": 0.8995804786682129, + "learning_rate": 9.72156684382971e-06, + "loss": 0.594, + "step": 2108 + }, + { + "epoch": 0.13, + "grad_norm": 0.8433576822280884, + "learning_rate": 9.721229143764975e-06, + "loss": 0.5871, + "step": 2109 + }, + { + "epoch": 0.13, + "grad_norm": 0.8701801300048828, + "learning_rate": 9.720891244905701e-06, + "loss": 0.6579, + "step": 2110 + }, + { + "epoch": 0.13, + "grad_norm": 0.9737119078636169, + "learning_rate": 9.720553147266123e-06, + "loss": 0.6812, + "step": 2111 + }, + { + "epoch": 0.13, + "grad_norm": 0.9231463074684143, + "learning_rate": 9.720214850860473e-06, + "loss": 0.6731, + "step": 2112 + }, + { + "epoch": 0.13, + "grad_norm": 0.9718281030654907, + "learning_rate": 9.719876355702993e-06, + "loss": 0.6299, + "step": 2113 + }, + { + "epoch": 0.13, + "grad_norm": 0.9555742144584656, + "learning_rate": 9.719537661807942e-06, + "loss": 0.6401, + "step": 2114 + }, + { + "epoch": 0.13, + "grad_norm": 0.9553225636482239, + "learning_rate": 9.719198769189577e-06, + "loss": 0.5957, + "step": 2115 + }, + { + "epoch": 0.13, + "grad_norm": 0.9114801287651062, + "learning_rate": 9.718859677862169e-06, + "loss": 0.6514, + "step": 2116 + }, + { + "epoch": 0.13, + "grad_norm": 0.9563703536987305, + "learning_rate": 9.718520387839996e-06, + "loss": 0.6367, + "step": 2117 + }, + { + "epoch": 0.13, + "grad_norm": 0.9876435399055481, + "learning_rate": 9.718180899137344e-06, + "loss": 0.6547, + "step": 2118 + }, + { + "epoch": 0.13, + "grad_norm": 0.91056889295578, + "learning_rate": 9.717841211768505e-06, + "loss": 0.6011, + "step": 2119 + }, + { + "epoch": 0.13, + "grad_norm": 0.9514434337615967, + "learning_rate": 9.71750132574779e-06, + "loss": 0.6384, + "step": 2120 + }, + { + "epoch": 0.13, + "grad_norm": 0.97725510597229, + "learning_rate": 9.717161241089501e-06, + "loss": 0.6478, + "step": 2121 + }, + { + "epoch": 0.13, + "grad_norm": 0.8876969814300537, + "learning_rate": 9.716820957807963e-06, + "loss": 0.6272, + "step": 2122 + }, + { + "epoch": 0.13, + "grad_norm": 0.9173263311386108, + "learning_rate": 9.716480475917504e-06, + "loss": 0.6316, + "step": 2123 + }, + { + "epoch": 0.13, + "grad_norm": 0.898065984249115, + "learning_rate": 9.716139795432459e-06, + "loss": 0.6161, + "step": 2124 + }, + { + "epoch": 0.13, + "grad_norm": 0.9070072770118713, + "learning_rate": 9.715798916367174e-06, + "loss": 0.6324, + "step": 2125 + }, + { + "epoch": 0.13, + "grad_norm": 0.871792733669281, + "learning_rate": 9.715457838736e-06, + "loss": 0.6324, + "step": 2126 + }, + { + "epoch": 0.13, + "grad_norm": 0.8872711658477783, + "learning_rate": 9.715116562553302e-06, + "loss": 0.6484, + "step": 2127 + }, + { + "epoch": 0.13, + "grad_norm": 0.903907299041748, + "learning_rate": 9.714775087833446e-06, + "loss": 0.6243, + "step": 2128 + }, + { + "epoch": 0.13, + "grad_norm": 0.9561790227890015, + "learning_rate": 9.714433414590816e-06, + "loss": 0.6232, + "step": 2129 + }, + { + "epoch": 0.13, + "grad_norm": 0.9328345656394958, + "learning_rate": 9.714091542839792e-06, + "loss": 0.6487, + "step": 2130 + }, + { + "epoch": 0.14, + "grad_norm": 0.8860450387001038, + "learning_rate": 9.713749472594773e-06, + "loss": 0.6337, + "step": 2131 + }, + { + "epoch": 0.14, + "grad_norm": 0.9177609086036682, + "learning_rate": 9.713407203870163e-06, + "loss": 0.6369, + "step": 2132 + }, + { + "epoch": 0.14, + "grad_norm": 0.8619222640991211, + "learning_rate": 9.713064736680372e-06, + "loss": 0.5794, + "step": 2133 + }, + { + "epoch": 0.14, + "grad_norm": 0.9057930707931519, + "learning_rate": 9.71272207103982e-06, + "loss": 0.5988, + "step": 2134 + }, + { + "epoch": 0.14, + "grad_norm": 0.9182736873626709, + "learning_rate": 9.712379206962936e-06, + "loss": 0.7029, + "step": 2135 + }, + { + "epoch": 0.14, + "grad_norm": 0.8596693277359009, + "learning_rate": 9.712036144464157e-06, + "loss": 0.598, + "step": 2136 + }, + { + "epoch": 0.14, + "grad_norm": 0.9068416953086853, + "learning_rate": 9.711692883557928e-06, + "loss": 0.6706, + "step": 2137 + }, + { + "epoch": 0.14, + "grad_norm": 0.9520207643508911, + "learning_rate": 9.711349424258702e-06, + "loss": 0.6514, + "step": 2138 + }, + { + "epoch": 0.14, + "grad_norm": 0.8763338327407837, + "learning_rate": 9.711005766580942e-06, + "loss": 0.5938, + "step": 2139 + }, + { + "epoch": 0.14, + "grad_norm": 0.9876976013183594, + "learning_rate": 9.710661910539117e-06, + "loss": 0.6066, + "step": 2140 + }, + { + "epoch": 0.14, + "grad_norm": 0.9245547652244568, + "learning_rate": 9.710317856147707e-06, + "loss": 0.6224, + "step": 2141 + }, + { + "epoch": 0.14, + "grad_norm": 0.8814427852630615, + "learning_rate": 9.709973603421196e-06, + "loss": 0.5874, + "step": 2142 + }, + { + "epoch": 0.14, + "grad_norm": 0.8934566378593445, + "learning_rate": 9.709629152374084e-06, + "loss": 0.6272, + "step": 2143 + }, + { + "epoch": 0.14, + "grad_norm": 0.8956682682037354, + "learning_rate": 9.70928450302087e-06, + "loss": 0.6705, + "step": 2144 + }, + { + "epoch": 0.14, + "grad_norm": 0.8444738984107971, + "learning_rate": 9.708939655376069e-06, + "loss": 0.5762, + "step": 2145 + }, + { + "epoch": 0.14, + "grad_norm": 0.9432147145271301, + "learning_rate": 9.708594609454201e-06, + "loss": 0.6278, + "step": 2146 + }, + { + "epoch": 0.14, + "grad_norm": 0.872987687587738, + "learning_rate": 9.708249365269793e-06, + "loss": 0.6689, + "step": 2147 + }, + { + "epoch": 0.14, + "grad_norm": 0.9760878086090088, + "learning_rate": 9.707903922837382e-06, + "loss": 0.665, + "step": 2148 + }, + { + "epoch": 0.14, + "grad_norm": 1.0128514766693115, + "learning_rate": 9.707558282171517e-06, + "loss": 0.7365, + "step": 2149 + }, + { + "epoch": 0.14, + "grad_norm": 0.9022131562232971, + "learning_rate": 9.707212443286746e-06, + "loss": 0.6428, + "step": 2150 + }, + { + "epoch": 0.14, + "grad_norm": 0.9122216105461121, + "learning_rate": 9.706866406197637e-06, + "loss": 0.617, + "step": 2151 + }, + { + "epoch": 0.14, + "grad_norm": 0.9093108773231506, + "learning_rate": 9.706520170918756e-06, + "loss": 0.6826, + "step": 2152 + }, + { + "epoch": 0.14, + "grad_norm": 0.9202459454536438, + "learning_rate": 9.706173737464683e-06, + "loss": 0.5962, + "step": 2153 + }, + { + "epoch": 0.14, + "grad_norm": 0.9246529936790466, + "learning_rate": 9.705827105850008e-06, + "loss": 0.6299, + "step": 2154 + }, + { + "epoch": 0.14, + "grad_norm": 0.9624621868133545, + "learning_rate": 9.705480276089323e-06, + "loss": 0.5714, + "step": 2155 + }, + { + "epoch": 0.14, + "grad_norm": 0.8589086532592773, + "learning_rate": 9.705133248197232e-06, + "loss": 0.6083, + "step": 2156 + }, + { + "epoch": 0.14, + "grad_norm": 0.8764198422431946, + "learning_rate": 9.704786022188346e-06, + "loss": 0.6235, + "step": 2157 + }, + { + "epoch": 0.14, + "grad_norm": 0.9193335771560669, + "learning_rate": 9.704438598077291e-06, + "loss": 0.5962, + "step": 2158 + }, + { + "epoch": 0.14, + "grad_norm": 0.8846250176429749, + "learning_rate": 9.70409097587869e-06, + "loss": 0.6036, + "step": 2159 + }, + { + "epoch": 0.14, + "grad_norm": 0.8875699043273926, + "learning_rate": 9.703743155607182e-06, + "loss": 0.5966, + "step": 2160 + }, + { + "epoch": 0.14, + "grad_norm": 0.9193180799484253, + "learning_rate": 9.703395137277414e-06, + "loss": 0.6257, + "step": 2161 + }, + { + "epoch": 0.14, + "grad_norm": 0.9386597275733948, + "learning_rate": 9.703046920904038e-06, + "loss": 0.6208, + "step": 2162 + }, + { + "epoch": 0.14, + "grad_norm": 1.0000402927398682, + "learning_rate": 9.702698506501717e-06, + "loss": 0.6224, + "step": 2163 + }, + { + "epoch": 0.14, + "grad_norm": 0.9253415465354919, + "learning_rate": 9.702349894085122e-06, + "loss": 0.6126, + "step": 2164 + }, + { + "epoch": 0.14, + "grad_norm": 0.9393420219421387, + "learning_rate": 9.702001083668931e-06, + "loss": 0.6636, + "step": 2165 + }, + { + "epoch": 0.14, + "grad_norm": 0.9388704299926758, + "learning_rate": 9.701652075267832e-06, + "loss": 0.6852, + "step": 2166 + }, + { + "epoch": 0.14, + "grad_norm": 0.9847069382667542, + "learning_rate": 9.701302868896518e-06, + "loss": 0.6375, + "step": 2167 + }, + { + "epoch": 0.14, + "grad_norm": 0.8902013301849365, + "learning_rate": 9.700953464569698e-06, + "loss": 0.6506, + "step": 2168 + }, + { + "epoch": 0.14, + "grad_norm": 0.8558127880096436, + "learning_rate": 9.700603862302078e-06, + "loss": 0.6022, + "step": 2169 + }, + { + "epoch": 0.14, + "grad_norm": 0.9520554542541504, + "learning_rate": 9.700254062108383e-06, + "loss": 0.6399, + "step": 2170 + }, + { + "epoch": 0.14, + "grad_norm": 0.90887451171875, + "learning_rate": 9.69990406400334e-06, + "loss": 0.6224, + "step": 2171 + }, + { + "epoch": 0.14, + "grad_norm": 0.9194844365119934, + "learning_rate": 9.699553868001688e-06, + "loss": 0.6657, + "step": 2172 + }, + { + "epoch": 0.14, + "grad_norm": 0.9208309054374695, + "learning_rate": 9.699203474118168e-06, + "loss": 0.5959, + "step": 2173 + }, + { + "epoch": 0.14, + "grad_norm": 0.9791555404663086, + "learning_rate": 9.69885288236754e-06, + "loss": 0.6241, + "step": 2174 + }, + { + "epoch": 0.14, + "grad_norm": 0.94879150390625, + "learning_rate": 9.698502092764562e-06, + "loss": 0.6352, + "step": 2175 + }, + { + "epoch": 0.14, + "grad_norm": 0.919954240322113, + "learning_rate": 9.698151105324006e-06, + "loss": 0.5973, + "step": 2176 + }, + { + "epoch": 0.14, + "grad_norm": 0.863923192024231, + "learning_rate": 9.697799920060651e-06, + "loss": 0.6602, + "step": 2177 + }, + { + "epoch": 0.14, + "grad_norm": 0.861292839050293, + "learning_rate": 9.697448536989284e-06, + "loss": 0.6121, + "step": 2178 + }, + { + "epoch": 0.14, + "grad_norm": 0.8456393480300903, + "learning_rate": 9.697096956124699e-06, + "loss": 0.6424, + "step": 2179 + }, + { + "epoch": 0.14, + "grad_norm": 0.9356247186660767, + "learning_rate": 9.696745177481703e-06, + "loss": 0.5969, + "step": 2180 + }, + { + "epoch": 0.14, + "grad_norm": 0.8527323007583618, + "learning_rate": 9.696393201075105e-06, + "loss": 0.6084, + "step": 2181 + }, + { + "epoch": 0.14, + "grad_norm": 0.8870608806610107, + "learning_rate": 9.696041026919727e-06, + "loss": 0.5883, + "step": 2182 + }, + { + "epoch": 0.14, + "grad_norm": 0.9454874992370605, + "learning_rate": 9.695688655030397e-06, + "loss": 0.6827, + "step": 2183 + }, + { + "epoch": 0.14, + "grad_norm": 0.9070287942886353, + "learning_rate": 9.695336085421953e-06, + "loss": 0.6381, + "step": 2184 + }, + { + "epoch": 0.14, + "grad_norm": 0.8830955624580383, + "learning_rate": 9.694983318109242e-06, + "loss": 0.6082, + "step": 2185 + }, + { + "epoch": 0.14, + "grad_norm": 0.8843045830726624, + "learning_rate": 9.694630353107115e-06, + "loss": 0.6811, + "step": 2186 + }, + { + "epoch": 0.14, + "grad_norm": 0.9212061166763306, + "learning_rate": 9.694277190430437e-06, + "loss": 0.6432, + "step": 2187 + }, + { + "epoch": 0.14, + "grad_norm": 0.8803088068962097, + "learning_rate": 9.693923830094074e-06, + "loss": 0.6415, + "step": 2188 + }, + { + "epoch": 0.14, + "grad_norm": 0.9358056783676147, + "learning_rate": 9.693570272112908e-06, + "loss": 0.661, + "step": 2189 + }, + { + "epoch": 0.14, + "grad_norm": 0.9013800024986267, + "learning_rate": 9.693216516501827e-06, + "loss": 0.6218, + "step": 2190 + }, + { + "epoch": 0.14, + "grad_norm": 0.944242000579834, + "learning_rate": 9.692862563275725e-06, + "loss": 0.6864, + "step": 2191 + }, + { + "epoch": 0.14, + "grad_norm": 0.8731442093849182, + "learning_rate": 9.692508412449505e-06, + "loss": 0.6566, + "step": 2192 + }, + { + "epoch": 0.14, + "grad_norm": 0.9164943099021912, + "learning_rate": 9.692154064038079e-06, + "loss": 0.6176, + "step": 2193 + }, + { + "epoch": 0.14, + "grad_norm": 0.8906972408294678, + "learning_rate": 9.691799518056369e-06, + "loss": 0.6736, + "step": 2194 + }, + { + "epoch": 0.14, + "grad_norm": 0.927988588809967, + "learning_rate": 9.691444774519302e-06, + "loss": 0.6201, + "step": 2195 + }, + { + "epoch": 0.14, + "grad_norm": 0.9599518775939941, + "learning_rate": 9.691089833441818e-06, + "loss": 0.662, + "step": 2196 + }, + { + "epoch": 0.14, + "grad_norm": 0.842663049697876, + "learning_rate": 9.69073469483886e-06, + "loss": 0.6176, + "step": 2197 + }, + { + "epoch": 0.14, + "grad_norm": 0.9649078845977783, + "learning_rate": 9.690379358725379e-06, + "loss": 0.6787, + "step": 2198 + }, + { + "epoch": 0.14, + "grad_norm": 0.9011525511741638, + "learning_rate": 9.69002382511634e-06, + "loss": 0.6612, + "step": 2199 + }, + { + "epoch": 0.14, + "grad_norm": 0.8732843399047852, + "learning_rate": 9.689668094026716e-06, + "loss": 0.6005, + "step": 2200 + }, + { + "epoch": 0.14, + "grad_norm": 0.9425661563873291, + "learning_rate": 9.689312165471483e-06, + "loss": 0.5944, + "step": 2201 + }, + { + "epoch": 0.14, + "grad_norm": 0.8813802003860474, + "learning_rate": 9.688956039465626e-06, + "loss": 0.6291, + "step": 2202 + }, + { + "epoch": 0.14, + "grad_norm": 0.9538077712059021, + "learning_rate": 9.688599716024141e-06, + "loss": 0.6214, + "step": 2203 + }, + { + "epoch": 0.14, + "grad_norm": 0.8900435566902161, + "learning_rate": 9.688243195162033e-06, + "loss": 0.62, + "step": 2204 + }, + { + "epoch": 0.14, + "grad_norm": 0.8894834518432617, + "learning_rate": 9.687886476894314e-06, + "loss": 0.5676, + "step": 2205 + }, + { + "epoch": 0.14, + "grad_norm": 1.0278310775756836, + "learning_rate": 9.687529561236004e-06, + "loss": 0.6704, + "step": 2206 + }, + { + "epoch": 0.14, + "grad_norm": 0.9716306924819946, + "learning_rate": 9.687172448202129e-06, + "loss": 0.6479, + "step": 2207 + }, + { + "epoch": 0.14, + "grad_norm": 0.8720564246177673, + "learning_rate": 9.68681513780773e-06, + "loss": 0.6445, + "step": 2208 + }, + { + "epoch": 0.14, + "grad_norm": 0.9259105324745178, + "learning_rate": 9.686457630067848e-06, + "loss": 0.6582, + "step": 2209 + }, + { + "epoch": 0.14, + "grad_norm": 0.9476026892662048, + "learning_rate": 9.686099924997538e-06, + "loss": 0.6086, + "step": 2210 + }, + { + "epoch": 0.14, + "grad_norm": 0.8634487390518188, + "learning_rate": 9.685742022611864e-06, + "loss": 0.5746, + "step": 2211 + }, + { + "epoch": 0.14, + "grad_norm": 0.9387729167938232, + "learning_rate": 9.685383922925892e-06, + "loss": 0.6432, + "step": 2212 + }, + { + "epoch": 0.14, + "grad_norm": 0.897686779499054, + "learning_rate": 9.685025625954703e-06, + "loss": 0.6607, + "step": 2213 + }, + { + "epoch": 0.14, + "grad_norm": 0.9364752769470215, + "learning_rate": 9.684667131713381e-06, + "loss": 0.6227, + "step": 2214 + }, + { + "epoch": 0.14, + "grad_norm": 0.9455356597900391, + "learning_rate": 9.684308440217026e-06, + "loss": 0.6756, + "step": 2215 + }, + { + "epoch": 0.14, + "grad_norm": 0.9486604928970337, + "learning_rate": 9.683949551480736e-06, + "loss": 0.5791, + "step": 2216 + }, + { + "epoch": 0.14, + "grad_norm": 0.9534194469451904, + "learning_rate": 9.683590465519625e-06, + "loss": 0.6675, + "step": 2217 + }, + { + "epoch": 0.14, + "grad_norm": 0.9059990048408508, + "learning_rate": 9.683231182348813e-06, + "loss": 0.6342, + "step": 2218 + }, + { + "epoch": 0.14, + "grad_norm": 0.8662623167037964, + "learning_rate": 9.682871701983428e-06, + "loss": 0.6061, + "step": 2219 + }, + { + "epoch": 0.14, + "grad_norm": 1.0336980819702148, + "learning_rate": 9.682512024438607e-06, + "loss": 0.6481, + "step": 2220 + }, + { + "epoch": 0.14, + "grad_norm": 0.9470313787460327, + "learning_rate": 9.682152149729491e-06, + "loss": 0.679, + "step": 2221 + }, + { + "epoch": 0.14, + "grad_norm": 0.9740751385688782, + "learning_rate": 9.681792077871238e-06, + "loss": 0.6369, + "step": 2222 + }, + { + "epoch": 0.14, + "grad_norm": 0.9472583532333374, + "learning_rate": 9.681431808879007e-06, + "loss": 0.6351, + "step": 2223 + }, + { + "epoch": 0.14, + "grad_norm": 0.9514747262001038, + "learning_rate": 9.681071342767967e-06, + "loss": 0.6496, + "step": 2224 + }, + { + "epoch": 0.14, + "grad_norm": 0.9216861724853516, + "learning_rate": 9.6807106795533e-06, + "loss": 0.6447, + "step": 2225 + }, + { + "epoch": 0.14, + "grad_norm": 0.9888139367103577, + "learning_rate": 9.680349819250185e-06, + "loss": 0.6117, + "step": 2226 + }, + { + "epoch": 0.14, + "grad_norm": 0.9336743354797363, + "learning_rate": 9.679988761873824e-06, + "loss": 0.609, + "step": 2227 + }, + { + "epoch": 0.14, + "grad_norm": 0.8719781637191772, + "learning_rate": 9.679627507439416e-06, + "loss": 0.6476, + "step": 2228 + }, + { + "epoch": 0.14, + "grad_norm": 0.8691688179969788, + "learning_rate": 9.679266055962174e-06, + "loss": 0.6706, + "step": 2229 + }, + { + "epoch": 0.14, + "grad_norm": 0.8492668271064758, + "learning_rate": 9.678904407457314e-06, + "loss": 0.621, + "step": 2230 + }, + { + "epoch": 0.14, + "grad_norm": 0.9780930876731873, + "learning_rate": 9.678542561940067e-06, + "loss": 0.6739, + "step": 2231 + }, + { + "epoch": 0.14, + "grad_norm": 0.983424186706543, + "learning_rate": 9.678180519425669e-06, + "loss": 0.6331, + "step": 2232 + }, + { + "epoch": 0.14, + "grad_norm": 0.8755106329917908, + "learning_rate": 9.677818279929363e-06, + "loss": 0.5712, + "step": 2233 + }, + { + "epoch": 0.14, + "grad_norm": 0.8746523857116699, + "learning_rate": 9.677455843466402e-06, + "loss": 0.6365, + "step": 2234 + }, + { + "epoch": 0.14, + "grad_norm": 0.9087699055671692, + "learning_rate": 9.677093210052048e-06, + "loss": 0.5855, + "step": 2235 + }, + { + "epoch": 0.14, + "grad_norm": 0.8961308598518372, + "learning_rate": 9.676730379701567e-06, + "loss": 0.6563, + "step": 2236 + }, + { + "epoch": 0.14, + "grad_norm": 0.917649507522583, + "learning_rate": 9.676367352430242e-06, + "loss": 0.6651, + "step": 2237 + }, + { + "epoch": 0.14, + "grad_norm": 0.8809880018234253, + "learning_rate": 9.676004128253354e-06, + "loss": 0.6311, + "step": 2238 + }, + { + "epoch": 0.14, + "grad_norm": 0.946129322052002, + "learning_rate": 9.675640707186199e-06, + "loss": 0.6366, + "step": 2239 + }, + { + "epoch": 0.14, + "grad_norm": 0.920985221862793, + "learning_rate": 9.67527708924408e-06, + "loss": 0.5958, + "step": 2240 + }, + { + "epoch": 0.14, + "grad_norm": 0.8754940629005432, + "learning_rate": 9.674913274442305e-06, + "loss": 0.6765, + "step": 2241 + }, + { + "epoch": 0.14, + "grad_norm": 0.9010186791419983, + "learning_rate": 9.674549262796196e-06, + "loss": 0.6457, + "step": 2242 + }, + { + "epoch": 0.14, + "grad_norm": 0.8508507013320923, + "learning_rate": 9.674185054321079e-06, + "loss": 0.5684, + "step": 2243 + }, + { + "epoch": 0.14, + "grad_norm": 0.9368433952331543, + "learning_rate": 9.67382064903229e-06, + "loss": 0.6338, + "step": 2244 + }, + { + "epoch": 0.14, + "grad_norm": 1.0419481992721558, + "learning_rate": 9.67345604694517e-06, + "loss": 0.6102, + "step": 2245 + }, + { + "epoch": 0.14, + "grad_norm": 0.9164296984672546, + "learning_rate": 9.673091248075077e-06, + "loss": 0.6279, + "step": 2246 + }, + { + "epoch": 0.14, + "grad_norm": 0.9411850571632385, + "learning_rate": 9.672726252437368e-06, + "loss": 0.6252, + "step": 2247 + }, + { + "epoch": 0.14, + "grad_norm": 0.8734287023544312, + "learning_rate": 9.67236106004741e-06, + "loss": 0.6179, + "step": 2248 + }, + { + "epoch": 0.14, + "grad_norm": 0.8806835412979126, + "learning_rate": 9.671995670920582e-06, + "loss": 0.5995, + "step": 2249 + }, + { + "epoch": 0.14, + "grad_norm": 0.9245673418045044, + "learning_rate": 9.671630085072268e-06, + "loss": 0.6239, + "step": 2250 + }, + { + "epoch": 0.14, + "grad_norm": 0.941852331161499, + "learning_rate": 9.671264302517864e-06, + "loss": 0.656, + "step": 2251 + }, + { + "epoch": 0.14, + "grad_norm": 0.8769700527191162, + "learning_rate": 9.67089832327277e-06, + "loss": 0.6233, + "step": 2252 + }, + { + "epoch": 0.14, + "grad_norm": 0.9232833385467529, + "learning_rate": 9.670532147352399e-06, + "loss": 0.6819, + "step": 2253 + }, + { + "epoch": 0.14, + "grad_norm": 0.9096298813819885, + "learning_rate": 9.670165774772164e-06, + "loss": 0.6606, + "step": 2254 + }, + { + "epoch": 0.14, + "grad_norm": 0.8869082927703857, + "learning_rate": 9.669799205547494e-06, + "loss": 0.6617, + "step": 2255 + }, + { + "epoch": 0.14, + "grad_norm": 0.8901436924934387, + "learning_rate": 9.669432439693827e-06, + "loss": 0.6609, + "step": 2256 + }, + { + "epoch": 0.14, + "grad_norm": 0.9619342088699341, + "learning_rate": 9.669065477226602e-06, + "loss": 0.6758, + "step": 2257 + }, + { + "epoch": 0.14, + "grad_norm": 0.9121052026748657, + "learning_rate": 9.668698318161271e-06, + "loss": 0.6162, + "step": 2258 + }, + { + "epoch": 0.14, + "grad_norm": 0.9086534380912781, + "learning_rate": 9.668330962513297e-06, + "loss": 0.6098, + "step": 2259 + }, + { + "epoch": 0.14, + "grad_norm": 1.0334198474884033, + "learning_rate": 9.667963410298147e-06, + "loss": 0.6498, + "step": 2260 + }, + { + "epoch": 0.14, + "grad_norm": 0.942879319190979, + "learning_rate": 9.667595661531294e-06, + "loss": 0.6464, + "step": 2261 + }, + { + "epoch": 0.14, + "grad_norm": 0.8824305534362793, + "learning_rate": 9.667227716228228e-06, + "loss": 0.6543, + "step": 2262 + }, + { + "epoch": 0.14, + "grad_norm": 0.8903138041496277, + "learning_rate": 9.666859574404434e-06, + "loss": 0.6208, + "step": 2263 + }, + { + "epoch": 0.14, + "grad_norm": 0.9194402694702148, + "learning_rate": 9.666491236075423e-06, + "loss": 0.6187, + "step": 2264 + }, + { + "epoch": 0.14, + "grad_norm": 0.9723901152610779, + "learning_rate": 9.666122701256697e-06, + "loss": 0.6729, + "step": 2265 + }, + { + "epoch": 0.14, + "grad_norm": 0.9405593276023865, + "learning_rate": 9.665753969963779e-06, + "loss": 0.6383, + "step": 2266 + }, + { + "epoch": 0.14, + "grad_norm": 0.9103307127952576, + "learning_rate": 9.66538504221219e-06, + "loss": 0.6709, + "step": 2267 + }, + { + "epoch": 0.14, + "grad_norm": 0.8941056132316589, + "learning_rate": 9.665015918017467e-06, + "loss": 0.6022, + "step": 2268 + }, + { + "epoch": 0.14, + "grad_norm": 0.9082260727882385, + "learning_rate": 9.664646597395151e-06, + "loss": 0.6307, + "step": 2269 + }, + { + "epoch": 0.14, + "grad_norm": 0.9310553073883057, + "learning_rate": 9.664277080360796e-06, + "loss": 0.6667, + "step": 2270 + }, + { + "epoch": 0.14, + "grad_norm": 0.893653154373169, + "learning_rate": 9.663907366929958e-06, + "loss": 0.6233, + "step": 2271 + }, + { + "epoch": 0.14, + "grad_norm": 0.9378598928451538, + "learning_rate": 9.663537457118206e-06, + "loss": 0.6922, + "step": 2272 + }, + { + "epoch": 0.14, + "grad_norm": 1.0078368186950684, + "learning_rate": 9.663167350941114e-06, + "loss": 0.7198, + "step": 2273 + }, + { + "epoch": 0.14, + "grad_norm": 0.9074714183807373, + "learning_rate": 9.662797048414267e-06, + "loss": 0.6979, + "step": 2274 + }, + { + "epoch": 0.14, + "grad_norm": 0.9271409511566162, + "learning_rate": 9.662426549553257e-06, + "loss": 0.6478, + "step": 2275 + }, + { + "epoch": 0.14, + "grad_norm": 0.8915387392044067, + "learning_rate": 9.662055854373684e-06, + "loss": 0.6721, + "step": 2276 + }, + { + "epoch": 0.14, + "grad_norm": 0.8576652407646179, + "learning_rate": 9.661684962891158e-06, + "loss": 0.6245, + "step": 2277 + }, + { + "epoch": 0.14, + "grad_norm": 0.8375203013420105, + "learning_rate": 9.661313875121294e-06, + "loss": 0.5757, + "step": 2278 + }, + { + "epoch": 0.14, + "grad_norm": 0.9374811053276062, + "learning_rate": 9.66094259107972e-06, + "loss": 0.673, + "step": 2279 + }, + { + "epoch": 0.14, + "grad_norm": 0.9571980834007263, + "learning_rate": 9.660571110782066e-06, + "loss": 0.6396, + "step": 2280 + }, + { + "epoch": 0.14, + "grad_norm": 0.9160385131835938, + "learning_rate": 9.660199434243977e-06, + "loss": 0.6462, + "step": 2281 + }, + { + "epoch": 0.14, + "grad_norm": 0.8740729689598083, + "learning_rate": 9.6598275614811e-06, + "loss": 0.6305, + "step": 2282 + }, + { + "epoch": 0.14, + "grad_norm": 0.9242905974388123, + "learning_rate": 9.659455492509096e-06, + "loss": 0.6148, + "step": 2283 + }, + { + "epoch": 0.14, + "grad_norm": 0.9448089599609375, + "learning_rate": 9.659083227343628e-06, + "loss": 0.6224, + "step": 2284 + }, + { + "epoch": 0.14, + "grad_norm": 0.9231502413749695, + "learning_rate": 9.658710766000375e-06, + "loss": 0.6241, + "step": 2285 + }, + { + "epoch": 0.14, + "grad_norm": 0.9591917991638184, + "learning_rate": 9.658338108495018e-06, + "loss": 0.6087, + "step": 2286 + }, + { + "epoch": 0.14, + "grad_norm": 0.9254891276359558, + "learning_rate": 9.65796525484325e-06, + "loss": 0.6346, + "step": 2287 + }, + { + "epoch": 0.14, + "grad_norm": 0.9530578255653381, + "learning_rate": 9.657592205060766e-06, + "loss": 0.6213, + "step": 2288 + }, + { + "epoch": 0.15, + "grad_norm": 0.9184418320655823, + "learning_rate": 9.657218959163278e-06, + "loss": 0.5876, + "step": 2289 + }, + { + "epoch": 0.15, + "grad_norm": 0.9244976043701172, + "learning_rate": 9.656845517166502e-06, + "loss": 0.6017, + "step": 2290 + }, + { + "epoch": 0.15, + "grad_norm": 0.9175297617912292, + "learning_rate": 9.656471879086158e-06, + "loss": 0.6249, + "step": 2291 + }, + { + "epoch": 0.15, + "grad_norm": 0.931868314743042, + "learning_rate": 9.656098044937985e-06, + "loss": 0.6413, + "step": 2292 + }, + { + "epoch": 0.15, + "grad_norm": 0.9301477074623108, + "learning_rate": 9.65572401473772e-06, + "loss": 0.6678, + "step": 2293 + }, + { + "epoch": 0.15, + "grad_norm": 0.8930208086967468, + "learning_rate": 9.655349788501112e-06, + "loss": 0.6502, + "step": 2294 + }, + { + "epoch": 0.15, + "grad_norm": 0.9450199007987976, + "learning_rate": 9.654975366243919e-06, + "loss": 0.622, + "step": 2295 + }, + { + "epoch": 0.15, + "grad_norm": 0.8430439829826355, + "learning_rate": 9.654600747981908e-06, + "loss": 0.5621, + "step": 2296 + }, + { + "epoch": 0.15, + "grad_norm": 0.9476586580276489, + "learning_rate": 9.654225933730852e-06, + "loss": 0.6427, + "step": 2297 + }, + { + "epoch": 0.15, + "grad_norm": 0.8823800086975098, + "learning_rate": 9.653850923506532e-06, + "loss": 0.6457, + "step": 2298 + }, + { + "epoch": 0.15, + "grad_norm": 0.8883811235427856, + "learning_rate": 9.653475717324739e-06, + "loss": 0.6332, + "step": 2299 + }, + { + "epoch": 0.15, + "grad_norm": 0.8883042335510254, + "learning_rate": 9.65310031520127e-06, + "loss": 0.6744, + "step": 2300 + }, + { + "epoch": 0.15, + "grad_norm": 0.9382773041725159, + "learning_rate": 9.652724717151938e-06, + "loss": 0.6717, + "step": 2301 + }, + { + "epoch": 0.15, + "grad_norm": 0.9416858553886414, + "learning_rate": 9.652348923192551e-06, + "loss": 0.648, + "step": 2302 + }, + { + "epoch": 0.15, + "grad_norm": 0.8762007355690002, + "learning_rate": 9.651972933338935e-06, + "loss": 0.5897, + "step": 2303 + }, + { + "epoch": 0.15, + "grad_norm": 0.9719755053520203, + "learning_rate": 9.651596747606924e-06, + "loss": 0.6991, + "step": 2304 + }, + { + "epoch": 0.15, + "grad_norm": 0.9252588152885437, + "learning_rate": 9.651220366012354e-06, + "loss": 0.6186, + "step": 2305 + }, + { + "epoch": 0.15, + "grad_norm": 0.9560814499855042, + "learning_rate": 9.650843788571076e-06, + "loss": 0.6411, + "step": 2306 + }, + { + "epoch": 0.15, + "grad_norm": 1.036543369293213, + "learning_rate": 9.650467015298943e-06, + "loss": 0.6339, + "step": 2307 + }, + { + "epoch": 0.15, + "grad_norm": 0.9324323534965515, + "learning_rate": 9.650090046211822e-06, + "loss": 0.6649, + "step": 2308 + }, + { + "epoch": 0.15, + "grad_norm": 0.8707371950149536, + "learning_rate": 9.649712881325587e-06, + "loss": 0.5718, + "step": 2309 + }, + { + "epoch": 0.15, + "grad_norm": 0.9522401690483093, + "learning_rate": 9.649335520656118e-06, + "loss": 0.6915, + "step": 2310 + }, + { + "epoch": 0.15, + "grad_norm": 0.9509444236755371, + "learning_rate": 9.648957964219303e-06, + "loss": 0.6725, + "step": 2311 + }, + { + "epoch": 0.15, + "grad_norm": 0.9052115678787231, + "learning_rate": 9.64858021203104e-06, + "loss": 0.6543, + "step": 2312 + }, + { + "epoch": 0.15, + "grad_norm": 0.914665162563324, + "learning_rate": 9.648202264107239e-06, + "loss": 0.6265, + "step": 2313 + }, + { + "epoch": 0.15, + "grad_norm": 0.895332396030426, + "learning_rate": 9.647824120463806e-06, + "loss": 0.6248, + "step": 2314 + }, + { + "epoch": 0.15, + "grad_norm": 0.9358121752738953, + "learning_rate": 9.64744578111667e-06, + "loss": 0.5782, + "step": 2315 + }, + { + "epoch": 0.15, + "grad_norm": 0.9630364179611206, + "learning_rate": 9.647067246081761e-06, + "loss": 0.6326, + "step": 2316 + }, + { + "epoch": 0.15, + "grad_norm": 0.9551122784614563, + "learning_rate": 9.646688515375014e-06, + "loss": 0.6224, + "step": 2317 + }, + { + "epoch": 0.15, + "grad_norm": 0.9448221325874329, + "learning_rate": 9.646309589012379e-06, + "loss": 0.6124, + "step": 2318 + }, + { + "epoch": 0.15, + "grad_norm": 0.8628481030464172, + "learning_rate": 9.64593046700981e-06, + "loss": 0.5868, + "step": 2319 + }, + { + "epoch": 0.15, + "grad_norm": 0.9186686873435974, + "learning_rate": 9.645551149383272e-06, + "loss": 0.6142, + "step": 2320 + }, + { + "epoch": 0.15, + "grad_norm": 0.8454536199569702, + "learning_rate": 9.645171636148736e-06, + "loss": 0.5603, + "step": 2321 + }, + { + "epoch": 0.15, + "grad_norm": 0.904983639717102, + "learning_rate": 9.644791927322182e-06, + "loss": 0.6052, + "step": 2322 + }, + { + "epoch": 0.15, + "grad_norm": 0.9742248058319092, + "learning_rate": 9.644412022919597e-06, + "loss": 0.5941, + "step": 2323 + }, + { + "epoch": 0.15, + "grad_norm": 0.8749731183052063, + "learning_rate": 9.644031922956979e-06, + "loss": 0.625, + "step": 2324 + }, + { + "epoch": 0.15, + "grad_norm": 0.9458450078964233, + "learning_rate": 9.64365162745033e-06, + "loss": 0.6475, + "step": 2325 + }, + { + "epoch": 0.15, + "grad_norm": 0.8835443258285522, + "learning_rate": 9.643271136415668e-06, + "loss": 0.6253, + "step": 2326 + }, + { + "epoch": 0.15, + "grad_norm": 0.9501144886016846, + "learning_rate": 9.642890449869008e-06, + "loss": 0.6576, + "step": 2327 + }, + { + "epoch": 0.15, + "grad_norm": 0.8323443531990051, + "learning_rate": 9.642509567826386e-06, + "loss": 0.6001, + "step": 2328 + }, + { + "epoch": 0.15, + "grad_norm": 0.8868235945701599, + "learning_rate": 9.642128490303834e-06, + "loss": 0.5987, + "step": 2329 + }, + { + "epoch": 0.15, + "grad_norm": 0.8260801434516907, + "learning_rate": 9.6417472173174e-06, + "loss": 0.6312, + "step": 2330 + }, + { + "epoch": 0.15, + "grad_norm": 0.8221123218536377, + "learning_rate": 9.64136574888314e-06, + "loss": 0.6371, + "step": 2331 + }, + { + "epoch": 0.15, + "grad_norm": 0.911744236946106, + "learning_rate": 9.640984085017113e-06, + "loss": 0.6679, + "step": 2332 + }, + { + "epoch": 0.15, + "grad_norm": 0.8895740509033203, + "learning_rate": 9.640602225735391e-06, + "loss": 0.6627, + "step": 2333 + }, + { + "epoch": 0.15, + "grad_norm": 0.8667907118797302, + "learning_rate": 9.640220171054054e-06, + "loss": 0.6181, + "step": 2334 + }, + { + "epoch": 0.15, + "grad_norm": 0.9176861643791199, + "learning_rate": 9.639837920989188e-06, + "loss": 0.6174, + "step": 2335 + }, + { + "epoch": 0.15, + "grad_norm": 1.0207765102386475, + "learning_rate": 9.639455475556887e-06, + "loss": 0.6571, + "step": 2336 + }, + { + "epoch": 0.15, + "grad_norm": 0.9681141972541809, + "learning_rate": 9.639072834773254e-06, + "loss": 0.6719, + "step": 2337 + }, + { + "epoch": 0.15, + "grad_norm": 0.8755819797515869, + "learning_rate": 9.638689998654404e-06, + "loss": 0.5911, + "step": 2338 + }, + { + "epoch": 0.15, + "grad_norm": 0.9221803545951843, + "learning_rate": 9.638306967216453e-06, + "loss": 0.6486, + "step": 2339 + }, + { + "epoch": 0.15, + "grad_norm": 0.8622904419898987, + "learning_rate": 9.637923740475534e-06, + "loss": 0.5772, + "step": 2340 + }, + { + "epoch": 0.15, + "grad_norm": 0.888806164264679, + "learning_rate": 9.637540318447778e-06, + "loss": 0.6504, + "step": 2341 + }, + { + "epoch": 0.15, + "grad_norm": 0.8896088004112244, + "learning_rate": 9.637156701149333e-06, + "loss": 0.6623, + "step": 2342 + }, + { + "epoch": 0.15, + "grad_norm": 0.9848870635032654, + "learning_rate": 9.636772888596352e-06, + "loss": 0.6652, + "step": 2343 + }, + { + "epoch": 0.15, + "grad_norm": 0.814385712146759, + "learning_rate": 9.636388880804991e-06, + "loss": 0.6293, + "step": 2344 + }, + { + "epoch": 0.15, + "grad_norm": 0.9577558040618896, + "learning_rate": 9.636004677791427e-06, + "loss": 0.6725, + "step": 2345 + }, + { + "epoch": 0.15, + "grad_norm": 0.9663403630256653, + "learning_rate": 9.635620279571833e-06, + "loss": 0.6702, + "step": 2346 + }, + { + "epoch": 0.15, + "grad_norm": 0.9322980642318726, + "learning_rate": 9.635235686162395e-06, + "loss": 0.6654, + "step": 2347 + }, + { + "epoch": 0.15, + "grad_norm": 0.8965892791748047, + "learning_rate": 9.634850897579304e-06, + "loss": 0.6208, + "step": 2348 + }, + { + "epoch": 0.15, + "grad_norm": 0.858284592628479, + "learning_rate": 9.63446591383877e-06, + "loss": 0.6063, + "step": 2349 + }, + { + "epoch": 0.15, + "grad_norm": 0.932563304901123, + "learning_rate": 9.634080734956993e-06, + "loss": 0.6188, + "step": 2350 + }, + { + "epoch": 0.15, + "grad_norm": 0.9433985948562622, + "learning_rate": 9.633695360950202e-06, + "loss": 0.6515, + "step": 2351 + }, + { + "epoch": 0.15, + "grad_norm": 0.9088814854621887, + "learning_rate": 9.633309791834617e-06, + "loss": 0.5985, + "step": 2352 + }, + { + "epoch": 0.15, + "grad_norm": 0.9924407601356506, + "learning_rate": 9.632924027626474e-06, + "loss": 0.6527, + "step": 2353 + }, + { + "epoch": 0.15, + "grad_norm": 0.989184558391571, + "learning_rate": 9.632538068342018e-06, + "loss": 0.6183, + "step": 2354 + }, + { + "epoch": 0.15, + "grad_norm": 0.898025631904602, + "learning_rate": 9.632151913997498e-06, + "loss": 0.6068, + "step": 2355 + }, + { + "epoch": 0.15, + "grad_norm": 0.8926374912261963, + "learning_rate": 9.631765564609177e-06, + "loss": 0.588, + "step": 2356 + }, + { + "epoch": 0.15, + "grad_norm": 0.9426562190055847, + "learning_rate": 9.63137902019332e-06, + "loss": 0.6104, + "step": 2357 + }, + { + "epoch": 0.15, + "grad_norm": 0.9089484810829163, + "learning_rate": 9.630992280766202e-06, + "loss": 0.5981, + "step": 2358 + }, + { + "epoch": 0.15, + "grad_norm": 0.9309037923812866, + "learning_rate": 9.630605346344113e-06, + "loss": 0.6064, + "step": 2359 + }, + { + "epoch": 0.15, + "grad_norm": 0.9744449257850647, + "learning_rate": 9.630218216943338e-06, + "loss": 0.6856, + "step": 2360 + }, + { + "epoch": 0.15, + "grad_norm": 0.9766737222671509, + "learning_rate": 9.629830892580183e-06, + "loss": 0.669, + "step": 2361 + }, + { + "epoch": 0.15, + "grad_norm": 0.9922558665275574, + "learning_rate": 9.629443373270954e-06, + "loss": 0.631, + "step": 2362 + }, + { + "epoch": 0.15, + "grad_norm": 0.960340678691864, + "learning_rate": 9.62905565903197e-06, + "loss": 0.653, + "step": 2363 + }, + { + "epoch": 0.15, + "grad_norm": 0.9167748689651489, + "learning_rate": 9.628667749879555e-06, + "loss": 0.6177, + "step": 2364 + }, + { + "epoch": 0.15, + "grad_norm": 0.8925089836120605, + "learning_rate": 9.628279645830044e-06, + "loss": 0.6215, + "step": 2365 + }, + { + "epoch": 0.15, + "grad_norm": 0.9379563331604004, + "learning_rate": 9.627891346899775e-06, + "loss": 0.5828, + "step": 2366 + }, + { + "epoch": 0.15, + "grad_norm": 0.8987218141555786, + "learning_rate": 9.627502853105104e-06, + "loss": 0.6567, + "step": 2367 + }, + { + "epoch": 0.15, + "grad_norm": 0.8803840279579163, + "learning_rate": 9.627114164462385e-06, + "loss": 0.6219, + "step": 2368 + }, + { + "epoch": 0.15, + "grad_norm": 0.9460154175758362, + "learning_rate": 9.626725280987985e-06, + "loss": 0.6922, + "step": 2369 + }, + { + "epoch": 0.15, + "grad_norm": 0.8633837103843689, + "learning_rate": 9.626336202698277e-06, + "loss": 0.6041, + "step": 2370 + }, + { + "epoch": 0.15, + "grad_norm": 0.9062354564666748, + "learning_rate": 9.625946929609647e-06, + "loss": 0.6013, + "step": 2371 + }, + { + "epoch": 0.15, + "grad_norm": 1.0080102682113647, + "learning_rate": 9.625557461738484e-06, + "loss": 0.6919, + "step": 2372 + }, + { + "epoch": 0.15, + "grad_norm": 0.9922934174537659, + "learning_rate": 9.625167799101188e-06, + "loss": 0.6966, + "step": 2373 + }, + { + "epoch": 0.15, + "grad_norm": 0.9306240081787109, + "learning_rate": 9.624777941714165e-06, + "loss": 0.7226, + "step": 2374 + }, + { + "epoch": 0.15, + "grad_norm": 0.9547491073608398, + "learning_rate": 9.624387889593832e-06, + "loss": 0.6127, + "step": 2375 + }, + { + "epoch": 0.15, + "grad_norm": 0.9361152052879333, + "learning_rate": 9.62399764275661e-06, + "loss": 0.6275, + "step": 2376 + }, + { + "epoch": 0.15, + "grad_norm": 0.9301709532737732, + "learning_rate": 9.623607201218934e-06, + "loss": 0.6553, + "step": 2377 + }, + { + "epoch": 0.15, + "grad_norm": 0.9561883807182312, + "learning_rate": 9.623216564997244e-06, + "loss": 0.6708, + "step": 2378 + }, + { + "epoch": 0.15, + "grad_norm": 0.8827099800109863, + "learning_rate": 9.622825734107987e-06, + "loss": 0.6176, + "step": 2379 + }, + { + "epoch": 0.15, + "grad_norm": 0.9545076489448547, + "learning_rate": 9.62243470856762e-06, + "loss": 0.6568, + "step": 2380 + }, + { + "epoch": 0.15, + "grad_norm": 0.947793185710907, + "learning_rate": 9.622043488392607e-06, + "loss": 0.6247, + "step": 2381 + }, + { + "epoch": 0.15, + "grad_norm": 0.8860893249511719, + "learning_rate": 9.621652073599423e-06, + "loss": 0.6495, + "step": 2382 + }, + { + "epoch": 0.15, + "grad_norm": 0.852778434753418, + "learning_rate": 9.621260464204548e-06, + "loss": 0.6111, + "step": 2383 + }, + { + "epoch": 0.15, + "grad_norm": 0.8790839910507202, + "learning_rate": 9.620868660224468e-06, + "loss": 0.6269, + "step": 2384 + }, + { + "epoch": 0.15, + "grad_norm": 0.9253284931182861, + "learning_rate": 9.620476661675685e-06, + "loss": 0.6211, + "step": 2385 + }, + { + "epoch": 0.15, + "grad_norm": 0.892335832118988, + "learning_rate": 9.620084468574704e-06, + "loss": 0.6312, + "step": 2386 + }, + { + "epoch": 0.15, + "grad_norm": 0.9835995435714722, + "learning_rate": 9.619692080938039e-06, + "loss": 0.5984, + "step": 2387 + }, + { + "epoch": 0.15, + "grad_norm": 0.9870280027389526, + "learning_rate": 9.61929949878221e-06, + "loss": 0.6646, + "step": 2388 + }, + { + "epoch": 0.15, + "grad_norm": 1.0109413862228394, + "learning_rate": 9.618906722123748e-06, + "loss": 0.6489, + "step": 2389 + }, + { + "epoch": 0.15, + "grad_norm": 0.9506871700286865, + "learning_rate": 9.618513750979193e-06, + "loss": 0.649, + "step": 2390 + }, + { + "epoch": 0.15, + "grad_norm": 0.8704227209091187, + "learning_rate": 9.61812058536509e-06, + "loss": 0.5762, + "step": 2391 + }, + { + "epoch": 0.15, + "grad_norm": 0.9024654626846313, + "learning_rate": 9.617727225297994e-06, + "loss": 0.6464, + "step": 2392 + }, + { + "epoch": 0.15, + "grad_norm": 0.9265242218971252, + "learning_rate": 9.617333670794468e-06, + "loss": 0.627, + "step": 2393 + }, + { + "epoch": 0.15, + "grad_norm": 0.8859432935714722, + "learning_rate": 9.616939921871087e-06, + "loss": 0.6211, + "step": 2394 + }, + { + "epoch": 0.15, + "grad_norm": 0.9842885732650757, + "learning_rate": 9.616545978544424e-06, + "loss": 0.6308, + "step": 2395 + }, + { + "epoch": 0.15, + "grad_norm": 0.8890007138252258, + "learning_rate": 9.616151840831069e-06, + "loss": 0.5769, + "step": 2396 + }, + { + "epoch": 0.15, + "grad_norm": 0.9050889015197754, + "learning_rate": 9.61575750874762e-06, + "loss": 0.6224, + "step": 2397 + }, + { + "epoch": 0.15, + "grad_norm": 0.8961501717567444, + "learning_rate": 9.615362982310679e-06, + "loss": 0.5271, + "step": 2398 + }, + { + "epoch": 0.15, + "grad_norm": 0.8966047167778015, + "learning_rate": 9.614968261536858e-06, + "loss": 0.6134, + "step": 2399 + }, + { + "epoch": 0.15, + "grad_norm": 1.0056560039520264, + "learning_rate": 9.61457334644278e-06, + "loss": 0.6931, + "step": 2400 + }, + { + "epoch": 0.15, + "grad_norm": 0.9624162316322327, + "learning_rate": 9.61417823704507e-06, + "loss": 0.6242, + "step": 2401 + }, + { + "epoch": 0.15, + "grad_norm": 0.9640290141105652, + "learning_rate": 9.613782933360365e-06, + "loss": 0.6799, + "step": 2402 + }, + { + "epoch": 0.15, + "grad_norm": 0.9172433018684387, + "learning_rate": 9.613387435405312e-06, + "loss": 0.5416, + "step": 2403 + }, + { + "epoch": 0.15, + "grad_norm": 0.965398371219635, + "learning_rate": 9.612991743196562e-06, + "loss": 0.6174, + "step": 2404 + }, + { + "epoch": 0.15, + "grad_norm": 0.909716010093689, + "learning_rate": 9.612595856750776e-06, + "loss": 0.6275, + "step": 2405 + }, + { + "epoch": 0.15, + "grad_norm": 0.9636967182159424, + "learning_rate": 9.612199776084627e-06, + "loss": 0.6389, + "step": 2406 + }, + { + "epoch": 0.15, + "grad_norm": 0.8924964070320129, + "learning_rate": 9.611803501214789e-06, + "loss": 0.6796, + "step": 2407 + }, + { + "epoch": 0.15, + "grad_norm": 0.9327677488327026, + "learning_rate": 9.61140703215795e-06, + "loss": 0.612, + "step": 2408 + }, + { + "epoch": 0.15, + "grad_norm": 0.943336546421051, + "learning_rate": 9.611010368930801e-06, + "loss": 0.6227, + "step": 2409 + }, + { + "epoch": 0.15, + "grad_norm": 0.9563452005386353, + "learning_rate": 9.610613511550047e-06, + "loss": 0.6554, + "step": 2410 + }, + { + "epoch": 0.15, + "grad_norm": 0.9521295428276062, + "learning_rate": 9.610216460032398e-06, + "loss": 0.6661, + "step": 2411 + }, + { + "epoch": 0.15, + "grad_norm": 1.0174225568771362, + "learning_rate": 9.60981921439457e-06, + "loss": 0.716, + "step": 2412 + }, + { + "epoch": 0.15, + "grad_norm": 1.0105873346328735, + "learning_rate": 9.609421774653291e-06, + "loss": 0.6864, + "step": 2413 + }, + { + "epoch": 0.15, + "grad_norm": 0.9458989500999451, + "learning_rate": 9.609024140825299e-06, + "loss": 0.5787, + "step": 2414 + }, + { + "epoch": 0.15, + "grad_norm": 1.0147578716278076, + "learning_rate": 9.608626312927331e-06, + "loss": 0.6836, + "step": 2415 + }, + { + "epoch": 0.15, + "grad_norm": 0.9052198529243469, + "learning_rate": 9.608228290976143e-06, + "loss": 0.6575, + "step": 2416 + }, + { + "epoch": 0.15, + "grad_norm": 0.9532240629196167, + "learning_rate": 9.607830074988491e-06, + "loss": 0.7125, + "step": 2417 + }, + { + "epoch": 0.15, + "grad_norm": 0.9613702297210693, + "learning_rate": 9.607431664981144e-06, + "loss": 0.6158, + "step": 2418 + }, + { + "epoch": 0.15, + "grad_norm": 1.0011951923370361, + "learning_rate": 9.607033060970878e-06, + "loss": 0.6624, + "step": 2419 + }, + { + "epoch": 0.15, + "grad_norm": 1.0187532901763916, + "learning_rate": 9.606634262974477e-06, + "loss": 0.6558, + "step": 2420 + }, + { + "epoch": 0.15, + "grad_norm": 0.9332427382469177, + "learning_rate": 9.606235271008732e-06, + "loss": 0.5966, + "step": 2421 + }, + { + "epoch": 0.15, + "grad_norm": 1.0140283107757568, + "learning_rate": 9.605836085090445e-06, + "loss": 0.6317, + "step": 2422 + }, + { + "epoch": 0.15, + "grad_norm": 0.9521609544754028, + "learning_rate": 9.605436705236421e-06, + "loss": 0.624, + "step": 2423 + }, + { + "epoch": 0.15, + "grad_norm": 0.8743317127227783, + "learning_rate": 9.60503713146348e-06, + "loss": 0.6424, + "step": 2424 + }, + { + "epoch": 0.15, + "grad_norm": 0.9343128800392151, + "learning_rate": 9.604637363788444e-06, + "loss": 0.6336, + "step": 2425 + }, + { + "epoch": 0.15, + "grad_norm": 0.876990795135498, + "learning_rate": 9.604237402228149e-06, + "loss": 0.6946, + "step": 2426 + }, + { + "epoch": 0.15, + "grad_norm": 1.0633113384246826, + "learning_rate": 9.603837246799431e-06, + "loss": 0.6597, + "step": 2427 + }, + { + "epoch": 0.15, + "grad_norm": 0.9568866491317749, + "learning_rate": 9.603436897519145e-06, + "loss": 0.6364, + "step": 2428 + }, + { + "epoch": 0.15, + "grad_norm": 0.8877198100090027, + "learning_rate": 9.603036354404145e-06, + "loss": 0.6024, + "step": 2429 + }, + { + "epoch": 0.15, + "grad_norm": 0.844281792640686, + "learning_rate": 9.602635617471295e-06, + "loss": 0.6393, + "step": 2430 + }, + { + "epoch": 0.15, + "grad_norm": 0.8881232738494873, + "learning_rate": 9.602234686737473e-06, + "loss": 0.6738, + "step": 2431 + }, + { + "epoch": 0.15, + "grad_norm": 0.8689331412315369, + "learning_rate": 9.601833562219556e-06, + "loss": 0.6245, + "step": 2432 + }, + { + "epoch": 0.15, + "grad_norm": 0.8485287427902222, + "learning_rate": 9.601432243934437e-06, + "loss": 0.5738, + "step": 2433 + }, + { + "epoch": 0.15, + "grad_norm": 0.910656213760376, + "learning_rate": 9.601030731899014e-06, + "loss": 0.6129, + "step": 2434 + }, + { + "epoch": 0.15, + "grad_norm": 0.8227107524871826, + "learning_rate": 9.600629026130192e-06, + "loss": 0.5835, + "step": 2435 + }, + { + "epoch": 0.15, + "grad_norm": 0.948371410369873, + "learning_rate": 9.600227126644887e-06, + "loss": 0.7007, + "step": 2436 + }, + { + "epoch": 0.15, + "grad_norm": 0.8964093327522278, + "learning_rate": 9.59982503346002e-06, + "loss": 0.6071, + "step": 2437 + }, + { + "epoch": 0.15, + "grad_norm": 0.9090175628662109, + "learning_rate": 9.599422746592522e-06, + "loss": 0.6698, + "step": 2438 + }, + { + "epoch": 0.15, + "grad_norm": 1.0446149110794067, + "learning_rate": 9.599020266059334e-06, + "loss": 0.699, + "step": 2439 + }, + { + "epoch": 0.15, + "grad_norm": 0.8509514331817627, + "learning_rate": 9.5986175918774e-06, + "loss": 0.6503, + "step": 2440 + }, + { + "epoch": 0.15, + "grad_norm": 0.9461331367492676, + "learning_rate": 9.598214724063678e-06, + "loss": 0.6716, + "step": 2441 + }, + { + "epoch": 0.15, + "grad_norm": 0.8966230750083923, + "learning_rate": 9.597811662635128e-06, + "loss": 0.6537, + "step": 2442 + }, + { + "epoch": 0.15, + "grad_norm": 1.0068098306655884, + "learning_rate": 9.597408407608725e-06, + "loss": 0.6665, + "step": 2443 + }, + { + "epoch": 0.15, + "grad_norm": 0.9178805351257324, + "learning_rate": 9.597004959001447e-06, + "loss": 0.628, + "step": 2444 + }, + { + "epoch": 0.15, + "grad_norm": 0.9293497204780579, + "learning_rate": 9.596601316830282e-06, + "loss": 0.6272, + "step": 2445 + }, + { + "epoch": 0.15, + "grad_norm": 0.9563755989074707, + "learning_rate": 9.596197481112225e-06, + "loss": 0.6115, + "step": 2446 + }, + { + "epoch": 0.16, + "grad_norm": 0.8711754083633423, + "learning_rate": 9.59579345186428e-06, + "loss": 0.5987, + "step": 2447 + }, + { + "epoch": 0.16, + "grad_norm": 0.9303868412971497, + "learning_rate": 9.595389229103464e-06, + "loss": 0.6427, + "step": 2448 + }, + { + "epoch": 0.16, + "grad_norm": 0.8827221393585205, + "learning_rate": 9.594984812846792e-06, + "loss": 0.6017, + "step": 2449 + }, + { + "epoch": 0.16, + "grad_norm": 0.9278771877288818, + "learning_rate": 9.594580203111294e-06, + "loss": 0.5994, + "step": 2450 + }, + { + "epoch": 0.16, + "grad_norm": 0.9450991153717041, + "learning_rate": 9.594175399914008e-06, + "loss": 0.6128, + "step": 2451 + }, + { + "epoch": 0.16, + "grad_norm": 0.9174882173538208, + "learning_rate": 9.593770403271977e-06, + "loss": 0.661, + "step": 2452 + }, + { + "epoch": 0.16, + "grad_norm": 0.9412451982498169, + "learning_rate": 9.593365213202255e-06, + "loss": 0.6346, + "step": 2453 + }, + { + "epoch": 0.16, + "grad_norm": 0.8439229726791382, + "learning_rate": 9.592959829721903e-06, + "loss": 0.6032, + "step": 2454 + }, + { + "epoch": 0.16, + "grad_norm": 0.8956865072250366, + "learning_rate": 9.59255425284799e-06, + "loss": 0.6588, + "step": 2455 + }, + { + "epoch": 0.16, + "grad_norm": 0.8552918434143066, + "learning_rate": 9.592148482597595e-06, + "loss": 0.6176, + "step": 2456 + }, + { + "epoch": 0.16, + "grad_norm": 0.9776921272277832, + "learning_rate": 9.591742518987802e-06, + "loss": 0.6922, + "step": 2457 + }, + { + "epoch": 0.16, + "grad_norm": 0.8479081392288208, + "learning_rate": 9.591336362035703e-06, + "loss": 0.5635, + "step": 2458 + }, + { + "epoch": 0.16, + "grad_norm": 0.8601279854774475, + "learning_rate": 9.590930011758403e-06, + "loss": 0.6025, + "step": 2459 + }, + { + "epoch": 0.16, + "grad_norm": 0.9203231334686279, + "learning_rate": 9.590523468173011e-06, + "loss": 0.6317, + "step": 2460 + }, + { + "epoch": 0.16, + "grad_norm": 0.9199931621551514, + "learning_rate": 9.590116731296646e-06, + "loss": 0.578, + "step": 2461 + }, + { + "epoch": 0.16, + "grad_norm": 0.8737656474113464, + "learning_rate": 9.589709801146432e-06, + "loss": 0.6047, + "step": 2462 + }, + { + "epoch": 0.16, + "grad_norm": 0.8943954706192017, + "learning_rate": 9.589302677739506e-06, + "loss": 0.6203, + "step": 2463 + }, + { + "epoch": 0.16, + "grad_norm": 0.9137763381004333, + "learning_rate": 9.588895361093009e-06, + "loss": 0.6568, + "step": 2464 + }, + { + "epoch": 0.16, + "grad_norm": 0.9582598805427551, + "learning_rate": 9.588487851224091e-06, + "loss": 0.6377, + "step": 2465 + }, + { + "epoch": 0.16, + "grad_norm": 0.9316682815551758, + "learning_rate": 9.588080148149912e-06, + "loss": 0.6544, + "step": 2466 + }, + { + "epoch": 0.16, + "grad_norm": 1.0368373394012451, + "learning_rate": 9.587672251887639e-06, + "loss": 0.7225, + "step": 2467 + }, + { + "epoch": 0.16, + "grad_norm": 0.8449527621269226, + "learning_rate": 9.587264162454447e-06, + "loss": 0.5722, + "step": 2468 + }, + { + "epoch": 0.16, + "grad_norm": 0.8870164155960083, + "learning_rate": 9.586855879867519e-06, + "loss": 0.6279, + "step": 2469 + }, + { + "epoch": 0.16, + "grad_norm": 0.9462539553642273, + "learning_rate": 9.586447404144046e-06, + "loss": 0.6945, + "step": 2470 + }, + { + "epoch": 0.16, + "grad_norm": 0.9636325240135193, + "learning_rate": 9.58603873530123e-06, + "loss": 0.626, + "step": 2471 + }, + { + "epoch": 0.16, + "grad_norm": 0.8742256164550781, + "learning_rate": 9.585629873356273e-06, + "loss": 0.5091, + "step": 2472 + }, + { + "epoch": 0.16, + "grad_norm": 0.937807559967041, + "learning_rate": 9.585220818326395e-06, + "loss": 0.6507, + "step": 2473 + }, + { + "epoch": 0.16, + "grad_norm": 0.8809791207313538, + "learning_rate": 9.58481157022882e-06, + "loss": 0.6041, + "step": 2474 + }, + { + "epoch": 0.16, + "grad_norm": 0.9614810347557068, + "learning_rate": 9.584402129080779e-06, + "loss": 0.6466, + "step": 2475 + }, + { + "epoch": 0.16, + "grad_norm": 0.8808587789535522, + "learning_rate": 9.583992494899513e-06, + "loss": 0.6032, + "step": 2476 + }, + { + "epoch": 0.16, + "grad_norm": 0.9078788161277771, + "learning_rate": 9.583582667702269e-06, + "loss": 0.6371, + "step": 2477 + }, + { + "epoch": 0.16, + "grad_norm": 0.8558230996131897, + "learning_rate": 9.583172647506305e-06, + "loss": 0.6056, + "step": 2478 + }, + { + "epoch": 0.16, + "grad_norm": 0.8734446167945862, + "learning_rate": 9.582762434328883e-06, + "loss": 0.6081, + "step": 2479 + }, + { + "epoch": 0.16, + "grad_norm": 0.8628250360488892, + "learning_rate": 9.582352028187278e-06, + "loss": 0.665, + "step": 2480 + }, + { + "epoch": 0.16, + "grad_norm": 0.8482995629310608, + "learning_rate": 9.581941429098769e-06, + "loss": 0.5588, + "step": 2481 + }, + { + "epoch": 0.16, + "grad_norm": 0.9192953109741211, + "learning_rate": 9.581530637080647e-06, + "loss": 0.6463, + "step": 2482 + }, + { + "epoch": 0.16, + "grad_norm": 0.9629647135734558, + "learning_rate": 9.581119652150208e-06, + "loss": 0.6296, + "step": 2483 + }, + { + "epoch": 0.16, + "grad_norm": 0.9503898620605469, + "learning_rate": 9.580708474324755e-06, + "loss": 0.6034, + "step": 2484 + }, + { + "epoch": 0.16, + "grad_norm": 0.8851401209831238, + "learning_rate": 9.580297103621605e-06, + "loss": 0.6284, + "step": 2485 + }, + { + "epoch": 0.16, + "grad_norm": 0.9362215399742126, + "learning_rate": 9.579885540058079e-06, + "loss": 0.6451, + "step": 2486 + }, + { + "epoch": 0.16, + "grad_norm": 0.8985670804977417, + "learning_rate": 9.579473783651503e-06, + "loss": 0.6017, + "step": 2487 + }, + { + "epoch": 0.16, + "grad_norm": 0.977086067199707, + "learning_rate": 9.579061834419217e-06, + "loss": 0.6823, + "step": 2488 + }, + { + "epoch": 0.16, + "grad_norm": 0.9364843368530273, + "learning_rate": 9.578649692378567e-06, + "loss": 0.706, + "step": 2489 + }, + { + "epoch": 0.16, + "grad_norm": 0.9483008980751038, + "learning_rate": 9.578237357546907e-06, + "loss": 0.6172, + "step": 2490 + }, + { + "epoch": 0.16, + "grad_norm": 0.9181289672851562, + "learning_rate": 9.577824829941597e-06, + "loss": 0.6565, + "step": 2491 + }, + { + "epoch": 0.16, + "grad_norm": 0.9168728590011597, + "learning_rate": 9.577412109580009e-06, + "loss": 0.6018, + "step": 2492 + }, + { + "epoch": 0.16, + "grad_norm": 0.9681271910667419, + "learning_rate": 9.57699919647952e-06, + "loss": 0.6707, + "step": 2493 + }, + { + "epoch": 0.16, + "grad_norm": 1.0229047536849976, + "learning_rate": 9.576586090657519e-06, + "loss": 0.6503, + "step": 2494 + }, + { + "epoch": 0.16, + "grad_norm": 0.9658745527267456, + "learning_rate": 9.576172792131397e-06, + "loss": 0.6538, + "step": 2495 + }, + { + "epoch": 0.16, + "grad_norm": 0.9022778272628784, + "learning_rate": 9.57575930091856e-06, + "loss": 0.6287, + "step": 2496 + }, + { + "epoch": 0.16, + "grad_norm": 0.9274746179580688, + "learning_rate": 9.575345617036413e-06, + "loss": 0.659, + "step": 2497 + }, + { + "epoch": 0.16, + "grad_norm": 0.8899304270744324, + "learning_rate": 9.574931740502383e-06, + "loss": 0.6294, + "step": 2498 + }, + { + "epoch": 0.16, + "grad_norm": 1.072940468788147, + "learning_rate": 9.57451767133389e-06, + "loss": 0.6603, + "step": 2499 + }, + { + "epoch": 0.16, + "grad_norm": 0.8845842480659485, + "learning_rate": 9.57410340954837e-06, + "loss": 0.6408, + "step": 2500 + }, + { + "epoch": 0.16, + "grad_norm": 0.8758795857429504, + "learning_rate": 9.57368895516327e-06, + "loss": 0.6419, + "step": 2501 + }, + { + "epoch": 0.16, + "grad_norm": 0.9652571082115173, + "learning_rate": 9.573274308196037e-06, + "loss": 0.6189, + "step": 2502 + }, + { + "epoch": 0.16, + "grad_norm": 0.8658424615859985, + "learning_rate": 9.572859468664133e-06, + "loss": 0.5963, + "step": 2503 + }, + { + "epoch": 0.16, + "grad_norm": 0.9083049893379211, + "learning_rate": 9.572444436585025e-06, + "loss": 0.6744, + "step": 2504 + }, + { + "epoch": 0.16, + "grad_norm": 0.8568194508552551, + "learning_rate": 9.572029211976189e-06, + "loss": 0.6413, + "step": 2505 + }, + { + "epoch": 0.16, + "grad_norm": 0.8805359601974487, + "learning_rate": 9.571613794855105e-06, + "loss": 0.6408, + "step": 2506 + }, + { + "epoch": 0.16, + "grad_norm": 0.9113273620605469, + "learning_rate": 9.57119818523927e-06, + "loss": 0.6041, + "step": 2507 + }, + { + "epoch": 0.16, + "grad_norm": 0.925477147102356, + "learning_rate": 9.570782383146183e-06, + "loss": 0.6571, + "step": 2508 + }, + { + "epoch": 0.16, + "grad_norm": 1.012748122215271, + "learning_rate": 9.570366388593347e-06, + "loss": 0.6822, + "step": 2509 + }, + { + "epoch": 0.16, + "grad_norm": 1.0008292198181152, + "learning_rate": 9.569950201598283e-06, + "loss": 0.6183, + "step": 2510 + }, + { + "epoch": 0.16, + "grad_norm": 0.8939400911331177, + "learning_rate": 9.569533822178513e-06, + "loss": 0.6556, + "step": 2511 + }, + { + "epoch": 0.16, + "grad_norm": 0.8361603021621704, + "learning_rate": 9.569117250351571e-06, + "loss": 0.6179, + "step": 2512 + }, + { + "epoch": 0.16, + "grad_norm": 0.9382283687591553, + "learning_rate": 9.568700486134996e-06, + "loss": 0.6307, + "step": 2513 + }, + { + "epoch": 0.16, + "grad_norm": 0.9003825783729553, + "learning_rate": 9.568283529546336e-06, + "loss": 0.5918, + "step": 2514 + }, + { + "epoch": 0.16, + "grad_norm": 0.9097765684127808, + "learning_rate": 9.56786638060315e-06, + "loss": 0.6467, + "step": 2515 + }, + { + "epoch": 0.16, + "grad_norm": 0.938727080821991, + "learning_rate": 9.567449039323e-06, + "loss": 0.6822, + "step": 2516 + }, + { + "epoch": 0.16, + "grad_norm": 0.8862230181694031, + "learning_rate": 9.56703150572346e-06, + "loss": 0.6319, + "step": 2517 + }, + { + "epoch": 0.16, + "grad_norm": 0.8898985981941223, + "learning_rate": 9.56661377982211e-06, + "loss": 0.6129, + "step": 2518 + }, + { + "epoch": 0.16, + "grad_norm": 0.9016578197479248, + "learning_rate": 9.566195861636542e-06, + "loss": 0.668, + "step": 2519 + }, + { + "epoch": 0.16, + "grad_norm": 0.8894520401954651, + "learning_rate": 9.56577775118435e-06, + "loss": 0.6323, + "step": 2520 + }, + { + "epoch": 0.16, + "grad_norm": 0.9632962346076965, + "learning_rate": 9.56535944848314e-06, + "loss": 0.7104, + "step": 2521 + }, + { + "epoch": 0.16, + "grad_norm": 0.8559346199035645, + "learning_rate": 9.564940953550525e-06, + "loss": 0.6451, + "step": 2522 + }, + { + "epoch": 0.16, + "grad_norm": 0.9069300293922424, + "learning_rate": 9.564522266404127e-06, + "loss": 0.6152, + "step": 2523 + }, + { + "epoch": 0.16, + "grad_norm": 0.9622822403907776, + "learning_rate": 9.564103387061575e-06, + "loss": 0.5734, + "step": 2524 + }, + { + "epoch": 0.16, + "grad_norm": 0.9601327776908875, + "learning_rate": 9.563684315540507e-06, + "loss": 0.6096, + "step": 2525 + }, + { + "epoch": 0.16, + "grad_norm": 0.905097246170044, + "learning_rate": 9.563265051858569e-06, + "loss": 0.6449, + "step": 2526 + }, + { + "epoch": 0.16, + "grad_norm": 0.9115608334541321, + "learning_rate": 9.562845596033413e-06, + "loss": 0.6879, + "step": 2527 + }, + { + "epoch": 0.16, + "grad_norm": 0.9223030209541321, + "learning_rate": 9.562425948082702e-06, + "loss": 0.6029, + "step": 2528 + }, + { + "epoch": 0.16, + "grad_norm": 0.8907862901687622, + "learning_rate": 9.562006108024106e-06, + "loss": 0.6018, + "step": 2529 + }, + { + "epoch": 0.16, + "grad_norm": 0.9722427129745483, + "learning_rate": 9.561586075875304e-06, + "loss": 0.649, + "step": 2530 + }, + { + "epoch": 0.16, + "grad_norm": 0.9734516739845276, + "learning_rate": 9.56116585165398e-06, + "loss": 0.595, + "step": 2531 + }, + { + "epoch": 0.16, + "grad_norm": 0.9580360651016235, + "learning_rate": 9.560745435377828e-06, + "loss": 0.604, + "step": 2532 + }, + { + "epoch": 0.16, + "grad_norm": 0.8849531412124634, + "learning_rate": 9.560324827064553e-06, + "loss": 0.6313, + "step": 2533 + }, + { + "epoch": 0.16, + "grad_norm": 0.8849808573722839, + "learning_rate": 9.559904026731862e-06, + "loss": 0.5895, + "step": 2534 + }, + { + "epoch": 0.16, + "grad_norm": 0.8286584615707397, + "learning_rate": 9.559483034397477e-06, + "loss": 0.6168, + "step": 2535 + }, + { + "epoch": 0.16, + "grad_norm": 0.8422954678535461, + "learning_rate": 9.559061850079121e-06, + "loss": 0.5688, + "step": 2536 + }, + { + "epoch": 0.16, + "grad_norm": 0.9304640293121338, + "learning_rate": 9.558640473794533e-06, + "loss": 0.5911, + "step": 2537 + }, + { + "epoch": 0.16, + "grad_norm": 0.9410046339035034, + "learning_rate": 9.558218905561452e-06, + "loss": 0.6099, + "step": 2538 + }, + { + "epoch": 0.16, + "grad_norm": 0.8600730895996094, + "learning_rate": 9.557797145397629e-06, + "loss": 0.635, + "step": 2539 + }, + { + "epoch": 0.16, + "grad_norm": 0.923870325088501, + "learning_rate": 9.557375193320824e-06, + "loss": 0.6513, + "step": 2540 + }, + { + "epoch": 0.16, + "grad_norm": 0.9524445533752441, + "learning_rate": 9.556953049348803e-06, + "loss": 0.6036, + "step": 2541 + }, + { + "epoch": 0.16, + "grad_norm": 0.945360004901886, + "learning_rate": 9.556530713499341e-06, + "loss": 0.6471, + "step": 2542 + }, + { + "epoch": 0.16, + "grad_norm": 1.020447850227356, + "learning_rate": 9.556108185790223e-06, + "loss": 0.7046, + "step": 2543 + }, + { + "epoch": 0.16, + "grad_norm": 0.9810319542884827, + "learning_rate": 9.55568546623924e-06, + "loss": 0.6746, + "step": 2544 + }, + { + "epoch": 0.16, + "grad_norm": 0.9337319135665894, + "learning_rate": 9.555262554864188e-06, + "loss": 0.6229, + "step": 2545 + }, + { + "epoch": 0.16, + "grad_norm": 0.890835165977478, + "learning_rate": 9.554839451682876e-06, + "loss": 0.5636, + "step": 2546 + }, + { + "epoch": 0.16, + "grad_norm": 0.8403000831604004, + "learning_rate": 9.554416156713121e-06, + "loss": 0.6144, + "step": 2547 + }, + { + "epoch": 0.16, + "grad_norm": 0.8973768353462219, + "learning_rate": 9.553992669972744e-06, + "loss": 0.6128, + "step": 2548 + }, + { + "epoch": 0.16, + "grad_norm": 0.912047803401947, + "learning_rate": 9.55356899147958e-06, + "loss": 0.6295, + "step": 2549 + }, + { + "epoch": 0.16, + "grad_norm": 0.8875672817230225, + "learning_rate": 9.553145121251465e-06, + "loss": 0.6375, + "step": 2550 + }, + { + "epoch": 0.16, + "grad_norm": 0.8986533284187317, + "learning_rate": 9.552721059306248e-06, + "loss": 0.6332, + "step": 2551 + }, + { + "epoch": 0.16, + "grad_norm": 0.8964718580245972, + "learning_rate": 9.552296805661787e-06, + "loss": 0.6369, + "step": 2552 + }, + { + "epoch": 0.16, + "grad_norm": 0.9571990370750427, + "learning_rate": 9.551872360335941e-06, + "loss": 0.6474, + "step": 2553 + }, + { + "epoch": 0.16, + "grad_norm": 0.927249550819397, + "learning_rate": 9.551447723346587e-06, + "loss": 0.624, + "step": 2554 + }, + { + "epoch": 0.16, + "grad_norm": 0.9312215447425842, + "learning_rate": 9.5510228947116e-06, + "loss": 0.6383, + "step": 2555 + }, + { + "epoch": 0.16, + "grad_norm": 0.9223430156707764, + "learning_rate": 9.550597874448874e-06, + "loss": 0.6332, + "step": 2556 + }, + { + "epoch": 0.16, + "grad_norm": 0.8620796799659729, + "learning_rate": 9.5501726625763e-06, + "loss": 0.6429, + "step": 2557 + }, + { + "epoch": 0.16, + "grad_norm": 0.8788149356842041, + "learning_rate": 9.549747259111786e-06, + "loss": 0.6188, + "step": 2558 + }, + { + "epoch": 0.16, + "grad_norm": 0.9338142275810242, + "learning_rate": 9.54932166407324e-06, + "loss": 0.6234, + "step": 2559 + }, + { + "epoch": 0.16, + "grad_norm": 0.8641449213027954, + "learning_rate": 9.548895877478585e-06, + "loss": 0.6202, + "step": 2560 + }, + { + "epoch": 0.16, + "grad_norm": 0.9130368828773499, + "learning_rate": 9.54846989934575e-06, + "loss": 0.6636, + "step": 2561 + }, + { + "epoch": 0.16, + "grad_norm": 0.9087523818016052, + "learning_rate": 9.54804372969267e-06, + "loss": 0.6419, + "step": 2562 + }, + { + "epoch": 0.16, + "grad_norm": 0.8906131982803345, + "learning_rate": 9.54761736853729e-06, + "loss": 0.5957, + "step": 2563 + }, + { + "epoch": 0.16, + "grad_norm": 0.8853945732116699, + "learning_rate": 9.547190815897563e-06, + "loss": 0.5888, + "step": 2564 + }, + { + "epoch": 0.16, + "grad_norm": 0.951070249080658, + "learning_rate": 9.54676407179145e-06, + "loss": 0.6681, + "step": 2565 + }, + { + "epoch": 0.16, + "grad_norm": 0.9170838594436646, + "learning_rate": 9.546337136236916e-06, + "loss": 0.6224, + "step": 2566 + }, + { + "epoch": 0.16, + "grad_norm": 0.955334484577179, + "learning_rate": 9.545910009251945e-06, + "loss": 0.6488, + "step": 2567 + }, + { + "epoch": 0.16, + "grad_norm": 0.8778351545333862, + "learning_rate": 9.545482690854513e-06, + "loss": 0.6396, + "step": 2568 + }, + { + "epoch": 0.16, + "grad_norm": 0.8910854458808899, + "learning_rate": 9.545055181062621e-06, + "loss": 0.6397, + "step": 2569 + }, + { + "epoch": 0.16, + "grad_norm": 1.0262346267700195, + "learning_rate": 9.544627479894264e-06, + "loss": 0.6648, + "step": 2570 + }, + { + "epoch": 0.16, + "grad_norm": 0.881415843963623, + "learning_rate": 9.544199587367455e-06, + "loss": 0.6112, + "step": 2571 + }, + { + "epoch": 0.16, + "grad_norm": 0.8958014249801636, + "learning_rate": 9.54377150350021e-06, + "loss": 0.6493, + "step": 2572 + }, + { + "epoch": 0.16, + "grad_norm": 0.9083918929100037, + "learning_rate": 9.543343228310551e-06, + "loss": 0.578, + "step": 2573 + }, + { + "epoch": 0.16, + "grad_norm": 0.9322221279144287, + "learning_rate": 9.542914761816518e-06, + "loss": 0.6487, + "step": 2574 + }, + { + "epoch": 0.16, + "grad_norm": 0.914716362953186, + "learning_rate": 9.542486104036143e-06, + "loss": 0.6269, + "step": 2575 + }, + { + "epoch": 0.16, + "grad_norm": 0.9125852584838867, + "learning_rate": 9.542057254987485e-06, + "loss": 0.6308, + "step": 2576 + }, + { + "epoch": 0.16, + "grad_norm": 0.9945306777954102, + "learning_rate": 9.541628214688595e-06, + "loss": 0.6203, + "step": 2577 + }, + { + "epoch": 0.16, + "grad_norm": 0.9009057283401489, + "learning_rate": 9.541198983157538e-06, + "loss": 0.6603, + "step": 2578 + }, + { + "epoch": 0.16, + "grad_norm": 0.8918367028236389, + "learning_rate": 9.54076956041239e-06, + "loss": 0.6313, + "step": 2579 + }, + { + "epoch": 0.16, + "grad_norm": 0.8985729217529297, + "learning_rate": 9.540339946471235e-06, + "loss": 0.6205, + "step": 2580 + }, + { + "epoch": 0.16, + "grad_norm": 0.8877277970314026, + "learning_rate": 9.539910141352156e-06, + "loss": 0.6364, + "step": 2581 + }, + { + "epoch": 0.16, + "grad_norm": 0.9015381336212158, + "learning_rate": 9.539480145073257e-06, + "loss": 0.5959, + "step": 2582 + }, + { + "epoch": 0.16, + "grad_norm": 0.9096758365631104, + "learning_rate": 9.53904995765264e-06, + "loss": 0.6062, + "step": 2583 + }, + { + "epoch": 0.16, + "grad_norm": 0.9512980580329895, + "learning_rate": 9.538619579108417e-06, + "loss": 0.6782, + "step": 2584 + }, + { + "epoch": 0.16, + "grad_norm": 0.9591136574745178, + "learning_rate": 9.538189009458715e-06, + "loss": 0.6716, + "step": 2585 + }, + { + "epoch": 0.16, + "grad_norm": 0.9070512056350708, + "learning_rate": 9.53775824872166e-06, + "loss": 0.5908, + "step": 2586 + }, + { + "epoch": 0.16, + "grad_norm": 0.8964409232139587, + "learning_rate": 9.53732729691539e-06, + "loss": 0.622, + "step": 2587 + }, + { + "epoch": 0.16, + "grad_norm": 0.9467551708221436, + "learning_rate": 9.536896154058053e-06, + "loss": 0.6137, + "step": 2588 + }, + { + "epoch": 0.16, + "grad_norm": 0.8990939259529114, + "learning_rate": 9.536464820167804e-06, + "loss": 0.6319, + "step": 2589 + }, + { + "epoch": 0.16, + "grad_norm": 0.8276720643043518, + "learning_rate": 9.536033295262799e-06, + "loss": 0.5556, + "step": 2590 + }, + { + "epoch": 0.16, + "grad_norm": 0.8583798408508301, + "learning_rate": 9.535601579361214e-06, + "loss": 0.5774, + "step": 2591 + }, + { + "epoch": 0.16, + "grad_norm": 0.9028250575065613, + "learning_rate": 9.535169672481222e-06, + "loss": 0.629, + "step": 2592 + }, + { + "epoch": 0.16, + "grad_norm": 0.9669902920722961, + "learning_rate": 9.534737574641014e-06, + "loss": 0.6514, + "step": 2593 + }, + { + "epoch": 0.16, + "grad_norm": 0.9334651827812195, + "learning_rate": 9.53430528585878e-06, + "loss": 0.6385, + "step": 2594 + }, + { + "epoch": 0.16, + "grad_norm": 0.8801825046539307, + "learning_rate": 9.533872806152727e-06, + "loss": 0.6043, + "step": 2595 + }, + { + "epoch": 0.16, + "grad_norm": 0.9169769883155823, + "learning_rate": 9.533440135541059e-06, + "loss": 0.6202, + "step": 2596 + }, + { + "epoch": 0.16, + "grad_norm": 0.90007483959198, + "learning_rate": 9.533007274042e-06, + "loss": 0.6977, + "step": 2597 + }, + { + "epoch": 0.16, + "grad_norm": 0.887588620185852, + "learning_rate": 9.532574221673772e-06, + "loss": 0.6228, + "step": 2598 + }, + { + "epoch": 0.16, + "grad_norm": 0.9043447971343994, + "learning_rate": 9.532140978454614e-06, + "loss": 0.6192, + "step": 2599 + }, + { + "epoch": 0.16, + "grad_norm": 0.9651160836219788, + "learning_rate": 9.531707544402762e-06, + "loss": 0.6675, + "step": 2600 + }, + { + "epoch": 0.16, + "grad_norm": 0.9440380334854126, + "learning_rate": 9.531273919536473e-06, + "loss": 0.6294, + "step": 2601 + }, + { + "epoch": 0.16, + "grad_norm": 0.9147106409072876, + "learning_rate": 9.530840103874001e-06, + "loss": 0.6483, + "step": 2602 + }, + { + "epoch": 0.16, + "grad_norm": 0.9056714177131653, + "learning_rate": 9.530406097433615e-06, + "loss": 0.5734, + "step": 2603 + }, + { + "epoch": 0.16, + "grad_norm": 0.9497922658920288, + "learning_rate": 9.529971900233587e-06, + "loss": 0.5915, + "step": 2604 + }, + { + "epoch": 0.17, + "grad_norm": 0.8961224555969238, + "learning_rate": 9.529537512292201e-06, + "loss": 0.6239, + "step": 2605 + }, + { + "epoch": 0.17, + "grad_norm": 0.9149653315544128, + "learning_rate": 9.529102933627747e-06, + "loss": 0.6477, + "step": 2606 + }, + { + "epoch": 0.17, + "grad_norm": 0.904569685459137, + "learning_rate": 9.528668164258525e-06, + "loss": 0.6361, + "step": 2607 + }, + { + "epoch": 0.17, + "grad_norm": 0.8962168097496033, + "learning_rate": 9.528233204202842e-06, + "loss": 0.6214, + "step": 2608 + }, + { + "epoch": 0.17, + "grad_norm": 0.891830325126648, + "learning_rate": 9.527798053479009e-06, + "loss": 0.5854, + "step": 2609 + }, + { + "epoch": 0.17, + "grad_norm": 0.9612575173377991, + "learning_rate": 9.527362712105353e-06, + "loss": 0.6016, + "step": 2610 + }, + { + "epoch": 0.17, + "grad_norm": 0.9431421756744385, + "learning_rate": 9.5269271801002e-06, + "loss": 0.6032, + "step": 2611 + }, + { + "epoch": 0.17, + "grad_norm": 0.8791323304176331, + "learning_rate": 9.526491457481895e-06, + "loss": 0.6002, + "step": 2612 + }, + { + "epoch": 0.17, + "grad_norm": 0.9468672275543213, + "learning_rate": 9.526055544268778e-06, + "loss": 0.6101, + "step": 2613 + }, + { + "epoch": 0.17, + "grad_norm": 0.8586993217468262, + "learning_rate": 9.525619440479209e-06, + "loss": 0.5971, + "step": 2614 + }, + { + "epoch": 0.17, + "grad_norm": 0.88875812292099, + "learning_rate": 9.525183146131549e-06, + "loss": 0.6711, + "step": 2615 + }, + { + "epoch": 0.17, + "grad_norm": 0.9012202620506287, + "learning_rate": 9.524746661244166e-06, + "loss": 0.6357, + "step": 2616 + }, + { + "epoch": 0.17, + "grad_norm": 0.9038097858428955, + "learning_rate": 9.524309985835444e-06, + "loss": 0.6106, + "step": 2617 + }, + { + "epoch": 0.17, + "grad_norm": 0.9143322706222534, + "learning_rate": 9.523873119923768e-06, + "loss": 0.5951, + "step": 2618 + }, + { + "epoch": 0.17, + "grad_norm": 0.9046504497528076, + "learning_rate": 9.523436063527531e-06, + "loss": 0.5902, + "step": 2619 + }, + { + "epoch": 0.17, + "grad_norm": 0.959321916103363, + "learning_rate": 9.522998816665137e-06, + "loss": 0.6532, + "step": 2620 + }, + { + "epoch": 0.17, + "grad_norm": 0.8277800679206848, + "learning_rate": 9.522561379354997e-06, + "loss": 0.6249, + "step": 2621 + }, + { + "epoch": 0.17, + "grad_norm": 0.9031876921653748, + "learning_rate": 9.522123751615532e-06, + "loss": 0.6575, + "step": 2622 + }, + { + "epoch": 0.17, + "grad_norm": 0.9128404855728149, + "learning_rate": 9.521685933465166e-06, + "loss": 0.6547, + "step": 2623 + }, + { + "epoch": 0.17, + "grad_norm": 0.8987665772438049, + "learning_rate": 9.521247924922334e-06, + "loss": 0.6212, + "step": 2624 + }, + { + "epoch": 0.17, + "grad_norm": 0.944159209728241, + "learning_rate": 9.520809726005481e-06, + "loss": 0.5963, + "step": 2625 + }, + { + "epoch": 0.17, + "grad_norm": 0.9575842022895813, + "learning_rate": 9.52037133673306e-06, + "loss": 0.5637, + "step": 2626 + }, + { + "epoch": 0.17, + "grad_norm": 0.8398919105529785, + "learning_rate": 9.519932757123523e-06, + "loss": 0.664, + "step": 2627 + }, + { + "epoch": 0.17, + "grad_norm": 0.9531906843185425, + "learning_rate": 9.519493987195343e-06, + "loss": 0.5932, + "step": 2628 + }, + { + "epoch": 0.17, + "grad_norm": 0.9427643418312073, + "learning_rate": 9.519055026966995e-06, + "loss": 0.5979, + "step": 2629 + }, + { + "epoch": 0.17, + "grad_norm": 0.9445648193359375, + "learning_rate": 9.518615876456958e-06, + "loss": 0.6406, + "step": 2630 + }, + { + "epoch": 0.17, + "grad_norm": 0.8915479183197021, + "learning_rate": 9.518176535683727e-06, + "loss": 0.5887, + "step": 2631 + }, + { + "epoch": 0.17, + "grad_norm": 0.9278690218925476, + "learning_rate": 9.5177370046658e-06, + "loss": 0.6604, + "step": 2632 + }, + { + "epoch": 0.17, + "grad_norm": 0.9619773626327515, + "learning_rate": 9.517297283421681e-06, + "loss": 0.6622, + "step": 2633 + }, + { + "epoch": 0.17, + "grad_norm": 0.9478781819343567, + "learning_rate": 9.51685737196989e-06, + "loss": 0.6336, + "step": 2634 + }, + { + "epoch": 0.17, + "grad_norm": 0.8679977059364319, + "learning_rate": 9.516417270328948e-06, + "loss": 0.6031, + "step": 2635 + }, + { + "epoch": 0.17, + "grad_norm": 0.9029505252838135, + "learning_rate": 9.515976978517387e-06, + "loss": 0.6204, + "step": 2636 + }, + { + "epoch": 0.17, + "grad_norm": 0.8872044086456299, + "learning_rate": 9.515536496553744e-06, + "loss": 0.578, + "step": 2637 + }, + { + "epoch": 0.17, + "grad_norm": 0.9961317777633667, + "learning_rate": 9.515095824456568e-06, + "loss": 0.6484, + "step": 2638 + }, + { + "epoch": 0.17, + "grad_norm": 0.8571626543998718, + "learning_rate": 9.514654962244414e-06, + "loss": 0.6417, + "step": 2639 + }, + { + "epoch": 0.17, + "grad_norm": 0.8865385055541992, + "learning_rate": 9.514213909935843e-06, + "loss": 0.5677, + "step": 2640 + }, + { + "epoch": 0.17, + "grad_norm": 0.9392569661140442, + "learning_rate": 9.51377266754943e-06, + "loss": 0.6493, + "step": 2641 + }, + { + "epoch": 0.17, + "grad_norm": 0.9384260773658752, + "learning_rate": 9.513331235103751e-06, + "loss": 0.6117, + "step": 2642 + }, + { + "epoch": 0.17, + "grad_norm": 1.0064356327056885, + "learning_rate": 9.512889612617397e-06, + "loss": 0.6214, + "step": 2643 + }, + { + "epoch": 0.17, + "grad_norm": 0.8559515476226807, + "learning_rate": 9.512447800108958e-06, + "loss": 0.6171, + "step": 2644 + }, + { + "epoch": 0.17, + "grad_norm": 0.9168458580970764, + "learning_rate": 9.512005797597042e-06, + "loss": 0.6406, + "step": 2645 + }, + { + "epoch": 0.17, + "grad_norm": 0.9505908489227295, + "learning_rate": 9.511563605100255e-06, + "loss": 0.63, + "step": 2646 + }, + { + "epoch": 0.17, + "grad_norm": 0.9313047528266907, + "learning_rate": 9.511121222637222e-06, + "loss": 0.6543, + "step": 2647 + }, + { + "epoch": 0.17, + "grad_norm": 0.8740178346633911, + "learning_rate": 9.510678650226567e-06, + "loss": 0.5734, + "step": 2648 + }, + { + "epoch": 0.17, + "grad_norm": 0.9065948128700256, + "learning_rate": 9.510235887886923e-06, + "loss": 0.6048, + "step": 2649 + }, + { + "epoch": 0.17, + "grad_norm": 0.9390092492103577, + "learning_rate": 9.509792935636939e-06, + "loss": 0.5976, + "step": 2650 + }, + { + "epoch": 0.17, + "grad_norm": 0.9297692179679871, + "learning_rate": 9.50934979349526e-06, + "loss": 0.5868, + "step": 2651 + }, + { + "epoch": 0.17, + "grad_norm": 0.9775800704956055, + "learning_rate": 9.508906461480549e-06, + "loss": 0.6938, + "step": 2652 + }, + { + "epoch": 0.17, + "grad_norm": 0.934540867805481, + "learning_rate": 9.508462939611473e-06, + "loss": 0.6, + "step": 2653 + }, + { + "epoch": 0.17, + "grad_norm": 0.9152988195419312, + "learning_rate": 9.508019227906706e-06, + "loss": 0.6573, + "step": 2654 + }, + { + "epoch": 0.17, + "grad_norm": 0.9159802794456482, + "learning_rate": 9.507575326384932e-06, + "loss": 0.5607, + "step": 2655 + }, + { + "epoch": 0.17, + "grad_norm": 0.9005085229873657, + "learning_rate": 9.507131235064842e-06, + "loss": 0.6402, + "step": 2656 + }, + { + "epoch": 0.17, + "grad_norm": 0.9148140549659729, + "learning_rate": 9.506686953965134e-06, + "loss": 0.6254, + "step": 2657 + }, + { + "epoch": 0.17, + "grad_norm": 0.8619657754898071, + "learning_rate": 9.506242483104517e-06, + "loss": 0.534, + "step": 2658 + }, + { + "epoch": 0.17, + "grad_norm": 0.8992459774017334, + "learning_rate": 9.505797822501704e-06, + "loss": 0.6414, + "step": 2659 + }, + { + "epoch": 0.17, + "grad_norm": 0.9422406554222107, + "learning_rate": 9.505352972175419e-06, + "loss": 0.6557, + "step": 2660 + }, + { + "epoch": 0.17, + "grad_norm": 0.9567902088165283, + "learning_rate": 9.504907932144394e-06, + "loss": 0.6674, + "step": 2661 + }, + { + "epoch": 0.17, + "grad_norm": 0.9111477136611938, + "learning_rate": 9.504462702427369e-06, + "loss": 0.634, + "step": 2662 + }, + { + "epoch": 0.17, + "grad_norm": 0.9020829796791077, + "learning_rate": 9.504017283043087e-06, + "loss": 0.6443, + "step": 2663 + }, + { + "epoch": 0.17, + "grad_norm": 0.9128588438034058, + "learning_rate": 9.503571674010305e-06, + "loss": 0.651, + "step": 2664 + }, + { + "epoch": 0.17, + "grad_norm": 0.908065676689148, + "learning_rate": 9.503125875347789e-06, + "loss": 0.6225, + "step": 2665 + }, + { + "epoch": 0.17, + "grad_norm": 0.9279728531837463, + "learning_rate": 9.502679887074306e-06, + "loss": 0.6425, + "step": 2666 + }, + { + "epoch": 0.17, + "grad_norm": 0.8896051645278931, + "learning_rate": 9.502233709208637e-06, + "loss": 0.6823, + "step": 2667 + }, + { + "epoch": 0.17, + "grad_norm": 0.9090619087219238, + "learning_rate": 9.50178734176957e-06, + "loss": 0.5903, + "step": 2668 + }, + { + "epoch": 0.17, + "grad_norm": 0.8844740986824036, + "learning_rate": 9.501340784775896e-06, + "loss": 0.6276, + "step": 2669 + }, + { + "epoch": 0.17, + "grad_norm": 0.9212251901626587, + "learning_rate": 9.500894038246424e-06, + "loss": 0.5796, + "step": 2670 + }, + { + "epoch": 0.17, + "grad_norm": 0.9225980639457703, + "learning_rate": 9.50044710219996e-06, + "loss": 0.6326, + "step": 2671 + }, + { + "epoch": 0.17, + "grad_norm": 0.9283084869384766, + "learning_rate": 9.499999976655324e-06, + "loss": 0.6165, + "step": 2672 + }, + { + "epoch": 0.17, + "grad_norm": 0.8648502826690674, + "learning_rate": 9.499552661631342e-06, + "loss": 0.6137, + "step": 2673 + }, + { + "epoch": 0.17, + "grad_norm": 0.88034588098526, + "learning_rate": 9.49910515714685e-06, + "loss": 0.594, + "step": 2674 + }, + { + "epoch": 0.17, + "grad_norm": 0.841262698173523, + "learning_rate": 9.498657463220694e-06, + "loss": 0.5953, + "step": 2675 + }, + { + "epoch": 0.17, + "grad_norm": 0.9340731501579285, + "learning_rate": 9.49820957987172e-06, + "loss": 0.6236, + "step": 2676 + }, + { + "epoch": 0.17, + "grad_norm": 0.898252546787262, + "learning_rate": 9.49776150711879e-06, + "loss": 0.5813, + "step": 2677 + }, + { + "epoch": 0.17, + "grad_norm": 0.8751718997955322, + "learning_rate": 9.497313244980768e-06, + "loss": 0.5712, + "step": 2678 + }, + { + "epoch": 0.17, + "grad_norm": 0.8850248456001282, + "learning_rate": 9.496864793476532e-06, + "loss": 0.6464, + "step": 2679 + }, + { + "epoch": 0.17, + "grad_norm": 0.9821275472640991, + "learning_rate": 9.49641615262496e-06, + "loss": 0.6297, + "step": 2680 + }, + { + "epoch": 0.17, + "grad_norm": 0.8436826467514038, + "learning_rate": 9.49596732244495e-06, + "loss": 0.5828, + "step": 2681 + }, + { + "epoch": 0.17, + "grad_norm": 0.9077553749084473, + "learning_rate": 9.495518302955393e-06, + "loss": 0.6651, + "step": 2682 + }, + { + "epoch": 0.17, + "grad_norm": 0.9323903322219849, + "learning_rate": 9.4950690941752e-06, + "loss": 0.6516, + "step": 2683 + }, + { + "epoch": 0.17, + "grad_norm": 1.0304430723190308, + "learning_rate": 9.494619696123286e-06, + "loss": 0.6534, + "step": 2684 + }, + { + "epoch": 0.17, + "grad_norm": 0.9509037137031555, + "learning_rate": 9.49417010881857e-06, + "loss": 0.6013, + "step": 2685 + }, + { + "epoch": 0.17, + "grad_norm": 0.8547189831733704, + "learning_rate": 9.493720332279987e-06, + "loss": 0.5765, + "step": 2686 + }, + { + "epoch": 0.17, + "grad_norm": 0.9771583676338196, + "learning_rate": 9.493270366526471e-06, + "loss": 0.6383, + "step": 2687 + }, + { + "epoch": 0.17, + "grad_norm": 0.9149676561355591, + "learning_rate": 9.492820211576971e-06, + "loss": 0.6117, + "step": 2688 + }, + { + "epoch": 0.17, + "grad_norm": 0.8924671411514282, + "learning_rate": 9.492369867450444e-06, + "loss": 0.5931, + "step": 2689 + }, + { + "epoch": 0.17, + "grad_norm": 0.9182107448577881, + "learning_rate": 9.491919334165846e-06, + "loss": 0.6233, + "step": 2690 + }, + { + "epoch": 0.17, + "grad_norm": 0.9452329277992249, + "learning_rate": 9.491468611742154e-06, + "loss": 0.7153, + "step": 2691 + }, + { + "epoch": 0.17, + "grad_norm": 0.9435275197029114, + "learning_rate": 9.491017700198343e-06, + "loss": 0.6737, + "step": 2692 + }, + { + "epoch": 0.17, + "grad_norm": 0.9835942387580872, + "learning_rate": 9.490566599553399e-06, + "loss": 0.6323, + "step": 2693 + }, + { + "epoch": 0.17, + "grad_norm": 1.015770673751831, + "learning_rate": 9.490115309826317e-06, + "loss": 0.6106, + "step": 2694 + }, + { + "epoch": 0.17, + "grad_norm": 0.9940273761749268, + "learning_rate": 9.4896638310361e-06, + "loss": 0.6326, + "step": 2695 + }, + { + "epoch": 0.17, + "grad_norm": 0.9595569968223572, + "learning_rate": 9.489212163201758e-06, + "loss": 0.6314, + "step": 2696 + }, + { + "epoch": 0.17, + "grad_norm": 0.918870747089386, + "learning_rate": 9.488760306342307e-06, + "loss": 0.6369, + "step": 2697 + }, + { + "epoch": 0.17, + "grad_norm": 0.9247921705245972, + "learning_rate": 9.488308260476776e-06, + "loss": 0.5877, + "step": 2698 + }, + { + "epoch": 0.17, + "grad_norm": 0.8694366812705994, + "learning_rate": 9.487856025624196e-06, + "loss": 0.6188, + "step": 2699 + }, + { + "epoch": 0.17, + "grad_norm": 0.9364984631538391, + "learning_rate": 9.487403601803614e-06, + "loss": 0.5841, + "step": 2700 + }, + { + "epoch": 0.17, + "grad_norm": 0.8980706930160522, + "learning_rate": 9.486950989034074e-06, + "loss": 0.6324, + "step": 2701 + }, + { + "epoch": 0.17, + "grad_norm": 0.8469223380088806, + "learning_rate": 9.486498187334636e-06, + "loss": 0.5997, + "step": 2702 + }, + { + "epoch": 0.17, + "grad_norm": 0.9805670976638794, + "learning_rate": 9.48604519672437e-06, + "loss": 0.6745, + "step": 2703 + }, + { + "epoch": 0.17, + "grad_norm": 0.9122759103775024, + "learning_rate": 9.485592017222344e-06, + "loss": 0.6904, + "step": 2704 + }, + { + "epoch": 0.17, + "grad_norm": 0.9132962822914124, + "learning_rate": 9.485138648847643e-06, + "loss": 0.5926, + "step": 2705 + }, + { + "epoch": 0.17, + "grad_norm": 0.8468869924545288, + "learning_rate": 9.484685091619358e-06, + "loss": 0.6072, + "step": 2706 + }, + { + "epoch": 0.17, + "grad_norm": 0.9402836561203003, + "learning_rate": 9.484231345556582e-06, + "loss": 0.6308, + "step": 2707 + }, + { + "epoch": 0.17, + "grad_norm": 0.8940732479095459, + "learning_rate": 9.483777410678427e-06, + "loss": 0.63, + "step": 2708 + }, + { + "epoch": 0.17, + "grad_norm": 0.886562705039978, + "learning_rate": 9.483323287004001e-06, + "loss": 0.5811, + "step": 2709 + }, + { + "epoch": 0.17, + "grad_norm": 0.9191167950630188, + "learning_rate": 9.482868974552427e-06, + "loss": 0.6349, + "step": 2710 + }, + { + "epoch": 0.17, + "grad_norm": 0.936594545841217, + "learning_rate": 9.482414473342835e-06, + "loss": 0.72, + "step": 2711 + }, + { + "epoch": 0.17, + "grad_norm": 0.9029736518859863, + "learning_rate": 9.481959783394365e-06, + "loss": 0.6818, + "step": 2712 + }, + { + "epoch": 0.17, + "grad_norm": 0.9597886800765991, + "learning_rate": 9.48150490472616e-06, + "loss": 0.6462, + "step": 2713 + }, + { + "epoch": 0.17, + "grad_norm": 0.9007745385169983, + "learning_rate": 9.481049837357371e-06, + "loss": 0.6234, + "step": 2714 + }, + { + "epoch": 0.17, + "grad_norm": 0.8033143877983093, + "learning_rate": 9.480594581307164e-06, + "loss": 0.5724, + "step": 2715 + }, + { + "epoch": 0.17, + "grad_norm": 0.856959879398346, + "learning_rate": 9.480139136594706e-06, + "loss": 0.5977, + "step": 2716 + }, + { + "epoch": 0.17, + "grad_norm": 0.9320681095123291, + "learning_rate": 9.479683503239172e-06, + "loss": 0.6452, + "step": 2717 + }, + { + "epoch": 0.17, + "grad_norm": 0.8906647562980652, + "learning_rate": 9.479227681259751e-06, + "loss": 0.6675, + "step": 2718 + }, + { + "epoch": 0.17, + "grad_norm": 0.8599271774291992, + "learning_rate": 9.478771670675635e-06, + "loss": 0.6287, + "step": 2719 + }, + { + "epoch": 0.17, + "grad_norm": 0.8469679355621338, + "learning_rate": 9.478315471506023e-06, + "loss": 0.5967, + "step": 2720 + }, + { + "epoch": 0.17, + "grad_norm": 0.8832866549491882, + "learning_rate": 9.477859083770126e-06, + "loss": 0.6506, + "step": 2721 + }, + { + "epoch": 0.17, + "grad_norm": 0.8781976699829102, + "learning_rate": 9.477402507487162e-06, + "loss": 0.6026, + "step": 2722 + }, + { + "epoch": 0.17, + "grad_norm": 0.9236262440681458, + "learning_rate": 9.476945742676352e-06, + "loss": 0.5791, + "step": 2723 + }, + { + "epoch": 0.17, + "grad_norm": 0.9180050492286682, + "learning_rate": 9.476488789356933e-06, + "loss": 0.5972, + "step": 2724 + }, + { + "epoch": 0.17, + "grad_norm": 0.8968567848205566, + "learning_rate": 9.47603164754814e-06, + "loss": 0.6701, + "step": 2725 + }, + { + "epoch": 0.17, + "grad_norm": 0.9011199474334717, + "learning_rate": 9.47557431726923e-06, + "loss": 0.6389, + "step": 2726 + }, + { + "epoch": 0.17, + "grad_norm": 0.964178204536438, + "learning_rate": 9.475116798539451e-06, + "loss": 0.6804, + "step": 2727 + }, + { + "epoch": 0.17, + "grad_norm": 0.9103108048439026, + "learning_rate": 9.474659091378074e-06, + "loss": 0.5935, + "step": 2728 + }, + { + "epoch": 0.17, + "grad_norm": 0.9424949884414673, + "learning_rate": 9.474201195804367e-06, + "loss": 0.6662, + "step": 2729 + }, + { + "epoch": 0.17, + "grad_norm": 0.9513722658157349, + "learning_rate": 9.473743111837612e-06, + "loss": 0.6526, + "step": 2730 + }, + { + "epoch": 0.17, + "grad_norm": 0.9301340579986572, + "learning_rate": 9.4732848394971e-06, + "loss": 0.5824, + "step": 2731 + }, + { + "epoch": 0.17, + "grad_norm": 0.9112258553504944, + "learning_rate": 9.472826378802122e-06, + "loss": 0.6287, + "step": 2732 + }, + { + "epoch": 0.17, + "grad_norm": 0.9196444749832153, + "learning_rate": 9.472367729771987e-06, + "loss": 0.6376, + "step": 2733 + }, + { + "epoch": 0.17, + "grad_norm": 0.9066518545150757, + "learning_rate": 9.471908892426005e-06, + "loss": 0.6648, + "step": 2734 + }, + { + "epoch": 0.17, + "grad_norm": 0.8786914944648743, + "learning_rate": 9.471449866783495e-06, + "loss": 0.6161, + "step": 2735 + }, + { + "epoch": 0.17, + "grad_norm": 0.9169754385948181, + "learning_rate": 9.470990652863787e-06, + "loss": 0.6643, + "step": 2736 + }, + { + "epoch": 0.17, + "grad_norm": 0.9611136317253113, + "learning_rate": 9.470531250686216e-06, + "loss": 0.6446, + "step": 2737 + }, + { + "epoch": 0.17, + "grad_norm": 0.9478945732116699, + "learning_rate": 9.470071660270126e-06, + "loss": 0.6436, + "step": 2738 + }, + { + "epoch": 0.17, + "grad_norm": 0.8549840450286865, + "learning_rate": 9.469611881634868e-06, + "loss": 0.607, + "step": 2739 + }, + { + "epoch": 0.17, + "grad_norm": 0.9151300191879272, + "learning_rate": 9.469151914799803e-06, + "loss": 0.5987, + "step": 2740 + }, + { + "epoch": 0.17, + "grad_norm": 0.87184077501297, + "learning_rate": 9.468691759784298e-06, + "loss": 0.6307, + "step": 2741 + }, + { + "epoch": 0.17, + "grad_norm": 0.9251417517662048, + "learning_rate": 9.468231416607727e-06, + "loss": 0.5822, + "step": 2742 + }, + { + "epoch": 0.17, + "grad_norm": 0.9144605994224548, + "learning_rate": 9.467770885289477e-06, + "loss": 0.5699, + "step": 2743 + }, + { + "epoch": 0.17, + "grad_norm": 0.8591218590736389, + "learning_rate": 9.467310165848935e-06, + "loss": 0.6483, + "step": 2744 + }, + { + "epoch": 0.17, + "grad_norm": 0.8842750787734985, + "learning_rate": 9.466849258305504e-06, + "loss": 0.6478, + "step": 2745 + }, + { + "epoch": 0.17, + "grad_norm": 0.8982271552085876, + "learning_rate": 9.46638816267859e-06, + "loss": 0.6189, + "step": 2746 + }, + { + "epoch": 0.17, + "grad_norm": 1.1078075170516968, + "learning_rate": 9.465926878987609e-06, + "loss": 0.652, + "step": 2747 + }, + { + "epoch": 0.17, + "grad_norm": 0.9062262773513794, + "learning_rate": 9.46546540725198e-06, + "loss": 0.6205, + "step": 2748 + }, + { + "epoch": 0.17, + "grad_norm": 0.9785717725753784, + "learning_rate": 9.465003747491138e-06, + "loss": 0.6586, + "step": 2749 + }, + { + "epoch": 0.17, + "grad_norm": 0.9226608276367188, + "learning_rate": 9.464541899724522e-06, + "loss": 0.6167, + "step": 2750 + }, + { + "epoch": 0.17, + "grad_norm": 0.9549429416656494, + "learning_rate": 9.464079863971576e-06, + "loss": 0.6093, + "step": 2751 + }, + { + "epoch": 0.17, + "grad_norm": 0.9625465869903564, + "learning_rate": 9.463617640251756e-06, + "loss": 0.6058, + "step": 2752 + }, + { + "epoch": 0.17, + "grad_norm": 0.917473316192627, + "learning_rate": 9.463155228584526e-06, + "loss": 0.608, + "step": 2753 + }, + { + "epoch": 0.17, + "grad_norm": 0.969939649105072, + "learning_rate": 9.462692628989356e-06, + "loss": 0.5676, + "step": 2754 + }, + { + "epoch": 0.17, + "grad_norm": 0.9174929857254028, + "learning_rate": 9.462229841485723e-06, + "loss": 0.6664, + "step": 2755 + }, + { + "epoch": 0.17, + "grad_norm": 0.9567301273345947, + "learning_rate": 9.461766866093117e-06, + "loss": 0.6435, + "step": 2756 + }, + { + "epoch": 0.17, + "grad_norm": 0.8922646045684814, + "learning_rate": 9.461303702831026e-06, + "loss": 0.5949, + "step": 2757 + }, + { + "epoch": 0.17, + "grad_norm": 0.8556625843048096, + "learning_rate": 9.460840351718958e-06, + "loss": 0.5995, + "step": 2758 + }, + { + "epoch": 0.17, + "grad_norm": 0.9240930676460266, + "learning_rate": 9.46037681277642e-06, + "loss": 0.6158, + "step": 2759 + }, + { + "epoch": 0.17, + "grad_norm": 0.9151474833488464, + "learning_rate": 9.459913086022931e-06, + "loss": 0.6091, + "step": 2760 + }, + { + "epoch": 0.17, + "grad_norm": 0.937988817691803, + "learning_rate": 9.459449171478017e-06, + "loss": 0.5562, + "step": 2761 + }, + { + "epoch": 0.17, + "grad_norm": 0.8838707804679871, + "learning_rate": 9.458985069161212e-06, + "loss": 0.5736, + "step": 2762 + }, + { + "epoch": 0.18, + "grad_norm": 0.9612347483634949, + "learning_rate": 9.458520779092057e-06, + "loss": 0.5838, + "step": 2763 + }, + { + "epoch": 0.18, + "grad_norm": 1.0034922361373901, + "learning_rate": 9.458056301290102e-06, + "loss": 0.6895, + "step": 2764 + }, + { + "epoch": 0.18, + "grad_norm": 0.9068509340286255, + "learning_rate": 9.457591635774905e-06, + "loss": 0.687, + "step": 2765 + }, + { + "epoch": 0.18, + "grad_norm": 0.9105919599533081, + "learning_rate": 9.457126782566031e-06, + "loss": 0.6629, + "step": 2766 + }, + { + "epoch": 0.18, + "grad_norm": 0.9419427514076233, + "learning_rate": 9.456661741683054e-06, + "loss": 0.6553, + "step": 2767 + }, + { + "epoch": 0.18, + "grad_norm": 0.9317494034767151, + "learning_rate": 9.456196513145553e-06, + "loss": 0.619, + "step": 2768 + }, + { + "epoch": 0.18, + "grad_norm": 0.9247744679450989, + "learning_rate": 9.455731096973119e-06, + "loss": 0.6352, + "step": 2769 + }, + { + "epoch": 0.18, + "grad_norm": 0.9570684432983398, + "learning_rate": 9.455265493185349e-06, + "loss": 0.6674, + "step": 2770 + }, + { + "epoch": 0.18, + "grad_norm": 0.9092298150062561, + "learning_rate": 9.454799701801849e-06, + "loss": 0.6136, + "step": 2771 + }, + { + "epoch": 0.18, + "grad_norm": 0.9638829827308655, + "learning_rate": 9.45433372284223e-06, + "loss": 0.6206, + "step": 2772 + }, + { + "epoch": 0.18, + "grad_norm": 1.1069514751434326, + "learning_rate": 9.453867556326113e-06, + "loss": 0.6166, + "step": 2773 + }, + { + "epoch": 0.18, + "grad_norm": 0.958802342414856, + "learning_rate": 9.453401202273127e-06, + "loss": 0.6009, + "step": 2774 + }, + { + "epoch": 0.18, + "grad_norm": 0.8832184076309204, + "learning_rate": 9.45293466070291e-06, + "loss": 0.609, + "step": 2775 + }, + { + "epoch": 0.18, + "grad_norm": 0.9852387309074402, + "learning_rate": 9.452467931635104e-06, + "loss": 0.6633, + "step": 2776 + }, + { + "epoch": 0.18, + "grad_norm": 0.8827134370803833, + "learning_rate": 9.452001015089363e-06, + "loss": 0.6112, + "step": 2777 + }, + { + "epoch": 0.18, + "grad_norm": 0.9104273915290833, + "learning_rate": 9.451533911085346e-06, + "loss": 0.6043, + "step": 2778 + }, + { + "epoch": 0.18, + "grad_norm": 0.9635795950889587, + "learning_rate": 9.451066619642721e-06, + "loss": 0.628, + "step": 2779 + }, + { + "epoch": 0.18, + "grad_norm": 0.9080226421356201, + "learning_rate": 9.450599140781166e-06, + "loss": 0.6428, + "step": 2780 + }, + { + "epoch": 0.18, + "grad_norm": 0.8342934846878052, + "learning_rate": 9.450131474520364e-06, + "loss": 0.6056, + "step": 2781 + }, + { + "epoch": 0.18, + "grad_norm": 0.8714557886123657, + "learning_rate": 9.449663620880006e-06, + "loss": 0.6105, + "step": 2782 + }, + { + "epoch": 0.18, + "grad_norm": 0.8582709431648254, + "learning_rate": 9.449195579879793e-06, + "loss": 0.6117, + "step": 2783 + }, + { + "epoch": 0.18, + "grad_norm": 1.0167529582977295, + "learning_rate": 9.448727351539431e-06, + "loss": 0.6551, + "step": 2784 + }, + { + "epoch": 0.18, + "grad_norm": 0.8866241574287415, + "learning_rate": 9.448258935878635e-06, + "loss": 0.623, + "step": 2785 + }, + { + "epoch": 0.18, + "grad_norm": 0.9443932771682739, + "learning_rate": 9.44779033291713e-06, + "loss": 0.6456, + "step": 2786 + }, + { + "epoch": 0.18, + "grad_norm": 0.9517203569412231, + "learning_rate": 9.447321542674647e-06, + "loss": 0.6439, + "step": 2787 + }, + { + "epoch": 0.18, + "grad_norm": 0.9734207987785339, + "learning_rate": 9.446852565170928e-06, + "loss": 0.6553, + "step": 2788 + }, + { + "epoch": 0.18, + "grad_norm": 0.898755669593811, + "learning_rate": 9.446383400425713e-06, + "loss": 0.6615, + "step": 2789 + }, + { + "epoch": 0.18, + "grad_norm": 0.9627699851989746, + "learning_rate": 9.445914048458764e-06, + "loss": 0.5574, + "step": 2790 + }, + { + "epoch": 0.18, + "grad_norm": 0.8621180057525635, + "learning_rate": 9.445444509289838e-06, + "loss": 0.6064, + "step": 2791 + }, + { + "epoch": 0.18, + "grad_norm": 0.9321991205215454, + "learning_rate": 9.44497478293871e-06, + "loss": 0.6189, + "step": 2792 + }, + { + "epoch": 0.18, + "grad_norm": 0.9137430787086487, + "learning_rate": 9.444504869425154e-06, + "loss": 0.6378, + "step": 2793 + }, + { + "epoch": 0.18, + "grad_norm": 0.9660084843635559, + "learning_rate": 9.44403476876896e-06, + "loss": 0.6376, + "step": 2794 + }, + { + "epoch": 0.18, + "grad_norm": 0.8711713552474976, + "learning_rate": 9.443564480989924e-06, + "loss": 0.6145, + "step": 2795 + }, + { + "epoch": 0.18, + "grad_norm": 0.8694255352020264, + "learning_rate": 9.443094006107844e-06, + "loss": 0.6109, + "step": 2796 + }, + { + "epoch": 0.18, + "grad_norm": 0.9288530945777893, + "learning_rate": 9.442623344142534e-06, + "loss": 0.6055, + "step": 2797 + }, + { + "epoch": 0.18, + "grad_norm": 0.9127347469329834, + "learning_rate": 9.442152495113808e-06, + "loss": 0.6153, + "step": 2798 + }, + { + "epoch": 0.18, + "grad_norm": 0.8872652053833008, + "learning_rate": 9.441681459041494e-06, + "loss": 0.6426, + "step": 2799 + }, + { + "epoch": 0.18, + "grad_norm": 0.9660980105400085, + "learning_rate": 9.441210235945425e-06, + "loss": 0.6255, + "step": 2800 + }, + { + "epoch": 0.18, + "grad_norm": 0.8567848801612854, + "learning_rate": 9.440738825845441e-06, + "loss": 0.6009, + "step": 2801 + }, + { + "epoch": 0.18, + "grad_norm": 0.9663728475570679, + "learning_rate": 9.440267228761395e-06, + "loss": 0.6588, + "step": 2802 + }, + { + "epoch": 0.18, + "grad_norm": 0.9529426097869873, + "learning_rate": 9.439795444713143e-06, + "loss": 0.6628, + "step": 2803 + }, + { + "epoch": 0.18, + "grad_norm": 0.929195761680603, + "learning_rate": 9.43932347372055e-06, + "loss": 0.6209, + "step": 2804 + }, + { + "epoch": 0.18, + "grad_norm": 0.9078366160392761, + "learning_rate": 9.438851315803488e-06, + "loss": 0.5669, + "step": 2805 + }, + { + "epoch": 0.18, + "grad_norm": 0.9016088247299194, + "learning_rate": 9.438378970981839e-06, + "loss": 0.6074, + "step": 2806 + }, + { + "epoch": 0.18, + "grad_norm": 0.9534980654716492, + "learning_rate": 9.43790643927549e-06, + "loss": 0.7098, + "step": 2807 + }, + { + "epoch": 0.18, + "grad_norm": 0.8913077116012573, + "learning_rate": 9.437433720704342e-06, + "loss": 0.586, + "step": 2808 + }, + { + "epoch": 0.18, + "grad_norm": 1.0161441564559937, + "learning_rate": 9.436960815288294e-06, + "loss": 0.6038, + "step": 2809 + }, + { + "epoch": 0.18, + "grad_norm": 0.8946830034255981, + "learning_rate": 9.436487723047263e-06, + "loss": 0.6169, + "step": 2810 + }, + { + "epoch": 0.18, + "grad_norm": 0.9344162344932556, + "learning_rate": 9.436014444001167e-06, + "loss": 0.6332, + "step": 2811 + }, + { + "epoch": 0.18, + "grad_norm": 0.8833682537078857, + "learning_rate": 9.435540978169933e-06, + "loss": 0.6148, + "step": 2812 + }, + { + "epoch": 0.18, + "grad_norm": 0.9014259576797485, + "learning_rate": 9.435067325573499e-06, + "loss": 0.6617, + "step": 2813 + }, + { + "epoch": 0.18, + "grad_norm": 0.8786671757698059, + "learning_rate": 9.43459348623181e-06, + "loss": 0.6741, + "step": 2814 + }, + { + "epoch": 0.18, + "grad_norm": 0.9095485806465149, + "learning_rate": 9.434119460164816e-06, + "loss": 0.5859, + "step": 2815 + }, + { + "epoch": 0.18, + "grad_norm": 0.9492687582969666, + "learning_rate": 9.433645247392476e-06, + "loss": 0.6005, + "step": 2816 + }, + { + "epoch": 0.18, + "grad_norm": 0.9836667776107788, + "learning_rate": 9.433170847934759e-06, + "loss": 0.673, + "step": 2817 + }, + { + "epoch": 0.18, + "grad_norm": 0.9654482007026672, + "learning_rate": 9.432696261811637e-06, + "loss": 0.6462, + "step": 2818 + }, + { + "epoch": 0.18, + "grad_norm": 0.919657826423645, + "learning_rate": 9.432221489043097e-06, + "loss": 0.6495, + "step": 2819 + }, + { + "epoch": 0.18, + "grad_norm": 0.928325355052948, + "learning_rate": 9.43174652964913e-06, + "loss": 0.6354, + "step": 2820 + }, + { + "epoch": 0.18, + "grad_norm": 1.0097019672393799, + "learning_rate": 9.431271383649731e-06, + "loss": 0.636, + "step": 2821 + }, + { + "epoch": 0.18, + "grad_norm": 0.8387419581413269, + "learning_rate": 9.430796051064913e-06, + "loss": 0.6435, + "step": 2822 + }, + { + "epoch": 0.18, + "grad_norm": 0.9152708649635315, + "learning_rate": 9.430320531914683e-06, + "loss": 0.6436, + "step": 2823 + }, + { + "epoch": 0.18, + "grad_norm": 0.9267799854278564, + "learning_rate": 9.42984482621907e-06, + "loss": 0.6528, + "step": 2824 + }, + { + "epoch": 0.18, + "grad_norm": 0.8546323776245117, + "learning_rate": 9.4293689339981e-06, + "loss": 0.5591, + "step": 2825 + }, + { + "epoch": 0.18, + "grad_norm": 1.015834093093872, + "learning_rate": 9.428892855271813e-06, + "loss": 0.7004, + "step": 2826 + }, + { + "epoch": 0.18, + "grad_norm": 0.9022856950759888, + "learning_rate": 9.428416590060256e-06, + "loss": 0.6214, + "step": 2827 + }, + { + "epoch": 0.18, + "grad_norm": 0.9249994158744812, + "learning_rate": 9.427940138383482e-06, + "loss": 0.6688, + "step": 2828 + }, + { + "epoch": 0.18, + "grad_norm": 0.8863480091094971, + "learning_rate": 9.427463500261551e-06, + "loss": 0.6651, + "step": 2829 + }, + { + "epoch": 0.18, + "grad_norm": 0.8578901290893555, + "learning_rate": 9.426986675714535e-06, + "loss": 0.5767, + "step": 2830 + }, + { + "epoch": 0.18, + "grad_norm": 0.8513709902763367, + "learning_rate": 9.426509664762509e-06, + "loss": 0.545, + "step": 2831 + }, + { + "epoch": 0.18, + "grad_norm": 0.9681910872459412, + "learning_rate": 9.42603246742556e-06, + "loss": 0.6421, + "step": 2832 + }, + { + "epoch": 0.18, + "grad_norm": 0.9950567483901978, + "learning_rate": 9.425555083723783e-06, + "loss": 0.6663, + "step": 2833 + }, + { + "epoch": 0.18, + "grad_norm": 0.9001085162162781, + "learning_rate": 9.425077513677276e-06, + "loss": 0.61, + "step": 2834 + }, + { + "epoch": 0.18, + "grad_norm": 0.9015680551528931, + "learning_rate": 9.424599757306148e-06, + "loss": 0.6296, + "step": 2835 + }, + { + "epoch": 0.18, + "grad_norm": 0.862308144569397, + "learning_rate": 9.424121814630516e-06, + "loss": 0.5494, + "step": 2836 + }, + { + "epoch": 0.18, + "grad_norm": 0.913428008556366, + "learning_rate": 9.423643685670504e-06, + "loss": 0.6652, + "step": 2837 + }, + { + "epoch": 0.18, + "grad_norm": 0.8796103000640869, + "learning_rate": 9.423165370446249e-06, + "loss": 0.5867, + "step": 2838 + }, + { + "epoch": 0.18, + "grad_norm": 0.9445327520370483, + "learning_rate": 9.422686868977884e-06, + "loss": 0.5812, + "step": 2839 + }, + { + "epoch": 0.18, + "grad_norm": 1.0006681680679321, + "learning_rate": 9.42220818128556e-06, + "loss": 0.6484, + "step": 2840 + }, + { + "epoch": 0.18, + "grad_norm": 0.9889962077140808, + "learning_rate": 9.421729307389435e-06, + "loss": 0.6266, + "step": 2841 + }, + { + "epoch": 0.18, + "grad_norm": 0.8913476467132568, + "learning_rate": 9.42125024730967e-06, + "loss": 0.6197, + "step": 2842 + }, + { + "epoch": 0.18, + "grad_norm": 1.0092391967773438, + "learning_rate": 9.420771001066439e-06, + "loss": 0.6748, + "step": 2843 + }, + { + "epoch": 0.18, + "grad_norm": 0.9135981202125549, + "learning_rate": 9.420291568679917e-06, + "loss": 0.6796, + "step": 2844 + }, + { + "epoch": 0.18, + "grad_norm": 0.9135114550590515, + "learning_rate": 9.419811950170294e-06, + "loss": 0.6444, + "step": 2845 + }, + { + "epoch": 0.18, + "grad_norm": 0.9234583377838135, + "learning_rate": 9.419332145557768e-06, + "loss": 0.652, + "step": 2846 + }, + { + "epoch": 0.18, + "grad_norm": 1.013744831085205, + "learning_rate": 9.418852154862538e-06, + "loss": 0.6552, + "step": 2847 + }, + { + "epoch": 0.18, + "grad_norm": 0.8808279633522034, + "learning_rate": 9.418371978104816e-06, + "loss": 0.6126, + "step": 2848 + }, + { + "epoch": 0.18, + "grad_norm": 0.9165722131729126, + "learning_rate": 9.41789161530482e-06, + "loss": 0.6558, + "step": 2849 + }, + { + "epoch": 0.18, + "grad_norm": 0.9362298250198364, + "learning_rate": 9.417411066482777e-06, + "loss": 0.6204, + "step": 2850 + }, + { + "epoch": 0.18, + "grad_norm": 0.9138143658638, + "learning_rate": 9.41693033165892e-06, + "loss": 0.6359, + "step": 2851 + }, + { + "epoch": 0.18, + "grad_norm": 0.8916357755661011, + "learning_rate": 9.416449410853495e-06, + "loss": 0.6234, + "step": 2852 + }, + { + "epoch": 0.18, + "grad_norm": 1.0022516250610352, + "learning_rate": 9.415968304086746e-06, + "loss": 0.6353, + "step": 2853 + }, + { + "epoch": 0.18, + "grad_norm": 0.8648804426193237, + "learning_rate": 9.415487011378935e-06, + "loss": 0.6154, + "step": 2854 + }, + { + "epoch": 0.18, + "grad_norm": 0.9364731311798096, + "learning_rate": 9.415005532750326e-06, + "loss": 0.5895, + "step": 2855 + }, + { + "epoch": 0.18, + "grad_norm": 0.961506187915802, + "learning_rate": 9.414523868221192e-06, + "loss": 0.6945, + "step": 2856 + }, + { + "epoch": 0.18, + "grad_norm": 0.8515611886978149, + "learning_rate": 9.414042017811817e-06, + "loss": 0.6253, + "step": 2857 + }, + { + "epoch": 0.18, + "grad_norm": 0.8460178375244141, + "learning_rate": 9.413559981542486e-06, + "loss": 0.6468, + "step": 2858 + }, + { + "epoch": 0.18, + "grad_norm": 0.9305799603462219, + "learning_rate": 9.413077759433498e-06, + "loss": 0.644, + "step": 2859 + }, + { + "epoch": 0.18, + "grad_norm": 0.9662857055664062, + "learning_rate": 9.412595351505158e-06, + "loss": 0.6078, + "step": 2860 + }, + { + "epoch": 0.18, + "grad_norm": 0.8659998178482056, + "learning_rate": 9.412112757777777e-06, + "loss": 0.6536, + "step": 2861 + }, + { + "epoch": 0.18, + "grad_norm": 0.9392401576042175, + "learning_rate": 9.411629978271679e-06, + "loss": 0.6528, + "step": 2862 + }, + { + "epoch": 0.18, + "grad_norm": 0.9797030687332153, + "learning_rate": 9.411147013007188e-06, + "loss": 0.6421, + "step": 2863 + }, + { + "epoch": 0.18, + "grad_norm": 0.9174354672431946, + "learning_rate": 9.41066386200464e-06, + "loss": 0.6395, + "step": 2864 + }, + { + "epoch": 0.18, + "grad_norm": 0.8441389203071594, + "learning_rate": 9.410180525284384e-06, + "loss": 0.5613, + "step": 2865 + }, + { + "epoch": 0.18, + "grad_norm": 0.8536418080329895, + "learning_rate": 9.409697002866765e-06, + "loss": 0.5965, + "step": 2866 + }, + { + "epoch": 0.18, + "grad_norm": 0.9425634145736694, + "learning_rate": 9.409213294772147e-06, + "loss": 0.6575, + "step": 2867 + }, + { + "epoch": 0.18, + "grad_norm": 0.9369651079177856, + "learning_rate": 9.408729401020896e-06, + "loss": 0.6457, + "step": 2868 + }, + { + "epoch": 0.18, + "grad_norm": 0.9151921272277832, + "learning_rate": 9.408245321633385e-06, + "loss": 0.6186, + "step": 2869 + }, + { + "epoch": 0.18, + "grad_norm": 0.8802269697189331, + "learning_rate": 9.407761056629999e-06, + "loss": 0.5875, + "step": 2870 + }, + { + "epoch": 0.18, + "grad_norm": 0.9625882506370544, + "learning_rate": 9.40727660603113e-06, + "loss": 0.6164, + "step": 2871 + }, + { + "epoch": 0.18, + "grad_norm": 1.045422911643982, + "learning_rate": 9.406791969857173e-06, + "loss": 0.6814, + "step": 2872 + }, + { + "epoch": 0.18, + "grad_norm": 0.9631166458129883, + "learning_rate": 9.406307148128537e-06, + "loss": 0.6423, + "step": 2873 + }, + { + "epoch": 0.18, + "grad_norm": 1.0093629360198975, + "learning_rate": 9.405822140865636e-06, + "loss": 0.6567, + "step": 2874 + }, + { + "epoch": 0.18, + "grad_norm": 0.8591984510421753, + "learning_rate": 9.40533694808889e-06, + "loss": 0.5637, + "step": 2875 + }, + { + "epoch": 0.18, + "grad_norm": 0.8643238544464111, + "learning_rate": 9.404851569818731e-06, + "loss": 0.6406, + "step": 2876 + }, + { + "epoch": 0.18, + "grad_norm": 1.0501185655593872, + "learning_rate": 9.404366006075596e-06, + "loss": 0.7109, + "step": 2877 + }, + { + "epoch": 0.18, + "grad_norm": 0.9728371500968933, + "learning_rate": 9.403880256879931e-06, + "loss": 0.6682, + "step": 2878 + }, + { + "epoch": 0.18, + "grad_norm": 0.9601030349731445, + "learning_rate": 9.403394322252186e-06, + "loss": 0.6103, + "step": 2879 + }, + { + "epoch": 0.18, + "grad_norm": 0.8728528618812561, + "learning_rate": 9.402908202212826e-06, + "loss": 0.5668, + "step": 2880 + }, + { + "epoch": 0.18, + "grad_norm": 0.9390819072723389, + "learning_rate": 9.402421896782319e-06, + "loss": 0.6126, + "step": 2881 + }, + { + "epoch": 0.18, + "grad_norm": 0.8935672044754028, + "learning_rate": 9.401935405981138e-06, + "loss": 0.6379, + "step": 2882 + }, + { + "epoch": 0.18, + "grad_norm": 0.8943515419960022, + "learning_rate": 9.401448729829773e-06, + "loss": 0.6343, + "step": 2883 + }, + { + "epoch": 0.18, + "grad_norm": 0.8738617300987244, + "learning_rate": 9.400961868348713e-06, + "loss": 0.6473, + "step": 2884 + }, + { + "epoch": 0.18, + "grad_norm": 0.8747914433479309, + "learning_rate": 9.400474821558457e-06, + "loss": 0.5848, + "step": 2885 + }, + { + "epoch": 0.18, + "grad_norm": 0.8880560994148254, + "learning_rate": 9.399987589479516e-06, + "loss": 0.6521, + "step": 2886 + }, + { + "epoch": 0.18, + "grad_norm": 0.9082310199737549, + "learning_rate": 9.399500172132403e-06, + "loss": 0.6059, + "step": 2887 + }, + { + "epoch": 0.18, + "grad_norm": 0.945617139339447, + "learning_rate": 9.399012569537643e-06, + "loss": 0.6031, + "step": 2888 + }, + { + "epoch": 0.18, + "grad_norm": 0.9733775854110718, + "learning_rate": 9.398524781715767e-06, + "loss": 0.6393, + "step": 2889 + }, + { + "epoch": 0.18, + "grad_norm": 0.9139953851699829, + "learning_rate": 9.398036808687314e-06, + "loss": 0.5984, + "step": 2890 + }, + { + "epoch": 0.18, + "grad_norm": 0.896920919418335, + "learning_rate": 9.39754865047283e-06, + "loss": 0.6455, + "step": 2891 + }, + { + "epoch": 0.18, + "grad_norm": 0.8665996193885803, + "learning_rate": 9.39706030709287e-06, + "loss": 0.5565, + "step": 2892 + }, + { + "epoch": 0.18, + "grad_norm": 0.9414594173431396, + "learning_rate": 9.396571778567997e-06, + "loss": 0.6157, + "step": 2893 + }, + { + "epoch": 0.18, + "grad_norm": 0.9462769031524658, + "learning_rate": 9.396083064918782e-06, + "loss": 0.6198, + "step": 2894 + }, + { + "epoch": 0.18, + "grad_norm": 0.9393496513366699, + "learning_rate": 9.3955941661658e-06, + "loss": 0.6631, + "step": 2895 + }, + { + "epoch": 0.18, + "grad_norm": 0.8664448857307434, + "learning_rate": 9.39510508232964e-06, + "loss": 0.5953, + "step": 2896 + }, + { + "epoch": 0.18, + "grad_norm": 0.8992229700088501, + "learning_rate": 9.394615813430895e-06, + "loss": 0.604, + "step": 2897 + }, + { + "epoch": 0.18, + "grad_norm": 0.9086971282958984, + "learning_rate": 9.394126359490166e-06, + "loss": 0.6294, + "step": 2898 + }, + { + "epoch": 0.18, + "grad_norm": 0.9720740914344788, + "learning_rate": 9.393636720528061e-06, + "loss": 0.6259, + "step": 2899 + }, + { + "epoch": 0.18, + "grad_norm": 0.9365057349205017, + "learning_rate": 9.393146896565197e-06, + "loss": 0.6621, + "step": 2900 + }, + { + "epoch": 0.18, + "grad_norm": 1.003066897392273, + "learning_rate": 9.392656887622202e-06, + "loss": 0.639, + "step": 2901 + }, + { + "epoch": 0.18, + "grad_norm": 0.9764630794525146, + "learning_rate": 9.392166693719706e-06, + "loss": 0.6113, + "step": 2902 + }, + { + "epoch": 0.18, + "grad_norm": 0.901775598526001, + "learning_rate": 9.391676314878348e-06, + "loss": 0.6274, + "step": 2903 + }, + { + "epoch": 0.18, + "grad_norm": 0.8916495442390442, + "learning_rate": 9.391185751118782e-06, + "loss": 0.6762, + "step": 2904 + }, + { + "epoch": 0.18, + "grad_norm": 0.9183511137962341, + "learning_rate": 9.390695002461657e-06, + "loss": 0.6358, + "step": 2905 + }, + { + "epoch": 0.18, + "grad_norm": 0.8651520609855652, + "learning_rate": 9.390204068927638e-06, + "loss": 0.629, + "step": 2906 + }, + { + "epoch": 0.18, + "grad_norm": 0.9377386569976807, + "learning_rate": 9.389712950537399e-06, + "loss": 0.638, + "step": 2907 + }, + { + "epoch": 0.18, + "grad_norm": 0.9569635987281799, + "learning_rate": 9.38922164731162e-06, + "loss": 0.6532, + "step": 2908 + }, + { + "epoch": 0.18, + "grad_norm": 0.8829088807106018, + "learning_rate": 9.388730159270984e-06, + "loss": 0.524, + "step": 2909 + }, + { + "epoch": 0.18, + "grad_norm": 0.8493028879165649, + "learning_rate": 9.38823848643619e-06, + "loss": 0.6161, + "step": 2910 + }, + { + "epoch": 0.18, + "grad_norm": 0.9601280689239502, + "learning_rate": 9.38774662882794e-06, + "loss": 0.7469, + "step": 2911 + }, + { + "epoch": 0.18, + "grad_norm": 0.9152663946151733, + "learning_rate": 9.387254586466942e-06, + "loss": 0.6003, + "step": 2912 + }, + { + "epoch": 0.18, + "grad_norm": 0.8507892489433289, + "learning_rate": 9.386762359373915e-06, + "loss": 0.6036, + "step": 2913 + }, + { + "epoch": 0.18, + "grad_norm": 0.8200054168701172, + "learning_rate": 9.386269947569585e-06, + "loss": 0.5787, + "step": 2914 + }, + { + "epoch": 0.18, + "grad_norm": 0.9165661334991455, + "learning_rate": 9.385777351074688e-06, + "loss": 0.6487, + "step": 2915 + }, + { + "epoch": 0.18, + "grad_norm": 0.9168198108673096, + "learning_rate": 9.385284569909963e-06, + "loss": 0.628, + "step": 2916 + }, + { + "epoch": 0.18, + "grad_norm": 0.8360828161239624, + "learning_rate": 9.38479160409616e-06, + "loss": 0.6177, + "step": 2917 + }, + { + "epoch": 0.18, + "grad_norm": 0.9536049962043762, + "learning_rate": 9.384298453654037e-06, + "loss": 0.6575, + "step": 2918 + }, + { + "epoch": 0.18, + "grad_norm": 0.8601536154747009, + "learning_rate": 9.383805118604357e-06, + "loss": 0.6362, + "step": 2919 + }, + { + "epoch": 0.18, + "grad_norm": 0.90377277135849, + "learning_rate": 9.383311598967892e-06, + "loss": 0.6232, + "step": 2920 + }, + { + "epoch": 0.19, + "grad_norm": 0.9626878499984741, + "learning_rate": 9.382817894765426e-06, + "loss": 0.7203, + "step": 2921 + }, + { + "epoch": 0.19, + "grad_norm": 0.9490284323692322, + "learning_rate": 9.382324006017746e-06, + "loss": 0.6665, + "step": 2922 + }, + { + "epoch": 0.19, + "grad_norm": 0.9522665739059448, + "learning_rate": 9.381829932745646e-06, + "loss": 0.5931, + "step": 2923 + }, + { + "epoch": 0.19, + "grad_norm": 0.9432849884033203, + "learning_rate": 9.38133567496993e-06, + "loss": 0.6349, + "step": 2924 + }, + { + "epoch": 0.19, + "grad_norm": 0.9463351368904114, + "learning_rate": 9.380841232711412e-06, + "loss": 0.6388, + "step": 2925 + }, + { + "epoch": 0.19, + "grad_norm": 0.9243980646133423, + "learning_rate": 9.38034660599091e-06, + "loss": 0.5969, + "step": 2926 + }, + { + "epoch": 0.19, + "grad_norm": 0.8849940299987793, + "learning_rate": 9.379851794829247e-06, + "loss": 0.59, + "step": 2927 + }, + { + "epoch": 0.19, + "grad_norm": 0.8930582404136658, + "learning_rate": 9.379356799247263e-06, + "loss": 0.6975, + "step": 2928 + }, + { + "epoch": 0.19, + "grad_norm": 0.8990230560302734, + "learning_rate": 9.3788616192658e-06, + "loss": 0.6572, + "step": 2929 + }, + { + "epoch": 0.19, + "grad_norm": 0.9136034250259399, + "learning_rate": 9.378366254905706e-06, + "loss": 0.6293, + "step": 2930 + }, + { + "epoch": 0.19, + "grad_norm": 0.8949812054634094, + "learning_rate": 9.377870706187841e-06, + "loss": 0.5834, + "step": 2931 + }, + { + "epoch": 0.19, + "grad_norm": 0.9137758016586304, + "learning_rate": 9.37737497313307e-06, + "loss": 0.5751, + "step": 2932 + }, + { + "epoch": 0.19, + "grad_norm": 0.8495166301727295, + "learning_rate": 9.376879055762267e-06, + "loss": 0.5503, + "step": 2933 + }, + { + "epoch": 0.19, + "grad_norm": 0.900567889213562, + "learning_rate": 9.376382954096312e-06, + "loss": 0.6444, + "step": 2934 + }, + { + "epoch": 0.19, + "grad_norm": 0.8367151021957397, + "learning_rate": 9.375886668156095e-06, + "loss": 0.6396, + "step": 2935 + }, + { + "epoch": 0.19, + "grad_norm": 0.9150660634040833, + "learning_rate": 9.375390197962514e-06, + "loss": 0.5965, + "step": 2936 + }, + { + "epoch": 0.19, + "grad_norm": 0.9402359127998352, + "learning_rate": 9.374893543536471e-06, + "loss": 0.5989, + "step": 2937 + }, + { + "epoch": 0.19, + "grad_norm": 0.9302038550376892, + "learning_rate": 9.374396704898883e-06, + "loss": 0.6621, + "step": 2938 + }, + { + "epoch": 0.19, + "grad_norm": 0.9301861524581909, + "learning_rate": 9.373899682070664e-06, + "loss": 0.6565, + "step": 2939 + }, + { + "epoch": 0.19, + "grad_norm": 0.9140589237213135, + "learning_rate": 9.373402475072746e-06, + "loss": 0.6323, + "step": 2940 + }, + { + "epoch": 0.19, + "grad_norm": 0.8676826357841492, + "learning_rate": 9.372905083926064e-06, + "loss": 0.6269, + "step": 2941 + }, + { + "epoch": 0.19, + "grad_norm": 0.8796793818473816, + "learning_rate": 9.372407508651561e-06, + "loss": 0.6185, + "step": 2942 + }, + { + "epoch": 0.19, + "grad_norm": 0.8520810604095459, + "learning_rate": 9.371909749270189e-06, + "loss": 0.5918, + "step": 2943 + }, + { + "epoch": 0.19, + "grad_norm": 0.8967190384864807, + "learning_rate": 9.371411805802907e-06, + "loss": 0.6327, + "step": 2944 + }, + { + "epoch": 0.19, + "grad_norm": 0.9223288893699646, + "learning_rate": 9.370913678270678e-06, + "loss": 0.5942, + "step": 2945 + }, + { + "epoch": 0.19, + "grad_norm": 0.8543033599853516, + "learning_rate": 9.370415366694483e-06, + "loss": 0.6218, + "step": 2946 + }, + { + "epoch": 0.19, + "grad_norm": 0.9239391684532166, + "learning_rate": 9.369916871095299e-06, + "loss": 0.6351, + "step": 2947 + }, + { + "epoch": 0.19, + "grad_norm": 0.8336774110794067, + "learning_rate": 9.369418191494117e-06, + "loss": 0.6284, + "step": 2948 + }, + { + "epoch": 0.19, + "grad_norm": 0.9170488119125366, + "learning_rate": 9.368919327911934e-06, + "loss": 0.6324, + "step": 2949 + }, + { + "epoch": 0.19, + "grad_norm": 0.9082837104797363, + "learning_rate": 9.368420280369759e-06, + "loss": 0.6565, + "step": 2950 + }, + { + "epoch": 0.19, + "grad_norm": 0.8999912738800049, + "learning_rate": 9.3679210488886e-06, + "loss": 0.6246, + "step": 2951 + }, + { + "epoch": 0.19, + "grad_norm": 0.8772600293159485, + "learning_rate": 9.367421633489482e-06, + "loss": 0.6999, + "step": 2952 + }, + { + "epoch": 0.19, + "grad_norm": 0.8400874733924866, + "learning_rate": 9.366922034193431e-06, + "loss": 0.6037, + "step": 2953 + }, + { + "epoch": 0.19, + "grad_norm": 0.9329245090484619, + "learning_rate": 9.366422251021486e-06, + "loss": 0.6171, + "step": 2954 + }, + { + "epoch": 0.19, + "grad_norm": 0.8737487196922302, + "learning_rate": 9.365922283994689e-06, + "loss": 0.6164, + "step": 2955 + }, + { + "epoch": 0.19, + "grad_norm": 0.9520554542541504, + "learning_rate": 9.365422133134093e-06, + "loss": 0.6257, + "step": 2956 + }, + { + "epoch": 0.19, + "grad_norm": 0.8844176530838013, + "learning_rate": 9.364921798460756e-06, + "loss": 0.6177, + "step": 2957 + }, + { + "epoch": 0.19, + "grad_norm": 0.9074828028678894, + "learning_rate": 9.364421279995747e-06, + "loss": 0.6601, + "step": 2958 + }, + { + "epoch": 0.19, + "grad_norm": 0.8782038688659668, + "learning_rate": 9.36392057776014e-06, + "loss": 0.6358, + "step": 2959 + }, + { + "epoch": 0.19, + "grad_norm": 0.9199265241622925, + "learning_rate": 9.36341969177502e-06, + "loss": 0.5666, + "step": 2960 + }, + { + "epoch": 0.19, + "grad_norm": 0.887235164642334, + "learning_rate": 9.362918622061475e-06, + "loss": 0.6011, + "step": 2961 + }, + { + "epoch": 0.19, + "grad_norm": 0.882902204990387, + "learning_rate": 9.362417368640604e-06, + "loss": 0.5865, + "step": 2962 + }, + { + "epoch": 0.19, + "grad_norm": 0.8558497428894043, + "learning_rate": 9.361915931533513e-06, + "loss": 0.6236, + "step": 2963 + }, + { + "epoch": 0.19, + "grad_norm": 0.879278838634491, + "learning_rate": 9.36141431076132e-06, + "loss": 0.6493, + "step": 2964 + }, + { + "epoch": 0.19, + "grad_norm": 0.929413378238678, + "learning_rate": 9.360912506345139e-06, + "loss": 0.6075, + "step": 2965 + }, + { + "epoch": 0.19, + "grad_norm": 0.8678581118583679, + "learning_rate": 9.360410518306103e-06, + "loss": 0.5635, + "step": 2966 + }, + { + "epoch": 0.19, + "grad_norm": 0.8952652812004089, + "learning_rate": 9.359908346665349e-06, + "loss": 0.5775, + "step": 2967 + }, + { + "epoch": 0.19, + "grad_norm": 0.9281927943229675, + "learning_rate": 9.359405991444022e-06, + "loss": 0.6704, + "step": 2968 + }, + { + "epoch": 0.19, + "grad_norm": 0.958543598651886, + "learning_rate": 9.358903452663273e-06, + "loss": 0.6666, + "step": 2969 + }, + { + "epoch": 0.19, + "grad_norm": 0.8834668397903442, + "learning_rate": 9.358400730344265e-06, + "loss": 0.6254, + "step": 2970 + }, + { + "epoch": 0.19, + "grad_norm": 0.8528700470924377, + "learning_rate": 9.357897824508163e-06, + "loss": 0.5954, + "step": 2971 + }, + { + "epoch": 0.19, + "grad_norm": 0.9526364207267761, + "learning_rate": 9.357394735176144e-06, + "loss": 0.6404, + "step": 2972 + }, + { + "epoch": 0.19, + "grad_norm": 0.8991536498069763, + "learning_rate": 9.356891462369391e-06, + "loss": 0.6091, + "step": 2973 + }, + { + "epoch": 0.19, + "grad_norm": 0.916277289390564, + "learning_rate": 9.356388006109094e-06, + "loss": 0.6429, + "step": 2974 + }, + { + "epoch": 0.19, + "grad_norm": 0.8340256214141846, + "learning_rate": 9.355884366416454e-06, + "loss": 0.6042, + "step": 2975 + }, + { + "epoch": 0.19, + "grad_norm": 0.9151265621185303, + "learning_rate": 9.355380543312676e-06, + "loss": 0.6805, + "step": 2976 + }, + { + "epoch": 0.19, + "grad_norm": 0.9069379568099976, + "learning_rate": 9.354876536818974e-06, + "loss": 0.6437, + "step": 2977 + }, + { + "epoch": 0.19, + "grad_norm": 0.8915377259254456, + "learning_rate": 9.35437234695657e-06, + "loss": 0.6179, + "step": 2978 + }, + { + "epoch": 0.19, + "grad_norm": 0.9558995366096497, + "learning_rate": 9.353867973746696e-06, + "loss": 0.6258, + "step": 2979 + }, + { + "epoch": 0.19, + "grad_norm": 0.8677237629890442, + "learning_rate": 9.353363417210587e-06, + "loss": 0.5676, + "step": 2980 + }, + { + "epoch": 0.19, + "grad_norm": 0.9419227838516235, + "learning_rate": 9.352858677369488e-06, + "loss": 0.655, + "step": 2981 + }, + { + "epoch": 0.19, + "grad_norm": 0.8914104700088501, + "learning_rate": 9.352353754244654e-06, + "loss": 0.5995, + "step": 2982 + }, + { + "epoch": 0.19, + "grad_norm": 0.8787503838539124, + "learning_rate": 9.351848647857343e-06, + "loss": 0.6032, + "step": 2983 + }, + { + "epoch": 0.19, + "grad_norm": 0.856158971786499, + "learning_rate": 9.351343358228825e-06, + "loss": 0.5997, + "step": 2984 + }, + { + "epoch": 0.19, + "grad_norm": 0.8458609580993652, + "learning_rate": 9.350837885380375e-06, + "loss": 0.6437, + "step": 2985 + }, + { + "epoch": 0.19, + "grad_norm": 0.9018574953079224, + "learning_rate": 9.350332229333276e-06, + "loss": 0.607, + "step": 2986 + }, + { + "epoch": 0.19, + "grad_norm": 0.8946293592453003, + "learning_rate": 9.349826390108823e-06, + "loss": 0.6058, + "step": 2987 + }, + { + "epoch": 0.19, + "grad_norm": 0.8331573605537415, + "learning_rate": 9.349320367728312e-06, + "loss": 0.5638, + "step": 2988 + }, + { + "epoch": 0.19, + "grad_norm": 0.8785397410392761, + "learning_rate": 9.34881416221305e-06, + "loss": 0.6192, + "step": 2989 + }, + { + "epoch": 0.19, + "grad_norm": 0.9098055958747864, + "learning_rate": 9.348307773584351e-06, + "loss": 0.6112, + "step": 2990 + }, + { + "epoch": 0.19, + "grad_norm": 0.9168455600738525, + "learning_rate": 9.34780120186354e-06, + "loss": 0.6383, + "step": 2991 + }, + { + "epoch": 0.19, + "grad_norm": 0.9003625512123108, + "learning_rate": 9.347294447071945e-06, + "loss": 0.6049, + "step": 2992 + }, + { + "epoch": 0.19, + "grad_norm": 0.9203317165374756, + "learning_rate": 9.346787509230903e-06, + "loss": 0.6291, + "step": 2993 + }, + { + "epoch": 0.19, + "grad_norm": 0.8706764578819275, + "learning_rate": 9.346280388361761e-06, + "loss": 0.6065, + "step": 2994 + }, + { + "epoch": 0.19, + "grad_norm": 0.8694605827331543, + "learning_rate": 9.34577308448587e-06, + "loss": 0.6128, + "step": 2995 + }, + { + "epoch": 0.19, + "grad_norm": 0.8985933065414429, + "learning_rate": 9.345265597624595e-06, + "loss": 0.6279, + "step": 2996 + }, + { + "epoch": 0.19, + "grad_norm": 0.9904145002365112, + "learning_rate": 9.344757927799299e-06, + "loss": 0.6427, + "step": 2997 + }, + { + "epoch": 0.19, + "grad_norm": 0.943004846572876, + "learning_rate": 9.344250075031362e-06, + "loss": 0.6155, + "step": 2998 + }, + { + "epoch": 0.19, + "grad_norm": 0.8913044929504395, + "learning_rate": 9.343742039342168e-06, + "loss": 0.5856, + "step": 2999 + }, + { + "epoch": 0.19, + "grad_norm": 0.8846127986907959, + "learning_rate": 9.343233820753107e-06, + "loss": 0.6393, + "step": 3000 + }, + { + "epoch": 0.19, + "grad_norm": 0.8753595948219299, + "learning_rate": 9.34272541928558e-06, + "loss": 0.6275, + "step": 3001 + }, + { + "epoch": 0.19, + "grad_norm": 0.9006514549255371, + "learning_rate": 9.34221683496099e-06, + "loss": 0.5802, + "step": 3002 + }, + { + "epoch": 0.19, + "grad_norm": 0.9199402332305908, + "learning_rate": 9.341708067800757e-06, + "loss": 0.5987, + "step": 3003 + }, + { + "epoch": 0.19, + "grad_norm": 0.9146287441253662, + "learning_rate": 9.341199117826298e-06, + "loss": 0.6328, + "step": 3004 + }, + { + "epoch": 0.19, + "grad_norm": 0.8784115314483643, + "learning_rate": 9.340689985059048e-06, + "loss": 0.6419, + "step": 3005 + }, + { + "epoch": 0.19, + "grad_norm": 0.8956212997436523, + "learning_rate": 9.340180669520443e-06, + "loss": 0.6327, + "step": 3006 + }, + { + "epoch": 0.19, + "grad_norm": 0.9466882944107056, + "learning_rate": 9.339671171231929e-06, + "loss": 0.6233, + "step": 3007 + }, + { + "epoch": 0.19, + "grad_norm": 0.8755168318748474, + "learning_rate": 9.339161490214957e-06, + "loss": 0.6351, + "step": 3008 + }, + { + "epoch": 0.19, + "grad_norm": 0.8940410614013672, + "learning_rate": 9.33865162649099e-06, + "loss": 0.6076, + "step": 3009 + }, + { + "epoch": 0.19, + "grad_norm": 0.833020031452179, + "learning_rate": 9.338141580081496e-06, + "loss": 0.5649, + "step": 3010 + }, + { + "epoch": 0.19, + "grad_norm": 0.950567364692688, + "learning_rate": 9.337631351007953e-06, + "loss": 0.5768, + "step": 3011 + }, + { + "epoch": 0.19, + "grad_norm": 0.9161326289176941, + "learning_rate": 9.337120939291842e-06, + "loss": 0.6158, + "step": 3012 + }, + { + "epoch": 0.19, + "grad_norm": 0.9443663954734802, + "learning_rate": 9.336610344954656e-06, + "loss": 0.5929, + "step": 3013 + }, + { + "epoch": 0.19, + "grad_norm": 0.9030787348747253, + "learning_rate": 9.336099568017895e-06, + "loss": 0.583, + "step": 3014 + }, + { + "epoch": 0.19, + "grad_norm": 0.984470546245575, + "learning_rate": 9.335588608503065e-06, + "loss": 0.6265, + "step": 3015 + }, + { + "epoch": 0.19, + "grad_norm": 0.9294076561927795, + "learning_rate": 9.33507746643168e-06, + "loss": 0.6035, + "step": 3016 + }, + { + "epoch": 0.19, + "grad_norm": 0.8897981643676758, + "learning_rate": 9.334566141825266e-06, + "loss": 0.649, + "step": 3017 + }, + { + "epoch": 0.19, + "grad_norm": 0.8603422045707703, + "learning_rate": 9.334054634705347e-06, + "loss": 0.5839, + "step": 3018 + }, + { + "epoch": 0.19, + "grad_norm": 0.9341859817504883, + "learning_rate": 9.333542945093468e-06, + "loss": 0.6234, + "step": 3019 + }, + { + "epoch": 0.19, + "grad_norm": 0.885899007320404, + "learning_rate": 9.333031073011169e-06, + "loss": 0.6592, + "step": 3020 + }, + { + "epoch": 0.19, + "grad_norm": 0.9095667004585266, + "learning_rate": 9.332519018480005e-06, + "loss": 0.6809, + "step": 3021 + }, + { + "epoch": 0.19, + "grad_norm": 0.8997942805290222, + "learning_rate": 9.332006781521537e-06, + "loss": 0.5953, + "step": 3022 + }, + { + "epoch": 0.19, + "grad_norm": 0.8883410692214966, + "learning_rate": 9.331494362157335e-06, + "loss": 0.5812, + "step": 3023 + }, + { + "epoch": 0.19, + "grad_norm": 0.9656973481178284, + "learning_rate": 9.330981760408972e-06, + "loss": 0.6112, + "step": 3024 + }, + { + "epoch": 0.19, + "grad_norm": 0.862815260887146, + "learning_rate": 9.330468976298033e-06, + "loss": 0.6099, + "step": 3025 + }, + { + "epoch": 0.19, + "grad_norm": 0.9055874347686768, + "learning_rate": 9.329956009846111e-06, + "loss": 0.6427, + "step": 3026 + }, + { + "epoch": 0.19, + "grad_norm": 0.9218257665634155, + "learning_rate": 9.329442861074803e-06, + "loss": 0.6451, + "step": 3027 + }, + { + "epoch": 0.19, + "grad_norm": 0.8925780057907104, + "learning_rate": 9.328929530005717e-06, + "loss": 0.6358, + "step": 3028 + }, + { + "epoch": 0.19, + "grad_norm": 0.9461687803268433, + "learning_rate": 9.328416016660471e-06, + "loss": 0.6319, + "step": 3029 + }, + { + "epoch": 0.19, + "grad_norm": 0.8816470503807068, + "learning_rate": 9.327902321060681e-06, + "loss": 0.5662, + "step": 3030 + }, + { + "epoch": 0.19, + "grad_norm": 0.8994545936584473, + "learning_rate": 9.327388443227981e-06, + "loss": 0.6594, + "step": 3031 + }, + { + "epoch": 0.19, + "grad_norm": 0.9947099685668945, + "learning_rate": 9.326874383184006e-06, + "loss": 0.6412, + "step": 3032 + }, + { + "epoch": 0.19, + "grad_norm": 0.9207108020782471, + "learning_rate": 9.326360140950406e-06, + "loss": 0.6029, + "step": 3033 + }, + { + "epoch": 0.19, + "grad_norm": 0.9070324897766113, + "learning_rate": 9.325845716548827e-06, + "loss": 0.648, + "step": 3034 + }, + { + "epoch": 0.19, + "grad_norm": 0.959884762763977, + "learning_rate": 9.325331110000937e-06, + "loss": 0.6598, + "step": 3035 + }, + { + "epoch": 0.19, + "grad_norm": 0.9339284896850586, + "learning_rate": 9.324816321328398e-06, + "loss": 0.6646, + "step": 3036 + }, + { + "epoch": 0.19, + "grad_norm": 0.852982759475708, + "learning_rate": 9.324301350552889e-06, + "loss": 0.6305, + "step": 3037 + }, + { + "epoch": 0.19, + "grad_norm": 0.9116036891937256, + "learning_rate": 9.323786197696094e-06, + "loss": 0.6283, + "step": 3038 + }, + { + "epoch": 0.19, + "grad_norm": 0.9738210439682007, + "learning_rate": 9.323270862779704e-06, + "loss": 0.6546, + "step": 3039 + }, + { + "epoch": 0.19, + "grad_norm": 0.8786873817443848, + "learning_rate": 9.322755345825418e-06, + "loss": 0.5488, + "step": 3040 + }, + { + "epoch": 0.19, + "grad_norm": 0.891937255859375, + "learning_rate": 9.32223964685494e-06, + "loss": 0.6073, + "step": 3041 + }, + { + "epoch": 0.19, + "grad_norm": 0.9519621133804321, + "learning_rate": 9.321723765889987e-06, + "loss": 0.6103, + "step": 3042 + }, + { + "epoch": 0.19, + "grad_norm": 0.9369633197784424, + "learning_rate": 9.321207702952281e-06, + "loss": 0.6551, + "step": 3043 + }, + { + "epoch": 0.19, + "grad_norm": 1.0724352598190308, + "learning_rate": 9.320691458063552e-06, + "loss": 0.644, + "step": 3044 + }, + { + "epoch": 0.19, + "grad_norm": 0.8826418519020081, + "learning_rate": 9.320175031245535e-06, + "loss": 0.6231, + "step": 3045 + }, + { + "epoch": 0.19, + "grad_norm": 0.9791775345802307, + "learning_rate": 9.319658422519977e-06, + "loss": 0.6646, + "step": 3046 + }, + { + "epoch": 0.19, + "grad_norm": 1.0244020223617554, + "learning_rate": 9.319141631908628e-06, + "loss": 0.6662, + "step": 3047 + }, + { + "epoch": 0.19, + "grad_norm": 0.8816352486610413, + "learning_rate": 9.318624659433254e-06, + "loss": 0.6291, + "step": 3048 + }, + { + "epoch": 0.19, + "grad_norm": 0.9350719451904297, + "learning_rate": 9.318107505115615e-06, + "loss": 0.6312, + "step": 3049 + }, + { + "epoch": 0.19, + "grad_norm": 0.8697081208229065, + "learning_rate": 9.317590168977492e-06, + "loss": 0.6504, + "step": 3050 + }, + { + "epoch": 0.19, + "grad_norm": 0.9508548378944397, + "learning_rate": 9.317072651040666e-06, + "loss": 0.6127, + "step": 3051 + }, + { + "epoch": 0.19, + "grad_norm": 0.9957895278930664, + "learning_rate": 9.31655495132693e-06, + "loss": 0.6096, + "step": 3052 + }, + { + "epoch": 0.19, + "grad_norm": 0.906047523021698, + "learning_rate": 9.31603706985808e-06, + "loss": 0.6054, + "step": 3053 + }, + { + "epoch": 0.19, + "grad_norm": 0.9574893116950989, + "learning_rate": 9.315519006655925e-06, + "loss": 0.6771, + "step": 3054 + }, + { + "epoch": 0.19, + "grad_norm": 0.8845919966697693, + "learning_rate": 9.315000761742276e-06, + "loss": 0.6098, + "step": 3055 + }, + { + "epoch": 0.19, + "grad_norm": 0.9109580516815186, + "learning_rate": 9.314482335138954e-06, + "loss": 0.6058, + "step": 3056 + }, + { + "epoch": 0.19, + "grad_norm": 0.8939434289932251, + "learning_rate": 9.313963726867793e-06, + "loss": 0.6442, + "step": 3057 + }, + { + "epoch": 0.19, + "grad_norm": 0.9312341213226318, + "learning_rate": 9.313444936950626e-06, + "loss": 0.5768, + "step": 3058 + }, + { + "epoch": 0.19, + "grad_norm": 0.9663771986961365, + "learning_rate": 9.312925965409297e-06, + "loss": 0.6526, + "step": 3059 + }, + { + "epoch": 0.19, + "grad_norm": 1.0164662599563599, + "learning_rate": 9.312406812265659e-06, + "loss": 0.6253, + "step": 3060 + }, + { + "epoch": 0.19, + "grad_norm": 0.8911099433898926, + "learning_rate": 9.311887477541574e-06, + "loss": 0.6088, + "step": 3061 + }, + { + "epoch": 0.19, + "grad_norm": 0.8796306848526001, + "learning_rate": 9.311367961258906e-06, + "loss": 0.6459, + "step": 3062 + }, + { + "epoch": 0.19, + "grad_norm": 0.8623282313346863, + "learning_rate": 9.31084826343953e-06, + "loss": 0.6009, + "step": 3063 + }, + { + "epoch": 0.19, + "grad_norm": 0.9488338828086853, + "learning_rate": 9.310328384105331e-06, + "loss": 0.6573, + "step": 3064 + }, + { + "epoch": 0.19, + "grad_norm": 0.9341808557510376, + "learning_rate": 9.309808323278199e-06, + "loss": 0.5932, + "step": 3065 + }, + { + "epoch": 0.19, + "grad_norm": 0.8781132698059082, + "learning_rate": 9.30928808098003e-06, + "loss": 0.6225, + "step": 3066 + }, + { + "epoch": 0.19, + "grad_norm": 0.9114009737968445, + "learning_rate": 9.308767657232733e-06, + "loss": 0.6765, + "step": 3067 + }, + { + "epoch": 0.19, + "grad_norm": 0.9022600650787354, + "learning_rate": 9.308247052058217e-06, + "loss": 0.6255, + "step": 3068 + }, + { + "epoch": 0.19, + "grad_norm": 0.9390726089477539, + "learning_rate": 9.307726265478405e-06, + "loss": 0.6543, + "step": 3069 + }, + { + "epoch": 0.19, + "grad_norm": 0.8956183791160583, + "learning_rate": 9.307205297515225e-06, + "loss": 0.6966, + "step": 3070 + }, + { + "epoch": 0.19, + "grad_norm": 0.9602479338645935, + "learning_rate": 9.306684148190616e-06, + "loss": 0.6754, + "step": 3071 + }, + { + "epoch": 0.19, + "grad_norm": 0.885344922542572, + "learning_rate": 9.306162817526519e-06, + "loss": 0.6952, + "step": 3072 + }, + { + "epoch": 0.19, + "grad_norm": 0.8164680004119873, + "learning_rate": 9.305641305544884e-06, + "loss": 0.5571, + "step": 3073 + }, + { + "epoch": 0.19, + "grad_norm": 0.8812573552131653, + "learning_rate": 9.305119612267673e-06, + "loss": 0.6224, + "step": 3074 + }, + { + "epoch": 0.19, + "grad_norm": 0.9333205819129944, + "learning_rate": 9.30459773771685e-06, + "loss": 0.66, + "step": 3075 + }, + { + "epoch": 0.19, + "grad_norm": 0.9016597270965576, + "learning_rate": 9.304075681914392e-06, + "loss": 0.5679, + "step": 3076 + }, + { + "epoch": 0.19, + "grad_norm": 0.8855369687080383, + "learning_rate": 9.30355344488228e-06, + "loss": 0.5928, + "step": 3077 + }, + { + "epoch": 0.2, + "grad_norm": 0.885812520980835, + "learning_rate": 9.303031026642504e-06, + "loss": 0.5906, + "step": 3078 + }, + { + "epoch": 0.2, + "grad_norm": 0.8560954332351685, + "learning_rate": 9.302508427217059e-06, + "loss": 0.6125, + "step": 3079 + }, + { + "epoch": 0.2, + "grad_norm": 0.9503233432769775, + "learning_rate": 9.301985646627953e-06, + "loss": 0.6551, + "step": 3080 + }, + { + "epoch": 0.2, + "grad_norm": 0.8623626828193665, + "learning_rate": 9.301462684897195e-06, + "loss": 0.613, + "step": 3081 + }, + { + "epoch": 0.2, + "grad_norm": 0.9079574346542358, + "learning_rate": 9.300939542046808e-06, + "loss": 0.601, + "step": 3082 + }, + { + "epoch": 0.2, + "grad_norm": 0.950981616973877, + "learning_rate": 9.30041621809882e-06, + "loss": 0.6574, + "step": 3083 + }, + { + "epoch": 0.2, + "grad_norm": 0.9608682990074158, + "learning_rate": 9.299892713075263e-06, + "loss": 0.6698, + "step": 3084 + }, + { + "epoch": 0.2, + "grad_norm": 0.9275756478309631, + "learning_rate": 9.299369026998184e-06, + "loss": 0.6307, + "step": 3085 + }, + { + "epoch": 0.2, + "grad_norm": 0.9137438535690308, + "learning_rate": 9.298845159889632e-06, + "loss": 0.612, + "step": 3086 + }, + { + "epoch": 0.2, + "grad_norm": 0.9493110179901123, + "learning_rate": 9.298321111771664e-06, + "loss": 0.6554, + "step": 3087 + }, + { + "epoch": 0.2, + "grad_norm": 0.9397709369659424, + "learning_rate": 9.297796882666346e-06, + "loss": 0.6389, + "step": 3088 + }, + { + "epoch": 0.2, + "grad_norm": 0.9529610276222229, + "learning_rate": 9.297272472595753e-06, + "loss": 0.6638, + "step": 3089 + }, + { + "epoch": 0.2, + "grad_norm": 0.8715389370918274, + "learning_rate": 9.296747881581965e-06, + "loss": 0.6121, + "step": 3090 + }, + { + "epoch": 0.2, + "grad_norm": 0.8829297423362732, + "learning_rate": 9.29622310964707e-06, + "loss": 0.6215, + "step": 3091 + }, + { + "epoch": 0.2, + "grad_norm": 0.9472043514251709, + "learning_rate": 9.295698156813167e-06, + "loss": 0.6624, + "step": 3092 + }, + { + "epoch": 0.2, + "grad_norm": 1.0828962326049805, + "learning_rate": 9.295173023102358e-06, + "loss": 0.6576, + "step": 3093 + }, + { + "epoch": 0.2, + "grad_norm": 1.0033841133117676, + "learning_rate": 9.294647708536754e-06, + "loss": 0.6569, + "step": 3094 + }, + { + "epoch": 0.2, + "grad_norm": 0.9431530237197876, + "learning_rate": 9.294122213138475e-06, + "loss": 0.5798, + "step": 3095 + }, + { + "epoch": 0.2, + "grad_norm": 0.951475977897644, + "learning_rate": 9.29359653692965e-06, + "loss": 0.6196, + "step": 3096 + }, + { + "epoch": 0.2, + "grad_norm": 0.9203341007232666, + "learning_rate": 9.293070679932407e-06, + "loss": 0.6916, + "step": 3097 + }, + { + "epoch": 0.2, + "grad_norm": 0.9140121936798096, + "learning_rate": 9.292544642168896e-06, + "loss": 0.5716, + "step": 3098 + }, + { + "epoch": 0.2, + "grad_norm": 0.8884128928184509, + "learning_rate": 9.292018423661261e-06, + "loss": 0.6084, + "step": 3099 + }, + { + "epoch": 0.2, + "grad_norm": 0.9457247257232666, + "learning_rate": 9.291492024431661e-06, + "loss": 0.6486, + "step": 3100 + }, + { + "epoch": 0.2, + "grad_norm": 0.9811872839927673, + "learning_rate": 9.290965444502263e-06, + "loss": 0.6772, + "step": 3101 + }, + { + "epoch": 0.2, + "grad_norm": 0.8778002858161926, + "learning_rate": 9.290438683895236e-06, + "loss": 0.6258, + "step": 3102 + }, + { + "epoch": 0.2, + "grad_norm": 0.962437093257904, + "learning_rate": 9.28991174263276e-06, + "loss": 0.6443, + "step": 3103 + }, + { + "epoch": 0.2, + "grad_norm": 0.961173415184021, + "learning_rate": 9.289384620737025e-06, + "loss": 0.6779, + "step": 3104 + }, + { + "epoch": 0.2, + "grad_norm": 0.9411439299583435, + "learning_rate": 9.288857318230225e-06, + "loss": 0.6218, + "step": 3105 + }, + { + "epoch": 0.2, + "grad_norm": 0.9473131895065308, + "learning_rate": 9.288329835134563e-06, + "loss": 0.6684, + "step": 3106 + }, + { + "epoch": 0.2, + "grad_norm": 0.8709444999694824, + "learning_rate": 9.28780217147225e-06, + "loss": 0.5879, + "step": 3107 + }, + { + "epoch": 0.2, + "grad_norm": 0.902916431427002, + "learning_rate": 9.287274327265505e-06, + "loss": 0.64, + "step": 3108 + }, + { + "epoch": 0.2, + "grad_norm": 0.9012413024902344, + "learning_rate": 9.286746302536551e-06, + "loss": 0.6239, + "step": 3109 + }, + { + "epoch": 0.2, + "grad_norm": 0.9633440971374512, + "learning_rate": 9.286218097307623e-06, + "loss": 0.6198, + "step": 3110 + }, + { + "epoch": 0.2, + "grad_norm": 0.9164153337478638, + "learning_rate": 9.285689711600961e-06, + "loss": 0.6444, + "step": 3111 + }, + { + "epoch": 0.2, + "grad_norm": 0.9006356000900269, + "learning_rate": 9.285161145438815e-06, + "loss": 0.6407, + "step": 3112 + }, + { + "epoch": 0.2, + "grad_norm": 0.8857513666152954, + "learning_rate": 9.284632398843439e-06, + "loss": 0.5797, + "step": 3113 + }, + { + "epoch": 0.2, + "grad_norm": 0.9555025696754456, + "learning_rate": 9.284103471837097e-06, + "loss": 0.644, + "step": 3114 + }, + { + "epoch": 0.2, + "grad_norm": 0.9066913723945618, + "learning_rate": 9.283574364442066e-06, + "loss": 0.5744, + "step": 3115 + }, + { + "epoch": 0.2, + "grad_norm": 0.9738790392875671, + "learning_rate": 9.283045076680614e-06, + "loss": 0.6821, + "step": 3116 + }, + { + "epoch": 0.2, + "grad_norm": 0.9615656733512878, + "learning_rate": 9.282515608575038e-06, + "loss": 0.6394, + "step": 3117 + }, + { + "epoch": 0.2, + "grad_norm": 0.9014465808868408, + "learning_rate": 9.281985960147625e-06, + "loss": 0.621, + "step": 3118 + }, + { + "epoch": 0.2, + "grad_norm": 0.8829550743103027, + "learning_rate": 9.28145613142068e-06, + "loss": 0.6598, + "step": 3119 + }, + { + "epoch": 0.2, + "grad_norm": 0.9063881039619446, + "learning_rate": 9.28092612241651e-06, + "loss": 0.6228, + "step": 3120 + }, + { + "epoch": 0.2, + "grad_norm": 0.9000431895256042, + "learning_rate": 9.280395933157436e-06, + "loss": 0.5869, + "step": 3121 + }, + { + "epoch": 0.2, + "grad_norm": 0.8334502577781677, + "learning_rate": 9.279865563665778e-06, + "loss": 0.593, + "step": 3122 + }, + { + "epoch": 0.2, + "grad_norm": 0.8414939045906067, + "learning_rate": 9.27933501396387e-06, + "loss": 0.5866, + "step": 3123 + }, + { + "epoch": 0.2, + "grad_norm": 0.8854286670684814, + "learning_rate": 9.27880428407405e-06, + "loss": 0.639, + "step": 3124 + }, + { + "epoch": 0.2, + "grad_norm": 0.9396377801895142, + "learning_rate": 9.278273374018669e-06, + "loss": 0.6313, + "step": 3125 + }, + { + "epoch": 0.2, + "grad_norm": 0.9234236478805542, + "learning_rate": 9.277742283820077e-06, + "loss": 0.6578, + "step": 3126 + }, + { + "epoch": 0.2, + "grad_norm": 0.9088654518127441, + "learning_rate": 9.27721101350064e-06, + "loss": 0.6698, + "step": 3127 + }, + { + "epoch": 0.2, + "grad_norm": 0.8841193318367004, + "learning_rate": 9.276679563082726e-06, + "loss": 0.6511, + "step": 3128 + }, + { + "epoch": 0.2, + "grad_norm": 0.8056107759475708, + "learning_rate": 9.276147932588712e-06, + "loss": 0.573, + "step": 3129 + }, + { + "epoch": 0.2, + "grad_norm": 0.8983877301216125, + "learning_rate": 9.275616122040985e-06, + "loss": 0.5889, + "step": 3130 + }, + { + "epoch": 0.2, + "grad_norm": 0.9186437129974365, + "learning_rate": 9.275084131461938e-06, + "loss": 0.6494, + "step": 3131 + }, + { + "epoch": 0.2, + "grad_norm": 0.9824482202529907, + "learning_rate": 9.27455196087397e-06, + "loss": 0.6029, + "step": 3132 + }, + { + "epoch": 0.2, + "grad_norm": 0.8984844088554382, + "learning_rate": 9.274019610299487e-06, + "loss": 0.6178, + "step": 3133 + }, + { + "epoch": 0.2, + "grad_norm": 0.8482160568237305, + "learning_rate": 9.273487079760908e-06, + "loss": 0.5949, + "step": 3134 + }, + { + "epoch": 0.2, + "grad_norm": 0.9106261134147644, + "learning_rate": 9.272954369280654e-06, + "loss": 0.6198, + "step": 3135 + }, + { + "epoch": 0.2, + "grad_norm": 0.8580856323242188, + "learning_rate": 9.272421478881158e-06, + "loss": 0.6239, + "step": 3136 + }, + { + "epoch": 0.2, + "grad_norm": 0.9911568760871887, + "learning_rate": 9.271888408584852e-06, + "loss": 0.6535, + "step": 3137 + }, + { + "epoch": 0.2, + "grad_norm": 0.8783669471740723, + "learning_rate": 9.27135515841419e-06, + "loss": 0.6204, + "step": 3138 + }, + { + "epoch": 0.2, + "grad_norm": 0.8823960423469543, + "learning_rate": 9.27082172839162e-06, + "loss": 0.6155, + "step": 3139 + }, + { + "epoch": 0.2, + "grad_norm": 0.9094551801681519, + "learning_rate": 9.270288118539603e-06, + "loss": 0.636, + "step": 3140 + }, + { + "epoch": 0.2, + "grad_norm": 0.906217098236084, + "learning_rate": 9.26975432888061e-06, + "loss": 0.5808, + "step": 3141 + }, + { + "epoch": 0.2, + "grad_norm": 0.8561161160469055, + "learning_rate": 9.269220359437114e-06, + "loss": 0.5757, + "step": 3142 + }, + { + "epoch": 0.2, + "grad_norm": 0.9296371340751648, + "learning_rate": 9.2686862102316e-06, + "loss": 0.6503, + "step": 3143 + }, + { + "epoch": 0.2, + "grad_norm": 0.9102144837379456, + "learning_rate": 9.268151881286561e-06, + "loss": 0.6316, + "step": 3144 + }, + { + "epoch": 0.2, + "grad_norm": 0.8436759114265442, + "learning_rate": 9.267617372624494e-06, + "loss": 0.6341, + "step": 3145 + }, + { + "epoch": 0.2, + "grad_norm": 0.9814794659614563, + "learning_rate": 9.267082684267905e-06, + "loss": 0.6439, + "step": 3146 + }, + { + "epoch": 0.2, + "grad_norm": 0.968041181564331, + "learning_rate": 9.266547816239309e-06, + "loss": 0.6738, + "step": 3147 + }, + { + "epoch": 0.2, + "grad_norm": 0.9353750348091125, + "learning_rate": 9.266012768561225e-06, + "loss": 0.6825, + "step": 3148 + }, + { + "epoch": 0.2, + "grad_norm": 0.9027935266494751, + "learning_rate": 9.265477541256184e-06, + "loss": 0.6578, + "step": 3149 + }, + { + "epoch": 0.2, + "grad_norm": 0.9193140864372253, + "learning_rate": 9.264942134346723e-06, + "loss": 0.6061, + "step": 3150 + }, + { + "epoch": 0.2, + "grad_norm": 0.8780162930488586, + "learning_rate": 9.264406547855386e-06, + "loss": 0.5997, + "step": 3151 + }, + { + "epoch": 0.2, + "grad_norm": 0.9127413630485535, + "learning_rate": 9.263870781804723e-06, + "loss": 0.6124, + "step": 3152 + }, + { + "epoch": 0.2, + "grad_norm": 0.9376271963119507, + "learning_rate": 9.263334836217295e-06, + "loss": 0.5963, + "step": 3153 + }, + { + "epoch": 0.2, + "grad_norm": 0.8163601160049438, + "learning_rate": 9.262798711115667e-06, + "loss": 0.626, + "step": 3154 + }, + { + "epoch": 0.2, + "grad_norm": 0.8610231280326843, + "learning_rate": 9.262262406522415e-06, + "loss": 0.5428, + "step": 3155 + }, + { + "epoch": 0.2, + "grad_norm": 0.950401782989502, + "learning_rate": 9.261725922460121e-06, + "loss": 0.6314, + "step": 3156 + }, + { + "epoch": 0.2, + "grad_norm": 0.9740757942199707, + "learning_rate": 9.261189258951372e-06, + "loss": 0.637, + "step": 3157 + }, + { + "epoch": 0.2, + "grad_norm": 0.9891514778137207, + "learning_rate": 9.26065241601877e-06, + "loss": 0.6871, + "step": 3158 + }, + { + "epoch": 0.2, + "grad_norm": 0.8673012852668762, + "learning_rate": 9.260115393684914e-06, + "loss": 0.6357, + "step": 3159 + }, + { + "epoch": 0.2, + "grad_norm": 0.9474377036094666, + "learning_rate": 9.25957819197242e-06, + "loss": 0.616, + "step": 3160 + }, + { + "epoch": 0.2, + "grad_norm": 0.8863465189933777, + "learning_rate": 9.259040810903906e-06, + "loss": 0.6046, + "step": 3161 + }, + { + "epoch": 0.2, + "grad_norm": 0.8935105800628662, + "learning_rate": 9.258503250501998e-06, + "loss": 0.6472, + "step": 3162 + }, + { + "epoch": 0.2, + "grad_norm": 0.9094743728637695, + "learning_rate": 9.257965510789334e-06, + "loss": 0.5834, + "step": 3163 + }, + { + "epoch": 0.2, + "grad_norm": 0.9533581137657166, + "learning_rate": 9.257427591788555e-06, + "loss": 0.6631, + "step": 3164 + }, + { + "epoch": 0.2, + "grad_norm": 0.8987277746200562, + "learning_rate": 9.25688949352231e-06, + "loss": 0.6267, + "step": 3165 + }, + { + "epoch": 0.2, + "grad_norm": 0.8459535241127014, + "learning_rate": 9.256351216013257e-06, + "loss": 0.6366, + "step": 3166 + }, + { + "epoch": 0.2, + "grad_norm": 0.8984457850456238, + "learning_rate": 9.255812759284062e-06, + "loss": 0.5716, + "step": 3167 + }, + { + "epoch": 0.2, + "grad_norm": 0.8287543654441833, + "learning_rate": 9.255274123357396e-06, + "loss": 0.5677, + "step": 3168 + }, + { + "epoch": 0.2, + "grad_norm": 0.9328951239585876, + "learning_rate": 9.254735308255937e-06, + "loss": 0.7044, + "step": 3169 + }, + { + "epoch": 0.2, + "grad_norm": 0.9265501499176025, + "learning_rate": 9.254196314002379e-06, + "loss": 0.625, + "step": 3170 + }, + { + "epoch": 0.2, + "grad_norm": 0.959682285785675, + "learning_rate": 9.253657140619412e-06, + "loss": 0.6506, + "step": 3171 + }, + { + "epoch": 0.2, + "grad_norm": 0.9735859036445618, + "learning_rate": 9.25311778812974e-06, + "loss": 0.6281, + "step": 3172 + }, + { + "epoch": 0.2, + "grad_norm": 0.9741908311843872, + "learning_rate": 9.252578256556075e-06, + "loss": 0.6645, + "step": 3173 + }, + { + "epoch": 0.2, + "grad_norm": 0.9076485633850098, + "learning_rate": 9.252038545921131e-06, + "loss": 0.5691, + "step": 3174 + }, + { + "epoch": 0.2, + "grad_norm": 0.9652928113937378, + "learning_rate": 9.251498656247636e-06, + "loss": 0.6645, + "step": 3175 + }, + { + "epoch": 0.2, + "grad_norm": 0.9393512010574341, + "learning_rate": 9.250958587558326e-06, + "loss": 0.6011, + "step": 3176 + }, + { + "epoch": 0.2, + "grad_norm": 0.9639145731925964, + "learning_rate": 9.250418339875934e-06, + "loss": 0.6379, + "step": 3177 + }, + { + "epoch": 0.2, + "grad_norm": 0.8793298602104187, + "learning_rate": 9.249877913223213e-06, + "loss": 0.6104, + "step": 3178 + }, + { + "epoch": 0.2, + "grad_norm": 0.8683106899261475, + "learning_rate": 9.249337307622916e-06, + "loss": 0.62, + "step": 3179 + }, + { + "epoch": 0.2, + "grad_norm": 0.9256559014320374, + "learning_rate": 9.24879652309781e-06, + "loss": 0.642, + "step": 3180 + }, + { + "epoch": 0.2, + "grad_norm": 0.8257124423980713, + "learning_rate": 9.248255559670661e-06, + "loss": 0.5951, + "step": 3181 + }, + { + "epoch": 0.2, + "grad_norm": 0.8258576989173889, + "learning_rate": 9.247714417364251e-06, + "loss": 0.6086, + "step": 3182 + }, + { + "epoch": 0.2, + "grad_norm": 0.8258581161499023, + "learning_rate": 9.24717309620136e-06, + "loss": 0.576, + "step": 3183 + }, + { + "epoch": 0.2, + "grad_norm": 0.9140012860298157, + "learning_rate": 9.246631596204788e-06, + "loss": 0.5943, + "step": 3184 + }, + { + "epoch": 0.2, + "grad_norm": 0.9424448609352112, + "learning_rate": 9.246089917397332e-06, + "loss": 0.6434, + "step": 3185 + }, + { + "epoch": 0.2, + "grad_norm": 0.9118272066116333, + "learning_rate": 9.2455480598018e-06, + "loss": 0.6748, + "step": 3186 + }, + { + "epoch": 0.2, + "grad_norm": 0.9356390833854675, + "learning_rate": 9.245006023441008e-06, + "loss": 0.6076, + "step": 3187 + }, + { + "epoch": 0.2, + "grad_norm": 0.9224506616592407, + "learning_rate": 9.24446380833778e-06, + "loss": 0.6892, + "step": 3188 + }, + { + "epoch": 0.2, + "grad_norm": 0.8446199893951416, + "learning_rate": 9.243921414514947e-06, + "loss": 0.5628, + "step": 3189 + }, + { + "epoch": 0.2, + "grad_norm": 0.8842172026634216, + "learning_rate": 9.243378841995346e-06, + "loss": 0.5721, + "step": 3190 + }, + { + "epoch": 0.2, + "grad_norm": 0.9054396748542786, + "learning_rate": 9.242836090801823e-06, + "loss": 0.6142, + "step": 3191 + }, + { + "epoch": 0.2, + "grad_norm": 0.832400918006897, + "learning_rate": 9.242293160957231e-06, + "loss": 0.6044, + "step": 3192 + }, + { + "epoch": 0.2, + "grad_norm": 0.9510114789009094, + "learning_rate": 9.241750052484435e-06, + "loss": 0.6215, + "step": 3193 + }, + { + "epoch": 0.2, + "grad_norm": 0.9344449043273926, + "learning_rate": 9.241206765406298e-06, + "loss": 0.5672, + "step": 3194 + }, + { + "epoch": 0.2, + "grad_norm": 0.9397872090339661, + "learning_rate": 9.2406632997457e-06, + "loss": 0.5928, + "step": 3195 + }, + { + "epoch": 0.2, + "grad_norm": 0.9468801021575928, + "learning_rate": 9.240119655525522e-06, + "loss": 0.6609, + "step": 3196 + }, + { + "epoch": 0.2, + "grad_norm": 0.9130421280860901, + "learning_rate": 9.239575832768655e-06, + "loss": 0.6606, + "step": 3197 + }, + { + "epoch": 0.2, + "grad_norm": 0.8960924744606018, + "learning_rate": 9.239031831498e-06, + "loss": 0.588, + "step": 3198 + }, + { + "epoch": 0.2, + "grad_norm": 0.9796780347824097, + "learning_rate": 9.238487651736458e-06, + "loss": 0.6605, + "step": 3199 + }, + { + "epoch": 0.2, + "grad_norm": 0.9915714859962463, + "learning_rate": 9.237943293506948e-06, + "loss": 0.6517, + "step": 3200 + }, + { + "epoch": 0.2, + "grad_norm": 0.8834147453308105, + "learning_rate": 9.237398756832387e-06, + "loss": 0.6094, + "step": 3201 + }, + { + "epoch": 0.2, + "grad_norm": 0.9595925211906433, + "learning_rate": 9.236854041735706e-06, + "loss": 0.5947, + "step": 3202 + }, + { + "epoch": 0.2, + "grad_norm": 0.9719516038894653, + "learning_rate": 9.236309148239839e-06, + "loss": 0.6186, + "step": 3203 + }, + { + "epoch": 0.2, + "grad_norm": 0.912463366985321, + "learning_rate": 9.235764076367732e-06, + "loss": 0.5836, + "step": 3204 + }, + { + "epoch": 0.2, + "grad_norm": 0.9614611864089966, + "learning_rate": 9.235218826142337e-06, + "loss": 0.6131, + "step": 3205 + }, + { + "epoch": 0.2, + "grad_norm": 0.8435421586036682, + "learning_rate": 9.234673397586606e-06, + "loss": 0.6088, + "step": 3206 + }, + { + "epoch": 0.2, + "grad_norm": 0.9031780958175659, + "learning_rate": 9.234127790723512e-06, + "loss": 0.6154, + "step": 3207 + }, + { + "epoch": 0.2, + "grad_norm": 0.9034252166748047, + "learning_rate": 9.233582005576028e-06, + "loss": 0.5843, + "step": 3208 + }, + { + "epoch": 0.2, + "grad_norm": 0.912809431552887, + "learning_rate": 9.233036042167131e-06, + "loss": 0.634, + "step": 3209 + }, + { + "epoch": 0.2, + "grad_norm": 0.924806535243988, + "learning_rate": 9.232489900519812e-06, + "loss": 0.6497, + "step": 3210 + }, + { + "epoch": 0.2, + "grad_norm": 0.9530941247940063, + "learning_rate": 9.231943580657069e-06, + "loss": 0.6562, + "step": 3211 + }, + { + "epoch": 0.2, + "grad_norm": 0.8763086795806885, + "learning_rate": 9.2313970826019e-06, + "loss": 0.5798, + "step": 3212 + }, + { + "epoch": 0.2, + "grad_norm": 0.8387221097946167, + "learning_rate": 9.230850406377323e-06, + "loss": 0.5948, + "step": 3213 + }, + { + "epoch": 0.2, + "grad_norm": 0.8941132426261902, + "learning_rate": 9.230303552006352e-06, + "loss": 0.5912, + "step": 3214 + }, + { + "epoch": 0.2, + "grad_norm": 0.9189191460609436, + "learning_rate": 9.229756519512014e-06, + "loss": 0.6402, + "step": 3215 + }, + { + "epoch": 0.2, + "grad_norm": 0.8971881866455078, + "learning_rate": 9.229209308917343e-06, + "loss": 0.6072, + "step": 3216 + }, + { + "epoch": 0.2, + "grad_norm": 0.910284161567688, + "learning_rate": 9.228661920245383e-06, + "loss": 0.5816, + "step": 3217 + }, + { + "epoch": 0.2, + "grad_norm": 0.8809064626693726, + "learning_rate": 9.22811435351918e-06, + "loss": 0.5952, + "step": 3218 + }, + { + "epoch": 0.2, + "grad_norm": 0.9248557686805725, + "learning_rate": 9.227566608761786e-06, + "loss": 0.6442, + "step": 3219 + }, + { + "epoch": 0.2, + "grad_norm": 0.9311677813529968, + "learning_rate": 9.227018685996272e-06, + "loss": 0.6215, + "step": 3220 + }, + { + "epoch": 0.2, + "grad_norm": 0.8465821146965027, + "learning_rate": 9.226470585245706e-06, + "loss": 0.6053, + "step": 3221 + }, + { + "epoch": 0.2, + "grad_norm": 0.9327176213264465, + "learning_rate": 9.225922306533164e-06, + "loss": 0.6709, + "step": 3222 + }, + { + "epoch": 0.2, + "grad_norm": 0.8878608345985413, + "learning_rate": 9.225373849881739e-06, + "loss": 0.6033, + "step": 3223 + }, + { + "epoch": 0.2, + "grad_norm": 0.8926795721054077, + "learning_rate": 9.224825215314515e-06, + "loss": 0.6279, + "step": 3224 + }, + { + "epoch": 0.2, + "grad_norm": 0.9022210240364075, + "learning_rate": 9.224276402854601e-06, + "loss": 0.641, + "step": 3225 + }, + { + "epoch": 0.2, + "grad_norm": 0.9377365708351135, + "learning_rate": 9.223727412525103e-06, + "loss": 0.6236, + "step": 3226 + }, + { + "epoch": 0.2, + "grad_norm": 0.9372929334640503, + "learning_rate": 9.223178244349135e-06, + "loss": 0.6389, + "step": 3227 + }, + { + "epoch": 0.2, + "grad_norm": 0.8741313219070435, + "learning_rate": 9.222628898349825e-06, + "loss": 0.6063, + "step": 3228 + }, + { + "epoch": 0.2, + "grad_norm": 0.8348528742790222, + "learning_rate": 9.2220793745503e-06, + "loss": 0.5962, + "step": 3229 + }, + { + "epoch": 0.2, + "grad_norm": 0.8601580858230591, + "learning_rate": 9.221529672973701e-06, + "loss": 0.589, + "step": 3230 + }, + { + "epoch": 0.2, + "grad_norm": 1.0050134658813477, + "learning_rate": 9.220979793643173e-06, + "loss": 0.6276, + "step": 3231 + }, + { + "epoch": 0.2, + "grad_norm": 0.9667968153953552, + "learning_rate": 9.220429736581869e-06, + "loss": 0.654, + "step": 3232 + }, + { + "epoch": 0.2, + "grad_norm": 0.9288026690483093, + "learning_rate": 9.219879501812952e-06, + "loss": 0.5867, + "step": 3233 + }, + { + "epoch": 0.2, + "grad_norm": 0.9640477895736694, + "learning_rate": 9.219329089359588e-06, + "loss": 0.6481, + "step": 3234 + }, + { + "epoch": 0.2, + "grad_norm": 1.0388972759246826, + "learning_rate": 9.218778499244953e-06, + "loss": 0.6895, + "step": 3235 + }, + { + "epoch": 0.21, + "grad_norm": 0.9421420693397522, + "learning_rate": 9.218227731492234e-06, + "loss": 0.6852, + "step": 3236 + }, + { + "epoch": 0.21, + "grad_norm": 0.9457274079322815, + "learning_rate": 9.217676786124616e-06, + "loss": 0.6247, + "step": 3237 + }, + { + "epoch": 0.21, + "grad_norm": 0.9554296135902405, + "learning_rate": 9.217125663165303e-06, + "loss": 0.6557, + "step": 3238 + }, + { + "epoch": 0.21, + "grad_norm": 0.930719792842865, + "learning_rate": 9.216574362637498e-06, + "loss": 0.7073, + "step": 3239 + }, + { + "epoch": 0.21, + "grad_norm": 0.880737841129303, + "learning_rate": 9.216022884564414e-06, + "loss": 0.6405, + "step": 3240 + }, + { + "epoch": 0.21, + "grad_norm": 0.9576687812805176, + "learning_rate": 9.215471228969275e-06, + "loss": 0.6455, + "step": 3241 + }, + { + "epoch": 0.21, + "grad_norm": 0.8890754580497742, + "learning_rate": 9.214919395875306e-06, + "loss": 0.674, + "step": 3242 + }, + { + "epoch": 0.21, + "grad_norm": 0.8603907823562622, + "learning_rate": 9.214367385305744e-06, + "loss": 0.5467, + "step": 3243 + }, + { + "epoch": 0.21, + "grad_norm": 0.925334632396698, + "learning_rate": 9.213815197283834e-06, + "loss": 0.6226, + "step": 3244 + }, + { + "epoch": 0.21, + "grad_norm": 0.9949658513069153, + "learning_rate": 9.21326283183282e-06, + "loss": 0.6078, + "step": 3245 + }, + { + "epoch": 0.21, + "grad_norm": 0.8808592557907104, + "learning_rate": 9.21271028897597e-06, + "loss": 0.6003, + "step": 3246 + }, + { + "epoch": 0.21, + "grad_norm": 0.9511841535568237, + "learning_rate": 9.212157568736542e-06, + "loss": 0.6048, + "step": 3247 + }, + { + "epoch": 0.21, + "grad_norm": 0.8486485481262207, + "learning_rate": 9.211604671137812e-06, + "loss": 0.6194, + "step": 3248 + }, + { + "epoch": 0.21, + "grad_norm": 0.9710730314254761, + "learning_rate": 9.211051596203061e-06, + "loss": 0.6914, + "step": 3249 + }, + { + "epoch": 0.21, + "grad_norm": 0.8839832544326782, + "learning_rate": 9.210498343955576e-06, + "loss": 0.6243, + "step": 3250 + }, + { + "epoch": 0.21, + "grad_norm": 0.9419470429420471, + "learning_rate": 9.209944914418653e-06, + "loss": 0.6847, + "step": 3251 + }, + { + "epoch": 0.21, + "grad_norm": 0.8645347356796265, + "learning_rate": 9.209391307615596e-06, + "loss": 0.574, + "step": 3252 + }, + { + "epoch": 0.21, + "grad_norm": 0.9880130887031555, + "learning_rate": 9.208837523569713e-06, + "loss": 0.6631, + "step": 3253 + }, + { + "epoch": 0.21, + "grad_norm": 0.8735252618789673, + "learning_rate": 9.208283562304326e-06, + "loss": 0.5747, + "step": 3254 + }, + { + "epoch": 0.21, + "grad_norm": 0.9930894374847412, + "learning_rate": 9.207729423842755e-06, + "loss": 0.6138, + "step": 3255 + }, + { + "epoch": 0.21, + "grad_norm": 0.9474650025367737, + "learning_rate": 9.207175108208334e-06, + "loss": 0.6524, + "step": 3256 + }, + { + "epoch": 0.21, + "grad_norm": 0.9482831358909607, + "learning_rate": 9.20662061542441e-06, + "loss": 0.6654, + "step": 3257 + }, + { + "epoch": 0.21, + "grad_norm": 0.8777074813842773, + "learning_rate": 9.206065945514321e-06, + "loss": 0.6201, + "step": 3258 + }, + { + "epoch": 0.21, + "grad_norm": 0.9118297696113586, + "learning_rate": 9.20551109850143e-06, + "loss": 0.6074, + "step": 3259 + }, + { + "epoch": 0.21, + "grad_norm": 0.973640501499176, + "learning_rate": 9.204956074409095e-06, + "loss": 0.6246, + "step": 3260 + }, + { + "epoch": 0.21, + "grad_norm": 0.9374106526374817, + "learning_rate": 9.204400873260688e-06, + "loss": 0.6306, + "step": 3261 + }, + { + "epoch": 0.21, + "grad_norm": 0.8689625859260559, + "learning_rate": 9.203845495079587e-06, + "loss": 0.6317, + "step": 3262 + }, + { + "epoch": 0.21, + "grad_norm": 0.8900114893913269, + "learning_rate": 9.203289939889175e-06, + "loss": 0.6137, + "step": 3263 + }, + { + "epoch": 0.21, + "grad_norm": 0.862295389175415, + "learning_rate": 9.202734207712847e-06, + "loss": 0.6081, + "step": 3264 + }, + { + "epoch": 0.21, + "grad_norm": 0.8732759952545166, + "learning_rate": 9.202178298574e-06, + "loss": 0.5917, + "step": 3265 + }, + { + "epoch": 0.21, + "grad_norm": 0.9287835359573364, + "learning_rate": 9.201622212496043e-06, + "loss": 0.6226, + "step": 3266 + }, + { + "epoch": 0.21, + "grad_norm": 0.9804710745811462, + "learning_rate": 9.201065949502394e-06, + "loss": 0.6828, + "step": 3267 + }, + { + "epoch": 0.21, + "grad_norm": 0.907406210899353, + "learning_rate": 9.20050950961647e-06, + "loss": 0.6004, + "step": 3268 + }, + { + "epoch": 0.21, + "grad_norm": 0.870427668094635, + "learning_rate": 9.199952892861706e-06, + "loss": 0.6121, + "step": 3269 + }, + { + "epoch": 0.21, + "grad_norm": 0.8811596035957336, + "learning_rate": 9.199396099261532e-06, + "loss": 0.6258, + "step": 3270 + }, + { + "epoch": 0.21, + "grad_norm": 0.9226367473602295, + "learning_rate": 9.198839128839399e-06, + "loss": 0.6473, + "step": 3271 + }, + { + "epoch": 0.21, + "grad_norm": 0.8732794523239136, + "learning_rate": 9.198281981618757e-06, + "loss": 0.5947, + "step": 3272 + }, + { + "epoch": 0.21, + "grad_norm": 0.9539616703987122, + "learning_rate": 9.197724657623066e-06, + "loss": 0.6034, + "step": 3273 + }, + { + "epoch": 0.21, + "grad_norm": 0.9064382910728455, + "learning_rate": 9.197167156875793e-06, + "loss": 0.6329, + "step": 3274 + }, + { + "epoch": 0.21, + "grad_norm": 0.9909444451332092, + "learning_rate": 9.19660947940041e-06, + "loss": 0.6264, + "step": 3275 + }, + { + "epoch": 0.21, + "grad_norm": 0.8617537021636963, + "learning_rate": 9.196051625220401e-06, + "loss": 0.5953, + "step": 3276 + }, + { + "epoch": 0.21, + "grad_norm": 0.8831681609153748, + "learning_rate": 9.195493594359254e-06, + "loss": 0.6043, + "step": 3277 + }, + { + "epoch": 0.21, + "grad_norm": 0.9826748371124268, + "learning_rate": 9.19493538684047e-06, + "loss": 0.6699, + "step": 3278 + }, + { + "epoch": 0.21, + "grad_norm": 0.8774879574775696, + "learning_rate": 9.194377002687547e-06, + "loss": 0.6173, + "step": 3279 + }, + { + "epoch": 0.21, + "grad_norm": 0.976276159286499, + "learning_rate": 9.193818441924003e-06, + "loss": 0.6248, + "step": 3280 + }, + { + "epoch": 0.21, + "grad_norm": 0.8805941343307495, + "learning_rate": 9.19325970457335e-06, + "loss": 0.5739, + "step": 3281 + }, + { + "epoch": 0.21, + "grad_norm": 0.8417159914970398, + "learning_rate": 9.192700790659121e-06, + "loss": 0.5478, + "step": 3282 + }, + { + "epoch": 0.21, + "grad_norm": 0.958260178565979, + "learning_rate": 9.192141700204844e-06, + "loss": 0.7037, + "step": 3283 + }, + { + "epoch": 0.21, + "grad_norm": 0.8954302668571472, + "learning_rate": 9.191582433234067e-06, + "loss": 0.6518, + "step": 3284 + }, + { + "epoch": 0.21, + "grad_norm": 0.8878317475318909, + "learning_rate": 9.191022989770332e-06, + "loss": 0.6168, + "step": 3285 + }, + { + "epoch": 0.21, + "grad_norm": 0.9823928475379944, + "learning_rate": 9.1904633698372e-06, + "loss": 0.6546, + "step": 3286 + }, + { + "epoch": 0.21, + "grad_norm": 0.8733540177345276, + "learning_rate": 9.189903573458234e-06, + "loss": 0.6393, + "step": 3287 + }, + { + "epoch": 0.21, + "grad_norm": 0.9368897080421448, + "learning_rate": 9.189343600657002e-06, + "loss": 0.6342, + "step": 3288 + }, + { + "epoch": 0.21, + "grad_norm": 0.9004266858100891, + "learning_rate": 9.188783451457086e-06, + "loss": 0.6332, + "step": 3289 + }, + { + "epoch": 0.21, + "grad_norm": 0.8798797130584717, + "learning_rate": 9.18822312588207e-06, + "loss": 0.6061, + "step": 3290 + }, + { + "epoch": 0.21, + "grad_norm": 0.8371910452842712, + "learning_rate": 9.187662623955548e-06, + "loss": 0.5436, + "step": 3291 + }, + { + "epoch": 0.21, + "grad_norm": 0.9449594616889954, + "learning_rate": 9.18710194570112e-06, + "loss": 0.629, + "step": 3292 + }, + { + "epoch": 0.21, + "grad_norm": 0.8512078523635864, + "learning_rate": 9.186541091142397e-06, + "loss": 0.5525, + "step": 3293 + }, + { + "epoch": 0.21, + "grad_norm": 0.9958682656288147, + "learning_rate": 9.18598006030299e-06, + "loss": 0.6585, + "step": 3294 + }, + { + "epoch": 0.21, + "grad_norm": 0.8955892324447632, + "learning_rate": 9.185418853206528e-06, + "loss": 0.5793, + "step": 3295 + }, + { + "epoch": 0.21, + "grad_norm": 1.0014921426773071, + "learning_rate": 9.184857469876635e-06, + "loss": 0.6248, + "step": 3296 + }, + { + "epoch": 0.21, + "grad_norm": 0.9238271117210388, + "learning_rate": 9.184295910336953e-06, + "loss": 0.6186, + "step": 3297 + }, + { + "epoch": 0.21, + "grad_norm": 0.9311379790306091, + "learning_rate": 9.183734174611125e-06, + "loss": 0.702, + "step": 3298 + }, + { + "epoch": 0.21, + "grad_norm": 0.9323460459709167, + "learning_rate": 9.183172262722807e-06, + "loss": 0.7064, + "step": 3299 + }, + { + "epoch": 0.21, + "grad_norm": 0.8786803483963013, + "learning_rate": 9.182610174695656e-06, + "loss": 0.6119, + "step": 3300 + }, + { + "epoch": 0.21, + "grad_norm": 0.8774591088294983, + "learning_rate": 9.182047910553342e-06, + "loss": 0.6455, + "step": 3301 + }, + { + "epoch": 0.21, + "grad_norm": 0.8772428035736084, + "learning_rate": 9.181485470319537e-06, + "loss": 0.6336, + "step": 3302 + }, + { + "epoch": 0.21, + "grad_norm": 0.9135443568229675, + "learning_rate": 9.180922854017927e-06, + "loss": 0.5964, + "step": 3303 + }, + { + "epoch": 0.21, + "grad_norm": 0.9094753861427307, + "learning_rate": 9.1803600616722e-06, + "loss": 0.6492, + "step": 3304 + }, + { + "epoch": 0.21, + "grad_norm": 1.001076340675354, + "learning_rate": 9.179797093306053e-06, + "loss": 0.6767, + "step": 3305 + }, + { + "epoch": 0.21, + "grad_norm": 0.989811360836029, + "learning_rate": 9.17923394894319e-06, + "loss": 0.6325, + "step": 3306 + }, + { + "epoch": 0.21, + "grad_norm": 0.8956232070922852, + "learning_rate": 9.178670628607325e-06, + "loss": 0.6254, + "step": 3307 + }, + { + "epoch": 0.21, + "grad_norm": 0.8942602276802063, + "learning_rate": 9.178107132322174e-06, + "loss": 0.5803, + "step": 3308 + }, + { + "epoch": 0.21, + "grad_norm": 0.9630834460258484, + "learning_rate": 9.177543460111469e-06, + "loss": 0.6188, + "step": 3309 + }, + { + "epoch": 0.21, + "grad_norm": 0.912510097026825, + "learning_rate": 9.17697961199894e-06, + "loss": 0.6383, + "step": 3310 + }, + { + "epoch": 0.21, + "grad_norm": 0.9050446152687073, + "learning_rate": 9.176415588008332e-06, + "loss": 0.67, + "step": 3311 + }, + { + "epoch": 0.21, + "grad_norm": 0.9326666593551636, + "learning_rate": 9.175851388163391e-06, + "loss": 0.6253, + "step": 3312 + }, + { + "epoch": 0.21, + "grad_norm": 0.933397114276886, + "learning_rate": 9.175287012487874e-06, + "loss": 0.6303, + "step": 3313 + }, + { + "epoch": 0.21, + "grad_norm": 0.8903535604476929, + "learning_rate": 9.174722461005546e-06, + "loss": 0.6088, + "step": 3314 + }, + { + "epoch": 0.21, + "grad_norm": 0.9588652849197388, + "learning_rate": 9.174157733740178e-06, + "loss": 0.6508, + "step": 3315 + }, + { + "epoch": 0.21, + "grad_norm": 0.9236728549003601, + "learning_rate": 9.173592830715548e-06, + "loss": 0.5812, + "step": 3316 + }, + { + "epoch": 0.21, + "grad_norm": 0.9767409563064575, + "learning_rate": 9.173027751955444e-06, + "loss": 0.6523, + "step": 3317 + }, + { + "epoch": 0.21, + "grad_norm": 0.8604898452758789, + "learning_rate": 9.172462497483658e-06, + "loss": 0.555, + "step": 3318 + }, + { + "epoch": 0.21, + "grad_norm": 0.9616580009460449, + "learning_rate": 9.17189706732399e-06, + "loss": 0.6045, + "step": 3319 + }, + { + "epoch": 0.21, + "grad_norm": 1.0537388324737549, + "learning_rate": 9.171331461500253e-06, + "loss": 0.6657, + "step": 3320 + }, + { + "epoch": 0.21, + "grad_norm": 0.8993361592292786, + "learning_rate": 9.170765680036256e-06, + "loss": 0.6046, + "step": 3321 + }, + { + "epoch": 0.21, + "grad_norm": 0.8810584545135498, + "learning_rate": 9.170199722955825e-06, + "loss": 0.6191, + "step": 3322 + }, + { + "epoch": 0.21, + "grad_norm": 0.8661196231842041, + "learning_rate": 9.169633590282793e-06, + "loss": 0.608, + "step": 3323 + }, + { + "epoch": 0.21, + "grad_norm": 0.9606330990791321, + "learning_rate": 9.169067282040994e-06, + "loss": 0.649, + "step": 3324 + }, + { + "epoch": 0.21, + "grad_norm": 0.8650776147842407, + "learning_rate": 9.168500798254275e-06, + "loss": 0.5795, + "step": 3325 + }, + { + "epoch": 0.21, + "grad_norm": 0.9146811366081238, + "learning_rate": 9.167934138946489e-06, + "loss": 0.6595, + "step": 3326 + }, + { + "epoch": 0.21, + "grad_norm": 1.0079501867294312, + "learning_rate": 9.167367304141494e-06, + "loss": 0.6568, + "step": 3327 + }, + { + "epoch": 0.21, + "grad_norm": 0.9679005146026611, + "learning_rate": 9.166800293863161e-06, + "loss": 0.6695, + "step": 3328 + }, + { + "epoch": 0.21, + "grad_norm": 0.9201866984367371, + "learning_rate": 9.166233108135362e-06, + "loss": 0.5872, + "step": 3329 + }, + { + "epoch": 0.21, + "grad_norm": 0.9234635233879089, + "learning_rate": 9.165665746981982e-06, + "loss": 0.6317, + "step": 3330 + }, + { + "epoch": 0.21, + "grad_norm": 0.8861828446388245, + "learning_rate": 9.165098210426905e-06, + "loss": 0.6627, + "step": 3331 + }, + { + "epoch": 0.21, + "grad_norm": 0.8471156358718872, + "learning_rate": 9.164530498494035e-06, + "loss": 0.6388, + "step": 3332 + }, + { + "epoch": 0.21, + "grad_norm": 0.898435115814209, + "learning_rate": 9.163962611207272e-06, + "loss": 0.6261, + "step": 3333 + }, + { + "epoch": 0.21, + "grad_norm": 0.9174916744232178, + "learning_rate": 9.163394548590529e-06, + "loss": 0.6421, + "step": 3334 + }, + { + "epoch": 0.21, + "grad_norm": 0.922631025314331, + "learning_rate": 9.162826310667725e-06, + "loss": 0.641, + "step": 3335 + }, + { + "epoch": 0.21, + "grad_norm": 0.9309580326080322, + "learning_rate": 9.162257897462784e-06, + "loss": 0.6544, + "step": 3336 + }, + { + "epoch": 0.21, + "grad_norm": 0.8594711422920227, + "learning_rate": 9.161689308999646e-06, + "loss": 0.6029, + "step": 3337 + }, + { + "epoch": 0.21, + "grad_norm": 0.969755232334137, + "learning_rate": 9.161120545302246e-06, + "loss": 0.6378, + "step": 3338 + }, + { + "epoch": 0.21, + "grad_norm": 0.9250763058662415, + "learning_rate": 9.160551606394537e-06, + "loss": 0.6754, + "step": 3339 + }, + { + "epoch": 0.21, + "grad_norm": 0.910316526889801, + "learning_rate": 9.159982492300473e-06, + "loss": 0.6433, + "step": 3340 + }, + { + "epoch": 0.21, + "grad_norm": 0.9393495321273804, + "learning_rate": 9.159413203044017e-06, + "loss": 0.614, + "step": 3341 + }, + { + "epoch": 0.21, + "grad_norm": 0.9090781211853027, + "learning_rate": 9.158843738649141e-06, + "loss": 0.601, + "step": 3342 + }, + { + "epoch": 0.21, + "grad_norm": 0.8957191705703735, + "learning_rate": 9.158274099139823e-06, + "loss": 0.6071, + "step": 3343 + }, + { + "epoch": 0.21, + "grad_norm": 0.8667554259300232, + "learning_rate": 9.157704284540047e-06, + "loss": 0.5847, + "step": 3344 + }, + { + "epoch": 0.21, + "grad_norm": 0.904606819152832, + "learning_rate": 9.15713429487381e-06, + "loss": 0.6435, + "step": 3345 + }, + { + "epoch": 0.21, + "grad_norm": 0.8986235857009888, + "learning_rate": 9.156564130165106e-06, + "loss": 0.5976, + "step": 3346 + }, + { + "epoch": 0.21, + "grad_norm": 0.9764082431793213, + "learning_rate": 9.155993790437949e-06, + "loss": 0.6332, + "step": 3347 + }, + { + "epoch": 0.21, + "grad_norm": 0.8578452467918396, + "learning_rate": 9.155423275716351e-06, + "loss": 0.5981, + "step": 3348 + }, + { + "epoch": 0.21, + "grad_norm": 0.8971353769302368, + "learning_rate": 9.154852586024332e-06, + "loss": 0.6173, + "step": 3349 + }, + { + "epoch": 0.21, + "grad_norm": 0.8360897302627563, + "learning_rate": 9.154281721385928e-06, + "loss": 0.6029, + "step": 3350 + }, + { + "epoch": 0.21, + "grad_norm": 0.9505079984664917, + "learning_rate": 9.153710681825169e-06, + "loss": 0.6472, + "step": 3351 + }, + { + "epoch": 0.21, + "grad_norm": 0.8876816034317017, + "learning_rate": 9.153139467366103e-06, + "loss": 0.6335, + "step": 3352 + }, + { + "epoch": 0.21, + "grad_norm": 0.8487616777420044, + "learning_rate": 9.152568078032783e-06, + "loss": 0.6377, + "step": 3353 + }, + { + "epoch": 0.21, + "grad_norm": 0.9489740133285522, + "learning_rate": 9.151996513849267e-06, + "loss": 0.6148, + "step": 3354 + }, + { + "epoch": 0.21, + "grad_norm": 0.7857329249382019, + "learning_rate": 9.151424774839622e-06, + "loss": 0.5779, + "step": 3355 + }, + { + "epoch": 0.21, + "grad_norm": 0.8584344983100891, + "learning_rate": 9.15085286102792e-06, + "loss": 0.5893, + "step": 3356 + }, + { + "epoch": 0.21, + "grad_norm": 0.8409185409545898, + "learning_rate": 9.150280772438245e-06, + "loss": 0.6258, + "step": 3357 + }, + { + "epoch": 0.21, + "grad_norm": 0.8375939726829529, + "learning_rate": 9.149708509094684e-06, + "loss": 0.5923, + "step": 3358 + }, + { + "epoch": 0.21, + "grad_norm": 0.9026387333869934, + "learning_rate": 9.149136071021333e-06, + "loss": 0.6251, + "step": 3359 + }, + { + "epoch": 0.21, + "grad_norm": 0.8975716233253479, + "learning_rate": 9.148563458242296e-06, + "loss": 0.6533, + "step": 3360 + }, + { + "epoch": 0.21, + "grad_norm": 0.8834118247032166, + "learning_rate": 9.147990670781683e-06, + "loss": 0.6297, + "step": 3361 + }, + { + "epoch": 0.21, + "grad_norm": 0.8428575992584229, + "learning_rate": 9.147417708663615e-06, + "loss": 0.5204, + "step": 3362 + }, + { + "epoch": 0.21, + "grad_norm": 0.8661702871322632, + "learning_rate": 9.146844571912213e-06, + "loss": 0.6314, + "step": 3363 + }, + { + "epoch": 0.21, + "grad_norm": 0.9363715052604675, + "learning_rate": 9.146271260551614e-06, + "loss": 0.6431, + "step": 3364 + }, + { + "epoch": 0.21, + "grad_norm": 0.9469258785247803, + "learning_rate": 9.145697774605953e-06, + "loss": 0.6139, + "step": 3365 + }, + { + "epoch": 0.21, + "grad_norm": 0.9144854545593262, + "learning_rate": 9.145124114099382e-06, + "loss": 0.6105, + "step": 3366 + }, + { + "epoch": 0.21, + "grad_norm": 1.001625657081604, + "learning_rate": 9.144550279056055e-06, + "loss": 0.5842, + "step": 3367 + }, + { + "epoch": 0.21, + "grad_norm": 0.9497262239456177, + "learning_rate": 9.143976269500133e-06, + "loss": 0.5686, + "step": 3368 + }, + { + "epoch": 0.21, + "grad_norm": 0.9559330940246582, + "learning_rate": 9.143402085455785e-06, + "loss": 0.6098, + "step": 3369 + }, + { + "epoch": 0.21, + "grad_norm": 0.8853155374526978, + "learning_rate": 9.142827726947193e-06, + "loss": 0.6347, + "step": 3370 + }, + { + "epoch": 0.21, + "grad_norm": 0.9385725855827332, + "learning_rate": 9.142253193998533e-06, + "loss": 0.6098, + "step": 3371 + }, + { + "epoch": 0.21, + "grad_norm": 0.8995375037193298, + "learning_rate": 9.141678486634002e-06, + "loss": 0.6358, + "step": 3372 + }, + { + "epoch": 0.21, + "grad_norm": 0.8732660412788391, + "learning_rate": 9.1411036048778e-06, + "loss": 0.6316, + "step": 3373 + }, + { + "epoch": 0.21, + "grad_norm": 0.8813968896865845, + "learning_rate": 9.140528548754128e-06, + "loss": 0.6106, + "step": 3374 + }, + { + "epoch": 0.21, + "grad_norm": 0.8351157307624817, + "learning_rate": 9.139953318287204e-06, + "loss": 0.5437, + "step": 3375 + }, + { + "epoch": 0.21, + "grad_norm": 0.9750312566757202, + "learning_rate": 9.139377913501247e-06, + "loss": 0.63, + "step": 3376 + }, + { + "epoch": 0.21, + "grad_norm": 0.8909156322479248, + "learning_rate": 9.138802334420486e-06, + "loss": 0.58, + "step": 3377 + }, + { + "epoch": 0.21, + "grad_norm": 0.9233285188674927, + "learning_rate": 9.138226581069158e-06, + "loss": 0.6109, + "step": 3378 + }, + { + "epoch": 0.21, + "grad_norm": 0.9473268985748291, + "learning_rate": 9.137650653471505e-06, + "loss": 0.6883, + "step": 3379 + }, + { + "epoch": 0.21, + "grad_norm": 0.9071610569953918, + "learning_rate": 9.137074551651774e-06, + "loss": 0.6187, + "step": 3380 + }, + { + "epoch": 0.21, + "grad_norm": 0.9321165084838867, + "learning_rate": 9.136498275634226e-06, + "loss": 0.649, + "step": 3381 + }, + { + "epoch": 0.21, + "grad_norm": 0.885661780834198, + "learning_rate": 9.135921825443125e-06, + "loss": 0.6455, + "step": 3382 + }, + { + "epoch": 0.21, + "grad_norm": 0.9413583278656006, + "learning_rate": 9.135345201102745e-06, + "loss": 0.5993, + "step": 3383 + }, + { + "epoch": 0.21, + "grad_norm": 0.9824182391166687, + "learning_rate": 9.134768402637366e-06, + "loss": 0.6636, + "step": 3384 + }, + { + "epoch": 0.21, + "grad_norm": 0.8217403292655945, + "learning_rate": 9.13419143007127e-06, + "loss": 0.6168, + "step": 3385 + }, + { + "epoch": 0.21, + "grad_norm": 0.8587862253189087, + "learning_rate": 9.133614283428757e-06, + "loss": 0.58, + "step": 3386 + }, + { + "epoch": 0.21, + "grad_norm": 1.0002095699310303, + "learning_rate": 9.133036962734127e-06, + "loss": 0.6247, + "step": 3387 + }, + { + "epoch": 0.21, + "grad_norm": 0.8633260726928711, + "learning_rate": 9.132459468011686e-06, + "loss": 0.6379, + "step": 3388 + }, + { + "epoch": 0.21, + "grad_norm": 0.9585233926773071, + "learning_rate": 9.131881799285754e-06, + "loss": 0.6496, + "step": 3389 + }, + { + "epoch": 0.21, + "grad_norm": 1.0030509233474731, + "learning_rate": 9.131303956580653e-06, + "loss": 0.6768, + "step": 3390 + }, + { + "epoch": 0.21, + "grad_norm": 0.8934270143508911, + "learning_rate": 9.130725939920712e-06, + "loss": 0.5957, + "step": 3391 + }, + { + "epoch": 0.21, + "grad_norm": 0.9003897905349731, + "learning_rate": 9.130147749330275e-06, + "loss": 0.6296, + "step": 3392 + }, + { + "epoch": 0.21, + "grad_norm": 0.8862766027450562, + "learning_rate": 9.129569384833682e-06, + "loss": 0.636, + "step": 3393 + }, + { + "epoch": 0.22, + "grad_norm": 0.9102445840835571, + "learning_rate": 9.128990846455287e-06, + "loss": 0.66, + "step": 3394 + }, + { + "epoch": 0.22, + "grad_norm": 0.9600756168365479, + "learning_rate": 9.128412134219453e-06, + "loss": 0.5945, + "step": 3395 + }, + { + "epoch": 0.22, + "grad_norm": 0.9160851240158081, + "learning_rate": 9.127833248150546e-06, + "loss": 0.6609, + "step": 3396 + }, + { + "epoch": 0.22, + "grad_norm": 0.9181495904922485, + "learning_rate": 9.12725418827294e-06, + "loss": 0.5946, + "step": 3397 + }, + { + "epoch": 0.22, + "grad_norm": 0.870098888874054, + "learning_rate": 9.126674954611016e-06, + "loss": 0.6147, + "step": 3398 + }, + { + "epoch": 0.22, + "grad_norm": 0.8894675374031067, + "learning_rate": 9.12609554718917e-06, + "loss": 0.6234, + "step": 3399 + }, + { + "epoch": 0.22, + "grad_norm": 0.8688364028930664, + "learning_rate": 9.12551596603179e-06, + "loss": 0.6225, + "step": 3400 + }, + { + "epoch": 0.22, + "grad_norm": 0.8926935195922852, + "learning_rate": 9.124936211163284e-06, + "loss": 0.623, + "step": 3401 + }, + { + "epoch": 0.22, + "grad_norm": 0.8921478390693665, + "learning_rate": 9.124356282608065e-06, + "loss": 0.5813, + "step": 3402 + }, + { + "epoch": 0.22, + "grad_norm": 0.9715839624404907, + "learning_rate": 9.123776180390552e-06, + "loss": 0.6453, + "step": 3403 + }, + { + "epoch": 0.22, + "grad_norm": 0.95328289270401, + "learning_rate": 9.123195904535167e-06, + "loss": 0.5729, + "step": 3404 + }, + { + "epoch": 0.22, + "grad_norm": 0.9020276665687561, + "learning_rate": 9.122615455066348e-06, + "loss": 0.6469, + "step": 3405 + }, + { + "epoch": 0.22, + "grad_norm": 0.9650804996490479, + "learning_rate": 9.122034832008532e-06, + "loss": 0.6088, + "step": 3406 + }, + { + "epoch": 0.22, + "grad_norm": 0.9247978329658508, + "learning_rate": 9.12145403538617e-06, + "loss": 0.6298, + "step": 3407 + }, + { + "epoch": 0.22, + "grad_norm": 0.9034278988838196, + "learning_rate": 9.120873065223716e-06, + "loss": 0.6255, + "step": 3408 + }, + { + "epoch": 0.22, + "grad_norm": 0.9190613031387329, + "learning_rate": 9.120291921545633e-06, + "loss": 0.5856, + "step": 3409 + }, + { + "epoch": 0.22, + "grad_norm": 1.0126057863235474, + "learning_rate": 9.11971060437639e-06, + "loss": 0.6682, + "step": 3410 + }, + { + "epoch": 0.22, + "grad_norm": 0.9093670845031738, + "learning_rate": 9.119129113740463e-06, + "loss": 0.5967, + "step": 3411 + }, + { + "epoch": 0.22, + "grad_norm": 0.8827959299087524, + "learning_rate": 9.118547449662342e-06, + "loss": 0.6004, + "step": 3412 + }, + { + "epoch": 0.22, + "grad_norm": 0.9230462312698364, + "learning_rate": 9.117965612166514e-06, + "loss": 0.636, + "step": 3413 + }, + { + "epoch": 0.22, + "grad_norm": 0.8487642407417297, + "learning_rate": 9.117383601277478e-06, + "loss": 0.6186, + "step": 3414 + }, + { + "epoch": 0.22, + "grad_norm": 0.8755055665969849, + "learning_rate": 9.116801417019744e-06, + "loss": 0.581, + "step": 3415 + }, + { + "epoch": 0.22, + "grad_norm": 0.9088988304138184, + "learning_rate": 9.116219059417821e-06, + "loss": 0.6535, + "step": 3416 + }, + { + "epoch": 0.22, + "grad_norm": 0.9298532009124756, + "learning_rate": 9.115636528496236e-06, + "loss": 0.6107, + "step": 3417 + }, + { + "epoch": 0.22, + "grad_norm": 0.8741108775138855, + "learning_rate": 9.115053824279511e-06, + "loss": 0.6134, + "step": 3418 + }, + { + "epoch": 0.22, + "grad_norm": 0.9260140657424927, + "learning_rate": 9.114470946792187e-06, + "loss": 0.6243, + "step": 3419 + }, + { + "epoch": 0.22, + "grad_norm": 0.8403961062431335, + "learning_rate": 9.113887896058805e-06, + "loss": 0.6119, + "step": 3420 + }, + { + "epoch": 0.22, + "grad_norm": 0.9289052486419678, + "learning_rate": 9.11330467210391e-06, + "loss": 0.6269, + "step": 3421 + }, + { + "epoch": 0.22, + "grad_norm": 0.9038977026939392, + "learning_rate": 9.11272127495207e-06, + "loss": 0.5929, + "step": 3422 + }, + { + "epoch": 0.22, + "grad_norm": 0.8994903564453125, + "learning_rate": 9.112137704627842e-06, + "loss": 0.6708, + "step": 3423 + }, + { + "epoch": 0.22, + "grad_norm": 0.8599143028259277, + "learning_rate": 9.1115539611558e-06, + "loss": 0.597, + "step": 3424 + }, + { + "epoch": 0.22, + "grad_norm": 0.9443843960762024, + "learning_rate": 9.110970044560524e-06, + "loss": 0.6339, + "step": 3425 + }, + { + "epoch": 0.22, + "grad_norm": 0.8927295804023743, + "learning_rate": 9.1103859548666e-06, + "loss": 0.6615, + "step": 3426 + }, + { + "epoch": 0.22, + "grad_norm": 1.0299628973007202, + "learning_rate": 9.109801692098624e-06, + "loss": 0.6202, + "step": 3427 + }, + { + "epoch": 0.22, + "grad_norm": 0.8992637991905212, + "learning_rate": 9.109217256281196e-06, + "loss": 0.5849, + "step": 3428 + }, + { + "epoch": 0.22, + "grad_norm": 0.9295695424079895, + "learning_rate": 9.108632647438922e-06, + "loss": 0.6528, + "step": 3429 + }, + { + "epoch": 0.22, + "grad_norm": 0.9012725949287415, + "learning_rate": 9.108047865596421e-06, + "loss": 0.6073, + "step": 3430 + }, + { + "epoch": 0.22, + "grad_norm": 0.9266906380653381, + "learning_rate": 9.107462910778316e-06, + "loss": 0.5892, + "step": 3431 + }, + { + "epoch": 0.22, + "grad_norm": 0.8764145970344543, + "learning_rate": 9.106877783009236e-06, + "loss": 0.6318, + "step": 3432 + }, + { + "epoch": 0.22, + "grad_norm": 0.9359897375106812, + "learning_rate": 9.106292482313819e-06, + "loss": 0.6241, + "step": 3433 + }, + { + "epoch": 0.22, + "grad_norm": 0.8579049706459045, + "learning_rate": 9.105707008716712e-06, + "loss": 0.5882, + "step": 3434 + }, + { + "epoch": 0.22, + "grad_norm": 0.9599249362945557, + "learning_rate": 9.105121362242564e-06, + "loss": 0.6201, + "step": 3435 + }, + { + "epoch": 0.22, + "grad_norm": 0.8719608187675476, + "learning_rate": 9.104535542916035e-06, + "loss": 0.5912, + "step": 3436 + }, + { + "epoch": 0.22, + "grad_norm": 0.9004802107810974, + "learning_rate": 9.103949550761795e-06, + "loss": 0.6307, + "step": 3437 + }, + { + "epoch": 0.22, + "grad_norm": 0.9485353827476501, + "learning_rate": 9.103363385804516e-06, + "loss": 0.6454, + "step": 3438 + }, + { + "epoch": 0.22, + "grad_norm": 0.9338861703872681, + "learning_rate": 9.102777048068878e-06, + "loss": 0.6493, + "step": 3439 + }, + { + "epoch": 0.22, + "grad_norm": 0.9316098093986511, + "learning_rate": 9.102190537579572e-06, + "loss": 0.6294, + "step": 3440 + }, + { + "epoch": 0.22, + "grad_norm": 0.880497932434082, + "learning_rate": 9.101603854361291e-06, + "loss": 0.6074, + "step": 3441 + }, + { + "epoch": 0.22, + "grad_norm": 0.9129565954208374, + "learning_rate": 9.101016998438743e-06, + "loss": 0.5802, + "step": 3442 + }, + { + "epoch": 0.22, + "grad_norm": 0.9664899706840515, + "learning_rate": 9.100429969836636e-06, + "loss": 0.6579, + "step": 3443 + }, + { + "epoch": 0.22, + "grad_norm": 0.928165853023529, + "learning_rate": 9.099842768579685e-06, + "loss": 0.6038, + "step": 3444 + }, + { + "epoch": 0.22, + "grad_norm": 0.909015953540802, + "learning_rate": 9.099255394692618e-06, + "loss": 0.5444, + "step": 3445 + }, + { + "epoch": 0.22, + "grad_norm": 0.9096186757087708, + "learning_rate": 9.098667848200167e-06, + "loss": 0.5793, + "step": 3446 + }, + { + "epoch": 0.22, + "grad_norm": 0.969042956829071, + "learning_rate": 9.09808012912707e-06, + "loss": 0.6421, + "step": 3447 + }, + { + "epoch": 0.22, + "grad_norm": 0.9990017414093018, + "learning_rate": 9.097492237498076e-06, + "loss": 0.6331, + "step": 3448 + }, + { + "epoch": 0.22, + "grad_norm": 0.8431956768035889, + "learning_rate": 9.096904173337937e-06, + "loss": 0.6034, + "step": 3449 + }, + { + "epoch": 0.22, + "grad_norm": 0.967842698097229, + "learning_rate": 9.096315936671416e-06, + "loss": 0.6123, + "step": 3450 + }, + { + "epoch": 0.22, + "grad_norm": 0.9783948063850403, + "learning_rate": 9.095727527523282e-06, + "loss": 0.6611, + "step": 3451 + }, + { + "epoch": 0.22, + "grad_norm": 0.9480175971984863, + "learning_rate": 9.095138945918309e-06, + "loss": 0.6269, + "step": 3452 + }, + { + "epoch": 0.22, + "grad_norm": 0.8809651732444763, + "learning_rate": 9.094550191881281e-06, + "loss": 0.5726, + "step": 3453 + }, + { + "epoch": 0.22, + "grad_norm": 0.9356509447097778, + "learning_rate": 9.093961265436988e-06, + "loss": 0.6504, + "step": 3454 + }, + { + "epoch": 0.22, + "grad_norm": 0.8500334024429321, + "learning_rate": 9.093372166610229e-06, + "loss": 0.619, + "step": 3455 + }, + { + "epoch": 0.22, + "grad_norm": 0.8734151124954224, + "learning_rate": 9.092782895425806e-06, + "loss": 0.5817, + "step": 3456 + }, + { + "epoch": 0.22, + "grad_norm": 0.8919950723648071, + "learning_rate": 9.092193451908533e-06, + "loss": 0.6438, + "step": 3457 + }, + { + "epoch": 0.22, + "grad_norm": 0.9189222455024719, + "learning_rate": 9.091603836083231e-06, + "loss": 0.6717, + "step": 3458 + }, + { + "epoch": 0.22, + "grad_norm": 0.941829264163971, + "learning_rate": 9.091014047974725e-06, + "loss": 0.5565, + "step": 3459 + }, + { + "epoch": 0.22, + "grad_norm": 0.9333182573318481, + "learning_rate": 9.090424087607848e-06, + "loss": 0.6282, + "step": 3460 + }, + { + "epoch": 0.22, + "grad_norm": 0.8771211504936218, + "learning_rate": 9.089833955007443e-06, + "loss": 0.5849, + "step": 3461 + }, + { + "epoch": 0.22, + "grad_norm": 0.9246846437454224, + "learning_rate": 9.089243650198359e-06, + "loss": 0.6186, + "step": 3462 + }, + { + "epoch": 0.22, + "grad_norm": 0.8576235771179199, + "learning_rate": 9.088653173205449e-06, + "loss": 0.5996, + "step": 3463 + }, + { + "epoch": 0.22, + "grad_norm": 0.9263531565666199, + "learning_rate": 9.088062524053575e-06, + "loss": 0.6116, + "step": 3464 + }, + { + "epoch": 0.22, + "grad_norm": 0.8749649524688721, + "learning_rate": 9.087471702767612e-06, + "loss": 0.5922, + "step": 3465 + }, + { + "epoch": 0.22, + "grad_norm": 0.9297971725463867, + "learning_rate": 9.086880709372434e-06, + "loss": 0.6259, + "step": 3466 + }, + { + "epoch": 0.22, + "grad_norm": 0.8290271759033203, + "learning_rate": 9.086289543892928e-06, + "loss": 0.5753, + "step": 3467 + }, + { + "epoch": 0.22, + "grad_norm": 0.9221488833427429, + "learning_rate": 9.085698206353983e-06, + "loss": 0.5982, + "step": 3468 + }, + { + "epoch": 0.22, + "grad_norm": 0.8664331436157227, + "learning_rate": 9.085106696780499e-06, + "loss": 0.5829, + "step": 3469 + }, + { + "epoch": 0.22, + "grad_norm": 0.943659245967865, + "learning_rate": 9.084515015197384e-06, + "loss": 0.6722, + "step": 3470 + }, + { + "epoch": 0.22, + "grad_norm": 0.9838310480117798, + "learning_rate": 9.08392316162955e-06, + "loss": 0.6407, + "step": 3471 + }, + { + "epoch": 0.22, + "grad_norm": 0.9057297110557556, + "learning_rate": 9.083331136101921e-06, + "loss": 0.6113, + "step": 3472 + }, + { + "epoch": 0.22, + "grad_norm": 0.872379720211029, + "learning_rate": 9.08273893863942e-06, + "loss": 0.5884, + "step": 3473 + }, + { + "epoch": 0.22, + "grad_norm": 0.9110143184661865, + "learning_rate": 9.082146569266988e-06, + "loss": 0.5865, + "step": 3474 + }, + { + "epoch": 0.22, + "grad_norm": 0.9769248366355896, + "learning_rate": 9.081554028009562e-06, + "loss": 0.6642, + "step": 3475 + }, + { + "epoch": 0.22, + "grad_norm": 0.8390948176383972, + "learning_rate": 9.080961314892096e-06, + "loss": 0.6116, + "step": 3476 + }, + { + "epoch": 0.22, + "grad_norm": 0.9101285338401794, + "learning_rate": 9.080368429939546e-06, + "loss": 0.6063, + "step": 3477 + }, + { + "epoch": 0.22, + "grad_norm": 0.9952099323272705, + "learning_rate": 9.079775373176874e-06, + "loss": 0.6302, + "step": 3478 + }, + { + "epoch": 0.22, + "grad_norm": 0.9361991286277771, + "learning_rate": 9.079182144629055e-06, + "loss": 0.6237, + "step": 3479 + }, + { + "epoch": 0.22, + "grad_norm": 0.8918977975845337, + "learning_rate": 9.078588744321067e-06, + "loss": 0.5958, + "step": 3480 + }, + { + "epoch": 0.22, + "grad_norm": 0.9270057082176208, + "learning_rate": 9.077995172277894e-06, + "loss": 0.598, + "step": 3481 + }, + { + "epoch": 0.22, + "grad_norm": 0.9182881712913513, + "learning_rate": 9.07740142852453e-06, + "loss": 0.6355, + "step": 3482 + }, + { + "epoch": 0.22, + "grad_norm": 0.9537854194641113, + "learning_rate": 9.076807513085976e-06, + "loss": 0.6256, + "step": 3483 + }, + { + "epoch": 0.22, + "grad_norm": 0.9619026780128479, + "learning_rate": 9.076213425987242e-06, + "loss": 0.6517, + "step": 3484 + }, + { + "epoch": 0.22, + "grad_norm": 0.9318684339523315, + "learning_rate": 9.07561916725334e-06, + "loss": 0.6745, + "step": 3485 + }, + { + "epoch": 0.22, + "grad_norm": 0.9609551429748535, + "learning_rate": 9.075024736909292e-06, + "loss": 0.6062, + "step": 3486 + }, + { + "epoch": 0.22, + "grad_norm": 0.9244940280914307, + "learning_rate": 9.074430134980129e-06, + "loss": 0.6348, + "step": 3487 + }, + { + "epoch": 0.22, + "grad_norm": 0.9906083941459656, + "learning_rate": 9.073835361490885e-06, + "loss": 0.6681, + "step": 3488 + }, + { + "epoch": 0.22, + "grad_norm": 0.9201457500457764, + "learning_rate": 9.073240416466609e-06, + "loss": 0.6429, + "step": 3489 + }, + { + "epoch": 0.22, + "grad_norm": 0.8737314939498901, + "learning_rate": 9.072645299932347e-06, + "loss": 0.6151, + "step": 3490 + }, + { + "epoch": 0.22, + "grad_norm": 0.8806108832359314, + "learning_rate": 9.07205001191316e-06, + "loss": 0.6441, + "step": 3491 + }, + { + "epoch": 0.22, + "grad_norm": 0.8998177647590637, + "learning_rate": 9.071454552434111e-06, + "loss": 0.6407, + "step": 3492 + }, + { + "epoch": 0.22, + "grad_norm": 0.8950275778770447, + "learning_rate": 9.070858921520276e-06, + "loss": 0.6341, + "step": 3493 + }, + { + "epoch": 0.22, + "grad_norm": 0.8834101557731628, + "learning_rate": 9.070263119196734e-06, + "loss": 0.6065, + "step": 3494 + }, + { + "epoch": 0.22, + "grad_norm": 0.9296960830688477, + "learning_rate": 9.06966714548857e-06, + "loss": 0.6307, + "step": 3495 + }, + { + "epoch": 0.22, + "grad_norm": 0.8565431833267212, + "learning_rate": 9.069071000420879e-06, + "loss": 0.607, + "step": 3496 + }, + { + "epoch": 0.22, + "grad_norm": 0.9660019874572754, + "learning_rate": 9.068474684018765e-06, + "loss": 0.5671, + "step": 3497 + }, + { + "epoch": 0.22, + "grad_norm": 0.9031816124916077, + "learning_rate": 9.067878196307334e-06, + "loss": 0.6158, + "step": 3498 + }, + { + "epoch": 0.22, + "grad_norm": 0.8707241415977478, + "learning_rate": 9.067281537311705e-06, + "loss": 0.5942, + "step": 3499 + }, + { + "epoch": 0.22, + "grad_norm": 0.9110444188117981, + "learning_rate": 9.066684707056999e-06, + "loss": 0.5835, + "step": 3500 + }, + { + "epoch": 0.22, + "grad_norm": 0.9043798446655273, + "learning_rate": 9.066087705568346e-06, + "loss": 0.6047, + "step": 3501 + }, + { + "epoch": 0.22, + "grad_norm": 0.9171016216278076, + "learning_rate": 9.065490532870884e-06, + "loss": 0.5593, + "step": 3502 + }, + { + "epoch": 0.22, + "grad_norm": 0.9416684508323669, + "learning_rate": 9.06489318898976e-06, + "loss": 0.6465, + "step": 3503 + }, + { + "epoch": 0.22, + "grad_norm": 0.9238849878311157, + "learning_rate": 9.064295673950125e-06, + "loss": 0.653, + "step": 3504 + }, + { + "epoch": 0.22, + "grad_norm": 0.9581873416900635, + "learning_rate": 9.063697987777136e-06, + "loss": 0.6547, + "step": 3505 + }, + { + "epoch": 0.22, + "grad_norm": 0.907537579536438, + "learning_rate": 9.063100130495962e-06, + "loss": 0.6362, + "step": 3506 + }, + { + "epoch": 0.22, + "grad_norm": 0.8580865859985352, + "learning_rate": 9.062502102131777e-06, + "loss": 0.6312, + "step": 3507 + }, + { + "epoch": 0.22, + "grad_norm": 0.9068456888198853, + "learning_rate": 9.06190390270976e-06, + "loss": 0.6583, + "step": 3508 + }, + { + "epoch": 0.22, + "grad_norm": 0.8349429368972778, + "learning_rate": 9.0613055322551e-06, + "loss": 0.6388, + "step": 3509 + }, + { + "epoch": 0.22, + "grad_norm": 0.8973667621612549, + "learning_rate": 9.060706990792993e-06, + "loss": 0.6076, + "step": 3510 + }, + { + "epoch": 0.22, + "grad_norm": 0.8447120189666748, + "learning_rate": 9.06010827834864e-06, + "loss": 0.6158, + "step": 3511 + }, + { + "epoch": 0.22, + "grad_norm": 0.8853378295898438, + "learning_rate": 9.059509394947252e-06, + "loss": 0.6026, + "step": 3512 + }, + { + "epoch": 0.22, + "grad_norm": 0.9272050857543945, + "learning_rate": 9.058910340614045e-06, + "loss": 0.6184, + "step": 3513 + }, + { + "epoch": 0.22, + "grad_norm": 0.8689481616020203, + "learning_rate": 9.058311115374244e-06, + "loss": 0.6424, + "step": 3514 + }, + { + "epoch": 0.22, + "grad_norm": 0.8084876537322998, + "learning_rate": 9.057711719253077e-06, + "loss": 0.5953, + "step": 3515 + }, + { + "epoch": 0.22, + "grad_norm": 0.9154835343360901, + "learning_rate": 9.057112152275788e-06, + "loss": 0.6471, + "step": 3516 + }, + { + "epoch": 0.22, + "grad_norm": 0.9204840660095215, + "learning_rate": 9.05651241446762e-06, + "loss": 0.6131, + "step": 3517 + }, + { + "epoch": 0.22, + "grad_norm": 0.8655226826667786, + "learning_rate": 9.055912505853826e-06, + "loss": 0.577, + "step": 3518 + }, + { + "epoch": 0.22, + "grad_norm": 0.9272779226303101, + "learning_rate": 9.055312426459663e-06, + "loss": 0.6352, + "step": 3519 + }, + { + "epoch": 0.22, + "grad_norm": 0.955590009689331, + "learning_rate": 9.054712176310405e-06, + "loss": 0.7019, + "step": 3520 + }, + { + "epoch": 0.22, + "grad_norm": 0.898430585861206, + "learning_rate": 9.05411175543132e-06, + "loss": 0.5869, + "step": 3521 + }, + { + "epoch": 0.22, + "grad_norm": 0.908953845500946, + "learning_rate": 9.053511163847694e-06, + "loss": 0.5677, + "step": 3522 + }, + { + "epoch": 0.22, + "grad_norm": 0.9534192085266113, + "learning_rate": 9.052910401584812e-06, + "loss": 0.6819, + "step": 3523 + }, + { + "epoch": 0.22, + "grad_norm": 0.8306724429130554, + "learning_rate": 9.052309468667974e-06, + "loss": 0.591, + "step": 3524 + }, + { + "epoch": 0.22, + "grad_norm": 0.7923970818519592, + "learning_rate": 9.05170836512248e-06, + "loss": 0.5629, + "step": 3525 + }, + { + "epoch": 0.22, + "grad_norm": 0.9316359162330627, + "learning_rate": 9.051107090973642e-06, + "loss": 0.6095, + "step": 3526 + }, + { + "epoch": 0.22, + "grad_norm": 0.9419963359832764, + "learning_rate": 9.050505646246777e-06, + "loss": 0.6759, + "step": 3527 + }, + { + "epoch": 0.22, + "grad_norm": 0.8494296669960022, + "learning_rate": 9.04990403096721e-06, + "loss": 0.582, + "step": 3528 + }, + { + "epoch": 0.22, + "grad_norm": 0.9111973643302917, + "learning_rate": 9.049302245160273e-06, + "loss": 0.6585, + "step": 3529 + }, + { + "epoch": 0.22, + "grad_norm": 0.906576931476593, + "learning_rate": 9.048700288851305e-06, + "loss": 0.6302, + "step": 3530 + }, + { + "epoch": 0.22, + "grad_norm": 0.8784658312797546, + "learning_rate": 9.048098162065652e-06, + "loss": 0.653, + "step": 3531 + }, + { + "epoch": 0.22, + "grad_norm": 0.8768582344055176, + "learning_rate": 9.047495864828668e-06, + "loss": 0.6133, + "step": 3532 + }, + { + "epoch": 0.22, + "grad_norm": 0.8660056591033936, + "learning_rate": 9.046893397165713e-06, + "loss": 0.6245, + "step": 3533 + }, + { + "epoch": 0.22, + "grad_norm": 0.923427939414978, + "learning_rate": 9.046290759102155e-06, + "loss": 0.6139, + "step": 3534 + }, + { + "epoch": 0.22, + "grad_norm": 0.8469942212104797, + "learning_rate": 9.04568795066337e-06, + "loss": 0.5996, + "step": 3535 + }, + { + "epoch": 0.22, + "grad_norm": 0.9075682759284973, + "learning_rate": 9.045084971874738e-06, + "loss": 0.6336, + "step": 3536 + }, + { + "epoch": 0.22, + "grad_norm": 0.9033473134040833, + "learning_rate": 9.044481822761651e-06, + "loss": 0.6277, + "step": 3537 + }, + { + "epoch": 0.22, + "grad_norm": 0.9756919145584106, + "learning_rate": 9.043878503349503e-06, + "loss": 0.6174, + "step": 3538 + }, + { + "epoch": 0.22, + "grad_norm": 0.8659248352050781, + "learning_rate": 9.043275013663699e-06, + "loss": 0.5844, + "step": 3539 + }, + { + "epoch": 0.22, + "grad_norm": 0.9130862355232239, + "learning_rate": 9.04267135372965e-06, + "loss": 0.6517, + "step": 3540 + }, + { + "epoch": 0.22, + "grad_norm": 0.9586864709854126, + "learning_rate": 9.042067523572775e-06, + "loss": 0.6081, + "step": 3541 + }, + { + "epoch": 0.22, + "grad_norm": 0.9088827967643738, + "learning_rate": 9.041463523218496e-06, + "loss": 0.6973, + "step": 3542 + }, + { + "epoch": 0.22, + "grad_norm": 0.9386407136917114, + "learning_rate": 9.040859352692249e-06, + "loss": 0.6771, + "step": 3543 + }, + { + "epoch": 0.22, + "grad_norm": 0.9016104340553284, + "learning_rate": 9.04025501201947e-06, + "loss": 0.6504, + "step": 3544 + }, + { + "epoch": 0.22, + "grad_norm": 0.8565789461135864, + "learning_rate": 9.039650501225608e-06, + "loss": 0.6128, + "step": 3545 + }, + { + "epoch": 0.22, + "grad_norm": 0.8813103437423706, + "learning_rate": 9.039045820336116e-06, + "loss": 0.6283, + "step": 3546 + }, + { + "epoch": 0.22, + "grad_norm": 0.883348286151886, + "learning_rate": 9.038440969376456e-06, + "loss": 0.6106, + "step": 3547 + }, + { + "epoch": 0.22, + "grad_norm": 0.8444504737854004, + "learning_rate": 9.037835948372095e-06, + "loss": 0.5763, + "step": 3548 + }, + { + "epoch": 0.22, + "grad_norm": 0.856566846370697, + "learning_rate": 9.03723075734851e-06, + "loss": 0.601, + "step": 3549 + }, + { + "epoch": 0.22, + "grad_norm": 0.9262292385101318, + "learning_rate": 9.03662539633118e-06, + "loss": 0.5976, + "step": 3550 + }, + { + "epoch": 0.22, + "grad_norm": 0.931098461151123, + "learning_rate": 9.0360198653456e-06, + "loss": 0.6391, + "step": 3551 + }, + { + "epoch": 0.23, + "grad_norm": 0.9265716075897217, + "learning_rate": 9.035414164417262e-06, + "loss": 0.5899, + "step": 3552 + }, + { + "epoch": 0.23, + "grad_norm": 0.9725390672683716, + "learning_rate": 9.034808293571672e-06, + "loss": 0.6615, + "step": 3553 + }, + { + "epoch": 0.23, + "grad_norm": 0.9248775839805603, + "learning_rate": 9.03420225283434e-06, + "loss": 0.6472, + "step": 3554 + }, + { + "epoch": 0.23, + "grad_norm": 0.851396381855011, + "learning_rate": 9.033596042230788e-06, + "loss": 0.5497, + "step": 3555 + }, + { + "epoch": 0.23, + "grad_norm": 0.9172872304916382, + "learning_rate": 9.032989661786535e-06, + "loss": 0.65, + "step": 3556 + }, + { + "epoch": 0.23, + "grad_norm": 0.8770195841789246, + "learning_rate": 9.032383111527119e-06, + "loss": 0.6253, + "step": 3557 + }, + { + "epoch": 0.23, + "grad_norm": 0.9005029201507568, + "learning_rate": 9.031776391478077e-06, + "loss": 0.631, + "step": 3558 + }, + { + "epoch": 0.23, + "grad_norm": 0.8701792359352112, + "learning_rate": 9.031169501664958e-06, + "loss": 0.6235, + "step": 3559 + }, + { + "epoch": 0.23, + "grad_norm": 0.9129980206489563, + "learning_rate": 9.030562442113313e-06, + "loss": 0.6273, + "step": 3560 + }, + { + "epoch": 0.23, + "grad_norm": 0.9068407416343689, + "learning_rate": 9.029955212848706e-06, + "loss": 0.6408, + "step": 3561 + }, + { + "epoch": 0.23, + "grad_norm": 0.9272667169570923, + "learning_rate": 9.029347813896704e-06, + "loss": 0.5862, + "step": 3562 + }, + { + "epoch": 0.23, + "grad_norm": 0.8602524399757385, + "learning_rate": 9.028740245282881e-06, + "loss": 0.6004, + "step": 3563 + }, + { + "epoch": 0.23, + "grad_norm": 0.9108449220657349, + "learning_rate": 9.028132507032823e-06, + "loss": 0.6113, + "step": 3564 + }, + { + "epoch": 0.23, + "grad_norm": 0.8397127985954285, + "learning_rate": 9.027524599172117e-06, + "loss": 0.601, + "step": 3565 + }, + { + "epoch": 0.23, + "grad_norm": 0.9540258049964905, + "learning_rate": 9.026916521726361e-06, + "loss": 0.6869, + "step": 3566 + }, + { + "epoch": 0.23, + "grad_norm": 0.9084812998771667, + "learning_rate": 9.026308274721161e-06, + "loss": 0.5817, + "step": 3567 + }, + { + "epoch": 0.23, + "grad_norm": 0.9301480650901794, + "learning_rate": 9.025699858182125e-06, + "loss": 0.5917, + "step": 3568 + }, + { + "epoch": 0.23, + "grad_norm": 0.8542090058326721, + "learning_rate": 9.02509127213487e-06, + "loss": 0.6182, + "step": 3569 + }, + { + "epoch": 0.23, + "grad_norm": 0.8809559941291809, + "learning_rate": 9.024482516605026e-06, + "loss": 0.5781, + "step": 3570 + }, + { + "epoch": 0.23, + "grad_norm": 0.9583331346511841, + "learning_rate": 9.023873591618224e-06, + "loss": 0.6249, + "step": 3571 + }, + { + "epoch": 0.23, + "grad_norm": 0.9110972881317139, + "learning_rate": 9.023264497200102e-06, + "loss": 0.624, + "step": 3572 + }, + { + "epoch": 0.23, + "grad_norm": 0.876470685005188, + "learning_rate": 9.022655233376308e-06, + "loss": 0.6066, + "step": 3573 + }, + { + "epoch": 0.23, + "grad_norm": 0.8327741622924805, + "learning_rate": 9.022045800172493e-06, + "loss": 0.592, + "step": 3574 + }, + { + "epoch": 0.23, + "grad_norm": 0.9016212821006775, + "learning_rate": 9.021436197614326e-06, + "loss": 0.5999, + "step": 3575 + }, + { + "epoch": 0.23, + "grad_norm": 0.9393583536148071, + "learning_rate": 9.020826425727468e-06, + "loss": 0.6292, + "step": 3576 + }, + { + "epoch": 0.23, + "grad_norm": 0.8916171193122864, + "learning_rate": 9.020216484537595e-06, + "loss": 0.6681, + "step": 3577 + }, + { + "epoch": 0.23, + "grad_norm": 0.9707697629928589, + "learning_rate": 9.019606374070394e-06, + "loss": 0.6506, + "step": 3578 + }, + { + "epoch": 0.23, + "grad_norm": 0.9742267727851868, + "learning_rate": 9.01899609435155e-06, + "loss": 0.6432, + "step": 3579 + }, + { + "epoch": 0.23, + "grad_norm": 0.9248902201652527, + "learning_rate": 9.018385645406765e-06, + "loss": 0.627, + "step": 3580 + }, + { + "epoch": 0.23, + "grad_norm": 0.8701397180557251, + "learning_rate": 9.017775027261735e-06, + "loss": 0.6343, + "step": 3581 + }, + { + "epoch": 0.23, + "grad_norm": 0.8465285897254944, + "learning_rate": 9.017164239942178e-06, + "loss": 0.6101, + "step": 3582 + }, + { + "epoch": 0.23, + "grad_norm": 0.8863876461982727, + "learning_rate": 9.016553283473808e-06, + "loss": 0.6401, + "step": 3583 + }, + { + "epoch": 0.23, + "grad_norm": 0.8480295538902283, + "learning_rate": 9.015942157882353e-06, + "loss": 0.5718, + "step": 3584 + }, + { + "epoch": 0.23, + "grad_norm": 0.8785873055458069, + "learning_rate": 9.015330863193543e-06, + "loss": 0.6074, + "step": 3585 + }, + { + "epoch": 0.23, + "grad_norm": 0.8759261965751648, + "learning_rate": 9.01471939943312e-06, + "loss": 0.6138, + "step": 3586 + }, + { + "epoch": 0.23, + "grad_norm": 0.8847134113311768, + "learning_rate": 9.014107766626828e-06, + "loss": 0.5651, + "step": 3587 + }, + { + "epoch": 0.23, + "grad_norm": 0.8662316203117371, + "learning_rate": 9.013495964800423e-06, + "loss": 0.6643, + "step": 3588 + }, + { + "epoch": 0.23, + "grad_norm": 0.8825305700302124, + "learning_rate": 9.012883993979663e-06, + "loss": 0.7025, + "step": 3589 + }, + { + "epoch": 0.23, + "grad_norm": 0.8754686713218689, + "learning_rate": 9.01227185419032e-06, + "loss": 0.6276, + "step": 3590 + }, + { + "epoch": 0.23, + "grad_norm": 0.9244438409805298, + "learning_rate": 9.011659545458167e-06, + "loss": 0.5912, + "step": 3591 + }, + { + "epoch": 0.23, + "grad_norm": 0.9278584718704224, + "learning_rate": 9.011047067808985e-06, + "loss": 0.6441, + "step": 3592 + }, + { + "epoch": 0.23, + "grad_norm": 0.853956401348114, + "learning_rate": 9.010434421268564e-06, + "loss": 0.5881, + "step": 3593 + }, + { + "epoch": 0.23, + "grad_norm": 0.903804361820221, + "learning_rate": 9.009821605862701e-06, + "loss": 0.6268, + "step": 3594 + }, + { + "epoch": 0.23, + "grad_norm": 0.884956955909729, + "learning_rate": 9.0092086216172e-06, + "loss": 0.5976, + "step": 3595 + }, + { + "epoch": 0.23, + "grad_norm": 0.8600631952285767, + "learning_rate": 9.00859546855787e-06, + "loss": 0.5976, + "step": 3596 + }, + { + "epoch": 0.23, + "grad_norm": 0.8109932541847229, + "learning_rate": 9.007982146710533e-06, + "loss": 0.5807, + "step": 3597 + }, + { + "epoch": 0.23, + "grad_norm": 0.8790200352668762, + "learning_rate": 9.007368656101006e-06, + "loss": 0.6335, + "step": 3598 + }, + { + "epoch": 0.23, + "grad_norm": 0.8840540647506714, + "learning_rate": 9.006754996755129e-06, + "loss": 0.5932, + "step": 3599 + }, + { + "epoch": 0.23, + "grad_norm": 0.9371446967124939, + "learning_rate": 9.006141168698735e-06, + "loss": 0.6723, + "step": 3600 + }, + { + "epoch": 0.23, + "grad_norm": 0.8178922533988953, + "learning_rate": 9.005527171957676e-06, + "loss": 0.5882, + "step": 3601 + }, + { + "epoch": 0.23, + "grad_norm": 0.9067853093147278, + "learning_rate": 9.004913006557798e-06, + "loss": 0.6432, + "step": 3602 + }, + { + "epoch": 0.23, + "grad_norm": 0.8906139135360718, + "learning_rate": 9.004298672524967e-06, + "loss": 0.6492, + "step": 3603 + }, + { + "epoch": 0.23, + "grad_norm": 0.8456130623817444, + "learning_rate": 9.003684169885049e-06, + "loss": 0.6127, + "step": 3604 + }, + { + "epoch": 0.23, + "grad_norm": 0.8719025254249573, + "learning_rate": 9.00306949866392e-06, + "loss": 0.6, + "step": 3605 + }, + { + "epoch": 0.23, + "grad_norm": 0.8929893970489502, + "learning_rate": 9.002454658887458e-06, + "loss": 0.5915, + "step": 3606 + }, + { + "epoch": 0.23, + "grad_norm": 0.9277382493019104, + "learning_rate": 9.001839650581554e-06, + "loss": 0.6316, + "step": 3607 + }, + { + "epoch": 0.23, + "grad_norm": 0.9326600432395935, + "learning_rate": 9.001224473772104e-06, + "loss": 0.6662, + "step": 3608 + }, + { + "epoch": 0.23, + "grad_norm": 0.8711685538291931, + "learning_rate": 9.000609128485011e-06, + "loss": 0.5557, + "step": 3609 + }, + { + "epoch": 0.23, + "grad_norm": 0.9938933849334717, + "learning_rate": 8.999993614746184e-06, + "loss": 0.6923, + "step": 3610 + }, + { + "epoch": 0.23, + "grad_norm": 0.8392737507820129, + "learning_rate": 8.999377932581541e-06, + "loss": 0.5789, + "step": 3611 + }, + { + "epoch": 0.23, + "grad_norm": 0.9159629344940186, + "learning_rate": 8.998762082017006e-06, + "loss": 0.6179, + "step": 3612 + }, + { + "epoch": 0.23, + "grad_norm": 0.9216225743293762, + "learning_rate": 8.998146063078512e-06, + "loss": 0.6627, + "step": 3613 + }, + { + "epoch": 0.23, + "grad_norm": 0.8778311610221863, + "learning_rate": 8.997529875791993e-06, + "loss": 0.6039, + "step": 3614 + }, + { + "epoch": 0.23, + "grad_norm": 0.9303637742996216, + "learning_rate": 8.9969135201834e-06, + "loss": 0.6187, + "step": 3615 + }, + { + "epoch": 0.23, + "grad_norm": 0.9529017806053162, + "learning_rate": 8.996296996278682e-06, + "loss": 0.6698, + "step": 3616 + }, + { + "epoch": 0.23, + "grad_norm": 0.8703224658966064, + "learning_rate": 8.9956803041038e-06, + "loss": 0.6047, + "step": 3617 + }, + { + "epoch": 0.23, + "grad_norm": 0.8795974254608154, + "learning_rate": 8.99506344368472e-06, + "loss": 0.5653, + "step": 3618 + }, + { + "epoch": 0.23, + "grad_norm": 0.8557493090629578, + "learning_rate": 8.994446415047415e-06, + "loss": 0.5735, + "step": 3619 + }, + { + "epoch": 0.23, + "grad_norm": 0.8863241672515869, + "learning_rate": 8.993829218217867e-06, + "loss": 0.6194, + "step": 3620 + }, + { + "epoch": 0.23, + "grad_norm": 0.8855205178260803, + "learning_rate": 8.993211853222065e-06, + "loss": 0.6443, + "step": 3621 + }, + { + "epoch": 0.23, + "grad_norm": 0.9319906830787659, + "learning_rate": 8.992594320086005e-06, + "loss": 0.6551, + "step": 3622 + }, + { + "epoch": 0.23, + "grad_norm": 0.863646924495697, + "learning_rate": 8.991976618835685e-06, + "loss": 0.6152, + "step": 3623 + }, + { + "epoch": 0.23, + "grad_norm": 0.9434888362884521, + "learning_rate": 8.991358749497117e-06, + "loss": 0.6381, + "step": 3624 + }, + { + "epoch": 0.23, + "grad_norm": 0.9003688097000122, + "learning_rate": 8.990740712096317e-06, + "loss": 0.6295, + "step": 3625 + }, + { + "epoch": 0.23, + "grad_norm": 0.8399546146392822, + "learning_rate": 8.99012250665931e-06, + "loss": 0.5874, + "step": 3626 + }, + { + "epoch": 0.23, + "grad_norm": 0.9593385457992554, + "learning_rate": 8.989504133212123e-06, + "loss": 0.7235, + "step": 3627 + }, + { + "epoch": 0.23, + "grad_norm": 0.8997763991355896, + "learning_rate": 8.988885591780795e-06, + "loss": 0.6178, + "step": 3628 + }, + { + "epoch": 0.23, + "grad_norm": 0.888486385345459, + "learning_rate": 8.988266882391374e-06, + "loss": 0.6022, + "step": 3629 + }, + { + "epoch": 0.23, + "grad_norm": 0.8956373929977417, + "learning_rate": 8.987648005069907e-06, + "loss": 0.6401, + "step": 3630 + }, + { + "epoch": 0.23, + "grad_norm": 0.9338024854660034, + "learning_rate": 8.987028959842454e-06, + "loss": 0.6308, + "step": 3631 + }, + { + "epoch": 0.23, + "grad_norm": 0.9498031139373779, + "learning_rate": 8.986409746735084e-06, + "loss": 0.5903, + "step": 3632 + }, + { + "epoch": 0.23, + "grad_norm": 0.9222273826599121, + "learning_rate": 8.985790365773864e-06, + "loss": 0.6238, + "step": 3633 + }, + { + "epoch": 0.23, + "grad_norm": 0.8916066884994507, + "learning_rate": 8.985170816984878e-06, + "loss": 0.5938, + "step": 3634 + }, + { + "epoch": 0.23, + "grad_norm": 0.901877760887146, + "learning_rate": 8.984551100394212e-06, + "loss": 0.6308, + "step": 3635 + }, + { + "epoch": 0.23, + "grad_norm": 0.9297860264778137, + "learning_rate": 8.98393121602796e-06, + "loss": 0.6093, + "step": 3636 + }, + { + "epoch": 0.23, + "grad_norm": 0.9041366577148438, + "learning_rate": 8.983311163912227e-06, + "loss": 0.5895, + "step": 3637 + }, + { + "epoch": 0.23, + "grad_norm": 0.9007093906402588, + "learning_rate": 8.982690944073113e-06, + "loss": 0.5988, + "step": 3638 + }, + { + "epoch": 0.23, + "grad_norm": 0.8943149447441101, + "learning_rate": 8.982070556536741e-06, + "loss": 0.6197, + "step": 3639 + }, + { + "epoch": 0.23, + "grad_norm": 1.2114888429641724, + "learning_rate": 8.98145000132923e-06, + "loss": 0.639, + "step": 3640 + }, + { + "epoch": 0.23, + "grad_norm": 0.8697226047515869, + "learning_rate": 8.980829278476711e-06, + "loss": 0.612, + "step": 3641 + }, + { + "epoch": 0.23, + "grad_norm": 0.9321666359901428, + "learning_rate": 8.980208388005318e-06, + "loss": 0.6106, + "step": 3642 + }, + { + "epoch": 0.23, + "grad_norm": 0.8679060935974121, + "learning_rate": 8.979587329941197e-06, + "loss": 0.635, + "step": 3643 + }, + { + "epoch": 0.23, + "grad_norm": 0.8968499898910522, + "learning_rate": 8.978966104310497e-06, + "loss": 0.6707, + "step": 3644 + }, + { + "epoch": 0.23, + "grad_norm": 0.8723733425140381, + "learning_rate": 8.978344711139374e-06, + "loss": 0.5988, + "step": 3645 + }, + { + "epoch": 0.23, + "grad_norm": 0.9933862090110779, + "learning_rate": 8.977723150453999e-06, + "loss": 0.6475, + "step": 3646 + }, + { + "epoch": 0.23, + "grad_norm": 0.9089076519012451, + "learning_rate": 8.977101422280536e-06, + "loss": 0.6124, + "step": 3647 + }, + { + "epoch": 0.23, + "grad_norm": 0.8408138155937195, + "learning_rate": 8.97647952664517e-06, + "loss": 0.5773, + "step": 3648 + }, + { + "epoch": 0.23, + "grad_norm": 0.8604863882064819, + "learning_rate": 8.975857463574082e-06, + "loss": 0.5365, + "step": 3649 + }, + { + "epoch": 0.23, + "grad_norm": 0.8891851902008057, + "learning_rate": 8.97523523309347e-06, + "loss": 0.5713, + "step": 3650 + }, + { + "epoch": 0.23, + "grad_norm": 0.8845424652099609, + "learning_rate": 8.974612835229528e-06, + "loss": 0.6176, + "step": 3651 + }, + { + "epoch": 0.23, + "grad_norm": 0.8516875505447388, + "learning_rate": 8.973990270008467e-06, + "loss": 0.6297, + "step": 3652 + }, + { + "epoch": 0.23, + "grad_norm": 0.8722536563873291, + "learning_rate": 8.973367537456502e-06, + "loss": 0.6413, + "step": 3653 + }, + { + "epoch": 0.23, + "grad_norm": 0.9590871334075928, + "learning_rate": 8.97274463759985e-06, + "loss": 0.6862, + "step": 3654 + }, + { + "epoch": 0.23, + "grad_norm": 0.952622652053833, + "learning_rate": 8.972121570464744e-06, + "loss": 0.6474, + "step": 3655 + }, + { + "epoch": 0.23, + "grad_norm": 0.8545388579368591, + "learning_rate": 8.971498336077415e-06, + "loss": 0.6003, + "step": 3656 + }, + { + "epoch": 0.23, + "grad_norm": 0.9351946115493774, + "learning_rate": 8.970874934464108e-06, + "loss": 0.6054, + "step": 3657 + }, + { + "epoch": 0.23, + "grad_norm": 0.8703538179397583, + "learning_rate": 8.970251365651071e-06, + "loss": 0.6466, + "step": 3658 + }, + { + "epoch": 0.23, + "grad_norm": 0.908089816570282, + "learning_rate": 8.969627629664559e-06, + "loss": 0.5536, + "step": 3659 + }, + { + "epoch": 0.23, + "grad_norm": 0.8776571154594421, + "learning_rate": 8.969003726530838e-06, + "loss": 0.6136, + "step": 3660 + }, + { + "epoch": 0.23, + "grad_norm": 0.901607096195221, + "learning_rate": 8.968379656276177e-06, + "loss": 0.6526, + "step": 3661 + }, + { + "epoch": 0.23, + "grad_norm": 0.9027665853500366, + "learning_rate": 8.967755418926854e-06, + "loss": 0.6117, + "step": 3662 + }, + { + "epoch": 0.23, + "grad_norm": 0.8891413807868958, + "learning_rate": 8.967131014509152e-06, + "loss": 0.586, + "step": 3663 + }, + { + "epoch": 0.23, + "grad_norm": 0.9754297137260437, + "learning_rate": 8.966506443049366e-06, + "loss": 0.6608, + "step": 3664 + }, + { + "epoch": 0.23, + "grad_norm": 0.9024408459663391, + "learning_rate": 8.965881704573789e-06, + "loss": 0.6286, + "step": 3665 + }, + { + "epoch": 0.23, + "grad_norm": 1.0116610527038574, + "learning_rate": 8.965256799108733e-06, + "loss": 0.6086, + "step": 3666 + }, + { + "epoch": 0.23, + "grad_norm": 0.9310095310211182, + "learning_rate": 8.964631726680504e-06, + "loss": 0.6619, + "step": 3667 + }, + { + "epoch": 0.23, + "grad_norm": 0.9279587268829346, + "learning_rate": 8.964006487315426e-06, + "loss": 0.6823, + "step": 3668 + }, + { + "epoch": 0.23, + "grad_norm": 0.8194244503974915, + "learning_rate": 8.963381081039826e-06, + "loss": 0.6129, + "step": 3669 + }, + { + "epoch": 0.23, + "grad_norm": 0.8662837147712708, + "learning_rate": 8.962755507880036e-06, + "loss": 0.6013, + "step": 3670 + }, + { + "epoch": 0.23, + "grad_norm": 0.8568682074546814, + "learning_rate": 8.962129767862395e-06, + "loss": 0.6053, + "step": 3671 + }, + { + "epoch": 0.23, + "grad_norm": 0.9012435078620911, + "learning_rate": 8.961503861013255e-06, + "loss": 0.6542, + "step": 3672 + }, + { + "epoch": 0.23, + "grad_norm": 0.8900840282440186, + "learning_rate": 8.960877787358968e-06, + "loss": 0.6021, + "step": 3673 + }, + { + "epoch": 0.23, + "grad_norm": 0.9452951550483704, + "learning_rate": 8.960251546925895e-06, + "loss": 0.6342, + "step": 3674 + }, + { + "epoch": 0.23, + "grad_norm": 0.7982982397079468, + "learning_rate": 8.959625139740407e-06, + "loss": 0.5353, + "step": 3675 + }, + { + "epoch": 0.23, + "grad_norm": 0.9291501045227051, + "learning_rate": 8.95899856582888e-06, + "loss": 0.6417, + "step": 3676 + }, + { + "epoch": 0.23, + "grad_norm": 0.8522927761077881, + "learning_rate": 8.958371825217693e-06, + "loss": 0.5758, + "step": 3677 + }, + { + "epoch": 0.23, + "grad_norm": 0.8960750699043274, + "learning_rate": 8.957744917933241e-06, + "loss": 0.5945, + "step": 3678 + }, + { + "epoch": 0.23, + "grad_norm": 0.8411138653755188, + "learning_rate": 8.957117844001919e-06, + "loss": 0.6068, + "step": 3679 + }, + { + "epoch": 0.23, + "grad_norm": 0.9141689538955688, + "learning_rate": 8.956490603450128e-06, + "loss": 0.6117, + "step": 3680 + }, + { + "epoch": 0.23, + "grad_norm": 0.9008049964904785, + "learning_rate": 8.955863196304282e-06, + "loss": 0.6095, + "step": 3681 + }, + { + "epoch": 0.23, + "grad_norm": 0.9140220284461975, + "learning_rate": 8.9552356225908e-06, + "loss": 0.5998, + "step": 3682 + }, + { + "epoch": 0.23, + "grad_norm": 0.8372965455055237, + "learning_rate": 8.954607882336105e-06, + "loss": 0.5772, + "step": 3683 + }, + { + "epoch": 0.23, + "grad_norm": 0.9582294225692749, + "learning_rate": 8.953979975566626e-06, + "loss": 0.6542, + "step": 3684 + }, + { + "epoch": 0.23, + "grad_norm": 0.9331498146057129, + "learning_rate": 8.953351902308807e-06, + "loss": 0.6334, + "step": 3685 + }, + { + "epoch": 0.23, + "grad_norm": 0.9214125871658325, + "learning_rate": 8.952723662589093e-06, + "loss": 0.6551, + "step": 3686 + }, + { + "epoch": 0.23, + "grad_norm": 0.9166949987411499, + "learning_rate": 8.952095256433934e-06, + "loss": 0.6552, + "step": 3687 + }, + { + "epoch": 0.23, + "grad_norm": 0.9024720191955566, + "learning_rate": 8.951466683869795e-06, + "loss": 0.5499, + "step": 3688 + }, + { + "epoch": 0.23, + "grad_norm": 0.8689588308334351, + "learning_rate": 8.950837944923138e-06, + "loss": 0.5789, + "step": 3689 + }, + { + "epoch": 0.23, + "grad_norm": 0.9102311134338379, + "learning_rate": 8.95020903962044e-06, + "loss": 0.6514, + "step": 3690 + }, + { + "epoch": 0.23, + "grad_norm": 0.9357526302337646, + "learning_rate": 8.94957996798818e-06, + "loss": 0.608, + "step": 3691 + }, + { + "epoch": 0.23, + "grad_norm": 0.8915140628814697, + "learning_rate": 8.948950730052847e-06, + "loss": 0.6221, + "step": 3692 + }, + { + "epoch": 0.23, + "grad_norm": 0.9464169144630432, + "learning_rate": 8.948321325840937e-06, + "loss": 0.6701, + "step": 3693 + }, + { + "epoch": 0.23, + "grad_norm": 0.9279240965843201, + "learning_rate": 8.94769175537895e-06, + "loss": 0.6052, + "step": 3694 + }, + { + "epoch": 0.23, + "grad_norm": 0.9310309886932373, + "learning_rate": 8.9470620186934e-06, + "loss": 0.6523, + "step": 3695 + }, + { + "epoch": 0.23, + "grad_norm": 0.930351972579956, + "learning_rate": 8.946432115810795e-06, + "loss": 0.6639, + "step": 3696 + }, + { + "epoch": 0.23, + "grad_norm": 0.9379802346229553, + "learning_rate": 8.945802046757666e-06, + "loss": 0.6714, + "step": 3697 + }, + { + "epoch": 0.23, + "grad_norm": 0.903059184551239, + "learning_rate": 8.945171811560535e-06, + "loss": 0.6284, + "step": 3698 + }, + { + "epoch": 0.23, + "grad_norm": 0.9489940404891968, + "learning_rate": 8.944541410245947e-06, + "loss": 0.5875, + "step": 3699 + }, + { + "epoch": 0.23, + "grad_norm": 0.9724168181419373, + "learning_rate": 8.943910842840439e-06, + "loss": 0.6435, + "step": 3700 + }, + { + "epoch": 0.23, + "grad_norm": 0.9340975284576416, + "learning_rate": 8.943280109370568e-06, + "loss": 0.6209, + "step": 3701 + }, + { + "epoch": 0.23, + "grad_norm": 0.8607521653175354, + "learning_rate": 8.942649209862888e-06, + "loss": 0.5788, + "step": 3702 + }, + { + "epoch": 0.23, + "grad_norm": 0.8896112442016602, + "learning_rate": 8.942018144343965e-06, + "loss": 0.6177, + "step": 3703 + }, + { + "epoch": 0.23, + "grad_norm": 0.9297407865524292, + "learning_rate": 8.941386912840372e-06, + "loss": 0.6398, + "step": 3704 + }, + { + "epoch": 0.23, + "grad_norm": 0.8317979574203491, + "learning_rate": 8.940755515378687e-06, + "loss": 0.6036, + "step": 3705 + }, + { + "epoch": 0.23, + "grad_norm": 0.9319295287132263, + "learning_rate": 8.940123951985495e-06, + "loss": 0.608, + "step": 3706 + }, + { + "epoch": 0.23, + "grad_norm": 0.9755576252937317, + "learning_rate": 8.939492222687392e-06, + "loss": 0.6238, + "step": 3707 + }, + { + "epoch": 0.23, + "grad_norm": 0.8916385173797607, + "learning_rate": 8.938860327510975e-06, + "loss": 0.614, + "step": 3708 + }, + { + "epoch": 0.23, + "grad_norm": 0.8873549699783325, + "learning_rate": 8.938228266482852e-06, + "loss": 0.6389, + "step": 3709 + }, + { + "epoch": 0.24, + "grad_norm": 0.8616818785667419, + "learning_rate": 8.937596039629637e-06, + "loss": 0.6028, + "step": 3710 + }, + { + "epoch": 0.24, + "grad_norm": 0.8916230797767639, + "learning_rate": 8.93696364697795e-06, + "loss": 0.6395, + "step": 3711 + }, + { + "epoch": 0.24, + "grad_norm": 0.8822511434555054, + "learning_rate": 8.936331088554419e-06, + "loss": 0.5956, + "step": 3712 + }, + { + "epoch": 0.24, + "grad_norm": 0.8785961270332336, + "learning_rate": 8.93569836438568e-06, + "loss": 0.5859, + "step": 3713 + }, + { + "epoch": 0.24, + "grad_norm": 0.9163837432861328, + "learning_rate": 8.935065474498375e-06, + "loss": 0.6075, + "step": 3714 + }, + { + "epoch": 0.24, + "grad_norm": 0.8735101819038391, + "learning_rate": 8.934432418919153e-06, + "loss": 0.6399, + "step": 3715 + }, + { + "epoch": 0.24, + "grad_norm": 0.877932608127594, + "learning_rate": 8.933799197674667e-06, + "loss": 0.6058, + "step": 3716 + }, + { + "epoch": 0.24, + "grad_norm": 0.9489808082580566, + "learning_rate": 8.933165810791579e-06, + "loss": 0.6173, + "step": 3717 + }, + { + "epoch": 0.24, + "grad_norm": 0.8636232018470764, + "learning_rate": 8.932532258296565e-06, + "loss": 0.6418, + "step": 3718 + }, + { + "epoch": 0.24, + "grad_norm": 0.9418687224388123, + "learning_rate": 8.931898540216297e-06, + "loss": 0.6438, + "step": 3719 + }, + { + "epoch": 0.24, + "grad_norm": 0.9097021222114563, + "learning_rate": 8.931264656577459e-06, + "loss": 0.6215, + "step": 3720 + }, + { + "epoch": 0.24, + "grad_norm": 0.8493873476982117, + "learning_rate": 8.930630607406743e-06, + "loss": 0.6228, + "step": 3721 + }, + { + "epoch": 0.24, + "grad_norm": 0.9140156507492065, + "learning_rate": 8.929996392730844e-06, + "loss": 0.6362, + "step": 3722 + }, + { + "epoch": 0.24, + "grad_norm": 0.8999550938606262, + "learning_rate": 8.92936201257647e-06, + "loss": 0.6398, + "step": 3723 + }, + { + "epoch": 0.24, + "grad_norm": 0.8380311727523804, + "learning_rate": 8.928727466970331e-06, + "loss": 0.588, + "step": 3724 + }, + { + "epoch": 0.24, + "grad_norm": 0.9263492822647095, + "learning_rate": 8.928092755939145e-06, + "loss": 0.6247, + "step": 3725 + }, + { + "epoch": 0.24, + "grad_norm": 0.9678030014038086, + "learning_rate": 8.927457879509638e-06, + "loss": 0.624, + "step": 3726 + }, + { + "epoch": 0.24, + "grad_norm": 0.8912070989608765, + "learning_rate": 8.926822837708542e-06, + "loss": 0.6393, + "step": 3727 + }, + { + "epoch": 0.24, + "grad_norm": 0.844551682472229, + "learning_rate": 8.926187630562597e-06, + "loss": 0.6139, + "step": 3728 + }, + { + "epoch": 0.24, + "grad_norm": 0.9056801199913025, + "learning_rate": 8.925552258098549e-06, + "loss": 0.5725, + "step": 3729 + }, + { + "epoch": 0.24, + "grad_norm": 0.8850533962249756, + "learning_rate": 8.924916720343151e-06, + "loss": 0.6235, + "step": 3730 + }, + { + "epoch": 0.24, + "grad_norm": 0.858784556388855, + "learning_rate": 8.924281017323164e-06, + "loss": 0.59, + "step": 3731 + }, + { + "epoch": 0.24, + "grad_norm": 0.8923681974411011, + "learning_rate": 8.923645149065354e-06, + "loss": 0.5841, + "step": 3732 + }, + { + "epoch": 0.24, + "grad_norm": 0.9098735451698303, + "learning_rate": 8.923009115596498e-06, + "loss": 0.5895, + "step": 3733 + }, + { + "epoch": 0.24, + "grad_norm": 0.8857651352882385, + "learning_rate": 8.922372916943374e-06, + "loss": 0.6612, + "step": 3734 + }, + { + "epoch": 0.24, + "grad_norm": 0.9229490756988525, + "learning_rate": 8.921736553132772e-06, + "loss": 0.6304, + "step": 3735 + }, + { + "epoch": 0.24, + "grad_norm": 0.8978235125541687, + "learning_rate": 8.921100024191486e-06, + "loss": 0.5965, + "step": 3736 + }, + { + "epoch": 0.24, + "grad_norm": 0.8856748938560486, + "learning_rate": 8.920463330146318e-06, + "loss": 0.6114, + "step": 3737 + }, + { + "epoch": 0.24, + "grad_norm": 0.9307460784912109, + "learning_rate": 8.919826471024078e-06, + "loss": 0.6278, + "step": 3738 + }, + { + "epoch": 0.24, + "grad_norm": 0.9287357926368713, + "learning_rate": 8.919189446851583e-06, + "loss": 0.5925, + "step": 3739 + }, + { + "epoch": 0.24, + "grad_norm": 0.9358810782432556, + "learning_rate": 8.918552257655652e-06, + "loss": 0.608, + "step": 3740 + }, + { + "epoch": 0.24, + "grad_norm": 0.9406039714813232, + "learning_rate": 8.917914903463119e-06, + "loss": 0.6813, + "step": 3741 + }, + { + "epoch": 0.24, + "grad_norm": 0.9355833530426025, + "learning_rate": 8.917277384300817e-06, + "loss": 0.6438, + "step": 3742 + }, + { + "epoch": 0.24, + "grad_norm": 0.8952451944351196, + "learning_rate": 8.916639700195593e-06, + "loss": 0.5932, + "step": 3743 + }, + { + "epoch": 0.24, + "grad_norm": 0.8967479467391968, + "learning_rate": 8.916001851174296e-06, + "loss": 0.6132, + "step": 3744 + }, + { + "epoch": 0.24, + "grad_norm": 0.9279077053070068, + "learning_rate": 8.915363837263782e-06, + "loss": 0.6351, + "step": 3745 + }, + { + "epoch": 0.24, + "grad_norm": 0.8428364396095276, + "learning_rate": 8.91472565849092e-06, + "loss": 0.5932, + "step": 3746 + }, + { + "epoch": 0.24, + "grad_norm": 0.892693817615509, + "learning_rate": 8.914087314882578e-06, + "loss": 0.585, + "step": 3747 + }, + { + "epoch": 0.24, + "grad_norm": 0.8900630474090576, + "learning_rate": 8.913448806465634e-06, + "loss": 0.6317, + "step": 3748 + }, + { + "epoch": 0.24, + "grad_norm": 0.8545112013816833, + "learning_rate": 8.912810133266976e-06, + "loss": 0.5925, + "step": 3749 + }, + { + "epoch": 0.24, + "grad_norm": 0.9142085313796997, + "learning_rate": 8.912171295313493e-06, + "loss": 0.5952, + "step": 3750 + }, + { + "epoch": 0.24, + "grad_norm": 0.8664583563804626, + "learning_rate": 8.911532292632089e-06, + "loss": 0.6369, + "step": 3751 + }, + { + "epoch": 0.24, + "grad_norm": 0.8957768678665161, + "learning_rate": 8.910893125249666e-06, + "loss": 0.6155, + "step": 3752 + }, + { + "epoch": 0.24, + "grad_norm": 0.9018309116363525, + "learning_rate": 8.91025379319314e-06, + "loss": 0.6574, + "step": 3753 + }, + { + "epoch": 0.24, + "grad_norm": 0.8896942734718323, + "learning_rate": 8.909614296489428e-06, + "loss": 0.5785, + "step": 3754 + }, + { + "epoch": 0.24, + "grad_norm": 0.9446683526039124, + "learning_rate": 8.908974635165458e-06, + "loss": 0.638, + "step": 3755 + }, + { + "epoch": 0.24, + "grad_norm": 0.8614102602005005, + "learning_rate": 8.908334809248165e-06, + "loss": 0.5967, + "step": 3756 + }, + { + "epoch": 0.24, + "grad_norm": 0.9426348805427551, + "learning_rate": 8.90769481876449e-06, + "loss": 0.6738, + "step": 3757 + }, + { + "epoch": 0.24, + "grad_norm": 0.9114719033241272, + "learning_rate": 8.90705466374138e-06, + "loss": 0.6332, + "step": 3758 + }, + { + "epoch": 0.24, + "grad_norm": 0.8968010544776917, + "learning_rate": 8.906414344205789e-06, + "loss": 0.6338, + "step": 3759 + }, + { + "epoch": 0.24, + "grad_norm": 0.8845388293266296, + "learning_rate": 8.905773860184679e-06, + "loss": 0.6355, + "step": 3760 + }, + { + "epoch": 0.24, + "grad_norm": 0.8717195391654968, + "learning_rate": 8.905133211705019e-06, + "loss": 0.593, + "step": 3761 + }, + { + "epoch": 0.24, + "grad_norm": 0.8622083067893982, + "learning_rate": 8.904492398793785e-06, + "loss": 0.632, + "step": 3762 + }, + { + "epoch": 0.24, + "grad_norm": 0.9208856225013733, + "learning_rate": 8.903851421477959e-06, + "loss": 0.6135, + "step": 3763 + }, + { + "epoch": 0.24, + "grad_norm": 0.8842298984527588, + "learning_rate": 8.90321027978453e-06, + "loss": 0.6295, + "step": 3764 + }, + { + "epoch": 0.24, + "grad_norm": 0.9459641575813293, + "learning_rate": 8.902568973740495e-06, + "loss": 0.5951, + "step": 3765 + }, + { + "epoch": 0.24, + "grad_norm": 0.9696717858314514, + "learning_rate": 8.901927503372855e-06, + "loss": 0.5996, + "step": 3766 + }, + { + "epoch": 0.24, + "grad_norm": 0.8983449935913086, + "learning_rate": 8.901285868708622e-06, + "loss": 0.6206, + "step": 3767 + }, + { + "epoch": 0.24, + "grad_norm": 0.8596554398536682, + "learning_rate": 8.900644069774815e-06, + "loss": 0.5802, + "step": 3768 + }, + { + "epoch": 0.24, + "grad_norm": 0.912438690662384, + "learning_rate": 8.900002106598453e-06, + "loss": 0.6058, + "step": 3769 + }, + { + "epoch": 0.24, + "grad_norm": 0.931678056716919, + "learning_rate": 8.89935997920657e-06, + "loss": 0.6331, + "step": 3770 + }, + { + "epoch": 0.24, + "grad_norm": 1.06976318359375, + "learning_rate": 8.898717687626203e-06, + "loss": 0.6401, + "step": 3771 + }, + { + "epoch": 0.24, + "grad_norm": 0.9052533507347107, + "learning_rate": 8.898075231884397e-06, + "loss": 0.6218, + "step": 3772 + }, + { + "epoch": 0.24, + "grad_norm": 0.8735697269439697, + "learning_rate": 8.897432612008206e-06, + "loss": 0.633, + "step": 3773 + }, + { + "epoch": 0.24, + "grad_norm": 0.8962618112564087, + "learning_rate": 8.896789828024682e-06, + "loss": 0.6216, + "step": 3774 + }, + { + "epoch": 0.24, + "grad_norm": 0.8915939927101135, + "learning_rate": 8.896146879960896e-06, + "loss": 0.6651, + "step": 3775 + }, + { + "epoch": 0.24, + "grad_norm": 0.877487063407898, + "learning_rate": 8.895503767843918e-06, + "loss": 0.6433, + "step": 3776 + }, + { + "epoch": 0.24, + "grad_norm": 0.9209939241409302, + "learning_rate": 8.89486049170083e-06, + "loss": 0.6573, + "step": 3777 + }, + { + "epoch": 0.24, + "grad_norm": 0.8641723990440369, + "learning_rate": 8.894217051558713e-06, + "loss": 0.596, + "step": 3778 + }, + { + "epoch": 0.24, + "grad_norm": 0.8952119946479797, + "learning_rate": 8.893573447444663e-06, + "loss": 0.6429, + "step": 3779 + }, + { + "epoch": 0.24, + "grad_norm": 0.8626795411109924, + "learning_rate": 8.892929679385783e-06, + "loss": 0.5635, + "step": 3780 + }, + { + "epoch": 0.24, + "grad_norm": 0.9164071679115295, + "learning_rate": 8.892285747409172e-06, + "loss": 0.5775, + "step": 3781 + }, + { + "epoch": 0.24, + "grad_norm": 0.8823123574256897, + "learning_rate": 8.891641651541953e-06, + "loss": 0.5881, + "step": 3782 + }, + { + "epoch": 0.24, + "grad_norm": 0.9153462648391724, + "learning_rate": 8.89099739181124e-06, + "loss": 0.5915, + "step": 3783 + }, + { + "epoch": 0.24, + "grad_norm": 0.9311332106590271, + "learning_rate": 8.890352968244162e-06, + "loss": 0.601, + "step": 3784 + }, + { + "epoch": 0.24, + "grad_norm": 0.9409120678901672, + "learning_rate": 8.889708380867856e-06, + "loss": 0.6608, + "step": 3785 + }, + { + "epoch": 0.24, + "grad_norm": 0.8671489357948303, + "learning_rate": 8.88906362970946e-06, + "loss": 0.5921, + "step": 3786 + }, + { + "epoch": 0.24, + "grad_norm": 0.8596804141998291, + "learning_rate": 8.888418714796124e-06, + "loss": 0.6256, + "step": 3787 + }, + { + "epoch": 0.24, + "grad_norm": 0.8811514377593994, + "learning_rate": 8.887773636155002e-06, + "loss": 0.5861, + "step": 3788 + }, + { + "epoch": 0.24, + "grad_norm": 0.900944709777832, + "learning_rate": 8.887128393813257e-06, + "loss": 0.641, + "step": 3789 + }, + { + "epoch": 0.24, + "grad_norm": 0.915507435798645, + "learning_rate": 8.886482987798059e-06, + "loss": 0.6348, + "step": 3790 + }, + { + "epoch": 0.24, + "grad_norm": 1.0151876211166382, + "learning_rate": 8.885837418136581e-06, + "loss": 0.5955, + "step": 3791 + }, + { + "epoch": 0.24, + "grad_norm": 0.8506528735160828, + "learning_rate": 8.885191684856007e-06, + "loss": 0.5497, + "step": 3792 + }, + { + "epoch": 0.24, + "grad_norm": 0.8645548224449158, + "learning_rate": 8.884545787983528e-06, + "loss": 0.6036, + "step": 3793 + }, + { + "epoch": 0.24, + "grad_norm": 0.8505982160568237, + "learning_rate": 8.88389972754634e-06, + "loss": 0.6379, + "step": 3794 + }, + { + "epoch": 0.24, + "grad_norm": 0.9695981740951538, + "learning_rate": 8.883253503571643e-06, + "loss": 0.632, + "step": 3795 + }, + { + "epoch": 0.24, + "grad_norm": 0.9560012817382812, + "learning_rate": 8.882607116086651e-06, + "loss": 0.6135, + "step": 3796 + }, + { + "epoch": 0.24, + "grad_norm": 0.8482503890991211, + "learning_rate": 8.881960565118581e-06, + "loss": 0.538, + "step": 3797 + }, + { + "epoch": 0.24, + "grad_norm": 0.9212302565574646, + "learning_rate": 8.881313850694653e-06, + "loss": 0.6599, + "step": 3798 + }, + { + "epoch": 0.24, + "grad_norm": 0.9493160843849182, + "learning_rate": 8.880666972842105e-06, + "loss": 0.6263, + "step": 3799 + }, + { + "epoch": 0.24, + "grad_norm": 0.8596429824829102, + "learning_rate": 8.880019931588167e-06, + "loss": 0.6504, + "step": 3800 + }, + { + "epoch": 0.24, + "grad_norm": 0.929779589176178, + "learning_rate": 8.87937272696009e-06, + "loss": 0.6391, + "step": 3801 + }, + { + "epoch": 0.24, + "grad_norm": 0.8671481013298035, + "learning_rate": 8.878725358985121e-06, + "loss": 0.5667, + "step": 3802 + }, + { + "epoch": 0.24, + "grad_norm": 0.9427719116210938, + "learning_rate": 8.87807782769052e-06, + "loss": 0.6795, + "step": 3803 + }, + { + "epoch": 0.24, + "grad_norm": 0.9037208557128906, + "learning_rate": 8.877430133103555e-06, + "loss": 0.6183, + "step": 3804 + }, + { + "epoch": 0.24, + "grad_norm": 0.8492844104766846, + "learning_rate": 8.876782275251491e-06, + "loss": 0.6273, + "step": 3805 + }, + { + "epoch": 0.24, + "grad_norm": 0.8471344113349915, + "learning_rate": 8.876134254161617e-06, + "loss": 0.6053, + "step": 3806 + }, + { + "epoch": 0.24, + "grad_norm": 0.8713465332984924, + "learning_rate": 8.87548606986121e-06, + "loss": 0.6451, + "step": 3807 + }, + { + "epoch": 0.24, + "grad_norm": 0.9716042280197144, + "learning_rate": 8.874837722377568e-06, + "loss": 0.5792, + "step": 3808 + }, + { + "epoch": 0.24, + "grad_norm": 0.8822860717773438, + "learning_rate": 8.87418921173799e-06, + "loss": 0.6024, + "step": 3809 + }, + { + "epoch": 0.24, + "grad_norm": 0.8905455470085144, + "learning_rate": 8.87354053796978e-06, + "loss": 0.5976, + "step": 3810 + }, + { + "epoch": 0.24, + "grad_norm": 0.807611346244812, + "learning_rate": 8.872891701100253e-06, + "loss": 0.6114, + "step": 3811 + }, + { + "epoch": 0.24, + "grad_norm": 0.9287991523742676, + "learning_rate": 8.872242701156731e-06, + "loss": 0.6195, + "step": 3812 + }, + { + "epoch": 0.24, + "grad_norm": 0.8870870471000671, + "learning_rate": 8.871593538166538e-06, + "loss": 0.6173, + "step": 3813 + }, + { + "epoch": 0.24, + "grad_norm": 1.0158964395523071, + "learning_rate": 8.870944212157008e-06, + "loss": 0.6278, + "step": 3814 + }, + { + "epoch": 0.24, + "grad_norm": 0.8998157382011414, + "learning_rate": 8.870294723155486e-06, + "loss": 0.6385, + "step": 3815 + }, + { + "epoch": 0.24, + "grad_norm": 0.9535521268844604, + "learning_rate": 8.869645071189316e-06, + "loss": 0.6515, + "step": 3816 + }, + { + "epoch": 0.24, + "grad_norm": 0.9406755566596985, + "learning_rate": 8.868995256285853e-06, + "loss": 0.6271, + "step": 3817 + }, + { + "epoch": 0.24, + "grad_norm": 0.920963704586029, + "learning_rate": 8.868345278472458e-06, + "loss": 0.6204, + "step": 3818 + }, + { + "epoch": 0.24, + "grad_norm": 0.9438266754150391, + "learning_rate": 8.867695137776503e-06, + "loss": 0.6417, + "step": 3819 + }, + { + "epoch": 0.24, + "grad_norm": 0.9606151580810547, + "learning_rate": 8.86704483422536e-06, + "loss": 0.6408, + "step": 3820 + }, + { + "epoch": 0.24, + "grad_norm": 0.8948151469230652, + "learning_rate": 8.86639436784641e-06, + "loss": 0.6276, + "step": 3821 + }, + { + "epoch": 0.24, + "grad_norm": 0.9214081168174744, + "learning_rate": 8.865743738667045e-06, + "loss": 0.5498, + "step": 3822 + }, + { + "epoch": 0.24, + "grad_norm": 0.8617424964904785, + "learning_rate": 8.865092946714657e-06, + "loss": 0.5677, + "step": 3823 + }, + { + "epoch": 0.24, + "grad_norm": 0.9291020035743713, + "learning_rate": 8.864441992016653e-06, + "loss": 0.5911, + "step": 3824 + }, + { + "epoch": 0.24, + "grad_norm": 0.9329352378845215, + "learning_rate": 8.863790874600438e-06, + "loss": 0.6073, + "step": 3825 + }, + { + "epoch": 0.24, + "grad_norm": 0.8821927309036255, + "learning_rate": 8.863139594493432e-06, + "loss": 0.5866, + "step": 3826 + }, + { + "epoch": 0.24, + "grad_norm": 0.8997513055801392, + "learning_rate": 8.862488151723055e-06, + "loss": 0.6081, + "step": 3827 + }, + { + "epoch": 0.24, + "grad_norm": 0.9646042585372925, + "learning_rate": 8.86183654631674e-06, + "loss": 0.6295, + "step": 3828 + }, + { + "epoch": 0.24, + "grad_norm": 0.9080867171287537, + "learning_rate": 8.861184778301921e-06, + "loss": 0.6282, + "step": 3829 + }, + { + "epoch": 0.24, + "grad_norm": 0.8966723084449768, + "learning_rate": 8.860532847706046e-06, + "loss": 0.637, + "step": 3830 + }, + { + "epoch": 0.24, + "grad_norm": 0.9197657704353333, + "learning_rate": 8.85988075455656e-06, + "loss": 0.5963, + "step": 3831 + }, + { + "epoch": 0.24, + "grad_norm": 0.9177777767181396, + "learning_rate": 8.859228498880923e-06, + "loss": 0.6453, + "step": 3832 + }, + { + "epoch": 0.24, + "grad_norm": 0.9342770576477051, + "learning_rate": 8.8585760807066e-06, + "loss": 0.6383, + "step": 3833 + }, + { + "epoch": 0.24, + "grad_norm": 0.9254716038703918, + "learning_rate": 8.85792350006106e-06, + "loss": 0.6608, + "step": 3834 + }, + { + "epoch": 0.24, + "grad_norm": 0.878808319568634, + "learning_rate": 8.857270756971785e-06, + "loss": 0.6036, + "step": 3835 + }, + { + "epoch": 0.24, + "grad_norm": 0.9698695540428162, + "learning_rate": 8.856617851466254e-06, + "loss": 0.6553, + "step": 3836 + }, + { + "epoch": 0.24, + "grad_norm": 0.8826630115509033, + "learning_rate": 8.855964783571963e-06, + "loss": 0.5691, + "step": 3837 + }, + { + "epoch": 0.24, + "grad_norm": 0.9340159296989441, + "learning_rate": 8.855311553316409e-06, + "loss": 0.5863, + "step": 3838 + }, + { + "epoch": 0.24, + "grad_norm": 0.8885470628738403, + "learning_rate": 8.854658160727096e-06, + "loss": 0.6368, + "step": 3839 + }, + { + "epoch": 0.24, + "grad_norm": 0.846393346786499, + "learning_rate": 8.854004605831536e-06, + "loss": 0.5378, + "step": 3840 + }, + { + "epoch": 0.24, + "grad_norm": 0.8811196684837341, + "learning_rate": 8.853350888657251e-06, + "loss": 0.6132, + "step": 3841 + }, + { + "epoch": 0.24, + "grad_norm": 0.9290794134140015, + "learning_rate": 8.852697009231766e-06, + "loss": 0.5925, + "step": 3842 + }, + { + "epoch": 0.24, + "grad_norm": 0.8985415697097778, + "learning_rate": 8.852042967582611e-06, + "loss": 0.6533, + "step": 3843 + }, + { + "epoch": 0.24, + "grad_norm": 0.8721175789833069, + "learning_rate": 8.851388763737328e-06, + "loss": 0.5439, + "step": 3844 + }, + { + "epoch": 0.24, + "grad_norm": 0.898200511932373, + "learning_rate": 8.850734397723461e-06, + "loss": 0.5901, + "step": 3845 + }, + { + "epoch": 0.24, + "grad_norm": 1.0981974601745605, + "learning_rate": 8.850079869568565e-06, + "loss": 0.6579, + "step": 3846 + }, + { + "epoch": 0.24, + "grad_norm": 0.8868777751922607, + "learning_rate": 8.849425179300197e-06, + "loss": 0.6113, + "step": 3847 + }, + { + "epoch": 0.24, + "grad_norm": 0.8843356370925903, + "learning_rate": 8.848770326945927e-06, + "loss": 0.5933, + "step": 3848 + }, + { + "epoch": 0.24, + "grad_norm": 0.9298630356788635, + "learning_rate": 8.84811531253333e-06, + "loss": 0.5776, + "step": 3849 + }, + { + "epoch": 0.24, + "grad_norm": 1.0039656162261963, + "learning_rate": 8.847460136089982e-06, + "loss": 0.6304, + "step": 3850 + }, + { + "epoch": 0.24, + "grad_norm": 0.8467380404472351, + "learning_rate": 8.846804797643472e-06, + "loss": 0.6048, + "step": 3851 + }, + { + "epoch": 0.24, + "grad_norm": 0.8640190958976746, + "learning_rate": 8.846149297221394e-06, + "loss": 0.5943, + "step": 3852 + }, + { + "epoch": 0.24, + "grad_norm": 0.9181884527206421, + "learning_rate": 8.845493634851348e-06, + "loss": 0.6935, + "step": 3853 + }, + { + "epoch": 0.24, + "grad_norm": 0.8371793627738953, + "learning_rate": 8.844837810560943e-06, + "loss": 0.5877, + "step": 3854 + }, + { + "epoch": 0.24, + "grad_norm": 0.9249871969223022, + "learning_rate": 8.844181824377793e-06, + "loss": 0.5707, + "step": 3855 + }, + { + "epoch": 0.24, + "grad_norm": 0.8807600140571594, + "learning_rate": 8.843525676329521e-06, + "loss": 0.6436, + "step": 3856 + }, + { + "epoch": 0.24, + "grad_norm": 0.8587551116943359, + "learning_rate": 8.842869366443751e-06, + "loss": 0.5952, + "step": 3857 + }, + { + "epoch": 0.24, + "grad_norm": 0.8563278317451477, + "learning_rate": 8.842212894748122e-06, + "loss": 0.5835, + "step": 3858 + }, + { + "epoch": 0.24, + "grad_norm": 0.8369274735450745, + "learning_rate": 8.841556261270272e-06, + "loss": 0.5843, + "step": 3859 + }, + { + "epoch": 0.24, + "grad_norm": 0.9252521395683289, + "learning_rate": 8.840899466037854e-06, + "loss": 0.6468, + "step": 3860 + }, + { + "epoch": 0.24, + "grad_norm": 0.874243438243866, + "learning_rate": 8.840242509078521e-06, + "loss": 0.5989, + "step": 3861 + }, + { + "epoch": 0.24, + "grad_norm": 0.8621048331260681, + "learning_rate": 8.839585390419933e-06, + "loss": 0.5763, + "step": 3862 + }, + { + "epoch": 0.24, + "grad_norm": 0.8070306777954102, + "learning_rate": 8.838928110089763e-06, + "loss": 0.6054, + "step": 3863 + }, + { + "epoch": 0.24, + "grad_norm": 0.9515740275382996, + "learning_rate": 8.838270668115685e-06, + "loss": 0.6457, + "step": 3864 + }, + { + "epoch": 0.24, + "grad_norm": 0.8527739644050598, + "learning_rate": 8.837613064525381e-06, + "loss": 0.5998, + "step": 3865 + }, + { + "epoch": 0.24, + "grad_norm": 0.9535593390464783, + "learning_rate": 8.83695529934654e-06, + "loss": 0.6252, + "step": 3866 + }, + { + "epoch": 0.24, + "grad_norm": 0.8122836351394653, + "learning_rate": 8.83629737260686e-06, + "loss": 0.5928, + "step": 3867 + }, + { + "epoch": 0.25, + "grad_norm": 0.9340097904205322, + "learning_rate": 8.835639284334043e-06, + "loss": 0.5719, + "step": 3868 + }, + { + "epoch": 0.25, + "grad_norm": 0.9119397401809692, + "learning_rate": 8.834981034555799e-06, + "loss": 0.6028, + "step": 3869 + }, + { + "epoch": 0.25, + "grad_norm": 0.8478021025657654, + "learning_rate": 8.834322623299844e-06, + "loss": 0.5882, + "step": 3870 + }, + { + "epoch": 0.25, + "grad_norm": 0.9423801898956299, + "learning_rate": 8.833664050593904e-06, + "loss": 0.5901, + "step": 3871 + }, + { + "epoch": 0.25, + "grad_norm": 0.973012387752533, + "learning_rate": 8.833005316465706e-06, + "loss": 0.5702, + "step": 3872 + }, + { + "epoch": 0.25, + "grad_norm": 0.870364785194397, + "learning_rate": 8.832346420942987e-06, + "loss": 0.5943, + "step": 3873 + }, + { + "epoch": 0.25, + "grad_norm": 0.8896936774253845, + "learning_rate": 8.831687364053493e-06, + "loss": 0.6135, + "step": 3874 + }, + { + "epoch": 0.25, + "grad_norm": 0.9121167063713074, + "learning_rate": 8.831028145824974e-06, + "loss": 0.639, + "step": 3875 + }, + { + "epoch": 0.25, + "grad_norm": 0.9295619130134583, + "learning_rate": 8.830368766285186e-06, + "loss": 0.6404, + "step": 3876 + }, + { + "epoch": 0.25, + "grad_norm": 0.9236605763435364, + "learning_rate": 8.829709225461894e-06, + "loss": 0.596, + "step": 3877 + }, + { + "epoch": 0.25, + "grad_norm": 1.0370179414749146, + "learning_rate": 8.829049523382871e-06, + "loss": 0.6572, + "step": 3878 + }, + { + "epoch": 0.25, + "grad_norm": 0.8750087022781372, + "learning_rate": 8.828389660075891e-06, + "loss": 0.6232, + "step": 3879 + }, + { + "epoch": 0.25, + "grad_norm": 0.8742169141769409, + "learning_rate": 8.82772963556874e-06, + "loss": 0.6312, + "step": 3880 + }, + { + "epoch": 0.25, + "grad_norm": 0.8765554428100586, + "learning_rate": 8.827069449889211e-06, + "loss": 0.58, + "step": 3881 + }, + { + "epoch": 0.25, + "grad_norm": 0.9164361357688904, + "learning_rate": 8.8264091030651e-06, + "loss": 0.6194, + "step": 3882 + }, + { + "epoch": 0.25, + "grad_norm": 0.914909839630127, + "learning_rate": 8.825748595124214e-06, + "loss": 0.6188, + "step": 3883 + }, + { + "epoch": 0.25, + "grad_norm": 0.88898104429245, + "learning_rate": 8.825087926094363e-06, + "loss": 0.5854, + "step": 3884 + }, + { + "epoch": 0.25, + "grad_norm": 0.8506219387054443, + "learning_rate": 8.824427096003367e-06, + "loss": 0.5805, + "step": 3885 + }, + { + "epoch": 0.25, + "grad_norm": 0.9433155655860901, + "learning_rate": 8.823766104879047e-06, + "loss": 0.5827, + "step": 3886 + }, + { + "epoch": 0.25, + "grad_norm": 0.8702185153961182, + "learning_rate": 8.823104952749242e-06, + "loss": 0.5661, + "step": 3887 + }, + { + "epoch": 0.25, + "grad_norm": 0.8791462779045105, + "learning_rate": 8.822443639641785e-06, + "loss": 0.5424, + "step": 3888 + }, + { + "epoch": 0.25, + "grad_norm": 0.8864879012107849, + "learning_rate": 8.821782165584524e-06, + "loss": 0.6041, + "step": 3889 + }, + { + "epoch": 0.25, + "grad_norm": 0.9141310453414917, + "learning_rate": 8.82112053060531e-06, + "loss": 0.6335, + "step": 3890 + }, + { + "epoch": 0.25, + "grad_norm": 0.9409934878349304, + "learning_rate": 8.820458734732004e-06, + "loss": 0.6872, + "step": 3891 + }, + { + "epoch": 0.25, + "grad_norm": 0.9157419204711914, + "learning_rate": 8.819796777992471e-06, + "loss": 0.5836, + "step": 3892 + }, + { + "epoch": 0.25, + "grad_norm": 0.957832396030426, + "learning_rate": 8.819134660414585e-06, + "loss": 0.6145, + "step": 3893 + }, + { + "epoch": 0.25, + "grad_norm": 0.9433353543281555, + "learning_rate": 8.818472382026222e-06, + "loss": 0.573, + "step": 3894 + }, + { + "epoch": 0.25, + "grad_norm": 0.919173002243042, + "learning_rate": 8.817809942855272e-06, + "loss": 0.5815, + "step": 3895 + }, + { + "epoch": 0.25, + "grad_norm": 0.8651015758514404, + "learning_rate": 8.817147342929626e-06, + "loss": 0.5762, + "step": 3896 + }, + { + "epoch": 0.25, + "grad_norm": 0.9625697135925293, + "learning_rate": 8.816484582277184e-06, + "loss": 0.6389, + "step": 3897 + }, + { + "epoch": 0.25, + "grad_norm": 0.8946129083633423, + "learning_rate": 8.815821660925853e-06, + "loss": 0.6084, + "step": 3898 + }, + { + "epoch": 0.25, + "grad_norm": 0.9177218079566956, + "learning_rate": 8.815158578903548e-06, + "loss": 0.6022, + "step": 3899 + }, + { + "epoch": 0.25, + "grad_norm": 0.8781201243400574, + "learning_rate": 8.814495336238185e-06, + "loss": 0.6393, + "step": 3900 + }, + { + "epoch": 0.25, + "grad_norm": 0.9286174774169922, + "learning_rate": 8.813831932957696e-06, + "loss": 0.6149, + "step": 3901 + }, + { + "epoch": 0.25, + "grad_norm": 0.882340669631958, + "learning_rate": 8.813168369090007e-06, + "loss": 0.5349, + "step": 3902 + }, + { + "epoch": 0.25, + "grad_norm": 0.8473665118217468, + "learning_rate": 8.812504644663066e-06, + "loss": 0.5991, + "step": 3903 + }, + { + "epoch": 0.25, + "grad_norm": 1.013710618019104, + "learning_rate": 8.811840759704816e-06, + "loss": 0.6184, + "step": 3904 + }, + { + "epoch": 0.25, + "grad_norm": 0.8682031631469727, + "learning_rate": 8.811176714243213e-06, + "loss": 0.6179, + "step": 3905 + }, + { + "epoch": 0.25, + "grad_norm": 0.9201847314834595, + "learning_rate": 8.810512508306216e-06, + "loss": 0.5807, + "step": 3906 + }, + { + "epoch": 0.25, + "grad_norm": 0.8606781959533691, + "learning_rate": 8.809848141921793e-06, + "loss": 0.5846, + "step": 3907 + }, + { + "epoch": 0.25, + "grad_norm": 0.9430428743362427, + "learning_rate": 8.809183615117919e-06, + "loss": 0.6372, + "step": 3908 + }, + { + "epoch": 0.25, + "grad_norm": 0.8924831748008728, + "learning_rate": 8.808518927922574e-06, + "loss": 0.6182, + "step": 3909 + }, + { + "epoch": 0.25, + "grad_norm": 0.9287380576133728, + "learning_rate": 8.807854080363745e-06, + "loss": 0.6251, + "step": 3910 + }, + { + "epoch": 0.25, + "grad_norm": 0.9271407723426819, + "learning_rate": 8.807189072469428e-06, + "loss": 0.6197, + "step": 3911 + }, + { + "epoch": 0.25, + "grad_norm": 0.8575233817100525, + "learning_rate": 8.806523904267623e-06, + "loss": 0.6011, + "step": 3912 + }, + { + "epoch": 0.25, + "grad_norm": 1.0265129804611206, + "learning_rate": 8.80585857578634e-06, + "loss": 0.6534, + "step": 3913 + }, + { + "epoch": 0.25, + "grad_norm": 0.8787725567817688, + "learning_rate": 8.80519308705359e-06, + "loss": 0.5598, + "step": 3914 + }, + { + "epoch": 0.25, + "grad_norm": 0.9931519031524658, + "learning_rate": 8.804527438097396e-06, + "loss": 0.624, + "step": 3915 + }, + { + "epoch": 0.25, + "grad_norm": 0.9081161022186279, + "learning_rate": 8.803861628945787e-06, + "loss": 0.5939, + "step": 3916 + }, + { + "epoch": 0.25, + "grad_norm": 0.9506007432937622, + "learning_rate": 8.803195659626798e-06, + "loss": 0.6593, + "step": 3917 + }, + { + "epoch": 0.25, + "grad_norm": 0.8824777603149414, + "learning_rate": 8.802529530168469e-06, + "loss": 0.6381, + "step": 3918 + }, + { + "epoch": 0.25, + "grad_norm": 0.8718113899230957, + "learning_rate": 8.801863240598851e-06, + "loss": 0.6002, + "step": 3919 + }, + { + "epoch": 0.25, + "grad_norm": 0.880943238735199, + "learning_rate": 8.801196790945999e-06, + "loss": 0.587, + "step": 3920 + }, + { + "epoch": 0.25, + "grad_norm": 0.9570931196212769, + "learning_rate": 8.800530181237971e-06, + "loss": 0.6615, + "step": 3921 + }, + { + "epoch": 0.25, + "grad_norm": 0.9796764254570007, + "learning_rate": 8.799863411502838e-06, + "loss": 0.5868, + "step": 3922 + }, + { + "epoch": 0.25, + "grad_norm": 0.8545233607292175, + "learning_rate": 8.799196481768677e-06, + "loss": 0.6008, + "step": 3923 + }, + { + "epoch": 0.25, + "grad_norm": 0.8299331068992615, + "learning_rate": 8.798529392063569e-06, + "loss": 0.5954, + "step": 3924 + }, + { + "epoch": 0.25, + "grad_norm": 0.8435283899307251, + "learning_rate": 8.7978621424156e-06, + "loss": 0.5738, + "step": 3925 + }, + { + "epoch": 0.25, + "grad_norm": 0.9209175109863281, + "learning_rate": 8.79719473285287e-06, + "loss": 0.64, + "step": 3926 + }, + { + "epoch": 0.25, + "grad_norm": 0.9299062490463257, + "learning_rate": 8.796527163403479e-06, + "loss": 0.6396, + "step": 3927 + }, + { + "epoch": 0.25, + "grad_norm": 0.9327616691589355, + "learning_rate": 8.795859434095535e-06, + "loss": 0.6079, + "step": 3928 + }, + { + "epoch": 0.25, + "grad_norm": 0.9269071221351624, + "learning_rate": 8.795191544957156e-06, + "loss": 0.6834, + "step": 3929 + }, + { + "epoch": 0.25, + "grad_norm": 0.9255284667015076, + "learning_rate": 8.794523496016465e-06, + "loss": 0.6848, + "step": 3930 + }, + { + "epoch": 0.25, + "grad_norm": 0.8709956407546997, + "learning_rate": 8.793855287301588e-06, + "loss": 0.6473, + "step": 3931 + }, + { + "epoch": 0.25, + "grad_norm": 0.840023934841156, + "learning_rate": 8.793186918840661e-06, + "loss": 0.5463, + "step": 3932 + }, + { + "epoch": 0.25, + "grad_norm": 0.9407967925071716, + "learning_rate": 8.792518390661831e-06, + "loss": 0.6065, + "step": 3933 + }, + { + "epoch": 0.25, + "grad_norm": 0.8985733985900879, + "learning_rate": 8.791849702793245e-06, + "loss": 0.6236, + "step": 3934 + }, + { + "epoch": 0.25, + "grad_norm": 0.8958050012588501, + "learning_rate": 8.791180855263057e-06, + "loss": 0.6322, + "step": 3935 + }, + { + "epoch": 0.25, + "grad_norm": 0.9496500492095947, + "learning_rate": 8.790511848099433e-06, + "loss": 0.6399, + "step": 3936 + }, + { + "epoch": 0.25, + "grad_norm": 0.9206477999687195, + "learning_rate": 8.789842681330543e-06, + "loss": 0.6232, + "step": 3937 + }, + { + "epoch": 0.25, + "grad_norm": 0.9060776829719543, + "learning_rate": 8.789173354984557e-06, + "loss": 0.5971, + "step": 3938 + }, + { + "epoch": 0.25, + "grad_norm": 0.9292250871658325, + "learning_rate": 8.788503869089667e-06, + "loss": 0.6796, + "step": 3939 + }, + { + "epoch": 0.25, + "grad_norm": 0.8954676985740662, + "learning_rate": 8.787834223674056e-06, + "loss": 0.6061, + "step": 3940 + }, + { + "epoch": 0.25, + "grad_norm": 0.8948878049850464, + "learning_rate": 8.787164418765923e-06, + "loss": 0.5726, + "step": 3941 + }, + { + "epoch": 0.25, + "grad_norm": 0.9730081558227539, + "learning_rate": 8.786494454393472e-06, + "loss": 0.6271, + "step": 3942 + }, + { + "epoch": 0.25, + "grad_norm": 0.9321277737617493, + "learning_rate": 8.785824330584912e-06, + "loss": 0.5956, + "step": 3943 + }, + { + "epoch": 0.25, + "grad_norm": 0.9635143280029297, + "learning_rate": 8.785154047368459e-06, + "loss": 0.6239, + "step": 3944 + }, + { + "epoch": 0.25, + "grad_norm": 0.9317022562026978, + "learning_rate": 8.784483604772336e-06, + "loss": 0.6193, + "step": 3945 + }, + { + "epoch": 0.25, + "grad_norm": 0.893115222454071, + "learning_rate": 8.783813002824773e-06, + "loss": 0.617, + "step": 3946 + }, + { + "epoch": 0.25, + "grad_norm": 0.899761974811554, + "learning_rate": 8.783142241554009e-06, + "loss": 0.5963, + "step": 3947 + }, + { + "epoch": 0.25, + "grad_norm": 0.8902785181999207, + "learning_rate": 8.782471320988284e-06, + "loss": 0.6318, + "step": 3948 + }, + { + "epoch": 0.25, + "grad_norm": 0.8925158381462097, + "learning_rate": 8.781800241155851e-06, + "loss": 0.5684, + "step": 3949 + }, + { + "epoch": 0.25, + "grad_norm": 0.9196040630340576, + "learning_rate": 8.781129002084965e-06, + "loss": 0.5899, + "step": 3950 + }, + { + "epoch": 0.25, + "grad_norm": 0.9138063192367554, + "learning_rate": 8.780457603803892e-06, + "loss": 0.6088, + "step": 3951 + }, + { + "epoch": 0.25, + "grad_norm": 0.8876779675483704, + "learning_rate": 8.779786046340898e-06, + "loss": 0.6453, + "step": 3952 + }, + { + "epoch": 0.25, + "grad_norm": 0.9258411526679993, + "learning_rate": 8.779114329724265e-06, + "loss": 0.6308, + "step": 3953 + }, + { + "epoch": 0.25, + "grad_norm": 0.8825391530990601, + "learning_rate": 8.778442453982272e-06, + "loss": 0.5773, + "step": 3954 + }, + { + "epoch": 0.25, + "grad_norm": 0.8886011242866516, + "learning_rate": 8.777770419143214e-06, + "loss": 0.646, + "step": 3955 + }, + { + "epoch": 0.25, + "grad_norm": 0.9516189694404602, + "learning_rate": 8.777098225235384e-06, + "loss": 0.6543, + "step": 3956 + }, + { + "epoch": 0.25, + "grad_norm": 0.9398981928825378, + "learning_rate": 8.776425872287087e-06, + "loss": 0.6067, + "step": 3957 + }, + { + "epoch": 0.25, + "grad_norm": 0.9179983139038086, + "learning_rate": 8.775753360326635e-06, + "loss": 0.6425, + "step": 3958 + }, + { + "epoch": 0.25, + "grad_norm": 0.8767003417015076, + "learning_rate": 8.775080689382342e-06, + "loss": 0.6333, + "step": 3959 + }, + { + "epoch": 0.25, + "grad_norm": 0.8714125752449036, + "learning_rate": 8.774407859482537e-06, + "loss": 0.6302, + "step": 3960 + }, + { + "epoch": 0.25, + "grad_norm": 0.8385068774223328, + "learning_rate": 8.773734870655544e-06, + "loss": 0.6087, + "step": 3961 + }, + { + "epoch": 0.25, + "grad_norm": 0.9136397242546082, + "learning_rate": 8.773061722929704e-06, + "loss": 0.6035, + "step": 3962 + }, + { + "epoch": 0.25, + "grad_norm": 0.805779755115509, + "learning_rate": 8.772388416333361e-06, + "loss": 0.5575, + "step": 3963 + }, + { + "epoch": 0.25, + "grad_norm": 0.9417558908462524, + "learning_rate": 8.771714950894865e-06, + "loss": 0.5909, + "step": 3964 + }, + { + "epoch": 0.25, + "grad_norm": 0.922148585319519, + "learning_rate": 8.771041326642572e-06, + "loss": 0.6075, + "step": 3965 + }, + { + "epoch": 0.25, + "grad_norm": 0.9802806377410889, + "learning_rate": 8.770367543604849e-06, + "loss": 0.6446, + "step": 3966 + }, + { + "epoch": 0.25, + "grad_norm": 0.924773633480072, + "learning_rate": 8.769693601810066e-06, + "loss": 0.6432, + "step": 3967 + }, + { + "epoch": 0.25, + "grad_norm": 0.8747174143791199, + "learning_rate": 8.769019501286598e-06, + "loss": 0.5868, + "step": 3968 + }, + { + "epoch": 0.25, + "grad_norm": 0.9166977405548096, + "learning_rate": 8.768345242062828e-06, + "loss": 0.6624, + "step": 3969 + }, + { + "epoch": 0.25, + "grad_norm": 0.886821985244751, + "learning_rate": 8.767670824167151e-06, + "loss": 0.6774, + "step": 3970 + }, + { + "epoch": 0.25, + "grad_norm": 0.8805607557296753, + "learning_rate": 8.766996247627963e-06, + "loss": 0.6026, + "step": 3971 + }, + { + "epoch": 0.25, + "grad_norm": 0.8670737743377686, + "learning_rate": 8.766321512473666e-06, + "loss": 0.6216, + "step": 3972 + }, + { + "epoch": 0.25, + "grad_norm": 0.9067496657371521, + "learning_rate": 8.765646618732672e-06, + "loss": 0.6065, + "step": 3973 + }, + { + "epoch": 0.25, + "grad_norm": 0.8839542269706726, + "learning_rate": 8.7649715664334e-06, + "loss": 0.6026, + "step": 3974 + }, + { + "epoch": 0.25, + "grad_norm": 0.915699303150177, + "learning_rate": 8.764296355604273e-06, + "loss": 0.6433, + "step": 3975 + }, + { + "epoch": 0.25, + "grad_norm": 0.8603993654251099, + "learning_rate": 8.76362098627372e-06, + "loss": 0.6031, + "step": 3976 + }, + { + "epoch": 0.25, + "grad_norm": 0.9431526064872742, + "learning_rate": 8.76294545847018e-06, + "loss": 0.6777, + "step": 3977 + }, + { + "epoch": 0.25, + "grad_norm": 0.919879138469696, + "learning_rate": 8.762269772222099e-06, + "loss": 0.5918, + "step": 3978 + }, + { + "epoch": 0.25, + "grad_norm": 0.8991773128509521, + "learning_rate": 8.761593927557923e-06, + "loss": 0.6318, + "step": 3979 + }, + { + "epoch": 0.25, + "grad_norm": 0.8900842666625977, + "learning_rate": 8.760917924506114e-06, + "loss": 0.578, + "step": 3980 + }, + { + "epoch": 0.25, + "grad_norm": 1.0085675716400146, + "learning_rate": 8.760241763095135e-06, + "loss": 0.6554, + "step": 3981 + }, + { + "epoch": 0.25, + "grad_norm": 0.9195557832717896, + "learning_rate": 8.759565443353454e-06, + "loss": 0.6484, + "step": 3982 + }, + { + "epoch": 0.25, + "grad_norm": 0.9673278331756592, + "learning_rate": 8.758888965309554e-06, + "loss": 0.6418, + "step": 3983 + }, + { + "epoch": 0.25, + "grad_norm": 0.9138756394386292, + "learning_rate": 8.758212328991913e-06, + "loss": 0.6098, + "step": 3984 + }, + { + "epoch": 0.25, + "grad_norm": 0.9599946737289429, + "learning_rate": 8.757535534429027e-06, + "loss": 0.6413, + "step": 3985 + }, + { + "epoch": 0.25, + "grad_norm": 0.9634223580360413, + "learning_rate": 8.756858581649391e-06, + "loss": 0.6299, + "step": 3986 + }, + { + "epoch": 0.25, + "grad_norm": 0.8933982253074646, + "learning_rate": 8.756181470681507e-06, + "loss": 0.642, + "step": 3987 + }, + { + "epoch": 0.25, + "grad_norm": 0.8917509317398071, + "learning_rate": 8.755504201553889e-06, + "loss": 0.6301, + "step": 3988 + }, + { + "epoch": 0.25, + "grad_norm": 0.9482274651527405, + "learning_rate": 8.754826774295056e-06, + "loss": 0.6465, + "step": 3989 + }, + { + "epoch": 0.25, + "grad_norm": 0.9077238440513611, + "learning_rate": 8.754149188933527e-06, + "loss": 0.5909, + "step": 3990 + }, + { + "epoch": 0.25, + "grad_norm": 0.9035444855690002, + "learning_rate": 8.753471445497837e-06, + "loss": 0.5492, + "step": 3991 + }, + { + "epoch": 0.25, + "grad_norm": 0.8981434106826782, + "learning_rate": 8.752793544016519e-06, + "loss": 0.6003, + "step": 3992 + }, + { + "epoch": 0.25, + "grad_norm": 0.9048976898193359, + "learning_rate": 8.752115484518123e-06, + "loss": 0.656, + "step": 3993 + }, + { + "epoch": 0.25, + "grad_norm": 0.9182979464530945, + "learning_rate": 8.751437267031194e-06, + "loss": 0.6331, + "step": 3994 + }, + { + "epoch": 0.25, + "grad_norm": 0.9162821173667908, + "learning_rate": 8.750758891584293e-06, + "loss": 0.6385, + "step": 3995 + }, + { + "epoch": 0.25, + "grad_norm": 0.882770836353302, + "learning_rate": 8.750080358205983e-06, + "loss": 0.5651, + "step": 3996 + }, + { + "epoch": 0.25, + "grad_norm": 0.8625838756561279, + "learning_rate": 8.749401666924834e-06, + "loss": 0.5548, + "step": 3997 + }, + { + "epoch": 0.25, + "grad_norm": 0.8796778917312622, + "learning_rate": 8.748722817769426e-06, + "loss": 0.6218, + "step": 3998 + }, + { + "epoch": 0.25, + "grad_norm": 0.8950878977775574, + "learning_rate": 8.74804381076834e-06, + "loss": 0.591, + "step": 3999 + }, + { + "epoch": 0.25, + "grad_norm": 0.9669718742370605, + "learning_rate": 8.747364645950168e-06, + "loss": 0.6653, + "step": 4000 + }, + { + "epoch": 0.25, + "grad_norm": 0.8975842595100403, + "learning_rate": 8.746685323343507e-06, + "loss": 0.6355, + "step": 4001 + }, + { + "epoch": 0.25, + "grad_norm": 0.8494351506233215, + "learning_rate": 8.74600584297696e-06, + "loss": 0.5891, + "step": 4002 + }, + { + "epoch": 0.25, + "grad_norm": 0.8773183226585388, + "learning_rate": 8.745326204879139e-06, + "loss": 0.6023, + "step": 4003 + }, + { + "epoch": 0.25, + "grad_norm": 0.8296153545379639, + "learning_rate": 8.74464640907866e-06, + "loss": 0.5928, + "step": 4004 + }, + { + "epoch": 0.25, + "grad_norm": 0.989004909992218, + "learning_rate": 8.743966455604147e-06, + "loss": 0.6234, + "step": 4005 + }, + { + "epoch": 0.25, + "grad_norm": 0.8475044369697571, + "learning_rate": 8.743286344484232e-06, + "loss": 0.5406, + "step": 4006 + }, + { + "epoch": 0.25, + "grad_norm": 0.8195810317993164, + "learning_rate": 8.74260607574755e-06, + "loss": 0.5639, + "step": 4007 + }, + { + "epoch": 0.25, + "grad_norm": 0.909238874912262, + "learning_rate": 8.741925649422746e-06, + "loss": 0.5854, + "step": 4008 + }, + { + "epoch": 0.25, + "grad_norm": 0.9121100902557373, + "learning_rate": 8.741245065538471e-06, + "loss": 0.614, + "step": 4009 + }, + { + "epoch": 0.25, + "grad_norm": 0.8657447695732117, + "learning_rate": 8.74056432412338e-06, + "loss": 0.568, + "step": 4010 + }, + { + "epoch": 0.25, + "grad_norm": 0.808589518070221, + "learning_rate": 8.739883425206138e-06, + "loss": 0.598, + "step": 4011 + }, + { + "epoch": 0.25, + "grad_norm": 0.9169565439224243, + "learning_rate": 8.739202368815416e-06, + "loss": 0.6287, + "step": 4012 + }, + { + "epoch": 0.25, + "grad_norm": 0.9050797820091248, + "learning_rate": 8.738521154979889e-06, + "loss": 0.6202, + "step": 4013 + }, + { + "epoch": 0.25, + "grad_norm": 0.9150273203849792, + "learning_rate": 8.737839783728242e-06, + "loss": 0.6038, + "step": 4014 + }, + { + "epoch": 0.25, + "grad_norm": 0.9774922132492065, + "learning_rate": 8.737158255089164e-06, + "loss": 0.6055, + "step": 4015 + }, + { + "epoch": 0.25, + "grad_norm": 0.855354905128479, + "learning_rate": 8.736476569091352e-06, + "loss": 0.5831, + "step": 4016 + }, + { + "epoch": 0.25, + "grad_norm": 0.8585079312324524, + "learning_rate": 8.735794725763512e-06, + "loss": 0.6254, + "step": 4017 + }, + { + "epoch": 0.25, + "grad_norm": 0.9431387782096863, + "learning_rate": 8.735112725134352e-06, + "loss": 0.5971, + "step": 4018 + }, + { + "epoch": 0.25, + "grad_norm": 0.9413880109786987, + "learning_rate": 8.734430567232585e-06, + "loss": 0.6035, + "step": 4019 + }, + { + "epoch": 0.25, + "grad_norm": 0.8787413835525513, + "learning_rate": 8.733748252086943e-06, + "loss": 0.6233, + "step": 4020 + }, + { + "epoch": 0.25, + "grad_norm": 0.9067035913467407, + "learning_rate": 8.733065779726146e-06, + "loss": 0.6869, + "step": 4021 + }, + { + "epoch": 0.25, + "grad_norm": 0.8852519392967224, + "learning_rate": 8.732383150178938e-06, + "loss": 0.6373, + "step": 4022 + }, + { + "epoch": 0.25, + "grad_norm": 0.9651377201080322, + "learning_rate": 8.73170036347406e-06, + "loss": 0.6788, + "step": 4023 + }, + { + "epoch": 0.25, + "grad_norm": 0.8923559784889221, + "learning_rate": 8.731017419640261e-06, + "loss": 0.6376, + "step": 4024 + }, + { + "epoch": 0.26, + "grad_norm": 0.9307226538658142, + "learning_rate": 8.730334318706297e-06, + "loss": 0.5993, + "step": 4025 + }, + { + "epoch": 0.26, + "grad_norm": 0.9229474067687988, + "learning_rate": 8.729651060700932e-06, + "loss": 0.6617, + "step": 4026 + }, + { + "epoch": 0.26, + "grad_norm": 0.863122284412384, + "learning_rate": 8.728967645652936e-06, + "loss": 0.5719, + "step": 4027 + }, + { + "epoch": 0.26, + "grad_norm": 0.9152368903160095, + "learning_rate": 8.728284073591083e-06, + "loss": 0.6337, + "step": 4028 + }, + { + "epoch": 0.26, + "grad_norm": 0.922824501991272, + "learning_rate": 8.727600344544159e-06, + "loss": 0.6418, + "step": 4029 + }, + { + "epoch": 0.26, + "grad_norm": 0.8921812176704407, + "learning_rate": 8.72691645854095e-06, + "loss": 0.625, + "step": 4030 + }, + { + "epoch": 0.26, + "grad_norm": 1.0182279348373413, + "learning_rate": 8.726232415610257e-06, + "loss": 0.6637, + "step": 4031 + }, + { + "epoch": 0.26, + "grad_norm": 0.8648515343666077, + "learning_rate": 8.725548215780877e-06, + "loss": 0.5921, + "step": 4032 + }, + { + "epoch": 0.26, + "grad_norm": 0.8446393609046936, + "learning_rate": 8.724863859081622e-06, + "loss": 0.6198, + "step": 4033 + }, + { + "epoch": 0.26, + "grad_norm": 0.8602930307388306, + "learning_rate": 8.724179345541308e-06, + "loss": 0.5806, + "step": 4034 + }, + { + "epoch": 0.26, + "grad_norm": 0.896755576133728, + "learning_rate": 8.72349467518876e-06, + "loss": 0.6625, + "step": 4035 + }, + { + "epoch": 0.26, + "grad_norm": 0.8296254277229309, + "learning_rate": 8.7228098480528e-06, + "loss": 0.6138, + "step": 4036 + }, + { + "epoch": 0.26, + "grad_norm": 0.9342056512832642, + "learning_rate": 8.72212486416227e-06, + "loss": 0.6454, + "step": 4037 + }, + { + "epoch": 0.26, + "grad_norm": 0.9745578169822693, + "learning_rate": 8.721439723546012e-06, + "loss": 0.6671, + "step": 4038 + }, + { + "epoch": 0.26, + "grad_norm": 0.9331424832344055, + "learning_rate": 8.720754426232871e-06, + "loss": 0.5942, + "step": 4039 + }, + { + "epoch": 0.26, + "grad_norm": 0.9035102128982544, + "learning_rate": 8.720068972251705e-06, + "loss": 0.6128, + "step": 4040 + }, + { + "epoch": 0.26, + "grad_norm": 0.8807538747787476, + "learning_rate": 8.719383361631376e-06, + "loss": 0.6637, + "step": 4041 + }, + { + "epoch": 0.26, + "grad_norm": 0.8650099635124207, + "learning_rate": 8.718697594400753e-06, + "loss": 0.6391, + "step": 4042 + }, + { + "epoch": 0.26, + "grad_norm": 0.8635523915290833, + "learning_rate": 8.71801167058871e-06, + "loss": 0.6161, + "step": 4043 + }, + { + "epoch": 0.26, + "grad_norm": 0.8900404572486877, + "learning_rate": 8.717325590224129e-06, + "loss": 0.6197, + "step": 4044 + }, + { + "epoch": 0.26, + "grad_norm": 0.9338827133178711, + "learning_rate": 8.7166393533359e-06, + "loss": 0.6293, + "step": 4045 + }, + { + "epoch": 0.26, + "grad_norm": 0.9154714941978455, + "learning_rate": 8.715952959952917e-06, + "loss": 0.6291, + "step": 4046 + }, + { + "epoch": 0.26, + "grad_norm": 0.8892160058021545, + "learning_rate": 8.715266410104081e-06, + "loss": 0.6401, + "step": 4047 + }, + { + "epoch": 0.26, + "grad_norm": 0.8630048036575317, + "learning_rate": 8.714579703818301e-06, + "loss": 0.6683, + "step": 4048 + }, + { + "epoch": 0.26, + "grad_norm": 0.8822508454322815, + "learning_rate": 8.713892841124492e-06, + "loss": 0.6048, + "step": 4049 + }, + { + "epoch": 0.26, + "grad_norm": 0.9436633586883545, + "learning_rate": 8.713205822051576e-06, + "loss": 0.6598, + "step": 4050 + }, + { + "epoch": 0.26, + "grad_norm": 0.8699237704277039, + "learning_rate": 8.71251864662848e-06, + "loss": 0.607, + "step": 4051 + }, + { + "epoch": 0.26, + "grad_norm": 0.9008539915084839, + "learning_rate": 8.711831314884137e-06, + "loss": 0.6121, + "step": 4052 + }, + { + "epoch": 0.26, + "grad_norm": 0.8727585077285767, + "learning_rate": 8.711143826847491e-06, + "loss": 0.6199, + "step": 4053 + }, + { + "epoch": 0.26, + "grad_norm": 0.8655484914779663, + "learning_rate": 8.71045618254749e-06, + "loss": 0.58, + "step": 4054 + }, + { + "epoch": 0.26, + "grad_norm": 0.9389612078666687, + "learning_rate": 8.709768382013084e-06, + "loss": 0.591, + "step": 4055 + }, + { + "epoch": 0.26, + "grad_norm": 0.9785065650939941, + "learning_rate": 8.709080425273238e-06, + "loss": 0.6502, + "step": 4056 + }, + { + "epoch": 0.26, + "grad_norm": 0.9361227750778198, + "learning_rate": 8.708392312356919e-06, + "loss": 0.6516, + "step": 4057 + }, + { + "epoch": 0.26, + "grad_norm": 0.9632472395896912, + "learning_rate": 8.7077040432931e-06, + "loss": 0.7088, + "step": 4058 + }, + { + "epoch": 0.26, + "grad_norm": 0.9708462953567505, + "learning_rate": 8.707015618110761e-06, + "loss": 0.6293, + "step": 4059 + }, + { + "epoch": 0.26, + "grad_norm": 0.8739571571350098, + "learning_rate": 8.706327036838891e-06, + "loss": 0.6009, + "step": 4060 + }, + { + "epoch": 0.26, + "grad_norm": 0.8531939387321472, + "learning_rate": 8.705638299506482e-06, + "loss": 0.5739, + "step": 4061 + }, + { + "epoch": 0.26, + "grad_norm": 1.007936716079712, + "learning_rate": 8.704949406142536e-06, + "loss": 0.6615, + "step": 4062 + }, + { + "epoch": 0.26, + "grad_norm": 0.9138129949569702, + "learning_rate": 8.70426035677606e-06, + "loss": 0.6759, + "step": 4063 + }, + { + "epoch": 0.26, + "grad_norm": 0.8993487358093262, + "learning_rate": 8.703571151436064e-06, + "loss": 0.5449, + "step": 4064 + }, + { + "epoch": 0.26, + "grad_norm": 0.9341960549354553, + "learning_rate": 8.702881790151572e-06, + "loss": 0.5835, + "step": 4065 + }, + { + "epoch": 0.26, + "grad_norm": 0.9405590891838074, + "learning_rate": 8.70219227295161e-06, + "loss": 0.7077, + "step": 4066 + }, + { + "epoch": 0.26, + "grad_norm": 0.8309204578399658, + "learning_rate": 8.70150259986521e-06, + "loss": 0.5906, + "step": 4067 + }, + { + "epoch": 0.26, + "grad_norm": 0.9234378337860107, + "learning_rate": 8.70081277092141e-06, + "loss": 0.6099, + "step": 4068 + }, + { + "epoch": 0.26, + "grad_norm": 0.9216628670692444, + "learning_rate": 8.700122786149261e-06, + "loss": 0.6042, + "step": 4069 + }, + { + "epoch": 0.26, + "grad_norm": 1.0060338973999023, + "learning_rate": 8.699432645577812e-06, + "loss": 0.7019, + "step": 4070 + }, + { + "epoch": 0.26, + "grad_norm": 0.8804594278335571, + "learning_rate": 8.698742349236124e-06, + "loss": 0.5648, + "step": 4071 + }, + { + "epoch": 0.26, + "grad_norm": 0.9049243927001953, + "learning_rate": 8.698051897153264e-06, + "loss": 0.6575, + "step": 4072 + }, + { + "epoch": 0.26, + "grad_norm": 0.8847574591636658, + "learning_rate": 8.697361289358302e-06, + "loss": 0.5955, + "step": 4073 + }, + { + "epoch": 0.26, + "grad_norm": 0.943061351776123, + "learning_rate": 8.696670525880318e-06, + "loss": 0.6276, + "step": 4074 + }, + { + "epoch": 0.26, + "grad_norm": 0.9238365292549133, + "learning_rate": 8.695979606748398e-06, + "loss": 0.5952, + "step": 4075 + }, + { + "epoch": 0.26, + "grad_norm": 1.0495893955230713, + "learning_rate": 8.695288531991633e-06, + "loss": 0.6174, + "step": 4076 + }, + { + "epoch": 0.26, + "grad_norm": 0.8936722278594971, + "learning_rate": 8.694597301639125e-06, + "loss": 0.6327, + "step": 4077 + }, + { + "epoch": 0.26, + "grad_norm": 0.9051495790481567, + "learning_rate": 8.693905915719976e-06, + "loss": 0.6268, + "step": 4078 + }, + { + "epoch": 0.26, + "grad_norm": 0.883945882320404, + "learning_rate": 8.693214374263298e-06, + "loss": 0.6083, + "step": 4079 + }, + { + "epoch": 0.26, + "grad_norm": 0.8976554274559021, + "learning_rate": 8.692522677298213e-06, + "loss": 0.6267, + "step": 4080 + }, + { + "epoch": 0.26, + "grad_norm": 0.8834015727043152, + "learning_rate": 8.691830824853843e-06, + "loss": 0.6268, + "step": 4081 + }, + { + "epoch": 0.26, + "grad_norm": 0.8857308030128479, + "learning_rate": 8.691138816959318e-06, + "loss": 0.6356, + "step": 4082 + }, + { + "epoch": 0.26, + "grad_norm": 0.9096359014511108, + "learning_rate": 8.690446653643778e-06, + "loss": 0.6125, + "step": 4083 + }, + { + "epoch": 0.26, + "grad_norm": 0.9551771879196167, + "learning_rate": 8.68975433493637e-06, + "loss": 0.627, + "step": 4084 + }, + { + "epoch": 0.26, + "grad_norm": 0.9362192749977112, + "learning_rate": 8.689061860866242e-06, + "loss": 0.5975, + "step": 4085 + }, + { + "epoch": 0.26, + "grad_norm": 0.9172837734222412, + "learning_rate": 8.68836923146255e-06, + "loss": 0.6706, + "step": 4086 + }, + { + "epoch": 0.26, + "grad_norm": 0.9365245699882507, + "learning_rate": 8.687676446754464e-06, + "loss": 0.6429, + "step": 4087 + }, + { + "epoch": 0.26, + "grad_norm": 0.9222214221954346, + "learning_rate": 8.686983506771149e-06, + "loss": 0.6116, + "step": 4088 + }, + { + "epoch": 0.26, + "grad_norm": 0.8810616731643677, + "learning_rate": 8.686290411541785e-06, + "loss": 0.5765, + "step": 4089 + }, + { + "epoch": 0.26, + "grad_norm": 0.8715612888336182, + "learning_rate": 8.685597161095555e-06, + "loss": 0.5724, + "step": 4090 + }, + { + "epoch": 0.26, + "grad_norm": 0.8744463920593262, + "learning_rate": 8.68490375546165e-06, + "loss": 0.5963, + "step": 4091 + }, + { + "epoch": 0.26, + "grad_norm": 0.936255931854248, + "learning_rate": 8.684210194669269e-06, + "loss": 0.6308, + "step": 4092 + }, + { + "epoch": 0.26, + "grad_norm": 0.9600224494934082, + "learning_rate": 8.68351647874761e-06, + "loss": 0.6213, + "step": 4093 + }, + { + "epoch": 0.26, + "grad_norm": 0.9066085815429688, + "learning_rate": 8.682822607725887e-06, + "loss": 0.6384, + "step": 4094 + }, + { + "epoch": 0.26, + "grad_norm": 0.9050360918045044, + "learning_rate": 8.682128581633316e-06, + "loss": 0.6109, + "step": 4095 + }, + { + "epoch": 0.26, + "grad_norm": 0.8644648194313049, + "learning_rate": 8.68143440049912e-06, + "loss": 0.6181, + "step": 4096 + }, + { + "epoch": 0.26, + "grad_norm": 0.8626109957695007, + "learning_rate": 8.68074006435253e-06, + "loss": 0.6159, + "step": 4097 + }, + { + "epoch": 0.26, + "grad_norm": 0.9877548217773438, + "learning_rate": 8.680045573222776e-06, + "loss": 0.6638, + "step": 4098 + }, + { + "epoch": 0.26, + "grad_norm": 0.9884246587753296, + "learning_rate": 8.679350927139108e-06, + "loss": 0.6608, + "step": 4099 + }, + { + "epoch": 0.26, + "grad_norm": 0.8285159468650818, + "learning_rate": 8.678656126130768e-06, + "loss": 0.6032, + "step": 4100 + }, + { + "epoch": 0.26, + "grad_norm": 0.9355902671813965, + "learning_rate": 8.677961170227021e-06, + "loss": 0.6451, + "step": 4101 + }, + { + "epoch": 0.26, + "grad_norm": 0.8407034873962402, + "learning_rate": 8.677266059457121e-06, + "loss": 0.6093, + "step": 4102 + }, + { + "epoch": 0.26, + "grad_norm": 0.8834094405174255, + "learning_rate": 8.67657079385034e-06, + "loss": 0.6294, + "step": 4103 + }, + { + "epoch": 0.26, + "grad_norm": 0.953618049621582, + "learning_rate": 8.675875373435951e-06, + "loss": 0.5979, + "step": 4104 + }, + { + "epoch": 0.26, + "grad_norm": 0.9017611742019653, + "learning_rate": 8.67517979824324e-06, + "loss": 0.5706, + "step": 4105 + }, + { + "epoch": 0.26, + "grad_norm": 0.9118735194206238, + "learning_rate": 8.674484068301492e-06, + "loss": 0.6226, + "step": 4106 + }, + { + "epoch": 0.26, + "grad_norm": 0.8959848284721375, + "learning_rate": 8.673788183640001e-06, + "loss": 0.6742, + "step": 4107 + }, + { + "epoch": 0.26, + "grad_norm": 0.9677282571792603, + "learning_rate": 8.673092144288071e-06, + "loss": 0.6542, + "step": 4108 + }, + { + "epoch": 0.26, + "grad_norm": 0.8986738324165344, + "learning_rate": 8.672395950275008e-06, + "loss": 0.6457, + "step": 4109 + }, + { + "epoch": 0.26, + "grad_norm": 0.9157966375350952, + "learning_rate": 8.671699601630127e-06, + "loss": 0.6178, + "step": 4110 + }, + { + "epoch": 0.26, + "grad_norm": 0.8645839095115662, + "learning_rate": 8.67100309838275e-06, + "loss": 0.5971, + "step": 4111 + }, + { + "epoch": 0.26, + "grad_norm": 0.9088585376739502, + "learning_rate": 8.670306440562202e-06, + "loss": 0.635, + "step": 4112 + }, + { + "epoch": 0.26, + "grad_norm": 0.8277181386947632, + "learning_rate": 8.669609628197817e-06, + "loss": 0.5686, + "step": 4113 + }, + { + "epoch": 0.26, + "grad_norm": 0.9371722340583801, + "learning_rate": 8.668912661318938e-06, + "loss": 0.6229, + "step": 4114 + }, + { + "epoch": 0.26, + "grad_norm": 0.8745486736297607, + "learning_rate": 8.66821553995491e-06, + "loss": 0.6389, + "step": 4115 + }, + { + "epoch": 0.26, + "grad_norm": 0.8587163686752319, + "learning_rate": 8.667518264135085e-06, + "loss": 0.5837, + "step": 4116 + }, + { + "epoch": 0.26, + "grad_norm": 0.895158588886261, + "learning_rate": 8.666820833888825e-06, + "loss": 0.5817, + "step": 4117 + }, + { + "epoch": 0.26, + "grad_norm": 0.9290642738342285, + "learning_rate": 8.666123249245495e-06, + "loss": 0.6401, + "step": 4118 + }, + { + "epoch": 0.26, + "grad_norm": 0.9100977778434753, + "learning_rate": 8.665425510234469e-06, + "loss": 0.6622, + "step": 4119 + }, + { + "epoch": 0.26, + "grad_norm": 0.8745128512382507, + "learning_rate": 8.664727616885126e-06, + "loss": 0.613, + "step": 4120 + }, + { + "epoch": 0.26, + "grad_norm": 0.8843961954116821, + "learning_rate": 8.66402956922685e-06, + "loss": 0.6118, + "step": 4121 + }, + { + "epoch": 0.26, + "grad_norm": 0.9334408044815063, + "learning_rate": 8.663331367289038e-06, + "loss": 0.6604, + "step": 4122 + }, + { + "epoch": 0.26, + "grad_norm": 0.8388084769248962, + "learning_rate": 8.662633011101084e-06, + "loss": 0.5691, + "step": 4123 + }, + { + "epoch": 0.26, + "grad_norm": 0.8637480139732361, + "learning_rate": 8.661934500692395e-06, + "loss": 0.6299, + "step": 4124 + }, + { + "epoch": 0.26, + "grad_norm": 0.8830687403678894, + "learning_rate": 8.661235836092385e-06, + "loss": 0.6879, + "step": 4125 + }, + { + "epoch": 0.26, + "grad_norm": 0.91837477684021, + "learning_rate": 8.660537017330468e-06, + "loss": 0.6245, + "step": 4126 + }, + { + "epoch": 0.26, + "grad_norm": 0.9373289346694946, + "learning_rate": 8.659838044436074e-06, + "loss": 0.6387, + "step": 4127 + }, + { + "epoch": 0.26, + "grad_norm": 0.8295657634735107, + "learning_rate": 8.65913891743863e-06, + "loss": 0.546, + "step": 4128 + }, + { + "epoch": 0.26, + "grad_norm": 0.8394411206245422, + "learning_rate": 8.658439636367574e-06, + "loss": 0.5775, + "step": 4129 + }, + { + "epoch": 0.26, + "grad_norm": 0.9311953186988831, + "learning_rate": 8.657740201252353e-06, + "loss": 0.6699, + "step": 4130 + }, + { + "epoch": 0.26, + "grad_norm": 0.9466168284416199, + "learning_rate": 8.657040612122418e-06, + "loss": 0.6419, + "step": 4131 + }, + { + "epoch": 0.26, + "grad_norm": 0.9362534880638123, + "learning_rate": 8.656340869007225e-06, + "loss": 0.6982, + "step": 4132 + }, + { + "epoch": 0.26, + "grad_norm": 0.9404389262199402, + "learning_rate": 8.655640971936236e-06, + "loss": 0.6242, + "step": 4133 + }, + { + "epoch": 0.26, + "grad_norm": 0.9012186527252197, + "learning_rate": 8.654940920938922e-06, + "loss": 0.6187, + "step": 4134 + }, + { + "epoch": 0.26, + "grad_norm": 0.8309886455535889, + "learning_rate": 8.654240716044762e-06, + "loss": 0.6226, + "step": 4135 + }, + { + "epoch": 0.26, + "grad_norm": 0.9367273449897766, + "learning_rate": 8.653540357283236e-06, + "loss": 0.5919, + "step": 4136 + }, + { + "epoch": 0.26, + "grad_norm": 0.8980950713157654, + "learning_rate": 8.652839844683836e-06, + "loss": 0.5913, + "step": 4137 + }, + { + "epoch": 0.26, + "grad_norm": 0.8785884976387024, + "learning_rate": 8.652139178276058e-06, + "loss": 0.6348, + "step": 4138 + }, + { + "epoch": 0.26, + "grad_norm": 0.8896494507789612, + "learning_rate": 8.651438358089403e-06, + "loss": 0.6578, + "step": 4139 + }, + { + "epoch": 0.26, + "grad_norm": 0.9590379595756531, + "learning_rate": 8.650737384153382e-06, + "loss": 0.6917, + "step": 4140 + }, + { + "epoch": 0.26, + "grad_norm": 0.9541071653366089, + "learning_rate": 8.65003625649751e-06, + "loss": 0.582, + "step": 4141 + }, + { + "epoch": 0.26, + "grad_norm": 0.9491351246833801, + "learning_rate": 8.649334975151307e-06, + "loss": 0.6342, + "step": 4142 + }, + { + "epoch": 0.26, + "grad_norm": 0.979164183139801, + "learning_rate": 8.648633540144304e-06, + "loss": 0.6439, + "step": 4143 + }, + { + "epoch": 0.26, + "grad_norm": 0.8879642486572266, + "learning_rate": 8.647931951506037e-06, + "loss": 0.6109, + "step": 4144 + }, + { + "epoch": 0.26, + "grad_norm": 0.8990030884742737, + "learning_rate": 8.647230209266043e-06, + "loss": 0.6334, + "step": 4145 + }, + { + "epoch": 0.26, + "grad_norm": 0.9525482654571533, + "learning_rate": 8.646528313453876e-06, + "loss": 0.6203, + "step": 4146 + }, + { + "epoch": 0.26, + "grad_norm": 0.8282102942466736, + "learning_rate": 8.645826264099085e-06, + "loss": 0.573, + "step": 4147 + }, + { + "epoch": 0.26, + "grad_norm": 0.8854700922966003, + "learning_rate": 8.645124061231234e-06, + "loss": 0.6247, + "step": 4148 + }, + { + "epoch": 0.26, + "grad_norm": 0.8921488523483276, + "learning_rate": 8.644421704879889e-06, + "loss": 0.5295, + "step": 4149 + }, + { + "epoch": 0.26, + "grad_norm": 0.8470342755317688, + "learning_rate": 8.643719195074622e-06, + "loss": 0.5909, + "step": 4150 + }, + { + "epoch": 0.26, + "grad_norm": 0.8630185127258301, + "learning_rate": 8.643016531845017e-06, + "loss": 0.6125, + "step": 4151 + }, + { + "epoch": 0.26, + "grad_norm": 1.019774079322815, + "learning_rate": 8.642313715220659e-06, + "loss": 0.6089, + "step": 4152 + }, + { + "epoch": 0.26, + "grad_norm": 0.8334149122238159, + "learning_rate": 8.641610745231142e-06, + "loss": 0.5966, + "step": 4153 + }, + { + "epoch": 0.26, + "grad_norm": 0.8783389925956726, + "learning_rate": 8.640907621906062e-06, + "loss": 0.5849, + "step": 4154 + }, + { + "epoch": 0.26, + "grad_norm": 0.9363436698913574, + "learning_rate": 8.640204345275029e-06, + "loss": 0.6535, + "step": 4155 + }, + { + "epoch": 0.26, + "grad_norm": 0.9536002278327942, + "learning_rate": 8.639500915367656e-06, + "loss": 0.6491, + "step": 4156 + }, + { + "epoch": 0.26, + "grad_norm": 0.9206741452217102, + "learning_rate": 8.63879733221356e-06, + "loss": 0.657, + "step": 4157 + }, + { + "epoch": 0.26, + "grad_norm": 0.943328320980072, + "learning_rate": 8.638093595842366e-06, + "loss": 0.6666, + "step": 4158 + }, + { + "epoch": 0.26, + "grad_norm": 0.9073593616485596, + "learning_rate": 8.637389706283705e-06, + "loss": 0.5944, + "step": 4159 + }, + { + "epoch": 0.26, + "grad_norm": 0.9186743497848511, + "learning_rate": 8.636685663567219e-06, + "loss": 0.6469, + "step": 4160 + }, + { + "epoch": 0.26, + "grad_norm": 0.8272576928138733, + "learning_rate": 8.635981467722552e-06, + "loss": 0.6093, + "step": 4161 + }, + { + "epoch": 0.26, + "grad_norm": 0.827934741973877, + "learning_rate": 8.635277118779353e-06, + "loss": 0.5911, + "step": 4162 + }, + { + "epoch": 0.26, + "grad_norm": 0.8880283832550049, + "learning_rate": 8.63457261676728e-06, + "loss": 0.6092, + "step": 4163 + }, + { + "epoch": 0.26, + "grad_norm": 0.8852022886276245, + "learning_rate": 8.633867961715998e-06, + "loss": 0.5906, + "step": 4164 + }, + { + "epoch": 0.26, + "grad_norm": 0.8944527506828308, + "learning_rate": 8.633163153655178e-06, + "loss": 0.6314, + "step": 4165 + }, + { + "epoch": 0.26, + "grad_norm": 0.9245870113372803, + "learning_rate": 8.632458192614495e-06, + "loss": 0.6901, + "step": 4166 + }, + { + "epoch": 0.26, + "grad_norm": 0.8997650146484375, + "learning_rate": 8.631753078623635e-06, + "loss": 0.5836, + "step": 4167 + }, + { + "epoch": 0.26, + "grad_norm": 0.935129702091217, + "learning_rate": 8.631047811712288e-06, + "loss": 0.6776, + "step": 4168 + }, + { + "epoch": 0.26, + "grad_norm": 0.9850293397903442, + "learning_rate": 8.630342391910147e-06, + "loss": 0.6637, + "step": 4169 + }, + { + "epoch": 0.26, + "grad_norm": 0.9164685010910034, + "learning_rate": 8.629636819246919e-06, + "loss": 0.6207, + "step": 4170 + }, + { + "epoch": 0.26, + "grad_norm": 0.8634175658226013, + "learning_rate": 8.628931093752308e-06, + "loss": 0.6029, + "step": 4171 + }, + { + "epoch": 0.26, + "grad_norm": 0.8743361234664917, + "learning_rate": 8.628225215456037e-06, + "loss": 0.6149, + "step": 4172 + }, + { + "epoch": 0.26, + "grad_norm": 0.9644536972045898, + "learning_rate": 8.627519184387821e-06, + "loss": 0.6623, + "step": 4173 + }, + { + "epoch": 0.26, + "grad_norm": 0.9518513679504395, + "learning_rate": 8.626813000577393e-06, + "loss": 0.6665, + "step": 4174 + }, + { + "epoch": 0.26, + "grad_norm": 0.9795065522193909, + "learning_rate": 8.626106664054483e-06, + "loss": 0.6404, + "step": 4175 + }, + { + "epoch": 0.26, + "grad_norm": 0.8946532011032104, + "learning_rate": 8.62540017484884e-06, + "loss": 0.6109, + "step": 4176 + }, + { + "epoch": 0.26, + "grad_norm": 0.8872295618057251, + "learning_rate": 8.624693532990205e-06, + "loss": 0.591, + "step": 4177 + }, + { + "epoch": 0.26, + "grad_norm": 0.9337349534034729, + "learning_rate": 8.623986738508334e-06, + "loss": 0.641, + "step": 4178 + }, + { + "epoch": 0.26, + "grad_norm": 0.8817663788795471, + "learning_rate": 8.62327979143299e-06, + "loss": 0.5987, + "step": 4179 + }, + { + "epoch": 0.26, + "grad_norm": 0.9417575001716614, + "learning_rate": 8.622572691793937e-06, + "loss": 0.5693, + "step": 4180 + }, + { + "epoch": 0.26, + "grad_norm": 0.8882385492324829, + "learning_rate": 8.621865439620952e-06, + "loss": 0.5992, + "step": 4181 + }, + { + "epoch": 0.26, + "grad_norm": 0.8872155547142029, + "learning_rate": 8.621158034943812e-06, + "loss": 0.6055, + "step": 4182 + }, + { + "epoch": 0.27, + "grad_norm": 0.8701667189598083, + "learning_rate": 8.620450477792303e-06, + "loss": 0.6059, + "step": 4183 + }, + { + "epoch": 0.27, + "grad_norm": 0.8833332657814026, + "learning_rate": 8.619742768196221e-06, + "loss": 0.5834, + "step": 4184 + }, + { + "epoch": 0.27, + "grad_norm": 0.9163500070571899, + "learning_rate": 8.619034906185362e-06, + "loss": 0.6927, + "step": 4185 + }, + { + "epoch": 0.27, + "grad_norm": 0.9250738620758057, + "learning_rate": 8.618326891789534e-06, + "loss": 0.6408, + "step": 4186 + }, + { + "epoch": 0.27, + "grad_norm": 0.9231948852539062, + "learning_rate": 8.617618725038545e-06, + "loss": 0.6151, + "step": 4187 + }, + { + "epoch": 0.27, + "grad_norm": 0.8991936445236206, + "learning_rate": 8.61691040596222e-06, + "loss": 0.6433, + "step": 4188 + }, + { + "epoch": 0.27, + "grad_norm": 0.9138967990875244, + "learning_rate": 8.616201934590379e-06, + "loss": 0.6513, + "step": 4189 + }, + { + "epoch": 0.27, + "grad_norm": 0.9194620251655579, + "learning_rate": 8.615493310952852e-06, + "loss": 0.6536, + "step": 4190 + }, + { + "epoch": 0.27, + "grad_norm": 0.888721227645874, + "learning_rate": 8.614784535079482e-06, + "loss": 0.606, + "step": 4191 + }, + { + "epoch": 0.27, + "grad_norm": 0.9047959446907043, + "learning_rate": 8.614075607000108e-06, + "loss": 0.6485, + "step": 4192 + }, + { + "epoch": 0.27, + "grad_norm": 0.9056040644645691, + "learning_rate": 8.613366526744584e-06, + "loss": 0.5843, + "step": 4193 + }, + { + "epoch": 0.27, + "grad_norm": 0.9224606156349182, + "learning_rate": 8.612657294342765e-06, + "loss": 0.5978, + "step": 4194 + }, + { + "epoch": 0.27, + "grad_norm": 0.9035705327987671, + "learning_rate": 8.611947909824514e-06, + "loss": 0.651, + "step": 4195 + }, + { + "epoch": 0.27, + "grad_norm": 0.8923839330673218, + "learning_rate": 8.611238373219703e-06, + "loss": 0.5926, + "step": 4196 + }, + { + "epoch": 0.27, + "grad_norm": 0.9223050475120544, + "learning_rate": 8.610528684558206e-06, + "loss": 0.5893, + "step": 4197 + }, + { + "epoch": 0.27, + "grad_norm": 0.9211618900299072, + "learning_rate": 8.609818843869907e-06, + "loss": 0.6018, + "step": 4198 + }, + { + "epoch": 0.27, + "grad_norm": 0.8177082538604736, + "learning_rate": 8.609108851184693e-06, + "loss": 0.587, + "step": 4199 + }, + { + "epoch": 0.27, + "grad_norm": 0.8298165202140808, + "learning_rate": 8.608398706532462e-06, + "loss": 0.6308, + "step": 4200 + }, + { + "epoch": 0.27, + "grad_norm": 0.8628758192062378, + "learning_rate": 8.607688409943112e-06, + "loss": 0.5662, + "step": 4201 + }, + { + "epoch": 0.27, + "grad_norm": 0.8658290505409241, + "learning_rate": 8.606977961446554e-06, + "loss": 0.6113, + "step": 4202 + }, + { + "epoch": 0.27, + "grad_norm": 0.9051910638809204, + "learning_rate": 8.606267361072704e-06, + "loss": 0.6256, + "step": 4203 + }, + { + "epoch": 0.27, + "grad_norm": 0.8783097267150879, + "learning_rate": 8.605556608851478e-06, + "loss": 0.6607, + "step": 4204 + }, + { + "epoch": 0.27, + "grad_norm": 0.9676861763000488, + "learning_rate": 8.604845704812808e-06, + "loss": 0.6564, + "step": 4205 + }, + { + "epoch": 0.27, + "grad_norm": 0.9138243198394775, + "learning_rate": 8.604134648986625e-06, + "loss": 0.5926, + "step": 4206 + }, + { + "epoch": 0.27, + "grad_norm": 0.9041840434074402, + "learning_rate": 8.603423441402868e-06, + "loss": 0.6202, + "step": 4207 + }, + { + "epoch": 0.27, + "grad_norm": 0.8703333735466003, + "learning_rate": 8.602712082091487e-06, + "loss": 0.573, + "step": 4208 + }, + { + "epoch": 0.27, + "grad_norm": 0.9118040204048157, + "learning_rate": 8.602000571082432e-06, + "loss": 0.6348, + "step": 4209 + }, + { + "epoch": 0.27, + "grad_norm": 0.9517326354980469, + "learning_rate": 8.601288908405665e-06, + "loss": 0.622, + "step": 4210 + }, + { + "epoch": 0.27, + "grad_norm": 0.9293259978294373, + "learning_rate": 8.60057709409115e-06, + "loss": 0.6272, + "step": 4211 + }, + { + "epoch": 0.27, + "grad_norm": 0.8603157997131348, + "learning_rate": 8.599865128168858e-06, + "loss": 0.5833, + "step": 4212 + }, + { + "epoch": 0.27, + "grad_norm": 0.8905279040336609, + "learning_rate": 8.599153010668768e-06, + "loss": 0.5917, + "step": 4213 + }, + { + "epoch": 0.27, + "grad_norm": 0.9047275185585022, + "learning_rate": 8.598440741620868e-06, + "loss": 0.6405, + "step": 4214 + }, + { + "epoch": 0.27, + "grad_norm": 0.8636517524719238, + "learning_rate": 8.597728321055144e-06, + "loss": 0.5763, + "step": 4215 + }, + { + "epoch": 0.27, + "grad_norm": 0.8629072904586792, + "learning_rate": 8.597015749001596e-06, + "loss": 0.6013, + "step": 4216 + }, + { + "epoch": 0.27, + "grad_norm": 0.8857645988464355, + "learning_rate": 8.59630302549023e-06, + "loss": 0.6191, + "step": 4217 + }, + { + "epoch": 0.27, + "grad_norm": 0.9491539597511292, + "learning_rate": 8.595590150551052e-06, + "loss": 0.6271, + "step": 4218 + }, + { + "epoch": 0.27, + "grad_norm": 0.9557621479034424, + "learning_rate": 8.59487712421408e-06, + "loss": 0.6135, + "step": 4219 + }, + { + "epoch": 0.27, + "grad_norm": 0.9056437611579895, + "learning_rate": 8.594163946509339e-06, + "loss": 0.6211, + "step": 4220 + }, + { + "epoch": 0.27, + "grad_norm": 0.8638589978218079, + "learning_rate": 8.593450617466859e-06, + "loss": 0.5999, + "step": 4221 + }, + { + "epoch": 0.27, + "grad_norm": 0.9568116664886475, + "learning_rate": 8.592737137116673e-06, + "loss": 0.6038, + "step": 4222 + }, + { + "epoch": 0.27, + "grad_norm": 0.9060722589492798, + "learning_rate": 8.592023505488825e-06, + "loss": 0.6373, + "step": 4223 + }, + { + "epoch": 0.27, + "grad_norm": 0.7833012342453003, + "learning_rate": 8.591309722613362e-06, + "loss": 0.569, + "step": 4224 + }, + { + "epoch": 0.27, + "grad_norm": 0.9138297438621521, + "learning_rate": 8.590595788520342e-06, + "loss": 0.5829, + "step": 4225 + }, + { + "epoch": 0.27, + "grad_norm": 0.8410037755966187, + "learning_rate": 8.589881703239821e-06, + "loss": 0.5491, + "step": 4226 + }, + { + "epoch": 0.27, + "grad_norm": 0.8916024565696716, + "learning_rate": 8.58916746680187e-06, + "loss": 0.6094, + "step": 4227 + }, + { + "epoch": 0.27, + "grad_norm": 0.9920042157173157, + "learning_rate": 8.588453079236565e-06, + "loss": 0.6644, + "step": 4228 + }, + { + "epoch": 0.27, + "grad_norm": 0.9212594032287598, + "learning_rate": 8.587738540573984e-06, + "loss": 0.5878, + "step": 4229 + }, + { + "epoch": 0.27, + "grad_norm": 0.8286495804786682, + "learning_rate": 8.587023850844212e-06, + "loss": 0.6002, + "step": 4230 + }, + { + "epoch": 0.27, + "grad_norm": 0.8914030194282532, + "learning_rate": 8.586309010077345e-06, + "loss": 0.6672, + "step": 4231 + }, + { + "epoch": 0.27, + "grad_norm": 0.8013595342636108, + "learning_rate": 8.585594018303482e-06, + "loss": 0.6138, + "step": 4232 + }, + { + "epoch": 0.27, + "grad_norm": 0.8565639853477478, + "learning_rate": 8.584878875552727e-06, + "loss": 0.6073, + "step": 4233 + }, + { + "epoch": 0.27, + "grad_norm": 0.818520188331604, + "learning_rate": 8.584163581855194e-06, + "loss": 0.6158, + "step": 4234 + }, + { + "epoch": 0.27, + "grad_norm": 0.9362378120422363, + "learning_rate": 8.583448137241002e-06, + "loss": 0.629, + "step": 4235 + }, + { + "epoch": 0.27, + "grad_norm": 0.9456666111946106, + "learning_rate": 8.582732541740273e-06, + "loss": 0.617, + "step": 4236 + }, + { + "epoch": 0.27, + "grad_norm": 0.8908970952033997, + "learning_rate": 8.582016795383142e-06, + "loss": 0.5931, + "step": 4237 + }, + { + "epoch": 0.27, + "grad_norm": 0.8807900547981262, + "learning_rate": 8.581300898199743e-06, + "loss": 0.5685, + "step": 4238 + }, + { + "epoch": 0.27, + "grad_norm": 0.8527096509933472, + "learning_rate": 8.580584850220222e-06, + "loss": 0.6016, + "step": 4239 + }, + { + "epoch": 0.27, + "grad_norm": 0.942776620388031, + "learning_rate": 8.57986865147473e-06, + "loss": 0.5871, + "step": 4240 + }, + { + "epoch": 0.27, + "grad_norm": 0.9495031237602234, + "learning_rate": 8.57915230199342e-06, + "loss": 0.6078, + "step": 4241 + }, + { + "epoch": 0.27, + "grad_norm": 0.9065079092979431, + "learning_rate": 8.578435801806461e-06, + "loss": 0.6451, + "step": 4242 + }, + { + "epoch": 0.27, + "grad_norm": 0.8677025437355042, + "learning_rate": 8.577719150944017e-06, + "loss": 0.6228, + "step": 4243 + }, + { + "epoch": 0.27, + "grad_norm": 0.9314882755279541, + "learning_rate": 8.577002349436264e-06, + "loss": 0.5969, + "step": 4244 + }, + { + "epoch": 0.27, + "grad_norm": 1.0232270956039429, + "learning_rate": 8.57628539731339e-06, + "loss": 0.6652, + "step": 4245 + }, + { + "epoch": 0.27, + "grad_norm": 0.8840213418006897, + "learning_rate": 8.575568294605574e-06, + "loss": 0.6591, + "step": 4246 + }, + { + "epoch": 0.27, + "grad_norm": 0.935551643371582, + "learning_rate": 8.574851041343018e-06, + "loss": 0.5936, + "step": 4247 + }, + { + "epoch": 0.27, + "grad_norm": 0.9176490902900696, + "learning_rate": 8.574133637555921e-06, + "loss": 0.6103, + "step": 4248 + }, + { + "epoch": 0.27, + "grad_norm": 0.8537380695343018, + "learning_rate": 8.57341608327449e-06, + "loss": 0.5831, + "step": 4249 + }, + { + "epoch": 0.27, + "grad_norm": 0.8898982405662537, + "learning_rate": 8.572698378528937e-06, + "loss": 0.6522, + "step": 4250 + }, + { + "epoch": 0.27, + "grad_norm": 0.8428791761398315, + "learning_rate": 8.571980523349485e-06, + "loss": 0.6097, + "step": 4251 + }, + { + "epoch": 0.27, + "grad_norm": 0.9399141669273376, + "learning_rate": 8.57126251776636e-06, + "loss": 0.6514, + "step": 4252 + }, + { + "epoch": 0.27, + "grad_norm": 0.9143974781036377, + "learning_rate": 8.570544361809792e-06, + "loss": 0.6807, + "step": 4253 + }, + { + "epoch": 0.27, + "grad_norm": 0.9048095941543579, + "learning_rate": 8.569826055510025e-06, + "loss": 0.5986, + "step": 4254 + }, + { + "epoch": 0.27, + "grad_norm": 0.8654329776763916, + "learning_rate": 8.569107598897296e-06, + "loss": 0.5274, + "step": 4255 + }, + { + "epoch": 0.27, + "grad_norm": 0.9597179293632507, + "learning_rate": 8.568388992001868e-06, + "loss": 0.5958, + "step": 4256 + }, + { + "epoch": 0.27, + "grad_norm": 0.8860706090927124, + "learning_rate": 8.56767023485399e-06, + "loss": 0.5915, + "step": 4257 + }, + { + "epoch": 0.27, + "grad_norm": 0.8715736269950867, + "learning_rate": 8.56695132748393e-06, + "loss": 0.6533, + "step": 4258 + }, + { + "epoch": 0.27, + "grad_norm": 0.9161938428878784, + "learning_rate": 8.566232269921957e-06, + "loss": 0.7043, + "step": 4259 + }, + { + "epoch": 0.27, + "grad_norm": 0.8063193559646606, + "learning_rate": 8.565513062198351e-06, + "loss": 0.6129, + "step": 4260 + }, + { + "epoch": 0.27, + "grad_norm": 0.9243603944778442, + "learning_rate": 8.564793704343392e-06, + "loss": 0.5744, + "step": 4261 + }, + { + "epoch": 0.27, + "grad_norm": 0.9865625500679016, + "learning_rate": 8.564074196387371e-06, + "loss": 0.6796, + "step": 4262 + }, + { + "epoch": 0.27, + "grad_norm": 0.8942394256591797, + "learning_rate": 8.563354538360585e-06, + "loss": 0.6083, + "step": 4263 + }, + { + "epoch": 0.27, + "grad_norm": 0.9242582321166992, + "learning_rate": 8.562634730293335e-06, + "loss": 0.5982, + "step": 4264 + }, + { + "epoch": 0.27, + "grad_norm": 0.9217408895492554, + "learning_rate": 8.56191477221593e-06, + "loss": 0.6569, + "step": 4265 + }, + { + "epoch": 0.27, + "grad_norm": 0.9580654501914978, + "learning_rate": 8.561194664158685e-06, + "loss": 0.6733, + "step": 4266 + }, + { + "epoch": 0.27, + "grad_norm": 0.9393530488014221, + "learning_rate": 8.560474406151921e-06, + "loss": 0.668, + "step": 4267 + }, + { + "epoch": 0.27, + "grad_norm": 0.9454185962677002, + "learning_rate": 8.559753998225965e-06, + "loss": 0.6592, + "step": 4268 + }, + { + "epoch": 0.27, + "grad_norm": 0.9573476910591125, + "learning_rate": 8.559033440411155e-06, + "loss": 0.5933, + "step": 4269 + }, + { + "epoch": 0.27, + "grad_norm": 0.8191101551055908, + "learning_rate": 8.558312732737825e-06, + "loss": 0.5713, + "step": 4270 + }, + { + "epoch": 0.27, + "grad_norm": 0.8455954790115356, + "learning_rate": 8.557591875236323e-06, + "loss": 0.5984, + "step": 4271 + }, + { + "epoch": 0.27, + "grad_norm": 0.8731233477592468, + "learning_rate": 8.556870867937006e-06, + "loss": 0.5876, + "step": 4272 + }, + { + "epoch": 0.27, + "grad_norm": 0.8729333281517029, + "learning_rate": 8.55614971087023e-06, + "loss": 0.6102, + "step": 4273 + }, + { + "epoch": 0.27, + "grad_norm": 0.9293901324272156, + "learning_rate": 8.555428404066359e-06, + "loss": 0.6141, + "step": 4274 + }, + { + "epoch": 0.27, + "grad_norm": 0.8134398460388184, + "learning_rate": 8.554706947555766e-06, + "loss": 0.5814, + "step": 4275 + }, + { + "epoch": 0.27, + "grad_norm": 0.9086621999740601, + "learning_rate": 8.553985341368832e-06, + "loss": 0.6756, + "step": 4276 + }, + { + "epoch": 0.27, + "grad_norm": 0.8340302109718323, + "learning_rate": 8.553263585535937e-06, + "loss": 0.6272, + "step": 4277 + }, + { + "epoch": 0.27, + "grad_norm": 0.9644330143928528, + "learning_rate": 8.552541680087472e-06, + "loss": 0.611, + "step": 4278 + }, + { + "epoch": 0.27, + "grad_norm": 0.9474432468414307, + "learning_rate": 8.551819625053837e-06, + "loss": 0.6581, + "step": 4279 + }, + { + "epoch": 0.27, + "grad_norm": 0.8727615475654602, + "learning_rate": 8.551097420465432e-06, + "loss": 0.6059, + "step": 4280 + }, + { + "epoch": 0.27, + "grad_norm": 0.9292715191841125, + "learning_rate": 8.55037506635267e-06, + "loss": 0.5987, + "step": 4281 + }, + { + "epoch": 0.27, + "grad_norm": 0.8968216180801392, + "learning_rate": 8.549652562745963e-06, + "loss": 0.6109, + "step": 4282 + }, + { + "epoch": 0.27, + "grad_norm": 0.919104278087616, + "learning_rate": 8.548929909675736e-06, + "loss": 0.6043, + "step": 4283 + }, + { + "epoch": 0.27, + "grad_norm": 0.963595449924469, + "learning_rate": 8.548207107172417e-06, + "loss": 0.6421, + "step": 4284 + }, + { + "epoch": 0.27, + "grad_norm": 0.9195282459259033, + "learning_rate": 8.547484155266439e-06, + "loss": 0.6284, + "step": 4285 + }, + { + "epoch": 0.27, + "grad_norm": 0.9050331711769104, + "learning_rate": 8.546761053988244e-06, + "loss": 0.6787, + "step": 4286 + }, + { + "epoch": 0.27, + "grad_norm": 0.8294732570648193, + "learning_rate": 8.546037803368279e-06, + "loss": 0.5982, + "step": 4287 + }, + { + "epoch": 0.27, + "grad_norm": 0.8532490134239197, + "learning_rate": 8.545314403436998e-06, + "loss": 0.5664, + "step": 4288 + }, + { + "epoch": 0.27, + "grad_norm": 0.9732022881507874, + "learning_rate": 8.54459085422486e-06, + "loss": 0.6, + "step": 4289 + }, + { + "epoch": 0.27, + "grad_norm": 0.9613706469535828, + "learning_rate": 8.543867155762335e-06, + "loss": 0.6525, + "step": 4290 + }, + { + "epoch": 0.27, + "grad_norm": 0.9835689663887024, + "learning_rate": 8.543143308079888e-06, + "loss": 0.6368, + "step": 4291 + }, + { + "epoch": 0.27, + "grad_norm": 0.857182502746582, + "learning_rate": 8.542419311208006e-06, + "loss": 0.6265, + "step": 4292 + }, + { + "epoch": 0.27, + "grad_norm": 0.8491384983062744, + "learning_rate": 8.541695165177169e-06, + "loss": 0.664, + "step": 4293 + }, + { + "epoch": 0.27, + "grad_norm": 0.9267544150352478, + "learning_rate": 8.540970870017867e-06, + "loss": 0.6202, + "step": 4294 + }, + { + "epoch": 0.27, + "grad_norm": 0.9041336178779602, + "learning_rate": 8.540246425760602e-06, + "loss": 0.5934, + "step": 4295 + }, + { + "epoch": 0.27, + "grad_norm": 0.9102574586868286, + "learning_rate": 8.539521832435874e-06, + "loss": 0.5931, + "step": 4296 + }, + { + "epoch": 0.27, + "grad_norm": 0.8750420212745667, + "learning_rate": 8.538797090074196e-06, + "loss": 0.6128, + "step": 4297 + }, + { + "epoch": 0.27, + "grad_norm": 0.9216861724853516, + "learning_rate": 8.538072198706081e-06, + "loss": 0.6311, + "step": 4298 + }, + { + "epoch": 0.27, + "grad_norm": 0.8805850744247437, + "learning_rate": 8.537347158362056e-06, + "loss": 0.58, + "step": 4299 + }, + { + "epoch": 0.27, + "grad_norm": 0.8909803032875061, + "learning_rate": 8.536621969072648e-06, + "loss": 0.607, + "step": 4300 + }, + { + "epoch": 0.27, + "grad_norm": 0.9267565608024597, + "learning_rate": 8.53589663086839e-06, + "loss": 0.6457, + "step": 4301 + }, + { + "epoch": 0.27, + "grad_norm": 0.9968888759613037, + "learning_rate": 8.535171143779828e-06, + "loss": 0.6252, + "step": 4302 + }, + { + "epoch": 0.27, + "grad_norm": 0.8970872163772583, + "learning_rate": 8.534445507837505e-06, + "loss": 0.6065, + "step": 4303 + }, + { + "epoch": 0.27, + "grad_norm": 0.9261126518249512, + "learning_rate": 8.533719723071979e-06, + "loss": 0.6377, + "step": 4304 + }, + { + "epoch": 0.27, + "grad_norm": 0.9060932993888855, + "learning_rate": 8.532993789513805e-06, + "loss": 0.6167, + "step": 4305 + }, + { + "epoch": 0.27, + "grad_norm": 0.9795500636100769, + "learning_rate": 8.532267707193555e-06, + "loss": 0.6384, + "step": 4306 + }, + { + "epoch": 0.27, + "grad_norm": 0.8952150940895081, + "learning_rate": 8.5315414761418e-06, + "loss": 0.6448, + "step": 4307 + }, + { + "epoch": 0.27, + "grad_norm": 0.9257222414016724, + "learning_rate": 8.530815096389118e-06, + "loss": 0.5725, + "step": 4308 + }, + { + "epoch": 0.27, + "grad_norm": 0.871077299118042, + "learning_rate": 8.530088567966095e-06, + "loss": 0.6262, + "step": 4309 + }, + { + "epoch": 0.27, + "grad_norm": 0.8593372702598572, + "learning_rate": 8.529361890903323e-06, + "loss": 0.5855, + "step": 4310 + }, + { + "epoch": 0.27, + "grad_norm": 0.9580448865890503, + "learning_rate": 8.5286350652314e-06, + "loss": 0.6397, + "step": 4311 + }, + { + "epoch": 0.27, + "grad_norm": 0.8802589774131775, + "learning_rate": 8.527908090980929e-06, + "loss": 0.6593, + "step": 4312 + }, + { + "epoch": 0.27, + "grad_norm": 0.9041280746459961, + "learning_rate": 8.527180968182522e-06, + "loss": 0.5961, + "step": 4313 + }, + { + "epoch": 0.27, + "grad_norm": 0.8729889988899231, + "learning_rate": 8.526453696866794e-06, + "loss": 0.6, + "step": 4314 + }, + { + "epoch": 0.27, + "grad_norm": 0.8576443195343018, + "learning_rate": 8.525726277064368e-06, + "loss": 0.5911, + "step": 4315 + }, + { + "epoch": 0.27, + "grad_norm": 0.8359036445617676, + "learning_rate": 8.524998708805874e-06, + "loss": 0.5723, + "step": 4316 + }, + { + "epoch": 0.27, + "grad_norm": 0.8947839736938477, + "learning_rate": 8.524270992121948e-06, + "loss": 0.6163, + "step": 4317 + }, + { + "epoch": 0.27, + "grad_norm": 0.9303499460220337, + "learning_rate": 8.523543127043228e-06, + "loss": 0.6144, + "step": 4318 + }, + { + "epoch": 0.27, + "grad_norm": 0.8773894309997559, + "learning_rate": 8.522815113600366e-06, + "loss": 0.5884, + "step": 4319 + }, + { + "epoch": 0.27, + "grad_norm": 0.9222464561462402, + "learning_rate": 8.522086951824014e-06, + "loss": 0.6819, + "step": 4320 + }, + { + "epoch": 0.27, + "grad_norm": 0.8709927797317505, + "learning_rate": 8.521358641744834e-06, + "loss": 0.5886, + "step": 4321 + }, + { + "epoch": 0.27, + "grad_norm": 0.8806871175765991, + "learning_rate": 8.520630183393492e-06, + "loss": 0.616, + "step": 4322 + }, + { + "epoch": 0.27, + "grad_norm": 0.9203693866729736, + "learning_rate": 8.519901576800657e-06, + "loss": 0.6442, + "step": 4323 + }, + { + "epoch": 0.27, + "grad_norm": 0.9157525300979614, + "learning_rate": 8.519172821997015e-06, + "loss": 0.57, + "step": 4324 + }, + { + "epoch": 0.27, + "grad_norm": 0.8757469058036804, + "learning_rate": 8.518443919013247e-06, + "loss": 0.625, + "step": 4325 + }, + { + "epoch": 0.27, + "grad_norm": 0.8523043394088745, + "learning_rate": 8.517714867880044e-06, + "loss": 0.5748, + "step": 4326 + }, + { + "epoch": 0.27, + "grad_norm": 0.8662055730819702, + "learning_rate": 8.516985668628105e-06, + "loss": 0.5595, + "step": 4327 + }, + { + "epoch": 0.27, + "grad_norm": 0.8649899363517761, + "learning_rate": 8.516256321288136e-06, + "loss": 0.5697, + "step": 4328 + }, + { + "epoch": 0.27, + "grad_norm": 0.8986943960189819, + "learning_rate": 8.515526825890845e-06, + "loss": 0.5607, + "step": 4329 + }, + { + "epoch": 0.27, + "grad_norm": 0.9603455066680908, + "learning_rate": 8.514797182466948e-06, + "loss": 0.5942, + "step": 4330 + }, + { + "epoch": 0.27, + "grad_norm": 0.9389190673828125, + "learning_rate": 8.51406739104717e-06, + "loss": 0.6389, + "step": 4331 + }, + { + "epoch": 0.27, + "grad_norm": 0.9618402123451233, + "learning_rate": 8.513337451662238e-06, + "loss": 0.6588, + "step": 4332 + }, + { + "epoch": 0.27, + "grad_norm": 0.9515010118484497, + "learning_rate": 8.512607364342887e-06, + "loss": 0.6097, + "step": 4333 + }, + { + "epoch": 0.27, + "grad_norm": 0.8656193017959595, + "learning_rate": 8.51187712911986e-06, + "loss": 0.597, + "step": 4334 + }, + { + "epoch": 0.27, + "grad_norm": 0.9110217094421387, + "learning_rate": 8.511146746023905e-06, + "loss": 0.5888, + "step": 4335 + }, + { + "epoch": 0.27, + "grad_norm": 0.8885056376457214, + "learning_rate": 8.510416215085775e-06, + "loss": 0.6293, + "step": 4336 + }, + { + "epoch": 0.27, + "grad_norm": 0.8254531621932983, + "learning_rate": 8.509685536336229e-06, + "loss": 0.5644, + "step": 4337 + }, + { + "epoch": 0.27, + "grad_norm": 0.8862583041191101, + "learning_rate": 8.508954709806034e-06, + "loss": 0.633, + "step": 4338 + }, + { + "epoch": 0.27, + "grad_norm": 0.9127135872840881, + "learning_rate": 8.508223735525963e-06, + "loss": 0.624, + "step": 4339 + }, + { + "epoch": 0.27, + "grad_norm": 0.9787098169326782, + "learning_rate": 8.507492613526795e-06, + "loss": 0.6342, + "step": 4340 + }, + { + "epoch": 0.28, + "grad_norm": 0.844140887260437, + "learning_rate": 8.506761343839316e-06, + "loss": 0.6042, + "step": 4341 + }, + { + "epoch": 0.28, + "grad_norm": 0.9551699757575989, + "learning_rate": 8.506029926494315e-06, + "loss": 0.6294, + "step": 4342 + }, + { + "epoch": 0.28, + "grad_norm": 0.8815372586250305, + "learning_rate": 8.50529836152259e-06, + "loss": 0.678, + "step": 4343 + }, + { + "epoch": 0.28, + "grad_norm": 0.841645359992981, + "learning_rate": 8.504566648954947e-06, + "loss": 0.5792, + "step": 4344 + }, + { + "epoch": 0.28, + "grad_norm": 0.8906237483024597, + "learning_rate": 8.503834788822191e-06, + "loss": 0.6074, + "step": 4345 + }, + { + "epoch": 0.28, + "grad_norm": 0.871210515499115, + "learning_rate": 8.503102781155141e-06, + "loss": 0.5929, + "step": 4346 + }, + { + "epoch": 0.28, + "grad_norm": 0.823668897151947, + "learning_rate": 8.502370625984622e-06, + "loss": 0.5886, + "step": 4347 + }, + { + "epoch": 0.28, + "grad_norm": 0.9484293460845947, + "learning_rate": 8.501638323341459e-06, + "loss": 0.6557, + "step": 4348 + }, + { + "epoch": 0.28, + "grad_norm": 0.8655977249145508, + "learning_rate": 8.500905873256486e-06, + "loss": 0.5899, + "step": 4349 + }, + { + "epoch": 0.28, + "grad_norm": 0.9463286399841309, + "learning_rate": 8.500173275760546e-06, + "loss": 0.6128, + "step": 4350 + }, + { + "epoch": 0.28, + "grad_norm": 0.8562267422676086, + "learning_rate": 8.499440530884486e-06, + "loss": 0.5932, + "step": 4351 + }, + { + "epoch": 0.28, + "grad_norm": 0.9182244539260864, + "learning_rate": 8.498707638659159e-06, + "loss": 0.6024, + "step": 4352 + }, + { + "epoch": 0.28, + "grad_norm": 0.8319056034088135, + "learning_rate": 8.497974599115424e-06, + "loss": 0.5626, + "step": 4353 + }, + { + "epoch": 0.28, + "grad_norm": 0.9287349581718445, + "learning_rate": 8.497241412284147e-06, + "loss": 0.6092, + "step": 4354 + }, + { + "epoch": 0.28, + "grad_norm": 0.8886022567749023, + "learning_rate": 8.496508078196202e-06, + "loss": 0.6414, + "step": 4355 + }, + { + "epoch": 0.28, + "grad_norm": 0.916700005531311, + "learning_rate": 8.495774596882462e-06, + "loss": 0.5731, + "step": 4356 + }, + { + "epoch": 0.28, + "grad_norm": 0.8386786580085754, + "learning_rate": 8.495040968373817e-06, + "loss": 0.6356, + "step": 4357 + }, + { + "epoch": 0.28, + "grad_norm": 0.8589484095573425, + "learning_rate": 8.494307192701154e-06, + "loss": 0.5783, + "step": 4358 + }, + { + "epoch": 0.28, + "grad_norm": 0.882973849773407, + "learning_rate": 8.493573269895372e-06, + "loss": 0.5763, + "step": 4359 + }, + { + "epoch": 0.28, + "grad_norm": 0.8396306037902832, + "learning_rate": 8.492839199987373e-06, + "loss": 0.5836, + "step": 4360 + }, + { + "epoch": 0.28, + "grad_norm": 0.8653340935707092, + "learning_rate": 8.492104983008065e-06, + "loss": 0.5815, + "step": 4361 + }, + { + "epoch": 0.28, + "grad_norm": 0.8777982592582703, + "learning_rate": 8.491370618988367e-06, + "loss": 0.5753, + "step": 4362 + }, + { + "epoch": 0.28, + "grad_norm": 0.9289289116859436, + "learning_rate": 8.490636107959194e-06, + "loss": 0.5963, + "step": 4363 + }, + { + "epoch": 0.28, + "grad_norm": 0.9735289216041565, + "learning_rate": 8.489901449951478e-06, + "loss": 0.6477, + "step": 4364 + }, + { + "epoch": 0.28, + "grad_norm": 0.9543069005012512, + "learning_rate": 8.489166644996154e-06, + "loss": 0.6315, + "step": 4365 + }, + { + "epoch": 0.28, + "grad_norm": 0.9369723200798035, + "learning_rate": 8.48843169312416e-06, + "loss": 0.6351, + "step": 4366 + }, + { + "epoch": 0.28, + "grad_norm": 0.9586085677146912, + "learning_rate": 8.487696594366444e-06, + "loss": 0.6317, + "step": 4367 + }, + { + "epoch": 0.28, + "grad_norm": 0.8085949420928955, + "learning_rate": 8.486961348753954e-06, + "loss": 0.5292, + "step": 4368 + }, + { + "epoch": 0.28, + "grad_norm": 0.9245449900627136, + "learning_rate": 8.486225956317655e-06, + "loss": 0.6529, + "step": 4369 + }, + { + "epoch": 0.28, + "grad_norm": 0.8826268315315247, + "learning_rate": 8.48549041708851e-06, + "loss": 0.5822, + "step": 4370 + }, + { + "epoch": 0.28, + "grad_norm": 0.8296921253204346, + "learning_rate": 8.484754731097484e-06, + "loss": 0.565, + "step": 4371 + }, + { + "epoch": 0.28, + "grad_norm": 0.8971067667007446, + "learning_rate": 8.484018898375561e-06, + "loss": 0.606, + "step": 4372 + }, + { + "epoch": 0.28, + "grad_norm": 0.8723403215408325, + "learning_rate": 8.483282918953723e-06, + "loss": 0.6579, + "step": 4373 + }, + { + "epoch": 0.28, + "grad_norm": 0.9097625613212585, + "learning_rate": 8.482546792862957e-06, + "loss": 0.6365, + "step": 4374 + }, + { + "epoch": 0.28, + "grad_norm": 0.8853545784950256, + "learning_rate": 8.481810520134262e-06, + "loss": 0.5961, + "step": 4375 + }, + { + "epoch": 0.28, + "grad_norm": 0.8926584124565125, + "learning_rate": 8.481074100798638e-06, + "loss": 0.6374, + "step": 4376 + }, + { + "epoch": 0.28, + "grad_norm": 0.9190264940261841, + "learning_rate": 8.480337534887093e-06, + "loss": 0.6332, + "step": 4377 + }, + { + "epoch": 0.28, + "grad_norm": 0.9103266596794128, + "learning_rate": 8.479600822430642e-06, + "loss": 0.6575, + "step": 4378 + }, + { + "epoch": 0.28, + "grad_norm": 0.8518051505088806, + "learning_rate": 8.478863963460306e-06, + "loss": 0.5637, + "step": 4379 + }, + { + "epoch": 0.28, + "grad_norm": 0.8869740962982178, + "learning_rate": 8.478126958007108e-06, + "loss": 0.6089, + "step": 4380 + }, + { + "epoch": 0.28, + "grad_norm": 0.8450909852981567, + "learning_rate": 8.477389806102085e-06, + "loss": 0.6446, + "step": 4381 + }, + { + "epoch": 0.28, + "grad_norm": 0.9005980491638184, + "learning_rate": 8.476652507776274e-06, + "loss": 0.5715, + "step": 4382 + }, + { + "epoch": 0.28, + "grad_norm": 0.8654862642288208, + "learning_rate": 8.475915063060721e-06, + "loss": 0.6625, + "step": 4383 + }, + { + "epoch": 0.28, + "grad_norm": 0.9093218445777893, + "learning_rate": 8.475177471986476e-06, + "loss": 0.6045, + "step": 4384 + }, + { + "epoch": 0.28, + "grad_norm": 0.9266924858093262, + "learning_rate": 8.474439734584597e-06, + "loss": 0.611, + "step": 4385 + }, + { + "epoch": 0.28, + "grad_norm": 0.9059037566184998, + "learning_rate": 8.473701850886147e-06, + "loss": 0.6082, + "step": 4386 + }, + { + "epoch": 0.28, + "grad_norm": 0.8820655941963196, + "learning_rate": 8.472963820922195e-06, + "loss": 0.5618, + "step": 4387 + }, + { + "epoch": 0.28, + "grad_norm": 0.9292760491371155, + "learning_rate": 8.47222564472382e-06, + "loss": 0.636, + "step": 4388 + }, + { + "epoch": 0.28, + "grad_norm": 0.8835957050323486, + "learning_rate": 8.471487322322101e-06, + "loss": 0.5778, + "step": 4389 + }, + { + "epoch": 0.28, + "grad_norm": 0.8266465067863464, + "learning_rate": 8.47074885374813e-06, + "loss": 0.6343, + "step": 4390 + }, + { + "epoch": 0.28, + "grad_norm": 0.894709587097168, + "learning_rate": 8.470010239032995e-06, + "loss": 0.6356, + "step": 4391 + }, + { + "epoch": 0.28, + "grad_norm": 0.8928598761558533, + "learning_rate": 8.469271478207801e-06, + "loss": 0.5714, + "step": 4392 + }, + { + "epoch": 0.28, + "grad_norm": 0.8108189702033997, + "learning_rate": 8.468532571303655e-06, + "loss": 0.5671, + "step": 4393 + }, + { + "epoch": 0.28, + "grad_norm": 0.9048933386802673, + "learning_rate": 8.467793518351668e-06, + "loss": 0.6443, + "step": 4394 + }, + { + "epoch": 0.28, + "grad_norm": 0.9767211675643921, + "learning_rate": 8.46705431938296e-06, + "loss": 0.6349, + "step": 4395 + }, + { + "epoch": 0.28, + "grad_norm": 0.8677191138267517, + "learning_rate": 8.466314974428655e-06, + "loss": 0.6328, + "step": 4396 + }, + { + "epoch": 0.28, + "grad_norm": 0.8989687561988831, + "learning_rate": 8.465575483519883e-06, + "loss": 0.5977, + "step": 4397 + }, + { + "epoch": 0.28, + "grad_norm": 0.8818314075469971, + "learning_rate": 8.464835846687786e-06, + "loss": 0.6441, + "step": 4398 + }, + { + "epoch": 0.28, + "grad_norm": 0.8356281518936157, + "learning_rate": 8.464096063963503e-06, + "loss": 0.5723, + "step": 4399 + }, + { + "epoch": 0.28, + "grad_norm": 0.9221736192703247, + "learning_rate": 8.463356135378187e-06, + "loss": 0.5863, + "step": 4400 + }, + { + "epoch": 0.28, + "grad_norm": 0.9067344069480896, + "learning_rate": 8.462616060962992e-06, + "loss": 0.6029, + "step": 4401 + }, + { + "epoch": 0.28, + "grad_norm": 0.9068452715873718, + "learning_rate": 8.46187584074908e-06, + "loss": 0.6686, + "step": 4402 + }, + { + "epoch": 0.28, + "grad_norm": 0.8604983687400818, + "learning_rate": 8.461135474767618e-06, + "loss": 0.6051, + "step": 4403 + }, + { + "epoch": 0.28, + "grad_norm": 0.969758152961731, + "learning_rate": 8.460394963049784e-06, + "loss": 0.6334, + "step": 4404 + }, + { + "epoch": 0.28, + "grad_norm": 0.8745808005332947, + "learning_rate": 8.459654305626754e-06, + "loss": 0.6052, + "step": 4405 + }, + { + "epoch": 0.28, + "grad_norm": 0.8724889755249023, + "learning_rate": 8.458913502529718e-06, + "loss": 0.6038, + "step": 4406 + }, + { + "epoch": 0.28, + "grad_norm": 0.977708101272583, + "learning_rate": 8.458172553789866e-06, + "loss": 0.646, + "step": 4407 + }, + { + "epoch": 0.28, + "grad_norm": 0.900845468044281, + "learning_rate": 8.457431459438398e-06, + "loss": 0.6228, + "step": 4408 + }, + { + "epoch": 0.28, + "grad_norm": 0.9241088032722473, + "learning_rate": 8.456690219506519e-06, + "loss": 0.5887, + "step": 4409 + }, + { + "epoch": 0.28, + "grad_norm": 0.8947976231575012, + "learning_rate": 8.45594883402544e-06, + "loss": 0.6179, + "step": 4410 + }, + { + "epoch": 0.28, + "grad_norm": 0.9319069385528564, + "learning_rate": 8.455207303026378e-06, + "loss": 0.6356, + "step": 4411 + }, + { + "epoch": 0.28, + "grad_norm": 0.8791349530220032, + "learning_rate": 8.454465626540555e-06, + "loss": 0.5906, + "step": 4412 + }, + { + "epoch": 0.28, + "grad_norm": 0.9056016802787781, + "learning_rate": 8.453723804599203e-06, + "loss": 0.6095, + "step": 4413 + }, + { + "epoch": 0.28, + "grad_norm": 0.9093009233474731, + "learning_rate": 8.452981837233555e-06, + "loss": 0.6442, + "step": 4414 + }, + { + "epoch": 0.28, + "grad_norm": 0.9653396010398865, + "learning_rate": 8.452239724474856e-06, + "loss": 0.6397, + "step": 4415 + }, + { + "epoch": 0.28, + "grad_norm": 0.9115119576454163, + "learning_rate": 8.451497466354349e-06, + "loss": 0.5723, + "step": 4416 + }, + { + "epoch": 0.28, + "grad_norm": 0.9298482537269592, + "learning_rate": 8.450755062903293e-06, + "loss": 0.6244, + "step": 4417 + }, + { + "epoch": 0.28, + "grad_norm": 0.8901708126068115, + "learning_rate": 8.450012514152943e-06, + "loss": 0.6238, + "step": 4418 + }, + { + "epoch": 0.28, + "grad_norm": 0.8972589373588562, + "learning_rate": 8.44926982013457e-06, + "loss": 0.6162, + "step": 4419 + }, + { + "epoch": 0.28, + "grad_norm": 0.8598697185516357, + "learning_rate": 8.448526980879444e-06, + "loss": 0.5909, + "step": 4420 + }, + { + "epoch": 0.28, + "grad_norm": 1.0167523622512817, + "learning_rate": 8.447783996418843e-06, + "loss": 0.6784, + "step": 4421 + }, + { + "epoch": 0.28, + "grad_norm": 0.8606759905815125, + "learning_rate": 8.447040866784051e-06, + "loss": 0.5985, + "step": 4422 + }, + { + "epoch": 0.28, + "grad_norm": 0.9100238084793091, + "learning_rate": 8.446297592006361e-06, + "loss": 0.5486, + "step": 4423 + }, + { + "epoch": 0.28, + "grad_norm": 0.864998996257782, + "learning_rate": 8.445554172117066e-06, + "loss": 0.6308, + "step": 4424 + }, + { + "epoch": 0.28, + "grad_norm": 0.8984532356262207, + "learning_rate": 8.444810607147472e-06, + "loss": 0.5894, + "step": 4425 + }, + { + "epoch": 0.28, + "grad_norm": 0.8566537499427795, + "learning_rate": 8.444066897128888e-06, + "loss": 0.5764, + "step": 4426 + }, + { + "epoch": 0.28, + "grad_norm": 0.8784050941467285, + "learning_rate": 8.443323042092625e-06, + "loss": 0.5923, + "step": 4427 + }, + { + "epoch": 0.28, + "grad_norm": 0.9064181447029114, + "learning_rate": 8.442579042070011e-06, + "loss": 0.6279, + "step": 4428 + }, + { + "epoch": 0.28, + "grad_norm": 0.8186553120613098, + "learning_rate": 8.441834897092366e-06, + "loss": 0.6041, + "step": 4429 + }, + { + "epoch": 0.28, + "grad_norm": 0.9280451536178589, + "learning_rate": 8.44109060719103e-06, + "loss": 0.5901, + "step": 4430 + }, + { + "epoch": 0.28, + "grad_norm": 0.9555798172950745, + "learning_rate": 8.440346172397338e-06, + "loss": 0.672, + "step": 4431 + }, + { + "epoch": 0.28, + "grad_norm": 0.8926699161529541, + "learning_rate": 8.439601592742637e-06, + "loss": 0.6645, + "step": 4432 + }, + { + "epoch": 0.28, + "grad_norm": 0.8857988119125366, + "learning_rate": 8.438856868258278e-06, + "loss": 0.6439, + "step": 4433 + }, + { + "epoch": 0.28, + "grad_norm": 0.8523682951927185, + "learning_rate": 8.438111998975618e-06, + "loss": 0.6044, + "step": 4434 + }, + { + "epoch": 0.28, + "grad_norm": 0.8690520524978638, + "learning_rate": 8.437366984926023e-06, + "loss": 0.618, + "step": 4435 + }, + { + "epoch": 0.28, + "grad_norm": 0.8861067891120911, + "learning_rate": 8.436621826140863e-06, + "loss": 0.617, + "step": 4436 + }, + { + "epoch": 0.28, + "grad_norm": 0.8998048901557922, + "learning_rate": 8.435876522651512e-06, + "loss": 0.6881, + "step": 4437 + }, + { + "epoch": 0.28, + "grad_norm": 1.0284022092819214, + "learning_rate": 8.435131074489353e-06, + "loss": 0.6871, + "step": 4438 + }, + { + "epoch": 0.28, + "grad_norm": 0.8755271434783936, + "learning_rate": 8.434385481685776e-06, + "loss": 0.5637, + "step": 4439 + }, + { + "epoch": 0.28, + "grad_norm": 0.9131196737289429, + "learning_rate": 8.43363974427217e-06, + "loss": 0.6516, + "step": 4440 + }, + { + "epoch": 0.28, + "grad_norm": 0.8995763063430786, + "learning_rate": 8.432893862279943e-06, + "loss": 0.5847, + "step": 4441 + }, + { + "epoch": 0.28, + "grad_norm": 0.923299252986908, + "learning_rate": 8.432147835740496e-06, + "loss": 0.6213, + "step": 4442 + }, + { + "epoch": 0.28, + "grad_norm": 0.9042030572891235, + "learning_rate": 8.431401664685244e-06, + "loss": 0.6172, + "step": 4443 + }, + { + "epoch": 0.28, + "grad_norm": 0.963955283164978, + "learning_rate": 8.430655349145604e-06, + "loss": 0.6221, + "step": 4444 + }, + { + "epoch": 0.28, + "grad_norm": 0.9096510410308838, + "learning_rate": 8.429908889153003e-06, + "loss": 0.6646, + "step": 4445 + }, + { + "epoch": 0.28, + "grad_norm": 0.8882843852043152, + "learning_rate": 8.429162284738868e-06, + "loss": 0.6382, + "step": 4446 + }, + { + "epoch": 0.28, + "grad_norm": 0.8437566757202148, + "learning_rate": 8.42841553593464e-06, + "loss": 0.6169, + "step": 4447 + }, + { + "epoch": 0.28, + "grad_norm": 0.8963313102722168, + "learning_rate": 8.42766864277176e-06, + "loss": 0.6054, + "step": 4448 + }, + { + "epoch": 0.28, + "grad_norm": 0.8515428900718689, + "learning_rate": 8.426921605281677e-06, + "loss": 0.6261, + "step": 4449 + }, + { + "epoch": 0.28, + "grad_norm": 0.9076332449913025, + "learning_rate": 8.426174423495848e-06, + "loss": 0.6133, + "step": 4450 + }, + { + "epoch": 0.28, + "grad_norm": 0.9798647165298462, + "learning_rate": 8.425427097445733e-06, + "loss": 0.6373, + "step": 4451 + }, + { + "epoch": 0.28, + "grad_norm": 0.8840082883834839, + "learning_rate": 8.424679627162798e-06, + "loss": 0.65, + "step": 4452 + }, + { + "epoch": 0.28, + "grad_norm": 0.8393424153327942, + "learning_rate": 8.423932012678516e-06, + "loss": 0.5844, + "step": 4453 + }, + { + "epoch": 0.28, + "grad_norm": 0.9224118590354919, + "learning_rate": 8.42318425402437e-06, + "loss": 0.6875, + "step": 4454 + }, + { + "epoch": 0.28, + "grad_norm": 0.8217747211456299, + "learning_rate": 8.422436351231843e-06, + "loss": 0.5858, + "step": 4455 + }, + { + "epoch": 0.28, + "grad_norm": 0.8549429774284363, + "learning_rate": 8.421688304332428e-06, + "loss": 0.5739, + "step": 4456 + }, + { + "epoch": 0.28, + "grad_norm": 0.88507080078125, + "learning_rate": 8.42094011335762e-06, + "loss": 0.5718, + "step": 4457 + }, + { + "epoch": 0.28, + "grad_norm": 0.9432583451271057, + "learning_rate": 8.420191778338924e-06, + "loss": 0.5703, + "step": 4458 + }, + { + "epoch": 0.28, + "grad_norm": 0.893008291721344, + "learning_rate": 8.419443299307852e-06, + "loss": 0.6452, + "step": 4459 + }, + { + "epoch": 0.28, + "grad_norm": 0.8943834900856018, + "learning_rate": 8.418694676295918e-06, + "loss": 0.5895, + "step": 4460 + }, + { + "epoch": 0.28, + "grad_norm": 0.8623561859130859, + "learning_rate": 8.417945909334642e-06, + "loss": 0.6079, + "step": 4461 + }, + { + "epoch": 0.28, + "grad_norm": 0.8554010987281799, + "learning_rate": 8.417196998455555e-06, + "loss": 0.6034, + "step": 4462 + }, + { + "epoch": 0.28, + "grad_norm": 0.8964874148368835, + "learning_rate": 8.41644794369019e-06, + "loss": 0.5709, + "step": 4463 + }, + { + "epoch": 0.28, + "grad_norm": 0.8765043616294861, + "learning_rate": 8.415698745070088e-06, + "loss": 0.5924, + "step": 4464 + }, + { + "epoch": 0.28, + "grad_norm": 0.9031361937522888, + "learning_rate": 8.414949402626793e-06, + "loss": 0.644, + "step": 4465 + }, + { + "epoch": 0.28, + "grad_norm": 0.8381129503250122, + "learning_rate": 8.41419991639186e-06, + "loss": 0.5794, + "step": 4466 + }, + { + "epoch": 0.28, + "grad_norm": 0.9276309013366699, + "learning_rate": 8.413450286396845e-06, + "loss": 0.5939, + "step": 4467 + }, + { + "epoch": 0.28, + "grad_norm": 0.821047306060791, + "learning_rate": 8.41270051267331e-06, + "loss": 0.5748, + "step": 4468 + }, + { + "epoch": 0.28, + "grad_norm": 0.8938078880310059, + "learning_rate": 8.411950595252834e-06, + "loss": 0.613, + "step": 4469 + }, + { + "epoch": 0.28, + "grad_norm": 0.9239148497581482, + "learning_rate": 8.411200534166983e-06, + "loss": 0.6725, + "step": 4470 + }, + { + "epoch": 0.28, + "grad_norm": 0.8708427548408508, + "learning_rate": 8.410450329447346e-06, + "loss": 0.6503, + "step": 4471 + }, + { + "epoch": 0.28, + "grad_norm": 0.9489243626594543, + "learning_rate": 8.409699981125509e-06, + "loss": 0.6561, + "step": 4472 + }, + { + "epoch": 0.28, + "grad_norm": 0.9252210259437561, + "learning_rate": 8.408949489233068e-06, + "loss": 0.6548, + "step": 4473 + }, + { + "epoch": 0.28, + "grad_norm": 0.8737644553184509, + "learning_rate": 8.408198853801623e-06, + "loss": 0.5992, + "step": 4474 + }, + { + "epoch": 0.28, + "grad_norm": 0.9438381195068359, + "learning_rate": 8.40744807486278e-06, + "loss": 0.6524, + "step": 4475 + }, + { + "epoch": 0.28, + "grad_norm": 0.8789763450622559, + "learning_rate": 8.406697152448152e-06, + "loss": 0.6056, + "step": 4476 + }, + { + "epoch": 0.28, + "grad_norm": 0.9246413707733154, + "learning_rate": 8.405946086589359e-06, + "loss": 0.6097, + "step": 4477 + }, + { + "epoch": 0.28, + "grad_norm": 0.9346416592597961, + "learning_rate": 8.405194877318023e-06, + "loss": 0.6877, + "step": 4478 + }, + { + "epoch": 0.28, + "grad_norm": 0.8847804069519043, + "learning_rate": 8.404443524665777e-06, + "loss": 0.6492, + "step": 4479 + }, + { + "epoch": 0.28, + "grad_norm": 0.8874092698097229, + "learning_rate": 8.40369202866426e-06, + "loss": 0.5755, + "step": 4480 + }, + { + "epoch": 0.28, + "grad_norm": 0.9419736266136169, + "learning_rate": 8.40294038934511e-06, + "loss": 0.6488, + "step": 4481 + }, + { + "epoch": 0.28, + "grad_norm": 0.8550480604171753, + "learning_rate": 8.402188606739977e-06, + "loss": 0.5936, + "step": 4482 + }, + { + "epoch": 0.28, + "grad_norm": 0.9512335062026978, + "learning_rate": 8.401436680880518e-06, + "loss": 0.6031, + "step": 4483 + }, + { + "epoch": 0.28, + "grad_norm": 0.8816537857055664, + "learning_rate": 8.400684611798395e-06, + "loss": 0.5836, + "step": 4484 + }, + { + "epoch": 0.28, + "grad_norm": 0.8356591463088989, + "learning_rate": 8.39993239952527e-06, + "loss": 0.5666, + "step": 4485 + }, + { + "epoch": 0.28, + "grad_norm": 0.9029728770256042, + "learning_rate": 8.399180044092821e-06, + "loss": 0.5819, + "step": 4486 + }, + { + "epoch": 0.28, + "grad_norm": 0.9611971378326416, + "learning_rate": 8.398427545532726e-06, + "loss": 0.589, + "step": 4487 + }, + { + "epoch": 0.28, + "grad_norm": 0.9153091907501221, + "learning_rate": 8.397674903876667e-06, + "loss": 0.6034, + "step": 4488 + }, + { + "epoch": 0.28, + "grad_norm": 0.9280160665512085, + "learning_rate": 8.396922119156339e-06, + "loss": 0.6534, + "step": 4489 + }, + { + "epoch": 0.28, + "grad_norm": 0.9492883086204529, + "learning_rate": 8.396169191403438e-06, + "loss": 0.6022, + "step": 4490 + }, + { + "epoch": 0.28, + "grad_norm": 0.9128872156143188, + "learning_rate": 8.395416120649667e-06, + "loss": 0.5807, + "step": 4491 + }, + { + "epoch": 0.28, + "grad_norm": 0.8702619075775146, + "learning_rate": 8.394662906926734e-06, + "loss": 0.5689, + "step": 4492 + }, + { + "epoch": 0.28, + "grad_norm": 0.8514307141304016, + "learning_rate": 8.393909550266354e-06, + "loss": 0.6161, + "step": 4493 + }, + { + "epoch": 0.28, + "grad_norm": 0.8472135663032532, + "learning_rate": 8.393156050700252e-06, + "loss": 0.626, + "step": 4494 + }, + { + "epoch": 0.28, + "grad_norm": 0.8964636921882629, + "learning_rate": 8.39240240826015e-06, + "loss": 0.6282, + "step": 4495 + }, + { + "epoch": 0.28, + "grad_norm": 0.9277433753013611, + "learning_rate": 8.391648622977787e-06, + "loss": 0.6707, + "step": 4496 + }, + { + "epoch": 0.28, + "grad_norm": 0.8764444589614868, + "learning_rate": 8.390894694884896e-06, + "loss": 0.5962, + "step": 4497 + }, + { + "epoch": 0.28, + "grad_norm": 0.9093109965324402, + "learning_rate": 8.390140624013228e-06, + "loss": 0.6039, + "step": 4498 + }, + { + "epoch": 0.29, + "grad_norm": 0.9418292045593262, + "learning_rate": 8.38938641039453e-06, + "loss": 0.6415, + "step": 4499 + }, + { + "epoch": 0.29, + "grad_norm": 0.8592790961265564, + "learning_rate": 8.388632054060562e-06, + "loss": 0.5807, + "step": 4500 + }, + { + "epoch": 0.29, + "grad_norm": 0.9306639432907104, + "learning_rate": 8.387877555043086e-06, + "loss": 0.6477, + "step": 4501 + }, + { + "epoch": 0.29, + "grad_norm": 0.8802691698074341, + "learning_rate": 8.38712291337387e-06, + "loss": 0.6043, + "step": 4502 + }, + { + "epoch": 0.29, + "grad_norm": 0.8935637474060059, + "learning_rate": 8.386368129084695e-06, + "loss": 0.5958, + "step": 4503 + }, + { + "epoch": 0.29, + "grad_norm": 0.9453160166740417, + "learning_rate": 8.385613202207336e-06, + "loss": 0.6246, + "step": 4504 + }, + { + "epoch": 0.29, + "grad_norm": 0.8884761929512024, + "learning_rate": 8.384858132773582e-06, + "loss": 0.6038, + "step": 4505 + }, + { + "epoch": 0.29, + "grad_norm": 0.8889615535736084, + "learning_rate": 8.38410292081523e-06, + "loss": 0.563, + "step": 4506 + }, + { + "epoch": 0.29, + "grad_norm": 0.8603700995445251, + "learning_rate": 8.383347566364072e-06, + "loss": 0.5701, + "step": 4507 + }, + { + "epoch": 0.29, + "grad_norm": 0.8758067488670349, + "learning_rate": 8.38259206945192e-06, + "loss": 0.6018, + "step": 4508 + }, + { + "epoch": 0.29, + "grad_norm": 0.9259410500526428, + "learning_rate": 8.381836430110585e-06, + "loss": 0.5987, + "step": 4509 + }, + { + "epoch": 0.29, + "grad_norm": 0.913033127784729, + "learning_rate": 8.38108064837188e-06, + "loss": 0.6485, + "step": 4510 + }, + { + "epoch": 0.29, + "grad_norm": 0.88724285364151, + "learning_rate": 8.380324724267631e-06, + "loss": 0.6141, + "step": 4511 + }, + { + "epoch": 0.29, + "grad_norm": 0.8932639360427856, + "learning_rate": 8.379568657829669e-06, + "loss": 0.6432, + "step": 4512 + }, + { + "epoch": 0.29, + "grad_norm": 0.8454581499099731, + "learning_rate": 8.378812449089826e-06, + "loss": 0.6136, + "step": 4513 + }, + { + "epoch": 0.29, + "grad_norm": 0.9295586943626404, + "learning_rate": 8.378056098079946e-06, + "loss": 0.6594, + "step": 4514 + }, + { + "epoch": 0.29, + "grad_norm": 0.9568715691566467, + "learning_rate": 8.377299604831875e-06, + "loss": 0.664, + "step": 4515 + }, + { + "epoch": 0.29, + "grad_norm": 0.8817077875137329, + "learning_rate": 8.376542969377465e-06, + "loss": 0.6482, + "step": 4516 + }, + { + "epoch": 0.29, + "grad_norm": 0.924589216709137, + "learning_rate": 8.375786191748578e-06, + "loss": 0.6711, + "step": 4517 + }, + { + "epoch": 0.29, + "grad_norm": 0.9990555047988892, + "learning_rate": 8.375029271977076e-06, + "loss": 0.6757, + "step": 4518 + }, + { + "epoch": 0.29, + "grad_norm": 0.8449356555938721, + "learning_rate": 8.374272210094834e-06, + "loss": 0.6219, + "step": 4519 + }, + { + "epoch": 0.29, + "grad_norm": 0.8764523863792419, + "learning_rate": 8.373515006133728e-06, + "loss": 0.5765, + "step": 4520 + }, + { + "epoch": 0.29, + "grad_norm": 0.9035282731056213, + "learning_rate": 8.372757660125639e-06, + "loss": 0.5511, + "step": 4521 + }, + { + "epoch": 0.29, + "grad_norm": 0.9758896231651306, + "learning_rate": 8.372000172102459e-06, + "loss": 0.654, + "step": 4522 + }, + { + "epoch": 0.29, + "grad_norm": 0.9765152931213379, + "learning_rate": 8.37124254209608e-06, + "loss": 0.6125, + "step": 4523 + }, + { + "epoch": 0.29, + "grad_norm": 0.8869422674179077, + "learning_rate": 8.370484770138407e-06, + "loss": 0.6502, + "step": 4524 + }, + { + "epoch": 0.29, + "grad_norm": 0.9507737755775452, + "learning_rate": 8.369726856261346e-06, + "loss": 0.6427, + "step": 4525 + }, + { + "epoch": 0.29, + "grad_norm": 0.9340800642967224, + "learning_rate": 8.36896880049681e-06, + "loss": 0.576, + "step": 4526 + }, + { + "epoch": 0.29, + "grad_norm": 0.9077014327049255, + "learning_rate": 8.368210602876716e-06, + "loss": 0.5923, + "step": 4527 + }, + { + "epoch": 0.29, + "grad_norm": 0.8733184933662415, + "learning_rate": 8.36745226343299e-06, + "loss": 0.6116, + "step": 4528 + }, + { + "epoch": 0.29, + "grad_norm": 0.9413378834724426, + "learning_rate": 8.366693782197566e-06, + "loss": 0.6095, + "step": 4529 + }, + { + "epoch": 0.29, + "grad_norm": 0.9507108330726624, + "learning_rate": 8.365935159202378e-06, + "loss": 0.6222, + "step": 4530 + }, + { + "epoch": 0.29, + "grad_norm": 0.950071394443512, + "learning_rate": 8.365176394479368e-06, + "loss": 0.6427, + "step": 4531 + }, + { + "epoch": 0.29, + "grad_norm": 0.8926099538803101, + "learning_rate": 8.364417488060488e-06, + "loss": 0.6253, + "step": 4532 + }, + { + "epoch": 0.29, + "grad_norm": 0.8760389089584351, + "learning_rate": 8.363658439977693e-06, + "loss": 0.5829, + "step": 4533 + }, + { + "epoch": 0.29, + "grad_norm": 0.8548893332481384, + "learning_rate": 8.36289925026294e-06, + "loss": 0.5992, + "step": 4534 + }, + { + "epoch": 0.29, + "grad_norm": 0.9305916428565979, + "learning_rate": 8.362139918948198e-06, + "loss": 0.6084, + "step": 4535 + }, + { + "epoch": 0.29, + "grad_norm": 0.9650013446807861, + "learning_rate": 8.36138044606544e-06, + "loss": 0.6429, + "step": 4536 + }, + { + "epoch": 0.29, + "grad_norm": 0.8791600465774536, + "learning_rate": 8.360620831646647e-06, + "loss": 0.6104, + "step": 4537 + }, + { + "epoch": 0.29, + "grad_norm": 0.8988505601882935, + "learning_rate": 8.359861075723801e-06, + "loss": 0.6117, + "step": 4538 + }, + { + "epoch": 0.29, + "grad_norm": 0.9081864953041077, + "learning_rate": 8.359101178328893e-06, + "loss": 0.6432, + "step": 4539 + }, + { + "epoch": 0.29, + "grad_norm": 0.8397430181503296, + "learning_rate": 8.358341139493919e-06, + "loss": 0.5822, + "step": 4540 + }, + { + "epoch": 0.29, + "grad_norm": 0.9269049167633057, + "learning_rate": 8.357580959250882e-06, + "loss": 0.5718, + "step": 4541 + }, + { + "epoch": 0.29, + "grad_norm": 0.9173187017440796, + "learning_rate": 8.356820637631792e-06, + "loss": 0.6343, + "step": 4542 + }, + { + "epoch": 0.29, + "grad_norm": 0.9146298766136169, + "learning_rate": 8.356060174668663e-06, + "loss": 0.5987, + "step": 4543 + }, + { + "epoch": 0.29, + "grad_norm": 0.8490142226219177, + "learning_rate": 8.355299570393515e-06, + "loss": 0.5537, + "step": 4544 + }, + { + "epoch": 0.29, + "grad_norm": 0.9867364764213562, + "learning_rate": 8.354538824838373e-06, + "loss": 0.6229, + "step": 4545 + }, + { + "epoch": 0.29, + "grad_norm": 0.8394815921783447, + "learning_rate": 8.353777938035272e-06, + "loss": 0.5803, + "step": 4546 + }, + { + "epoch": 0.29, + "grad_norm": 0.9035863280296326, + "learning_rate": 8.353016910016247e-06, + "loss": 0.6028, + "step": 4547 + }, + { + "epoch": 0.29, + "grad_norm": 0.9454771876335144, + "learning_rate": 8.352255740813347e-06, + "loss": 0.6281, + "step": 4548 + }, + { + "epoch": 0.29, + "grad_norm": 0.8816177845001221, + "learning_rate": 8.351494430458617e-06, + "loss": 0.5853, + "step": 4549 + }, + { + "epoch": 0.29, + "grad_norm": 0.9621097445487976, + "learning_rate": 8.350732978984116e-06, + "loss": 0.64, + "step": 4550 + }, + { + "epoch": 0.29, + "grad_norm": 0.8992953896522522, + "learning_rate": 8.349971386421906e-06, + "loss": 0.609, + "step": 4551 + }, + { + "epoch": 0.29, + "grad_norm": 0.8685299754142761, + "learning_rate": 8.349209652804055e-06, + "loss": 0.5633, + "step": 4552 + }, + { + "epoch": 0.29, + "grad_norm": 0.8441104292869568, + "learning_rate": 8.348447778162636e-06, + "loss": 0.6342, + "step": 4553 + }, + { + "epoch": 0.29, + "grad_norm": 0.8986367583274841, + "learning_rate": 8.347685762529729e-06, + "loss": 0.6462, + "step": 4554 + }, + { + "epoch": 0.29, + "grad_norm": 0.8934696316719055, + "learning_rate": 8.34692360593742e-06, + "loss": 0.5742, + "step": 4555 + }, + { + "epoch": 0.29, + "grad_norm": 0.9160881042480469, + "learning_rate": 8.346161308417805e-06, + "loss": 0.6352, + "step": 4556 + }, + { + "epoch": 0.29, + "grad_norm": 0.8555467128753662, + "learning_rate": 8.345398870002972e-06, + "loss": 0.6251, + "step": 4557 + }, + { + "epoch": 0.29, + "grad_norm": 0.9095616340637207, + "learning_rate": 8.344636290725035e-06, + "loss": 0.6466, + "step": 4558 + }, + { + "epoch": 0.29, + "grad_norm": 0.8506302237510681, + "learning_rate": 8.343873570616097e-06, + "loss": 0.6207, + "step": 4559 + }, + { + "epoch": 0.29, + "grad_norm": 0.8929101228713989, + "learning_rate": 8.343110709708275e-06, + "loss": 0.6406, + "step": 4560 + }, + { + "epoch": 0.29, + "grad_norm": 0.919562816619873, + "learning_rate": 8.342347708033692e-06, + "loss": 0.6297, + "step": 4561 + }, + { + "epoch": 0.29, + "grad_norm": 0.9147757291793823, + "learning_rate": 8.341584565624471e-06, + "loss": 0.6507, + "step": 4562 + }, + { + "epoch": 0.29, + "grad_norm": 0.8911783695220947, + "learning_rate": 8.340821282512753e-06, + "loss": 0.6204, + "step": 4563 + }, + { + "epoch": 0.29, + "grad_norm": 0.9014183878898621, + "learning_rate": 8.34005785873067e-06, + "loss": 0.5942, + "step": 4564 + }, + { + "epoch": 0.29, + "grad_norm": 0.9250972270965576, + "learning_rate": 8.339294294310371e-06, + "loss": 0.6221, + "step": 4565 + }, + { + "epoch": 0.29, + "grad_norm": 0.8855701684951782, + "learning_rate": 8.338530589284005e-06, + "loss": 0.6531, + "step": 4566 + }, + { + "epoch": 0.29, + "grad_norm": 0.8788840174674988, + "learning_rate": 8.33776674368373e-06, + "loss": 0.5342, + "step": 4567 + }, + { + "epoch": 0.29, + "grad_norm": 0.9645684361457825, + "learning_rate": 8.337002757541708e-06, + "loss": 0.6643, + "step": 4568 + }, + { + "epoch": 0.29, + "grad_norm": 0.8790433406829834, + "learning_rate": 8.33623863089011e-06, + "loss": 0.5891, + "step": 4569 + }, + { + "epoch": 0.29, + "grad_norm": 0.8718952536582947, + "learning_rate": 8.335474363761109e-06, + "loss": 0.5621, + "step": 4570 + }, + { + "epoch": 0.29, + "grad_norm": 0.9348157048225403, + "learning_rate": 8.334709956186884e-06, + "loss": 0.6079, + "step": 4571 + }, + { + "epoch": 0.29, + "grad_norm": 0.8799747824668884, + "learning_rate": 8.333945408199624e-06, + "loss": 0.5964, + "step": 4572 + }, + { + "epoch": 0.29, + "grad_norm": 0.8926383852958679, + "learning_rate": 8.333180719831521e-06, + "loss": 0.6197, + "step": 4573 + }, + { + "epoch": 0.29, + "grad_norm": 0.9374673962593079, + "learning_rate": 8.332415891114774e-06, + "loss": 0.6275, + "step": 4574 + }, + { + "epoch": 0.29, + "grad_norm": 0.9162465333938599, + "learning_rate": 8.331650922081586e-06, + "loss": 0.5774, + "step": 4575 + }, + { + "epoch": 0.29, + "grad_norm": 0.8876767158508301, + "learning_rate": 8.330885812764168e-06, + "loss": 0.6274, + "step": 4576 + }, + { + "epoch": 0.29, + "grad_norm": 0.8842494487762451, + "learning_rate": 8.330120563194736e-06, + "loss": 0.6357, + "step": 4577 + }, + { + "epoch": 0.29, + "grad_norm": 0.8948314189910889, + "learning_rate": 8.32935517340551e-06, + "loss": 0.6393, + "step": 4578 + }, + { + "epoch": 0.29, + "grad_norm": 0.8375378251075745, + "learning_rate": 8.328589643428722e-06, + "loss": 0.5923, + "step": 4579 + }, + { + "epoch": 0.29, + "grad_norm": 0.904406726360321, + "learning_rate": 8.327823973296601e-06, + "loss": 0.6558, + "step": 4580 + }, + { + "epoch": 0.29, + "grad_norm": 0.9058637619018555, + "learning_rate": 8.32705816304139e-06, + "loss": 0.6308, + "step": 4581 + }, + { + "epoch": 0.29, + "grad_norm": 0.8600705862045288, + "learning_rate": 8.326292212695335e-06, + "loss": 0.663, + "step": 4582 + }, + { + "epoch": 0.29, + "grad_norm": 0.9292261004447937, + "learning_rate": 8.325526122290685e-06, + "loss": 0.6219, + "step": 4583 + }, + { + "epoch": 0.29, + "grad_norm": 0.8440708518028259, + "learning_rate": 8.3247598918597e-06, + "loss": 0.5731, + "step": 4584 + }, + { + "epoch": 0.29, + "grad_norm": 0.9342061281204224, + "learning_rate": 8.323993521434639e-06, + "loss": 0.6052, + "step": 4585 + }, + { + "epoch": 0.29, + "grad_norm": 0.9192177057266235, + "learning_rate": 8.323227011047777e-06, + "loss": 0.5993, + "step": 4586 + }, + { + "epoch": 0.29, + "grad_norm": 0.8514859676361084, + "learning_rate": 8.322460360731386e-06, + "loss": 0.6584, + "step": 4587 + }, + { + "epoch": 0.29, + "grad_norm": 0.8630070686340332, + "learning_rate": 8.321693570517745e-06, + "loss": 0.6247, + "step": 4588 + }, + { + "epoch": 0.29, + "grad_norm": 0.9018881320953369, + "learning_rate": 8.320926640439145e-06, + "loss": 0.5849, + "step": 4589 + }, + { + "epoch": 0.29, + "grad_norm": 0.8964559435844421, + "learning_rate": 8.320159570527876e-06, + "loss": 0.6018, + "step": 4590 + }, + { + "epoch": 0.29, + "grad_norm": 0.8430085182189941, + "learning_rate": 8.319392360816239e-06, + "loss": 0.6033, + "step": 4591 + }, + { + "epoch": 0.29, + "grad_norm": 0.8777481913566589, + "learning_rate": 8.318625011336533e-06, + "loss": 0.6466, + "step": 4592 + }, + { + "epoch": 0.29, + "grad_norm": 0.9354075789451599, + "learning_rate": 8.317857522121078e-06, + "loss": 0.6187, + "step": 4593 + }, + { + "epoch": 0.29, + "grad_norm": 0.9195157289505005, + "learning_rate": 8.317089893202181e-06, + "loss": 0.6094, + "step": 4594 + }, + { + "epoch": 0.29, + "grad_norm": 0.9029771089553833, + "learning_rate": 8.316322124612169e-06, + "loss": 0.6705, + "step": 4595 + }, + { + "epoch": 0.29, + "grad_norm": 0.9089044332504272, + "learning_rate": 8.315554216383368e-06, + "loss": 0.657, + "step": 4596 + }, + { + "epoch": 0.29, + "grad_norm": 0.9705564975738525, + "learning_rate": 8.314786168548115e-06, + "loss": 0.645, + "step": 4597 + }, + { + "epoch": 0.29, + "grad_norm": 0.8919417858123779, + "learning_rate": 8.314017981138746e-06, + "loss": 0.5922, + "step": 4598 + }, + { + "epoch": 0.29, + "grad_norm": 0.9287596344947815, + "learning_rate": 8.31324965418761e-06, + "loss": 0.629, + "step": 4599 + }, + { + "epoch": 0.29, + "grad_norm": 0.8940380811691284, + "learning_rate": 8.312481187727055e-06, + "loss": 0.6292, + "step": 4600 + }, + { + "epoch": 0.29, + "grad_norm": 0.8414455652236938, + "learning_rate": 8.311712581789442e-06, + "loss": 0.5379, + "step": 4601 + }, + { + "epoch": 0.29, + "grad_norm": 0.8981665372848511, + "learning_rate": 8.310943836407132e-06, + "loss": 0.6239, + "step": 4602 + }, + { + "epoch": 0.29, + "grad_norm": 0.9226404428482056, + "learning_rate": 8.310174951612495e-06, + "loss": 0.5864, + "step": 4603 + }, + { + "epoch": 0.29, + "grad_norm": 0.8723615407943726, + "learning_rate": 8.309405927437906e-06, + "loss": 0.5485, + "step": 4604 + }, + { + "epoch": 0.29, + "grad_norm": 0.8954591751098633, + "learning_rate": 8.308636763915746e-06, + "loss": 0.6198, + "step": 4605 + }, + { + "epoch": 0.29, + "grad_norm": 0.8918243050575256, + "learning_rate": 8.307867461078402e-06, + "loss": 0.6386, + "step": 4606 + }, + { + "epoch": 0.29, + "grad_norm": 0.9272078275680542, + "learning_rate": 8.307098018958266e-06, + "loss": 0.6456, + "step": 4607 + }, + { + "epoch": 0.29, + "grad_norm": 0.8392652869224548, + "learning_rate": 8.306328437587738e-06, + "loss": 0.6253, + "step": 4608 + }, + { + "epoch": 0.29, + "grad_norm": 0.8458937406539917, + "learning_rate": 8.305558716999221e-06, + "loss": 0.619, + "step": 4609 + }, + { + "epoch": 0.29, + "grad_norm": 0.9669510126113892, + "learning_rate": 8.304788857225126e-06, + "loss": 0.6159, + "step": 4610 + }, + { + "epoch": 0.29, + "grad_norm": 0.8691350221633911, + "learning_rate": 8.304018858297867e-06, + "loss": 0.5951, + "step": 4611 + }, + { + "epoch": 0.29, + "grad_norm": 0.9048541784286499, + "learning_rate": 8.30324872024987e-06, + "loss": 0.6122, + "step": 4612 + }, + { + "epoch": 0.29, + "grad_norm": 0.897702693939209, + "learning_rate": 8.30247844311356e-06, + "loss": 0.5482, + "step": 4613 + }, + { + "epoch": 0.29, + "grad_norm": 0.967581570148468, + "learning_rate": 8.301708026921371e-06, + "loss": 0.6607, + "step": 4614 + }, + { + "epoch": 0.29, + "grad_norm": 0.9215171933174133, + "learning_rate": 8.300937471705742e-06, + "loss": 0.6724, + "step": 4615 + }, + { + "epoch": 0.29, + "grad_norm": 0.8553723692893982, + "learning_rate": 8.300166777499119e-06, + "loss": 0.6192, + "step": 4616 + }, + { + "epoch": 0.29, + "grad_norm": 0.9200363755226135, + "learning_rate": 8.299395944333955e-06, + "loss": 0.6755, + "step": 4617 + }, + { + "epoch": 0.29, + "grad_norm": 0.883851170539856, + "learning_rate": 8.298624972242704e-06, + "loss": 0.5919, + "step": 4618 + }, + { + "epoch": 0.29, + "grad_norm": 0.9365254044532776, + "learning_rate": 8.297853861257831e-06, + "loss": 0.5648, + "step": 4619 + }, + { + "epoch": 0.29, + "grad_norm": 0.9034328460693359, + "learning_rate": 8.297082611411805e-06, + "loss": 0.6108, + "step": 4620 + }, + { + "epoch": 0.29, + "grad_norm": 0.8502330780029297, + "learning_rate": 8.296311222737099e-06, + "loss": 0.5599, + "step": 4621 + }, + { + "epoch": 0.29, + "grad_norm": 0.905636727809906, + "learning_rate": 8.295539695266195e-06, + "loss": 0.5831, + "step": 4622 + }, + { + "epoch": 0.29, + "grad_norm": 0.8426777124404907, + "learning_rate": 8.29476802903158e-06, + "loss": 0.5742, + "step": 4623 + }, + { + "epoch": 0.29, + "grad_norm": 0.8838980793952942, + "learning_rate": 8.293996224065742e-06, + "loss": 0.6093, + "step": 4624 + }, + { + "epoch": 0.29, + "grad_norm": 0.866264283657074, + "learning_rate": 8.293224280401185e-06, + "loss": 0.6257, + "step": 4625 + }, + { + "epoch": 0.29, + "grad_norm": 0.9148405194282532, + "learning_rate": 8.292452198070406e-06, + "loss": 0.6198, + "step": 4626 + }, + { + "epoch": 0.29, + "grad_norm": 0.8394678831100464, + "learning_rate": 8.291679977105922e-06, + "loss": 0.5995, + "step": 4627 + }, + { + "epoch": 0.29, + "grad_norm": 0.858686089515686, + "learning_rate": 8.290907617540244e-06, + "loss": 0.5912, + "step": 4628 + }, + { + "epoch": 0.29, + "grad_norm": 0.9442601203918457, + "learning_rate": 8.290135119405894e-06, + "loss": 0.5746, + "step": 4629 + }, + { + "epoch": 0.29, + "grad_norm": 0.8091254234313965, + "learning_rate": 8.2893624827354e-06, + "loss": 0.5738, + "step": 4630 + }, + { + "epoch": 0.29, + "grad_norm": 0.8589221835136414, + "learning_rate": 8.288589707561295e-06, + "loss": 0.5908, + "step": 4631 + }, + { + "epoch": 0.29, + "grad_norm": 0.8597394824028015, + "learning_rate": 8.287816793916119e-06, + "loss": 0.6029, + "step": 4632 + }, + { + "epoch": 0.29, + "grad_norm": 0.9113194942474365, + "learning_rate": 8.287043741832412e-06, + "loss": 0.6494, + "step": 4633 + }, + { + "epoch": 0.29, + "grad_norm": 0.8626760244369507, + "learning_rate": 8.28627055134273e-06, + "loss": 0.5452, + "step": 4634 + }, + { + "epoch": 0.29, + "grad_norm": 0.8131372332572937, + "learning_rate": 8.285497222479626e-06, + "loss": 0.5791, + "step": 4635 + }, + { + "epoch": 0.29, + "grad_norm": 0.8600938320159912, + "learning_rate": 8.284723755275666e-06, + "loss": 0.6142, + "step": 4636 + }, + { + "epoch": 0.29, + "grad_norm": 0.8754161596298218, + "learning_rate": 8.283950149763413e-06, + "loss": 0.571, + "step": 4637 + }, + { + "epoch": 0.29, + "grad_norm": 0.8484256863594055, + "learning_rate": 8.283176405975444e-06, + "loss": 0.5876, + "step": 4638 + }, + { + "epoch": 0.29, + "grad_norm": 0.8761142492294312, + "learning_rate": 8.282402523944338e-06, + "loss": 0.5813, + "step": 4639 + }, + { + "epoch": 0.29, + "grad_norm": 0.8704332709312439, + "learning_rate": 8.28162850370268e-06, + "loss": 0.5611, + "step": 4640 + }, + { + "epoch": 0.29, + "grad_norm": 0.9601176977157593, + "learning_rate": 8.28085434528306e-06, + "loss": 0.6592, + "step": 4641 + }, + { + "epoch": 0.29, + "grad_norm": 0.9141191244125366, + "learning_rate": 8.28008004871808e-06, + "loss": 0.5754, + "step": 4642 + }, + { + "epoch": 0.29, + "grad_norm": 0.8856356143951416, + "learning_rate": 8.279305614040337e-06, + "loss": 0.6057, + "step": 4643 + }, + { + "epoch": 0.29, + "grad_norm": 0.8992973566055298, + "learning_rate": 8.278531041282445e-06, + "loss": 0.5995, + "step": 4644 + }, + { + "epoch": 0.29, + "grad_norm": 0.9188979864120483, + "learning_rate": 8.277756330477013e-06, + "loss": 0.6201, + "step": 4645 + }, + { + "epoch": 0.29, + "grad_norm": 0.9234612584114075, + "learning_rate": 8.276981481656668e-06, + "loss": 0.6256, + "step": 4646 + }, + { + "epoch": 0.29, + "grad_norm": 0.9108220934867859, + "learning_rate": 8.276206494854029e-06, + "loss": 0.6551, + "step": 4647 + }, + { + "epoch": 0.29, + "grad_norm": 0.8664566874504089, + "learning_rate": 8.275431370101734e-06, + "loss": 0.5819, + "step": 4648 + }, + { + "epoch": 0.29, + "grad_norm": 0.9078052639961243, + "learning_rate": 8.274656107432418e-06, + "loss": 0.6217, + "step": 4649 + }, + { + "epoch": 0.29, + "grad_norm": 0.8800520896911621, + "learning_rate": 8.273880706878724e-06, + "loss": 0.6276, + "step": 4650 + }, + { + "epoch": 0.29, + "grad_norm": 0.977785587310791, + "learning_rate": 8.273105168473304e-06, + "loss": 0.5943, + "step": 4651 + }, + { + "epoch": 0.29, + "grad_norm": 0.9266806840896606, + "learning_rate": 8.27232949224881e-06, + "loss": 0.6371, + "step": 4652 + }, + { + "epoch": 0.29, + "grad_norm": 0.9237861037254333, + "learning_rate": 8.271553678237904e-06, + "loss": 0.6344, + "step": 4653 + }, + { + "epoch": 0.29, + "grad_norm": 0.9259735345840454, + "learning_rate": 8.270777726473256e-06, + "loss": 0.5926, + "step": 4654 + }, + { + "epoch": 0.29, + "grad_norm": 0.9366374611854553, + "learning_rate": 8.270001636987535e-06, + "loss": 0.5992, + "step": 4655 + }, + { + "epoch": 0.29, + "grad_norm": 0.9009166359901428, + "learning_rate": 8.26922540981342e-06, + "loss": 0.6138, + "step": 4656 + }, + { + "epoch": 0.3, + "grad_norm": 0.8974087834358215, + "learning_rate": 8.268449044983598e-06, + "loss": 0.5916, + "step": 4657 + }, + { + "epoch": 0.3, + "grad_norm": 0.8280764222145081, + "learning_rate": 8.267672542530753e-06, + "loss": 0.6036, + "step": 4658 + }, + { + "epoch": 0.3, + "grad_norm": 0.8438900709152222, + "learning_rate": 8.266895902487588e-06, + "loss": 0.5817, + "step": 4659 + }, + { + "epoch": 0.3, + "grad_norm": 0.8856135010719299, + "learning_rate": 8.2661191248868e-06, + "loss": 0.6245, + "step": 4660 + }, + { + "epoch": 0.3, + "grad_norm": 0.8787485957145691, + "learning_rate": 8.265342209761098e-06, + "loss": 0.6138, + "step": 4661 + }, + { + "epoch": 0.3, + "grad_norm": 0.8637370467185974, + "learning_rate": 8.264565157143194e-06, + "loss": 0.6444, + "step": 4662 + }, + { + "epoch": 0.3, + "grad_norm": 0.9093601107597351, + "learning_rate": 8.26378796706581e-06, + "loss": 0.6109, + "step": 4663 + }, + { + "epoch": 0.3, + "grad_norm": 0.8617517352104187, + "learning_rate": 8.263010639561666e-06, + "loss": 0.6166, + "step": 4664 + }, + { + "epoch": 0.3, + "grad_norm": 0.8573476076126099, + "learning_rate": 8.262233174663497e-06, + "loss": 0.5985, + "step": 4665 + }, + { + "epoch": 0.3, + "grad_norm": 0.9106038808822632, + "learning_rate": 8.261455572404036e-06, + "loss": 0.6184, + "step": 4666 + }, + { + "epoch": 0.3, + "grad_norm": 0.9015377163887024, + "learning_rate": 8.260677832816029e-06, + "loss": 0.5952, + "step": 4667 + }, + { + "epoch": 0.3, + "grad_norm": 0.8836144804954529, + "learning_rate": 8.259899955932218e-06, + "loss": 0.5964, + "step": 4668 + }, + { + "epoch": 0.3, + "grad_norm": 0.9044028520584106, + "learning_rate": 8.259121941785362e-06, + "loss": 0.626, + "step": 4669 + }, + { + "epoch": 0.3, + "grad_norm": 0.8526366353034973, + "learning_rate": 8.25834379040822e-06, + "loss": 0.5792, + "step": 4670 + }, + { + "epoch": 0.3, + "grad_norm": 0.8615383505821228, + "learning_rate": 8.257565501833555e-06, + "loss": 0.6326, + "step": 4671 + }, + { + "epoch": 0.3, + "grad_norm": 0.8761278986930847, + "learning_rate": 8.256787076094138e-06, + "loss": 0.5917, + "step": 4672 + }, + { + "epoch": 0.3, + "grad_norm": 0.9185283780097961, + "learning_rate": 8.256008513222747e-06, + "loss": 0.5846, + "step": 4673 + }, + { + "epoch": 0.3, + "grad_norm": 0.9208911061286926, + "learning_rate": 8.255229813252167e-06, + "loss": 0.6307, + "step": 4674 + }, + { + "epoch": 0.3, + "grad_norm": 0.8966217637062073, + "learning_rate": 8.25445097621518e-06, + "loss": 0.6079, + "step": 4675 + }, + { + "epoch": 0.3, + "grad_norm": 0.9059707522392273, + "learning_rate": 8.253672002144584e-06, + "loss": 0.5925, + "step": 4676 + }, + { + "epoch": 0.3, + "grad_norm": 0.8802112340927124, + "learning_rate": 8.25289289107318e-06, + "loss": 0.632, + "step": 4677 + }, + { + "epoch": 0.3, + "grad_norm": 0.8126215934753418, + "learning_rate": 8.252113643033774e-06, + "loss": 0.6072, + "step": 4678 + }, + { + "epoch": 0.3, + "grad_norm": 0.9150187969207764, + "learning_rate": 8.251334258059173e-06, + "loss": 0.6969, + "step": 4679 + }, + { + "epoch": 0.3, + "grad_norm": 0.8734495043754578, + "learning_rate": 8.250554736182199e-06, + "loss": 0.6336, + "step": 4680 + }, + { + "epoch": 0.3, + "grad_norm": 0.8438607454299927, + "learning_rate": 8.249775077435671e-06, + "loss": 0.6163, + "step": 4681 + }, + { + "epoch": 0.3, + "grad_norm": 0.9356812238693237, + "learning_rate": 8.24899528185242e-06, + "loss": 0.5518, + "step": 4682 + }, + { + "epoch": 0.3, + "grad_norm": 0.9281412959098816, + "learning_rate": 8.24821534946528e-06, + "loss": 0.6216, + "step": 4683 + }, + { + "epoch": 0.3, + "grad_norm": 0.897492527961731, + "learning_rate": 8.247435280307093e-06, + "loss": 0.6225, + "step": 4684 + }, + { + "epoch": 0.3, + "grad_norm": 0.8860996961593628, + "learning_rate": 8.246655074410703e-06, + "loss": 0.6648, + "step": 4685 + }, + { + "epoch": 0.3, + "grad_norm": 0.9270169138908386, + "learning_rate": 8.24587473180896e-06, + "loss": 0.6332, + "step": 4686 + }, + { + "epoch": 0.3, + "grad_norm": 0.9059301018714905, + "learning_rate": 8.245094252534727e-06, + "loss": 0.605, + "step": 4687 + }, + { + "epoch": 0.3, + "grad_norm": 0.9001350402832031, + "learning_rate": 8.244313636620862e-06, + "loss": 0.6293, + "step": 4688 + }, + { + "epoch": 0.3, + "grad_norm": 0.9218695759773254, + "learning_rate": 8.243532884100236e-06, + "loss": 0.6064, + "step": 4689 + }, + { + "epoch": 0.3, + "grad_norm": 0.8904708027839661, + "learning_rate": 8.242751995005721e-06, + "loss": 0.6136, + "step": 4690 + }, + { + "epoch": 0.3, + "grad_norm": 0.8279531002044678, + "learning_rate": 8.241970969370205e-06, + "loss": 0.5799, + "step": 4691 + }, + { + "epoch": 0.3, + "grad_norm": 0.887289822101593, + "learning_rate": 8.241189807226566e-06, + "loss": 0.6134, + "step": 4692 + }, + { + "epoch": 0.3, + "grad_norm": 0.9113506078720093, + "learning_rate": 8.240408508607703e-06, + "loss": 0.6129, + "step": 4693 + }, + { + "epoch": 0.3, + "grad_norm": 0.9350869655609131, + "learning_rate": 8.239627073546507e-06, + "loss": 0.6008, + "step": 4694 + }, + { + "epoch": 0.3, + "grad_norm": 0.8350124955177307, + "learning_rate": 8.238845502075886e-06, + "loss": 0.6183, + "step": 4695 + }, + { + "epoch": 0.3, + "grad_norm": 0.8774599432945251, + "learning_rate": 8.238063794228748e-06, + "loss": 0.5686, + "step": 4696 + }, + { + "epoch": 0.3, + "grad_norm": 0.8577974438667297, + "learning_rate": 8.237281950038008e-06, + "loss": 0.686, + "step": 4697 + }, + { + "epoch": 0.3, + "grad_norm": 0.8853060603141785, + "learning_rate": 8.236499969536585e-06, + "loss": 0.6389, + "step": 4698 + }, + { + "epoch": 0.3, + "grad_norm": 0.8757472038269043, + "learning_rate": 8.23571785275741e-06, + "loss": 0.6098, + "step": 4699 + }, + { + "epoch": 0.3, + "grad_norm": 0.9388381242752075, + "learning_rate": 8.234935599733412e-06, + "loss": 0.6703, + "step": 4700 + }, + { + "epoch": 0.3, + "grad_norm": 0.9161108136177063, + "learning_rate": 8.234153210497528e-06, + "loss": 0.5443, + "step": 4701 + }, + { + "epoch": 0.3, + "grad_norm": 0.8968355059623718, + "learning_rate": 8.233370685082704e-06, + "loss": 0.6287, + "step": 4702 + }, + { + "epoch": 0.3, + "grad_norm": 0.8884924650192261, + "learning_rate": 8.232588023521888e-06, + "loss": 0.5913, + "step": 4703 + }, + { + "epoch": 0.3, + "grad_norm": 0.8946593999862671, + "learning_rate": 8.231805225848035e-06, + "loss": 0.6407, + "step": 4704 + }, + { + "epoch": 0.3, + "grad_norm": 0.8582884669303894, + "learning_rate": 8.23102229209411e-06, + "loss": 0.5737, + "step": 4705 + }, + { + "epoch": 0.3, + "grad_norm": 0.9519075155258179, + "learning_rate": 8.230239222293073e-06, + "loss": 0.5501, + "step": 4706 + }, + { + "epoch": 0.3, + "grad_norm": 0.9213956594467163, + "learning_rate": 8.229456016477899e-06, + "loss": 0.5993, + "step": 4707 + }, + { + "epoch": 0.3, + "grad_norm": 0.9476253390312195, + "learning_rate": 8.228672674681568e-06, + "loss": 0.6097, + "step": 4708 + }, + { + "epoch": 0.3, + "grad_norm": 0.9826415181159973, + "learning_rate": 8.227889196937062e-06, + "loss": 0.697, + "step": 4709 + }, + { + "epoch": 0.3, + "grad_norm": 0.9420339465141296, + "learning_rate": 8.227105583277372e-06, + "loss": 0.5592, + "step": 4710 + }, + { + "epoch": 0.3, + "grad_norm": 0.8203204870223999, + "learning_rate": 8.22632183373549e-06, + "loss": 0.6001, + "step": 4711 + }, + { + "epoch": 0.3, + "grad_norm": 0.8779041171073914, + "learning_rate": 8.225537948344423e-06, + "loss": 0.6033, + "step": 4712 + }, + { + "epoch": 0.3, + "grad_norm": 0.8583627343177795, + "learning_rate": 8.224753927137171e-06, + "loss": 0.642, + "step": 4713 + }, + { + "epoch": 0.3, + "grad_norm": 0.9116830825805664, + "learning_rate": 8.22396977014675e-06, + "loss": 0.6005, + "step": 4714 + }, + { + "epoch": 0.3, + "grad_norm": 0.9073758125305176, + "learning_rate": 8.223185477406175e-06, + "loss": 0.6414, + "step": 4715 + }, + { + "epoch": 0.3, + "grad_norm": 0.9207981824874878, + "learning_rate": 8.222401048948476e-06, + "loss": 0.6138, + "step": 4716 + }, + { + "epoch": 0.3, + "grad_norm": 0.8661931753158569, + "learning_rate": 8.221616484806676e-06, + "loss": 0.6059, + "step": 4717 + }, + { + "epoch": 0.3, + "grad_norm": 0.8917931318283081, + "learning_rate": 8.220831785013814e-06, + "loss": 0.613, + "step": 4718 + }, + { + "epoch": 0.3, + "grad_norm": 0.8399578928947449, + "learning_rate": 8.22004694960293e-06, + "loss": 0.5698, + "step": 4719 + }, + { + "epoch": 0.3, + "grad_norm": 0.8338463306427002, + "learning_rate": 8.21926197860707e-06, + "loss": 0.5719, + "step": 4720 + }, + { + "epoch": 0.3, + "grad_norm": 0.9184006452560425, + "learning_rate": 8.218476872059288e-06, + "loss": 0.5921, + "step": 4721 + }, + { + "epoch": 0.3, + "grad_norm": 0.9487320780754089, + "learning_rate": 8.217691629992641e-06, + "loss": 0.6159, + "step": 4722 + }, + { + "epoch": 0.3, + "grad_norm": 0.8477067351341248, + "learning_rate": 8.216906252440193e-06, + "loss": 0.5805, + "step": 4723 + }, + { + "epoch": 0.3, + "grad_norm": 0.878990650177002, + "learning_rate": 8.216120739435013e-06, + "loss": 0.6369, + "step": 4724 + }, + { + "epoch": 0.3, + "grad_norm": 0.8777364492416382, + "learning_rate": 8.215335091010177e-06, + "loss": 0.5752, + "step": 4725 + }, + { + "epoch": 0.3, + "grad_norm": 0.8827346563339233, + "learning_rate": 8.214549307198765e-06, + "loss": 0.5918, + "step": 4726 + }, + { + "epoch": 0.3, + "grad_norm": 0.9704633355140686, + "learning_rate": 8.213763388033867e-06, + "loss": 0.6544, + "step": 4727 + }, + { + "epoch": 0.3, + "grad_norm": 0.946010172367096, + "learning_rate": 8.212977333548569e-06, + "loss": 0.6527, + "step": 4728 + }, + { + "epoch": 0.3, + "grad_norm": 0.9947195053100586, + "learning_rate": 8.212191143775973e-06, + "loss": 0.6222, + "step": 4729 + }, + { + "epoch": 0.3, + "grad_norm": 0.9032514691352844, + "learning_rate": 8.211404818749184e-06, + "loss": 0.6712, + "step": 4730 + }, + { + "epoch": 0.3, + "grad_norm": 0.8361782431602478, + "learning_rate": 8.21061835850131e-06, + "loss": 0.5807, + "step": 4731 + }, + { + "epoch": 0.3, + "grad_norm": 0.8890867233276367, + "learning_rate": 8.209831763065465e-06, + "loss": 0.6234, + "step": 4732 + }, + { + "epoch": 0.3, + "grad_norm": 0.9419566988945007, + "learning_rate": 8.209045032474773e-06, + "loss": 0.5666, + "step": 4733 + }, + { + "epoch": 0.3, + "grad_norm": 0.8726335763931274, + "learning_rate": 8.208258166762355e-06, + "loss": 0.5848, + "step": 4734 + }, + { + "epoch": 0.3, + "grad_norm": 0.8878278136253357, + "learning_rate": 8.207471165961347e-06, + "loss": 0.6069, + "step": 4735 + }, + { + "epoch": 0.3, + "grad_norm": 0.8903132081031799, + "learning_rate": 8.206684030104886e-06, + "loss": 0.6338, + "step": 4736 + }, + { + "epoch": 0.3, + "grad_norm": 0.8635721206665039, + "learning_rate": 8.205896759226115e-06, + "loss": 0.6088, + "step": 4737 + }, + { + "epoch": 0.3, + "grad_norm": 0.8765946626663208, + "learning_rate": 8.205109353358186e-06, + "loss": 0.637, + "step": 4738 + }, + { + "epoch": 0.3, + "grad_norm": 0.8656042218208313, + "learning_rate": 8.20432181253425e-06, + "loss": 0.5851, + "step": 4739 + }, + { + "epoch": 0.3, + "grad_norm": 0.8711687922477722, + "learning_rate": 8.203534136787473e-06, + "loss": 0.5607, + "step": 4740 + }, + { + "epoch": 0.3, + "grad_norm": 0.8796273469924927, + "learning_rate": 8.202746326151015e-06, + "loss": 0.5778, + "step": 4741 + }, + { + "epoch": 0.3, + "grad_norm": 0.8987690806388855, + "learning_rate": 8.20195838065805e-06, + "loss": 0.6896, + "step": 4742 + }, + { + "epoch": 0.3, + "grad_norm": 0.9169846177101135, + "learning_rate": 8.201170300341757e-06, + "loss": 0.6126, + "step": 4743 + }, + { + "epoch": 0.3, + "grad_norm": 0.8620352149009705, + "learning_rate": 8.20038208523532e-06, + "loss": 0.6509, + "step": 4744 + }, + { + "epoch": 0.3, + "grad_norm": 0.9679823517799377, + "learning_rate": 8.199593735371924e-06, + "loss": 0.6318, + "step": 4745 + }, + { + "epoch": 0.3, + "grad_norm": 0.8961201310157776, + "learning_rate": 8.198805250784769e-06, + "loss": 0.6057, + "step": 4746 + }, + { + "epoch": 0.3, + "grad_norm": 0.8943774700164795, + "learning_rate": 8.198016631507053e-06, + "loss": 0.5722, + "step": 4747 + }, + { + "epoch": 0.3, + "grad_norm": 0.9439212679862976, + "learning_rate": 8.19722787757198e-06, + "loss": 0.5788, + "step": 4748 + }, + { + "epoch": 0.3, + "grad_norm": 0.929137647151947, + "learning_rate": 8.196438989012765e-06, + "loss": 0.5959, + "step": 4749 + }, + { + "epoch": 0.3, + "grad_norm": 0.8944662809371948, + "learning_rate": 8.195649965862622e-06, + "loss": 0.5926, + "step": 4750 + }, + { + "epoch": 0.3, + "grad_norm": 0.910987138748169, + "learning_rate": 8.194860808154778e-06, + "loss": 0.5805, + "step": 4751 + }, + { + "epoch": 0.3, + "grad_norm": 0.8900378346443176, + "learning_rate": 8.194071515922456e-06, + "loss": 0.5787, + "step": 4752 + }, + { + "epoch": 0.3, + "grad_norm": 0.9534246325492859, + "learning_rate": 8.193282089198897e-06, + "loss": 0.6274, + "step": 4753 + }, + { + "epoch": 0.3, + "grad_norm": 0.9117621779441833, + "learning_rate": 8.192492528017337e-06, + "loss": 0.6155, + "step": 4754 + }, + { + "epoch": 0.3, + "grad_norm": 0.830488383769989, + "learning_rate": 8.191702832411023e-06, + "loss": 0.5826, + "step": 4755 + }, + { + "epoch": 0.3, + "grad_norm": 0.9440089464187622, + "learning_rate": 8.190913002413204e-06, + "loss": 0.652, + "step": 4756 + }, + { + "epoch": 0.3, + "grad_norm": 0.9013427495956421, + "learning_rate": 8.19012303805714e-06, + "loss": 0.6266, + "step": 4757 + }, + { + "epoch": 0.3, + "grad_norm": 0.8950908184051514, + "learning_rate": 8.189332939376092e-06, + "loss": 0.6494, + "step": 4758 + }, + { + "epoch": 0.3, + "grad_norm": 0.8194960951805115, + "learning_rate": 8.188542706403331e-06, + "loss": 0.5597, + "step": 4759 + }, + { + "epoch": 0.3, + "grad_norm": 0.8966452479362488, + "learning_rate": 8.187752339172126e-06, + "loss": 0.5973, + "step": 4760 + }, + { + "epoch": 0.3, + "grad_norm": 0.8810895681381226, + "learning_rate": 8.18696183771576e-06, + "loss": 0.6343, + "step": 4761 + }, + { + "epoch": 0.3, + "grad_norm": 0.9110032916069031, + "learning_rate": 8.18617120206752e-06, + "loss": 0.6262, + "step": 4762 + }, + { + "epoch": 0.3, + "grad_norm": 0.8125797510147095, + "learning_rate": 8.185380432260693e-06, + "loss": 0.5832, + "step": 4763 + }, + { + "epoch": 0.3, + "grad_norm": 0.9191034436225891, + "learning_rate": 8.184589528328576e-06, + "loss": 0.6396, + "step": 4764 + }, + { + "epoch": 0.3, + "grad_norm": 0.910497784614563, + "learning_rate": 8.183798490304473e-06, + "loss": 0.6085, + "step": 4765 + }, + { + "epoch": 0.3, + "grad_norm": 0.9907393455505371, + "learning_rate": 8.183007318221691e-06, + "loss": 0.6692, + "step": 4766 + }, + { + "epoch": 0.3, + "grad_norm": 0.897619903087616, + "learning_rate": 8.182216012113543e-06, + "loss": 0.5841, + "step": 4767 + }, + { + "epoch": 0.3, + "grad_norm": 0.8304966688156128, + "learning_rate": 8.181424572013352e-06, + "loss": 0.5711, + "step": 4768 + }, + { + "epoch": 0.3, + "grad_norm": 0.8721338510513306, + "learning_rate": 8.180632997954437e-06, + "loss": 0.5515, + "step": 4769 + }, + { + "epoch": 0.3, + "grad_norm": 0.9142031073570251, + "learning_rate": 8.179841289970132e-06, + "loss": 0.6018, + "step": 4770 + }, + { + "epoch": 0.3, + "grad_norm": 0.8813204765319824, + "learning_rate": 8.179049448093771e-06, + "loss": 0.6077, + "step": 4771 + }, + { + "epoch": 0.3, + "grad_norm": 0.8773213624954224, + "learning_rate": 8.178257472358697e-06, + "loss": 0.6325, + "step": 4772 + }, + { + "epoch": 0.3, + "grad_norm": 0.936450183391571, + "learning_rate": 8.177465362798259e-06, + "loss": 0.6321, + "step": 4773 + }, + { + "epoch": 0.3, + "grad_norm": 0.898252010345459, + "learning_rate": 8.176673119445807e-06, + "loss": 0.6171, + "step": 4774 + }, + { + "epoch": 0.3, + "grad_norm": 0.8829185366630554, + "learning_rate": 8.1758807423347e-06, + "loss": 0.6601, + "step": 4775 + }, + { + "epoch": 0.3, + "grad_norm": 0.8611942529678345, + "learning_rate": 8.175088231498304e-06, + "loss": 0.6078, + "step": 4776 + }, + { + "epoch": 0.3, + "grad_norm": 0.9188866019248962, + "learning_rate": 8.174295586969987e-06, + "loss": 0.5632, + "step": 4777 + }, + { + "epoch": 0.3, + "grad_norm": 0.9207391142845154, + "learning_rate": 8.173502808783127e-06, + "loss": 0.6337, + "step": 4778 + }, + { + "epoch": 0.3, + "grad_norm": 0.8784085512161255, + "learning_rate": 8.172709896971103e-06, + "loss": 0.5728, + "step": 4779 + }, + { + "epoch": 0.3, + "grad_norm": 0.9227593541145325, + "learning_rate": 8.1719168515673e-06, + "loss": 0.6868, + "step": 4780 + }, + { + "epoch": 0.3, + "grad_norm": 0.9259268045425415, + "learning_rate": 8.171123672605116e-06, + "loss": 0.6095, + "step": 4781 + }, + { + "epoch": 0.3, + "grad_norm": 0.8867712020874023, + "learning_rate": 8.170330360117944e-06, + "loss": 0.6493, + "step": 4782 + }, + { + "epoch": 0.3, + "grad_norm": 0.9128400087356567, + "learning_rate": 8.169536914139189e-06, + "loss": 0.5594, + "step": 4783 + }, + { + "epoch": 0.3, + "grad_norm": 0.9003540873527527, + "learning_rate": 8.168743334702262e-06, + "loss": 0.5926, + "step": 4784 + }, + { + "epoch": 0.3, + "grad_norm": 0.9833676218986511, + "learning_rate": 8.167949621840576e-06, + "loss": 0.621, + "step": 4785 + }, + { + "epoch": 0.3, + "grad_norm": 0.865376889705658, + "learning_rate": 8.16715577558755e-06, + "loss": 0.5874, + "step": 4786 + }, + { + "epoch": 0.3, + "grad_norm": 0.8381644487380981, + "learning_rate": 8.166361795976614e-06, + "loss": 0.6214, + "step": 4787 + }, + { + "epoch": 0.3, + "grad_norm": 0.9137545228004456, + "learning_rate": 8.165567683041197e-06, + "loss": 0.6128, + "step": 4788 + }, + { + "epoch": 0.3, + "grad_norm": 0.8159583210945129, + "learning_rate": 8.164773436814736e-06, + "loss": 0.5778, + "step": 4789 + }, + { + "epoch": 0.3, + "grad_norm": 0.9791309833526611, + "learning_rate": 8.163979057330677e-06, + "loss": 0.598, + "step": 4790 + }, + { + "epoch": 0.3, + "grad_norm": 0.8282786011695862, + "learning_rate": 8.163184544622467e-06, + "loss": 0.4893, + "step": 4791 + }, + { + "epoch": 0.3, + "grad_norm": 0.9099088907241821, + "learning_rate": 8.162389898723558e-06, + "loss": 0.6035, + "step": 4792 + }, + { + "epoch": 0.3, + "grad_norm": 0.8515049815177917, + "learning_rate": 8.161595119667413e-06, + "loss": 0.582, + "step": 4793 + }, + { + "epoch": 0.3, + "grad_norm": 0.9349850416183472, + "learning_rate": 8.160800207487495e-06, + "loss": 0.6135, + "step": 4794 + }, + { + "epoch": 0.3, + "grad_norm": 0.848458468914032, + "learning_rate": 8.160005162217275e-06, + "loss": 0.6004, + "step": 4795 + }, + { + "epoch": 0.3, + "grad_norm": 0.8345276117324829, + "learning_rate": 8.159209983890232e-06, + "loss": 0.5956, + "step": 4796 + }, + { + "epoch": 0.3, + "grad_norm": 0.8990775942802429, + "learning_rate": 8.158414672539845e-06, + "loss": 0.6247, + "step": 4797 + }, + { + "epoch": 0.3, + "grad_norm": 0.8795309662818909, + "learning_rate": 8.157619228199605e-06, + "loss": 0.5965, + "step": 4798 + }, + { + "epoch": 0.3, + "grad_norm": 0.9947782158851624, + "learning_rate": 8.156823650903003e-06, + "loss": 0.6742, + "step": 4799 + }, + { + "epoch": 0.3, + "grad_norm": 0.8884429931640625, + "learning_rate": 8.156027940683539e-06, + "loss": 0.5757, + "step": 4800 + }, + { + "epoch": 0.3, + "grad_norm": 0.888424277305603, + "learning_rate": 8.15523209757472e-06, + "loss": 0.6723, + "step": 4801 + }, + { + "epoch": 0.3, + "grad_norm": 0.9217067360877991, + "learning_rate": 8.15443612161005e-06, + "loss": 0.6168, + "step": 4802 + }, + { + "epoch": 0.3, + "grad_norm": 0.8976277709007263, + "learning_rate": 8.15364001282305e-06, + "loss": 0.6127, + "step": 4803 + }, + { + "epoch": 0.3, + "grad_norm": 0.8980615735054016, + "learning_rate": 8.15284377124724e-06, + "loss": 0.5577, + "step": 4804 + }, + { + "epoch": 0.3, + "grad_norm": 0.8790192008018494, + "learning_rate": 8.152047396916145e-06, + "loss": 0.5193, + "step": 4805 + }, + { + "epoch": 0.3, + "grad_norm": 0.9707584977149963, + "learning_rate": 8.1512508898633e-06, + "loss": 0.6459, + "step": 4806 + }, + { + "epoch": 0.3, + "grad_norm": 0.8137477040290833, + "learning_rate": 8.150454250122245e-06, + "loss": 0.585, + "step": 4807 + }, + { + "epoch": 0.3, + "grad_norm": 0.8875191807746887, + "learning_rate": 8.149657477726518e-06, + "loss": 0.581, + "step": 4808 + }, + { + "epoch": 0.3, + "grad_norm": 0.8823238611221313, + "learning_rate": 8.148860572709674e-06, + "loss": 0.6095, + "step": 4809 + }, + { + "epoch": 0.3, + "grad_norm": 0.8552770018577576, + "learning_rate": 8.148063535105261e-06, + "loss": 0.596, + "step": 4810 + }, + { + "epoch": 0.3, + "grad_norm": 0.9799924492835999, + "learning_rate": 8.147266364946848e-06, + "loss": 0.6214, + "step": 4811 + }, + { + "epoch": 0.3, + "grad_norm": 0.9276837706565857, + "learning_rate": 8.146469062267995e-06, + "loss": 0.6281, + "step": 4812 + }, + { + "epoch": 0.3, + "grad_norm": 0.9394620656967163, + "learning_rate": 8.145671627102277e-06, + "loss": 0.606, + "step": 4813 + }, + { + "epoch": 0.3, + "grad_norm": 0.8922251462936401, + "learning_rate": 8.14487405948327e-06, + "loss": 0.5859, + "step": 4814 + }, + { + "epoch": 0.31, + "grad_norm": 0.8474637866020203, + "learning_rate": 8.144076359444555e-06, + "loss": 0.5582, + "step": 4815 + }, + { + "epoch": 0.31, + "grad_norm": 0.9072783589363098, + "learning_rate": 8.143278527019722e-06, + "loss": 0.6062, + "step": 4816 + }, + { + "epoch": 0.31, + "grad_norm": 0.9250416159629822, + "learning_rate": 8.142480562242365e-06, + "loss": 0.6103, + "step": 4817 + }, + { + "epoch": 0.31, + "grad_norm": 0.9013091325759888, + "learning_rate": 8.141682465146084e-06, + "loss": 0.6005, + "step": 4818 + }, + { + "epoch": 0.31, + "grad_norm": 0.8904187083244324, + "learning_rate": 8.140884235764484e-06, + "loss": 0.6026, + "step": 4819 + }, + { + "epoch": 0.31, + "grad_norm": 0.884691596031189, + "learning_rate": 8.140085874131174e-06, + "loss": 0.5996, + "step": 4820 + }, + { + "epoch": 0.31, + "grad_norm": 0.8515493869781494, + "learning_rate": 8.139287380279773e-06, + "loss": 0.6056, + "step": 4821 + }, + { + "epoch": 0.31, + "grad_norm": 0.9234597086906433, + "learning_rate": 8.138488754243899e-06, + "loss": 0.6553, + "step": 4822 + }, + { + "epoch": 0.31, + "grad_norm": 0.9508641958236694, + "learning_rate": 8.137689996057183e-06, + "loss": 0.6402, + "step": 4823 + }, + { + "epoch": 0.31, + "grad_norm": 0.8529036641120911, + "learning_rate": 8.136891105753258e-06, + "loss": 0.5396, + "step": 4824 + }, + { + "epoch": 0.31, + "grad_norm": 0.888896107673645, + "learning_rate": 8.136092083365758e-06, + "loss": 0.6194, + "step": 4825 + }, + { + "epoch": 0.31, + "grad_norm": 0.8955714106559753, + "learning_rate": 8.13529292892833e-06, + "loss": 0.6351, + "step": 4826 + }, + { + "epoch": 0.31, + "grad_norm": 0.89292311668396, + "learning_rate": 8.134493642474625e-06, + "loss": 0.6317, + "step": 4827 + }, + { + "epoch": 0.31, + "grad_norm": 0.8899608850479126, + "learning_rate": 8.133694224038297e-06, + "loss": 0.5935, + "step": 4828 + }, + { + "epoch": 0.31, + "grad_norm": 0.8758254051208496, + "learning_rate": 8.132894673653007e-06, + "loss": 0.6203, + "step": 4829 + }, + { + "epoch": 0.31, + "grad_norm": 0.8891094326972961, + "learning_rate": 8.13209499135242e-06, + "loss": 0.5985, + "step": 4830 + }, + { + "epoch": 0.31, + "grad_norm": 0.9486083984375, + "learning_rate": 8.131295177170208e-06, + "loss": 0.6333, + "step": 4831 + }, + { + "epoch": 0.31, + "grad_norm": 0.9144448041915894, + "learning_rate": 8.13049523114005e-06, + "loss": 0.6241, + "step": 4832 + }, + { + "epoch": 0.31, + "grad_norm": 0.8380624651908875, + "learning_rate": 8.129695153295627e-06, + "loss": 0.5578, + "step": 4833 + }, + { + "epoch": 0.31, + "grad_norm": 0.8630735278129578, + "learning_rate": 8.128894943670631e-06, + "loss": 0.5934, + "step": 4834 + }, + { + "epoch": 0.31, + "grad_norm": 0.8654153943061829, + "learning_rate": 8.12809460229875e-06, + "loss": 0.5767, + "step": 4835 + }, + { + "epoch": 0.31, + "grad_norm": 0.8959805369377136, + "learning_rate": 8.127294129213691e-06, + "loss": 0.6212, + "step": 4836 + }, + { + "epoch": 0.31, + "grad_norm": 0.8940380215644836, + "learning_rate": 8.126493524449153e-06, + "loss": 0.6261, + "step": 4837 + }, + { + "epoch": 0.31, + "grad_norm": 0.946277916431427, + "learning_rate": 8.12569278803885e-06, + "loss": 0.617, + "step": 4838 + }, + { + "epoch": 0.31, + "grad_norm": 0.9186848998069763, + "learning_rate": 8.124891920016495e-06, + "loss": 0.6387, + "step": 4839 + }, + { + "epoch": 0.31, + "grad_norm": 0.9961265325546265, + "learning_rate": 8.124090920415814e-06, + "loss": 0.6452, + "step": 4840 + }, + { + "epoch": 0.31, + "grad_norm": 0.8689594268798828, + "learning_rate": 8.123289789270532e-06, + "loss": 0.6492, + "step": 4841 + }, + { + "epoch": 0.31, + "grad_norm": 0.9431710243225098, + "learning_rate": 8.12248852661438e-06, + "loss": 0.639, + "step": 4842 + }, + { + "epoch": 0.31, + "grad_norm": 0.8190953731536865, + "learning_rate": 8.121687132481101e-06, + "loss": 0.5752, + "step": 4843 + }, + { + "epoch": 0.31, + "grad_norm": 0.828509509563446, + "learning_rate": 8.120885606904436e-06, + "loss": 0.5913, + "step": 4844 + }, + { + "epoch": 0.31, + "grad_norm": 0.8336859345436096, + "learning_rate": 8.120083949918137e-06, + "loss": 0.5577, + "step": 4845 + }, + { + "epoch": 0.31, + "grad_norm": 0.8060721158981323, + "learning_rate": 8.119282161555952e-06, + "loss": 0.5642, + "step": 4846 + }, + { + "epoch": 0.31, + "grad_norm": 0.9661688208580017, + "learning_rate": 8.11848024185165e-06, + "loss": 0.6525, + "step": 4847 + }, + { + "epoch": 0.31, + "grad_norm": 0.8893968462944031, + "learning_rate": 8.117678190838991e-06, + "loss": 0.6042, + "step": 4848 + }, + { + "epoch": 0.31, + "grad_norm": 0.8722717761993408, + "learning_rate": 8.116876008551751e-06, + "loss": 0.5784, + "step": 4849 + }, + { + "epoch": 0.31, + "grad_norm": 0.8579627275466919, + "learning_rate": 8.116073695023704e-06, + "loss": 0.6056, + "step": 4850 + }, + { + "epoch": 0.31, + "grad_norm": 0.9025737643241882, + "learning_rate": 8.115271250288635e-06, + "loss": 0.5785, + "step": 4851 + }, + { + "epoch": 0.31, + "grad_norm": 0.9787115454673767, + "learning_rate": 8.114468674380328e-06, + "loss": 0.6641, + "step": 4852 + }, + { + "epoch": 0.31, + "grad_norm": 0.9400716423988342, + "learning_rate": 8.113665967332582e-06, + "loss": 0.604, + "step": 4853 + }, + { + "epoch": 0.31, + "grad_norm": 0.8718861937522888, + "learning_rate": 8.112863129179194e-06, + "loss": 0.644, + "step": 4854 + }, + { + "epoch": 0.31, + "grad_norm": 0.9167654514312744, + "learning_rate": 8.112060159953966e-06, + "loss": 0.6431, + "step": 4855 + }, + { + "epoch": 0.31, + "grad_norm": 0.9498146176338196, + "learning_rate": 8.111257059690714e-06, + "loss": 0.6549, + "step": 4856 + }, + { + "epoch": 0.31, + "grad_norm": 0.9236502051353455, + "learning_rate": 8.110453828423248e-06, + "loss": 0.6302, + "step": 4857 + }, + { + "epoch": 0.31, + "grad_norm": 0.9004657864570618, + "learning_rate": 8.109650466185394e-06, + "loss": 0.6053, + "step": 4858 + }, + { + "epoch": 0.31, + "grad_norm": 0.8638118505477905, + "learning_rate": 8.108846973010975e-06, + "loss": 0.558, + "step": 4859 + }, + { + "epoch": 0.31, + "grad_norm": 0.8778232932090759, + "learning_rate": 8.108043348933825e-06, + "loss": 0.626, + "step": 4860 + }, + { + "epoch": 0.31, + "grad_norm": 0.9175794720649719, + "learning_rate": 8.107239593987781e-06, + "loss": 0.5887, + "step": 4861 + }, + { + "epoch": 0.31, + "grad_norm": 0.9042779207229614, + "learning_rate": 8.10643570820669e-06, + "loss": 0.6235, + "step": 4862 + }, + { + "epoch": 0.31, + "grad_norm": 0.9080462455749512, + "learning_rate": 8.105631691624394e-06, + "loss": 0.6401, + "step": 4863 + }, + { + "epoch": 0.31, + "grad_norm": 0.9129647016525269, + "learning_rate": 8.104827544274754e-06, + "loss": 0.7069, + "step": 4864 + }, + { + "epoch": 0.31, + "grad_norm": 0.876524806022644, + "learning_rate": 8.104023266191625e-06, + "loss": 0.5799, + "step": 4865 + }, + { + "epoch": 0.31, + "grad_norm": 0.9011818766593933, + "learning_rate": 8.103218857408875e-06, + "loss": 0.6213, + "step": 4866 + }, + { + "epoch": 0.31, + "grad_norm": 0.9621694684028625, + "learning_rate": 8.102414317960373e-06, + "loss": 0.6267, + "step": 4867 + }, + { + "epoch": 0.31, + "grad_norm": 0.883078932762146, + "learning_rate": 8.10160964788e-06, + "loss": 0.5822, + "step": 4868 + }, + { + "epoch": 0.31, + "grad_norm": 0.9894744157791138, + "learning_rate": 8.100804847201632e-06, + "loss": 0.6117, + "step": 4869 + }, + { + "epoch": 0.31, + "grad_norm": 0.9744158983230591, + "learning_rate": 8.09999991595916e-06, + "loss": 0.5913, + "step": 4870 + }, + { + "epoch": 0.31, + "grad_norm": 0.9933215379714966, + "learning_rate": 8.099194854186475e-06, + "loss": 0.6645, + "step": 4871 + }, + { + "epoch": 0.31, + "grad_norm": 0.8537378907203674, + "learning_rate": 8.098389661917475e-06, + "loss": 0.5375, + "step": 4872 + }, + { + "epoch": 0.31, + "grad_norm": 0.9022486209869385, + "learning_rate": 8.097584339186066e-06, + "loss": 0.5917, + "step": 4873 + }, + { + "epoch": 0.31, + "grad_norm": 0.8206536173820496, + "learning_rate": 8.096778886026155e-06, + "loss": 0.6281, + "step": 4874 + }, + { + "epoch": 0.31, + "grad_norm": 0.9026719927787781, + "learning_rate": 8.09597330247166e-06, + "loss": 0.5709, + "step": 4875 + }, + { + "epoch": 0.31, + "grad_norm": 0.8792065978050232, + "learning_rate": 8.095167588556498e-06, + "loss": 0.6622, + "step": 4876 + }, + { + "epoch": 0.31, + "grad_norm": 0.938779890537262, + "learning_rate": 8.094361744314597e-06, + "loss": 0.6345, + "step": 4877 + }, + { + "epoch": 0.31, + "grad_norm": 0.84425288438797, + "learning_rate": 8.093555769779887e-06, + "loss": 0.5552, + "step": 4878 + }, + { + "epoch": 0.31, + "grad_norm": 0.9458581209182739, + "learning_rate": 8.092749664986304e-06, + "loss": 0.639, + "step": 4879 + }, + { + "epoch": 0.31, + "grad_norm": 0.8601045608520508, + "learning_rate": 8.091943429967792e-06, + "loss": 0.5757, + "step": 4880 + }, + { + "epoch": 0.31, + "grad_norm": 0.9777496457099915, + "learning_rate": 8.0911370647583e-06, + "loss": 0.6873, + "step": 4881 + }, + { + "epoch": 0.31, + "grad_norm": 0.830226480960846, + "learning_rate": 8.090330569391778e-06, + "loss": 0.5941, + "step": 4882 + }, + { + "epoch": 0.31, + "grad_norm": 0.9674537777900696, + "learning_rate": 8.089523943902187e-06, + "loss": 0.6356, + "step": 4883 + }, + { + "epoch": 0.31, + "grad_norm": 0.9554563164710999, + "learning_rate": 8.088717188323489e-06, + "loss": 0.6701, + "step": 4884 + }, + { + "epoch": 0.31, + "grad_norm": 0.8986421823501587, + "learning_rate": 8.087910302689656e-06, + "loss": 0.6009, + "step": 4885 + }, + { + "epoch": 0.31, + "grad_norm": 0.8413382172584534, + "learning_rate": 8.087103287034664e-06, + "loss": 0.6183, + "step": 4886 + }, + { + "epoch": 0.31, + "grad_norm": 0.8769293427467346, + "learning_rate": 8.086296141392489e-06, + "loss": 0.5598, + "step": 4887 + }, + { + "epoch": 0.31, + "grad_norm": 0.8853359818458557, + "learning_rate": 8.08548886579712e-06, + "loss": 0.6184, + "step": 4888 + }, + { + "epoch": 0.31, + "grad_norm": 0.9408413171768188, + "learning_rate": 8.08468146028255e-06, + "loss": 0.5866, + "step": 4889 + }, + { + "epoch": 0.31, + "grad_norm": 0.8464492559432983, + "learning_rate": 8.083873924882775e-06, + "loss": 0.5744, + "step": 4890 + }, + { + "epoch": 0.31, + "grad_norm": 0.9093937873840332, + "learning_rate": 8.083066259631796e-06, + "loss": 0.6079, + "step": 4891 + }, + { + "epoch": 0.31, + "grad_norm": 0.9300260543823242, + "learning_rate": 8.082258464563621e-06, + "loss": 0.6214, + "step": 4892 + }, + { + "epoch": 0.31, + "grad_norm": 0.8799288272857666, + "learning_rate": 8.081450539712266e-06, + "loss": 0.5775, + "step": 4893 + }, + { + "epoch": 0.31, + "grad_norm": 0.9392613768577576, + "learning_rate": 8.080642485111747e-06, + "loss": 0.5812, + "step": 4894 + }, + { + "epoch": 0.31, + "grad_norm": 0.8481096625328064, + "learning_rate": 8.07983430079609e-06, + "loss": 0.5847, + "step": 4895 + }, + { + "epoch": 0.31, + "grad_norm": 0.8689022660255432, + "learning_rate": 8.079025986799326e-06, + "loss": 0.5681, + "step": 4896 + }, + { + "epoch": 0.31, + "grad_norm": 0.8942854404449463, + "learning_rate": 8.078217543155488e-06, + "loss": 0.5555, + "step": 4897 + }, + { + "epoch": 0.31, + "grad_norm": 0.9350181818008423, + "learning_rate": 8.077408969898619e-06, + "loss": 0.6732, + "step": 4898 + }, + { + "epoch": 0.31, + "grad_norm": 0.8985404372215271, + "learning_rate": 8.076600267062761e-06, + "loss": 0.6312, + "step": 4899 + }, + { + "epoch": 0.31, + "grad_norm": 0.9145780801773071, + "learning_rate": 8.07579143468197e-06, + "loss": 0.6166, + "step": 4900 + }, + { + "epoch": 0.31, + "grad_norm": 0.8609732389450073, + "learning_rate": 8.074982472790302e-06, + "loss": 0.5519, + "step": 4901 + }, + { + "epoch": 0.31, + "grad_norm": 0.9401060938835144, + "learning_rate": 8.074173381421819e-06, + "loss": 0.6135, + "step": 4902 + }, + { + "epoch": 0.31, + "grad_norm": 0.8980786800384521, + "learning_rate": 8.073364160610589e-06, + "loss": 0.578, + "step": 4903 + }, + { + "epoch": 0.31, + "grad_norm": 0.8506133556365967, + "learning_rate": 8.072554810390685e-06, + "loss": 0.5842, + "step": 4904 + }, + { + "epoch": 0.31, + "grad_norm": 0.9556955099105835, + "learning_rate": 8.071745330796187e-06, + "loss": 0.6877, + "step": 4905 + }, + { + "epoch": 0.31, + "grad_norm": 0.8503575921058655, + "learning_rate": 8.070935721861178e-06, + "loss": 0.5922, + "step": 4906 + }, + { + "epoch": 0.31, + "grad_norm": 0.8888681530952454, + "learning_rate": 8.07012598361975e-06, + "loss": 0.6101, + "step": 4907 + }, + { + "epoch": 0.31, + "grad_norm": 0.8827106952667236, + "learning_rate": 8.069316116105996e-06, + "loss": 0.6722, + "step": 4908 + }, + { + "epoch": 0.31, + "grad_norm": 0.8604966998100281, + "learning_rate": 8.068506119354019e-06, + "loss": 0.566, + "step": 4909 + }, + { + "epoch": 0.31, + "grad_norm": 0.9307197332382202, + "learning_rate": 8.067695993397923e-06, + "loss": 0.6324, + "step": 4910 + }, + { + "epoch": 0.31, + "grad_norm": 0.8086503148078918, + "learning_rate": 8.066885738271821e-06, + "loss": 0.5555, + "step": 4911 + }, + { + "epoch": 0.31, + "grad_norm": 0.8632538914680481, + "learning_rate": 8.06607535400983e-06, + "loss": 0.5949, + "step": 4912 + }, + { + "epoch": 0.31, + "grad_norm": 0.893225908279419, + "learning_rate": 8.06526484064607e-06, + "loss": 0.5895, + "step": 4913 + }, + { + "epoch": 0.31, + "grad_norm": 0.9265469908714294, + "learning_rate": 8.064454198214673e-06, + "loss": 0.6288, + "step": 4914 + }, + { + "epoch": 0.31, + "grad_norm": 0.9373133778572083, + "learning_rate": 8.063643426749769e-06, + "loss": 0.6299, + "step": 4915 + }, + { + "epoch": 0.31, + "grad_norm": 0.9107393622398376, + "learning_rate": 8.062832526285498e-06, + "loss": 0.634, + "step": 4916 + }, + { + "epoch": 0.31, + "grad_norm": 0.9622877836227417, + "learning_rate": 8.062021496856004e-06, + "loss": 0.6507, + "step": 4917 + }, + { + "epoch": 0.31, + "grad_norm": 0.9220041632652283, + "learning_rate": 8.061210338495437e-06, + "loss": 0.6477, + "step": 4918 + }, + { + "epoch": 0.31, + "grad_norm": 0.8224441409111023, + "learning_rate": 8.060399051237952e-06, + "loss": 0.65, + "step": 4919 + }, + { + "epoch": 0.31, + "grad_norm": 0.8881222605705261, + "learning_rate": 8.059587635117709e-06, + "loss": 0.5975, + "step": 4920 + }, + { + "epoch": 0.31, + "grad_norm": 0.8618130683898926, + "learning_rate": 8.058776090168874e-06, + "loss": 0.5906, + "step": 4921 + }, + { + "epoch": 0.31, + "grad_norm": 0.8412930369377136, + "learning_rate": 8.057964416425618e-06, + "loss": 0.5719, + "step": 4922 + }, + { + "epoch": 0.31, + "grad_norm": 0.9025030136108398, + "learning_rate": 8.05715261392212e-06, + "loss": 0.5403, + "step": 4923 + }, + { + "epoch": 0.31, + "grad_norm": 0.8365161418914795, + "learning_rate": 8.05634068269256e-06, + "loss": 0.5598, + "step": 4924 + }, + { + "epoch": 0.31, + "grad_norm": 0.8508699536323547, + "learning_rate": 8.055528622771124e-06, + "loss": 0.6019, + "step": 4925 + }, + { + "epoch": 0.31, + "grad_norm": 0.8714786171913147, + "learning_rate": 8.05471643419201e-06, + "loss": 0.5831, + "step": 4926 + }, + { + "epoch": 0.31, + "grad_norm": 0.9067984819412231, + "learning_rate": 8.053904116989413e-06, + "loss": 0.6098, + "step": 4927 + }, + { + "epoch": 0.31, + "grad_norm": 0.7827737927436829, + "learning_rate": 8.053091671197537e-06, + "loss": 0.5421, + "step": 4928 + }, + { + "epoch": 0.31, + "grad_norm": 0.8685954809188843, + "learning_rate": 8.052279096850591e-06, + "loss": 0.6027, + "step": 4929 + }, + { + "epoch": 0.31, + "grad_norm": 0.8910870552062988, + "learning_rate": 8.051466393982792e-06, + "loss": 0.6068, + "step": 4930 + }, + { + "epoch": 0.31, + "grad_norm": 0.8768137693405151, + "learning_rate": 8.050653562628356e-06, + "loss": 0.6151, + "step": 4931 + }, + { + "epoch": 0.31, + "grad_norm": 0.9031566977500916, + "learning_rate": 8.049840602821512e-06, + "loss": 0.6637, + "step": 4932 + }, + { + "epoch": 0.31, + "grad_norm": 1.0350744724273682, + "learning_rate": 8.04902751459649e-06, + "loss": 0.606, + "step": 4933 + }, + { + "epoch": 0.31, + "grad_norm": 0.848858118057251, + "learning_rate": 8.048214297987526e-06, + "loss": 0.5559, + "step": 4934 + }, + { + "epoch": 0.31, + "grad_norm": 0.8456379771232605, + "learning_rate": 8.047400953028863e-06, + "loss": 0.569, + "step": 4935 + }, + { + "epoch": 0.31, + "grad_norm": 0.8597366213798523, + "learning_rate": 8.046587479754746e-06, + "loss": 0.5696, + "step": 4936 + }, + { + "epoch": 0.31, + "grad_norm": 0.9166806936264038, + "learning_rate": 8.04577387819943e-06, + "loss": 0.659, + "step": 4937 + }, + { + "epoch": 0.31, + "grad_norm": 0.8950727581977844, + "learning_rate": 8.044960148397168e-06, + "loss": 0.6182, + "step": 4938 + }, + { + "epoch": 0.31, + "grad_norm": 0.9122840166091919, + "learning_rate": 8.04414629038223e-06, + "loss": 0.6245, + "step": 4939 + }, + { + "epoch": 0.31, + "grad_norm": 0.8276764750480652, + "learning_rate": 8.04333230418888e-06, + "loss": 0.5669, + "step": 4940 + }, + { + "epoch": 0.31, + "grad_norm": 0.9038193821907043, + "learning_rate": 8.042518189851394e-06, + "loss": 0.5997, + "step": 4941 + }, + { + "epoch": 0.31, + "grad_norm": 0.894939124584198, + "learning_rate": 8.04170394740405e-06, + "loss": 0.6326, + "step": 4942 + }, + { + "epoch": 0.31, + "grad_norm": 0.8787042498588562, + "learning_rate": 8.040889576881136e-06, + "loss": 0.6027, + "step": 4943 + }, + { + "epoch": 0.31, + "grad_norm": 0.8947983980178833, + "learning_rate": 8.04007507831694e-06, + "loss": 0.6459, + "step": 4944 + }, + { + "epoch": 0.31, + "grad_norm": 0.8807556629180908, + "learning_rate": 8.039260451745758e-06, + "loss": 0.5848, + "step": 4945 + }, + { + "epoch": 0.31, + "grad_norm": 0.8896877765655518, + "learning_rate": 8.03844569720189e-06, + "loss": 0.6028, + "step": 4946 + }, + { + "epoch": 0.31, + "grad_norm": 0.872142493724823, + "learning_rate": 8.037630814719644e-06, + "loss": 0.5868, + "step": 4947 + }, + { + "epoch": 0.31, + "grad_norm": 0.808314859867096, + "learning_rate": 8.036815804333334e-06, + "loss": 0.5896, + "step": 4948 + }, + { + "epoch": 0.31, + "grad_norm": 0.8907493948936462, + "learning_rate": 8.036000666077273e-06, + "loss": 0.606, + "step": 4949 + }, + { + "epoch": 0.31, + "grad_norm": 0.8417367339134216, + "learning_rate": 8.035185399985784e-06, + "loss": 0.5841, + "step": 4950 + }, + { + "epoch": 0.31, + "grad_norm": 0.8830011487007141, + "learning_rate": 8.034370006093198e-06, + "loss": 0.6737, + "step": 4951 + }, + { + "epoch": 0.31, + "grad_norm": 1.0002917051315308, + "learning_rate": 8.033554484433848e-06, + "loss": 0.587, + "step": 4952 + }, + { + "epoch": 0.31, + "grad_norm": 0.9197138547897339, + "learning_rate": 8.032738835042068e-06, + "loss": 0.6633, + "step": 4953 + }, + { + "epoch": 0.31, + "grad_norm": 0.889056384563446, + "learning_rate": 8.031923057952208e-06, + "loss": 0.5941, + "step": 4954 + }, + { + "epoch": 0.31, + "grad_norm": 0.8778578042984009, + "learning_rate": 8.031107153198617e-06, + "loss": 0.6132, + "step": 4955 + }, + { + "epoch": 0.31, + "grad_norm": 0.925252377986908, + "learning_rate": 8.030291120815647e-06, + "loss": 0.6115, + "step": 4956 + }, + { + "epoch": 0.31, + "grad_norm": 0.9050502777099609, + "learning_rate": 8.029474960837657e-06, + "loss": 0.605, + "step": 4957 + }, + { + "epoch": 0.31, + "grad_norm": 0.8806825280189514, + "learning_rate": 8.028658673299019e-06, + "loss": 0.6294, + "step": 4958 + }, + { + "epoch": 0.31, + "grad_norm": 0.8301826119422913, + "learning_rate": 8.027842258234097e-06, + "loss": 0.5667, + "step": 4959 + }, + { + "epoch": 0.31, + "grad_norm": 0.9355791211128235, + "learning_rate": 8.027025715677273e-06, + "loss": 0.6805, + "step": 4960 + }, + { + "epoch": 0.31, + "grad_norm": 0.9568033814430237, + "learning_rate": 8.026209045662925e-06, + "loss": 0.6433, + "step": 4961 + }, + { + "epoch": 0.31, + "grad_norm": 0.8783117532730103, + "learning_rate": 8.025392248225444e-06, + "loss": 0.581, + "step": 4962 + }, + { + "epoch": 0.31, + "grad_norm": 0.8595120906829834, + "learning_rate": 8.024575323399217e-06, + "loss": 0.5877, + "step": 4963 + }, + { + "epoch": 0.31, + "grad_norm": 0.9189950823783875, + "learning_rate": 8.023758271218646e-06, + "loss": 0.6188, + "step": 4964 + }, + { + "epoch": 0.31, + "grad_norm": 0.8918676376342773, + "learning_rate": 8.022941091718133e-06, + "loss": 0.6064, + "step": 4965 + }, + { + "epoch": 0.31, + "grad_norm": 0.853366494178772, + "learning_rate": 8.022123784932085e-06, + "loss": 0.6376, + "step": 4966 + }, + { + "epoch": 0.31, + "grad_norm": 0.9495976567268372, + "learning_rate": 8.02130635089492e-06, + "loss": 0.6117, + "step": 4967 + }, + { + "epoch": 0.31, + "grad_norm": 0.9444292187690735, + "learning_rate": 8.020488789641054e-06, + "loss": 0.6688, + "step": 4968 + }, + { + "epoch": 0.31, + "grad_norm": 0.8615371584892273, + "learning_rate": 8.019671101204914e-06, + "loss": 0.5812, + "step": 4969 + }, + { + "epoch": 0.31, + "grad_norm": 0.8364808559417725, + "learning_rate": 8.018853285620926e-06, + "loss": 0.5657, + "step": 4970 + }, + { + "epoch": 0.31, + "grad_norm": 0.9400182962417603, + "learning_rate": 8.018035342923529e-06, + "loss": 0.6043, + "step": 4971 + }, + { + "epoch": 0.32, + "grad_norm": 0.8639470338821411, + "learning_rate": 8.017217273147165e-06, + "loss": 0.5418, + "step": 4972 + }, + { + "epoch": 0.32, + "grad_norm": 0.8635435104370117, + "learning_rate": 8.016399076326275e-06, + "loss": 0.6752, + "step": 4973 + }, + { + "epoch": 0.32, + "grad_norm": 0.974575400352478, + "learning_rate": 8.015580752495314e-06, + "loss": 0.6424, + "step": 4974 + }, + { + "epoch": 0.32, + "grad_norm": 0.9337494969367981, + "learning_rate": 8.014762301688737e-06, + "loss": 0.6497, + "step": 4975 + }, + { + "epoch": 0.32, + "grad_norm": 0.9488426446914673, + "learning_rate": 8.013943723941009e-06, + "loss": 0.5005, + "step": 4976 + }, + { + "epoch": 0.32, + "grad_norm": 0.8811922073364258, + "learning_rate": 8.013125019286594e-06, + "loss": 0.6492, + "step": 4977 + }, + { + "epoch": 0.32, + "grad_norm": 0.8622782230377197, + "learning_rate": 8.012306187759966e-06, + "loss": 0.6216, + "step": 4978 + }, + { + "epoch": 0.32, + "grad_norm": 0.9642921686172485, + "learning_rate": 8.011487229395605e-06, + "loss": 0.6454, + "step": 4979 + }, + { + "epoch": 0.32, + "grad_norm": 0.8489444851875305, + "learning_rate": 8.010668144227991e-06, + "loss": 0.6024, + "step": 4980 + }, + { + "epoch": 0.32, + "grad_norm": 0.9179771542549133, + "learning_rate": 8.009848932291617e-06, + "loss": 0.5687, + "step": 4981 + }, + { + "epoch": 0.32, + "grad_norm": 0.8957446813583374, + "learning_rate": 8.009029593620974e-06, + "loss": 0.5745, + "step": 4982 + }, + { + "epoch": 0.32, + "grad_norm": 0.9170886874198914, + "learning_rate": 8.008210128250563e-06, + "loss": 0.6323, + "step": 4983 + }, + { + "epoch": 0.32, + "grad_norm": 0.8754706382751465, + "learning_rate": 8.007390536214888e-06, + "loss": 0.6169, + "step": 4984 + }, + { + "epoch": 0.32, + "grad_norm": 0.8331484794616699, + "learning_rate": 8.006570817548457e-06, + "loss": 0.5864, + "step": 4985 + }, + { + "epoch": 0.32, + "grad_norm": 0.9085079431533813, + "learning_rate": 8.005750972285793e-06, + "loss": 0.6391, + "step": 4986 + }, + { + "epoch": 0.32, + "grad_norm": 0.8748310208320618, + "learning_rate": 8.004931000461408e-06, + "loss": 0.5741, + "step": 4987 + }, + { + "epoch": 0.32, + "grad_norm": 0.8841165900230408, + "learning_rate": 8.004110902109832e-06, + "loss": 0.6129, + "step": 4988 + }, + { + "epoch": 0.32, + "grad_norm": 0.88663649559021, + "learning_rate": 8.003290677265599e-06, + "loss": 0.6556, + "step": 4989 + }, + { + "epoch": 0.32, + "grad_norm": 0.8917930126190186, + "learning_rate": 8.002470325963241e-06, + "loss": 0.606, + "step": 4990 + }, + { + "epoch": 0.32, + "grad_norm": 0.7967976331710815, + "learning_rate": 8.001649848237303e-06, + "loss": 0.5492, + "step": 4991 + }, + { + "epoch": 0.32, + "grad_norm": 0.8872556090354919, + "learning_rate": 8.000829244122333e-06, + "loss": 0.6114, + "step": 4992 + }, + { + "epoch": 0.32, + "grad_norm": 1.745118498802185, + "learning_rate": 8.00000851365288e-06, + "loss": 0.6433, + "step": 4993 + }, + { + "epoch": 0.32, + "grad_norm": 0.8213765621185303, + "learning_rate": 7.999187656863507e-06, + "loss": 0.5746, + "step": 4994 + }, + { + "epoch": 0.32, + "grad_norm": 0.8834403157234192, + "learning_rate": 7.998366673788775e-06, + "loss": 0.5745, + "step": 4995 + }, + { + "epoch": 0.32, + "grad_norm": 0.9179670214653015, + "learning_rate": 7.997545564463251e-06, + "loss": 0.641, + "step": 4996 + }, + { + "epoch": 0.32, + "grad_norm": 0.9359582662582397, + "learning_rate": 7.996724328921514e-06, + "loss": 0.5567, + "step": 4997 + }, + { + "epoch": 0.32, + "grad_norm": 0.9402004480361938, + "learning_rate": 7.99590296719814e-06, + "loss": 0.638, + "step": 4998 + }, + { + "epoch": 0.32, + "grad_norm": 0.8939769268035889, + "learning_rate": 7.995081479327712e-06, + "loss": 0.6216, + "step": 4999 + }, + { + "epoch": 0.32, + "grad_norm": 0.9075430035591125, + "learning_rate": 7.994259865344822e-06, + "loss": 0.6213, + "step": 5000 + }, + { + "epoch": 0.32, + "grad_norm": 0.9165618419647217, + "learning_rate": 7.993438125284068e-06, + "loss": 0.6449, + "step": 5001 + }, + { + "epoch": 0.32, + "grad_norm": 0.9765704870223999, + "learning_rate": 7.992616259180045e-06, + "loss": 0.615, + "step": 5002 + }, + { + "epoch": 0.32, + "grad_norm": 0.9137974977493286, + "learning_rate": 7.991794267067363e-06, + "loss": 0.6025, + "step": 5003 + }, + { + "epoch": 0.32, + "grad_norm": 0.8844775557518005, + "learning_rate": 7.99097214898063e-06, + "loss": 0.6017, + "step": 5004 + }, + { + "epoch": 0.32, + "grad_norm": 0.9296790361404419, + "learning_rate": 7.99014990495447e-06, + "loss": 0.5938, + "step": 5005 + }, + { + "epoch": 0.32, + "grad_norm": 0.8395243287086487, + "learning_rate": 7.989327535023495e-06, + "loss": 0.6087, + "step": 5006 + }, + { + "epoch": 0.32, + "grad_norm": 0.9407158493995667, + "learning_rate": 7.988505039222339e-06, + "loss": 0.6039, + "step": 5007 + }, + { + "epoch": 0.32, + "grad_norm": 0.9456518292427063, + "learning_rate": 7.987682417585629e-06, + "loss": 0.6272, + "step": 5008 + }, + { + "epoch": 0.32, + "grad_norm": 0.9036068916320801, + "learning_rate": 7.98685967014801e-06, + "loss": 0.6433, + "step": 5009 + }, + { + "epoch": 0.32, + "grad_norm": 0.9327660202980042, + "learning_rate": 7.986036796944116e-06, + "loss": 0.6196, + "step": 5010 + }, + { + "epoch": 0.32, + "grad_norm": 0.9042969346046448, + "learning_rate": 7.985213798008605e-06, + "loss": 0.6259, + "step": 5011 + }, + { + "epoch": 0.32, + "grad_norm": 0.8693029880523682, + "learning_rate": 7.984390673376123e-06, + "loss": 0.6165, + "step": 5012 + }, + { + "epoch": 0.32, + "grad_norm": 0.8812036514282227, + "learning_rate": 7.983567423081331e-06, + "loss": 0.6037, + "step": 5013 + }, + { + "epoch": 0.32, + "grad_norm": 0.8530508279800415, + "learning_rate": 7.982744047158897e-06, + "loss": 0.6234, + "step": 5014 + }, + { + "epoch": 0.32, + "grad_norm": 0.9156954884529114, + "learning_rate": 7.981920545643485e-06, + "loss": 0.5921, + "step": 5015 + }, + { + "epoch": 0.32, + "grad_norm": 0.849946141242981, + "learning_rate": 7.981096918569773e-06, + "loss": 0.5624, + "step": 5016 + }, + { + "epoch": 0.32, + "grad_norm": 0.9375457763671875, + "learning_rate": 7.980273165972438e-06, + "loss": 0.587, + "step": 5017 + }, + { + "epoch": 0.32, + "grad_norm": 0.8364583253860474, + "learning_rate": 7.979449287886171e-06, + "loss": 0.5506, + "step": 5018 + }, + { + "epoch": 0.32, + "grad_norm": 0.8714501857757568, + "learning_rate": 7.978625284345657e-06, + "loss": 0.6281, + "step": 5019 + }, + { + "epoch": 0.32, + "grad_norm": 0.8554301857948303, + "learning_rate": 7.977801155385595e-06, + "loss": 0.5985, + "step": 5020 + }, + { + "epoch": 0.32, + "grad_norm": 0.9513722658157349, + "learning_rate": 7.976976901040686e-06, + "loss": 0.6487, + "step": 5021 + }, + { + "epoch": 0.32, + "grad_norm": 0.9231401085853577, + "learning_rate": 7.976152521345635e-06, + "loss": 0.6764, + "step": 5022 + }, + { + "epoch": 0.32, + "grad_norm": 0.9738418459892273, + "learning_rate": 7.975328016335154e-06, + "loss": 0.6647, + "step": 5023 + }, + { + "epoch": 0.32, + "grad_norm": 0.8537105917930603, + "learning_rate": 7.974503386043961e-06, + "loss": 0.6126, + "step": 5024 + }, + { + "epoch": 0.32, + "grad_norm": 0.9720308780670166, + "learning_rate": 7.973678630506778e-06, + "loss": 0.6771, + "step": 5025 + }, + { + "epoch": 0.32, + "grad_norm": 0.8640322089195251, + "learning_rate": 7.972853749758334e-06, + "loss": 0.6203, + "step": 5026 + }, + { + "epoch": 0.32, + "grad_norm": 0.9116325378417969, + "learning_rate": 7.972028743833357e-06, + "loss": 0.6164, + "step": 5027 + }, + { + "epoch": 0.32, + "grad_norm": 0.8856568336486816, + "learning_rate": 7.971203612766591e-06, + "loss": 0.5796, + "step": 5028 + }, + { + "epoch": 0.32, + "grad_norm": 0.8518129587173462, + "learning_rate": 7.970378356592779e-06, + "loss": 0.6571, + "step": 5029 + }, + { + "epoch": 0.32, + "grad_norm": 0.8910609483718872, + "learning_rate": 7.969552975346664e-06, + "loss": 0.6005, + "step": 5030 + }, + { + "epoch": 0.32, + "grad_norm": 0.9186645746231079, + "learning_rate": 7.968727469063005e-06, + "loss": 0.565, + "step": 5031 + }, + { + "epoch": 0.32, + "grad_norm": 0.8877920508384705, + "learning_rate": 7.967901837776559e-06, + "loss": 0.6059, + "step": 5032 + }, + { + "epoch": 0.32, + "grad_norm": 0.8737941384315491, + "learning_rate": 7.967076081522091e-06, + "loss": 0.6355, + "step": 5033 + }, + { + "epoch": 0.32, + "grad_norm": 0.9942765831947327, + "learning_rate": 7.966250200334373e-06, + "loss": 0.7055, + "step": 5034 + }, + { + "epoch": 0.32, + "grad_norm": 0.8824638724327087, + "learning_rate": 7.965424194248176e-06, + "loss": 0.5895, + "step": 5035 + }, + { + "epoch": 0.32, + "grad_norm": 0.9163713455200195, + "learning_rate": 7.964598063298282e-06, + "loss": 0.6511, + "step": 5036 + }, + { + "epoch": 0.32, + "grad_norm": 0.8366975784301758, + "learning_rate": 7.963771807519477e-06, + "loss": 0.5519, + "step": 5037 + }, + { + "epoch": 0.32, + "grad_norm": 0.8883844614028931, + "learning_rate": 7.962945426946552e-06, + "loss": 0.5771, + "step": 5038 + }, + { + "epoch": 0.32, + "grad_norm": 0.862853467464447, + "learning_rate": 7.962118921614302e-06, + "loss": 0.625, + "step": 5039 + }, + { + "epoch": 0.32, + "grad_norm": 0.8657647967338562, + "learning_rate": 7.961292291557529e-06, + "loss": 0.5691, + "step": 5040 + }, + { + "epoch": 0.32, + "grad_norm": 0.9227752089500427, + "learning_rate": 7.960465536811039e-06, + "loss": 0.602, + "step": 5041 + }, + { + "epoch": 0.32, + "grad_norm": 0.8963826298713684, + "learning_rate": 7.959638657409643e-06, + "loss": 0.5922, + "step": 5042 + }, + { + "epoch": 0.32, + "grad_norm": 0.8346092104911804, + "learning_rate": 7.95881165338816e-06, + "loss": 0.5559, + "step": 5043 + }, + { + "epoch": 0.32, + "grad_norm": 0.9060384631156921, + "learning_rate": 7.957984524781413e-06, + "loss": 0.6217, + "step": 5044 + }, + { + "epoch": 0.32, + "grad_norm": 0.9173614978790283, + "learning_rate": 7.957157271624225e-06, + "loss": 0.5922, + "step": 5045 + }, + { + "epoch": 0.32, + "grad_norm": 0.9467434883117676, + "learning_rate": 7.956329893951432e-06, + "loss": 0.6047, + "step": 5046 + }, + { + "epoch": 0.32, + "grad_norm": 1.0126010179519653, + "learning_rate": 7.95550239179787e-06, + "loss": 0.6554, + "step": 5047 + }, + { + "epoch": 0.32, + "grad_norm": 0.780703604221344, + "learning_rate": 7.954674765198386e-06, + "loss": 0.5616, + "step": 5048 + }, + { + "epoch": 0.32, + "grad_norm": 0.9652750492095947, + "learning_rate": 7.953847014187826e-06, + "loss": 0.6468, + "step": 5049 + }, + { + "epoch": 0.32, + "grad_norm": 0.9707852005958557, + "learning_rate": 7.953019138801045e-06, + "loss": 0.6298, + "step": 5050 + }, + { + "epoch": 0.32, + "grad_norm": 0.9064115881919861, + "learning_rate": 7.952191139072898e-06, + "loss": 0.6399, + "step": 5051 + }, + { + "epoch": 0.32, + "grad_norm": 0.9037145972251892, + "learning_rate": 7.951363015038254e-06, + "loss": 0.5806, + "step": 5052 + }, + { + "epoch": 0.32, + "grad_norm": 0.9301207065582275, + "learning_rate": 7.950534766731982e-06, + "loss": 0.6627, + "step": 5053 + }, + { + "epoch": 0.32, + "grad_norm": 0.8569024205207825, + "learning_rate": 7.949706394188951e-06, + "loss": 0.603, + "step": 5054 + }, + { + "epoch": 0.32, + "grad_norm": 0.9237979054450989, + "learning_rate": 7.948877897444047e-06, + "loss": 0.6087, + "step": 5055 + }, + { + "epoch": 0.32, + "grad_norm": 0.9367351531982422, + "learning_rate": 7.948049276532156e-06, + "loss": 0.6403, + "step": 5056 + }, + { + "epoch": 0.32, + "grad_norm": 0.8342140913009644, + "learning_rate": 7.94722053148816e-06, + "loss": 0.5697, + "step": 5057 + }, + { + "epoch": 0.32, + "grad_norm": 0.8935142755508423, + "learning_rate": 7.946391662346964e-06, + "loss": 0.6579, + "step": 5058 + }, + { + "epoch": 0.32, + "grad_norm": 0.9436396360397339, + "learning_rate": 7.945562669143463e-06, + "loss": 0.6328, + "step": 5059 + }, + { + "epoch": 0.32, + "grad_norm": 0.8714977502822876, + "learning_rate": 7.944733551912566e-06, + "loss": 0.5887, + "step": 5060 + }, + { + "epoch": 0.32, + "grad_norm": 0.8785292506217957, + "learning_rate": 7.943904310689184e-06, + "loss": 0.5927, + "step": 5061 + }, + { + "epoch": 0.32, + "grad_norm": 0.8961544036865234, + "learning_rate": 7.94307494550823e-06, + "loss": 0.6451, + "step": 5062 + }, + { + "epoch": 0.32, + "grad_norm": 0.9154882431030273, + "learning_rate": 7.94224545640463e-06, + "loss": 0.6019, + "step": 5063 + }, + { + "epoch": 0.32, + "grad_norm": 0.8385921716690063, + "learning_rate": 7.941415843413309e-06, + "loss": 0.5952, + "step": 5064 + }, + { + "epoch": 0.32, + "grad_norm": 0.8181779980659485, + "learning_rate": 7.940586106569198e-06, + "loss": 0.5941, + "step": 5065 + }, + { + "epoch": 0.32, + "grad_norm": 0.8897058367729187, + "learning_rate": 7.939756245907237e-06, + "loss": 0.6189, + "step": 5066 + }, + { + "epoch": 0.32, + "grad_norm": 0.9226515293121338, + "learning_rate": 7.938926261462366e-06, + "loss": 0.6463, + "step": 5067 + }, + { + "epoch": 0.32, + "grad_norm": 0.9354571104049683, + "learning_rate": 7.938096153269535e-06, + "loss": 0.6086, + "step": 5068 + }, + { + "epoch": 0.32, + "grad_norm": 0.8967651128768921, + "learning_rate": 7.937265921363695e-06, + "loss": 0.5475, + "step": 5069 + }, + { + "epoch": 0.32, + "grad_norm": 0.9139410853385925, + "learning_rate": 7.936435565779806e-06, + "loss": 0.5889, + "step": 5070 + }, + { + "epoch": 0.32, + "grad_norm": 0.894964337348938, + "learning_rate": 7.93560508655283e-06, + "loss": 0.59, + "step": 5071 + }, + { + "epoch": 0.32, + "grad_norm": 0.8929742574691772, + "learning_rate": 7.934774483717736e-06, + "loss": 0.5761, + "step": 5072 + }, + { + "epoch": 0.32, + "grad_norm": 0.8965078592300415, + "learning_rate": 7.933943757309498e-06, + "loss": 0.6356, + "step": 5073 + }, + { + "epoch": 0.32, + "grad_norm": 0.9059800505638123, + "learning_rate": 7.933112907363096e-06, + "loss": 0.5718, + "step": 5074 + }, + { + "epoch": 0.32, + "grad_norm": 0.8989181518554688, + "learning_rate": 7.93228193391351e-06, + "loss": 0.6147, + "step": 5075 + }, + { + "epoch": 0.32, + "grad_norm": 0.8238041400909424, + "learning_rate": 7.931450836995736e-06, + "loss": 0.5621, + "step": 5076 + }, + { + "epoch": 0.32, + "grad_norm": 0.8373918533325195, + "learning_rate": 7.930619616644761e-06, + "loss": 0.6033, + "step": 5077 + }, + { + "epoch": 0.32, + "grad_norm": 0.8767797946929932, + "learning_rate": 7.929788272895591e-06, + "loss": 0.6104, + "step": 5078 + }, + { + "epoch": 0.32, + "grad_norm": 0.9680573344230652, + "learning_rate": 7.928956805783228e-06, + "loss": 0.6186, + "step": 5079 + }, + { + "epoch": 0.32, + "grad_norm": 0.9051882028579712, + "learning_rate": 7.928125215342685e-06, + "loss": 0.6336, + "step": 5080 + }, + { + "epoch": 0.32, + "grad_norm": 0.9240115284919739, + "learning_rate": 7.927293501608975e-06, + "loss": 0.6207, + "step": 5081 + }, + { + "epoch": 0.32, + "grad_norm": 0.8769848346710205, + "learning_rate": 7.926461664617117e-06, + "loss": 0.6018, + "step": 5082 + }, + { + "epoch": 0.32, + "grad_norm": 0.7785282135009766, + "learning_rate": 7.92562970440214e-06, + "loss": 0.5859, + "step": 5083 + }, + { + "epoch": 0.32, + "grad_norm": 0.851161003112793, + "learning_rate": 7.924797620999074e-06, + "loss": 0.5716, + "step": 5084 + }, + { + "epoch": 0.32, + "grad_norm": 0.9216321706771851, + "learning_rate": 7.923965414442953e-06, + "loss": 0.6521, + "step": 5085 + }, + { + "epoch": 0.32, + "grad_norm": 0.9329628944396973, + "learning_rate": 7.923133084768822e-06, + "loss": 0.6118, + "step": 5086 + }, + { + "epoch": 0.32, + "grad_norm": 0.9301400184631348, + "learning_rate": 7.922300632011726e-06, + "loss": 0.6287, + "step": 5087 + }, + { + "epoch": 0.32, + "grad_norm": 0.8702458739280701, + "learning_rate": 7.921468056206715e-06, + "loss": 0.6279, + "step": 5088 + }, + { + "epoch": 0.32, + "grad_norm": 0.9146727323532104, + "learning_rate": 7.920635357388848e-06, + "loss": 0.5391, + "step": 5089 + }, + { + "epoch": 0.32, + "grad_norm": 0.8490307927131653, + "learning_rate": 7.919802535593185e-06, + "loss": 0.6225, + "step": 5090 + }, + { + "epoch": 0.32, + "grad_norm": 0.8508750796318054, + "learning_rate": 7.918969590854797e-06, + "loss": 0.6332, + "step": 5091 + }, + { + "epoch": 0.32, + "grad_norm": 0.8569998145103455, + "learning_rate": 7.91813652320875e-06, + "loss": 0.5871, + "step": 5092 + }, + { + "epoch": 0.32, + "grad_norm": 0.8754677176475525, + "learning_rate": 7.91730333269013e-06, + "loss": 0.6172, + "step": 5093 + }, + { + "epoch": 0.32, + "grad_norm": 0.9354038834571838, + "learning_rate": 7.916470019334012e-06, + "loss": 0.6477, + "step": 5094 + }, + { + "epoch": 0.32, + "grad_norm": 0.8642258048057556, + "learning_rate": 7.915636583175489e-06, + "loss": 0.5596, + "step": 5095 + }, + { + "epoch": 0.32, + "grad_norm": 0.9272780418395996, + "learning_rate": 7.91480302424965e-06, + "loss": 0.6006, + "step": 5096 + }, + { + "epoch": 0.32, + "grad_norm": 0.8797223567962646, + "learning_rate": 7.913969342591597e-06, + "loss": 0.5884, + "step": 5097 + }, + { + "epoch": 0.32, + "grad_norm": 0.8903371691703796, + "learning_rate": 7.913135538236432e-06, + "loss": 0.6636, + "step": 5098 + }, + { + "epoch": 0.32, + "grad_norm": 0.9428971409797668, + "learning_rate": 7.912301611219264e-06, + "loss": 0.5976, + "step": 5099 + }, + { + "epoch": 0.32, + "grad_norm": 0.8819142580032349, + "learning_rate": 7.911467561575204e-06, + "loss": 0.6289, + "step": 5100 + }, + { + "epoch": 0.32, + "grad_norm": 0.9539601802825928, + "learning_rate": 7.910633389339376e-06, + "loss": 0.6471, + "step": 5101 + }, + { + "epoch": 0.32, + "grad_norm": 0.816605806350708, + "learning_rate": 7.909799094546899e-06, + "loss": 0.5497, + "step": 5102 + }, + { + "epoch": 0.32, + "grad_norm": 0.8792059421539307, + "learning_rate": 7.908964677232906e-06, + "loss": 0.5623, + "step": 5103 + }, + { + "epoch": 0.32, + "grad_norm": 0.9570422172546387, + "learning_rate": 7.90813013743253e-06, + "loss": 0.628, + "step": 5104 + }, + { + "epoch": 0.32, + "grad_norm": 0.8918935060501099, + "learning_rate": 7.90729547518091e-06, + "loss": 0.6153, + "step": 5105 + }, + { + "epoch": 0.32, + "grad_norm": 0.9165834784507751, + "learning_rate": 7.906460690513192e-06, + "loss": 0.5937, + "step": 5106 + }, + { + "epoch": 0.32, + "grad_norm": 0.9291167259216309, + "learning_rate": 7.905625783464525e-06, + "loss": 0.6248, + "step": 5107 + }, + { + "epoch": 0.32, + "grad_norm": 0.8594471216201782, + "learning_rate": 7.904790754070063e-06, + "loss": 0.5916, + "step": 5108 + }, + { + "epoch": 0.32, + "grad_norm": 1.1345970630645752, + "learning_rate": 7.90395560236497e-06, + "loss": 0.5552, + "step": 5109 + }, + { + "epoch": 0.32, + "grad_norm": 0.8997986912727356, + "learning_rate": 7.903120328384406e-06, + "loss": 0.5698, + "step": 5110 + }, + { + "epoch": 0.32, + "grad_norm": 0.8802492618560791, + "learning_rate": 7.902284932163545e-06, + "loss": 0.5791, + "step": 5111 + }, + { + "epoch": 0.32, + "grad_norm": 0.8879519701004028, + "learning_rate": 7.901449413737562e-06, + "loss": 0.6044, + "step": 5112 + }, + { + "epoch": 0.32, + "grad_norm": 0.8550997972488403, + "learning_rate": 7.90061377314164e-06, + "loss": 0.6232, + "step": 5113 + }, + { + "epoch": 0.32, + "grad_norm": 0.8434523940086365, + "learning_rate": 7.899778010410958e-06, + "loss": 0.608, + "step": 5114 + }, + { + "epoch": 0.32, + "grad_norm": 0.8279407024383545, + "learning_rate": 7.898942125580715e-06, + "loss": 0.5741, + "step": 5115 + }, + { + "epoch": 0.32, + "grad_norm": 0.8244683742523193, + "learning_rate": 7.898106118686102e-06, + "loss": 0.5697, + "step": 5116 + }, + { + "epoch": 0.32, + "grad_norm": 0.9236017465591431, + "learning_rate": 7.897269989762322e-06, + "loss": 0.6289, + "step": 5117 + }, + { + "epoch": 0.32, + "grad_norm": 0.921940803527832, + "learning_rate": 7.896433738844583e-06, + "loss": 0.5958, + "step": 5118 + }, + { + "epoch": 0.32, + "grad_norm": 0.8436870574951172, + "learning_rate": 7.895597365968093e-06, + "loss": 0.6159, + "step": 5119 + }, + { + "epoch": 0.32, + "grad_norm": 0.8956601619720459, + "learning_rate": 7.894760871168074e-06, + "loss": 0.6182, + "step": 5120 + }, + { + "epoch": 0.32, + "grad_norm": 0.9135102033615112, + "learning_rate": 7.893924254479744e-06, + "loss": 0.6239, + "step": 5121 + }, + { + "epoch": 0.32, + "grad_norm": 0.9187552332878113, + "learning_rate": 7.893087515938329e-06, + "loss": 0.6376, + "step": 5122 + }, + { + "epoch": 0.32, + "grad_norm": 0.9064997434616089, + "learning_rate": 7.892250655579063e-06, + "loss": 0.6092, + "step": 5123 + }, + { + "epoch": 0.32, + "grad_norm": 0.9091038107872009, + "learning_rate": 7.891413673437185e-06, + "loss": 0.5968, + "step": 5124 + }, + { + "epoch": 0.32, + "grad_norm": 0.9700572490692139, + "learning_rate": 7.890576569547937e-06, + "loss": 0.6382, + "step": 5125 + }, + { + "epoch": 0.32, + "grad_norm": 0.8647416830062866, + "learning_rate": 7.889739343946561e-06, + "loss": 0.646, + "step": 5126 + }, + { + "epoch": 0.32, + "grad_norm": 0.8427348732948303, + "learning_rate": 7.888901996668317e-06, + "loss": 0.5889, + "step": 5127 + }, + { + "epoch": 0.32, + "grad_norm": 0.8934534192085266, + "learning_rate": 7.888064527748458e-06, + "loss": 0.6227, + "step": 5128 + }, + { + "epoch": 0.32, + "grad_norm": 0.9173450469970703, + "learning_rate": 7.887226937222252e-06, + "loss": 0.6438, + "step": 5129 + }, + { + "epoch": 0.33, + "grad_norm": 0.8801364898681641, + "learning_rate": 7.88638922512496e-06, + "loss": 0.6402, + "step": 5130 + }, + { + "epoch": 0.33, + "grad_norm": 0.9062999486923218, + "learning_rate": 7.88555139149186e-06, + "loss": 0.6434, + "step": 5131 + }, + { + "epoch": 0.33, + "grad_norm": 0.9464401602745056, + "learning_rate": 7.884713436358228e-06, + "loss": 0.5997, + "step": 5132 + }, + { + "epoch": 0.33, + "grad_norm": 0.889105498790741, + "learning_rate": 7.883875359759349e-06, + "loss": 0.6423, + "step": 5133 + }, + { + "epoch": 0.33, + "grad_norm": 0.9038829803466797, + "learning_rate": 7.883037161730511e-06, + "loss": 0.6008, + "step": 5134 + }, + { + "epoch": 0.33, + "grad_norm": 0.8674249649047852, + "learning_rate": 7.882198842307008e-06, + "loss": 0.605, + "step": 5135 + }, + { + "epoch": 0.33, + "grad_norm": 0.9415613412857056, + "learning_rate": 7.881360401524138e-06, + "loss": 0.6118, + "step": 5136 + }, + { + "epoch": 0.33, + "grad_norm": 0.9558926820755005, + "learning_rate": 7.880521839417206e-06, + "loss": 0.5798, + "step": 5137 + }, + { + "epoch": 0.33, + "grad_norm": 0.8905767798423767, + "learning_rate": 7.879683156021518e-06, + "loss": 0.611, + "step": 5138 + }, + { + "epoch": 0.33, + "grad_norm": 0.9160702228546143, + "learning_rate": 7.87884435137239e-06, + "loss": 0.6068, + "step": 5139 + }, + { + "epoch": 0.33, + "grad_norm": 0.8442513942718506, + "learning_rate": 7.878005425505143e-06, + "loss": 0.5846, + "step": 5140 + }, + { + "epoch": 0.33, + "grad_norm": 0.8762052059173584, + "learning_rate": 7.877166378455098e-06, + "loss": 0.6208, + "step": 5141 + }, + { + "epoch": 0.33, + "grad_norm": 0.934578537940979, + "learning_rate": 7.876327210257586e-06, + "loss": 0.6083, + "step": 5142 + }, + { + "epoch": 0.33, + "grad_norm": 0.899614155292511, + "learning_rate": 7.875487920947941e-06, + "loss": 0.6371, + "step": 5143 + }, + { + "epoch": 0.33, + "grad_norm": 0.8895543217658997, + "learning_rate": 7.874648510561503e-06, + "loss": 0.6393, + "step": 5144 + }, + { + "epoch": 0.33, + "grad_norm": 0.8901795744895935, + "learning_rate": 7.873808979133616e-06, + "loss": 0.6394, + "step": 5145 + }, + { + "epoch": 0.33, + "grad_norm": 0.8974289894104004, + "learning_rate": 7.872969326699631e-06, + "loss": 0.5565, + "step": 5146 + }, + { + "epoch": 0.33, + "grad_norm": 0.9629907608032227, + "learning_rate": 7.8721295532949e-06, + "loss": 0.6353, + "step": 5147 + }, + { + "epoch": 0.33, + "grad_norm": 0.91104656457901, + "learning_rate": 7.871289658954789e-06, + "loss": 0.6165, + "step": 5148 + }, + { + "epoch": 0.33, + "grad_norm": 0.9044172763824463, + "learning_rate": 7.870449643714654e-06, + "loss": 0.6053, + "step": 5149 + }, + { + "epoch": 0.33, + "grad_norm": 0.823835015296936, + "learning_rate": 7.869609507609874e-06, + "loss": 0.5482, + "step": 5150 + }, + { + "epoch": 0.33, + "grad_norm": 0.8716912865638733, + "learning_rate": 7.868769250675818e-06, + "loss": 0.6004, + "step": 5151 + }, + { + "epoch": 0.33, + "grad_norm": 0.8245472311973572, + "learning_rate": 7.867928872947869e-06, + "loss": 0.5591, + "step": 5152 + }, + { + "epoch": 0.33, + "grad_norm": 0.8959210515022278, + "learning_rate": 7.867088374461413e-06, + "loss": 0.6253, + "step": 5153 + }, + { + "epoch": 0.33, + "grad_norm": 0.8492079377174377, + "learning_rate": 7.866247755251838e-06, + "loss": 0.6169, + "step": 5154 + }, + { + "epoch": 0.33, + "grad_norm": 0.8312681317329407, + "learning_rate": 7.865407015354542e-06, + "loss": 0.5774, + "step": 5155 + }, + { + "epoch": 0.33, + "grad_norm": 0.9149585962295532, + "learning_rate": 7.864566154804925e-06, + "loss": 0.6262, + "step": 5156 + }, + { + "epoch": 0.33, + "grad_norm": 0.8946517705917358, + "learning_rate": 7.86372517363839e-06, + "loss": 0.563, + "step": 5157 + }, + { + "epoch": 0.33, + "grad_norm": 0.8930898904800415, + "learning_rate": 7.862884071890353e-06, + "loss": 0.6389, + "step": 5158 + }, + { + "epoch": 0.33, + "grad_norm": 0.8389832973480225, + "learning_rate": 7.862042849596225e-06, + "loss": 0.5617, + "step": 5159 + }, + { + "epoch": 0.33, + "grad_norm": 0.8541855216026306, + "learning_rate": 7.86120150679143e-06, + "loss": 0.6236, + "step": 5160 + }, + { + "epoch": 0.33, + "grad_norm": 0.8012550473213196, + "learning_rate": 7.860360043511392e-06, + "loss": 0.5796, + "step": 5161 + }, + { + "epoch": 0.33, + "grad_norm": 0.8516356348991394, + "learning_rate": 7.859518459791543e-06, + "loss": 0.6257, + "step": 5162 + }, + { + "epoch": 0.33, + "grad_norm": 0.8946587443351746, + "learning_rate": 7.85867675566732e-06, + "loss": 0.5812, + "step": 5163 + }, + { + "epoch": 0.33, + "grad_norm": 0.8651425838470459, + "learning_rate": 7.857834931174164e-06, + "loss": 0.6141, + "step": 5164 + }, + { + "epoch": 0.33, + "grad_norm": 0.8876100182533264, + "learning_rate": 7.85699298634752e-06, + "loss": 0.6099, + "step": 5165 + }, + { + "epoch": 0.33, + "grad_norm": 0.8882783055305481, + "learning_rate": 7.856150921222838e-06, + "loss": 0.5971, + "step": 5166 + }, + { + "epoch": 0.33, + "grad_norm": 0.862937867641449, + "learning_rate": 7.85530873583558e-06, + "loss": 0.6422, + "step": 5167 + }, + { + "epoch": 0.33, + "grad_norm": 0.8829284906387329, + "learning_rate": 7.854466430221203e-06, + "loss": 0.5815, + "step": 5168 + }, + { + "epoch": 0.33, + "grad_norm": 0.8932998776435852, + "learning_rate": 7.853624004415172e-06, + "loss": 0.6657, + "step": 5169 + }, + { + "epoch": 0.33, + "grad_norm": 0.8481628894805908, + "learning_rate": 7.852781458452964e-06, + "loss": 0.6036, + "step": 5170 + }, + { + "epoch": 0.33, + "grad_norm": 0.958634614944458, + "learning_rate": 7.851938792370053e-06, + "loss": 0.6527, + "step": 5171 + }, + { + "epoch": 0.33, + "grad_norm": 0.8003389239311218, + "learning_rate": 7.85109600620192e-06, + "loss": 0.5318, + "step": 5172 + }, + { + "epoch": 0.33, + "grad_norm": 0.922940731048584, + "learning_rate": 7.85025309998405e-06, + "loss": 0.5826, + "step": 5173 + }, + { + "epoch": 0.33, + "grad_norm": 0.8557353019714355, + "learning_rate": 7.849410073751942e-06, + "loss": 0.5537, + "step": 5174 + }, + { + "epoch": 0.33, + "grad_norm": 0.9081326723098755, + "learning_rate": 7.848566927541084e-06, + "loss": 0.5954, + "step": 5175 + }, + { + "epoch": 0.33, + "grad_norm": 0.8481424450874329, + "learning_rate": 7.847723661386985e-06, + "loss": 0.582, + "step": 5176 + }, + { + "epoch": 0.33, + "grad_norm": 0.9431670308113098, + "learning_rate": 7.846880275325149e-06, + "loss": 0.6132, + "step": 5177 + }, + { + "epoch": 0.33, + "grad_norm": 0.827930748462677, + "learning_rate": 7.846036769391086e-06, + "loss": 0.612, + "step": 5178 + }, + { + "epoch": 0.33, + "grad_norm": 0.8801954984664917, + "learning_rate": 7.845193143620316e-06, + "loss": 0.6171, + "step": 5179 + }, + { + "epoch": 0.33, + "grad_norm": 0.9372230768203735, + "learning_rate": 7.84434939804836e-06, + "loss": 0.636, + "step": 5180 + }, + { + "epoch": 0.33, + "grad_norm": 0.9458149075508118, + "learning_rate": 7.843505532710748e-06, + "loss": 0.6446, + "step": 5181 + }, + { + "epoch": 0.33, + "grad_norm": 0.8717585802078247, + "learning_rate": 7.84266154764301e-06, + "loss": 0.6349, + "step": 5182 + }, + { + "epoch": 0.33, + "grad_norm": 0.8793720602989197, + "learning_rate": 7.84181744288068e-06, + "loss": 0.6381, + "step": 5183 + }, + { + "epoch": 0.33, + "grad_norm": 0.8223835229873657, + "learning_rate": 7.840973218459305e-06, + "loss": 0.5489, + "step": 5184 + }, + { + "epoch": 0.33, + "grad_norm": 0.9283150434494019, + "learning_rate": 7.84012887441443e-06, + "loss": 0.6059, + "step": 5185 + }, + { + "epoch": 0.33, + "grad_norm": 0.9703242778778076, + "learning_rate": 7.839284410781609e-06, + "loss": 0.5753, + "step": 5186 + }, + { + "epoch": 0.33, + "grad_norm": 0.8721915483474731, + "learning_rate": 7.838439827596398e-06, + "loss": 0.624, + "step": 5187 + }, + { + "epoch": 0.33, + "grad_norm": 0.9139184355735779, + "learning_rate": 7.83759512489436e-06, + "loss": 0.6585, + "step": 5188 + }, + { + "epoch": 0.33, + "grad_norm": 0.8563583493232727, + "learning_rate": 7.836750302711065e-06, + "loss": 0.5913, + "step": 5189 + }, + { + "epoch": 0.33, + "grad_norm": 0.8751399517059326, + "learning_rate": 7.83590536108208e-06, + "loss": 0.5832, + "step": 5190 + }, + { + "epoch": 0.33, + "grad_norm": 0.8799748420715332, + "learning_rate": 7.835060300042986e-06, + "loss": 0.6191, + "step": 5191 + }, + { + "epoch": 0.33, + "grad_norm": 0.8769707679748535, + "learning_rate": 7.834215119629366e-06, + "loss": 0.5834, + "step": 5192 + }, + { + "epoch": 0.33, + "grad_norm": 0.8527321219444275, + "learning_rate": 7.833369819876809e-06, + "loss": 0.6034, + "step": 5193 + }, + { + "epoch": 0.33, + "grad_norm": 0.9315845370292664, + "learning_rate": 7.832524400820902e-06, + "loss": 0.5899, + "step": 5194 + }, + { + "epoch": 0.33, + "grad_norm": 0.9396250247955322, + "learning_rate": 7.831678862497248e-06, + "loss": 0.6207, + "step": 5195 + }, + { + "epoch": 0.33, + "grad_norm": 0.9503593444824219, + "learning_rate": 7.830833204941446e-06, + "loss": 0.655, + "step": 5196 + }, + { + "epoch": 0.33, + "grad_norm": 0.8898603916168213, + "learning_rate": 7.829987428189108e-06, + "loss": 0.6509, + "step": 5197 + }, + { + "epoch": 0.33, + "grad_norm": 0.85368812084198, + "learning_rate": 7.829141532275843e-06, + "loss": 0.5851, + "step": 5198 + }, + { + "epoch": 0.33, + "grad_norm": 0.9276217222213745, + "learning_rate": 7.82829551723727e-06, + "loss": 0.6307, + "step": 5199 + }, + { + "epoch": 0.33, + "grad_norm": 0.8713779449462891, + "learning_rate": 7.827449383109012e-06, + "loss": 0.5739, + "step": 5200 + }, + { + "epoch": 0.33, + "grad_norm": 0.9299573302268982, + "learning_rate": 7.826603129926696e-06, + "loss": 0.6233, + "step": 5201 + }, + { + "epoch": 0.33, + "grad_norm": 0.8474642634391785, + "learning_rate": 7.825756757725956e-06, + "loss": 0.6163, + "step": 5202 + }, + { + "epoch": 0.33, + "grad_norm": 0.9293124079704285, + "learning_rate": 7.824910266542426e-06, + "loss": 0.6704, + "step": 5203 + }, + { + "epoch": 0.33, + "grad_norm": 0.8167198896408081, + "learning_rate": 7.824063656411756e-06, + "loss": 0.5296, + "step": 5204 + }, + { + "epoch": 0.33, + "grad_norm": 0.8058587908744812, + "learning_rate": 7.823216927369588e-06, + "loss": 0.5909, + "step": 5205 + }, + { + "epoch": 0.33, + "grad_norm": 0.9068382382392883, + "learning_rate": 7.822370079451576e-06, + "loss": 0.6536, + "step": 5206 + }, + { + "epoch": 0.33, + "grad_norm": 0.8225257396697998, + "learning_rate": 7.821523112693377e-06, + "loss": 0.5772, + "step": 5207 + }, + { + "epoch": 0.33, + "grad_norm": 0.8437464833259583, + "learning_rate": 7.820676027130657e-06, + "loss": 0.5401, + "step": 5208 + }, + { + "epoch": 0.33, + "grad_norm": 0.8533555865287781, + "learning_rate": 7.81982882279908e-06, + "loss": 0.5964, + "step": 5209 + }, + { + "epoch": 0.33, + "grad_norm": 0.8721039295196533, + "learning_rate": 7.818981499734323e-06, + "loss": 0.5896, + "step": 5210 + }, + { + "epoch": 0.33, + "grad_norm": 0.8488752841949463, + "learning_rate": 7.818134057972062e-06, + "loss": 0.5717, + "step": 5211 + }, + { + "epoch": 0.33, + "grad_norm": 0.8960286378860474, + "learning_rate": 7.817286497547977e-06, + "loss": 0.5905, + "step": 5212 + }, + { + "epoch": 0.33, + "grad_norm": 0.9279623627662659, + "learning_rate": 7.81643881849776e-06, + "loss": 0.6313, + "step": 5213 + }, + { + "epoch": 0.33, + "grad_norm": 0.8772743940353394, + "learning_rate": 7.815591020857101e-06, + "loss": 0.6305, + "step": 5214 + }, + { + "epoch": 0.33, + "grad_norm": 0.9260540008544922, + "learning_rate": 7.8147431046617e-06, + "loss": 0.5967, + "step": 5215 + }, + { + "epoch": 0.33, + "grad_norm": 0.9077113270759583, + "learning_rate": 7.813895069947257e-06, + "loss": 0.6454, + "step": 5216 + }, + { + "epoch": 0.33, + "grad_norm": 0.8971432447433472, + "learning_rate": 7.813046916749483e-06, + "loss": 0.6458, + "step": 5217 + }, + { + "epoch": 0.33, + "grad_norm": 0.9562937617301941, + "learning_rate": 7.812198645104088e-06, + "loss": 0.6051, + "step": 5218 + }, + { + "epoch": 0.33, + "grad_norm": 0.9105967879295349, + "learning_rate": 7.811350255046792e-06, + "loss": 0.5909, + "step": 5219 + }, + { + "epoch": 0.33, + "grad_norm": 0.890044629573822, + "learning_rate": 7.810501746613316e-06, + "loss": 0.5771, + "step": 5220 + }, + { + "epoch": 0.33, + "grad_norm": 0.9129796028137207, + "learning_rate": 7.809653119839389e-06, + "loss": 0.5991, + "step": 5221 + }, + { + "epoch": 0.33, + "grad_norm": 0.9497199654579163, + "learning_rate": 7.808804374760742e-06, + "loss": 0.6003, + "step": 5222 + }, + { + "epoch": 0.33, + "grad_norm": 0.9804506301879883, + "learning_rate": 7.807955511413114e-06, + "loss": 0.6147, + "step": 5223 + }, + { + "epoch": 0.33, + "grad_norm": 0.8824604749679565, + "learning_rate": 7.80710652983225e-06, + "loss": 0.5427, + "step": 5224 + }, + { + "epoch": 0.33, + "grad_norm": 0.9190927743911743, + "learning_rate": 7.806257430053893e-06, + "loss": 0.5981, + "step": 5225 + }, + { + "epoch": 0.33, + "grad_norm": 0.9122849702835083, + "learning_rate": 7.8054082121138e-06, + "loss": 0.6007, + "step": 5226 + }, + { + "epoch": 0.33, + "grad_norm": 0.8687419295310974, + "learning_rate": 7.804558876047724e-06, + "loss": 0.6064, + "step": 5227 + }, + { + "epoch": 0.33, + "grad_norm": 0.8574259281158447, + "learning_rate": 7.80370942189143e-06, + "loss": 0.5869, + "step": 5228 + }, + { + "epoch": 0.33, + "grad_norm": 0.9501886367797852, + "learning_rate": 7.802859849680686e-06, + "loss": 0.6068, + "step": 5229 + }, + { + "epoch": 0.33, + "grad_norm": 0.8507223725318909, + "learning_rate": 7.802010159451267e-06, + "loss": 0.6234, + "step": 5230 + }, + { + "epoch": 0.33, + "grad_norm": 0.9953079223632812, + "learning_rate": 7.801160351238945e-06, + "loss": 0.6017, + "step": 5231 + }, + { + "epoch": 0.33, + "grad_norm": 0.8714452385902405, + "learning_rate": 7.800310425079505e-06, + "loss": 0.5615, + "step": 5232 + }, + { + "epoch": 0.33, + "grad_norm": 0.9719001054763794, + "learning_rate": 7.799460381008736e-06, + "loss": 0.5711, + "step": 5233 + }, + { + "epoch": 0.33, + "grad_norm": 0.889895498752594, + "learning_rate": 7.798610219062428e-06, + "loss": 0.6251, + "step": 5234 + }, + { + "epoch": 0.33, + "grad_norm": 0.8774588108062744, + "learning_rate": 7.79775993927638e-06, + "loss": 0.5806, + "step": 5235 + }, + { + "epoch": 0.33, + "grad_norm": 0.8740803599357605, + "learning_rate": 7.796909541686392e-06, + "loss": 0.5344, + "step": 5236 + }, + { + "epoch": 0.33, + "grad_norm": 0.8627974987030029, + "learning_rate": 7.796059026328274e-06, + "loss": 0.6076, + "step": 5237 + }, + { + "epoch": 0.33, + "grad_norm": 0.858439564704895, + "learning_rate": 7.795208393237839e-06, + "loss": 0.5604, + "step": 5238 + }, + { + "epoch": 0.33, + "grad_norm": 0.8907666802406311, + "learning_rate": 7.794357642450899e-06, + "loss": 0.6255, + "step": 5239 + }, + { + "epoch": 0.33, + "grad_norm": 0.8581748008728027, + "learning_rate": 7.793506774003282e-06, + "loss": 0.6136, + "step": 5240 + }, + { + "epoch": 0.33, + "grad_norm": 0.9139533638954163, + "learning_rate": 7.792655787930811e-06, + "loss": 0.6128, + "step": 5241 + }, + { + "epoch": 0.33, + "grad_norm": 0.8762749433517456, + "learning_rate": 7.791804684269322e-06, + "loss": 0.6169, + "step": 5242 + }, + { + "epoch": 0.33, + "grad_norm": 0.8736821413040161, + "learning_rate": 7.790953463054647e-06, + "loss": 0.6287, + "step": 5243 + }, + { + "epoch": 0.33, + "grad_norm": 0.8526340126991272, + "learning_rate": 7.790102124322633e-06, + "loss": 0.5913, + "step": 5244 + }, + { + "epoch": 0.33, + "grad_norm": 0.9196691513061523, + "learning_rate": 7.789250668109124e-06, + "loss": 0.6574, + "step": 5245 + }, + { + "epoch": 0.33, + "grad_norm": 0.8942427039146423, + "learning_rate": 7.788399094449971e-06, + "loss": 0.6133, + "step": 5246 + }, + { + "epoch": 0.33, + "grad_norm": 0.8590309023857117, + "learning_rate": 7.787547403381033e-06, + "loss": 0.5746, + "step": 5247 + }, + { + "epoch": 0.33, + "grad_norm": 0.9016396403312683, + "learning_rate": 7.786695594938172e-06, + "loss": 0.5917, + "step": 5248 + }, + { + "epoch": 0.33, + "grad_norm": 0.9520177245140076, + "learning_rate": 7.785843669157253e-06, + "loss": 0.657, + "step": 5249 + }, + { + "epoch": 0.33, + "grad_norm": 0.9555111527442932, + "learning_rate": 7.784991626074148e-06, + "loss": 0.6724, + "step": 5250 + }, + { + "epoch": 0.33, + "grad_norm": 0.9693423509597778, + "learning_rate": 7.784139465724734e-06, + "loss": 0.6453, + "step": 5251 + }, + { + "epoch": 0.33, + "grad_norm": 0.8132855296134949, + "learning_rate": 7.783287188144893e-06, + "loss": 0.5865, + "step": 5252 + }, + { + "epoch": 0.33, + "grad_norm": 0.8263188004493713, + "learning_rate": 7.78243479337051e-06, + "loss": 0.6248, + "step": 5253 + }, + { + "epoch": 0.33, + "grad_norm": 0.8053151369094849, + "learning_rate": 7.781582281437479e-06, + "loss": 0.5827, + "step": 5254 + }, + { + "epoch": 0.33, + "grad_norm": 0.9075903296470642, + "learning_rate": 7.780729652381694e-06, + "loss": 0.6344, + "step": 5255 + }, + { + "epoch": 0.33, + "grad_norm": 0.8380961418151855, + "learning_rate": 7.779876906239055e-06, + "loss": 0.607, + "step": 5256 + }, + { + "epoch": 0.33, + "grad_norm": 0.892805814743042, + "learning_rate": 7.779024043045471e-06, + "loss": 0.6279, + "step": 5257 + }, + { + "epoch": 0.33, + "grad_norm": 0.9007843136787415, + "learning_rate": 7.778171062836853e-06, + "loss": 0.6653, + "step": 5258 + }, + { + "epoch": 0.33, + "grad_norm": 0.9166417717933655, + "learning_rate": 7.777317965649114e-06, + "loss": 0.632, + "step": 5259 + }, + { + "epoch": 0.33, + "grad_norm": 0.9331604838371277, + "learning_rate": 7.776464751518177e-06, + "loss": 0.6262, + "step": 5260 + }, + { + "epoch": 0.33, + "grad_norm": 0.8771944642066956, + "learning_rate": 7.775611420479971e-06, + "loss": 0.5521, + "step": 5261 + }, + { + "epoch": 0.33, + "grad_norm": 0.8667744398117065, + "learning_rate": 7.774757972570423e-06, + "loss": 0.5917, + "step": 5262 + }, + { + "epoch": 0.33, + "grad_norm": 0.9010536074638367, + "learning_rate": 7.773904407825467e-06, + "loss": 0.6459, + "step": 5263 + }, + { + "epoch": 0.33, + "grad_norm": 0.8958863615989685, + "learning_rate": 7.773050726281048e-06, + "loss": 0.5939, + "step": 5264 + }, + { + "epoch": 0.33, + "grad_norm": 0.9226192831993103, + "learning_rate": 7.772196927973109e-06, + "loss": 0.6127, + "step": 5265 + }, + { + "epoch": 0.33, + "grad_norm": 0.8647396564483643, + "learning_rate": 7.771343012937602e-06, + "loss": 0.6057, + "step": 5266 + }, + { + "epoch": 0.33, + "grad_norm": 0.9021638631820679, + "learning_rate": 7.77048898121048e-06, + "loss": 0.6024, + "step": 5267 + }, + { + "epoch": 0.33, + "grad_norm": 0.9035550355911255, + "learning_rate": 7.769634832827706e-06, + "loss": 0.577, + "step": 5268 + }, + { + "epoch": 0.33, + "grad_norm": 0.8818480968475342, + "learning_rate": 7.768780567825243e-06, + "loss": 0.5895, + "step": 5269 + }, + { + "epoch": 0.33, + "grad_norm": 0.8871473670005798, + "learning_rate": 7.767926186239064e-06, + "loss": 0.6386, + "step": 5270 + }, + { + "epoch": 0.33, + "grad_norm": 0.9286932945251465, + "learning_rate": 7.76707168810514e-06, + "loss": 0.6352, + "step": 5271 + }, + { + "epoch": 0.33, + "grad_norm": 0.8643122315406799, + "learning_rate": 7.766217073459454e-06, + "loss": 0.5854, + "step": 5272 + }, + { + "epoch": 0.33, + "grad_norm": 0.8689426183700562, + "learning_rate": 7.765362342337991e-06, + "loss": 0.6032, + "step": 5273 + }, + { + "epoch": 0.33, + "grad_norm": 0.8007031679153442, + "learning_rate": 7.76450749477674e-06, + "loss": 0.5664, + "step": 5274 + }, + { + "epoch": 0.33, + "grad_norm": 0.8409014940261841, + "learning_rate": 7.763652530811692e-06, + "loss": 0.5953, + "step": 5275 + }, + { + "epoch": 0.33, + "grad_norm": 0.8317943215370178, + "learning_rate": 7.762797450478853e-06, + "loss": 0.6057, + "step": 5276 + }, + { + "epoch": 0.33, + "grad_norm": 0.8628614544868469, + "learning_rate": 7.761942253814225e-06, + "loss": 0.6164, + "step": 5277 + }, + { + "epoch": 0.33, + "grad_norm": 0.87236487865448, + "learning_rate": 7.761086940853814e-06, + "loss": 0.5065, + "step": 5278 + }, + { + "epoch": 0.33, + "grad_norm": 0.9762303233146667, + "learning_rate": 7.76023151163364e-06, + "loss": 0.5775, + "step": 5279 + }, + { + "epoch": 0.33, + "grad_norm": 0.907646119594574, + "learning_rate": 7.759375966189718e-06, + "loss": 0.601, + "step": 5280 + }, + { + "epoch": 0.33, + "grad_norm": 0.9219939112663269, + "learning_rate": 7.758520304558072e-06, + "loss": 0.5912, + "step": 5281 + }, + { + "epoch": 0.33, + "grad_norm": 0.9645958542823792, + "learning_rate": 7.757664526774733e-06, + "loss": 0.6087, + "step": 5282 + }, + { + "epoch": 0.33, + "grad_norm": 0.9233863353729248, + "learning_rate": 7.756808632875737e-06, + "loss": 0.6331, + "step": 5283 + }, + { + "epoch": 0.33, + "grad_norm": 0.8966994285583496, + "learning_rate": 7.755952622897117e-06, + "loss": 0.5706, + "step": 5284 + }, + { + "epoch": 0.33, + "grad_norm": 0.9332131743431091, + "learning_rate": 7.755096496874918e-06, + "loss": 0.5962, + "step": 5285 + }, + { + "epoch": 0.33, + "grad_norm": 0.8440611958503723, + "learning_rate": 7.75424025484519e-06, + "loss": 0.6056, + "step": 5286 + }, + { + "epoch": 0.33, + "grad_norm": 0.9401943683624268, + "learning_rate": 7.753383896843988e-06, + "loss": 0.6501, + "step": 5287 + }, + { + "epoch": 0.34, + "grad_norm": 0.8421300053596497, + "learning_rate": 7.752527422907368e-06, + "loss": 0.5683, + "step": 5288 + }, + { + "epoch": 0.34, + "grad_norm": 0.8217456340789795, + "learning_rate": 7.751670833071393e-06, + "loss": 0.5881, + "step": 5289 + }, + { + "epoch": 0.34, + "grad_norm": 0.9123767018318176, + "learning_rate": 7.750814127372131e-06, + "loss": 0.5491, + "step": 5290 + }, + { + "epoch": 0.34, + "grad_norm": 0.875048816204071, + "learning_rate": 7.749957305845656e-06, + "loss": 0.5582, + "step": 5291 + }, + { + "epoch": 0.34, + "grad_norm": 0.9074432253837585, + "learning_rate": 7.749100368528047e-06, + "loss": 0.6511, + "step": 5292 + }, + { + "epoch": 0.34, + "grad_norm": 0.8981906771659851, + "learning_rate": 7.748243315455382e-06, + "loss": 0.624, + "step": 5293 + }, + { + "epoch": 0.34, + "grad_norm": 0.8196624517440796, + "learning_rate": 7.747386146663753e-06, + "loss": 0.5937, + "step": 5294 + }, + { + "epoch": 0.34, + "grad_norm": 0.88856440782547, + "learning_rate": 7.746528862189251e-06, + "loss": 0.6291, + "step": 5295 + }, + { + "epoch": 0.34, + "grad_norm": 0.8899400234222412, + "learning_rate": 7.745671462067974e-06, + "loss": 0.6181, + "step": 5296 + }, + { + "epoch": 0.34, + "grad_norm": 0.910403847694397, + "learning_rate": 7.74481394633602e-06, + "loss": 0.628, + "step": 5297 + }, + { + "epoch": 0.34, + "grad_norm": 0.9819753170013428, + "learning_rate": 7.743956315029502e-06, + "loss": 0.6307, + "step": 5298 + }, + { + "epoch": 0.34, + "grad_norm": 0.9036092758178711, + "learning_rate": 7.743098568184529e-06, + "loss": 0.594, + "step": 5299 + }, + { + "epoch": 0.34, + "grad_norm": 0.9001262784004211, + "learning_rate": 7.742240705837217e-06, + "loss": 0.5737, + "step": 5300 + }, + { + "epoch": 0.34, + "grad_norm": 0.8720340132713318, + "learning_rate": 7.741382728023687e-06, + "loss": 0.6166, + "step": 5301 + }, + { + "epoch": 0.34, + "grad_norm": 0.8694612383842468, + "learning_rate": 7.74052463478007e-06, + "loss": 0.5872, + "step": 5302 + }, + { + "epoch": 0.34, + "grad_norm": 0.9097409844398499, + "learning_rate": 7.739666426142493e-06, + "loss": 0.5977, + "step": 5303 + }, + { + "epoch": 0.34, + "grad_norm": 0.9092093706130981, + "learning_rate": 7.738808102147093e-06, + "loss": 0.5701, + "step": 5304 + }, + { + "epoch": 0.34, + "grad_norm": 0.9413781762123108, + "learning_rate": 7.737949662830012e-06, + "loss": 0.6675, + "step": 5305 + }, + { + "epoch": 0.34, + "grad_norm": 0.9250045418739319, + "learning_rate": 7.737091108227395e-06, + "loss": 0.6216, + "step": 5306 + }, + { + "epoch": 0.34, + "grad_norm": 0.9547144174575806, + "learning_rate": 7.736232438375391e-06, + "loss": 0.6255, + "step": 5307 + }, + { + "epoch": 0.34, + "grad_norm": 0.8688421845436096, + "learning_rate": 7.735373653310161e-06, + "loss": 0.5789, + "step": 5308 + }, + { + "epoch": 0.34, + "grad_norm": 0.890227198600769, + "learning_rate": 7.73451475306786e-06, + "loss": 0.6464, + "step": 5309 + }, + { + "epoch": 0.34, + "grad_norm": 0.922257661819458, + "learning_rate": 7.733655737684657e-06, + "loss": 0.5882, + "step": 5310 + }, + { + "epoch": 0.34, + "grad_norm": 0.8927624821662903, + "learning_rate": 7.732796607196719e-06, + "loss": 0.6208, + "step": 5311 + }, + { + "epoch": 0.34, + "grad_norm": 0.9111786484718323, + "learning_rate": 7.731937361640223e-06, + "loss": 0.6283, + "step": 5312 + }, + { + "epoch": 0.34, + "grad_norm": 0.8802262544631958, + "learning_rate": 7.73107800105135e-06, + "loss": 0.6081, + "step": 5313 + }, + { + "epoch": 0.34, + "grad_norm": 0.9143234491348267, + "learning_rate": 7.730218525466283e-06, + "loss": 0.6289, + "step": 5314 + }, + { + "epoch": 0.34, + "grad_norm": 0.8903287649154663, + "learning_rate": 7.729358934921209e-06, + "loss": 0.6062, + "step": 5315 + }, + { + "epoch": 0.34, + "grad_norm": 0.8877756595611572, + "learning_rate": 7.728499229452326e-06, + "loss": 0.5689, + "step": 5316 + }, + { + "epoch": 0.34, + "grad_norm": 0.9442094564437866, + "learning_rate": 7.727639409095833e-06, + "loss": 0.6616, + "step": 5317 + }, + { + "epoch": 0.34, + "grad_norm": 0.9445149302482605, + "learning_rate": 7.726779473887933e-06, + "loss": 0.6455, + "step": 5318 + }, + { + "epoch": 0.34, + "grad_norm": 0.8811274766921997, + "learning_rate": 7.725919423864837e-06, + "loss": 0.6276, + "step": 5319 + }, + { + "epoch": 0.34, + "grad_norm": 0.8983349800109863, + "learning_rate": 7.725059259062753e-06, + "loss": 0.6362, + "step": 5320 + }, + { + "epoch": 0.34, + "grad_norm": 0.8891294002532959, + "learning_rate": 7.724198979517905e-06, + "loss": 0.59, + "step": 5321 + }, + { + "epoch": 0.34, + "grad_norm": 0.9269400238990784, + "learning_rate": 7.723338585266515e-06, + "loss": 0.6242, + "step": 5322 + }, + { + "epoch": 0.34, + "grad_norm": 0.8992114067077637, + "learning_rate": 7.722478076344812e-06, + "loss": 0.5932, + "step": 5323 + }, + { + "epoch": 0.34, + "grad_norm": 0.9274572134017944, + "learning_rate": 7.721617452789028e-06, + "loss": 0.6501, + "step": 5324 + }, + { + "epoch": 0.34, + "grad_norm": 0.8954104781150818, + "learning_rate": 7.7207567146354e-06, + "loss": 0.6378, + "step": 5325 + }, + { + "epoch": 0.34, + "grad_norm": 0.9126365780830383, + "learning_rate": 7.71989586192017e-06, + "loss": 0.5861, + "step": 5326 + }, + { + "epoch": 0.34, + "grad_norm": 0.9049072265625, + "learning_rate": 7.719034894679589e-06, + "loss": 0.6177, + "step": 5327 + }, + { + "epoch": 0.34, + "grad_norm": 0.8507171273231506, + "learning_rate": 7.718173812949908e-06, + "loss": 0.5861, + "step": 5328 + }, + { + "epoch": 0.34, + "grad_norm": 0.9381729960441589, + "learning_rate": 7.717312616767382e-06, + "loss": 0.6176, + "step": 5329 + }, + { + "epoch": 0.34, + "grad_norm": 0.8493825793266296, + "learning_rate": 7.716451306168276e-06, + "loss": 0.6133, + "step": 5330 + }, + { + "epoch": 0.34, + "grad_norm": 0.8845789432525635, + "learning_rate": 7.715589881188852e-06, + "loss": 0.5937, + "step": 5331 + }, + { + "epoch": 0.34, + "grad_norm": 0.9143087863922119, + "learning_rate": 7.71472834186539e-06, + "loss": 0.5789, + "step": 5332 + }, + { + "epoch": 0.34, + "grad_norm": 0.9418982863426208, + "learning_rate": 7.713866688234157e-06, + "loss": 0.6042, + "step": 5333 + }, + { + "epoch": 0.34, + "grad_norm": 0.8240166902542114, + "learning_rate": 7.713004920331441e-06, + "loss": 0.6097, + "step": 5334 + }, + { + "epoch": 0.34, + "grad_norm": 0.8444035649299622, + "learning_rate": 7.712143038193525e-06, + "loss": 0.5586, + "step": 5335 + }, + { + "epoch": 0.34, + "grad_norm": 0.8501242995262146, + "learning_rate": 7.7112810418567e-06, + "loss": 0.5876, + "step": 5336 + }, + { + "epoch": 0.34, + "grad_norm": 0.8870479464530945, + "learning_rate": 7.710418931357263e-06, + "loss": 0.6137, + "step": 5337 + }, + { + "epoch": 0.34, + "grad_norm": 0.8917999267578125, + "learning_rate": 7.709556706731514e-06, + "loss": 0.6208, + "step": 5338 + }, + { + "epoch": 0.34, + "grad_norm": 0.8900894522666931, + "learning_rate": 7.708694368015758e-06, + "loss": 0.5654, + "step": 5339 + }, + { + "epoch": 0.34, + "grad_norm": 0.9891944527626038, + "learning_rate": 7.707831915246304e-06, + "loss": 0.5949, + "step": 5340 + }, + { + "epoch": 0.34, + "grad_norm": 0.9004802703857422, + "learning_rate": 7.706969348459469e-06, + "loss": 0.6573, + "step": 5341 + }, + { + "epoch": 0.34, + "grad_norm": 0.9725054502487183, + "learning_rate": 7.70610666769157e-06, + "loss": 0.6963, + "step": 5342 + }, + { + "epoch": 0.34, + "grad_norm": 0.895476758480072, + "learning_rate": 7.705243872978935e-06, + "loss": 0.5893, + "step": 5343 + }, + { + "epoch": 0.34, + "grad_norm": 0.898909866809845, + "learning_rate": 7.704380964357889e-06, + "loss": 0.5709, + "step": 5344 + }, + { + "epoch": 0.34, + "grad_norm": 0.8911014795303345, + "learning_rate": 7.70351794186477e-06, + "loss": 0.631, + "step": 5345 + }, + { + "epoch": 0.34, + "grad_norm": 0.8815633654594421, + "learning_rate": 7.702654805535915e-06, + "loss": 0.5953, + "step": 5346 + }, + { + "epoch": 0.34, + "grad_norm": 0.8706081509590149, + "learning_rate": 7.701791555407669e-06, + "loss": 0.5798, + "step": 5347 + }, + { + "epoch": 0.34, + "grad_norm": 0.839159369468689, + "learning_rate": 7.700928191516378e-06, + "loss": 0.6532, + "step": 5348 + }, + { + "epoch": 0.34, + "grad_norm": 0.8313089609146118, + "learning_rate": 7.700064713898398e-06, + "loss": 0.6238, + "step": 5349 + }, + { + "epoch": 0.34, + "grad_norm": 0.9056754112243652, + "learning_rate": 7.699201122590086e-06, + "loss": 0.6051, + "step": 5350 + }, + { + "epoch": 0.34, + "grad_norm": 0.8567859530448914, + "learning_rate": 7.6983374176278e-06, + "loss": 0.6282, + "step": 5351 + }, + { + "epoch": 0.34, + "grad_norm": 0.8340045809745789, + "learning_rate": 7.697473599047918e-06, + "loss": 0.605, + "step": 5352 + }, + { + "epoch": 0.34, + "grad_norm": 0.8645469546318054, + "learning_rate": 7.696609666886805e-06, + "loss": 0.6075, + "step": 5353 + }, + { + "epoch": 0.34, + "grad_norm": 0.8319426774978638, + "learning_rate": 7.695745621180839e-06, + "loss": 0.5254, + "step": 5354 + }, + { + "epoch": 0.34, + "grad_norm": 0.824740469455719, + "learning_rate": 7.694881461966402e-06, + "loss": 0.636, + "step": 5355 + }, + { + "epoch": 0.34, + "grad_norm": 0.8777102828025818, + "learning_rate": 7.694017189279882e-06, + "loss": 0.6096, + "step": 5356 + }, + { + "epoch": 0.34, + "grad_norm": 0.8239105939865112, + "learning_rate": 7.69315280315767e-06, + "loss": 0.5593, + "step": 5357 + }, + { + "epoch": 0.34, + "grad_norm": 0.9346814751625061, + "learning_rate": 7.692288303636163e-06, + "loss": 0.6136, + "step": 5358 + }, + { + "epoch": 0.34, + "grad_norm": 0.8404369950294495, + "learning_rate": 7.69142369075176e-06, + "loss": 0.5977, + "step": 5359 + }, + { + "epoch": 0.34, + "grad_norm": 0.8172876834869385, + "learning_rate": 7.690558964540872e-06, + "loss": 0.6095, + "step": 5360 + }, + { + "epoch": 0.34, + "grad_norm": 0.913045346736908, + "learning_rate": 7.6896941250399e-06, + "loss": 0.5932, + "step": 5361 + }, + { + "epoch": 0.34, + "grad_norm": 0.8815491795539856, + "learning_rate": 7.688829172285267e-06, + "loss": 0.6035, + "step": 5362 + }, + { + "epoch": 0.34, + "grad_norm": 0.8652727007865906, + "learning_rate": 7.687964106313392e-06, + "loss": 0.5792, + "step": 5363 + }, + { + "epoch": 0.34, + "grad_norm": 0.8789160847663879, + "learning_rate": 7.687098927160701e-06, + "loss": 0.6358, + "step": 5364 + }, + { + "epoch": 0.34, + "grad_norm": 0.8862786889076233, + "learning_rate": 7.68623363486362e-06, + "loss": 0.6316, + "step": 5365 + }, + { + "epoch": 0.34, + "grad_norm": 0.9177654981613159, + "learning_rate": 7.685368229458584e-06, + "loss": 0.5892, + "step": 5366 + }, + { + "epoch": 0.34, + "grad_norm": 0.9102894067764282, + "learning_rate": 7.684502710982035e-06, + "loss": 0.6003, + "step": 5367 + }, + { + "epoch": 0.34, + "grad_norm": 0.8236129283905029, + "learning_rate": 7.683637079470418e-06, + "loss": 0.6175, + "step": 5368 + }, + { + "epoch": 0.34, + "grad_norm": 0.886927604675293, + "learning_rate": 7.682771334960178e-06, + "loss": 0.5757, + "step": 5369 + }, + { + "epoch": 0.34, + "grad_norm": 0.8804916739463806, + "learning_rate": 7.681905477487769e-06, + "loss": 0.6581, + "step": 5370 + }, + { + "epoch": 0.34, + "grad_norm": 0.8064201474189758, + "learning_rate": 7.68103950708965e-06, + "loss": 0.5431, + "step": 5371 + }, + { + "epoch": 0.34, + "grad_norm": 0.8837984204292297, + "learning_rate": 7.680173423802282e-06, + "loss": 0.6277, + "step": 5372 + }, + { + "epoch": 0.34, + "grad_norm": 0.9009150266647339, + "learning_rate": 7.679307227662136e-06, + "loss": 0.6023, + "step": 5373 + }, + { + "epoch": 0.34, + "grad_norm": 0.9290765523910522, + "learning_rate": 7.678440918705686e-06, + "loss": 0.6381, + "step": 5374 + }, + { + "epoch": 0.34, + "grad_norm": 0.9562059640884399, + "learning_rate": 7.677574496969404e-06, + "loss": 0.6542, + "step": 5375 + }, + { + "epoch": 0.34, + "grad_norm": 0.9217070937156677, + "learning_rate": 7.676707962489775e-06, + "loss": 0.6375, + "step": 5376 + }, + { + "epoch": 0.34, + "grad_norm": 0.8787111043930054, + "learning_rate": 7.675841315303284e-06, + "loss": 0.6749, + "step": 5377 + }, + { + "epoch": 0.34, + "grad_norm": 0.9050287008285522, + "learning_rate": 7.674974555446425e-06, + "loss": 0.6174, + "step": 5378 + }, + { + "epoch": 0.34, + "grad_norm": 0.8558552861213684, + "learning_rate": 7.674107682955693e-06, + "loss": 0.5902, + "step": 5379 + }, + { + "epoch": 0.34, + "grad_norm": 0.8936824202537537, + "learning_rate": 7.67324069786759e-06, + "loss": 0.6121, + "step": 5380 + }, + { + "epoch": 0.34, + "grad_norm": 0.9137732982635498, + "learning_rate": 7.67237360021862e-06, + "loss": 0.6546, + "step": 5381 + }, + { + "epoch": 0.34, + "grad_norm": 0.9589877128601074, + "learning_rate": 7.671506390045293e-06, + "loss": 0.6522, + "step": 5382 + }, + { + "epoch": 0.34, + "grad_norm": 0.9142245054244995, + "learning_rate": 7.670639067384126e-06, + "loss": 0.6284, + "step": 5383 + }, + { + "epoch": 0.34, + "grad_norm": 0.8741958141326904, + "learning_rate": 7.66977163227164e-06, + "loss": 0.5957, + "step": 5384 + }, + { + "epoch": 0.34, + "grad_norm": 0.9198216795921326, + "learning_rate": 7.668904084744357e-06, + "loss": 0.5629, + "step": 5385 + }, + { + "epoch": 0.34, + "grad_norm": 0.8666446805000305, + "learning_rate": 7.668036424838808e-06, + "loss": 0.5829, + "step": 5386 + }, + { + "epoch": 0.34, + "grad_norm": 0.8472068309783936, + "learning_rate": 7.667168652591524e-06, + "loss": 0.6183, + "step": 5387 + }, + { + "epoch": 0.34, + "grad_norm": 0.960817277431488, + "learning_rate": 7.66630076803905e-06, + "loss": 0.6487, + "step": 5388 + }, + { + "epoch": 0.34, + "grad_norm": 0.8368389010429382, + "learning_rate": 7.665432771217922e-06, + "loss": 0.5899, + "step": 5389 + }, + { + "epoch": 0.34, + "grad_norm": 0.8463855385780334, + "learning_rate": 7.664564662164696e-06, + "loss": 0.6046, + "step": 5390 + }, + { + "epoch": 0.34, + "grad_norm": 0.9495236277580261, + "learning_rate": 7.66369644091592e-06, + "loss": 0.6498, + "step": 5391 + }, + { + "epoch": 0.34, + "grad_norm": 0.8692662119865417, + "learning_rate": 7.662828107508153e-06, + "loss": 0.6034, + "step": 5392 + }, + { + "epoch": 0.34, + "grad_norm": 0.8595423698425293, + "learning_rate": 7.661959661977958e-06, + "loss": 0.5903, + "step": 5393 + }, + { + "epoch": 0.34, + "grad_norm": 0.9107503890991211, + "learning_rate": 7.661091104361902e-06, + "loss": 0.6285, + "step": 5394 + }, + { + "epoch": 0.34, + "grad_norm": 0.8617141842842102, + "learning_rate": 7.660222434696556e-06, + "loss": 0.6372, + "step": 5395 + }, + { + "epoch": 0.34, + "grad_norm": 0.8542279005050659, + "learning_rate": 7.6593536530185e-06, + "loss": 0.6045, + "step": 5396 + }, + { + "epoch": 0.34, + "grad_norm": 0.9125630855560303, + "learning_rate": 7.658484759364308e-06, + "loss": 0.6111, + "step": 5397 + }, + { + "epoch": 0.34, + "grad_norm": 0.9282498359680176, + "learning_rate": 7.657615753770575e-06, + "loss": 0.6401, + "step": 5398 + }, + { + "epoch": 0.34, + "grad_norm": 0.760006844997406, + "learning_rate": 7.656746636273889e-06, + "loss": 0.5258, + "step": 5399 + }, + { + "epoch": 0.34, + "grad_norm": 0.8629961013793945, + "learning_rate": 7.655877406910841e-06, + "loss": 0.5787, + "step": 5400 + }, + { + "epoch": 0.34, + "grad_norm": 0.9403144121170044, + "learning_rate": 7.655008065718036e-06, + "loss": 0.6448, + "step": 5401 + }, + { + "epoch": 0.34, + "grad_norm": 0.8610935211181641, + "learning_rate": 7.654138612732078e-06, + "loss": 0.6125, + "step": 5402 + }, + { + "epoch": 0.34, + "grad_norm": 0.9002783298492432, + "learning_rate": 7.653269047989575e-06, + "loss": 0.5981, + "step": 5403 + }, + { + "epoch": 0.34, + "grad_norm": 0.8649095892906189, + "learning_rate": 7.652399371527142e-06, + "loss": 0.6343, + "step": 5404 + }, + { + "epoch": 0.34, + "grad_norm": 0.9302815794944763, + "learning_rate": 7.651529583381398e-06, + "loss": 0.6527, + "step": 5405 + }, + { + "epoch": 0.34, + "grad_norm": 0.9225360751152039, + "learning_rate": 7.65065968358897e-06, + "loss": 0.6909, + "step": 5406 + }, + { + "epoch": 0.34, + "grad_norm": 0.9352942109107971, + "learning_rate": 7.649789672186483e-06, + "loss": 0.6563, + "step": 5407 + }, + { + "epoch": 0.34, + "grad_norm": 0.9485490918159485, + "learning_rate": 7.648919549210567e-06, + "loss": 0.6339, + "step": 5408 + }, + { + "epoch": 0.34, + "grad_norm": 0.8463318347930908, + "learning_rate": 7.648049314697869e-06, + "loss": 0.5555, + "step": 5409 + }, + { + "epoch": 0.34, + "grad_norm": 0.8683443069458008, + "learning_rate": 7.647178968685024e-06, + "loss": 0.5861, + "step": 5410 + }, + { + "epoch": 0.34, + "grad_norm": 0.95866459608078, + "learning_rate": 7.646308511208682e-06, + "loss": 0.6818, + "step": 5411 + }, + { + "epoch": 0.34, + "grad_norm": 0.856253981590271, + "learning_rate": 7.645437942305491e-06, + "loss": 0.5561, + "step": 5412 + }, + { + "epoch": 0.34, + "grad_norm": 0.9046028852462769, + "learning_rate": 7.644567262012115e-06, + "loss": 0.6405, + "step": 5413 + }, + { + "epoch": 0.34, + "grad_norm": 0.8811362981796265, + "learning_rate": 7.643696470365209e-06, + "loss": 0.6266, + "step": 5414 + }, + { + "epoch": 0.34, + "grad_norm": 0.8369075059890747, + "learning_rate": 7.642825567401444e-06, + "loss": 0.5538, + "step": 5415 + }, + { + "epoch": 0.34, + "grad_norm": 0.9165283441543579, + "learning_rate": 7.641954553157487e-06, + "loss": 0.5952, + "step": 5416 + }, + { + "epoch": 0.34, + "grad_norm": 0.8416288495063782, + "learning_rate": 7.641083427670014e-06, + "loss": 0.6023, + "step": 5417 + }, + { + "epoch": 0.34, + "grad_norm": 0.8895038962364197, + "learning_rate": 7.640212190975707e-06, + "loss": 0.5969, + "step": 5418 + }, + { + "epoch": 0.34, + "grad_norm": 0.8565618991851807, + "learning_rate": 7.639340843111247e-06, + "loss": 0.5769, + "step": 5419 + }, + { + "epoch": 0.34, + "grad_norm": 0.825664222240448, + "learning_rate": 7.638469384113328e-06, + "loss": 0.5199, + "step": 5420 + }, + { + "epoch": 0.34, + "grad_norm": 0.8779264092445374, + "learning_rate": 7.637597814018638e-06, + "loss": 0.5795, + "step": 5421 + }, + { + "epoch": 0.34, + "grad_norm": 0.8773237466812134, + "learning_rate": 7.636726132863883e-06, + "loss": 0.6019, + "step": 5422 + }, + { + "epoch": 0.34, + "grad_norm": 0.9273678660392761, + "learning_rate": 7.635854340685762e-06, + "loss": 0.6015, + "step": 5423 + }, + { + "epoch": 0.34, + "grad_norm": 0.9024190902709961, + "learning_rate": 7.634982437520984e-06, + "loss": 0.5763, + "step": 5424 + }, + { + "epoch": 0.34, + "grad_norm": 0.8656637668609619, + "learning_rate": 7.634110423406262e-06, + "loss": 0.5785, + "step": 5425 + }, + { + "epoch": 0.34, + "grad_norm": 0.8862728476524353, + "learning_rate": 7.633238298378315e-06, + "loss": 0.6294, + "step": 5426 + }, + { + "epoch": 0.34, + "grad_norm": 0.8349065184593201, + "learning_rate": 7.632366062473862e-06, + "loss": 0.5862, + "step": 5427 + }, + { + "epoch": 0.34, + "grad_norm": 0.8949868083000183, + "learning_rate": 7.631493715729632e-06, + "loss": 0.5676, + "step": 5428 + }, + { + "epoch": 0.34, + "grad_norm": 0.897675633430481, + "learning_rate": 7.630621258182354e-06, + "loss": 0.5963, + "step": 5429 + }, + { + "epoch": 0.34, + "grad_norm": 0.8373680114746094, + "learning_rate": 7.62974868986877e-06, + "loss": 0.5706, + "step": 5430 + }, + { + "epoch": 0.34, + "grad_norm": 0.9069997072219849, + "learning_rate": 7.628876010825614e-06, + "loss": 0.6501, + "step": 5431 + }, + { + "epoch": 0.34, + "grad_norm": 0.8189912438392639, + "learning_rate": 7.628003221089635e-06, + "loss": 0.5475, + "step": 5432 + }, + { + "epoch": 0.34, + "grad_norm": 0.9497076869010925, + "learning_rate": 7.6271303206975825e-06, + "loss": 0.6459, + "step": 5433 + }, + { + "epoch": 0.34, + "grad_norm": 0.8492891788482666, + "learning_rate": 7.626257309686211e-06, + "loss": 0.5883, + "step": 5434 + }, + { + "epoch": 0.34, + "grad_norm": 0.8823180198669434, + "learning_rate": 7.6253841880922805e-06, + "loss": 0.5968, + "step": 5435 + }, + { + "epoch": 0.34, + "grad_norm": 0.8924271464347839, + "learning_rate": 7.624510955952555e-06, + "loss": 0.5706, + "step": 5436 + }, + { + "epoch": 0.34, + "grad_norm": 0.8900327682495117, + "learning_rate": 7.623637613303805e-06, + "loss": 0.5903, + "step": 5437 + }, + { + "epoch": 0.34, + "grad_norm": 0.8470126986503601, + "learning_rate": 7.6227641601827996e-06, + "loss": 0.553, + "step": 5438 + }, + { + "epoch": 0.34, + "grad_norm": 0.8747822642326355, + "learning_rate": 7.62189059662632e-06, + "loss": 0.6095, + "step": 5439 + }, + { + "epoch": 0.34, + "grad_norm": 0.8955729603767395, + "learning_rate": 7.621016922671147e-06, + "loss": 0.5983, + "step": 5440 + }, + { + "epoch": 0.34, + "grad_norm": 0.8502835631370544, + "learning_rate": 7.620143138354072e-06, + "loss": 0.5978, + "step": 5441 + }, + { + "epoch": 0.34, + "grad_norm": 0.8627199530601501, + "learning_rate": 7.6192692437118825e-06, + "loss": 0.6227, + "step": 5442 + }, + { + "epoch": 0.34, + "grad_norm": 0.930798351764679, + "learning_rate": 7.618395238781377e-06, + "loss": 0.6769, + "step": 5443 + }, + { + "epoch": 0.34, + "grad_norm": 0.889930009841919, + "learning_rate": 7.617521123599356e-06, + "loss": 0.6135, + "step": 5444 + }, + { + "epoch": 0.34, + "grad_norm": 0.910830557346344, + "learning_rate": 7.616646898202629e-06, + "loss": 0.6337, + "step": 5445 + }, + { + "epoch": 0.35, + "grad_norm": 0.867741048336029, + "learning_rate": 7.6157725626280014e-06, + "loss": 0.5566, + "step": 5446 + }, + { + "epoch": 0.35, + "grad_norm": 0.8112003207206726, + "learning_rate": 7.61489811691229e-06, + "loss": 0.57, + "step": 5447 + }, + { + "epoch": 0.35, + "grad_norm": 0.9317660927772522, + "learning_rate": 7.614023561092319e-06, + "loss": 0.6141, + "step": 5448 + }, + { + "epoch": 0.35, + "grad_norm": 0.8938388228416443, + "learning_rate": 7.613148895204906e-06, + "loss": 0.6114, + "step": 5449 + }, + { + "epoch": 0.35, + "grad_norm": 0.8985342979431152, + "learning_rate": 7.612274119286884e-06, + "loss": 0.626, + "step": 5450 + }, + { + "epoch": 0.35, + "grad_norm": 0.9427514672279358, + "learning_rate": 7.611399233375087e-06, + "loss": 0.6303, + "step": 5451 + }, + { + "epoch": 0.35, + "grad_norm": 0.9037792682647705, + "learning_rate": 7.610524237506354e-06, + "loss": 0.6456, + "step": 5452 + }, + { + "epoch": 0.35, + "grad_norm": 0.8891815543174744, + "learning_rate": 7.6096491317175246e-06, + "loss": 0.6235, + "step": 5453 + }, + { + "epoch": 0.35, + "grad_norm": 0.8519503474235535, + "learning_rate": 7.608773916045449e-06, + "loss": 0.5835, + "step": 5454 + }, + { + "epoch": 0.35, + "grad_norm": 0.8248928785324097, + "learning_rate": 7.607898590526979e-06, + "loss": 0.5891, + "step": 5455 + }, + { + "epoch": 0.35, + "grad_norm": 0.8195099234580994, + "learning_rate": 7.607023155198973e-06, + "loss": 0.5548, + "step": 5456 + }, + { + "epoch": 0.35, + "grad_norm": 0.8967714309692383, + "learning_rate": 7.606147610098289e-06, + "loss": 0.6207, + "step": 5457 + }, + { + "epoch": 0.35, + "grad_norm": 0.8687184453010559, + "learning_rate": 7.605271955261796e-06, + "loss": 0.6149, + "step": 5458 + }, + { + "epoch": 0.35, + "grad_norm": 0.9524543285369873, + "learning_rate": 7.604396190726364e-06, + "loss": 0.5933, + "step": 5459 + }, + { + "epoch": 0.35, + "grad_norm": 0.9514956474304199, + "learning_rate": 7.603520316528869e-06, + "loss": 0.6466, + "step": 5460 + }, + { + "epoch": 0.35, + "grad_norm": 0.89705491065979, + "learning_rate": 7.60264433270619e-06, + "loss": 0.608, + "step": 5461 + }, + { + "epoch": 0.35, + "grad_norm": 0.803554117679596, + "learning_rate": 7.601768239295213e-06, + "loss": 0.5203, + "step": 5462 + }, + { + "epoch": 0.35, + "grad_norm": 0.9711521863937378, + "learning_rate": 7.600892036332825e-06, + "loss": 0.6572, + "step": 5463 + }, + { + "epoch": 0.35, + "grad_norm": 0.8963906168937683, + "learning_rate": 7.600015723855922e-06, + "loss": 0.6329, + "step": 5464 + }, + { + "epoch": 0.35, + "grad_norm": 0.9670395255088806, + "learning_rate": 7.599139301901401e-06, + "loss": 0.6172, + "step": 5465 + }, + { + "epoch": 0.35, + "grad_norm": 0.9355558156967163, + "learning_rate": 7.5982627705061666e-06, + "loss": 0.6574, + "step": 5466 + }, + { + "epoch": 0.35, + "grad_norm": 0.8632118105888367, + "learning_rate": 7.597386129707126e-06, + "loss": 0.6021, + "step": 5467 + }, + { + "epoch": 0.35, + "grad_norm": 0.8859368562698364, + "learning_rate": 7.596509379541191e-06, + "loss": 0.5763, + "step": 5468 + }, + { + "epoch": 0.35, + "grad_norm": 0.8613402843475342, + "learning_rate": 7.595632520045277e-06, + "loss": 0.6077, + "step": 5469 + }, + { + "epoch": 0.35, + "grad_norm": 0.8863072395324707, + "learning_rate": 7.594755551256308e-06, + "loss": 0.5881, + "step": 5470 + }, + { + "epoch": 0.35, + "grad_norm": 0.9150487780570984, + "learning_rate": 7.593878473211209e-06, + "loss": 0.6351, + "step": 5471 + }, + { + "epoch": 0.35, + "grad_norm": 0.8424960970878601, + "learning_rate": 7.593001285946913e-06, + "loss": 0.5416, + "step": 5472 + }, + { + "epoch": 0.35, + "grad_norm": 0.8403632640838623, + "learning_rate": 7.592123989500351e-06, + "loss": 0.6015, + "step": 5473 + }, + { + "epoch": 0.35, + "grad_norm": 0.9487394094467163, + "learning_rate": 7.591246583908465e-06, + "loss": 0.6518, + "step": 5474 + }, + { + "epoch": 0.35, + "grad_norm": 0.915139377117157, + "learning_rate": 7.590369069208201e-06, + "loss": 0.6258, + "step": 5475 + }, + { + "epoch": 0.35, + "grad_norm": 0.8754032254219055, + "learning_rate": 7.589491445436505e-06, + "loss": 0.6127, + "step": 5476 + }, + { + "epoch": 0.35, + "grad_norm": 0.8421617746353149, + "learning_rate": 7.588613712630334e-06, + "loss": 0.5761, + "step": 5477 + }, + { + "epoch": 0.35, + "grad_norm": 0.8702454566955566, + "learning_rate": 7.587735870826643e-06, + "loss": 0.5819, + "step": 5478 + }, + { + "epoch": 0.35, + "grad_norm": 0.8792976140975952, + "learning_rate": 7.586857920062399e-06, + "loss": 0.6374, + "step": 5479 + }, + { + "epoch": 0.35, + "grad_norm": 0.9013099074363708, + "learning_rate": 7.585979860374566e-06, + "loss": 0.6053, + "step": 5480 + }, + { + "epoch": 0.35, + "grad_norm": 0.8370474576950073, + "learning_rate": 7.5851016918001165e-06, + "loss": 0.5803, + "step": 5481 + }, + { + "epoch": 0.35, + "grad_norm": 0.8300336003303528, + "learning_rate": 7.584223414376028e-06, + "loss": 0.5983, + "step": 5482 + }, + { + "epoch": 0.35, + "grad_norm": 0.9231306910514832, + "learning_rate": 7.583345028139282e-06, + "loss": 0.6231, + "step": 5483 + }, + { + "epoch": 0.35, + "grad_norm": 0.8919202089309692, + "learning_rate": 7.582466533126863e-06, + "loss": 0.6033, + "step": 5484 + }, + { + "epoch": 0.35, + "grad_norm": 0.8878291845321655, + "learning_rate": 7.581587929375761e-06, + "loss": 0.6483, + "step": 5485 + }, + { + "epoch": 0.35, + "grad_norm": 0.7955220341682434, + "learning_rate": 7.580709216922973e-06, + "loss": 0.6065, + "step": 5486 + }, + { + "epoch": 0.35, + "grad_norm": 0.9067592620849609, + "learning_rate": 7.579830395805499e-06, + "loss": 0.6261, + "step": 5487 + }, + { + "epoch": 0.35, + "grad_norm": 0.9961644411087036, + "learning_rate": 7.578951466060341e-06, + "loss": 0.6041, + "step": 5488 + }, + { + "epoch": 0.35, + "grad_norm": 0.8630528450012207, + "learning_rate": 7.578072427724506e-06, + "loss": 0.5756, + "step": 5489 + }, + { + "epoch": 0.35, + "grad_norm": 0.8708525896072388, + "learning_rate": 7.577193280835011e-06, + "loss": 0.6126, + "step": 5490 + }, + { + "epoch": 0.35, + "grad_norm": 0.8305570483207703, + "learning_rate": 7.5763140254288716e-06, + "loss": 0.5874, + "step": 5491 + }, + { + "epoch": 0.35, + "grad_norm": 0.9040376543998718, + "learning_rate": 7.575434661543113e-06, + "loss": 0.6401, + "step": 5492 + }, + { + "epoch": 0.35, + "grad_norm": 0.9144179224967957, + "learning_rate": 7.574555189214756e-06, + "loss": 0.6298, + "step": 5493 + }, + { + "epoch": 0.35, + "grad_norm": 0.9132001399993896, + "learning_rate": 7.573675608480841e-06, + "loss": 0.5974, + "step": 5494 + }, + { + "epoch": 0.35, + "grad_norm": 0.8850140571594238, + "learning_rate": 7.5727959193783974e-06, + "loss": 0.6335, + "step": 5495 + }, + { + "epoch": 0.35, + "grad_norm": 0.9785036444664001, + "learning_rate": 7.571916121944467e-06, + "loss": 0.5492, + "step": 5496 + }, + { + "epoch": 0.35, + "grad_norm": 0.8609431385993958, + "learning_rate": 7.571036216216097e-06, + "loss": 0.5885, + "step": 5497 + }, + { + "epoch": 0.35, + "grad_norm": 0.9320406317710876, + "learning_rate": 7.570156202230335e-06, + "loss": 0.6477, + "step": 5498 + }, + { + "epoch": 0.35, + "grad_norm": 0.8788042664527893, + "learning_rate": 7.569276080024237e-06, + "loss": 0.6574, + "step": 5499 + }, + { + "epoch": 0.35, + "grad_norm": 0.8510634899139404, + "learning_rate": 7.5683958496348596e-06, + "loss": 0.6256, + "step": 5500 + }, + { + "epoch": 0.35, + "grad_norm": 0.8864413499832153, + "learning_rate": 7.567515511099268e-06, + "loss": 0.5793, + "step": 5501 + }, + { + "epoch": 0.35, + "grad_norm": 0.860865592956543, + "learning_rate": 7.56663506445453e-06, + "loss": 0.6229, + "step": 5502 + }, + { + "epoch": 0.35, + "grad_norm": 0.913250744342804, + "learning_rate": 7.5657545097377205e-06, + "loss": 0.5914, + "step": 5503 + }, + { + "epoch": 0.35, + "grad_norm": 0.8102872967720032, + "learning_rate": 7.564873846985912e-06, + "loss": 0.5833, + "step": 5504 + }, + { + "epoch": 0.35, + "grad_norm": 0.8643232583999634, + "learning_rate": 7.563993076236189e-06, + "loss": 0.6061, + "step": 5505 + }, + { + "epoch": 0.35, + "grad_norm": 0.9757564067840576, + "learning_rate": 7.563112197525637e-06, + "loss": 0.6515, + "step": 5506 + }, + { + "epoch": 0.35, + "grad_norm": 0.8703305721282959, + "learning_rate": 7.562231210891347e-06, + "loss": 0.5819, + "step": 5507 + }, + { + "epoch": 0.35, + "grad_norm": 0.8819752931594849, + "learning_rate": 7.561350116370413e-06, + "loss": 0.5966, + "step": 5508 + }, + { + "epoch": 0.35, + "grad_norm": 0.8967403173446655, + "learning_rate": 7.560468913999937e-06, + "loss": 0.6338, + "step": 5509 + }, + { + "epoch": 0.35, + "grad_norm": 0.8586651682853699, + "learning_rate": 7.559587603817022e-06, + "loss": 0.6135, + "step": 5510 + }, + { + "epoch": 0.35, + "grad_norm": 0.8888817429542542, + "learning_rate": 7.558706185858777e-06, + "loss": 0.6236, + "step": 5511 + }, + { + "epoch": 0.35, + "grad_norm": 0.8927393555641174, + "learning_rate": 7.557824660162316e-06, + "loss": 0.6012, + "step": 5512 + }, + { + "epoch": 0.35, + "grad_norm": 0.9518846273422241, + "learning_rate": 7.556943026764756e-06, + "loss": 0.5581, + "step": 5513 + }, + { + "epoch": 0.35, + "grad_norm": 0.8957030773162842, + "learning_rate": 7.55606128570322e-06, + "loss": 0.5849, + "step": 5514 + }, + { + "epoch": 0.35, + "grad_norm": 0.9107878804206848, + "learning_rate": 7.5551794370148366e-06, + "loss": 0.6504, + "step": 5515 + }, + { + "epoch": 0.35, + "grad_norm": 0.8559346795082092, + "learning_rate": 7.554297480736734e-06, + "loss": 0.5891, + "step": 5516 + }, + { + "epoch": 0.35, + "grad_norm": 0.8798370361328125, + "learning_rate": 7.553415416906051e-06, + "loss": 0.6028, + "step": 5517 + }, + { + "epoch": 0.35, + "grad_norm": 0.9414769411087036, + "learning_rate": 7.552533245559927e-06, + "loss": 0.6174, + "step": 5518 + }, + { + "epoch": 0.35, + "grad_norm": 0.8583175539970398, + "learning_rate": 7.551650966735509e-06, + "loss": 0.5641, + "step": 5519 + }, + { + "epoch": 0.35, + "grad_norm": 0.8779864311218262, + "learning_rate": 7.550768580469945e-06, + "loss": 0.6283, + "step": 5520 + }, + { + "epoch": 0.35, + "grad_norm": 0.8857389092445374, + "learning_rate": 7.549886086800389e-06, + "loss": 0.5855, + "step": 5521 + }, + { + "epoch": 0.35, + "grad_norm": 0.8128264546394348, + "learning_rate": 7.549003485763999e-06, + "loss": 0.4986, + "step": 5522 + }, + { + "epoch": 0.35, + "grad_norm": 0.9185560941696167, + "learning_rate": 7.548120777397941e-06, + "loss": 0.6204, + "step": 5523 + }, + { + "epoch": 0.35, + "grad_norm": 0.9126561284065247, + "learning_rate": 7.547237961739382e-06, + "loss": 0.6516, + "step": 5524 + }, + { + "epoch": 0.35, + "grad_norm": 0.8364182114601135, + "learning_rate": 7.546355038825492e-06, + "loss": 0.573, + "step": 5525 + }, + { + "epoch": 0.35, + "grad_norm": 0.8545491099357605, + "learning_rate": 7.545472008693451e-06, + "loss": 0.6251, + "step": 5526 + }, + { + "epoch": 0.35, + "grad_norm": 0.9368882775306702, + "learning_rate": 7.544588871380439e-06, + "loss": 0.6421, + "step": 5527 + }, + { + "epoch": 0.35, + "grad_norm": 0.8525586128234863, + "learning_rate": 7.54370562692364e-06, + "loss": 0.6311, + "step": 5528 + }, + { + "epoch": 0.35, + "grad_norm": 0.8583645820617676, + "learning_rate": 7.542822275360246e-06, + "loss": 0.6295, + "step": 5529 + }, + { + "epoch": 0.35, + "grad_norm": 0.9096074104309082, + "learning_rate": 7.541938816727453e-06, + "loss": 0.6628, + "step": 5530 + }, + { + "epoch": 0.35, + "grad_norm": 0.8697735071182251, + "learning_rate": 7.5410552510624594e-06, + "loss": 0.5986, + "step": 5531 + }, + { + "epoch": 0.35, + "grad_norm": 0.869107186794281, + "learning_rate": 7.540171578402466e-06, + "loss": 0.6293, + "step": 5532 + }, + { + "epoch": 0.35, + "grad_norm": 0.8785176873207092, + "learning_rate": 7.539287798784688e-06, + "loss": 0.5971, + "step": 5533 + }, + { + "epoch": 0.35, + "grad_norm": 0.9223856329917908, + "learning_rate": 7.538403912246333e-06, + "loss": 0.5812, + "step": 5534 + }, + { + "epoch": 0.35, + "grad_norm": 0.8824152946472168, + "learning_rate": 7.537519918824619e-06, + "loss": 0.5718, + "step": 5535 + }, + { + "epoch": 0.35, + "grad_norm": 0.8068228960037231, + "learning_rate": 7.5366358185567676e-06, + "loss": 0.5295, + "step": 5536 + }, + { + "epoch": 0.35, + "grad_norm": 0.8570433259010315, + "learning_rate": 7.5357516114800075e-06, + "loss": 0.6182, + "step": 5537 + }, + { + "epoch": 0.35, + "grad_norm": 0.8204308748245239, + "learning_rate": 7.534867297631569e-06, + "loss": 0.5698, + "step": 5538 + }, + { + "epoch": 0.35, + "grad_norm": 0.9290466904640198, + "learning_rate": 7.533982877048685e-06, + "loss": 0.6612, + "step": 5539 + }, + { + "epoch": 0.35, + "grad_norm": 0.925410807132721, + "learning_rate": 7.5330983497685975e-06, + "loss": 0.6015, + "step": 5540 + }, + { + "epoch": 0.35, + "grad_norm": 0.8890109658241272, + "learning_rate": 7.532213715828551e-06, + "loss": 0.5958, + "step": 5541 + }, + { + "epoch": 0.35, + "grad_norm": 0.8188264966011047, + "learning_rate": 7.531328975265795e-06, + "loss": 0.6184, + "step": 5542 + }, + { + "epoch": 0.35, + "grad_norm": 0.8602173328399658, + "learning_rate": 7.53044412811758e-06, + "loss": 0.5672, + "step": 5543 + }, + { + "epoch": 0.35, + "grad_norm": 0.8795886039733887, + "learning_rate": 7.529559174421167e-06, + "loss": 0.6153, + "step": 5544 + }, + { + "epoch": 0.35, + "grad_norm": 0.8424326777458191, + "learning_rate": 7.528674114213816e-06, + "loss": 0.6177, + "step": 5545 + }, + { + "epoch": 0.35, + "grad_norm": 0.9181726574897766, + "learning_rate": 7.527788947532795e-06, + "loss": 0.6457, + "step": 5546 + }, + { + "epoch": 0.35, + "grad_norm": 0.980117678642273, + "learning_rate": 7.526903674415373e-06, + "loss": 0.6007, + "step": 5547 + }, + { + "epoch": 0.35, + "grad_norm": 0.9220601916313171, + "learning_rate": 7.526018294898832e-06, + "loss": 0.6301, + "step": 5548 + }, + { + "epoch": 0.35, + "grad_norm": 0.9367707371711731, + "learning_rate": 7.525132809020443e-06, + "loss": 0.5758, + "step": 5549 + }, + { + "epoch": 0.35, + "grad_norm": 0.8081425428390503, + "learning_rate": 7.524247216817499e-06, + "loss": 0.5754, + "step": 5550 + }, + { + "epoch": 0.35, + "grad_norm": 0.8742004632949829, + "learning_rate": 7.5233615183272836e-06, + "loss": 0.5852, + "step": 5551 + }, + { + "epoch": 0.35, + "grad_norm": 0.9098623394966125, + "learning_rate": 7.522475713587095e-06, + "loss": 0.6307, + "step": 5552 + }, + { + "epoch": 0.35, + "grad_norm": 0.8602703809738159, + "learning_rate": 7.521589802634228e-06, + "loss": 0.633, + "step": 5553 + }, + { + "epoch": 0.35, + "grad_norm": 0.8878544569015503, + "learning_rate": 7.520703785505987e-06, + "loss": 0.6116, + "step": 5554 + }, + { + "epoch": 0.35, + "grad_norm": 0.8622645735740662, + "learning_rate": 7.519817662239678e-06, + "loss": 0.5874, + "step": 5555 + }, + { + "epoch": 0.35, + "grad_norm": 0.8816009759902954, + "learning_rate": 7.518931432872614e-06, + "loss": 0.6309, + "step": 5556 + }, + { + "epoch": 0.35, + "grad_norm": 0.8535116910934448, + "learning_rate": 7.518045097442111e-06, + "loss": 0.6286, + "step": 5557 + }, + { + "epoch": 0.35, + "grad_norm": 0.9062272310256958, + "learning_rate": 7.517158655985483e-06, + "loss": 0.6187, + "step": 5558 + }, + { + "epoch": 0.35, + "grad_norm": 0.9796926975250244, + "learning_rate": 7.516272108540066e-06, + "loss": 0.6543, + "step": 5559 + }, + { + "epoch": 0.35, + "grad_norm": 0.9051242470741272, + "learning_rate": 7.515385455143183e-06, + "loss": 0.6491, + "step": 5560 + }, + { + "epoch": 0.35, + "grad_norm": 0.935102105140686, + "learning_rate": 7.514498695832169e-06, + "loss": 0.6305, + "step": 5561 + }, + { + "epoch": 0.35, + "grad_norm": 0.8482328653335571, + "learning_rate": 7.51361183064436e-06, + "loss": 0.6013, + "step": 5562 + }, + { + "epoch": 0.35, + "grad_norm": 0.9039483070373535, + "learning_rate": 7.512724859617103e-06, + "loss": 0.591, + "step": 5563 + }, + { + "epoch": 0.35, + "grad_norm": 0.924065113067627, + "learning_rate": 7.511837782787743e-06, + "loss": 0.5901, + "step": 5564 + }, + { + "epoch": 0.35, + "grad_norm": 0.8983739614486694, + "learning_rate": 7.510950600193632e-06, + "loss": 0.5847, + "step": 5565 + }, + { + "epoch": 0.35, + "grad_norm": 0.8916130065917969, + "learning_rate": 7.510063311872125e-06, + "loss": 0.5815, + "step": 5566 + }, + { + "epoch": 0.35, + "grad_norm": 0.9395748972892761, + "learning_rate": 7.509175917860586e-06, + "loss": 0.6353, + "step": 5567 + }, + { + "epoch": 0.35, + "grad_norm": 0.8860333561897278, + "learning_rate": 7.508288418196377e-06, + "loss": 0.6206, + "step": 5568 + }, + { + "epoch": 0.35, + "grad_norm": 0.8644207715988159, + "learning_rate": 7.507400812916868e-06, + "loss": 0.5608, + "step": 5569 + }, + { + "epoch": 0.35, + "grad_norm": 0.8900479674339294, + "learning_rate": 7.5065131020594316e-06, + "loss": 0.6308, + "step": 5570 + }, + { + "epoch": 0.35, + "grad_norm": 0.862021267414093, + "learning_rate": 7.5056252856614505e-06, + "loss": 0.5858, + "step": 5571 + }, + { + "epoch": 0.35, + "grad_norm": 0.90825355052948, + "learning_rate": 7.504737363760306e-06, + "loss": 0.6993, + "step": 5572 + }, + { + "epoch": 0.35, + "grad_norm": 0.9253191351890564, + "learning_rate": 7.503849336393382e-06, + "loss": 0.6081, + "step": 5573 + }, + { + "epoch": 0.35, + "grad_norm": 0.9334720969200134, + "learning_rate": 7.502961203598074e-06, + "loss": 0.6203, + "step": 5574 + }, + { + "epoch": 0.35, + "grad_norm": 0.861369252204895, + "learning_rate": 7.502072965411776e-06, + "loss": 0.5873, + "step": 5575 + }, + { + "epoch": 0.35, + "grad_norm": 0.941525399684906, + "learning_rate": 7.501184621871891e-06, + "loss": 0.5849, + "step": 5576 + }, + { + "epoch": 0.35, + "grad_norm": 0.9132643342018127, + "learning_rate": 7.5002961730158204e-06, + "loss": 0.5786, + "step": 5577 + }, + { + "epoch": 0.35, + "grad_norm": 0.8970744013786316, + "learning_rate": 7.499407618880979e-06, + "loss": 0.6126, + "step": 5578 + }, + { + "epoch": 0.35, + "grad_norm": 0.8514313697814941, + "learning_rate": 7.498518959504775e-06, + "loss": 0.6322, + "step": 5579 + }, + { + "epoch": 0.35, + "grad_norm": 0.8997253775596619, + "learning_rate": 7.49763019492463e-06, + "loss": 0.6461, + "step": 5580 + }, + { + "epoch": 0.35, + "grad_norm": 0.8769670128822327, + "learning_rate": 7.4967413251779655e-06, + "loss": 0.6362, + "step": 5581 + }, + { + "epoch": 0.35, + "grad_norm": 0.928396999835968, + "learning_rate": 7.495852350302209e-06, + "loss": 0.6229, + "step": 5582 + }, + { + "epoch": 0.35, + "grad_norm": 0.8975219130516052, + "learning_rate": 7.494963270334794e-06, + "loss": 0.6457, + "step": 5583 + }, + { + "epoch": 0.35, + "grad_norm": 0.8608077764511108, + "learning_rate": 7.494074085313155e-06, + "loss": 0.5369, + "step": 5584 + }, + { + "epoch": 0.35, + "grad_norm": 0.917822003364563, + "learning_rate": 7.493184795274731e-06, + "loss": 0.6064, + "step": 5585 + }, + { + "epoch": 0.35, + "grad_norm": 0.9204185605049133, + "learning_rate": 7.49229540025697e-06, + "loss": 0.7078, + "step": 5586 + }, + { + "epoch": 0.35, + "grad_norm": 0.8705748915672302, + "learning_rate": 7.4914059002973185e-06, + "loss": 0.6384, + "step": 5587 + }, + { + "epoch": 0.35, + "grad_norm": 0.8483352661132812, + "learning_rate": 7.490516295433232e-06, + "loss": 0.5437, + "step": 5588 + }, + { + "epoch": 0.35, + "grad_norm": 0.8893619179725647, + "learning_rate": 7.489626585702169e-06, + "loss": 0.5999, + "step": 5589 + }, + { + "epoch": 0.35, + "grad_norm": 0.8645599484443665, + "learning_rate": 7.4887367711415905e-06, + "loss": 0.6121, + "step": 5590 + }, + { + "epoch": 0.35, + "grad_norm": 0.8719490766525269, + "learning_rate": 7.487846851788965e-06, + "loss": 0.6051, + "step": 5591 + }, + { + "epoch": 0.35, + "grad_norm": 0.9109401702880859, + "learning_rate": 7.486956827681761e-06, + "loss": 0.616, + "step": 5592 + }, + { + "epoch": 0.35, + "grad_norm": 0.9400895237922668, + "learning_rate": 7.4860666988574585e-06, + "loss": 0.6035, + "step": 5593 + }, + { + "epoch": 0.35, + "grad_norm": 0.8858636021614075, + "learning_rate": 7.485176465353534e-06, + "loss": 0.5885, + "step": 5594 + }, + { + "epoch": 0.35, + "grad_norm": 0.7887114882469177, + "learning_rate": 7.484286127207476e-06, + "loss": 0.5434, + "step": 5595 + }, + { + "epoch": 0.35, + "grad_norm": 0.8736209869384766, + "learning_rate": 7.48339568445677e-06, + "loss": 0.6051, + "step": 5596 + }, + { + "epoch": 0.35, + "grad_norm": 0.8536117672920227, + "learning_rate": 7.482505137138911e-06, + "loss": 0.6083, + "step": 5597 + }, + { + "epoch": 0.35, + "grad_norm": 0.9169653654098511, + "learning_rate": 7.4816144852913975e-06, + "loss": 0.6361, + "step": 5598 + }, + { + "epoch": 0.35, + "grad_norm": 0.9062714576721191, + "learning_rate": 7.480723728951731e-06, + "loss": 0.6284, + "step": 5599 + }, + { + "epoch": 0.35, + "grad_norm": 0.8766511678695679, + "learning_rate": 7.479832868157416e-06, + "loss": 0.6035, + "step": 5600 + }, + { + "epoch": 0.35, + "grad_norm": 0.8506543636322021, + "learning_rate": 7.4789419029459675e-06, + "loss": 0.5387, + "step": 5601 + }, + { + "epoch": 0.35, + "grad_norm": 0.8686463236808777, + "learning_rate": 7.478050833354897e-06, + "loss": 0.6335, + "step": 5602 + }, + { + "epoch": 0.35, + "grad_norm": 0.8849419951438904, + "learning_rate": 7.47715965942173e-06, + "loss": 0.5983, + "step": 5603 + }, + { + "epoch": 0.36, + "grad_norm": 0.8478937745094299, + "learning_rate": 7.476268381183984e-06, + "loss": 0.5266, + "step": 5604 + }, + { + "epoch": 0.36, + "grad_norm": 0.9055560827255249, + "learning_rate": 7.475376998679193e-06, + "loss": 0.6483, + "step": 5605 + }, + { + "epoch": 0.36, + "grad_norm": 0.9069551825523376, + "learning_rate": 7.474485511944887e-06, + "loss": 0.6147, + "step": 5606 + }, + { + "epoch": 0.36, + "grad_norm": 0.8674218058586121, + "learning_rate": 7.4735939210186036e-06, + "loss": 0.5723, + "step": 5607 + }, + { + "epoch": 0.36, + "grad_norm": 0.8519677519798279, + "learning_rate": 7.472702225937884e-06, + "loss": 0.5941, + "step": 5608 + }, + { + "epoch": 0.36, + "grad_norm": 0.9071281552314758, + "learning_rate": 7.471810426740278e-06, + "loss": 0.5995, + "step": 5609 + }, + { + "epoch": 0.36, + "grad_norm": 0.8679485321044922, + "learning_rate": 7.470918523463333e-06, + "loss": 0.5833, + "step": 5610 + }, + { + "epoch": 0.36, + "grad_norm": 0.8723646402359009, + "learning_rate": 7.470026516144604e-06, + "loss": 0.6437, + "step": 5611 + }, + { + "epoch": 0.36, + "grad_norm": 0.8494699001312256, + "learning_rate": 7.469134404821652e-06, + "loss": 0.5599, + "step": 5612 + }, + { + "epoch": 0.36, + "grad_norm": 0.9291670322418213, + "learning_rate": 7.468242189532039e-06, + "loss": 0.5898, + "step": 5613 + }, + { + "epoch": 0.36, + "grad_norm": 0.9132770895957947, + "learning_rate": 7.467349870313334e-06, + "loss": 0.5415, + "step": 5614 + }, + { + "epoch": 0.36, + "grad_norm": 0.8860681653022766, + "learning_rate": 7.466457447203109e-06, + "loss": 0.5958, + "step": 5615 + }, + { + "epoch": 0.36, + "grad_norm": 0.8410258293151855, + "learning_rate": 7.465564920238941e-06, + "loss": 0.5857, + "step": 5616 + }, + { + "epoch": 0.36, + "grad_norm": 0.8339051008224487, + "learning_rate": 7.464672289458411e-06, + "loss": 0.5913, + "step": 5617 + }, + { + "epoch": 0.36, + "grad_norm": 0.9605398774147034, + "learning_rate": 7.463779554899107e-06, + "loss": 0.6138, + "step": 5618 + }, + { + "epoch": 0.36, + "grad_norm": 0.8939738273620605, + "learning_rate": 7.462886716598614e-06, + "loss": 0.6042, + "step": 5619 + }, + { + "epoch": 0.36, + "grad_norm": 0.842354953289032, + "learning_rate": 7.46199377459453e-06, + "loss": 0.6018, + "step": 5620 + }, + { + "epoch": 0.36, + "grad_norm": 0.9019548892974854, + "learning_rate": 7.461100728924455e-06, + "loss": 0.556, + "step": 5621 + }, + { + "epoch": 0.36, + "grad_norm": 0.8871820569038391, + "learning_rate": 7.460207579625988e-06, + "loss": 0.5807, + "step": 5622 + }, + { + "epoch": 0.36, + "grad_norm": 0.8736592531204224, + "learning_rate": 7.459314326736738e-06, + "loss": 0.5672, + "step": 5623 + }, + { + "epoch": 0.36, + "grad_norm": 0.9413090348243713, + "learning_rate": 7.458420970294317e-06, + "loss": 0.6163, + "step": 5624 + }, + { + "epoch": 0.36, + "grad_norm": 0.8506051301956177, + "learning_rate": 7.457527510336342e-06, + "loss": 0.5363, + "step": 5625 + }, + { + "epoch": 0.36, + "grad_norm": 0.869382381439209, + "learning_rate": 7.456633946900432e-06, + "loss": 0.6099, + "step": 5626 + }, + { + "epoch": 0.36, + "grad_norm": 0.8384730219841003, + "learning_rate": 7.455740280024212e-06, + "loss": 0.5834, + "step": 5627 + }, + { + "epoch": 0.36, + "grad_norm": 0.8252652883529663, + "learning_rate": 7.454846509745311e-06, + "loss": 0.5918, + "step": 5628 + }, + { + "epoch": 0.36, + "grad_norm": 0.9574599862098694, + "learning_rate": 7.453952636101366e-06, + "loss": 0.6747, + "step": 5629 + }, + { + "epoch": 0.36, + "grad_norm": 0.8753709197044373, + "learning_rate": 7.45305865913001e-06, + "loss": 0.6559, + "step": 5630 + }, + { + "epoch": 0.36, + "grad_norm": 0.9628907442092896, + "learning_rate": 7.452164578868889e-06, + "loss": 0.6476, + "step": 5631 + }, + { + "epoch": 0.36, + "grad_norm": 0.9094507098197937, + "learning_rate": 7.451270395355647e-06, + "loss": 0.6579, + "step": 5632 + }, + { + "epoch": 0.36, + "grad_norm": 0.7960030436515808, + "learning_rate": 7.450376108627937e-06, + "loss": 0.5376, + "step": 5633 + }, + { + "epoch": 0.36, + "grad_norm": 0.8576752543449402, + "learning_rate": 7.449481718723411e-06, + "loss": 0.616, + "step": 5634 + }, + { + "epoch": 0.36, + "grad_norm": 0.8710610866546631, + "learning_rate": 7.448587225679733e-06, + "loss": 0.6292, + "step": 5635 + }, + { + "epoch": 0.36, + "grad_norm": 0.9258856177330017, + "learning_rate": 7.447692629534565e-06, + "loss": 0.5753, + "step": 5636 + }, + { + "epoch": 0.36, + "grad_norm": 0.911663830280304, + "learning_rate": 7.446797930325574e-06, + "loss": 0.6113, + "step": 5637 + }, + { + "epoch": 0.36, + "grad_norm": 0.8927462697029114, + "learning_rate": 7.445903128090435e-06, + "loss": 0.5843, + "step": 5638 + }, + { + "epoch": 0.36, + "grad_norm": 0.9059770703315735, + "learning_rate": 7.445008222866823e-06, + "loss": 0.5649, + "step": 5639 + }, + { + "epoch": 0.36, + "grad_norm": 0.8566960692405701, + "learning_rate": 7.444113214692422e-06, + "loss": 0.5713, + "step": 5640 + }, + { + "epoch": 0.36, + "grad_norm": 0.9214499592781067, + "learning_rate": 7.443218103604915e-06, + "loss": 0.5919, + "step": 5641 + }, + { + "epoch": 0.36, + "grad_norm": 0.9560672640800476, + "learning_rate": 7.442322889641992e-06, + "loss": 0.6563, + "step": 5642 + }, + { + "epoch": 0.36, + "grad_norm": 0.9713243246078491, + "learning_rate": 7.441427572841349e-06, + "loss": 0.6801, + "step": 5643 + }, + { + "epoch": 0.36, + "grad_norm": 0.8504186272621155, + "learning_rate": 7.440532153240685e-06, + "loss": 0.5809, + "step": 5644 + }, + { + "epoch": 0.36, + "grad_norm": 0.8800424337387085, + "learning_rate": 7.4396366308777015e-06, + "loss": 0.6323, + "step": 5645 + }, + { + "epoch": 0.36, + "grad_norm": 0.8435956239700317, + "learning_rate": 7.4387410057901056e-06, + "loss": 0.5616, + "step": 5646 + }, + { + "epoch": 0.36, + "grad_norm": 0.8319722414016724, + "learning_rate": 7.4378452780156094e-06, + "loss": 0.5398, + "step": 5647 + }, + { + "epoch": 0.36, + "grad_norm": 0.9279896020889282, + "learning_rate": 7.436949447591931e-06, + "loss": 0.5779, + "step": 5648 + }, + { + "epoch": 0.36, + "grad_norm": 0.8527793884277344, + "learning_rate": 7.4360535145567865e-06, + "loss": 0.5943, + "step": 5649 + }, + { + "epoch": 0.36, + "grad_norm": 0.8525310754776001, + "learning_rate": 7.435157478947905e-06, + "loss": 0.5427, + "step": 5650 + }, + { + "epoch": 0.36, + "grad_norm": 0.8320702910423279, + "learning_rate": 7.434261340803013e-06, + "loss": 0.5816, + "step": 5651 + }, + { + "epoch": 0.36, + "grad_norm": 0.8606296181678772, + "learning_rate": 7.433365100159844e-06, + "loss": 0.582, + "step": 5652 + }, + { + "epoch": 0.36, + "grad_norm": 0.9004180431365967, + "learning_rate": 7.432468757056136e-06, + "loss": 0.581, + "step": 5653 + }, + { + "epoch": 0.36, + "grad_norm": 0.858650267124176, + "learning_rate": 7.431572311529629e-06, + "loss": 0.6042, + "step": 5654 + }, + { + "epoch": 0.36, + "grad_norm": 0.9310391545295715, + "learning_rate": 7.4306757636180725e-06, + "loss": 0.5514, + "step": 5655 + }, + { + "epoch": 0.36, + "grad_norm": 0.8439887762069702, + "learning_rate": 7.429779113359214e-06, + "loss": 0.5636, + "step": 5656 + }, + { + "epoch": 0.36, + "grad_norm": 0.890603244304657, + "learning_rate": 7.428882360790811e-06, + "loss": 0.572, + "step": 5657 + }, + { + "epoch": 0.36, + "grad_norm": 0.9427062273025513, + "learning_rate": 7.427985505950619e-06, + "loss": 0.5997, + "step": 5658 + }, + { + "epoch": 0.36, + "grad_norm": 0.9088672399520874, + "learning_rate": 7.427088548876406e-06, + "loss": 0.6665, + "step": 5659 + }, + { + "epoch": 0.36, + "grad_norm": 0.9224042892456055, + "learning_rate": 7.426191489605936e-06, + "loss": 0.596, + "step": 5660 + }, + { + "epoch": 0.36, + "grad_norm": 0.8789502382278442, + "learning_rate": 7.425294328176984e-06, + "loss": 0.567, + "step": 5661 + }, + { + "epoch": 0.36, + "grad_norm": 0.8930298686027527, + "learning_rate": 7.4243970646273236e-06, + "loss": 0.5902, + "step": 5662 + }, + { + "epoch": 0.36, + "grad_norm": 0.8686020970344543, + "learning_rate": 7.423499698994737e-06, + "loss": 0.5841, + "step": 5663 + }, + { + "epoch": 0.36, + "grad_norm": 0.8149586319923401, + "learning_rate": 7.422602231317009e-06, + "loss": 0.5857, + "step": 5664 + }, + { + "epoch": 0.36, + "grad_norm": 0.9393472075462341, + "learning_rate": 7.421704661631929e-06, + "loss": 0.6012, + "step": 5665 + }, + { + "epoch": 0.36, + "grad_norm": 0.8844897150993347, + "learning_rate": 7.42080698997729e-06, + "loss": 0.6458, + "step": 5666 + }, + { + "epoch": 0.36, + "grad_norm": 0.8492723107337952, + "learning_rate": 7.419909216390889e-06, + "loss": 0.6077, + "step": 5667 + }, + { + "epoch": 0.36, + "grad_norm": 0.8630576133728027, + "learning_rate": 7.4190113409105304e-06, + "loss": 0.5597, + "step": 5668 + }, + { + "epoch": 0.36, + "grad_norm": 0.8691625595092773, + "learning_rate": 7.418113363574018e-06, + "loss": 0.5845, + "step": 5669 + }, + { + "epoch": 0.36, + "grad_norm": 0.9205952286720276, + "learning_rate": 7.417215284419165e-06, + "loss": 0.6316, + "step": 5670 + }, + { + "epoch": 0.36, + "grad_norm": 0.9179142713546753, + "learning_rate": 7.416317103483784e-06, + "loss": 0.6545, + "step": 5671 + }, + { + "epoch": 0.36, + "grad_norm": 0.9906255602836609, + "learning_rate": 7.415418820805698e-06, + "loss": 0.5923, + "step": 5672 + }, + { + "epoch": 0.36, + "grad_norm": 0.8854468464851379, + "learning_rate": 7.414520436422725e-06, + "loss": 0.6767, + "step": 5673 + }, + { + "epoch": 0.36, + "grad_norm": 0.8236328959465027, + "learning_rate": 7.413621950372698e-06, + "loss": 0.5705, + "step": 5674 + }, + { + "epoch": 0.36, + "grad_norm": 0.8900964856147766, + "learning_rate": 7.4127233626934456e-06, + "loss": 0.615, + "step": 5675 + }, + { + "epoch": 0.36, + "grad_norm": 0.9543713927268982, + "learning_rate": 7.411824673422808e-06, + "loss": 0.6227, + "step": 5676 + }, + { + "epoch": 0.36, + "grad_norm": 0.8835585713386536, + "learning_rate": 7.410925882598621e-06, + "loss": 0.6486, + "step": 5677 + }, + { + "epoch": 0.36, + "grad_norm": 0.8607789874076843, + "learning_rate": 7.410026990258734e-06, + "loss": 0.595, + "step": 5678 + }, + { + "epoch": 0.36, + "grad_norm": 0.8899136781692505, + "learning_rate": 7.409127996440993e-06, + "loss": 0.5775, + "step": 5679 + }, + { + "epoch": 0.36, + "grad_norm": 0.9142584204673767, + "learning_rate": 7.408228901183254e-06, + "loss": 0.6192, + "step": 5680 + }, + { + "epoch": 0.36, + "grad_norm": 0.9016997218132019, + "learning_rate": 7.407329704523372e-06, + "loss": 0.6105, + "step": 5681 + }, + { + "epoch": 0.36, + "grad_norm": 0.9058455228805542, + "learning_rate": 7.406430406499212e-06, + "loss": 0.6091, + "step": 5682 + }, + { + "epoch": 0.36, + "grad_norm": 0.8867766261100769, + "learning_rate": 7.405531007148638e-06, + "loss": 0.6034, + "step": 5683 + }, + { + "epoch": 0.36, + "grad_norm": 0.8825892210006714, + "learning_rate": 7.404631506509523e-06, + "loss": 0.5718, + "step": 5684 + }, + { + "epoch": 0.36, + "grad_norm": 0.9403483867645264, + "learning_rate": 7.403731904619739e-06, + "loss": 0.6664, + "step": 5685 + }, + { + "epoch": 0.36, + "grad_norm": 0.8549020886421204, + "learning_rate": 7.402832201517166e-06, + "loss": 0.6042, + "step": 5686 + }, + { + "epoch": 0.36, + "grad_norm": 0.9207231402397156, + "learning_rate": 7.40193239723969e-06, + "loss": 0.6183, + "step": 5687 + }, + { + "epoch": 0.36, + "grad_norm": 0.8826068639755249, + "learning_rate": 7.401032491825194e-06, + "loss": 0.6157, + "step": 5688 + }, + { + "epoch": 0.36, + "grad_norm": 0.9273738265037537, + "learning_rate": 7.400132485311573e-06, + "loss": 0.6135, + "step": 5689 + }, + { + "epoch": 0.36, + "grad_norm": 0.9432485699653625, + "learning_rate": 7.399232377736722e-06, + "loss": 0.641, + "step": 5690 + }, + { + "epoch": 0.36, + "grad_norm": 0.957802951335907, + "learning_rate": 7.398332169138544e-06, + "loss": 0.614, + "step": 5691 + }, + { + "epoch": 0.36, + "grad_norm": 0.9042625427246094, + "learning_rate": 7.397431859554941e-06, + "loss": 0.6075, + "step": 5692 + }, + { + "epoch": 0.36, + "grad_norm": 0.8862330317497253, + "learning_rate": 7.396531449023821e-06, + "loss": 0.5823, + "step": 5693 + }, + { + "epoch": 0.36, + "grad_norm": 0.8898954391479492, + "learning_rate": 7.395630937583099e-06, + "loss": 0.6319, + "step": 5694 + }, + { + "epoch": 0.36, + "grad_norm": 0.8645822405815125, + "learning_rate": 7.394730325270693e-06, + "loss": 0.6029, + "step": 5695 + }, + { + "epoch": 0.36, + "grad_norm": 0.9035110473632812, + "learning_rate": 7.393829612124524e-06, + "loss": 0.6147, + "step": 5696 + }, + { + "epoch": 0.36, + "grad_norm": 0.8781278133392334, + "learning_rate": 7.392928798182516e-06, + "loss": 0.5922, + "step": 5697 + }, + { + "epoch": 0.36, + "grad_norm": 0.8469416499137878, + "learning_rate": 7.392027883482602e-06, + "loss": 0.6564, + "step": 5698 + }, + { + "epoch": 0.36, + "grad_norm": 0.8922897577285767, + "learning_rate": 7.391126868062714e-06, + "loss": 0.5904, + "step": 5699 + }, + { + "epoch": 0.36, + "grad_norm": 0.8153558969497681, + "learning_rate": 7.390225751960792e-06, + "loss": 0.5945, + "step": 5700 + }, + { + "epoch": 0.36, + "grad_norm": 0.8306947350502014, + "learning_rate": 7.389324535214779e-06, + "loss": 0.623, + "step": 5701 + }, + { + "epoch": 0.36, + "grad_norm": 0.9121823906898499, + "learning_rate": 7.388423217862621e-06, + "loss": 0.6231, + "step": 5702 + }, + { + "epoch": 0.36, + "grad_norm": 0.9361130595207214, + "learning_rate": 7.387521799942271e-06, + "loss": 0.6028, + "step": 5703 + }, + { + "epoch": 0.36, + "grad_norm": 0.8886232972145081, + "learning_rate": 7.386620281491683e-06, + "loss": 0.6612, + "step": 5704 + }, + { + "epoch": 0.36, + "grad_norm": 0.8650026917457581, + "learning_rate": 7.385718662548817e-06, + "loss": 0.6552, + "step": 5705 + }, + { + "epoch": 0.36, + "grad_norm": 0.9328054785728455, + "learning_rate": 7.384816943151638e-06, + "loss": 0.604, + "step": 5706 + }, + { + "epoch": 0.36, + "grad_norm": 0.8479319214820862, + "learning_rate": 7.383915123338113e-06, + "loss": 0.5936, + "step": 5707 + }, + { + "epoch": 0.36, + "grad_norm": 0.8170728087425232, + "learning_rate": 7.3830132031462165e-06, + "loss": 0.6188, + "step": 5708 + }, + { + "epoch": 0.36, + "grad_norm": 0.8698776364326477, + "learning_rate": 7.382111182613923e-06, + "loss": 0.5548, + "step": 5709 + }, + { + "epoch": 0.36, + "grad_norm": 0.8348639607429504, + "learning_rate": 7.381209061779214e-06, + "loss": 0.635, + "step": 5710 + }, + { + "epoch": 0.36, + "grad_norm": 0.9122574329376221, + "learning_rate": 7.380306840680076e-06, + "loss": 0.6264, + "step": 5711 + }, + { + "epoch": 0.36, + "grad_norm": 0.8634544014930725, + "learning_rate": 7.379404519354496e-06, + "loss": 0.6421, + "step": 5712 + }, + { + "epoch": 0.36, + "grad_norm": 0.830940306186676, + "learning_rate": 7.378502097840471e-06, + "loss": 0.599, + "step": 5713 + }, + { + "epoch": 0.36, + "grad_norm": 0.9275731444358826, + "learning_rate": 7.377599576175995e-06, + "loss": 0.6653, + "step": 5714 + }, + { + "epoch": 0.36, + "grad_norm": 0.9509021639823914, + "learning_rate": 7.376696954399073e-06, + "loss": 0.6107, + "step": 5715 + }, + { + "epoch": 0.36, + "grad_norm": 0.8291517496109009, + "learning_rate": 7.37579423254771e-06, + "loss": 0.6103, + "step": 5716 + }, + { + "epoch": 0.36, + "grad_norm": 0.864422082901001, + "learning_rate": 7.374891410659917e-06, + "loss": 0.6241, + "step": 5717 + }, + { + "epoch": 0.36, + "grad_norm": 0.8923708200454712, + "learning_rate": 7.373988488773708e-06, + "loss": 0.6212, + "step": 5718 + }, + { + "epoch": 0.36, + "grad_norm": 0.8855364918708801, + "learning_rate": 7.3730854669271015e-06, + "loss": 0.5703, + "step": 5719 + }, + { + "epoch": 0.36, + "grad_norm": 0.8736538290977478, + "learning_rate": 7.372182345158122e-06, + "loss": 0.6469, + "step": 5720 + }, + { + "epoch": 0.36, + "grad_norm": 0.9270285964012146, + "learning_rate": 7.3712791235047976e-06, + "loss": 0.6194, + "step": 5721 + }, + { + "epoch": 0.36, + "grad_norm": 0.9518702030181885, + "learning_rate": 7.370375802005157e-06, + "loss": 0.6458, + "step": 5722 + }, + { + "epoch": 0.36, + "grad_norm": 0.948585569858551, + "learning_rate": 7.369472380697236e-06, + "loss": 0.5592, + "step": 5723 + }, + { + "epoch": 0.36, + "grad_norm": 0.8145323991775513, + "learning_rate": 7.368568859619078e-06, + "loss": 0.5643, + "step": 5724 + }, + { + "epoch": 0.36, + "grad_norm": 0.88991379737854, + "learning_rate": 7.3676652388087234e-06, + "loss": 0.6035, + "step": 5725 + }, + { + "epoch": 0.36, + "grad_norm": 0.9013904333114624, + "learning_rate": 7.366761518304223e-06, + "loss": 0.598, + "step": 5726 + }, + { + "epoch": 0.36, + "grad_norm": 0.9081125259399414, + "learning_rate": 7.365857698143628e-06, + "loss": 0.5988, + "step": 5727 + }, + { + "epoch": 0.36, + "grad_norm": 0.9681587219238281, + "learning_rate": 7.364953778364996e-06, + "loss": 0.6415, + "step": 5728 + }, + { + "epoch": 0.36, + "grad_norm": 0.8465878367424011, + "learning_rate": 7.364049759006387e-06, + "loss": 0.6199, + "step": 5729 + }, + { + "epoch": 0.36, + "grad_norm": 0.8657549619674683, + "learning_rate": 7.363145640105867e-06, + "loss": 0.5782, + "step": 5730 + }, + { + "epoch": 0.36, + "grad_norm": 0.8750969171524048, + "learning_rate": 7.362241421701505e-06, + "loss": 0.5967, + "step": 5731 + }, + { + "epoch": 0.36, + "grad_norm": 0.9876574277877808, + "learning_rate": 7.3613371038313744e-06, + "loss": 0.6121, + "step": 5732 + }, + { + "epoch": 0.36, + "grad_norm": 0.906506359577179, + "learning_rate": 7.360432686533552e-06, + "loss": 0.6573, + "step": 5733 + }, + { + "epoch": 0.36, + "grad_norm": 0.8797792196273804, + "learning_rate": 7.359528169846121e-06, + "loss": 0.6132, + "step": 5734 + }, + { + "epoch": 0.36, + "grad_norm": 0.7947115898132324, + "learning_rate": 7.358623553807167e-06, + "loss": 0.5339, + "step": 5735 + }, + { + "epoch": 0.36, + "grad_norm": 0.8234474658966064, + "learning_rate": 7.35771883845478e-06, + "loss": 0.5606, + "step": 5736 + }, + { + "epoch": 0.36, + "grad_norm": 0.827809751033783, + "learning_rate": 7.356814023827055e-06, + "loss": 0.5549, + "step": 5737 + }, + { + "epoch": 0.36, + "grad_norm": 0.9173133373260498, + "learning_rate": 7.35590910996209e-06, + "loss": 0.608, + "step": 5738 + }, + { + "epoch": 0.36, + "grad_norm": 0.8398633599281311, + "learning_rate": 7.355004096897987e-06, + "loss": 0.5656, + "step": 5739 + }, + { + "epoch": 0.36, + "grad_norm": 0.8507029414176941, + "learning_rate": 7.354098984672856e-06, + "loss": 0.5315, + "step": 5740 + }, + { + "epoch": 0.36, + "grad_norm": 0.9494758248329163, + "learning_rate": 7.353193773324805e-06, + "loss": 0.6437, + "step": 5741 + }, + { + "epoch": 0.36, + "grad_norm": 0.8865925669670105, + "learning_rate": 7.35228846289195e-06, + "loss": 0.5627, + "step": 5742 + }, + { + "epoch": 0.36, + "grad_norm": 0.9043111205101013, + "learning_rate": 7.351383053412411e-06, + "loss": 0.6526, + "step": 5743 + }, + { + "epoch": 0.36, + "grad_norm": 0.8272423148155212, + "learning_rate": 7.350477544924313e-06, + "loss": 0.6375, + "step": 5744 + }, + { + "epoch": 0.36, + "grad_norm": 0.8952882885932922, + "learning_rate": 7.349571937465782e-06, + "loss": 0.6383, + "step": 5745 + }, + { + "epoch": 0.36, + "grad_norm": 0.9154927730560303, + "learning_rate": 7.348666231074948e-06, + "loss": 0.5916, + "step": 5746 + }, + { + "epoch": 0.36, + "grad_norm": 0.8953961730003357, + "learning_rate": 7.3477604257899515e-06, + "loss": 0.6092, + "step": 5747 + }, + { + "epoch": 0.36, + "grad_norm": 0.9233314990997314, + "learning_rate": 7.346854521648929e-06, + "loss": 0.6414, + "step": 5748 + }, + { + "epoch": 0.36, + "grad_norm": 0.8458792567253113, + "learning_rate": 7.345948518690029e-06, + "loss": 0.556, + "step": 5749 + }, + { + "epoch": 0.36, + "grad_norm": 0.9279628396034241, + "learning_rate": 7.345042416951395e-06, + "loss": 0.6147, + "step": 5750 + }, + { + "epoch": 0.36, + "grad_norm": 0.8384361267089844, + "learning_rate": 7.344136216471185e-06, + "loss": 0.5691, + "step": 5751 + }, + { + "epoch": 0.36, + "grad_norm": 0.8720436096191406, + "learning_rate": 7.343229917287552e-06, + "loss": 0.5882, + "step": 5752 + }, + { + "epoch": 0.36, + "grad_norm": 0.9607126712799072, + "learning_rate": 7.34232351943866e-06, + "loss": 0.5573, + "step": 5753 + }, + { + "epoch": 0.36, + "grad_norm": 0.8432719707489014, + "learning_rate": 7.341417022962671e-06, + "loss": 0.54, + "step": 5754 + }, + { + "epoch": 0.36, + "grad_norm": 0.9096271395683289, + "learning_rate": 7.340510427897759e-06, + "loss": 0.5467, + "step": 5755 + }, + { + "epoch": 0.36, + "grad_norm": 0.9777395725250244, + "learning_rate": 7.339603734282093e-06, + "loss": 0.6271, + "step": 5756 + }, + { + "epoch": 0.36, + "grad_norm": 0.8779467344284058, + "learning_rate": 7.338696942153855e-06, + "loss": 0.6058, + "step": 5757 + }, + { + "epoch": 0.36, + "grad_norm": 0.8599120378494263, + "learning_rate": 7.337790051551221e-06, + "loss": 0.5776, + "step": 5758 + }, + { + "epoch": 0.36, + "grad_norm": 0.8779652118682861, + "learning_rate": 7.3368830625123835e-06, + "loss": 0.6134, + "step": 5759 + }, + { + "epoch": 0.36, + "grad_norm": 0.903643012046814, + "learning_rate": 7.335975975075529e-06, + "loss": 0.6908, + "step": 5760 + }, + { + "epoch": 0.36, + "grad_norm": 0.895206093788147, + "learning_rate": 7.3350687892788505e-06, + "loss": 0.5559, + "step": 5761 + }, + { + "epoch": 0.37, + "grad_norm": 0.907238781452179, + "learning_rate": 7.33416150516055e-06, + "loss": 0.5807, + "step": 5762 + }, + { + "epoch": 0.37, + "grad_norm": 0.877465546131134, + "learning_rate": 7.333254122758828e-06, + "loss": 0.6332, + "step": 5763 + }, + { + "epoch": 0.37, + "grad_norm": 0.8640191555023193, + "learning_rate": 7.332346642111893e-06, + "loss": 0.6103, + "step": 5764 + }, + { + "epoch": 0.37, + "grad_norm": 0.8886452317237854, + "learning_rate": 7.331439063257953e-06, + "loss": 0.6206, + "step": 5765 + }, + { + "epoch": 0.37, + "grad_norm": 0.8364898562431335, + "learning_rate": 7.330531386235225e-06, + "loss": 0.5864, + "step": 5766 + }, + { + "epoch": 0.37, + "grad_norm": 0.9376548528671265, + "learning_rate": 7.329623611081927e-06, + "loss": 0.6066, + "step": 5767 + }, + { + "epoch": 0.37, + "grad_norm": 0.8801112174987793, + "learning_rate": 7.3287157378362846e-06, + "loss": 0.5709, + "step": 5768 + }, + { + "epoch": 0.37, + "grad_norm": 0.8653738498687744, + "learning_rate": 7.327807766536521e-06, + "loss": 0.6407, + "step": 5769 + }, + { + "epoch": 0.37, + "grad_norm": 0.8694636821746826, + "learning_rate": 7.3268996972208725e-06, + "loss": 0.6672, + "step": 5770 + }, + { + "epoch": 0.37, + "grad_norm": 0.8904354572296143, + "learning_rate": 7.325991529927572e-06, + "loss": 0.6163, + "step": 5771 + }, + { + "epoch": 0.37, + "grad_norm": 0.9113852977752686, + "learning_rate": 7.325083264694859e-06, + "loss": 0.5539, + "step": 5772 + }, + { + "epoch": 0.37, + "grad_norm": 0.8761439919471741, + "learning_rate": 7.324174901560978e-06, + "loss": 0.6205, + "step": 5773 + }, + { + "epoch": 0.37, + "grad_norm": 0.8492023944854736, + "learning_rate": 7.323266440564177e-06, + "loss": 0.5814, + "step": 5774 + }, + { + "epoch": 0.37, + "grad_norm": 0.8769062757492065, + "learning_rate": 7.32235788174271e-06, + "loss": 0.5551, + "step": 5775 + }, + { + "epoch": 0.37, + "grad_norm": 0.8555404543876648, + "learning_rate": 7.32144922513483e-06, + "loss": 0.5793, + "step": 5776 + }, + { + "epoch": 0.37, + "grad_norm": 0.874083936214447, + "learning_rate": 7.320540470778799e-06, + "loss": 0.6028, + "step": 5777 + }, + { + "epoch": 0.37, + "grad_norm": 0.9265373945236206, + "learning_rate": 7.319631618712881e-06, + "loss": 0.6117, + "step": 5778 + }, + { + "epoch": 0.37, + "grad_norm": 0.8393657803535461, + "learning_rate": 7.318722668975347e-06, + "loss": 0.5443, + "step": 5779 + }, + { + "epoch": 0.37, + "grad_norm": 0.844636857509613, + "learning_rate": 7.317813621604466e-06, + "loss": 0.6026, + "step": 5780 + }, + { + "epoch": 0.37, + "grad_norm": 0.853661060333252, + "learning_rate": 7.316904476638515e-06, + "loss": 0.565, + "step": 5781 + }, + { + "epoch": 0.37, + "grad_norm": 0.871853768825531, + "learning_rate": 7.315995234115778e-06, + "loss": 0.5588, + "step": 5782 + }, + { + "epoch": 0.37, + "grad_norm": 0.814250111579895, + "learning_rate": 7.315085894074539e-06, + "loss": 0.5909, + "step": 5783 + }, + { + "epoch": 0.37, + "grad_norm": 0.904152512550354, + "learning_rate": 7.314176456553086e-06, + "loss": 0.5795, + "step": 5784 + }, + { + "epoch": 0.37, + "grad_norm": 0.8134939074516296, + "learning_rate": 7.3132669215897125e-06, + "loss": 0.6044, + "step": 5785 + }, + { + "epoch": 0.37, + "grad_norm": 0.8810901641845703, + "learning_rate": 7.312357289222717e-06, + "loss": 0.6512, + "step": 5786 + }, + { + "epoch": 0.37, + "grad_norm": 0.8648774027824402, + "learning_rate": 7.3114475594904e-06, + "loss": 0.5882, + "step": 5787 + }, + { + "epoch": 0.37, + "grad_norm": 0.8309141993522644, + "learning_rate": 7.310537732431067e-06, + "loss": 0.6525, + "step": 5788 + }, + { + "epoch": 0.37, + "grad_norm": 0.9296196699142456, + "learning_rate": 7.309627808083027e-06, + "loss": 0.642, + "step": 5789 + }, + { + "epoch": 0.37, + "grad_norm": 0.9500271081924438, + "learning_rate": 7.308717786484596e-06, + "loss": 0.5761, + "step": 5790 + }, + { + "epoch": 0.37, + "grad_norm": 0.9415786266326904, + "learning_rate": 7.30780766767409e-06, + "loss": 0.7613, + "step": 5791 + }, + { + "epoch": 0.37, + "grad_norm": 0.771344780921936, + "learning_rate": 7.306897451689832e-06, + "loss": 0.5429, + "step": 5792 + }, + { + "epoch": 0.37, + "grad_norm": 0.8383582830429077, + "learning_rate": 7.305987138570145e-06, + "loss": 0.5749, + "step": 5793 + }, + { + "epoch": 0.37, + "grad_norm": 0.9000876545906067, + "learning_rate": 7.305076728353364e-06, + "loss": 0.592, + "step": 5794 + }, + { + "epoch": 0.37, + "grad_norm": 0.841670572757721, + "learning_rate": 7.30416622107782e-06, + "loss": 0.5762, + "step": 5795 + }, + { + "epoch": 0.37, + "grad_norm": 0.8553557395935059, + "learning_rate": 7.303255616781853e-06, + "loss": 0.6068, + "step": 5796 + }, + { + "epoch": 0.37, + "grad_norm": 0.9532732367515564, + "learning_rate": 7.3023449155038016e-06, + "loss": 0.6489, + "step": 5797 + }, + { + "epoch": 0.37, + "grad_norm": 0.8923346996307373, + "learning_rate": 7.301434117282018e-06, + "loss": 0.6421, + "step": 5798 + }, + { + "epoch": 0.37, + "grad_norm": 0.8571204543113708, + "learning_rate": 7.300523222154848e-06, + "loss": 0.5909, + "step": 5799 + }, + { + "epoch": 0.37, + "grad_norm": 0.9138479232788086, + "learning_rate": 7.299612230160648e-06, + "loss": 0.6169, + "step": 5800 + }, + { + "epoch": 0.37, + "grad_norm": 0.9442511796951294, + "learning_rate": 7.298701141337778e-06, + "loss": 0.5826, + "step": 5801 + }, + { + "epoch": 0.37, + "grad_norm": 0.8205499053001404, + "learning_rate": 7.2977899557246e-06, + "loss": 0.6157, + "step": 5802 + }, + { + "epoch": 0.37, + "grad_norm": 0.8094413876533508, + "learning_rate": 7.2968786733594795e-06, + "loss": 0.5982, + "step": 5803 + }, + { + "epoch": 0.37, + "grad_norm": 0.8342402577400208, + "learning_rate": 7.295967294280788e-06, + "loss": 0.6123, + "step": 5804 + }, + { + "epoch": 0.37, + "grad_norm": 0.883686363697052, + "learning_rate": 7.2950558185269005e-06, + "loss": 0.5801, + "step": 5805 + }, + { + "epoch": 0.37, + "grad_norm": 0.9100261330604553, + "learning_rate": 7.294144246136198e-06, + "loss": 0.5688, + "step": 5806 + }, + { + "epoch": 0.37, + "grad_norm": 0.8626593351364136, + "learning_rate": 7.29323257714706e-06, + "loss": 0.6418, + "step": 5807 + }, + { + "epoch": 0.37, + "grad_norm": 0.8720927834510803, + "learning_rate": 7.292320811597877e-06, + "loss": 0.6389, + "step": 5808 + }, + { + "epoch": 0.37, + "grad_norm": 0.8386964797973633, + "learning_rate": 7.291408949527039e-06, + "loss": 0.5383, + "step": 5809 + }, + { + "epoch": 0.37, + "grad_norm": 0.921635091304779, + "learning_rate": 7.290496990972942e-06, + "loss": 0.5874, + "step": 5810 + }, + { + "epoch": 0.37, + "grad_norm": 0.8803329467773438, + "learning_rate": 7.2895849359739834e-06, + "loss": 0.5582, + "step": 5811 + }, + { + "epoch": 0.37, + "grad_norm": 0.9119853973388672, + "learning_rate": 7.288672784568568e-06, + "loss": 0.6075, + "step": 5812 + }, + { + "epoch": 0.37, + "grad_norm": 0.8550745844841003, + "learning_rate": 7.2877605367951055e-06, + "loss": 0.5818, + "step": 5813 + }, + { + "epoch": 0.37, + "grad_norm": 0.8705887794494629, + "learning_rate": 7.286848192692003e-06, + "loss": 0.5768, + "step": 5814 + }, + { + "epoch": 0.37, + "grad_norm": 0.8665969371795654, + "learning_rate": 7.28593575229768e-06, + "loss": 0.6108, + "step": 5815 + }, + { + "epoch": 0.37, + "grad_norm": 0.8779606819152832, + "learning_rate": 7.285023215650553e-06, + "loss": 0.5621, + "step": 5816 + }, + { + "epoch": 0.37, + "grad_norm": 0.8697792291641235, + "learning_rate": 7.2841105827890475e-06, + "loss": 0.6248, + "step": 5817 + }, + { + "epoch": 0.37, + "grad_norm": 0.9603003859519958, + "learning_rate": 7.283197853751593e-06, + "loss": 0.6527, + "step": 5818 + }, + { + "epoch": 0.37, + "grad_norm": 0.9190054535865784, + "learning_rate": 7.282285028576618e-06, + "loss": 0.6703, + "step": 5819 + }, + { + "epoch": 0.37, + "grad_norm": 0.9047878980636597, + "learning_rate": 7.28137210730256e-06, + "loss": 0.6406, + "step": 5820 + }, + { + "epoch": 0.37, + "grad_norm": 0.8862581849098206, + "learning_rate": 7.280459089967861e-06, + "loss": 0.5556, + "step": 5821 + }, + { + "epoch": 0.37, + "grad_norm": 0.8609002232551575, + "learning_rate": 7.279545976610961e-06, + "loss": 0.5763, + "step": 5822 + }, + { + "epoch": 0.37, + "grad_norm": 0.9380242824554443, + "learning_rate": 7.278632767270309e-06, + "loss": 0.617, + "step": 5823 + }, + { + "epoch": 0.37, + "grad_norm": 0.827458381652832, + "learning_rate": 7.277719461984361e-06, + "loss": 0.5788, + "step": 5824 + }, + { + "epoch": 0.37, + "grad_norm": 0.8551861047744751, + "learning_rate": 7.276806060791567e-06, + "loss": 0.6054, + "step": 5825 + }, + { + "epoch": 0.37, + "grad_norm": 0.8845090270042419, + "learning_rate": 7.275892563730393e-06, + "loss": 0.6049, + "step": 5826 + }, + { + "epoch": 0.37, + "grad_norm": 0.8537983894348145, + "learning_rate": 7.274978970839297e-06, + "loss": 0.5715, + "step": 5827 + }, + { + "epoch": 0.37, + "grad_norm": 0.8627631068229675, + "learning_rate": 7.274065282156752e-06, + "loss": 0.5343, + "step": 5828 + }, + { + "epoch": 0.37, + "grad_norm": 0.9428598284721375, + "learning_rate": 7.273151497721229e-06, + "loss": 0.6423, + "step": 5829 + }, + { + "epoch": 0.37, + "grad_norm": 0.8636415600776672, + "learning_rate": 7.272237617571205e-06, + "loss": 0.5829, + "step": 5830 + }, + { + "epoch": 0.37, + "grad_norm": 0.9982849359512329, + "learning_rate": 7.2713236417451584e-06, + "loss": 0.6376, + "step": 5831 + }, + { + "epoch": 0.37, + "grad_norm": 0.8668151497840881, + "learning_rate": 7.2704095702815754e-06, + "loss": 0.5882, + "step": 5832 + }, + { + "epoch": 0.37, + "grad_norm": 0.9315029382705688, + "learning_rate": 7.269495403218943e-06, + "loss": 0.5898, + "step": 5833 + }, + { + "epoch": 0.37, + "grad_norm": 0.8428326845169067, + "learning_rate": 7.268581140595754e-06, + "loss": 0.5528, + "step": 5834 + }, + { + "epoch": 0.37, + "grad_norm": 0.8342899084091187, + "learning_rate": 7.267666782450505e-06, + "loss": 0.5497, + "step": 5835 + }, + { + "epoch": 0.37, + "grad_norm": 0.9424355030059814, + "learning_rate": 7.266752328821698e-06, + "loss": 0.6838, + "step": 5836 + }, + { + "epoch": 0.37, + "grad_norm": 0.8566783666610718, + "learning_rate": 7.265837779747834e-06, + "loss": 0.5478, + "step": 5837 + }, + { + "epoch": 0.37, + "grad_norm": 0.982837975025177, + "learning_rate": 7.264923135267425e-06, + "loss": 0.6028, + "step": 5838 + }, + { + "epoch": 0.37, + "grad_norm": 0.9721706509590149, + "learning_rate": 7.264008395418981e-06, + "loss": 0.6461, + "step": 5839 + }, + { + "epoch": 0.37, + "grad_norm": 0.8464512825012207, + "learning_rate": 7.263093560241019e-06, + "loss": 0.5897, + "step": 5840 + }, + { + "epoch": 0.37, + "grad_norm": 0.8291548490524292, + "learning_rate": 7.262178629772061e-06, + "loss": 0.5641, + "step": 5841 + }, + { + "epoch": 0.37, + "grad_norm": 0.9384708404541016, + "learning_rate": 7.261263604050628e-06, + "loss": 0.5823, + "step": 5842 + }, + { + "epoch": 0.37, + "grad_norm": 0.8504778146743774, + "learning_rate": 7.260348483115254e-06, + "loss": 0.5374, + "step": 5843 + }, + { + "epoch": 0.37, + "grad_norm": 0.848728358745575, + "learning_rate": 7.259433267004466e-06, + "loss": 0.5656, + "step": 5844 + }, + { + "epoch": 0.37, + "grad_norm": 0.8592720031738281, + "learning_rate": 7.258517955756805e-06, + "loss": 0.6059, + "step": 5845 + }, + { + "epoch": 0.37, + "grad_norm": 0.8951132297515869, + "learning_rate": 7.257602549410808e-06, + "loss": 0.5942, + "step": 5846 + }, + { + "epoch": 0.37, + "grad_norm": 0.9378473162651062, + "learning_rate": 7.256687048005024e-06, + "loss": 0.6411, + "step": 5847 + }, + { + "epoch": 0.37, + "grad_norm": 0.9194514751434326, + "learning_rate": 7.255771451577996e-06, + "loss": 0.6337, + "step": 5848 + }, + { + "epoch": 0.37, + "grad_norm": 0.8979505300521851, + "learning_rate": 7.254855760168281e-06, + "loss": 0.5663, + "step": 5849 + }, + { + "epoch": 0.37, + "grad_norm": 0.8199179768562317, + "learning_rate": 7.2539399738144325e-06, + "loss": 0.5586, + "step": 5850 + }, + { + "epoch": 0.37, + "grad_norm": 0.9413596391677856, + "learning_rate": 7.2530240925550145e-06, + "loss": 0.589, + "step": 5851 + }, + { + "epoch": 0.37, + "grad_norm": 0.9242424964904785, + "learning_rate": 7.252108116428589e-06, + "loss": 0.5799, + "step": 5852 + }, + { + "epoch": 0.37, + "grad_norm": 0.9168336391448975, + "learning_rate": 7.251192045473725e-06, + "loss": 0.5839, + "step": 5853 + }, + { + "epoch": 0.37, + "grad_norm": 0.9121633768081665, + "learning_rate": 7.250275879728995e-06, + "loss": 0.5788, + "step": 5854 + }, + { + "epoch": 0.37, + "grad_norm": 0.8337844014167786, + "learning_rate": 7.249359619232976e-06, + "loss": 0.5781, + "step": 5855 + }, + { + "epoch": 0.37, + "grad_norm": 0.9401801228523254, + "learning_rate": 7.24844326402425e-06, + "loss": 0.6175, + "step": 5856 + }, + { + "epoch": 0.37, + "grad_norm": 0.8701263666152954, + "learning_rate": 7.247526814141398e-06, + "loss": 0.5735, + "step": 5857 + }, + { + "epoch": 0.37, + "grad_norm": 0.8712503910064697, + "learning_rate": 7.2466102696230115e-06, + "loss": 0.6065, + "step": 5858 + }, + { + "epoch": 0.37, + "grad_norm": 0.8291772603988647, + "learning_rate": 7.24569363050768e-06, + "loss": 0.6063, + "step": 5859 + }, + { + "epoch": 0.37, + "grad_norm": 0.843247652053833, + "learning_rate": 7.244776896834004e-06, + "loss": 0.5898, + "step": 5860 + }, + { + "epoch": 0.37, + "grad_norm": 0.8736797571182251, + "learning_rate": 7.243860068640581e-06, + "loss": 0.6175, + "step": 5861 + }, + { + "epoch": 0.37, + "grad_norm": 0.9105240106582642, + "learning_rate": 7.242943145966016e-06, + "loss": 0.5534, + "step": 5862 + }, + { + "epoch": 0.37, + "grad_norm": 0.901670515537262, + "learning_rate": 7.242026128848918e-06, + "loss": 0.5567, + "step": 5863 + }, + { + "epoch": 0.37, + "grad_norm": 0.8726474642753601, + "learning_rate": 7.241109017327901e-06, + "loss": 0.5742, + "step": 5864 + }, + { + "epoch": 0.37, + "grad_norm": 0.8959450125694275, + "learning_rate": 7.240191811441577e-06, + "loss": 0.6154, + "step": 5865 + }, + { + "epoch": 0.37, + "grad_norm": 0.9082683324813843, + "learning_rate": 7.239274511228569e-06, + "loss": 0.6233, + "step": 5866 + }, + { + "epoch": 0.37, + "grad_norm": 0.8369854092597961, + "learning_rate": 7.238357116727502e-06, + "loss": 0.634, + "step": 5867 + }, + { + "epoch": 0.37, + "grad_norm": 0.9661149978637695, + "learning_rate": 7.2374396279770044e-06, + "loss": 0.5991, + "step": 5868 + }, + { + "epoch": 0.37, + "grad_norm": 0.8639382719993591, + "learning_rate": 7.236522045015706e-06, + "loss": 0.5692, + "step": 5869 + }, + { + "epoch": 0.37, + "grad_norm": 0.8862959742546082, + "learning_rate": 7.235604367882245e-06, + "loss": 0.6189, + "step": 5870 + }, + { + "epoch": 0.37, + "grad_norm": 0.8773701190948486, + "learning_rate": 7.234686596615262e-06, + "loss": 0.56, + "step": 5871 + }, + { + "epoch": 0.37, + "grad_norm": 0.8855640292167664, + "learning_rate": 7.2337687312534e-06, + "loss": 0.618, + "step": 5872 + }, + { + "epoch": 0.37, + "grad_norm": 0.8535584807395935, + "learning_rate": 7.232850771835307e-06, + "loss": 0.6135, + "step": 5873 + }, + { + "epoch": 0.37, + "grad_norm": 0.9095380306243896, + "learning_rate": 7.231932718399635e-06, + "loss": 0.6002, + "step": 5874 + }, + { + "epoch": 0.37, + "grad_norm": 0.9614174962043762, + "learning_rate": 7.231014570985042e-06, + "loss": 0.6079, + "step": 5875 + }, + { + "epoch": 0.37, + "grad_norm": 0.8840222358703613, + "learning_rate": 7.230096329630185e-06, + "loss": 0.5708, + "step": 5876 + }, + { + "epoch": 0.37, + "grad_norm": 0.8881139755249023, + "learning_rate": 7.22917799437373e-06, + "loss": 0.5693, + "step": 5877 + }, + { + "epoch": 0.37, + "grad_norm": 0.8951361775398254, + "learning_rate": 7.228259565254345e-06, + "loss": 0.6344, + "step": 5878 + }, + { + "epoch": 0.37, + "grad_norm": 0.9418209791183472, + "learning_rate": 7.227341042310702e-06, + "loss": 0.595, + "step": 5879 + }, + { + "epoch": 0.37, + "grad_norm": 0.964740514755249, + "learning_rate": 7.226422425581474e-06, + "loss": 0.6433, + "step": 5880 + }, + { + "epoch": 0.37, + "grad_norm": 0.8945766687393188, + "learning_rate": 7.225503715105344e-06, + "loss": 0.5805, + "step": 5881 + }, + { + "epoch": 0.37, + "grad_norm": 0.8209680914878845, + "learning_rate": 7.224584910920994e-06, + "loss": 0.6353, + "step": 5882 + }, + { + "epoch": 0.37, + "grad_norm": 0.9142740368843079, + "learning_rate": 7.223666013067113e-06, + "loss": 0.5583, + "step": 5883 + }, + { + "epoch": 0.37, + "grad_norm": 0.9378098845481873, + "learning_rate": 7.222747021582392e-06, + "loss": 0.5952, + "step": 5884 + }, + { + "epoch": 0.37, + "grad_norm": 0.9350360035896301, + "learning_rate": 7.221827936505524e-06, + "loss": 0.6235, + "step": 5885 + }, + { + "epoch": 0.37, + "grad_norm": 0.8425854444503784, + "learning_rate": 7.220908757875214e-06, + "loss": 0.5706, + "step": 5886 + }, + { + "epoch": 0.37, + "grad_norm": 0.8196877837181091, + "learning_rate": 7.21998948573016e-06, + "loss": 0.6116, + "step": 5887 + }, + { + "epoch": 0.37, + "grad_norm": 0.8354714512825012, + "learning_rate": 7.219070120109072e-06, + "loss": 0.545, + "step": 5888 + }, + { + "epoch": 0.37, + "grad_norm": 0.9335945248603821, + "learning_rate": 7.2181506610506605e-06, + "loss": 0.5873, + "step": 5889 + }, + { + "epoch": 0.37, + "grad_norm": 0.9078087210655212, + "learning_rate": 7.217231108593642e-06, + "loss": 0.6323, + "step": 5890 + }, + { + "epoch": 0.37, + "grad_norm": 0.8889597058296204, + "learning_rate": 7.2163114627767336e-06, + "loss": 0.5855, + "step": 5891 + }, + { + "epoch": 0.37, + "grad_norm": 0.9393039345741272, + "learning_rate": 7.21539172363866e-06, + "loss": 0.6515, + "step": 5892 + }, + { + "epoch": 0.37, + "grad_norm": 0.8929221034049988, + "learning_rate": 7.214471891218147e-06, + "loss": 0.5601, + "step": 5893 + }, + { + "epoch": 0.37, + "grad_norm": 0.8714567422866821, + "learning_rate": 7.213551965553927e-06, + "loss": 0.5709, + "step": 5894 + }, + { + "epoch": 0.37, + "grad_norm": 0.8751015067100525, + "learning_rate": 7.212631946684735e-06, + "loss": 0.5834, + "step": 5895 + }, + { + "epoch": 0.37, + "grad_norm": 0.8570420742034912, + "learning_rate": 7.211711834649308e-06, + "loss": 0.6357, + "step": 5896 + }, + { + "epoch": 0.37, + "grad_norm": 0.8587523102760315, + "learning_rate": 7.210791629486389e-06, + "loss": 0.6232, + "step": 5897 + }, + { + "epoch": 0.37, + "grad_norm": 0.9013690948486328, + "learning_rate": 7.209871331234727e-06, + "loss": 0.5748, + "step": 5898 + }, + { + "epoch": 0.37, + "grad_norm": 0.9406622052192688, + "learning_rate": 7.208950939933069e-06, + "loss": 0.6136, + "step": 5899 + }, + { + "epoch": 0.37, + "grad_norm": 0.8297491073608398, + "learning_rate": 7.208030455620172e-06, + "loss": 0.6091, + "step": 5900 + }, + { + "epoch": 0.37, + "grad_norm": 0.8118994235992432, + "learning_rate": 7.207109878334794e-06, + "loss": 0.5538, + "step": 5901 + }, + { + "epoch": 0.37, + "grad_norm": 0.8709977865219116, + "learning_rate": 7.206189208115697e-06, + "loss": 0.6218, + "step": 5902 + }, + { + "epoch": 0.37, + "grad_norm": 0.7942225337028503, + "learning_rate": 7.205268445001647e-06, + "loss": 0.5634, + "step": 5903 + }, + { + "epoch": 0.37, + "grad_norm": 0.9106520414352417, + "learning_rate": 7.204347589031413e-06, + "loss": 0.6096, + "step": 5904 + }, + { + "epoch": 0.37, + "grad_norm": 0.8729263544082642, + "learning_rate": 7.203426640243772e-06, + "loss": 0.5695, + "step": 5905 + }, + { + "epoch": 0.37, + "grad_norm": 0.8718299865722656, + "learning_rate": 7.2025055986775e-06, + "loss": 0.584, + "step": 5906 + }, + { + "epoch": 0.37, + "grad_norm": 0.877406895160675, + "learning_rate": 7.201584464371378e-06, + "loss": 0.6209, + "step": 5907 + }, + { + "epoch": 0.37, + "grad_norm": 0.8972481489181519, + "learning_rate": 7.200663237364195e-06, + "loss": 0.6161, + "step": 5908 + }, + { + "epoch": 0.37, + "grad_norm": 0.8868620991706848, + "learning_rate": 7.199741917694738e-06, + "loss": 0.6095, + "step": 5909 + }, + { + "epoch": 0.37, + "grad_norm": 0.9140734672546387, + "learning_rate": 7.198820505401801e-06, + "loss": 0.6437, + "step": 5910 + }, + { + "epoch": 0.37, + "grad_norm": 0.9390980005264282, + "learning_rate": 7.197899000524181e-06, + "loss": 0.6443, + "step": 5911 + }, + { + "epoch": 0.37, + "grad_norm": 0.8791154623031616, + "learning_rate": 7.196977403100681e-06, + "loss": 0.6049, + "step": 5912 + }, + { + "epoch": 0.37, + "grad_norm": 0.8596461415290833, + "learning_rate": 7.196055713170105e-06, + "loss": 0.577, + "step": 5913 + }, + { + "epoch": 0.37, + "grad_norm": 0.8921295404434204, + "learning_rate": 7.195133930771263e-06, + "loss": 0.6139, + "step": 5914 + }, + { + "epoch": 0.37, + "grad_norm": 0.8871878981590271, + "learning_rate": 7.194212055942966e-06, + "loss": 0.6127, + "step": 5915 + }, + { + "epoch": 0.37, + "grad_norm": 0.8868473172187805, + "learning_rate": 7.193290088724034e-06, + "loss": 0.5821, + "step": 5916 + }, + { + "epoch": 0.37, + "grad_norm": 0.8867928981781006, + "learning_rate": 7.192368029153285e-06, + "loss": 0.6544, + "step": 5917 + }, + { + "epoch": 0.37, + "grad_norm": 0.8952857851982117, + "learning_rate": 7.191445877269548e-06, + "loss": 0.5514, + "step": 5918 + }, + { + "epoch": 0.38, + "grad_norm": 0.9083967804908752, + "learning_rate": 7.190523633111644e-06, + "loss": 0.6256, + "step": 5919 + }, + { + "epoch": 0.38, + "grad_norm": 0.8887345194816589, + "learning_rate": 7.189601296718413e-06, + "loss": 0.6002, + "step": 5920 + }, + { + "epoch": 0.38, + "grad_norm": 0.8916110992431641, + "learning_rate": 7.188678868128687e-06, + "loss": 0.6277, + "step": 5921 + }, + { + "epoch": 0.38, + "grad_norm": 0.8981056809425354, + "learning_rate": 7.18775634738131e-06, + "loss": 0.6223, + "step": 5922 + }, + { + "epoch": 0.38, + "grad_norm": 0.9082187414169312, + "learning_rate": 7.18683373451512e-06, + "loss": 0.6221, + "step": 5923 + }, + { + "epoch": 0.38, + "grad_norm": 0.8695595860481262, + "learning_rate": 7.185911029568972e-06, + "loss": 0.607, + "step": 5924 + }, + { + "epoch": 0.38, + "grad_norm": 0.8874411582946777, + "learning_rate": 7.184988232581713e-06, + "loss": 0.6072, + "step": 5925 + }, + { + "epoch": 0.38, + "grad_norm": 0.8543808460235596, + "learning_rate": 7.184065343592203e-06, + "loss": 0.6432, + "step": 5926 + }, + { + "epoch": 0.38, + "grad_norm": 0.8796266317367554, + "learning_rate": 7.183142362639296e-06, + "loss": 0.6275, + "step": 5927 + }, + { + "epoch": 0.38, + "grad_norm": 0.8801624178886414, + "learning_rate": 7.18221928976186e-06, + "loss": 0.5814, + "step": 5928 + }, + { + "epoch": 0.38, + "grad_norm": 0.8554267287254333, + "learning_rate": 7.181296124998762e-06, + "loss": 0.6135, + "step": 5929 + }, + { + "epoch": 0.38, + "grad_norm": 0.9125354290008545, + "learning_rate": 7.180372868388873e-06, + "loss": 0.6057, + "step": 5930 + }, + { + "epoch": 0.38, + "grad_norm": 0.8697827458381653, + "learning_rate": 7.179449519971066e-06, + "loss": 0.5761, + "step": 5931 + }, + { + "epoch": 0.38, + "grad_norm": 0.8693752288818359, + "learning_rate": 7.178526079784221e-06, + "loss": 0.5969, + "step": 5932 + }, + { + "epoch": 0.38, + "grad_norm": 0.9136356711387634, + "learning_rate": 7.1776025478672225e-06, + "loss": 0.6007, + "step": 5933 + }, + { + "epoch": 0.38, + "grad_norm": 0.9643456339836121, + "learning_rate": 7.176678924258955e-06, + "loss": 0.6225, + "step": 5934 + }, + { + "epoch": 0.38, + "grad_norm": 0.8101844191551208, + "learning_rate": 7.175755208998311e-06, + "loss": 0.5552, + "step": 5935 + }, + { + "epoch": 0.38, + "grad_norm": 0.8744382262229919, + "learning_rate": 7.174831402124184e-06, + "loss": 0.6181, + "step": 5936 + }, + { + "epoch": 0.38, + "grad_norm": 0.9439733624458313, + "learning_rate": 7.173907503675472e-06, + "loss": 0.6245, + "step": 5937 + }, + { + "epoch": 0.38, + "grad_norm": 0.9993674159049988, + "learning_rate": 7.172983513691076e-06, + "loss": 0.633, + "step": 5938 + }, + { + "epoch": 0.38, + "grad_norm": 0.9159564971923828, + "learning_rate": 7.172059432209907e-06, + "loss": 0.5969, + "step": 5939 + }, + { + "epoch": 0.38, + "grad_norm": 0.9775694608688354, + "learning_rate": 7.171135259270868e-06, + "loss": 0.6291, + "step": 5940 + }, + { + "epoch": 0.38, + "grad_norm": 0.8840250968933105, + "learning_rate": 7.170210994912878e-06, + "loss": 0.5855, + "step": 5941 + }, + { + "epoch": 0.38, + "grad_norm": 0.8848263025283813, + "learning_rate": 7.169286639174852e-06, + "loss": 0.604, + "step": 5942 + }, + { + "epoch": 0.38, + "grad_norm": 0.943367063999176, + "learning_rate": 7.168362192095712e-06, + "loss": 0.6189, + "step": 5943 + }, + { + "epoch": 0.38, + "grad_norm": 1.0210529565811157, + "learning_rate": 7.1674376537143845e-06, + "loss": 0.6232, + "step": 5944 + }, + { + "epoch": 0.38, + "grad_norm": 0.9326754212379456, + "learning_rate": 7.166513024069797e-06, + "loss": 0.6188, + "step": 5945 + }, + { + "epoch": 0.38, + "grad_norm": 0.8790732622146606, + "learning_rate": 7.16558830320088e-06, + "loss": 0.628, + "step": 5946 + }, + { + "epoch": 0.38, + "grad_norm": 0.8562813401222229, + "learning_rate": 7.1646634911465765e-06, + "loss": 0.5557, + "step": 5947 + }, + { + "epoch": 0.38, + "grad_norm": 0.8628082871437073, + "learning_rate": 7.163738587945822e-06, + "loss": 0.5901, + "step": 5948 + }, + { + "epoch": 0.38, + "grad_norm": 0.9249915480613708, + "learning_rate": 7.162813593637563e-06, + "loss": 0.5991, + "step": 5949 + }, + { + "epoch": 0.38, + "grad_norm": 0.8744149208068848, + "learning_rate": 7.161888508260748e-06, + "loss": 0.6241, + "step": 5950 + }, + { + "epoch": 0.38, + "grad_norm": 0.8531312942504883, + "learning_rate": 7.160963331854327e-06, + "loss": 0.5488, + "step": 5951 + }, + { + "epoch": 0.38, + "grad_norm": 0.8790968060493469, + "learning_rate": 7.16003806445726e-06, + "loss": 0.5869, + "step": 5952 + }, + { + "epoch": 0.38, + "grad_norm": 0.8855732679367065, + "learning_rate": 7.159112706108502e-06, + "loss": 0.5524, + "step": 5953 + }, + { + "epoch": 0.38, + "grad_norm": 0.8487377166748047, + "learning_rate": 7.15818725684702e-06, + "loss": 0.6133, + "step": 5954 + }, + { + "epoch": 0.38, + "grad_norm": 0.9325571060180664, + "learning_rate": 7.15726171671178e-06, + "loss": 0.6002, + "step": 5955 + }, + { + "epoch": 0.38, + "grad_norm": 0.9158957600593567, + "learning_rate": 7.156336085741755e-06, + "loss": 0.6271, + "step": 5956 + }, + { + "epoch": 0.38, + "grad_norm": 0.8471969962120056, + "learning_rate": 7.155410363975916e-06, + "loss": 0.5651, + "step": 5957 + }, + { + "epoch": 0.38, + "grad_norm": 0.8656317591667175, + "learning_rate": 7.154484551453247e-06, + "loss": 0.6275, + "step": 5958 + }, + { + "epoch": 0.38, + "grad_norm": 0.8509047627449036, + "learning_rate": 7.1535586482127284e-06, + "loss": 0.6528, + "step": 5959 + }, + { + "epoch": 0.38, + "grad_norm": 0.8533027768135071, + "learning_rate": 7.152632654293347e-06, + "loss": 0.6032, + "step": 5960 + }, + { + "epoch": 0.38, + "grad_norm": 0.9203348755836487, + "learning_rate": 7.151706569734091e-06, + "loss": 0.5851, + "step": 5961 + }, + { + "epoch": 0.38, + "grad_norm": 0.7722728848457336, + "learning_rate": 7.150780394573957e-06, + "loss": 0.5788, + "step": 5962 + }, + { + "epoch": 0.38, + "grad_norm": 0.873199462890625, + "learning_rate": 7.149854128851945e-06, + "loss": 0.5754, + "step": 5963 + }, + { + "epoch": 0.38, + "grad_norm": 0.8286789059638977, + "learning_rate": 7.148927772607053e-06, + "loss": 0.5779, + "step": 5964 + }, + { + "epoch": 0.38, + "grad_norm": 0.8518579602241516, + "learning_rate": 7.148001325878287e-06, + "loss": 0.5629, + "step": 5965 + }, + { + "epoch": 0.38, + "grad_norm": 0.9076201319694519, + "learning_rate": 7.147074788704659e-06, + "loss": 0.608, + "step": 5966 + }, + { + "epoch": 0.38, + "grad_norm": 0.9196124076843262, + "learning_rate": 7.14614816112518e-06, + "loss": 0.5983, + "step": 5967 + }, + { + "epoch": 0.38, + "grad_norm": 0.8748944997787476, + "learning_rate": 7.145221443178868e-06, + "loss": 0.6094, + "step": 5968 + }, + { + "epoch": 0.38, + "grad_norm": 0.9023792147636414, + "learning_rate": 7.144294634904744e-06, + "loss": 0.6188, + "step": 5969 + }, + { + "epoch": 0.38, + "grad_norm": 0.9357802271842957, + "learning_rate": 7.143367736341832e-06, + "loss": 0.6842, + "step": 5970 + }, + { + "epoch": 0.38, + "grad_norm": 0.9043236970901489, + "learning_rate": 7.142440747529161e-06, + "loss": 0.6354, + "step": 5971 + }, + { + "epoch": 0.38, + "grad_norm": 0.9322927594184875, + "learning_rate": 7.141513668505764e-06, + "loss": 0.5922, + "step": 5972 + }, + { + "epoch": 0.38, + "grad_norm": 0.8984158635139465, + "learning_rate": 7.140586499310674e-06, + "loss": 0.5912, + "step": 5973 + }, + { + "epoch": 0.38, + "grad_norm": 0.8156484961509705, + "learning_rate": 7.139659239982935e-06, + "loss": 0.5413, + "step": 5974 + }, + { + "epoch": 0.38, + "grad_norm": 0.8405022621154785, + "learning_rate": 7.138731890561589e-06, + "loss": 0.586, + "step": 5975 + }, + { + "epoch": 0.38, + "grad_norm": 0.8600237965583801, + "learning_rate": 7.1378044510856814e-06, + "loss": 0.5976, + "step": 5976 + }, + { + "epoch": 0.38, + "grad_norm": 0.8850138783454895, + "learning_rate": 7.136876921594267e-06, + "loss": 0.6245, + "step": 5977 + }, + { + "epoch": 0.38, + "grad_norm": 0.9403291344642639, + "learning_rate": 7.1359493021263986e-06, + "loss": 0.6494, + "step": 5978 + }, + { + "epoch": 0.38, + "grad_norm": 0.8556556701660156, + "learning_rate": 7.135021592721134e-06, + "loss": 0.5771, + "step": 5979 + }, + { + "epoch": 0.38, + "grad_norm": 0.8727120757102966, + "learning_rate": 7.134093793417539e-06, + "loss": 0.6104, + "step": 5980 + }, + { + "epoch": 0.38, + "grad_norm": 0.8781840205192566, + "learning_rate": 7.133165904254677e-06, + "loss": 0.5915, + "step": 5981 + }, + { + "epoch": 0.38, + "grad_norm": 0.9176463484764099, + "learning_rate": 7.132237925271621e-06, + "loss": 0.5915, + "step": 5982 + }, + { + "epoch": 0.38, + "grad_norm": 0.8665004968643188, + "learning_rate": 7.131309856507444e-06, + "loss": 0.6643, + "step": 5983 + }, + { + "epoch": 0.38, + "grad_norm": 0.9312930107116699, + "learning_rate": 7.13038169800122e-06, + "loss": 0.6334, + "step": 5984 + }, + { + "epoch": 0.38, + "grad_norm": 0.9924306869506836, + "learning_rate": 7.129453449792036e-06, + "loss": 0.6339, + "step": 5985 + }, + { + "epoch": 0.38, + "grad_norm": 0.8924956917762756, + "learning_rate": 7.1285251119189754e-06, + "loss": 0.5738, + "step": 5986 + }, + { + "epoch": 0.38, + "grad_norm": 0.997128963470459, + "learning_rate": 7.127596684421127e-06, + "loss": 0.6045, + "step": 5987 + }, + { + "epoch": 0.38, + "grad_norm": 0.8882451057434082, + "learning_rate": 7.126668167337583e-06, + "loss": 0.589, + "step": 5988 + }, + { + "epoch": 0.38, + "grad_norm": 0.855974018573761, + "learning_rate": 7.12573956070744e-06, + "loss": 0.6437, + "step": 5989 + }, + { + "epoch": 0.38, + "grad_norm": 0.885186493396759, + "learning_rate": 7.1248108645698e-06, + "loss": 0.6057, + "step": 5990 + }, + { + "epoch": 0.38, + "grad_norm": 0.8319755792617798, + "learning_rate": 7.123882078963766e-06, + "loss": 0.5789, + "step": 5991 + }, + { + "epoch": 0.38, + "grad_norm": 0.8926076292991638, + "learning_rate": 7.1229532039284455e-06, + "loss": 0.637, + "step": 5992 + }, + { + "epoch": 0.38, + "grad_norm": 0.9193412661552429, + "learning_rate": 7.122024239502951e-06, + "loss": 0.5881, + "step": 5993 + }, + { + "epoch": 0.38, + "grad_norm": 0.9050919413566589, + "learning_rate": 7.121095185726399e-06, + "loss": 0.6494, + "step": 5994 + }, + { + "epoch": 0.38, + "grad_norm": 0.8967909812927246, + "learning_rate": 7.120166042637906e-06, + "loss": 0.6335, + "step": 5995 + }, + { + "epoch": 0.38, + "grad_norm": 0.8294476866722107, + "learning_rate": 7.119236810276598e-06, + "loss": 0.5503, + "step": 5996 + }, + { + "epoch": 0.38, + "grad_norm": 0.8650161027908325, + "learning_rate": 7.118307488681598e-06, + "loss": 0.6328, + "step": 5997 + }, + { + "epoch": 0.38, + "grad_norm": 0.8785965442657471, + "learning_rate": 7.11737807789204e-06, + "loss": 0.58, + "step": 5998 + }, + { + "epoch": 0.38, + "grad_norm": 0.9463037252426147, + "learning_rate": 7.116448577947057e-06, + "loss": 0.5731, + "step": 5999 + }, + { + "epoch": 0.38, + "grad_norm": 0.8291397094726562, + "learning_rate": 7.115518988885785e-06, + "loss": 0.5948, + "step": 6000 + }, + { + "epoch": 0.38, + "grad_norm": 0.9187091588973999, + "learning_rate": 7.114589310747371e-06, + "loss": 0.6384, + "step": 6001 + }, + { + "epoch": 0.38, + "grad_norm": 0.8593400716781616, + "learning_rate": 7.113659543570956e-06, + "loss": 0.625, + "step": 6002 + }, + { + "epoch": 0.38, + "grad_norm": 0.8747579455375671, + "learning_rate": 7.11272968739569e-06, + "loss": 0.5569, + "step": 6003 + }, + { + "epoch": 0.38, + "grad_norm": 0.8783309459686279, + "learning_rate": 7.1117997422607264e-06, + "loss": 0.5986, + "step": 6004 + }, + { + "epoch": 0.38, + "grad_norm": 0.8772686123847961, + "learning_rate": 7.110869708205224e-06, + "loss": 0.5752, + "step": 6005 + }, + { + "epoch": 0.38, + "grad_norm": 0.8766029477119446, + "learning_rate": 7.109939585268339e-06, + "loss": 0.6299, + "step": 6006 + }, + { + "epoch": 0.38, + "grad_norm": 0.8981195688247681, + "learning_rate": 7.109009373489239e-06, + "loss": 0.6076, + "step": 6007 + }, + { + "epoch": 0.38, + "grad_norm": 0.8908311128616333, + "learning_rate": 7.10807907290709e-06, + "loss": 0.651, + "step": 6008 + }, + { + "epoch": 0.38, + "grad_norm": 0.9420418739318848, + "learning_rate": 7.107148683561066e-06, + "loss": 0.5797, + "step": 6009 + }, + { + "epoch": 0.38, + "grad_norm": 0.9113646149635315, + "learning_rate": 7.106218205490342e-06, + "loss": 0.6277, + "step": 6010 + }, + { + "epoch": 0.38, + "grad_norm": 0.9516562223434448, + "learning_rate": 7.105287638734093e-06, + "loss": 0.6429, + "step": 6011 + }, + { + "epoch": 0.38, + "grad_norm": 0.9115347862243652, + "learning_rate": 7.104356983331509e-06, + "loss": 0.6106, + "step": 6012 + }, + { + "epoch": 0.38, + "grad_norm": 0.9286765456199646, + "learning_rate": 7.1034262393217705e-06, + "loss": 0.5787, + "step": 6013 + }, + { + "epoch": 0.38, + "grad_norm": 0.9642840027809143, + "learning_rate": 7.1024954067440725e-06, + "loss": 0.6729, + "step": 6014 + }, + { + "epoch": 0.38, + "grad_norm": 0.9277244806289673, + "learning_rate": 7.101564485637603e-06, + "loss": 0.601, + "step": 6015 + }, + { + "epoch": 0.38, + "grad_norm": 0.8856588006019592, + "learning_rate": 7.1006334760415674e-06, + "loss": 0.6001, + "step": 6016 + }, + { + "epoch": 0.38, + "grad_norm": 0.916569709777832, + "learning_rate": 7.0997023779951625e-06, + "loss": 0.6309, + "step": 6017 + }, + { + "epoch": 0.38, + "grad_norm": 0.9436630606651306, + "learning_rate": 7.098771191537596e-06, + "loss": 0.6716, + "step": 6018 + }, + { + "epoch": 0.38, + "grad_norm": 0.897139847278595, + "learning_rate": 7.097839916708073e-06, + "loss": 0.6179, + "step": 6019 + }, + { + "epoch": 0.38, + "grad_norm": 1.0071852207183838, + "learning_rate": 7.096908553545812e-06, + "loss": 0.627, + "step": 6020 + }, + { + "epoch": 0.38, + "grad_norm": 0.830710232257843, + "learning_rate": 7.095977102090025e-06, + "loss": 0.6087, + "step": 6021 + }, + { + "epoch": 0.38, + "grad_norm": 0.9118586182594299, + "learning_rate": 7.095045562379934e-06, + "loss": 0.5829, + "step": 6022 + }, + { + "epoch": 0.38, + "grad_norm": 0.8319807052612305, + "learning_rate": 7.0941139344547605e-06, + "loss": 0.5733, + "step": 6023 + }, + { + "epoch": 0.38, + "grad_norm": 0.8906463980674744, + "learning_rate": 7.093182218353737e-06, + "loss": 0.6338, + "step": 6024 + }, + { + "epoch": 0.38, + "grad_norm": 0.8869120478630066, + "learning_rate": 7.092250414116091e-06, + "loss": 0.5613, + "step": 6025 + }, + { + "epoch": 0.38, + "grad_norm": 0.8718534111976624, + "learning_rate": 7.091318521781058e-06, + "loss": 0.5957, + "step": 6026 + }, + { + "epoch": 0.38, + "grad_norm": 0.8886241912841797, + "learning_rate": 7.090386541387878e-06, + "loss": 0.6346, + "step": 6027 + }, + { + "epoch": 0.38, + "grad_norm": 0.8198200464248657, + "learning_rate": 7.089454472975792e-06, + "loss": 0.5945, + "step": 6028 + }, + { + "epoch": 0.38, + "grad_norm": 0.8481683135032654, + "learning_rate": 7.088522316584048e-06, + "loss": 0.5899, + "step": 6029 + }, + { + "epoch": 0.38, + "grad_norm": 0.8683075904846191, + "learning_rate": 7.087590072251893e-06, + "loss": 0.5804, + "step": 6030 + }, + { + "epoch": 0.38, + "grad_norm": 0.8363116383552551, + "learning_rate": 7.086657740018582e-06, + "loss": 0.6085, + "step": 6031 + }, + { + "epoch": 0.38, + "grad_norm": 0.8278794288635254, + "learning_rate": 7.085725319923373e-06, + "loss": 0.5597, + "step": 6032 + }, + { + "epoch": 0.38, + "grad_norm": 0.9533769488334656, + "learning_rate": 7.084792812005528e-06, + "loss": 0.6417, + "step": 6033 + }, + { + "epoch": 0.38, + "grad_norm": 0.9329741597175598, + "learning_rate": 7.083860216304309e-06, + "loss": 0.6205, + "step": 6034 + }, + { + "epoch": 0.38, + "grad_norm": 0.9326625466346741, + "learning_rate": 7.082927532858985e-06, + "loss": 0.5771, + "step": 6035 + }, + { + "epoch": 0.38, + "grad_norm": 0.9433557987213135, + "learning_rate": 7.0819947617088294e-06, + "loss": 0.5943, + "step": 6036 + }, + { + "epoch": 0.38, + "grad_norm": 0.9084176421165466, + "learning_rate": 7.081061902893117e-06, + "loss": 0.6308, + "step": 6037 + }, + { + "epoch": 0.38, + "grad_norm": 1.0079909563064575, + "learning_rate": 7.080128956451125e-06, + "loss": 0.5854, + "step": 6038 + }, + { + "epoch": 0.38, + "grad_norm": 0.9684156179428101, + "learning_rate": 7.079195922422143e-06, + "loss": 0.6058, + "step": 6039 + }, + { + "epoch": 0.38, + "grad_norm": 0.8209320902824402, + "learning_rate": 7.078262800845453e-06, + "loss": 0.5948, + "step": 6040 + }, + { + "epoch": 0.38, + "grad_norm": 0.8796716928482056, + "learning_rate": 7.0773295917603445e-06, + "loss": 0.5924, + "step": 6041 + }, + { + "epoch": 0.38, + "grad_norm": 0.8752491474151611, + "learning_rate": 7.076396295206113e-06, + "loss": 0.5695, + "step": 6042 + }, + { + "epoch": 0.38, + "grad_norm": 0.9148269891738892, + "learning_rate": 7.075462911222057e-06, + "loss": 0.5703, + "step": 6043 + }, + { + "epoch": 0.38, + "grad_norm": 0.8726043701171875, + "learning_rate": 7.07452943984748e-06, + "loss": 0.5915, + "step": 6044 + }, + { + "epoch": 0.38, + "grad_norm": 0.8048043847084045, + "learning_rate": 7.073595881121683e-06, + "loss": 0.5756, + "step": 6045 + }, + { + "epoch": 0.38, + "grad_norm": 0.9457216262817383, + "learning_rate": 7.072662235083977e-06, + "loss": 0.6594, + "step": 6046 + }, + { + "epoch": 0.38, + "grad_norm": 0.9144176840782166, + "learning_rate": 7.071728501773675e-06, + "loss": 0.6171, + "step": 6047 + }, + { + "epoch": 0.38, + "grad_norm": 0.9629214406013489, + "learning_rate": 7.070794681230093e-06, + "loss": 0.6228, + "step": 6048 + }, + { + "epoch": 0.38, + "grad_norm": 0.9110321402549744, + "learning_rate": 7.06986077349255e-06, + "loss": 0.6194, + "step": 6049 + }, + { + "epoch": 0.38, + "grad_norm": 0.8530512452125549, + "learning_rate": 7.068926778600372e-06, + "loss": 0.6007, + "step": 6050 + }, + { + "epoch": 0.38, + "grad_norm": 0.8579297661781311, + "learning_rate": 7.067992696592882e-06, + "loss": 0.6179, + "step": 6051 + }, + { + "epoch": 0.38, + "grad_norm": 0.9101974368095398, + "learning_rate": 7.067058527509416e-06, + "loss": 0.5758, + "step": 6052 + }, + { + "epoch": 0.38, + "grad_norm": 0.92631995677948, + "learning_rate": 7.066124271389305e-06, + "loss": 0.5397, + "step": 6053 + }, + { + "epoch": 0.38, + "grad_norm": 0.8497442603111267, + "learning_rate": 7.0651899282718896e-06, + "loss": 0.64, + "step": 6054 + }, + { + "epoch": 0.38, + "grad_norm": 0.9552360773086548, + "learning_rate": 7.064255498196509e-06, + "loss": 0.6108, + "step": 6055 + }, + { + "epoch": 0.38, + "grad_norm": 0.9272350668907166, + "learning_rate": 7.0633209812025116e-06, + "loss": 0.6146, + "step": 6056 + }, + { + "epoch": 0.38, + "grad_norm": 0.9053919315338135, + "learning_rate": 7.062386377329245e-06, + "loss": 0.5826, + "step": 6057 + }, + { + "epoch": 0.38, + "grad_norm": 0.9134330749511719, + "learning_rate": 7.061451686616062e-06, + "loss": 0.5864, + "step": 6058 + }, + { + "epoch": 0.38, + "grad_norm": 1.0126466751098633, + "learning_rate": 7.0605169091023205e-06, + "loss": 0.6486, + "step": 6059 + }, + { + "epoch": 0.38, + "grad_norm": 0.9160744547843933, + "learning_rate": 7.05958204482738e-06, + "loss": 0.5698, + "step": 6060 + }, + { + "epoch": 0.38, + "grad_norm": 0.924263060092926, + "learning_rate": 7.058647093830604e-06, + "loss": 0.6247, + "step": 6061 + }, + { + "epoch": 0.38, + "grad_norm": 0.8898268342018127, + "learning_rate": 7.0577120561513604e-06, + "loss": 0.6066, + "step": 6062 + }, + { + "epoch": 0.38, + "grad_norm": 0.887617826461792, + "learning_rate": 7.056776931829021e-06, + "loss": 0.5524, + "step": 6063 + }, + { + "epoch": 0.38, + "grad_norm": 0.899122416973114, + "learning_rate": 7.055841720902959e-06, + "loss": 0.5709, + "step": 6064 + }, + { + "epoch": 0.38, + "grad_norm": 0.9129178524017334, + "learning_rate": 7.054906423412554e-06, + "loss": 0.6503, + "step": 6065 + }, + { + "epoch": 0.38, + "grad_norm": 0.9189284443855286, + "learning_rate": 7.053971039397188e-06, + "loss": 0.645, + "step": 6066 + }, + { + "epoch": 0.38, + "grad_norm": 0.9444376230239868, + "learning_rate": 7.0530355688962484e-06, + "loss": 0.6305, + "step": 6067 + }, + { + "epoch": 0.38, + "grad_norm": 0.8827232122421265, + "learning_rate": 7.0521000119491215e-06, + "loss": 0.5677, + "step": 6068 + }, + { + "epoch": 0.38, + "grad_norm": 0.918749213218689, + "learning_rate": 7.0511643685952014e-06, + "loss": 0.6198, + "step": 6069 + }, + { + "epoch": 0.38, + "grad_norm": 0.8609430193901062, + "learning_rate": 7.050228638873886e-06, + "loss": 0.5669, + "step": 6070 + }, + { + "epoch": 0.38, + "grad_norm": 0.8873887658119202, + "learning_rate": 7.049292822824575e-06, + "loss": 0.6614, + "step": 6071 + }, + { + "epoch": 0.38, + "grad_norm": 0.8995460271835327, + "learning_rate": 7.048356920486672e-06, + "loss": 0.6257, + "step": 6072 + }, + { + "epoch": 0.38, + "grad_norm": 0.8707825541496277, + "learning_rate": 7.047420931899585e-06, + "loss": 0.5955, + "step": 6073 + }, + { + "epoch": 0.38, + "grad_norm": 0.8987425565719604, + "learning_rate": 7.0464848571027246e-06, + "loss": 0.612, + "step": 6074 + }, + { + "epoch": 0.38, + "grad_norm": 0.9181625247001648, + "learning_rate": 7.045548696135506e-06, + "loss": 0.5886, + "step": 6075 + }, + { + "epoch": 0.38, + "grad_norm": 0.9375488758087158, + "learning_rate": 7.044612449037348e-06, + "loss": 0.6653, + "step": 6076 + }, + { + "epoch": 0.39, + "grad_norm": 0.8560453057289124, + "learning_rate": 7.0436761158476715e-06, + "loss": 0.601, + "step": 6077 + }, + { + "epoch": 0.39, + "grad_norm": 0.8806048035621643, + "learning_rate": 7.042739696605905e-06, + "loss": 0.588, + "step": 6078 + }, + { + "epoch": 0.39, + "grad_norm": 0.8374508619308472, + "learning_rate": 7.041803191351475e-06, + "loss": 0.5957, + "step": 6079 + }, + { + "epoch": 0.39, + "grad_norm": 0.888600766658783, + "learning_rate": 7.040866600123816e-06, + "loss": 0.6332, + "step": 6080 + }, + { + "epoch": 0.39, + "grad_norm": 0.9433616399765015, + "learning_rate": 7.039929922962363e-06, + "loss": 0.6253, + "step": 6081 + }, + { + "epoch": 0.39, + "grad_norm": 0.920203447341919, + "learning_rate": 7.038993159906558e-06, + "loss": 0.6191, + "step": 6082 + }, + { + "epoch": 0.39, + "grad_norm": 0.8820478916168213, + "learning_rate": 7.0380563109958445e-06, + "loss": 0.5925, + "step": 6083 + }, + { + "epoch": 0.39, + "grad_norm": 0.8885151147842407, + "learning_rate": 7.03711937626967e-06, + "loss": 0.6106, + "step": 6084 + }, + { + "epoch": 0.39, + "grad_norm": 0.8442419767379761, + "learning_rate": 7.036182355767485e-06, + "loss": 0.5689, + "step": 6085 + }, + { + "epoch": 0.39, + "grad_norm": 0.8483255505561829, + "learning_rate": 7.0352452495287435e-06, + "loss": 0.594, + "step": 6086 + }, + { + "epoch": 0.39, + "grad_norm": 0.8738897442817688, + "learning_rate": 7.034308057592907e-06, + "loss": 0.5607, + "step": 6087 + }, + { + "epoch": 0.39, + "grad_norm": 0.8586130738258362, + "learning_rate": 7.033370779999431e-06, + "loss": 0.641, + "step": 6088 + }, + { + "epoch": 0.39, + "grad_norm": 0.8719096183776855, + "learning_rate": 7.032433416787788e-06, + "loss": 0.6118, + "step": 6089 + }, + { + "epoch": 0.39, + "grad_norm": 0.8584408164024353, + "learning_rate": 7.031495967997444e-06, + "loss": 0.5459, + "step": 6090 + }, + { + "epoch": 0.39, + "grad_norm": 0.8801223635673523, + "learning_rate": 7.0305584336678715e-06, + "loss": 0.5927, + "step": 6091 + }, + { + "epoch": 0.39, + "grad_norm": 0.7907819151878357, + "learning_rate": 7.029620813838544e-06, + "loss": 0.5669, + "step": 6092 + }, + { + "epoch": 0.39, + "grad_norm": 0.8615099191665649, + "learning_rate": 7.02868310854895e-06, + "loss": 0.5749, + "step": 6093 + }, + { + "epoch": 0.39, + "grad_norm": 0.8567502498626709, + "learning_rate": 7.027745317838564e-06, + "loss": 0.6005, + "step": 6094 + }, + { + "epoch": 0.39, + "grad_norm": 0.8757819533348083, + "learning_rate": 7.026807441746879e-06, + "loss": 0.6121, + "step": 6095 + }, + { + "epoch": 0.39, + "grad_norm": 0.8814988732337952, + "learning_rate": 7.025869480313381e-06, + "loss": 0.6107, + "step": 6096 + }, + { + "epoch": 0.39, + "grad_norm": 0.8909090757369995, + "learning_rate": 7.0249314335775675e-06, + "loss": 0.5844, + "step": 6097 + }, + { + "epoch": 0.39, + "grad_norm": 0.8157296776771545, + "learning_rate": 7.023993301578935e-06, + "loss": 0.5491, + "step": 6098 + }, + { + "epoch": 0.39, + "grad_norm": 0.8694610595703125, + "learning_rate": 7.023055084356987e-06, + "loss": 0.5522, + "step": 6099 + }, + { + "epoch": 0.39, + "grad_norm": 0.863211989402771, + "learning_rate": 7.022116781951226e-06, + "loss": 0.6082, + "step": 6100 + }, + { + "epoch": 0.39, + "grad_norm": 0.8649691939353943, + "learning_rate": 7.021178394401162e-06, + "loss": 0.6182, + "step": 6101 + }, + { + "epoch": 0.39, + "grad_norm": 0.8408727049827576, + "learning_rate": 7.020239921746308e-06, + "loss": 0.6067, + "step": 6102 + }, + { + "epoch": 0.39, + "grad_norm": 0.920093297958374, + "learning_rate": 7.019301364026178e-06, + "loss": 0.6312, + "step": 6103 + }, + { + "epoch": 0.39, + "grad_norm": 0.9169816374778748, + "learning_rate": 7.018362721280292e-06, + "loss": 0.5826, + "step": 6104 + }, + { + "epoch": 0.39, + "grad_norm": 0.9006035327911377, + "learning_rate": 7.0174239935481735e-06, + "loss": 0.6304, + "step": 6105 + }, + { + "epoch": 0.39, + "grad_norm": 0.8806290626525879, + "learning_rate": 7.016485180869349e-06, + "loss": 0.582, + "step": 6106 + }, + { + "epoch": 0.39, + "grad_norm": 0.9303503036499023, + "learning_rate": 7.015546283283346e-06, + "loss": 0.5657, + "step": 6107 + }, + { + "epoch": 0.39, + "grad_norm": 0.9579445123672485, + "learning_rate": 7.014607300829703e-06, + "loss": 0.6414, + "step": 6108 + }, + { + "epoch": 0.39, + "grad_norm": 0.8906927108764648, + "learning_rate": 7.013668233547955e-06, + "loss": 0.5922, + "step": 6109 + }, + { + "epoch": 0.39, + "grad_norm": 0.866254448890686, + "learning_rate": 7.0127290814776424e-06, + "loss": 0.5812, + "step": 6110 + }, + { + "epoch": 0.39, + "grad_norm": 0.9502847194671631, + "learning_rate": 7.0117898446583084e-06, + "loss": 0.6458, + "step": 6111 + }, + { + "epoch": 0.39, + "grad_norm": 0.8791959285736084, + "learning_rate": 7.010850523129504e-06, + "loss": 0.5782, + "step": 6112 + }, + { + "epoch": 0.39, + "grad_norm": 0.8798953294754028, + "learning_rate": 7.009911116930779e-06, + "loss": 0.6134, + "step": 6113 + }, + { + "epoch": 0.39, + "grad_norm": 0.9204188585281372, + "learning_rate": 7.00897162610169e-06, + "loss": 0.6289, + "step": 6114 + }, + { + "epoch": 0.39, + "grad_norm": 0.9081289172172546, + "learning_rate": 7.0080320506817926e-06, + "loss": 0.5684, + "step": 6115 + }, + { + "epoch": 0.39, + "grad_norm": 0.8632351160049438, + "learning_rate": 7.007092390710652e-06, + "loss": 0.5876, + "step": 6116 + }, + { + "epoch": 0.39, + "grad_norm": 0.8665913939476013, + "learning_rate": 7.006152646227833e-06, + "loss": 0.5125, + "step": 6117 + }, + { + "epoch": 0.39, + "grad_norm": 0.8278458118438721, + "learning_rate": 7.005212817272905e-06, + "loss": 0.6409, + "step": 6118 + }, + { + "epoch": 0.39, + "grad_norm": 0.9356765151023865, + "learning_rate": 7.0042729038854405e-06, + "loss": 0.6375, + "step": 6119 + }, + { + "epoch": 0.39, + "grad_norm": 0.8514903783798218, + "learning_rate": 7.003332906105016e-06, + "loss": 0.6365, + "step": 6120 + }, + { + "epoch": 0.39, + "grad_norm": 0.9448802471160889, + "learning_rate": 7.002392823971214e-06, + "loss": 0.6518, + "step": 6121 + }, + { + "epoch": 0.39, + "grad_norm": 0.8504220247268677, + "learning_rate": 7.001452657523614e-06, + "loss": 0.503, + "step": 6122 + }, + { + "epoch": 0.39, + "grad_norm": 0.9173238277435303, + "learning_rate": 7.000512406801805e-06, + "loss": 0.6124, + "step": 6123 + }, + { + "epoch": 0.39, + "grad_norm": 0.8627074956893921, + "learning_rate": 6.9995720718453786e-06, + "loss": 0.5751, + "step": 6124 + }, + { + "epoch": 0.39, + "grad_norm": 0.997188925743103, + "learning_rate": 6.998631652693928e-06, + "loss": 0.6455, + "step": 6125 + }, + { + "epoch": 0.39, + "grad_norm": 0.8653777837753296, + "learning_rate": 6.997691149387052e-06, + "loss": 0.5966, + "step": 6126 + }, + { + "epoch": 0.39, + "grad_norm": 0.8478190302848816, + "learning_rate": 6.99675056196435e-06, + "loss": 0.5918, + "step": 6127 + }, + { + "epoch": 0.39, + "grad_norm": 0.8888818025588989, + "learning_rate": 6.995809890465428e-06, + "loss": 0.5978, + "step": 6128 + }, + { + "epoch": 0.39, + "grad_norm": 0.8966024518013, + "learning_rate": 6.994869134929895e-06, + "loss": 0.6194, + "step": 6129 + }, + { + "epoch": 0.39, + "grad_norm": 0.8759685158729553, + "learning_rate": 6.993928295397363e-06, + "loss": 0.6068, + "step": 6130 + }, + { + "epoch": 0.39, + "grad_norm": 0.8871753215789795, + "learning_rate": 6.992987371907446e-06, + "loss": 0.6187, + "step": 6131 + }, + { + "epoch": 0.39, + "grad_norm": 0.8602596521377563, + "learning_rate": 6.992046364499764e-06, + "loss": 0.5815, + "step": 6132 + }, + { + "epoch": 0.39, + "grad_norm": 0.8757937550544739, + "learning_rate": 6.991105273213939e-06, + "loss": 0.5496, + "step": 6133 + }, + { + "epoch": 0.39, + "grad_norm": 0.8693877458572388, + "learning_rate": 6.990164098089598e-06, + "loss": 0.6058, + "step": 6134 + }, + { + "epoch": 0.39, + "grad_norm": 0.8464959263801575, + "learning_rate": 6.9892228391663694e-06, + "loss": 0.5767, + "step": 6135 + }, + { + "epoch": 0.39, + "grad_norm": 0.8602965474128723, + "learning_rate": 6.988281496483888e-06, + "loss": 0.6125, + "step": 6136 + }, + { + "epoch": 0.39, + "grad_norm": 0.9073672294616699, + "learning_rate": 6.987340070081789e-06, + "loss": 0.6005, + "step": 6137 + }, + { + "epoch": 0.39, + "grad_norm": 0.9364018440246582, + "learning_rate": 6.986398559999714e-06, + "loss": 0.5963, + "step": 6138 + }, + { + "epoch": 0.39, + "grad_norm": 0.875133216381073, + "learning_rate": 6.9854569662773044e-06, + "loss": 0.5463, + "step": 6139 + }, + { + "epoch": 0.39, + "grad_norm": 0.934817373752594, + "learning_rate": 6.984515288954211e-06, + "loss": 0.6034, + "step": 6140 + }, + { + "epoch": 0.39, + "grad_norm": 0.9065064191818237, + "learning_rate": 6.98357352807008e-06, + "loss": 0.5662, + "step": 6141 + }, + { + "epoch": 0.39, + "grad_norm": 0.813168466091156, + "learning_rate": 6.982631683664569e-06, + "loss": 0.5632, + "step": 6142 + }, + { + "epoch": 0.39, + "grad_norm": 0.8873375654220581, + "learning_rate": 6.981689755777335e-06, + "loss": 0.5367, + "step": 6143 + }, + { + "epoch": 0.39, + "grad_norm": 0.8773168325424194, + "learning_rate": 6.98074774444804e-06, + "loss": 0.5601, + "step": 6144 + }, + { + "epoch": 0.39, + "grad_norm": 0.8021374344825745, + "learning_rate": 6.979805649716347e-06, + "loss": 0.5076, + "step": 6145 + }, + { + "epoch": 0.39, + "grad_norm": 0.8933539986610413, + "learning_rate": 6.978863471621925e-06, + "loss": 0.6894, + "step": 6146 + }, + { + "epoch": 0.39, + "grad_norm": 0.8887168169021606, + "learning_rate": 6.977921210204446e-06, + "loss": 0.647, + "step": 6147 + }, + { + "epoch": 0.39, + "grad_norm": 0.8803666234016418, + "learning_rate": 6.9769788655035875e-06, + "loss": 0.5892, + "step": 6148 + }, + { + "epoch": 0.39, + "grad_norm": 0.9113365411758423, + "learning_rate": 6.976036437559024e-06, + "loss": 0.6732, + "step": 6149 + }, + { + "epoch": 0.39, + "grad_norm": 0.8204461932182312, + "learning_rate": 6.975093926410441e-06, + "loss": 0.5916, + "step": 6150 + }, + { + "epoch": 0.39, + "grad_norm": 0.934197187423706, + "learning_rate": 6.974151332097525e-06, + "loss": 0.6305, + "step": 6151 + }, + { + "epoch": 0.39, + "grad_norm": 0.9386470913887024, + "learning_rate": 6.973208654659962e-06, + "loss": 0.6485, + "step": 6152 + }, + { + "epoch": 0.39, + "grad_norm": 0.9400019645690918, + "learning_rate": 6.9722658941374475e-06, + "loss": 0.5726, + "step": 6153 + }, + { + "epoch": 0.39, + "grad_norm": 0.8022521734237671, + "learning_rate": 6.971323050569677e-06, + "loss": 0.593, + "step": 6154 + }, + { + "epoch": 0.39, + "grad_norm": 0.8721299171447754, + "learning_rate": 6.970380123996352e-06, + "loss": 0.5738, + "step": 6155 + }, + { + "epoch": 0.39, + "grad_norm": 0.9494243264198303, + "learning_rate": 6.969437114457174e-06, + "loss": 0.6282, + "step": 6156 + }, + { + "epoch": 0.39, + "grad_norm": 0.8277761936187744, + "learning_rate": 6.968494021991848e-06, + "loss": 0.5913, + "step": 6157 + }, + { + "epoch": 0.39, + "grad_norm": 0.854987621307373, + "learning_rate": 6.967550846640089e-06, + "loss": 0.5491, + "step": 6158 + }, + { + "epoch": 0.39, + "grad_norm": 0.9130845665931702, + "learning_rate": 6.966607588441609e-06, + "loss": 0.6274, + "step": 6159 + }, + { + "epoch": 0.39, + "grad_norm": 0.8112385869026184, + "learning_rate": 6.9656642474361225e-06, + "loss": 0.5309, + "step": 6160 + }, + { + "epoch": 0.39, + "grad_norm": 0.8674074411392212, + "learning_rate": 6.964720823663353e-06, + "loss": 0.6072, + "step": 6161 + }, + { + "epoch": 0.39, + "grad_norm": 0.9010210633277893, + "learning_rate": 6.963777317163025e-06, + "loss": 0.604, + "step": 6162 + }, + { + "epoch": 0.39, + "grad_norm": 0.8281999230384827, + "learning_rate": 6.962833727974867e-06, + "loss": 0.5805, + "step": 6163 + }, + { + "epoch": 0.39, + "grad_norm": 0.879539966583252, + "learning_rate": 6.961890056138607e-06, + "loss": 0.5993, + "step": 6164 + }, + { + "epoch": 0.39, + "grad_norm": 0.9275795221328735, + "learning_rate": 6.9609463016939816e-06, + "loss": 0.6101, + "step": 6165 + }, + { + "epoch": 0.39, + "grad_norm": 0.8362293839454651, + "learning_rate": 6.960002464680731e-06, + "loss": 0.5565, + "step": 6166 + }, + { + "epoch": 0.39, + "grad_norm": 0.8443682193756104, + "learning_rate": 6.959058545138593e-06, + "loss": 0.5736, + "step": 6167 + }, + { + "epoch": 0.39, + "grad_norm": 0.9468548893928528, + "learning_rate": 6.958114543107315e-06, + "loss": 0.6321, + "step": 6168 + }, + { + "epoch": 0.39, + "grad_norm": 0.8098998069763184, + "learning_rate": 6.957170458626645e-06, + "loss": 0.552, + "step": 6169 + }, + { + "epoch": 0.39, + "grad_norm": 0.9221862554550171, + "learning_rate": 6.956226291736338e-06, + "loss": 0.6174, + "step": 6170 + }, + { + "epoch": 0.39, + "grad_norm": 0.8823233246803284, + "learning_rate": 6.955282042476144e-06, + "loss": 0.5788, + "step": 6171 + }, + { + "epoch": 0.39, + "grad_norm": 0.8700152039527893, + "learning_rate": 6.9543377108858265e-06, + "loss": 0.6143, + "step": 6172 + }, + { + "epoch": 0.39, + "grad_norm": 0.866326093673706, + "learning_rate": 6.9533932970051465e-06, + "loss": 0.586, + "step": 6173 + }, + { + "epoch": 0.39, + "grad_norm": 0.9445212483406067, + "learning_rate": 6.952448800873871e-06, + "loss": 0.6754, + "step": 6174 + }, + { + "epoch": 0.39, + "grad_norm": 0.9050667881965637, + "learning_rate": 6.951504222531768e-06, + "loss": 0.6266, + "step": 6175 + }, + { + "epoch": 0.39, + "grad_norm": 0.8842514157295227, + "learning_rate": 6.950559562018611e-06, + "loss": 0.6103, + "step": 6176 + }, + { + "epoch": 0.39, + "grad_norm": 0.8354772329330444, + "learning_rate": 6.949614819374175e-06, + "loss": 0.5891, + "step": 6177 + }, + { + "epoch": 0.39, + "grad_norm": 0.8761371970176697, + "learning_rate": 6.948669994638243e-06, + "loss": 0.6099, + "step": 6178 + }, + { + "epoch": 0.39, + "grad_norm": 0.827156126499176, + "learning_rate": 6.947725087850595e-06, + "loss": 0.5347, + "step": 6179 + }, + { + "epoch": 0.39, + "grad_norm": 0.8923287987709045, + "learning_rate": 6.94678009905102e-06, + "loss": 0.5873, + "step": 6180 + }, + { + "epoch": 0.39, + "grad_norm": 0.865619421005249, + "learning_rate": 6.945835028279308e-06, + "loss": 0.6504, + "step": 6181 + }, + { + "epoch": 0.39, + "grad_norm": 0.8588405251502991, + "learning_rate": 6.944889875575251e-06, + "loss": 0.5939, + "step": 6182 + }, + { + "epoch": 0.39, + "grad_norm": 0.8965503573417664, + "learning_rate": 6.943944640978648e-06, + "loss": 0.6188, + "step": 6183 + }, + { + "epoch": 0.39, + "grad_norm": 0.8754391670227051, + "learning_rate": 6.942999324529297e-06, + "loss": 0.5729, + "step": 6184 + }, + { + "epoch": 0.39, + "grad_norm": 0.873710036277771, + "learning_rate": 6.942053926267005e-06, + "loss": 0.5963, + "step": 6185 + }, + { + "epoch": 0.39, + "grad_norm": 0.8937984704971313, + "learning_rate": 6.941108446231578e-06, + "loss": 0.5968, + "step": 6186 + }, + { + "epoch": 0.39, + "grad_norm": 0.8646506071090698, + "learning_rate": 6.940162884462828e-06, + "loss": 0.5911, + "step": 6187 + }, + { + "epoch": 0.39, + "grad_norm": 0.8940115571022034, + "learning_rate": 6.9392172410005656e-06, + "loss": 0.6188, + "step": 6188 + }, + { + "epoch": 0.39, + "grad_norm": 0.8401895761489868, + "learning_rate": 6.9382715158846135e-06, + "loss": 0.5936, + "step": 6189 + }, + { + "epoch": 0.39, + "grad_norm": 0.8863813281059265, + "learning_rate": 6.93732570915479e-06, + "loss": 0.5897, + "step": 6190 + }, + { + "epoch": 0.39, + "grad_norm": 0.9222760796546936, + "learning_rate": 6.93637982085092e-06, + "loss": 0.6047, + "step": 6191 + }, + { + "epoch": 0.39, + "grad_norm": 0.8968461751937866, + "learning_rate": 6.9354338510128315e-06, + "loss": 0.5943, + "step": 6192 + }, + { + "epoch": 0.39, + "grad_norm": 0.9590244293212891, + "learning_rate": 6.934487799680357e-06, + "loss": 0.6274, + "step": 6193 + }, + { + "epoch": 0.39, + "grad_norm": 0.8756579756736755, + "learning_rate": 6.933541666893331e-06, + "loss": 0.6139, + "step": 6194 + }, + { + "epoch": 0.39, + "grad_norm": 0.921607494354248, + "learning_rate": 6.932595452691592e-06, + "loss": 0.64, + "step": 6195 + }, + { + "epoch": 0.39, + "grad_norm": 0.8667705059051514, + "learning_rate": 6.9316491571149815e-06, + "loss": 0.6098, + "step": 6196 + }, + { + "epoch": 0.39, + "grad_norm": 0.8910043835639954, + "learning_rate": 6.930702780203344e-06, + "loss": 0.6432, + "step": 6197 + }, + { + "epoch": 0.39, + "grad_norm": 0.9581403732299805, + "learning_rate": 6.929756321996529e-06, + "loss": 0.6453, + "step": 6198 + }, + { + "epoch": 0.39, + "grad_norm": 0.8930731415748596, + "learning_rate": 6.928809782534388e-06, + "loss": 0.6059, + "step": 6199 + }, + { + "epoch": 0.39, + "grad_norm": 0.9078335762023926, + "learning_rate": 6.927863161856778e-06, + "loss": 0.5956, + "step": 6200 + }, + { + "epoch": 0.39, + "grad_norm": 0.8804222345352173, + "learning_rate": 6.9269164600035555e-06, + "loss": 0.5862, + "step": 6201 + }, + { + "epoch": 0.39, + "grad_norm": 0.8888744711875916, + "learning_rate": 6.925969677014585e-06, + "loss": 0.6367, + "step": 6202 + }, + { + "epoch": 0.39, + "grad_norm": 0.9415931105613708, + "learning_rate": 6.92502281292973e-06, + "loss": 0.5966, + "step": 6203 + }, + { + "epoch": 0.39, + "grad_norm": 0.8707212209701538, + "learning_rate": 6.924075867788863e-06, + "loss": 0.6106, + "step": 6204 + }, + { + "epoch": 0.39, + "grad_norm": 0.866563081741333, + "learning_rate": 6.923128841631854e-06, + "loss": 0.5493, + "step": 6205 + }, + { + "epoch": 0.39, + "grad_norm": 0.9359866976737976, + "learning_rate": 6.92218173449858e-06, + "loss": 0.5749, + "step": 6206 + }, + { + "epoch": 0.39, + "grad_norm": 0.9220528602600098, + "learning_rate": 6.921234546428918e-06, + "loss": 0.5909, + "step": 6207 + }, + { + "epoch": 0.39, + "grad_norm": 0.8486345410346985, + "learning_rate": 6.920287277462755e-06, + "loss": 0.5765, + "step": 6208 + }, + { + "epoch": 0.39, + "grad_norm": 0.8374233245849609, + "learning_rate": 6.9193399276399745e-06, + "loss": 0.5556, + "step": 6209 + }, + { + "epoch": 0.39, + "grad_norm": 0.8650535345077515, + "learning_rate": 6.918392497000466e-06, + "loss": 0.6162, + "step": 6210 + }, + { + "epoch": 0.39, + "grad_norm": 0.8010015487670898, + "learning_rate": 6.917444985584122e-06, + "loss": 0.5534, + "step": 6211 + }, + { + "epoch": 0.39, + "grad_norm": 0.888006865978241, + "learning_rate": 6.916497393430841e-06, + "loss": 0.6161, + "step": 6212 + }, + { + "epoch": 0.39, + "grad_norm": 0.8319229483604431, + "learning_rate": 6.915549720580523e-06, + "loss": 0.5842, + "step": 6213 + }, + { + "epoch": 0.39, + "grad_norm": 0.8947864174842834, + "learning_rate": 6.914601967073068e-06, + "loss": 0.5607, + "step": 6214 + }, + { + "epoch": 0.39, + "grad_norm": 0.9286026358604431, + "learning_rate": 6.913654132948385e-06, + "loss": 0.6001, + "step": 6215 + }, + { + "epoch": 0.39, + "grad_norm": 0.8386892676353455, + "learning_rate": 6.912706218246384e-06, + "loss": 0.5296, + "step": 6216 + }, + { + "epoch": 0.39, + "grad_norm": 0.8397946357727051, + "learning_rate": 6.911758223006979e-06, + "loss": 0.5952, + "step": 6217 + }, + { + "epoch": 0.39, + "grad_norm": 0.8822040557861328, + "learning_rate": 6.910810147270084e-06, + "loss": 0.5506, + "step": 6218 + }, + { + "epoch": 0.39, + "grad_norm": 0.9485325217247009, + "learning_rate": 6.909861991075622e-06, + "loss": 0.6302, + "step": 6219 + }, + { + "epoch": 0.39, + "grad_norm": 0.9046191573143005, + "learning_rate": 6.908913754463514e-06, + "loss": 0.6251, + "step": 6220 + }, + { + "epoch": 0.39, + "grad_norm": 0.9548308849334717, + "learning_rate": 6.90796543747369e-06, + "loss": 0.6542, + "step": 6221 + }, + { + "epoch": 0.39, + "grad_norm": 0.9304654002189636, + "learning_rate": 6.907017040146078e-06, + "loss": 0.6334, + "step": 6222 + }, + { + "epoch": 0.39, + "grad_norm": 0.9015122652053833, + "learning_rate": 6.906068562520613e-06, + "loss": 0.6062, + "step": 6223 + }, + { + "epoch": 0.39, + "grad_norm": 0.8413129448890686, + "learning_rate": 6.905120004637232e-06, + "loss": 0.5425, + "step": 6224 + }, + { + "epoch": 0.39, + "grad_norm": 0.9578669667243958, + "learning_rate": 6.904171366535873e-06, + "loss": 0.607, + "step": 6225 + }, + { + "epoch": 0.39, + "grad_norm": 0.895363450050354, + "learning_rate": 6.9032226482564835e-06, + "loss": 0.5703, + "step": 6226 + }, + { + "epoch": 0.39, + "grad_norm": 0.9190669059753418, + "learning_rate": 6.9022738498390084e-06, + "loss": 0.6413, + "step": 6227 + }, + { + "epoch": 0.39, + "grad_norm": 0.8880024552345276, + "learning_rate": 6.9013249713234e-06, + "loss": 0.6153, + "step": 6228 + }, + { + "epoch": 0.39, + "grad_norm": 0.8834933042526245, + "learning_rate": 6.900376012749611e-06, + "loss": 0.5887, + "step": 6229 + }, + { + "epoch": 0.39, + "grad_norm": 0.9798893928527832, + "learning_rate": 6.899426974157598e-06, + "loss": 0.6217, + "step": 6230 + }, + { + "epoch": 0.39, + "grad_norm": 0.8374887704849243, + "learning_rate": 6.898477855587323e-06, + "loss": 0.6106, + "step": 6231 + }, + { + "epoch": 0.39, + "grad_norm": 0.8667147159576416, + "learning_rate": 6.897528657078752e-06, + "loss": 0.5879, + "step": 6232 + }, + { + "epoch": 0.39, + "grad_norm": 0.928011417388916, + "learning_rate": 6.8965793786718484e-06, + "loss": 0.591, + "step": 6233 + }, + { + "epoch": 0.39, + "grad_norm": 0.8557186126708984, + "learning_rate": 6.895630020406584e-06, + "loss": 0.5891, + "step": 6234 + }, + { + "epoch": 0.4, + "grad_norm": 0.9000698328018188, + "learning_rate": 6.894680582322934e-06, + "loss": 0.6082, + "step": 6235 + }, + { + "epoch": 0.4, + "grad_norm": 0.8863718509674072, + "learning_rate": 6.893731064460878e-06, + "loss": 0.6171, + "step": 6236 + }, + { + "epoch": 0.4, + "grad_norm": 0.9076705574989319, + "learning_rate": 6.892781466860393e-06, + "loss": 0.5794, + "step": 6237 + }, + { + "epoch": 0.4, + "grad_norm": 0.8823980689048767, + "learning_rate": 6.891831789561465e-06, + "loss": 0.6175, + "step": 6238 + }, + { + "epoch": 0.4, + "grad_norm": 0.9114968776702881, + "learning_rate": 6.8908820326040815e-06, + "loss": 0.6038, + "step": 6239 + }, + { + "epoch": 0.4, + "grad_norm": 0.8561393618583679, + "learning_rate": 6.889932196028235e-06, + "loss": 0.6196, + "step": 6240 + }, + { + "epoch": 0.4, + "grad_norm": 0.9283210635185242, + "learning_rate": 6.888982279873917e-06, + "loss": 0.582, + "step": 6241 + }, + { + "epoch": 0.4, + "grad_norm": 0.8675887584686279, + "learning_rate": 6.888032284181127e-06, + "loss": 0.583, + "step": 6242 + }, + { + "epoch": 0.4, + "grad_norm": 0.9557647109031677, + "learning_rate": 6.887082208989865e-06, + "loss": 0.6167, + "step": 6243 + }, + { + "epoch": 0.4, + "grad_norm": 0.9393128156661987, + "learning_rate": 6.886132054340136e-06, + "loss": 0.6255, + "step": 6244 + }, + { + "epoch": 0.4, + "grad_norm": 0.8403303027153015, + "learning_rate": 6.885181820271947e-06, + "loss": 0.6011, + "step": 6245 + }, + { + "epoch": 0.4, + "grad_norm": 0.8862718343734741, + "learning_rate": 6.88423150682531e-06, + "loss": 0.6226, + "step": 6246 + }, + { + "epoch": 0.4, + "grad_norm": 0.9034367799758911, + "learning_rate": 6.88328111404024e-06, + "loss": 0.5662, + "step": 6247 + }, + { + "epoch": 0.4, + "grad_norm": 0.8718511462211609, + "learning_rate": 6.882330641956752e-06, + "loss": 0.6259, + "step": 6248 + }, + { + "epoch": 0.4, + "grad_norm": 0.834060549736023, + "learning_rate": 6.881380090614871e-06, + "loss": 0.5645, + "step": 6249 + }, + { + "epoch": 0.4, + "grad_norm": 0.9293310046195984, + "learning_rate": 6.8804294600546175e-06, + "loss": 0.6016, + "step": 6250 + }, + { + "epoch": 0.4, + "grad_norm": 0.9908111095428467, + "learning_rate": 6.879478750316022e-06, + "loss": 0.6271, + "step": 6251 + }, + { + "epoch": 0.4, + "grad_norm": 0.9571794867515564, + "learning_rate": 6.878527961439113e-06, + "loss": 0.6243, + "step": 6252 + }, + { + "epoch": 0.4, + "grad_norm": 0.9029168486595154, + "learning_rate": 6.877577093463927e-06, + "loss": 0.6002, + "step": 6253 + }, + { + "epoch": 0.4, + "grad_norm": 0.9042819738388062, + "learning_rate": 6.876626146430502e-06, + "loss": 0.5916, + "step": 6254 + }, + { + "epoch": 0.4, + "grad_norm": 0.9775123000144958, + "learning_rate": 6.875675120378878e-06, + "loss": 0.6199, + "step": 6255 + }, + { + "epoch": 0.4, + "grad_norm": 0.909796416759491, + "learning_rate": 6.8747240153491e-06, + "loss": 0.5858, + "step": 6256 + }, + { + "epoch": 0.4, + "grad_norm": 0.847358763217926, + "learning_rate": 6.873772831381214e-06, + "loss": 0.6043, + "step": 6257 + }, + { + "epoch": 0.4, + "grad_norm": 0.9297115206718445, + "learning_rate": 6.872821568515275e-06, + "loss": 0.6586, + "step": 6258 + }, + { + "epoch": 0.4, + "grad_norm": 0.8991652727127075, + "learning_rate": 6.8718702267913325e-06, + "loss": 0.6056, + "step": 6259 + }, + { + "epoch": 0.4, + "grad_norm": 0.8950271010398865, + "learning_rate": 6.870918806249449e-06, + "loss": 0.6192, + "step": 6260 + }, + { + "epoch": 0.4, + "grad_norm": 0.8827762007713318, + "learning_rate": 6.8699673069296806e-06, + "loss": 0.588, + "step": 6261 + }, + { + "epoch": 0.4, + "grad_norm": 0.8640381693840027, + "learning_rate": 6.869015728872095e-06, + "loss": 0.6255, + "step": 6262 + }, + { + "epoch": 0.4, + "grad_norm": 0.8890305757522583, + "learning_rate": 6.868064072116758e-06, + "loss": 0.6502, + "step": 6263 + }, + { + "epoch": 0.4, + "grad_norm": 0.854560911655426, + "learning_rate": 6.867112336703743e-06, + "loss": 0.5748, + "step": 6264 + }, + { + "epoch": 0.4, + "grad_norm": 0.872962236404419, + "learning_rate": 6.866160522673121e-06, + "loss": 0.6329, + "step": 6265 + }, + { + "epoch": 0.4, + "grad_norm": 0.8564478754997253, + "learning_rate": 6.865208630064973e-06, + "loss": 0.6265, + "step": 6266 + }, + { + "epoch": 0.4, + "grad_norm": 0.8863121271133423, + "learning_rate": 6.864256658919377e-06, + "loss": 0.5473, + "step": 6267 + }, + { + "epoch": 0.4, + "grad_norm": 0.8578380942344666, + "learning_rate": 6.8633046092764174e-06, + "loss": 0.6347, + "step": 6268 + }, + { + "epoch": 0.4, + "grad_norm": 0.8845486640930176, + "learning_rate": 6.862352481176184e-06, + "loss": 0.6456, + "step": 6269 + }, + { + "epoch": 0.4, + "grad_norm": 0.9655935168266296, + "learning_rate": 6.861400274658767e-06, + "loss": 0.5902, + "step": 6270 + }, + { + "epoch": 0.4, + "grad_norm": 0.8958570957183838, + "learning_rate": 6.860447989764259e-06, + "loss": 0.5804, + "step": 6271 + }, + { + "epoch": 0.4, + "grad_norm": 0.8562657237052917, + "learning_rate": 6.8594956265327585e-06, + "loss": 0.574, + "step": 6272 + }, + { + "epoch": 0.4, + "grad_norm": 0.9815998077392578, + "learning_rate": 6.858543185004365e-06, + "loss": 0.6155, + "step": 6273 + }, + { + "epoch": 0.4, + "grad_norm": 0.8810309171676636, + "learning_rate": 6.857590665219185e-06, + "loss": 0.6283, + "step": 6274 + }, + { + "epoch": 0.4, + "grad_norm": 0.8395465016365051, + "learning_rate": 6.856638067217324e-06, + "loss": 0.5414, + "step": 6275 + }, + { + "epoch": 0.4, + "grad_norm": 0.9288424253463745, + "learning_rate": 6.85568539103889e-06, + "loss": 0.5853, + "step": 6276 + }, + { + "epoch": 0.4, + "grad_norm": 0.9081584215164185, + "learning_rate": 6.854732636724002e-06, + "loss": 0.6545, + "step": 6277 + }, + { + "epoch": 0.4, + "grad_norm": 0.8159523606300354, + "learning_rate": 6.853779804312775e-06, + "loss": 0.5649, + "step": 6278 + }, + { + "epoch": 0.4, + "grad_norm": 0.92462158203125, + "learning_rate": 6.8528268938453295e-06, + "loss": 0.5591, + "step": 6279 + }, + { + "epoch": 0.4, + "grad_norm": 0.9456450939178467, + "learning_rate": 6.851873905361786e-06, + "loss": 0.6015, + "step": 6280 + }, + { + "epoch": 0.4, + "grad_norm": 0.9764153957366943, + "learning_rate": 6.850920838902278e-06, + "loss": 0.6429, + "step": 6281 + }, + { + "epoch": 0.4, + "grad_norm": 0.893409252166748, + "learning_rate": 6.84996769450693e-06, + "loss": 0.5466, + "step": 6282 + }, + { + "epoch": 0.4, + "grad_norm": 0.9273908734321594, + "learning_rate": 6.84901447221588e-06, + "loss": 0.655, + "step": 6283 + }, + { + "epoch": 0.4, + "grad_norm": 0.8750333189964294, + "learning_rate": 6.84806117206926e-06, + "loss": 0.6498, + "step": 6284 + }, + { + "epoch": 0.4, + "grad_norm": 0.8754233121871948, + "learning_rate": 6.847107794107216e-06, + "loss": 0.5554, + "step": 6285 + }, + { + "epoch": 0.4, + "grad_norm": 0.93915194272995, + "learning_rate": 6.846154338369887e-06, + "loss": 0.6434, + "step": 6286 + }, + { + "epoch": 0.4, + "grad_norm": 0.835665225982666, + "learning_rate": 6.845200804897421e-06, + "loss": 0.6035, + "step": 6287 + }, + { + "epoch": 0.4, + "grad_norm": 0.8906847834587097, + "learning_rate": 6.844247193729968e-06, + "loss": 0.636, + "step": 6288 + }, + { + "epoch": 0.4, + "grad_norm": 0.8233811855316162, + "learning_rate": 6.843293504907682e-06, + "loss": 0.5461, + "step": 6289 + }, + { + "epoch": 0.4, + "grad_norm": 0.9119184613227844, + "learning_rate": 6.84233973847072e-06, + "loss": 0.5923, + "step": 6290 + }, + { + "epoch": 0.4, + "grad_norm": 0.9312586784362793, + "learning_rate": 6.8413858944592385e-06, + "loss": 0.66, + "step": 6291 + }, + { + "epoch": 0.4, + "grad_norm": 0.8756263256072998, + "learning_rate": 6.840431972913404e-06, + "loss": 0.6262, + "step": 6292 + }, + { + "epoch": 0.4, + "grad_norm": 0.8882813453674316, + "learning_rate": 6.83947797387338e-06, + "loss": 0.5798, + "step": 6293 + }, + { + "epoch": 0.4, + "grad_norm": 0.8455925583839417, + "learning_rate": 6.838523897379339e-06, + "loss": 0.618, + "step": 6294 + }, + { + "epoch": 0.4, + "grad_norm": 0.8319289684295654, + "learning_rate": 6.837569743471451e-06, + "loss": 0.6029, + "step": 6295 + }, + { + "epoch": 0.4, + "grad_norm": 0.8721569180488586, + "learning_rate": 6.836615512189895e-06, + "loss": 0.5526, + "step": 6296 + }, + { + "epoch": 0.4, + "grad_norm": 0.8549659848213196, + "learning_rate": 6.835661203574848e-06, + "loss": 0.5947, + "step": 6297 + }, + { + "epoch": 0.4, + "grad_norm": 0.8474895358085632, + "learning_rate": 6.834706817666495e-06, + "loss": 0.6118, + "step": 6298 + }, + { + "epoch": 0.4, + "grad_norm": 0.8855010271072388, + "learning_rate": 6.833752354505019e-06, + "loss": 0.5868, + "step": 6299 + }, + { + "epoch": 0.4, + "grad_norm": 0.8940566182136536, + "learning_rate": 6.832797814130611e-06, + "loss": 0.5843, + "step": 6300 + }, + { + "epoch": 0.4, + "grad_norm": 0.842008650302887, + "learning_rate": 6.831843196583462e-06, + "loss": 0.6042, + "step": 6301 + }, + { + "epoch": 0.4, + "grad_norm": 0.8390910029411316, + "learning_rate": 6.8308885019037695e-06, + "loss": 0.5937, + "step": 6302 + }, + { + "epoch": 0.4, + "grad_norm": 0.8749220967292786, + "learning_rate": 6.82993373013173e-06, + "loss": 0.6125, + "step": 6303 + }, + { + "epoch": 0.4, + "grad_norm": 0.9013246297836304, + "learning_rate": 6.8289788813075485e-06, + "loss": 0.5911, + "step": 6304 + }, + { + "epoch": 0.4, + "grad_norm": 0.9145839810371399, + "learning_rate": 6.82802395547143e-06, + "loss": 0.5748, + "step": 6305 + }, + { + "epoch": 0.4, + "grad_norm": 0.8356090188026428, + "learning_rate": 6.82706895266358e-06, + "loss": 0.4983, + "step": 6306 + }, + { + "epoch": 0.4, + "grad_norm": 0.8637154698371887, + "learning_rate": 6.826113872924213e-06, + "loss": 0.6237, + "step": 6307 + }, + { + "epoch": 0.4, + "grad_norm": 0.8588926792144775, + "learning_rate": 6.825158716293543e-06, + "loss": 0.6215, + "step": 6308 + }, + { + "epoch": 0.4, + "grad_norm": 0.8768167495727539, + "learning_rate": 6.824203482811788e-06, + "loss": 0.5862, + "step": 6309 + }, + { + "epoch": 0.4, + "grad_norm": 0.8740860819816589, + "learning_rate": 6.823248172519173e-06, + "loss": 0.5699, + "step": 6310 + }, + { + "epoch": 0.4, + "grad_norm": 0.937689483165741, + "learning_rate": 6.8222927854559175e-06, + "loss": 0.6146, + "step": 6311 + }, + { + "epoch": 0.4, + "grad_norm": 0.8367653489112854, + "learning_rate": 6.8213373216622514e-06, + "loss": 0.5808, + "step": 6312 + }, + { + "epoch": 0.4, + "grad_norm": 0.9312880635261536, + "learning_rate": 6.820381781178409e-06, + "loss": 0.6059, + "step": 6313 + }, + { + "epoch": 0.4, + "grad_norm": 0.9240770936012268, + "learning_rate": 6.819426164044622e-06, + "loss": 0.6084, + "step": 6314 + }, + { + "epoch": 0.4, + "grad_norm": 0.8993687629699707, + "learning_rate": 6.818470470301128e-06, + "loss": 0.5742, + "step": 6315 + }, + { + "epoch": 0.4, + "grad_norm": 0.8884747624397278, + "learning_rate": 6.817514699988168e-06, + "loss": 0.5959, + "step": 6316 + }, + { + "epoch": 0.4, + "grad_norm": 0.919092059135437, + "learning_rate": 6.8165588531459885e-06, + "loss": 0.612, + "step": 6317 + }, + { + "epoch": 0.4, + "grad_norm": 0.8630106449127197, + "learning_rate": 6.815602929814833e-06, + "loss": 0.5945, + "step": 6318 + }, + { + "epoch": 0.4, + "grad_norm": 0.8956739902496338, + "learning_rate": 6.814646930034954e-06, + "loss": 0.6494, + "step": 6319 + }, + { + "epoch": 0.4, + "grad_norm": 0.8530880808830261, + "learning_rate": 6.813690853846606e-06, + "loss": 0.5881, + "step": 6320 + }, + { + "epoch": 0.4, + "grad_norm": 0.9456024765968323, + "learning_rate": 6.8127347012900465e-06, + "loss": 0.592, + "step": 6321 + }, + { + "epoch": 0.4, + "grad_norm": 0.959709882736206, + "learning_rate": 6.811778472405534e-06, + "loss": 0.6175, + "step": 6322 + }, + { + "epoch": 0.4, + "grad_norm": 0.8467543721199036, + "learning_rate": 6.810822167233333e-06, + "loss": 0.5823, + "step": 6323 + }, + { + "epoch": 0.4, + "grad_norm": 0.9109113216400146, + "learning_rate": 6.80986578581371e-06, + "loss": 0.618, + "step": 6324 + }, + { + "epoch": 0.4, + "grad_norm": 0.9041874408721924, + "learning_rate": 6.808909328186934e-06, + "loss": 0.6321, + "step": 6325 + }, + { + "epoch": 0.4, + "grad_norm": 0.81452476978302, + "learning_rate": 6.80795279439328e-06, + "loss": 0.5553, + "step": 6326 + }, + { + "epoch": 0.4, + "grad_norm": 0.8997363448143005, + "learning_rate": 6.806996184473023e-06, + "loss": 0.623, + "step": 6327 + }, + { + "epoch": 0.4, + "grad_norm": 0.9020070433616638, + "learning_rate": 6.806039498466444e-06, + "loss": 0.5917, + "step": 6328 + }, + { + "epoch": 0.4, + "grad_norm": 0.8951176404953003, + "learning_rate": 6.805082736413822e-06, + "loss": 0.6224, + "step": 6329 + }, + { + "epoch": 0.4, + "grad_norm": 0.9783088564872742, + "learning_rate": 6.804125898355447e-06, + "loss": 0.5973, + "step": 6330 + }, + { + "epoch": 0.4, + "grad_norm": 0.8250484466552734, + "learning_rate": 6.8031689843316054e-06, + "loss": 0.5555, + "step": 6331 + }, + { + "epoch": 0.4, + "grad_norm": 0.8294229507446289, + "learning_rate": 6.802211994382591e-06, + "loss": 0.5801, + "step": 6332 + }, + { + "epoch": 0.4, + "grad_norm": 0.8849250078201294, + "learning_rate": 6.8012549285487e-06, + "loss": 0.6152, + "step": 6333 + }, + { + "epoch": 0.4, + "grad_norm": 0.8871194124221802, + "learning_rate": 6.800297786870228e-06, + "loss": 0.5972, + "step": 6334 + }, + { + "epoch": 0.4, + "grad_norm": 0.8779382705688477, + "learning_rate": 6.799340569387481e-06, + "loss": 0.548, + "step": 6335 + }, + { + "epoch": 0.4, + "grad_norm": 0.8883922100067139, + "learning_rate": 6.798383276140761e-06, + "loss": 0.611, + "step": 6336 + }, + { + "epoch": 0.4, + "grad_norm": 0.9105244874954224, + "learning_rate": 6.797425907170378e-06, + "loss": 0.5728, + "step": 6337 + }, + { + "epoch": 0.4, + "grad_norm": 0.8537696003913879, + "learning_rate": 6.796468462516642e-06, + "loss": 0.5878, + "step": 6338 + }, + { + "epoch": 0.4, + "grad_norm": 0.9340306520462036, + "learning_rate": 6.79551094221987e-06, + "loss": 0.6079, + "step": 6339 + }, + { + "epoch": 0.4, + "grad_norm": 0.8353374600410461, + "learning_rate": 6.794553346320376e-06, + "loss": 0.5569, + "step": 6340 + }, + { + "epoch": 0.4, + "grad_norm": 0.9598260521888733, + "learning_rate": 6.7935956748584855e-06, + "loss": 0.5305, + "step": 6341 + }, + { + "epoch": 0.4, + "grad_norm": 0.8836723566055298, + "learning_rate": 6.792637927874519e-06, + "loss": 0.6038, + "step": 6342 + }, + { + "epoch": 0.4, + "grad_norm": 0.930091142654419, + "learning_rate": 6.791680105408807e-06, + "loss": 0.6583, + "step": 6343 + }, + { + "epoch": 0.4, + "grad_norm": 0.9237890839576721, + "learning_rate": 6.790722207501678e-06, + "loss": 0.6554, + "step": 6344 + }, + { + "epoch": 0.4, + "grad_norm": 0.8894320726394653, + "learning_rate": 6.789764234193465e-06, + "loss": 0.5665, + "step": 6345 + }, + { + "epoch": 0.4, + "grad_norm": 0.9483606815338135, + "learning_rate": 6.788806185524508e-06, + "loss": 0.6341, + "step": 6346 + }, + { + "epoch": 0.4, + "grad_norm": 0.948627769947052, + "learning_rate": 6.787848061535145e-06, + "loss": 0.6711, + "step": 6347 + }, + { + "epoch": 0.4, + "grad_norm": 0.8971147537231445, + "learning_rate": 6.786889862265719e-06, + "loss": 0.5643, + "step": 6348 + }, + { + "epoch": 0.4, + "grad_norm": 0.8906237483024597, + "learning_rate": 6.7859315877565775e-06, + "loss": 0.5608, + "step": 6349 + }, + { + "epoch": 0.4, + "grad_norm": 0.9154103398323059, + "learning_rate": 6.784973238048069e-06, + "loss": 0.6419, + "step": 6350 + }, + { + "epoch": 0.4, + "grad_norm": 0.8681836128234863, + "learning_rate": 6.7840148131805485e-06, + "loss": 0.6173, + "step": 6351 + }, + { + "epoch": 0.4, + "grad_norm": 0.8321382403373718, + "learning_rate": 6.783056313194369e-06, + "loss": 0.5718, + "step": 6352 + }, + { + "epoch": 0.4, + "grad_norm": 0.8255459666252136, + "learning_rate": 6.7820977381298915e-06, + "loss": 0.6267, + "step": 6353 + }, + { + "epoch": 0.4, + "grad_norm": 0.8768226504325867, + "learning_rate": 6.781139088027477e-06, + "loss": 0.6143, + "step": 6354 + }, + { + "epoch": 0.4, + "grad_norm": 0.9021497368812561, + "learning_rate": 6.780180362927492e-06, + "loss": 0.5718, + "step": 6355 + }, + { + "epoch": 0.4, + "grad_norm": 0.8666380643844604, + "learning_rate": 6.779221562870306e-06, + "loss": 0.5783, + "step": 6356 + }, + { + "epoch": 0.4, + "grad_norm": 0.9858885407447815, + "learning_rate": 6.778262687896287e-06, + "loss": 0.6102, + "step": 6357 + }, + { + "epoch": 0.4, + "grad_norm": 0.973626434803009, + "learning_rate": 6.777303738045814e-06, + "loss": 0.6679, + "step": 6358 + }, + { + "epoch": 0.4, + "grad_norm": 0.8242490291595459, + "learning_rate": 6.776344713359263e-06, + "loss": 0.593, + "step": 6359 + }, + { + "epoch": 0.4, + "grad_norm": 0.8745444416999817, + "learning_rate": 6.775385613877016e-06, + "loss": 0.6231, + "step": 6360 + }, + { + "epoch": 0.4, + "grad_norm": 0.8920515775680542, + "learning_rate": 6.774426439639455e-06, + "loss": 0.5821, + "step": 6361 + }, + { + "epoch": 0.4, + "grad_norm": 0.9180237650871277, + "learning_rate": 6.773467190686972e-06, + "loss": 0.625, + "step": 6362 + }, + { + "epoch": 0.4, + "grad_norm": 0.9676087498664856, + "learning_rate": 6.772507867059953e-06, + "loss": 0.6289, + "step": 6363 + }, + { + "epoch": 0.4, + "grad_norm": 0.9148452281951904, + "learning_rate": 6.771548468798796e-06, + "loss": 0.5209, + "step": 6364 + }, + { + "epoch": 0.4, + "grad_norm": 0.9174354076385498, + "learning_rate": 6.770588995943893e-06, + "loss": 0.5529, + "step": 6365 + }, + { + "epoch": 0.4, + "grad_norm": 0.9325718879699707, + "learning_rate": 6.769629448535648e-06, + "loss": 0.6186, + "step": 6366 + }, + { + "epoch": 0.4, + "grad_norm": 0.9010034799575806, + "learning_rate": 6.768669826614464e-06, + "loss": 0.5967, + "step": 6367 + }, + { + "epoch": 0.4, + "grad_norm": 0.8638269901275635, + "learning_rate": 6.767710130220745e-06, + "loss": 0.6489, + "step": 6368 + }, + { + "epoch": 0.4, + "grad_norm": 0.8227560520172119, + "learning_rate": 6.766750359394904e-06, + "loss": 0.5682, + "step": 6369 + }, + { + "epoch": 0.4, + "grad_norm": 0.9004592895507812, + "learning_rate": 6.76579051417735e-06, + "loss": 0.6632, + "step": 6370 + }, + { + "epoch": 0.4, + "grad_norm": 0.8845899701118469, + "learning_rate": 6.7648305946085e-06, + "loss": 0.5959, + "step": 6371 + }, + { + "epoch": 0.4, + "grad_norm": 0.9487060904502869, + "learning_rate": 6.763870600728772e-06, + "loss": 0.6677, + "step": 6372 + }, + { + "epoch": 0.4, + "grad_norm": 0.8828071355819702, + "learning_rate": 6.76291053257859e-06, + "loss": 0.5542, + "step": 6373 + }, + { + "epoch": 0.4, + "grad_norm": 0.9216554164886475, + "learning_rate": 6.761950390198378e-06, + "loss": 0.6145, + "step": 6374 + }, + { + "epoch": 0.4, + "grad_norm": 0.8994758725166321, + "learning_rate": 6.760990173628566e-06, + "loss": 0.5999, + "step": 6375 + }, + { + "epoch": 0.4, + "grad_norm": 0.8886323571205139, + "learning_rate": 6.760029882909582e-06, + "loss": 0.5941, + "step": 6376 + }, + { + "epoch": 0.4, + "grad_norm": 0.8494300842285156, + "learning_rate": 6.759069518081863e-06, + "loss": 0.5829, + "step": 6377 + }, + { + "epoch": 0.4, + "grad_norm": 0.9388317465782166, + "learning_rate": 6.758109079185846e-06, + "loss": 0.593, + "step": 6378 + }, + { + "epoch": 0.4, + "grad_norm": 0.9075881838798523, + "learning_rate": 6.757148566261973e-06, + "loss": 0.5656, + "step": 6379 + }, + { + "epoch": 0.4, + "grad_norm": 0.9015637040138245, + "learning_rate": 6.756187979350684e-06, + "loss": 0.6046, + "step": 6380 + }, + { + "epoch": 0.4, + "grad_norm": 0.9175539016723633, + "learning_rate": 6.75522731849243e-06, + "loss": 0.5636, + "step": 6381 + }, + { + "epoch": 0.4, + "grad_norm": 0.8536416292190552, + "learning_rate": 6.754266583727659e-06, + "loss": 0.6258, + "step": 6382 + }, + { + "epoch": 0.4, + "grad_norm": 0.84648197889328, + "learning_rate": 6.753305775096826e-06, + "loss": 0.5732, + "step": 6383 + }, + { + "epoch": 0.4, + "grad_norm": 0.8069581389427185, + "learning_rate": 6.752344892640384e-06, + "loss": 0.5564, + "step": 6384 + }, + { + "epoch": 0.4, + "grad_norm": 0.8196657299995422, + "learning_rate": 6.751383936398796e-06, + "loss": 0.5909, + "step": 6385 + }, + { + "epoch": 0.4, + "grad_norm": 0.9850438237190247, + "learning_rate": 6.750422906412523e-06, + "loss": 0.6585, + "step": 6386 + }, + { + "epoch": 0.4, + "grad_norm": 0.9552303552627563, + "learning_rate": 6.749461802722032e-06, + "loss": 0.6039, + "step": 6387 + }, + { + "epoch": 0.4, + "grad_norm": 0.955740213394165, + "learning_rate": 6.7485006253677875e-06, + "loss": 0.6084, + "step": 6388 + }, + { + "epoch": 0.4, + "grad_norm": 1.0010960102081299, + "learning_rate": 6.747539374390266e-06, + "loss": 0.6799, + "step": 6389 + }, + { + "epoch": 0.4, + "grad_norm": 0.909136950969696, + "learning_rate": 6.746578049829942e-06, + "loss": 0.5671, + "step": 6390 + }, + { + "epoch": 0.4, + "grad_norm": 0.8283319473266602, + "learning_rate": 6.745616651727289e-06, + "loss": 0.5801, + "step": 6391 + }, + { + "epoch": 0.4, + "grad_norm": 0.8886178135871887, + "learning_rate": 6.744655180122793e-06, + "loss": 0.5932, + "step": 6392 + }, + { + "epoch": 0.41, + "grad_norm": 0.9179041385650635, + "learning_rate": 6.743693635056936e-06, + "loss": 0.6413, + "step": 6393 + }, + { + "epoch": 0.41, + "grad_norm": 0.9177907109260559, + "learning_rate": 6.742732016570207e-06, + "loss": 0.6361, + "step": 6394 + }, + { + "epoch": 0.41, + "grad_norm": 0.8754076361656189, + "learning_rate": 6.741770324703095e-06, + "loss": 0.6194, + "step": 6395 + }, + { + "epoch": 0.41, + "grad_norm": 0.8487926721572876, + "learning_rate": 6.740808559496093e-06, + "loss": 0.5599, + "step": 6396 + }, + { + "epoch": 0.41, + "grad_norm": 0.9245063066482544, + "learning_rate": 6.739846720989699e-06, + "loss": 0.6556, + "step": 6397 + }, + { + "epoch": 0.41, + "grad_norm": 0.9024572968482971, + "learning_rate": 6.738884809224413e-06, + "loss": 0.5621, + "step": 6398 + }, + { + "epoch": 0.41, + "grad_norm": 0.9168578386306763, + "learning_rate": 6.7379228242407345e-06, + "loss": 0.6098, + "step": 6399 + }, + { + "epoch": 0.41, + "grad_norm": 0.8839691281318665, + "learning_rate": 6.736960766079173e-06, + "loss": 0.5978, + "step": 6400 + }, + { + "epoch": 0.41, + "grad_norm": 0.9675304293632507, + "learning_rate": 6.735998634780238e-06, + "loss": 0.5849, + "step": 6401 + }, + { + "epoch": 0.41, + "grad_norm": 0.8997515439987183, + "learning_rate": 6.735036430384436e-06, + "loss": 0.5645, + "step": 6402 + }, + { + "epoch": 0.41, + "grad_norm": 0.8792773485183716, + "learning_rate": 6.7340741529322875e-06, + "loss": 0.6105, + "step": 6403 + }, + { + "epoch": 0.41, + "grad_norm": 0.9032172560691833, + "learning_rate": 6.733111802464308e-06, + "loss": 0.5789, + "step": 6404 + }, + { + "epoch": 0.41, + "grad_norm": 0.8126611113548279, + "learning_rate": 6.732149379021022e-06, + "loss": 0.5711, + "step": 6405 + }, + { + "epoch": 0.41, + "grad_norm": 0.8911159038543701, + "learning_rate": 6.7311868826429485e-06, + "loss": 0.6068, + "step": 6406 + }, + { + "epoch": 0.41, + "grad_norm": 0.9121822714805603, + "learning_rate": 6.730224313370619e-06, + "loss": 0.6165, + "step": 6407 + }, + { + "epoch": 0.41, + "grad_norm": 0.8678528070449829, + "learning_rate": 6.729261671244563e-06, + "loss": 0.5745, + "step": 6408 + }, + { + "epoch": 0.41, + "grad_norm": 0.9104927182197571, + "learning_rate": 6.728298956305313e-06, + "loss": 0.5595, + "step": 6409 + }, + { + "epoch": 0.41, + "grad_norm": 0.9237872958183289, + "learning_rate": 6.727336168593406e-06, + "loss": 0.582, + "step": 6410 + }, + { + "epoch": 0.41, + "grad_norm": 0.9053632020950317, + "learning_rate": 6.726373308149382e-06, + "loss": 0.5984, + "step": 6411 + }, + { + "epoch": 0.41, + "grad_norm": 0.85235995054245, + "learning_rate": 6.725410375013783e-06, + "loss": 0.5581, + "step": 6412 + }, + { + "epoch": 0.41, + "grad_norm": 0.8615298271179199, + "learning_rate": 6.724447369227159e-06, + "loss": 0.5921, + "step": 6413 + }, + { + "epoch": 0.41, + "grad_norm": 0.9467587471008301, + "learning_rate": 6.723484290830051e-06, + "loss": 0.5917, + "step": 6414 + }, + { + "epoch": 0.41, + "grad_norm": 0.9265984892845154, + "learning_rate": 6.722521139863017e-06, + "loss": 0.6216, + "step": 6415 + }, + { + "epoch": 0.41, + "grad_norm": 0.8947895169258118, + "learning_rate": 6.72155791636661e-06, + "loss": 0.6052, + "step": 6416 + }, + { + "epoch": 0.41, + "grad_norm": 0.8797786235809326, + "learning_rate": 6.720594620381387e-06, + "loss": 0.5621, + "step": 6417 + }, + { + "epoch": 0.41, + "grad_norm": 0.9423597455024719, + "learning_rate": 6.71963125194791e-06, + "loss": 0.6233, + "step": 6418 + }, + { + "epoch": 0.41, + "grad_norm": 0.9435870051383972, + "learning_rate": 6.718667811106744e-06, + "loss": 0.5961, + "step": 6419 + }, + { + "epoch": 0.41, + "grad_norm": 0.9278707504272461, + "learning_rate": 6.717704297898455e-06, + "loss": 0.5821, + "step": 6420 + }, + { + "epoch": 0.41, + "grad_norm": 0.8902246952056885, + "learning_rate": 6.716740712363614e-06, + "loss": 0.5672, + "step": 6421 + }, + { + "epoch": 0.41, + "grad_norm": 0.9437769651412964, + "learning_rate": 6.715777054542793e-06, + "loss": 0.6031, + "step": 6422 + }, + { + "epoch": 0.41, + "grad_norm": 0.9507419466972351, + "learning_rate": 6.714813324476569e-06, + "loss": 0.5812, + "step": 6423 + }, + { + "epoch": 0.41, + "grad_norm": 0.9317444562911987, + "learning_rate": 6.713849522205522e-06, + "loss": 0.5611, + "step": 6424 + }, + { + "epoch": 0.41, + "grad_norm": 0.8754682540893555, + "learning_rate": 6.712885647770233e-06, + "loss": 0.6031, + "step": 6425 + }, + { + "epoch": 0.41, + "grad_norm": 0.9129989743232727, + "learning_rate": 6.711921701211288e-06, + "loss": 0.5967, + "step": 6426 + }, + { + "epoch": 0.41, + "grad_norm": 0.9079276919364929, + "learning_rate": 6.710957682569276e-06, + "loss": 0.6603, + "step": 6427 + }, + { + "epoch": 0.41, + "grad_norm": 0.8813990950584412, + "learning_rate": 6.709993591884788e-06, + "loss": 0.5873, + "step": 6428 + }, + { + "epoch": 0.41, + "grad_norm": 0.8813159465789795, + "learning_rate": 6.709029429198418e-06, + "loss": 0.5746, + "step": 6429 + }, + { + "epoch": 0.41, + "grad_norm": 0.9071645140647888, + "learning_rate": 6.7080651945507645e-06, + "loss": 0.5743, + "step": 6430 + }, + { + "epoch": 0.41, + "grad_norm": 0.8338029384613037, + "learning_rate": 6.707100887982427e-06, + "loss": 0.5769, + "step": 6431 + }, + { + "epoch": 0.41, + "grad_norm": 0.8543631434440613, + "learning_rate": 6.7061365095340105e-06, + "loss": 0.592, + "step": 6432 + }, + { + "epoch": 0.41, + "grad_norm": 0.9253416061401367, + "learning_rate": 6.70517205924612e-06, + "loss": 0.6099, + "step": 6433 + }, + { + "epoch": 0.41, + "grad_norm": 0.846316933631897, + "learning_rate": 6.7042075371593665e-06, + "loss": 0.5734, + "step": 6434 + }, + { + "epoch": 0.41, + "grad_norm": 0.9376114010810852, + "learning_rate": 6.703242943314362e-06, + "loss": 0.6333, + "step": 6435 + }, + { + "epoch": 0.41, + "grad_norm": 0.970414400100708, + "learning_rate": 6.702278277751722e-06, + "loss": 0.5598, + "step": 6436 + }, + { + "epoch": 0.41, + "grad_norm": 0.957120418548584, + "learning_rate": 6.701313540512065e-06, + "loss": 0.6345, + "step": 6437 + }, + { + "epoch": 0.41, + "grad_norm": 0.932551920413971, + "learning_rate": 6.700348731636014e-06, + "loss": 0.5905, + "step": 6438 + }, + { + "epoch": 0.41, + "grad_norm": 0.9044030904769897, + "learning_rate": 6.699383851164194e-06, + "loss": 0.6525, + "step": 6439 + }, + { + "epoch": 0.41, + "grad_norm": 0.8771166205406189, + "learning_rate": 6.6984188991372305e-06, + "loss": 0.599, + "step": 6440 + }, + { + "epoch": 0.41, + "grad_norm": 0.8178818821907043, + "learning_rate": 6.697453875595755e-06, + "loss": 0.5609, + "step": 6441 + }, + { + "epoch": 0.41, + "grad_norm": 0.8368890881538391, + "learning_rate": 6.696488780580403e-06, + "loss": 0.543, + "step": 6442 + }, + { + "epoch": 0.41, + "grad_norm": 0.8307216763496399, + "learning_rate": 6.69552361413181e-06, + "loss": 0.5784, + "step": 6443 + }, + { + "epoch": 0.41, + "grad_norm": 0.8592568039894104, + "learning_rate": 6.694558376290615e-06, + "loss": 0.603, + "step": 6444 + }, + { + "epoch": 0.41, + "grad_norm": 0.8686701655387878, + "learning_rate": 6.693593067097462e-06, + "loss": 0.6026, + "step": 6445 + }, + { + "epoch": 0.41, + "grad_norm": 0.9390038251876831, + "learning_rate": 6.692627686592998e-06, + "loss": 0.6531, + "step": 6446 + }, + { + "epoch": 0.41, + "grad_norm": 0.9398483633995056, + "learning_rate": 6.691662234817869e-06, + "loss": 0.6016, + "step": 6447 + }, + { + "epoch": 0.41, + "grad_norm": 0.8840192556381226, + "learning_rate": 6.690696711812729e-06, + "loss": 0.5461, + "step": 6448 + }, + { + "epoch": 0.41, + "grad_norm": 0.8928658366203308, + "learning_rate": 6.68973111761823e-06, + "loss": 0.5753, + "step": 6449 + }, + { + "epoch": 0.41, + "grad_norm": 0.9356186985969543, + "learning_rate": 6.688765452275033e-06, + "loss": 0.6636, + "step": 6450 + }, + { + "epoch": 0.41, + "grad_norm": 0.8654458522796631, + "learning_rate": 6.687799715823798e-06, + "loss": 0.5351, + "step": 6451 + }, + { + "epoch": 0.41, + "grad_norm": 0.867955207824707, + "learning_rate": 6.686833908305188e-06, + "loss": 0.6091, + "step": 6452 + }, + { + "epoch": 0.41, + "grad_norm": 0.8342301845550537, + "learning_rate": 6.68586802975987e-06, + "loss": 0.591, + "step": 6453 + }, + { + "epoch": 0.41, + "grad_norm": 0.9121977090835571, + "learning_rate": 6.684902080228514e-06, + "loss": 0.5892, + "step": 6454 + }, + { + "epoch": 0.41, + "grad_norm": 0.9055156111717224, + "learning_rate": 6.6839360597517935e-06, + "loss": 0.5665, + "step": 6455 + }, + { + "epoch": 0.41, + "grad_norm": 0.8971875905990601, + "learning_rate": 6.682969968370383e-06, + "loss": 0.6021, + "step": 6456 + }, + { + "epoch": 0.41, + "grad_norm": 0.9293539524078369, + "learning_rate": 6.68200380612496e-06, + "loss": 0.5815, + "step": 6457 + }, + { + "epoch": 0.41, + "grad_norm": 0.9090824127197266, + "learning_rate": 6.681037573056211e-06, + "loss": 0.5778, + "step": 6458 + }, + { + "epoch": 0.41, + "grad_norm": 0.9384252429008484, + "learning_rate": 6.6800712692048164e-06, + "loss": 0.5974, + "step": 6459 + }, + { + "epoch": 0.41, + "grad_norm": 0.9273927211761475, + "learning_rate": 6.679104894611466e-06, + "loss": 0.6242, + "step": 6460 + }, + { + "epoch": 0.41, + "grad_norm": 0.9325118660926819, + "learning_rate": 6.678138449316848e-06, + "loss": 0.6443, + "step": 6461 + }, + { + "epoch": 0.41, + "grad_norm": 0.8972262740135193, + "learning_rate": 6.6771719333616584e-06, + "loss": 0.568, + "step": 6462 + }, + { + "epoch": 0.41, + "grad_norm": 0.828413724899292, + "learning_rate": 6.676205346786594e-06, + "loss": 0.5929, + "step": 6463 + }, + { + "epoch": 0.41, + "grad_norm": 0.8351660966873169, + "learning_rate": 6.6752386896323526e-06, + "loss": 0.6104, + "step": 6464 + }, + { + "epoch": 0.41, + "grad_norm": 0.9743680953979492, + "learning_rate": 6.674271961939638e-06, + "loss": 0.6608, + "step": 6465 + }, + { + "epoch": 0.41, + "grad_norm": 0.8384668231010437, + "learning_rate": 6.673305163749155e-06, + "loss": 0.5683, + "step": 6466 + }, + { + "epoch": 0.41, + "grad_norm": 0.8962710499763489, + "learning_rate": 6.672338295101614e-06, + "loss": 0.5661, + "step": 6467 + }, + { + "epoch": 0.41, + "grad_norm": 0.8527003526687622, + "learning_rate": 6.671371356037723e-06, + "loss": 0.6172, + "step": 6468 + }, + { + "epoch": 0.41, + "grad_norm": 0.967922568321228, + "learning_rate": 6.670404346598199e-06, + "loss": 0.605, + "step": 6469 + }, + { + "epoch": 0.41, + "grad_norm": 0.8897997736930847, + "learning_rate": 6.669437266823759e-06, + "loss": 0.6087, + "step": 6470 + }, + { + "epoch": 0.41, + "grad_norm": 0.9014569520950317, + "learning_rate": 6.668470116755125e-06, + "loss": 0.589, + "step": 6471 + }, + { + "epoch": 0.41, + "grad_norm": 0.8684948086738586, + "learning_rate": 6.6675028964330156e-06, + "loss": 0.5962, + "step": 6472 + }, + { + "epoch": 0.41, + "grad_norm": 0.8721036911010742, + "learning_rate": 6.666535605898162e-06, + "loss": 0.666, + "step": 6473 + }, + { + "epoch": 0.41, + "grad_norm": 0.8894490599632263, + "learning_rate": 6.6655682451912915e-06, + "loss": 0.5945, + "step": 6474 + }, + { + "epoch": 0.41, + "grad_norm": 0.8807538747787476, + "learning_rate": 6.664600814353137e-06, + "loss": 0.6073, + "step": 6475 + }, + { + "epoch": 0.41, + "grad_norm": 0.9010364413261414, + "learning_rate": 6.6636333134244305e-06, + "loss": 0.5884, + "step": 6476 + }, + { + "epoch": 0.41, + "grad_norm": 0.8854992985725403, + "learning_rate": 6.662665742445914e-06, + "loss": 0.5852, + "step": 6477 + }, + { + "epoch": 0.41, + "grad_norm": 0.8660020232200623, + "learning_rate": 6.661698101458327e-06, + "loss": 0.5954, + "step": 6478 + }, + { + "epoch": 0.41, + "grad_norm": 0.8577721118927002, + "learning_rate": 6.660730390502414e-06, + "loss": 0.5837, + "step": 6479 + }, + { + "epoch": 0.41, + "grad_norm": 0.8442829251289368, + "learning_rate": 6.6597626096189206e-06, + "loss": 0.619, + "step": 6480 + }, + { + "epoch": 0.41, + "grad_norm": 0.877422571182251, + "learning_rate": 6.658794758848598e-06, + "loss": 0.6028, + "step": 6481 + }, + { + "epoch": 0.41, + "grad_norm": 0.880001425743103, + "learning_rate": 6.6578268382322e-06, + "loss": 0.5807, + "step": 6482 + }, + { + "epoch": 0.41, + "grad_norm": 0.8305491209030151, + "learning_rate": 6.656858847810479e-06, + "loss": 0.5786, + "step": 6483 + }, + { + "epoch": 0.41, + "grad_norm": 0.8943942785263062, + "learning_rate": 6.655890787624195e-06, + "loss": 0.5586, + "step": 6484 + }, + { + "epoch": 0.41, + "grad_norm": 0.893250584602356, + "learning_rate": 6.654922657714112e-06, + "loss": 0.5612, + "step": 6485 + }, + { + "epoch": 0.41, + "grad_norm": 0.9150073528289795, + "learning_rate": 6.6539544581209935e-06, + "loss": 0.6442, + "step": 6486 + }, + { + "epoch": 0.41, + "grad_norm": 0.8710561394691467, + "learning_rate": 6.652986188885605e-06, + "loss": 0.6025, + "step": 6487 + }, + { + "epoch": 0.41, + "grad_norm": 0.8758864402770996, + "learning_rate": 6.652017850048719e-06, + "loss": 0.5879, + "step": 6488 + }, + { + "epoch": 0.41, + "grad_norm": 0.925520658493042, + "learning_rate": 6.651049441651107e-06, + "loss": 0.6254, + "step": 6489 + }, + { + "epoch": 0.41, + "grad_norm": 0.8633304834365845, + "learning_rate": 6.65008096373355e-06, + "loss": 0.5776, + "step": 6490 + }, + { + "epoch": 0.41, + "grad_norm": 0.9169586300849915, + "learning_rate": 6.6491124163368215e-06, + "loss": 0.587, + "step": 6491 + }, + { + "epoch": 0.41, + "grad_norm": 0.815740168094635, + "learning_rate": 6.648143799501705e-06, + "loss": 0.5786, + "step": 6492 + }, + { + "epoch": 0.41, + "grad_norm": 0.9177011847496033, + "learning_rate": 6.647175113268989e-06, + "loss": 0.5998, + "step": 6493 + }, + { + "epoch": 0.41, + "grad_norm": 0.9129186868667603, + "learning_rate": 6.646206357679458e-06, + "loss": 0.6242, + "step": 6494 + }, + { + "epoch": 0.41, + "grad_norm": 0.8686244487762451, + "learning_rate": 6.645237532773902e-06, + "loss": 0.6423, + "step": 6495 + }, + { + "epoch": 0.41, + "grad_norm": 0.9115392565727234, + "learning_rate": 6.64426863859312e-06, + "loss": 0.6303, + "step": 6496 + }, + { + "epoch": 0.41, + "grad_norm": 0.9037183523178101, + "learning_rate": 6.643299675177906e-06, + "loss": 0.6147, + "step": 6497 + }, + { + "epoch": 0.41, + "grad_norm": 0.9037627577781677, + "learning_rate": 6.642330642569056e-06, + "loss": 0.5879, + "step": 6498 + }, + { + "epoch": 0.41, + "grad_norm": 0.8843808770179749, + "learning_rate": 6.641361540807377e-06, + "loss": 0.5843, + "step": 6499 + }, + { + "epoch": 0.41, + "grad_norm": 0.9096183180809021, + "learning_rate": 6.640392369933675e-06, + "loss": 0.5984, + "step": 6500 + }, + { + "epoch": 0.41, + "grad_norm": 0.9090222120285034, + "learning_rate": 6.639423129988756e-06, + "loss": 0.5941, + "step": 6501 + }, + { + "epoch": 0.41, + "grad_norm": 0.9428609609603882, + "learning_rate": 6.638453821013431e-06, + "loss": 0.6811, + "step": 6502 + }, + { + "epoch": 0.41, + "grad_norm": 0.8241065144538879, + "learning_rate": 6.637484443048516e-06, + "loss": 0.59, + "step": 6503 + }, + { + "epoch": 0.41, + "grad_norm": 0.8859769701957703, + "learning_rate": 6.636514996134828e-06, + "loss": 0.555, + "step": 6504 + }, + { + "epoch": 0.41, + "grad_norm": 0.9394935965538025, + "learning_rate": 6.635545480313187e-06, + "loss": 0.6377, + "step": 6505 + }, + { + "epoch": 0.41, + "grad_norm": 0.8997877240180969, + "learning_rate": 6.634575895624414e-06, + "loss": 0.66, + "step": 6506 + }, + { + "epoch": 0.41, + "grad_norm": 0.9390882253646851, + "learning_rate": 6.6336062421093374e-06, + "loss": 0.6316, + "step": 6507 + }, + { + "epoch": 0.41, + "grad_norm": 0.8696218729019165, + "learning_rate": 6.632636519808785e-06, + "loss": 0.6006, + "step": 6508 + }, + { + "epoch": 0.41, + "grad_norm": 0.9233937859535217, + "learning_rate": 6.6316667287635875e-06, + "loss": 0.6195, + "step": 6509 + }, + { + "epoch": 0.41, + "grad_norm": 0.9679466485977173, + "learning_rate": 6.63069686901458e-06, + "loss": 0.6293, + "step": 6510 + }, + { + "epoch": 0.41, + "grad_norm": 0.8663052320480347, + "learning_rate": 6.629726940602601e-06, + "loss": 0.6051, + "step": 6511 + }, + { + "epoch": 0.41, + "grad_norm": 0.8438270092010498, + "learning_rate": 6.62875694356849e-06, + "loss": 0.6315, + "step": 6512 + }, + { + "epoch": 0.41, + "grad_norm": 0.9236611723899841, + "learning_rate": 6.62778687795309e-06, + "loss": 0.564, + "step": 6513 + }, + { + "epoch": 0.41, + "grad_norm": 0.8759667873382568, + "learning_rate": 6.626816743797246e-06, + "loss": 0.5565, + "step": 6514 + }, + { + "epoch": 0.41, + "grad_norm": 0.8147273659706116, + "learning_rate": 6.62584654114181e-06, + "loss": 0.5434, + "step": 6515 + }, + { + "epoch": 0.41, + "grad_norm": 0.8937092423439026, + "learning_rate": 6.6248762700276315e-06, + "loss": 0.6153, + "step": 6516 + }, + { + "epoch": 0.41, + "grad_norm": 0.8718124032020569, + "learning_rate": 6.623905930495565e-06, + "loss": 0.5902, + "step": 6517 + }, + { + "epoch": 0.41, + "grad_norm": 1.0065981149673462, + "learning_rate": 6.622935522586469e-06, + "loss": 0.5592, + "step": 6518 + }, + { + "epoch": 0.41, + "grad_norm": 0.8148283362388611, + "learning_rate": 6.6219650463412034e-06, + "loss": 0.5861, + "step": 6519 + }, + { + "epoch": 0.41, + "grad_norm": 0.884833574295044, + "learning_rate": 6.620994501800634e-06, + "loss": 0.5903, + "step": 6520 + }, + { + "epoch": 0.41, + "grad_norm": 0.873306393623352, + "learning_rate": 6.620023889005624e-06, + "loss": 0.6419, + "step": 6521 + }, + { + "epoch": 0.41, + "grad_norm": 0.9364440441131592, + "learning_rate": 6.619053207997043e-06, + "loss": 0.6001, + "step": 6522 + }, + { + "epoch": 0.41, + "grad_norm": 0.9023630619049072, + "learning_rate": 6.618082458815765e-06, + "loss": 0.5793, + "step": 6523 + }, + { + "epoch": 0.41, + "grad_norm": 0.8948296904563904, + "learning_rate": 6.617111641502664e-06, + "loss": 0.58, + "step": 6524 + }, + { + "epoch": 0.41, + "grad_norm": 0.8921267986297607, + "learning_rate": 6.616140756098617e-06, + "loss": 0.5626, + "step": 6525 + }, + { + "epoch": 0.41, + "grad_norm": 0.8996078968048096, + "learning_rate": 6.615169802644503e-06, + "loss": 0.6441, + "step": 6526 + }, + { + "epoch": 0.41, + "grad_norm": 0.992561936378479, + "learning_rate": 6.614198781181209e-06, + "loss": 0.6163, + "step": 6527 + }, + { + "epoch": 0.41, + "grad_norm": 0.8944520354270935, + "learning_rate": 6.613227691749619e-06, + "loss": 0.6454, + "step": 6528 + }, + { + "epoch": 0.41, + "grad_norm": 0.9010036587715149, + "learning_rate": 6.612256534390624e-06, + "loss": 0.6159, + "step": 6529 + }, + { + "epoch": 0.41, + "grad_norm": 0.893017053604126, + "learning_rate": 6.611285309145113e-06, + "loss": 0.5674, + "step": 6530 + }, + { + "epoch": 0.41, + "grad_norm": 0.849725067615509, + "learning_rate": 6.610314016053986e-06, + "loss": 0.5657, + "step": 6531 + }, + { + "epoch": 0.41, + "grad_norm": 0.8771043419837952, + "learning_rate": 6.609342655158135e-06, + "loss": 0.6204, + "step": 6532 + }, + { + "epoch": 0.41, + "grad_norm": 0.9552651643753052, + "learning_rate": 6.608371226498464e-06, + "loss": 0.5664, + "step": 6533 + }, + { + "epoch": 0.41, + "grad_norm": 0.9059584140777588, + "learning_rate": 6.607399730115875e-06, + "loss": 0.612, + "step": 6534 + }, + { + "epoch": 0.41, + "grad_norm": 0.8679310083389282, + "learning_rate": 6.6064281660512775e-06, + "loss": 0.5264, + "step": 6535 + }, + { + "epoch": 0.41, + "grad_norm": 0.9331749081611633, + "learning_rate": 6.6054565343455765e-06, + "loss": 0.6195, + "step": 6536 + }, + { + "epoch": 0.41, + "grad_norm": 0.8616225719451904, + "learning_rate": 6.604484835039686e-06, + "loss": 0.5867, + "step": 6537 + }, + { + "epoch": 0.41, + "grad_norm": 0.8593981862068176, + "learning_rate": 6.603513068174521e-06, + "loss": 0.5371, + "step": 6538 + }, + { + "epoch": 0.41, + "grad_norm": 0.8332374095916748, + "learning_rate": 6.602541233790999e-06, + "loss": 0.5885, + "step": 6539 + }, + { + "epoch": 0.41, + "grad_norm": 0.9694592356681824, + "learning_rate": 6.601569331930041e-06, + "loss": 0.6602, + "step": 6540 + }, + { + "epoch": 0.41, + "grad_norm": 0.8334494829177856, + "learning_rate": 6.600597362632568e-06, + "loss": 0.5729, + "step": 6541 + }, + { + "epoch": 0.41, + "grad_norm": 0.8962088823318481, + "learning_rate": 6.599625325939509e-06, + "loss": 0.6357, + "step": 6542 + }, + { + "epoch": 0.41, + "grad_norm": 0.9451315402984619, + "learning_rate": 6.598653221891793e-06, + "loss": 0.619, + "step": 6543 + }, + { + "epoch": 0.41, + "grad_norm": 0.9206660389900208, + "learning_rate": 6.597681050530351e-06, + "loss": 0.6387, + "step": 6544 + }, + { + "epoch": 0.41, + "grad_norm": 0.8884252905845642, + "learning_rate": 6.596708811896116e-06, + "loss": 0.61, + "step": 6545 + }, + { + "epoch": 0.41, + "grad_norm": 0.8867802619934082, + "learning_rate": 6.595736506030029e-06, + "loss": 0.6128, + "step": 6546 + }, + { + "epoch": 0.41, + "grad_norm": 0.8310429453849792, + "learning_rate": 6.59476413297303e-06, + "loss": 0.5547, + "step": 6547 + }, + { + "epoch": 0.41, + "grad_norm": 0.9180542230606079, + "learning_rate": 6.59379169276606e-06, + "loss": 0.5834, + "step": 6548 + }, + { + "epoch": 0.41, + "grad_norm": 0.8704774379730225, + "learning_rate": 6.5928191854500644e-06, + "loss": 0.6258, + "step": 6549 + }, + { + "epoch": 0.41, + "grad_norm": 0.8992159962654114, + "learning_rate": 6.591846611065997e-06, + "loss": 0.5977, + "step": 6550 + }, + { + "epoch": 0.42, + "grad_norm": 0.8730959892272949, + "learning_rate": 6.590873969654805e-06, + "loss": 0.6295, + "step": 6551 + }, + { + "epoch": 0.42, + "grad_norm": 0.9002555012702942, + "learning_rate": 6.589901261257445e-06, + "loss": 0.6144, + "step": 6552 + }, + { + "epoch": 0.42, + "grad_norm": 0.9010111093521118, + "learning_rate": 6.588928485914871e-06, + "loss": 0.6662, + "step": 6553 + }, + { + "epoch": 0.42, + "grad_norm": 0.8698523044586182, + "learning_rate": 6.587955643668049e-06, + "loss": 0.6023, + "step": 6554 + }, + { + "epoch": 0.42, + "grad_norm": 0.9001327753067017, + "learning_rate": 6.58698273455794e-06, + "loss": 0.5513, + "step": 6555 + }, + { + "epoch": 0.42, + "grad_norm": 0.9791713953018188, + "learning_rate": 6.586009758625507e-06, + "loss": 0.6439, + "step": 6556 + }, + { + "epoch": 0.42, + "grad_norm": 0.9007930159568787, + "learning_rate": 6.585036715911719e-06, + "loss": 0.5951, + "step": 6557 + }, + { + "epoch": 0.42, + "grad_norm": 0.8678936958312988, + "learning_rate": 6.58406360645755e-06, + "loss": 0.6341, + "step": 6558 + }, + { + "epoch": 0.42, + "grad_norm": 0.8232488036155701, + "learning_rate": 6.583090430303975e-06, + "loss": 0.5596, + "step": 6559 + }, + { + "epoch": 0.42, + "grad_norm": 0.8638771772384644, + "learning_rate": 6.582117187491967e-06, + "loss": 0.6657, + "step": 6560 + }, + { + "epoch": 0.42, + "grad_norm": 0.9044223427772522, + "learning_rate": 6.581143878062507e-06, + "loss": 0.6091, + "step": 6561 + }, + { + "epoch": 0.42, + "grad_norm": 0.9236576557159424, + "learning_rate": 6.58017050205658e-06, + "loss": 0.5625, + "step": 6562 + }, + { + "epoch": 0.42, + "grad_norm": 0.9195695519447327, + "learning_rate": 6.5791970595151714e-06, + "loss": 0.6354, + "step": 6563 + }, + { + "epoch": 0.42, + "grad_norm": 0.8628154993057251, + "learning_rate": 6.578223550479266e-06, + "loss": 0.6041, + "step": 6564 + }, + { + "epoch": 0.42, + "grad_norm": 0.779208779335022, + "learning_rate": 6.5772499749898585e-06, + "loss": 0.555, + "step": 6565 + }, + { + "epoch": 0.42, + "grad_norm": 1.0027345418930054, + "learning_rate": 6.576276333087941e-06, + "loss": 0.6062, + "step": 6566 + }, + { + "epoch": 0.42, + "grad_norm": 0.8687159419059753, + "learning_rate": 6.575302624814512e-06, + "loss": 0.5796, + "step": 6567 + }, + { + "epoch": 0.42, + "grad_norm": 0.9297102093696594, + "learning_rate": 6.5743288502105675e-06, + "loss": 0.5697, + "step": 6568 + }, + { + "epoch": 0.42, + "grad_norm": 0.9116488099098206, + "learning_rate": 6.5733550093171115e-06, + "loss": 0.5924, + "step": 6569 + }, + { + "epoch": 0.42, + "grad_norm": 0.9103240370750427, + "learning_rate": 6.572381102175151e-06, + "loss": 0.5937, + "step": 6570 + }, + { + "epoch": 0.42, + "grad_norm": 0.8994322419166565, + "learning_rate": 6.571407128825692e-06, + "loss": 0.6327, + "step": 6571 + }, + { + "epoch": 0.42, + "grad_norm": 0.9073256850242615, + "learning_rate": 6.570433089309745e-06, + "loss": 0.5543, + "step": 6572 + }, + { + "epoch": 0.42, + "grad_norm": 0.902117133140564, + "learning_rate": 6.569458983668323e-06, + "loss": 0.5465, + "step": 6573 + }, + { + "epoch": 0.42, + "grad_norm": 0.8766512870788574, + "learning_rate": 6.5684848119424435e-06, + "loss": 0.6265, + "step": 6574 + }, + { + "epoch": 0.42, + "grad_norm": 0.8914992213249207, + "learning_rate": 6.567510574173126e-06, + "loss": 0.6145, + "step": 6575 + }, + { + "epoch": 0.42, + "grad_norm": 0.9038758277893066, + "learning_rate": 6.566536270401389e-06, + "loss": 0.6472, + "step": 6576 + }, + { + "epoch": 0.42, + "grad_norm": 0.9073125123977661, + "learning_rate": 6.5655619006682604e-06, + "loss": 0.6354, + "step": 6577 + }, + { + "epoch": 0.42, + "grad_norm": 0.8908482193946838, + "learning_rate": 6.5645874650147676e-06, + "loss": 0.603, + "step": 6578 + }, + { + "epoch": 0.42, + "grad_norm": 0.9067582488059998, + "learning_rate": 6.563612963481938e-06, + "loss": 0.5947, + "step": 6579 + }, + { + "epoch": 0.42, + "grad_norm": 0.926496148109436, + "learning_rate": 6.562638396110805e-06, + "loss": 0.5645, + "step": 6580 + }, + { + "epoch": 0.42, + "grad_norm": 1.002769112586975, + "learning_rate": 6.561663762942407e-06, + "loss": 0.6122, + "step": 6581 + }, + { + "epoch": 0.42, + "grad_norm": 1.0203826427459717, + "learning_rate": 6.560689064017781e-06, + "loss": 0.5591, + "step": 6582 + }, + { + "epoch": 0.42, + "grad_norm": 0.9004783034324646, + "learning_rate": 6.559714299377966e-06, + "loss": 0.6435, + "step": 6583 + }, + { + "epoch": 0.42, + "grad_norm": 0.8488182425498962, + "learning_rate": 6.558739469064008e-06, + "loss": 0.6716, + "step": 6584 + }, + { + "epoch": 0.42, + "grad_norm": 0.9152944087982178, + "learning_rate": 6.5577645731169535e-06, + "loss": 0.6506, + "step": 6585 + }, + { + "epoch": 0.42, + "grad_norm": 1.0391813516616821, + "learning_rate": 6.556789611577854e-06, + "loss": 0.6031, + "step": 6586 + }, + { + "epoch": 0.42, + "grad_norm": 0.8500710129737854, + "learning_rate": 6.555814584487757e-06, + "loss": 0.604, + "step": 6587 + }, + { + "epoch": 0.42, + "grad_norm": 0.9001255631446838, + "learning_rate": 6.5548394918877216e-06, + "loss": 0.6199, + "step": 6588 + }, + { + "epoch": 0.42, + "grad_norm": 0.8187232613563538, + "learning_rate": 6.553864333818803e-06, + "loss": 0.5698, + "step": 6589 + }, + { + "epoch": 0.42, + "grad_norm": 0.8661369681358337, + "learning_rate": 6.552889110322062e-06, + "loss": 0.568, + "step": 6590 + }, + { + "epoch": 0.42, + "grad_norm": 0.9107438325881958, + "learning_rate": 6.551913821438565e-06, + "loss": 0.5363, + "step": 6591 + }, + { + "epoch": 0.42, + "grad_norm": 0.8906565308570862, + "learning_rate": 6.550938467209375e-06, + "loss": 0.5933, + "step": 6592 + }, + { + "epoch": 0.42, + "grad_norm": 0.865942656993866, + "learning_rate": 6.5499630476755616e-06, + "loss": 0.5402, + "step": 6593 + }, + { + "epoch": 0.42, + "grad_norm": 0.862751305103302, + "learning_rate": 6.548987562878195e-06, + "loss": 0.5899, + "step": 6594 + }, + { + "epoch": 0.42, + "grad_norm": 0.8719815015792847, + "learning_rate": 6.548012012858352e-06, + "loss": 0.5804, + "step": 6595 + }, + { + "epoch": 0.42, + "grad_norm": 0.9156465530395508, + "learning_rate": 6.547036397657106e-06, + "loss": 0.5877, + "step": 6596 + }, + { + "epoch": 0.42, + "grad_norm": 0.9153836965560913, + "learning_rate": 6.546060717315542e-06, + "loss": 0.6035, + "step": 6597 + }, + { + "epoch": 0.42, + "grad_norm": 0.9377827644348145, + "learning_rate": 6.545084971874738e-06, + "loss": 0.6147, + "step": 6598 + }, + { + "epoch": 0.42, + "grad_norm": 0.9083762168884277, + "learning_rate": 6.5441091613757805e-06, + "loss": 0.5889, + "step": 6599 + }, + { + "epoch": 0.42, + "grad_norm": 0.870985746383667, + "learning_rate": 6.543133285859758e-06, + "loss": 0.5776, + "step": 6600 + }, + { + "epoch": 0.42, + "grad_norm": 0.8286287188529968, + "learning_rate": 6.542157345367763e-06, + "loss": 0.5341, + "step": 6601 + }, + { + "epoch": 0.42, + "grad_norm": 0.9526362419128418, + "learning_rate": 6.5411813399408845e-06, + "loss": 0.6605, + "step": 6602 + }, + { + "epoch": 0.42, + "grad_norm": 0.8954978585243225, + "learning_rate": 6.540205269620221e-06, + "loss": 0.6206, + "step": 6603 + }, + { + "epoch": 0.42, + "grad_norm": 0.8499834537506104, + "learning_rate": 6.539229134446874e-06, + "loss": 0.5997, + "step": 6604 + }, + { + "epoch": 0.42, + "grad_norm": 0.9309713840484619, + "learning_rate": 6.538252934461941e-06, + "loss": 0.5951, + "step": 6605 + }, + { + "epoch": 0.42, + "grad_norm": 0.9219299554824829, + "learning_rate": 6.537276669706527e-06, + "loss": 0.6351, + "step": 6606 + }, + { + "epoch": 0.42, + "grad_norm": 0.8586229085922241, + "learning_rate": 6.536300340221742e-06, + "loss": 0.6374, + "step": 6607 + }, + { + "epoch": 0.42, + "grad_norm": 0.8765944242477417, + "learning_rate": 6.535323946048695e-06, + "loss": 0.5754, + "step": 6608 + }, + { + "epoch": 0.42, + "grad_norm": 0.8662661910057068, + "learning_rate": 6.534347487228495e-06, + "loss": 0.5382, + "step": 6609 + }, + { + "epoch": 0.42, + "grad_norm": 0.8375385999679565, + "learning_rate": 6.533370963802261e-06, + "loss": 0.5487, + "step": 6610 + }, + { + "epoch": 0.42, + "grad_norm": 0.9295637011528015, + "learning_rate": 6.532394375811111e-06, + "loss": 0.6287, + "step": 6611 + }, + { + "epoch": 0.42, + "grad_norm": 0.8826519250869751, + "learning_rate": 6.531417723296164e-06, + "loss": 0.5982, + "step": 6612 + }, + { + "epoch": 0.42, + "grad_norm": 0.9090942740440369, + "learning_rate": 6.530441006298544e-06, + "loss": 0.6041, + "step": 6613 + }, + { + "epoch": 0.42, + "grad_norm": 0.8962193131446838, + "learning_rate": 6.5294642248593765e-06, + "loss": 0.6588, + "step": 6614 + }, + { + "epoch": 0.42, + "grad_norm": 0.9374716281890869, + "learning_rate": 6.528487379019791e-06, + "loss": 0.5957, + "step": 6615 + }, + { + "epoch": 0.42, + "grad_norm": 0.8533490300178528, + "learning_rate": 6.5275104688209215e-06, + "loss": 0.567, + "step": 6616 + }, + { + "epoch": 0.42, + "grad_norm": 0.9722062349319458, + "learning_rate": 6.526533494303898e-06, + "loss": 0.65, + "step": 6617 + }, + { + "epoch": 0.42, + "grad_norm": 0.9122631549835205, + "learning_rate": 6.525556455509858e-06, + "loss": 0.6388, + "step": 6618 + }, + { + "epoch": 0.42, + "grad_norm": 0.8122672438621521, + "learning_rate": 6.5245793524799465e-06, + "loss": 0.5713, + "step": 6619 + }, + { + "epoch": 0.42, + "grad_norm": 0.8519312739372253, + "learning_rate": 6.5236021852553e-06, + "loss": 0.633, + "step": 6620 + }, + { + "epoch": 0.42, + "grad_norm": 0.9361836910247803, + "learning_rate": 6.522624953877066e-06, + "loss": 0.6151, + "step": 6621 + }, + { + "epoch": 0.42, + "grad_norm": 0.832336962223053, + "learning_rate": 6.52164765838639e-06, + "loss": 0.545, + "step": 6622 + }, + { + "epoch": 0.42, + "grad_norm": 0.8808085322380066, + "learning_rate": 6.520670298824428e-06, + "loss": 0.5869, + "step": 6623 + }, + { + "epoch": 0.42, + "grad_norm": 0.9161610007286072, + "learning_rate": 6.519692875232328e-06, + "loss": 0.5429, + "step": 6624 + }, + { + "epoch": 0.42, + "grad_norm": 0.9123284220695496, + "learning_rate": 6.518715387651249e-06, + "loss": 0.6242, + "step": 6625 + }, + { + "epoch": 0.42, + "grad_norm": 0.8876476883888245, + "learning_rate": 6.517737836122345e-06, + "loss": 0.6074, + "step": 6626 + }, + { + "epoch": 0.42, + "grad_norm": 0.8674134016036987, + "learning_rate": 6.516760220686783e-06, + "loss": 0.6166, + "step": 6627 + }, + { + "epoch": 0.42, + "grad_norm": 0.9016671776771545, + "learning_rate": 6.515782541385725e-06, + "loss": 0.6589, + "step": 6628 + }, + { + "epoch": 0.42, + "grad_norm": 0.9042580127716064, + "learning_rate": 6.514804798260337e-06, + "loss": 0.6446, + "step": 6629 + }, + { + "epoch": 0.42, + "grad_norm": 0.8501339554786682, + "learning_rate": 6.513826991351786e-06, + "loss": 0.6305, + "step": 6630 + }, + { + "epoch": 0.42, + "grad_norm": 0.8709739446640015, + "learning_rate": 6.512849120701249e-06, + "loss": 0.5833, + "step": 6631 + }, + { + "epoch": 0.42, + "grad_norm": 0.8446660041809082, + "learning_rate": 6.511871186349897e-06, + "loss": 0.5731, + "step": 6632 + }, + { + "epoch": 0.42, + "grad_norm": 0.8911131620407104, + "learning_rate": 6.510893188338911e-06, + "loss": 0.6193, + "step": 6633 + }, + { + "epoch": 0.42, + "grad_norm": 0.8830543756484985, + "learning_rate": 6.509915126709467e-06, + "loss": 0.5538, + "step": 6634 + }, + { + "epoch": 0.42, + "grad_norm": 0.9163636565208435, + "learning_rate": 6.50893700150275e-06, + "loss": 0.5837, + "step": 6635 + }, + { + "epoch": 0.42, + "grad_norm": 0.8984601497650146, + "learning_rate": 6.5079588127599455e-06, + "loss": 0.5719, + "step": 6636 + }, + { + "epoch": 0.42, + "grad_norm": 0.8621581196784973, + "learning_rate": 6.50698056052224e-06, + "loss": 0.5852, + "step": 6637 + }, + { + "epoch": 0.42, + "grad_norm": 0.837208092212677, + "learning_rate": 6.506002244830827e-06, + "loss": 0.5823, + "step": 6638 + }, + { + "epoch": 0.42, + "grad_norm": 0.8576619029045105, + "learning_rate": 6.505023865726898e-06, + "loss": 0.5895, + "step": 6639 + }, + { + "epoch": 0.42, + "grad_norm": 0.8191852569580078, + "learning_rate": 6.50404542325165e-06, + "loss": 0.5928, + "step": 6640 + }, + { + "epoch": 0.42, + "grad_norm": 0.9108313322067261, + "learning_rate": 6.503066917446279e-06, + "loss": 0.5934, + "step": 6641 + }, + { + "epoch": 0.42, + "grad_norm": 0.867838442325592, + "learning_rate": 6.502088348351992e-06, + "loss": 0.5703, + "step": 6642 + }, + { + "epoch": 0.42, + "grad_norm": 0.8657822608947754, + "learning_rate": 6.501109716009988e-06, + "loss": 0.5505, + "step": 6643 + }, + { + "epoch": 0.42, + "grad_norm": 0.8648907542228699, + "learning_rate": 6.500131020461477e-06, + "loss": 0.5656, + "step": 6644 + }, + { + "epoch": 0.42, + "grad_norm": 0.8719663619995117, + "learning_rate": 6.4991522617476666e-06, + "loss": 0.5562, + "step": 6645 + }, + { + "epoch": 0.42, + "grad_norm": 0.8621832728385925, + "learning_rate": 6.498173439909771e-06, + "loss": 0.6282, + "step": 6646 + }, + { + "epoch": 0.42, + "grad_norm": 0.8455116748809814, + "learning_rate": 6.497194554989001e-06, + "loss": 0.5634, + "step": 6647 + }, + { + "epoch": 0.42, + "grad_norm": 0.8869051933288574, + "learning_rate": 6.496215607026579e-06, + "loss": 0.5434, + "step": 6648 + }, + { + "epoch": 0.42, + "grad_norm": 0.8986787796020508, + "learning_rate": 6.495236596063722e-06, + "loss": 0.5891, + "step": 6649 + }, + { + "epoch": 0.42, + "grad_norm": 0.8959378004074097, + "learning_rate": 6.494257522141654e-06, + "loss": 0.5889, + "step": 6650 + }, + { + "epoch": 0.42, + "grad_norm": 1.0358039140701294, + "learning_rate": 6.4932783853016005e-06, + "loss": 0.6392, + "step": 6651 + }, + { + "epoch": 0.42, + "grad_norm": 0.8593500256538391, + "learning_rate": 6.492299185584787e-06, + "loss": 0.5139, + "step": 6652 + }, + { + "epoch": 0.42, + "grad_norm": 0.9131850600242615, + "learning_rate": 6.491319923032446e-06, + "loss": 0.5909, + "step": 6653 + }, + { + "epoch": 0.42, + "grad_norm": 0.8696557283401489, + "learning_rate": 6.490340597685811e-06, + "loss": 0.5991, + "step": 6654 + }, + { + "epoch": 0.42, + "grad_norm": 0.8535944819450378, + "learning_rate": 6.48936120958612e-06, + "loss": 0.5772, + "step": 6655 + }, + { + "epoch": 0.42, + "grad_norm": 0.8401122689247131, + "learning_rate": 6.488381758774609e-06, + "loss": 0.6119, + "step": 6656 + }, + { + "epoch": 0.42, + "grad_norm": 0.8727520704269409, + "learning_rate": 6.487402245292518e-06, + "loss": 0.5869, + "step": 6657 + }, + { + "epoch": 0.42, + "grad_norm": 0.9223040342330933, + "learning_rate": 6.486422669181094e-06, + "loss": 0.6218, + "step": 6658 + }, + { + "epoch": 0.42, + "grad_norm": 0.868571937084198, + "learning_rate": 6.485443030481583e-06, + "loss": 0.607, + "step": 6659 + }, + { + "epoch": 0.42, + "grad_norm": 0.9342830777168274, + "learning_rate": 6.4844633292352335e-06, + "loss": 0.6237, + "step": 6660 + }, + { + "epoch": 0.42, + "grad_norm": 0.8326634764671326, + "learning_rate": 6.483483565483295e-06, + "loss": 0.5574, + "step": 6661 + }, + { + "epoch": 0.42, + "grad_norm": 0.8713539242744446, + "learning_rate": 6.482503739267026e-06, + "loss": 0.5629, + "step": 6662 + }, + { + "epoch": 0.42, + "grad_norm": 0.8934717178344727, + "learning_rate": 6.481523850627682e-06, + "loss": 0.63, + "step": 6663 + }, + { + "epoch": 0.42, + "grad_norm": 0.9452871084213257, + "learning_rate": 6.4805438996065215e-06, + "loss": 0.6842, + "step": 6664 + }, + { + "epoch": 0.42, + "grad_norm": 0.8760863542556763, + "learning_rate": 6.479563886244809e-06, + "loss": 0.6127, + "step": 6665 + }, + { + "epoch": 0.42, + "grad_norm": 0.901567280292511, + "learning_rate": 6.478583810583807e-06, + "loss": 0.5937, + "step": 6666 + }, + { + "epoch": 0.42, + "grad_norm": 0.9208518266677856, + "learning_rate": 6.477603672664785e-06, + "loss": 0.5968, + "step": 6667 + }, + { + "epoch": 0.42, + "grad_norm": 0.8600721955299377, + "learning_rate": 6.476623472529012e-06, + "loss": 0.5893, + "step": 6668 + }, + { + "epoch": 0.42, + "grad_norm": 0.8686032295227051, + "learning_rate": 6.475643210217762e-06, + "loss": 0.6386, + "step": 6669 + }, + { + "epoch": 0.42, + "grad_norm": 0.9031897187232971, + "learning_rate": 6.47466288577231e-06, + "loss": 0.6378, + "step": 6670 + }, + { + "epoch": 0.42, + "grad_norm": 0.82213294506073, + "learning_rate": 6.473682499233934e-06, + "loss": 0.5587, + "step": 6671 + }, + { + "epoch": 0.42, + "grad_norm": 0.83423912525177, + "learning_rate": 6.472702050643913e-06, + "loss": 0.5684, + "step": 6672 + }, + { + "epoch": 0.42, + "grad_norm": 0.9469904899597168, + "learning_rate": 6.471721540043533e-06, + "loss": 0.5984, + "step": 6673 + }, + { + "epoch": 0.42, + "grad_norm": 0.9193524718284607, + "learning_rate": 6.47074096747408e-06, + "loss": 0.6047, + "step": 6674 + }, + { + "epoch": 0.42, + "grad_norm": 0.8525941967964172, + "learning_rate": 6.469760332976839e-06, + "loss": 0.5274, + "step": 6675 + }, + { + "epoch": 0.42, + "grad_norm": 0.9107722043991089, + "learning_rate": 6.4687796365931035e-06, + "loss": 0.5995, + "step": 6676 + }, + { + "epoch": 0.42, + "grad_norm": 0.8921878933906555, + "learning_rate": 6.467798878364168e-06, + "loss": 0.589, + "step": 6677 + }, + { + "epoch": 0.42, + "grad_norm": 0.8674684166908264, + "learning_rate": 6.466818058331328e-06, + "loss": 0.5953, + "step": 6678 + }, + { + "epoch": 0.42, + "grad_norm": 0.9107003211975098, + "learning_rate": 6.465837176535881e-06, + "loss": 0.5684, + "step": 6679 + }, + { + "epoch": 0.42, + "grad_norm": 0.8787494897842407, + "learning_rate": 6.46485623301913e-06, + "loss": 0.5651, + "step": 6680 + }, + { + "epoch": 0.42, + "grad_norm": 0.8581385016441345, + "learning_rate": 6.46387522782238e-06, + "loss": 0.6103, + "step": 6681 + }, + { + "epoch": 0.42, + "grad_norm": 0.8561550378799438, + "learning_rate": 6.462894160986937e-06, + "loss": 0.6005, + "step": 6682 + }, + { + "epoch": 0.42, + "grad_norm": 0.912476122379303, + "learning_rate": 6.461913032554108e-06, + "loss": 0.5908, + "step": 6683 + }, + { + "epoch": 0.42, + "grad_norm": 0.8894026875495911, + "learning_rate": 6.460931842565207e-06, + "loss": 0.59, + "step": 6684 + }, + { + "epoch": 0.42, + "grad_norm": 0.8487771153450012, + "learning_rate": 6.4599505910615505e-06, + "loss": 0.5819, + "step": 6685 + }, + { + "epoch": 0.42, + "grad_norm": 0.8792235255241394, + "learning_rate": 6.45896927808445e-06, + "loss": 0.5918, + "step": 6686 + }, + { + "epoch": 0.42, + "grad_norm": 0.8825286626815796, + "learning_rate": 6.4579879036752315e-06, + "loss": 0.6375, + "step": 6687 + }, + { + "epoch": 0.42, + "grad_norm": 0.857980489730835, + "learning_rate": 6.457006467875213e-06, + "loss": 0.5757, + "step": 6688 + }, + { + "epoch": 0.42, + "grad_norm": 0.9466935396194458, + "learning_rate": 6.456024970725722e-06, + "loss": 0.5619, + "step": 6689 + }, + { + "epoch": 0.42, + "grad_norm": 0.9351262450218201, + "learning_rate": 6.455043412268083e-06, + "loss": 0.5754, + "step": 6690 + }, + { + "epoch": 0.42, + "grad_norm": 0.9212837815284729, + "learning_rate": 6.4540617925436275e-06, + "loss": 0.5439, + "step": 6691 + }, + { + "epoch": 0.42, + "grad_norm": 0.8872720003128052, + "learning_rate": 6.453080111593689e-06, + "loss": 0.6074, + "step": 6692 + }, + { + "epoch": 0.42, + "grad_norm": 0.8125371336936951, + "learning_rate": 6.4520983694596025e-06, + "loss": 0.546, + "step": 6693 + }, + { + "epoch": 0.42, + "grad_norm": 0.9116303324699402, + "learning_rate": 6.451116566182704e-06, + "loss": 0.5957, + "step": 6694 + }, + { + "epoch": 0.42, + "grad_norm": 0.8791963458061218, + "learning_rate": 6.4501347018043356e-06, + "loss": 0.5968, + "step": 6695 + }, + { + "epoch": 0.42, + "grad_norm": 0.9115133285522461, + "learning_rate": 6.44915277636584e-06, + "loss": 0.6176, + "step": 6696 + }, + { + "epoch": 0.42, + "grad_norm": 0.9027078151702881, + "learning_rate": 6.4481707899085624e-06, + "loss": 0.5877, + "step": 6697 + }, + { + "epoch": 0.42, + "grad_norm": 0.9268640279769897, + "learning_rate": 6.447188742473849e-06, + "loss": 0.6045, + "step": 6698 + }, + { + "epoch": 0.42, + "grad_norm": 0.9698523879051208, + "learning_rate": 6.446206634103053e-06, + "loss": 0.6128, + "step": 6699 + }, + { + "epoch": 0.42, + "grad_norm": 0.8391386866569519, + "learning_rate": 6.445224464837527e-06, + "loss": 0.632, + "step": 6700 + }, + { + "epoch": 0.42, + "grad_norm": 0.8128573894500732, + "learning_rate": 6.444242234718626e-06, + "loss": 0.5394, + "step": 6701 + }, + { + "epoch": 0.42, + "grad_norm": 0.8981994986534119, + "learning_rate": 6.443259943787708e-06, + "loss": 0.613, + "step": 6702 + }, + { + "epoch": 0.42, + "grad_norm": 0.8788542747497559, + "learning_rate": 6.442277592086135e-06, + "loss": 0.6071, + "step": 6703 + }, + { + "epoch": 0.42, + "grad_norm": 0.8909327387809753, + "learning_rate": 6.4412951796552715e-06, + "loss": 0.643, + "step": 6704 + }, + { + "epoch": 0.42, + "grad_norm": 0.8961040377616882, + "learning_rate": 6.44031270653648e-06, + "loss": 0.6128, + "step": 6705 + }, + { + "epoch": 0.42, + "grad_norm": 0.9458435773849487, + "learning_rate": 6.4393301727711296e-06, + "loss": 0.6847, + "step": 6706 + }, + { + "epoch": 0.42, + "grad_norm": 0.9194031357765198, + "learning_rate": 6.438347578400595e-06, + "loss": 0.6484, + "step": 6707 + }, + { + "epoch": 0.42, + "grad_norm": 0.9502468705177307, + "learning_rate": 6.437364923466247e-06, + "loss": 0.6456, + "step": 6708 + }, + { + "epoch": 0.43, + "grad_norm": 0.8829242587089539, + "learning_rate": 6.436382208009463e-06, + "loss": 0.64, + "step": 6709 + }, + { + "epoch": 0.43, + "grad_norm": 0.9908746480941772, + "learning_rate": 6.43539943207162e-06, + "loss": 0.6066, + "step": 6710 + }, + { + "epoch": 0.43, + "grad_norm": 0.8264694213867188, + "learning_rate": 6.434416595694102e-06, + "loss": 0.5476, + "step": 6711 + }, + { + "epoch": 0.43, + "grad_norm": 0.8517434000968933, + "learning_rate": 6.433433698918289e-06, + "loss": 0.5964, + "step": 6712 + }, + { + "epoch": 0.43, + "grad_norm": 0.8980498909950256, + "learning_rate": 6.432450741785571e-06, + "loss": 0.5834, + "step": 6713 + }, + { + "epoch": 0.43, + "grad_norm": 0.9315409660339355, + "learning_rate": 6.431467724337332e-06, + "loss": 0.5854, + "step": 6714 + }, + { + "epoch": 0.43, + "grad_norm": 0.9002824425697327, + "learning_rate": 6.430484646614971e-06, + "loss": 0.7002, + "step": 6715 + }, + { + "epoch": 0.43, + "grad_norm": 0.8017958998680115, + "learning_rate": 6.429501508659877e-06, + "loss": 0.555, + "step": 6716 + }, + { + "epoch": 0.43, + "grad_norm": 0.87758469581604, + "learning_rate": 6.428518310513446e-06, + "loss": 0.6322, + "step": 6717 + }, + { + "epoch": 0.43, + "grad_norm": 0.9012987613677979, + "learning_rate": 6.427535052217078e-06, + "loss": 0.6406, + "step": 6718 + }, + { + "epoch": 0.43, + "grad_norm": 0.8559851050376892, + "learning_rate": 6.4265517338121764e-06, + "loss": 0.6203, + "step": 6719 + }, + { + "epoch": 0.43, + "grad_norm": 0.8328604102134705, + "learning_rate": 6.4255683553401435e-06, + "loss": 0.6212, + "step": 6720 + }, + { + "epoch": 0.43, + "grad_norm": 0.8184729814529419, + "learning_rate": 6.424584916842387e-06, + "loss": 0.5646, + "step": 6721 + }, + { + "epoch": 0.43, + "grad_norm": 0.8883371353149414, + "learning_rate": 6.423601418360314e-06, + "loss": 0.585, + "step": 6722 + }, + { + "epoch": 0.43, + "grad_norm": 0.9067890644073486, + "learning_rate": 6.4226178599353385e-06, + "loss": 0.6143, + "step": 6723 + }, + { + "epoch": 0.43, + "grad_norm": 0.9497420787811279, + "learning_rate": 6.421634241608874e-06, + "loss": 0.6008, + "step": 6724 + }, + { + "epoch": 0.43, + "grad_norm": 0.8506911396980286, + "learning_rate": 6.420650563422337e-06, + "loss": 0.5696, + "step": 6725 + }, + { + "epoch": 0.43, + "grad_norm": 0.8900071978569031, + "learning_rate": 6.419666825417147e-06, + "loss": 0.6127, + "step": 6726 + }, + { + "epoch": 0.43, + "grad_norm": 0.8769127726554871, + "learning_rate": 6.4186830276347246e-06, + "loss": 0.582, + "step": 6727 + }, + { + "epoch": 0.43, + "grad_norm": 0.9167845249176025, + "learning_rate": 6.417699170116497e-06, + "loss": 0.6236, + "step": 6728 + }, + { + "epoch": 0.43, + "grad_norm": 0.8981141448020935, + "learning_rate": 6.416715252903888e-06, + "loss": 0.6296, + "step": 6729 + }, + { + "epoch": 0.43, + "grad_norm": 0.8629951477050781, + "learning_rate": 6.415731276038327e-06, + "loss": 0.5923, + "step": 6730 + }, + { + "epoch": 0.43, + "grad_norm": 0.8666777014732361, + "learning_rate": 6.414747239561249e-06, + "loss": 0.616, + "step": 6731 + }, + { + "epoch": 0.43, + "grad_norm": 0.9089431762695312, + "learning_rate": 6.413763143514086e-06, + "loss": 0.6325, + "step": 6732 + }, + { + "epoch": 0.43, + "grad_norm": 0.9420037865638733, + "learning_rate": 6.412778987938273e-06, + "loss": 0.6228, + "step": 6733 + }, + { + "epoch": 0.43, + "grad_norm": 0.9398850202560425, + "learning_rate": 6.411794772875253e-06, + "loss": 0.6237, + "step": 6734 + }, + { + "epoch": 0.43, + "grad_norm": 0.842210054397583, + "learning_rate": 6.4108104983664665e-06, + "loss": 0.5431, + "step": 6735 + }, + { + "epoch": 0.43, + "grad_norm": 0.8711762428283691, + "learning_rate": 6.409826164453359e-06, + "loss": 0.5273, + "step": 6736 + }, + { + "epoch": 0.43, + "grad_norm": 0.9114711284637451, + "learning_rate": 6.408841771177373e-06, + "loss": 0.6513, + "step": 6737 + }, + { + "epoch": 0.43, + "grad_norm": 0.86746746301651, + "learning_rate": 6.407857318579963e-06, + "loss": 0.5329, + "step": 6738 + }, + { + "epoch": 0.43, + "grad_norm": 0.8684642314910889, + "learning_rate": 6.4068728067025785e-06, + "loss": 0.6305, + "step": 6739 + }, + { + "epoch": 0.43, + "grad_norm": 0.8810404539108276, + "learning_rate": 6.405888235586676e-06, + "loss": 0.5681, + "step": 6740 + }, + { + "epoch": 0.43, + "grad_norm": 0.9271003603935242, + "learning_rate": 6.4049036052737065e-06, + "loss": 0.6079, + "step": 6741 + }, + { + "epoch": 0.43, + "grad_norm": 0.9591821432113647, + "learning_rate": 6.403918915805138e-06, + "loss": 0.5997, + "step": 6742 + }, + { + "epoch": 0.43, + "grad_norm": 0.939398467540741, + "learning_rate": 6.402934167222427e-06, + "loss": 0.6271, + "step": 6743 + }, + { + "epoch": 0.43, + "grad_norm": 0.8856723308563232, + "learning_rate": 6.4019493595670365e-06, + "loss": 0.5538, + "step": 6744 + }, + { + "epoch": 0.43, + "grad_norm": 0.8420456051826477, + "learning_rate": 6.400964492880437e-06, + "loss": 0.6144, + "step": 6745 + }, + { + "epoch": 0.43, + "grad_norm": 0.9057135581970215, + "learning_rate": 6.399979567204096e-06, + "loss": 0.6294, + "step": 6746 + }, + { + "epoch": 0.43, + "grad_norm": 0.8914698958396912, + "learning_rate": 6.398994582579485e-06, + "loss": 0.6168, + "step": 6747 + }, + { + "epoch": 0.43, + "grad_norm": 0.7790830135345459, + "learning_rate": 6.39800953904808e-06, + "loss": 0.563, + "step": 6748 + }, + { + "epoch": 0.43, + "grad_norm": 0.8107707500457764, + "learning_rate": 6.397024436651356e-06, + "loss": 0.5594, + "step": 6749 + }, + { + "epoch": 0.43, + "grad_norm": 0.9206598401069641, + "learning_rate": 6.396039275430792e-06, + "loss": 0.6187, + "step": 6750 + }, + { + "epoch": 0.43, + "grad_norm": 0.973175048828125, + "learning_rate": 6.395054055427872e-06, + "loss": 0.6636, + "step": 6751 + }, + { + "epoch": 0.43, + "grad_norm": 0.8376911878585815, + "learning_rate": 6.394068776684078e-06, + "loss": 0.5941, + "step": 6752 + }, + { + "epoch": 0.43, + "grad_norm": 0.8403307795524597, + "learning_rate": 6.393083439240897e-06, + "loss": 0.5494, + "step": 6753 + }, + { + "epoch": 0.43, + "grad_norm": 0.8593806028366089, + "learning_rate": 6.39209804313982e-06, + "loss": 0.5894, + "step": 6754 + }, + { + "epoch": 0.43, + "grad_norm": 0.81999671459198, + "learning_rate": 6.391112588422337e-06, + "loss": 0.5574, + "step": 6755 + }, + { + "epoch": 0.43, + "grad_norm": 0.917597770690918, + "learning_rate": 6.390127075129941e-06, + "loss": 0.556, + "step": 6756 + }, + { + "epoch": 0.43, + "grad_norm": 0.9527899026870728, + "learning_rate": 6.38914150330413e-06, + "loss": 0.6124, + "step": 6757 + }, + { + "epoch": 0.43, + "grad_norm": 0.9341422319412231, + "learning_rate": 6.388155872986404e-06, + "loss": 0.5947, + "step": 6758 + }, + { + "epoch": 0.43, + "grad_norm": 0.8585883378982544, + "learning_rate": 6.3871701842182625e-06, + "loss": 0.579, + "step": 6759 + }, + { + "epoch": 0.43, + "grad_norm": 0.8847423195838928, + "learning_rate": 6.386184437041208e-06, + "loss": 0.6301, + "step": 6760 + }, + { + "epoch": 0.43, + "grad_norm": 0.9007404446601868, + "learning_rate": 6.385198631496752e-06, + "loss": 0.6414, + "step": 6761 + }, + { + "epoch": 0.43, + "grad_norm": 0.8374068140983582, + "learning_rate": 6.3842127676263995e-06, + "loss": 0.5746, + "step": 6762 + }, + { + "epoch": 0.43, + "grad_norm": 0.8941364884376526, + "learning_rate": 6.383226845471663e-06, + "loss": 0.6523, + "step": 6763 + }, + { + "epoch": 0.43, + "grad_norm": 0.8259331583976746, + "learning_rate": 6.382240865074055e-06, + "loss": 0.5926, + "step": 6764 + }, + { + "epoch": 0.43, + "grad_norm": 0.8802745342254639, + "learning_rate": 6.381254826475093e-06, + "loss": 0.5734, + "step": 6765 + }, + { + "epoch": 0.43, + "grad_norm": 0.8371772766113281, + "learning_rate": 6.380268729716296e-06, + "loss": 0.634, + "step": 6766 + }, + { + "epoch": 0.43, + "grad_norm": 0.9037656188011169, + "learning_rate": 6.379282574839184e-06, + "loss": 0.6121, + "step": 6767 + }, + { + "epoch": 0.43, + "grad_norm": 0.8730578422546387, + "learning_rate": 6.37829636188528e-06, + "loss": 0.5665, + "step": 6768 + }, + { + "epoch": 0.43, + "grad_norm": 0.9292997717857361, + "learning_rate": 6.377310090896112e-06, + "loss": 0.5958, + "step": 6769 + }, + { + "epoch": 0.43, + "grad_norm": 0.9569960236549377, + "learning_rate": 6.376323761913208e-06, + "loss": 0.5732, + "step": 6770 + }, + { + "epoch": 0.43, + "grad_norm": 0.8929063081741333, + "learning_rate": 6.375337374978097e-06, + "loss": 0.6509, + "step": 6771 + }, + { + "epoch": 0.43, + "grad_norm": 0.9145770072937012, + "learning_rate": 6.374350930132313e-06, + "loss": 0.6502, + "step": 6772 + }, + { + "epoch": 0.43, + "grad_norm": 0.8785668015480042, + "learning_rate": 6.373364427417395e-06, + "loss": 0.6297, + "step": 6773 + }, + { + "epoch": 0.43, + "grad_norm": 0.8315816521644592, + "learning_rate": 6.372377866874876e-06, + "loss": 0.5375, + "step": 6774 + }, + { + "epoch": 0.43, + "grad_norm": 0.8660714030265808, + "learning_rate": 6.371391248546299e-06, + "loss": 0.5814, + "step": 6775 + }, + { + "epoch": 0.43, + "grad_norm": 0.801703691482544, + "learning_rate": 6.370404572473209e-06, + "loss": 0.6316, + "step": 6776 + }, + { + "epoch": 0.43, + "grad_norm": 0.9656221270561218, + "learning_rate": 6.36941783869715e-06, + "loss": 0.6798, + "step": 6777 + }, + { + "epoch": 0.43, + "grad_norm": 0.8311265707015991, + "learning_rate": 6.368431047259668e-06, + "loss": 0.5343, + "step": 6778 + }, + { + "epoch": 0.43, + "grad_norm": 0.9228345155715942, + "learning_rate": 6.367444198202315e-06, + "loss": 0.6175, + "step": 6779 + }, + { + "epoch": 0.43, + "grad_norm": 0.9040692448616028, + "learning_rate": 6.366457291566645e-06, + "loss": 0.6427, + "step": 6780 + }, + { + "epoch": 0.43, + "grad_norm": 0.8820178508758545, + "learning_rate": 6.365470327394212e-06, + "loss": 0.6016, + "step": 6781 + }, + { + "epoch": 0.43, + "grad_norm": 0.8262830376625061, + "learning_rate": 6.3644833057265735e-06, + "loss": 0.615, + "step": 6782 + }, + { + "epoch": 0.43, + "grad_norm": 0.8388856053352356, + "learning_rate": 6.363496226605289e-06, + "loss": 0.5929, + "step": 6783 + }, + { + "epoch": 0.43, + "grad_norm": 0.9199455380439758, + "learning_rate": 6.362509090071922e-06, + "loss": 0.5725, + "step": 6784 + }, + { + "epoch": 0.43, + "grad_norm": 0.9267382621765137, + "learning_rate": 6.361521896168037e-06, + "loss": 0.6032, + "step": 6785 + }, + { + "epoch": 0.43, + "grad_norm": 0.858314573764801, + "learning_rate": 6.360534644935201e-06, + "loss": 0.6036, + "step": 6786 + }, + { + "epoch": 0.43, + "grad_norm": 0.8850862979888916, + "learning_rate": 6.359547336414985e-06, + "loss": 0.5966, + "step": 6787 + }, + { + "epoch": 0.43, + "grad_norm": 0.910456657409668, + "learning_rate": 6.358559970648958e-06, + "loss": 0.6172, + "step": 6788 + }, + { + "epoch": 0.43, + "grad_norm": 0.8374682068824768, + "learning_rate": 6.357572547678701e-06, + "loss": 0.5973, + "step": 6789 + }, + { + "epoch": 0.43, + "grad_norm": 0.8853792548179626, + "learning_rate": 6.356585067545784e-06, + "loss": 0.5811, + "step": 6790 + }, + { + "epoch": 0.43, + "grad_norm": 0.8989521861076355, + "learning_rate": 6.355597530291788e-06, + "loss": 0.6074, + "step": 6791 + }, + { + "epoch": 0.43, + "grad_norm": 0.9015896916389465, + "learning_rate": 6.354609935958298e-06, + "loss": 0.5856, + "step": 6792 + }, + { + "epoch": 0.43, + "grad_norm": 0.937824010848999, + "learning_rate": 6.3536222845868934e-06, + "loss": 0.6377, + "step": 6793 + }, + { + "epoch": 0.43, + "grad_norm": 0.9121703505516052, + "learning_rate": 6.3526345762191656e-06, + "loss": 0.6304, + "step": 6794 + }, + { + "epoch": 0.43, + "grad_norm": 0.8476263284683228, + "learning_rate": 6.351646810896699e-06, + "loss": 0.5777, + "step": 6795 + }, + { + "epoch": 0.43, + "grad_norm": 0.8620879650115967, + "learning_rate": 6.350658988661089e-06, + "loss": 0.5876, + "step": 6796 + }, + { + "epoch": 0.43, + "grad_norm": 0.8748513460159302, + "learning_rate": 6.349671109553928e-06, + "loss": 0.5557, + "step": 6797 + }, + { + "epoch": 0.43, + "grad_norm": 0.9378863573074341, + "learning_rate": 6.348683173616811e-06, + "loss": 0.5726, + "step": 6798 + }, + { + "epoch": 0.43, + "grad_norm": 0.8460593223571777, + "learning_rate": 6.347695180891337e-06, + "loss": 0.6111, + "step": 6799 + }, + { + "epoch": 0.43, + "grad_norm": 0.8482157588005066, + "learning_rate": 6.346707131419108e-06, + "loss": 0.5726, + "step": 6800 + }, + { + "epoch": 0.43, + "grad_norm": 0.935832142829895, + "learning_rate": 6.345719025241725e-06, + "loss": 0.5852, + "step": 6801 + }, + { + "epoch": 0.43, + "grad_norm": 0.8839829564094543, + "learning_rate": 6.3447308624007964e-06, + "loss": 0.6138, + "step": 6802 + }, + { + "epoch": 0.43, + "grad_norm": 0.9015828967094421, + "learning_rate": 6.343742642937929e-06, + "loss": 0.5436, + "step": 6803 + }, + { + "epoch": 0.43, + "grad_norm": 0.925391674041748, + "learning_rate": 6.342754366894735e-06, + "loss": 0.6357, + "step": 6804 + }, + { + "epoch": 0.43, + "grad_norm": 0.8897901177406311, + "learning_rate": 6.341766034312824e-06, + "loss": 0.6055, + "step": 6805 + }, + { + "epoch": 0.43, + "grad_norm": 0.9219132661819458, + "learning_rate": 6.340777645233811e-06, + "loss": 0.6218, + "step": 6806 + }, + { + "epoch": 0.43, + "grad_norm": 0.8790163993835449, + "learning_rate": 6.339789199699319e-06, + "loss": 0.5795, + "step": 6807 + }, + { + "epoch": 0.43, + "grad_norm": 0.860368549823761, + "learning_rate": 6.338800697750963e-06, + "loss": 0.5757, + "step": 6808 + }, + { + "epoch": 0.43, + "grad_norm": 0.8733096718788147, + "learning_rate": 6.337812139430368e-06, + "loss": 0.5503, + "step": 6809 + }, + { + "epoch": 0.43, + "grad_norm": 0.9531643986701965, + "learning_rate": 6.336823524779155e-06, + "loss": 0.6141, + "step": 6810 + }, + { + "epoch": 0.43, + "grad_norm": 0.8316904902458191, + "learning_rate": 6.335834853838957e-06, + "loss": 0.5442, + "step": 6811 + }, + { + "epoch": 0.43, + "grad_norm": 0.9344193339347839, + "learning_rate": 6.334846126651399e-06, + "loss": 0.6328, + "step": 6812 + }, + { + "epoch": 0.43, + "grad_norm": 0.9671064615249634, + "learning_rate": 6.333857343258115e-06, + "loss": 0.6196, + "step": 6813 + }, + { + "epoch": 0.43, + "grad_norm": 0.9183486104011536, + "learning_rate": 6.3328685037007365e-06, + "loss": 0.5904, + "step": 6814 + }, + { + "epoch": 0.43, + "grad_norm": 0.9257077574729919, + "learning_rate": 6.331879608020905e-06, + "loss": 0.5997, + "step": 6815 + }, + { + "epoch": 0.43, + "grad_norm": 0.9391463398933411, + "learning_rate": 6.330890656260253e-06, + "loss": 0.6602, + "step": 6816 + }, + { + "epoch": 0.43, + "grad_norm": 0.9092316627502441, + "learning_rate": 6.329901648460428e-06, + "loss": 0.6427, + "step": 6817 + }, + { + "epoch": 0.43, + "grad_norm": 0.9237379431724548, + "learning_rate": 6.32891258466307e-06, + "loss": 0.6052, + "step": 6818 + }, + { + "epoch": 0.43, + "grad_norm": 0.8574221134185791, + "learning_rate": 6.3279234649098265e-06, + "loss": 0.5748, + "step": 6819 + }, + { + "epoch": 0.43, + "grad_norm": 0.9020368456840515, + "learning_rate": 6.326934289242346e-06, + "loss": 0.5697, + "step": 6820 + }, + { + "epoch": 0.43, + "grad_norm": 0.9599592685699463, + "learning_rate": 6.325945057702276e-06, + "loss": 0.6449, + "step": 6821 + }, + { + "epoch": 0.43, + "grad_norm": 0.8588045239448547, + "learning_rate": 6.324955770331274e-06, + "loss": 0.6282, + "step": 6822 + }, + { + "epoch": 0.43, + "grad_norm": 0.8139827847480774, + "learning_rate": 6.323966427170993e-06, + "loss": 0.5639, + "step": 6823 + }, + { + "epoch": 0.43, + "grad_norm": 0.9147988557815552, + "learning_rate": 6.322977028263093e-06, + "loss": 0.6103, + "step": 6824 + }, + { + "epoch": 0.43, + "grad_norm": 0.9550712704658508, + "learning_rate": 6.321987573649232e-06, + "loss": 0.5802, + "step": 6825 + }, + { + "epoch": 0.43, + "grad_norm": 0.8623383045196533, + "learning_rate": 6.320998063371072e-06, + "loss": 0.5587, + "step": 6826 + }, + { + "epoch": 0.43, + "grad_norm": 0.8975523710250854, + "learning_rate": 6.320008497470281e-06, + "loss": 0.6382, + "step": 6827 + }, + { + "epoch": 0.43, + "grad_norm": 0.9629261493682861, + "learning_rate": 6.319018875988523e-06, + "loss": 0.6272, + "step": 6828 + }, + { + "epoch": 0.43, + "grad_norm": 0.9650130271911621, + "learning_rate": 6.318029198967468e-06, + "loss": 0.6143, + "step": 6829 + }, + { + "epoch": 0.43, + "grad_norm": 0.9213373064994812, + "learning_rate": 6.317039466448789e-06, + "loss": 0.6329, + "step": 6830 + }, + { + "epoch": 0.43, + "grad_norm": 0.8667360544204712, + "learning_rate": 6.316049678474159e-06, + "loss": 0.5997, + "step": 6831 + }, + { + "epoch": 0.43, + "grad_norm": 0.9180268049240112, + "learning_rate": 6.315059835085257e-06, + "loss": 0.5874, + "step": 6832 + }, + { + "epoch": 0.43, + "grad_norm": 0.9233614802360535, + "learning_rate": 6.314069936323759e-06, + "loss": 0.5789, + "step": 6833 + }, + { + "epoch": 0.43, + "grad_norm": 0.8247601985931396, + "learning_rate": 6.313079982231347e-06, + "loss": 0.5741, + "step": 6834 + }, + { + "epoch": 0.43, + "grad_norm": 0.893379807472229, + "learning_rate": 6.312089972849707e-06, + "loss": 0.615, + "step": 6835 + }, + { + "epoch": 0.43, + "grad_norm": 0.8536086082458496, + "learning_rate": 6.31109990822052e-06, + "loss": 0.6018, + "step": 6836 + }, + { + "epoch": 0.43, + "grad_norm": 0.8153089284896851, + "learning_rate": 6.3101097883854765e-06, + "loss": 0.6091, + "step": 6837 + }, + { + "epoch": 0.43, + "grad_norm": 0.9722812175750732, + "learning_rate": 6.30911961338627e-06, + "loss": 0.5822, + "step": 6838 + }, + { + "epoch": 0.43, + "grad_norm": 0.8883670568466187, + "learning_rate": 6.3081293832645896e-06, + "loss": 0.5889, + "step": 6839 + }, + { + "epoch": 0.43, + "grad_norm": 0.9067282676696777, + "learning_rate": 6.30713909806213e-06, + "loss": 0.538, + "step": 6840 + }, + { + "epoch": 0.43, + "grad_norm": 0.9098742008209229, + "learning_rate": 6.306148757820591e-06, + "loss": 0.5896, + "step": 6841 + }, + { + "epoch": 0.43, + "grad_norm": 0.8773499131202698, + "learning_rate": 6.3051583625816725e-06, + "loss": 0.5662, + "step": 6842 + }, + { + "epoch": 0.43, + "grad_norm": 0.81912761926651, + "learning_rate": 6.304167912387076e-06, + "loss": 0.5311, + "step": 6843 + }, + { + "epoch": 0.43, + "grad_norm": 0.8784845471382141, + "learning_rate": 6.303177407278504e-06, + "loss": 0.6069, + "step": 6844 + }, + { + "epoch": 0.43, + "grad_norm": 0.885051965713501, + "learning_rate": 6.302186847297666e-06, + "loss": 0.5553, + "step": 6845 + }, + { + "epoch": 0.43, + "grad_norm": 0.879306972026825, + "learning_rate": 6.301196232486269e-06, + "loss": 0.5763, + "step": 6846 + }, + { + "epoch": 0.43, + "grad_norm": 0.9128481149673462, + "learning_rate": 6.300205562886026e-06, + "loss": 0.5423, + "step": 6847 + }, + { + "epoch": 0.43, + "grad_norm": 0.9183526635169983, + "learning_rate": 6.29921483853865e-06, + "loss": 0.6028, + "step": 6848 + }, + { + "epoch": 0.43, + "grad_norm": 0.8842886090278625, + "learning_rate": 6.298224059485856e-06, + "loss": 0.5602, + "step": 6849 + }, + { + "epoch": 0.43, + "grad_norm": 0.87552809715271, + "learning_rate": 6.297233225769363e-06, + "loss": 0.6139, + "step": 6850 + }, + { + "epoch": 0.43, + "grad_norm": 0.8830863237380981, + "learning_rate": 6.296242337430892e-06, + "loss": 0.5313, + "step": 6851 + }, + { + "epoch": 0.43, + "grad_norm": 0.8993502259254456, + "learning_rate": 6.2952513945121654e-06, + "loss": 0.6239, + "step": 6852 + }, + { + "epoch": 0.43, + "grad_norm": 0.8739321827888489, + "learning_rate": 6.2942603970549075e-06, + "loss": 0.5823, + "step": 6853 + }, + { + "epoch": 0.43, + "grad_norm": 0.872380793094635, + "learning_rate": 6.293269345100849e-06, + "loss": 0.5773, + "step": 6854 + }, + { + "epoch": 0.43, + "grad_norm": 0.8558187484741211, + "learning_rate": 6.292278238691715e-06, + "loss": 0.6118, + "step": 6855 + }, + { + "epoch": 0.43, + "grad_norm": 0.8276113271713257, + "learning_rate": 6.29128707786924e-06, + "loss": 0.5871, + "step": 6856 + }, + { + "epoch": 0.43, + "grad_norm": 0.9095969200134277, + "learning_rate": 6.29029586267516e-06, + "loss": 0.5682, + "step": 6857 + }, + { + "epoch": 0.43, + "grad_norm": 0.9365728497505188, + "learning_rate": 6.289304593151209e-06, + "loss": 0.6114, + "step": 6858 + }, + { + "epoch": 0.43, + "grad_norm": 0.849093496799469, + "learning_rate": 6.288313269339126e-06, + "loss": 0.587, + "step": 6859 + }, + { + "epoch": 0.43, + "grad_norm": 0.871545672416687, + "learning_rate": 6.287321891280653e-06, + "loss": 0.5703, + "step": 6860 + }, + { + "epoch": 0.43, + "grad_norm": 0.8848944306373596, + "learning_rate": 6.2863304590175335e-06, + "loss": 0.5847, + "step": 6861 + }, + { + "epoch": 0.43, + "grad_norm": 0.8347170948982239, + "learning_rate": 6.2853389725915146e-06, + "loss": 0.5983, + "step": 6862 + }, + { + "epoch": 0.43, + "grad_norm": 0.8573687672615051, + "learning_rate": 6.284347432044342e-06, + "loss": 0.6016, + "step": 6863 + }, + { + "epoch": 0.43, + "grad_norm": 0.9318529963493347, + "learning_rate": 6.2833558374177664e-06, + "loss": 0.5946, + "step": 6864 + }, + { + "epoch": 0.43, + "grad_norm": 0.8856549859046936, + "learning_rate": 6.282364188753541e-06, + "loss": 0.5791, + "step": 6865 + }, + { + "epoch": 0.43, + "grad_norm": 0.8939905762672424, + "learning_rate": 6.281372486093422e-06, + "loss": 0.6297, + "step": 6866 + }, + { + "epoch": 0.44, + "grad_norm": 0.9018425941467285, + "learning_rate": 6.280380729479164e-06, + "loss": 0.5627, + "step": 6867 + }, + { + "epoch": 0.44, + "grad_norm": 0.9689738750457764, + "learning_rate": 6.279388918952527e-06, + "loss": 0.6036, + "step": 6868 + }, + { + "epoch": 0.44, + "grad_norm": 0.9120928049087524, + "learning_rate": 6.278397054555275e-06, + "loss": 0.6214, + "step": 6869 + }, + { + "epoch": 0.44, + "grad_norm": 0.9339777827262878, + "learning_rate": 6.277405136329169e-06, + "loss": 0.6575, + "step": 6870 + }, + { + "epoch": 0.44, + "grad_norm": 0.8833754658699036, + "learning_rate": 6.276413164315978e-06, + "loss": 0.5584, + "step": 6871 + }, + { + "epoch": 0.44, + "grad_norm": 0.8568885326385498, + "learning_rate": 6.2754211385574674e-06, + "loss": 0.5942, + "step": 6872 + }, + { + "epoch": 0.44, + "grad_norm": 0.9012584686279297, + "learning_rate": 6.274429059095411e-06, + "loss": 0.6192, + "step": 6873 + }, + { + "epoch": 0.44, + "grad_norm": 0.8778733611106873, + "learning_rate": 6.273436925971578e-06, + "loss": 0.5976, + "step": 6874 + }, + { + "epoch": 0.44, + "grad_norm": 0.8796364068984985, + "learning_rate": 6.272444739227748e-06, + "loss": 0.5434, + "step": 6875 + }, + { + "epoch": 0.44, + "grad_norm": 0.870852530002594, + "learning_rate": 6.2714524989056945e-06, + "loss": 0.6127, + "step": 6876 + }, + { + "epoch": 0.44, + "grad_norm": 0.9310038685798645, + "learning_rate": 6.270460205047202e-06, + "loss": 0.6447, + "step": 6877 + }, + { + "epoch": 0.44, + "grad_norm": 0.8664422035217285, + "learning_rate": 6.269467857694047e-06, + "loss": 0.5595, + "step": 6878 + }, + { + "epoch": 0.44, + "grad_norm": 0.9125611186027527, + "learning_rate": 6.268475456888019e-06, + "loss": 0.6122, + "step": 6879 + }, + { + "epoch": 0.44, + "grad_norm": 0.8741683959960938, + "learning_rate": 6.2674830026709014e-06, + "loss": 0.6289, + "step": 6880 + }, + { + "epoch": 0.44, + "grad_norm": 0.8851672410964966, + "learning_rate": 6.266490495084484e-06, + "loss": 0.5838, + "step": 6881 + }, + { + "epoch": 0.44, + "grad_norm": 0.9018517732620239, + "learning_rate": 6.265497934170559e-06, + "loss": 0.602, + "step": 6882 + }, + { + "epoch": 0.44, + "grad_norm": 0.8665854930877686, + "learning_rate": 6.264505319970915e-06, + "loss": 0.5744, + "step": 6883 + }, + { + "epoch": 0.44, + "grad_norm": 0.8087853789329529, + "learning_rate": 6.263512652527353e-06, + "loss": 0.5761, + "step": 6884 + }, + { + "epoch": 0.44, + "grad_norm": 0.8873346447944641, + "learning_rate": 6.262519931881669e-06, + "loss": 0.626, + "step": 6885 + }, + { + "epoch": 0.44, + "grad_norm": 0.9511377811431885, + "learning_rate": 6.261527158075662e-06, + "loss": 0.6155, + "step": 6886 + }, + { + "epoch": 0.44, + "grad_norm": 0.8355633616447449, + "learning_rate": 6.260534331151133e-06, + "loss": 0.5037, + "step": 6887 + }, + { + "epoch": 0.44, + "grad_norm": 0.8886730670928955, + "learning_rate": 6.259541451149892e-06, + "loss": 0.566, + "step": 6888 + }, + { + "epoch": 0.44, + "grad_norm": 0.8618388175964355, + "learning_rate": 6.258548518113741e-06, + "loss": 0.603, + "step": 6889 + }, + { + "epoch": 0.44, + "grad_norm": 0.8546575307846069, + "learning_rate": 6.257555532084489e-06, + "loss": 0.5327, + "step": 6890 + }, + { + "epoch": 0.44, + "grad_norm": 0.8894109725952148, + "learning_rate": 6.2565624931039485e-06, + "loss": 0.6208, + "step": 6891 + }, + { + "epoch": 0.44, + "grad_norm": 0.8885670900344849, + "learning_rate": 6.255569401213933e-06, + "loss": 0.6106, + "step": 6892 + }, + { + "epoch": 0.44, + "grad_norm": 0.8722066283226013, + "learning_rate": 6.254576256456257e-06, + "loss": 0.5616, + "step": 6893 + }, + { + "epoch": 0.44, + "grad_norm": 0.8073423504829407, + "learning_rate": 6.253583058872741e-06, + "loss": 0.5666, + "step": 6894 + }, + { + "epoch": 0.44, + "grad_norm": 0.8527199625968933, + "learning_rate": 6.2525898085052005e-06, + "loss": 0.5878, + "step": 6895 + }, + { + "epoch": 0.44, + "grad_norm": 0.8704131841659546, + "learning_rate": 6.251596505395463e-06, + "loss": 0.587, + "step": 6896 + }, + { + "epoch": 0.44, + "grad_norm": 0.9160160422325134, + "learning_rate": 6.25060314958535e-06, + "loss": 0.6092, + "step": 6897 + }, + { + "epoch": 0.44, + "grad_norm": 0.9297466278076172, + "learning_rate": 6.249609741116689e-06, + "loss": 0.573, + "step": 6898 + }, + { + "epoch": 0.44, + "grad_norm": 0.8892688155174255, + "learning_rate": 6.24861628003131e-06, + "loss": 0.633, + "step": 6899 + }, + { + "epoch": 0.44, + "grad_norm": 0.8495330214500427, + "learning_rate": 6.247622766371041e-06, + "loss": 0.5785, + "step": 6900 + }, + { + "epoch": 0.44, + "grad_norm": 0.8945955634117126, + "learning_rate": 6.246629200177718e-06, + "loss": 0.6395, + "step": 6901 + }, + { + "epoch": 0.44, + "grad_norm": 0.8665342330932617, + "learning_rate": 6.245635581493176e-06, + "loss": 0.5672, + "step": 6902 + }, + { + "epoch": 0.44, + "grad_norm": 0.9849283695220947, + "learning_rate": 6.244641910359254e-06, + "loss": 0.687, + "step": 6903 + }, + { + "epoch": 0.44, + "grad_norm": 0.8854192495346069, + "learning_rate": 6.24364818681779e-06, + "loss": 0.5928, + "step": 6904 + }, + { + "epoch": 0.44, + "grad_norm": 0.9012208580970764, + "learning_rate": 6.242654410910628e-06, + "loss": 0.5718, + "step": 6905 + }, + { + "epoch": 0.44, + "grad_norm": 0.9132283329963684, + "learning_rate": 6.2416605826796095e-06, + "loss": 0.647, + "step": 6906 + }, + { + "epoch": 0.44, + "grad_norm": 0.8352293968200684, + "learning_rate": 6.240666702166587e-06, + "loss": 0.6094, + "step": 6907 + }, + { + "epoch": 0.44, + "grad_norm": 0.8616271615028381, + "learning_rate": 6.239672769413403e-06, + "loss": 0.604, + "step": 6908 + }, + { + "epoch": 0.44, + "grad_norm": 0.8567937612533569, + "learning_rate": 6.238678784461913e-06, + "loss": 0.5418, + "step": 6909 + }, + { + "epoch": 0.44, + "grad_norm": 0.8747637867927551, + "learning_rate": 6.237684747353965e-06, + "loss": 0.5794, + "step": 6910 + }, + { + "epoch": 0.44, + "grad_norm": 0.8634200096130371, + "learning_rate": 6.23669065813142e-06, + "loss": 0.5864, + "step": 6911 + }, + { + "epoch": 0.44, + "grad_norm": 0.8566752076148987, + "learning_rate": 6.235696516836134e-06, + "loss": 0.601, + "step": 6912 + }, + { + "epoch": 0.44, + "grad_norm": 0.8930138945579529, + "learning_rate": 6.234702323509967e-06, + "loss": 0.6216, + "step": 6913 + }, + { + "epoch": 0.44, + "grad_norm": 0.9066216945648193, + "learning_rate": 6.233708078194778e-06, + "loss": 0.6281, + "step": 6914 + }, + { + "epoch": 0.44, + "grad_norm": 0.9339972138404846, + "learning_rate": 6.232713780932434e-06, + "loss": 0.6423, + "step": 6915 + }, + { + "epoch": 0.44, + "grad_norm": 0.9028674364089966, + "learning_rate": 6.231719431764804e-06, + "loss": 0.593, + "step": 6916 + }, + { + "epoch": 0.44, + "grad_norm": 0.8986188769340515, + "learning_rate": 6.230725030733751e-06, + "loss": 0.6325, + "step": 6917 + }, + { + "epoch": 0.44, + "grad_norm": 0.9198395013809204, + "learning_rate": 6.229730577881148e-06, + "loss": 0.6486, + "step": 6918 + }, + { + "epoch": 0.44, + "grad_norm": 0.8530875444412231, + "learning_rate": 6.2287360732488685e-06, + "loss": 0.5365, + "step": 6919 + }, + { + "epoch": 0.44, + "grad_norm": 0.9224251508712769, + "learning_rate": 6.227741516878789e-06, + "loss": 0.6252, + "step": 6920 + }, + { + "epoch": 0.44, + "grad_norm": 0.9238904118537903, + "learning_rate": 6.226746908812784e-06, + "loss": 0.6171, + "step": 6921 + }, + { + "epoch": 0.44, + "grad_norm": 0.9478338956832886, + "learning_rate": 6.225752249092734e-06, + "loss": 0.6544, + "step": 6922 + }, + { + "epoch": 0.44, + "grad_norm": 0.9255354404449463, + "learning_rate": 6.224757537760521e-06, + "loss": 0.6176, + "step": 6923 + }, + { + "epoch": 0.44, + "grad_norm": 0.8838732838630676, + "learning_rate": 6.2237627748580294e-06, + "loss": 0.633, + "step": 6924 + }, + { + "epoch": 0.44, + "grad_norm": 0.8878322243690491, + "learning_rate": 6.222767960427144e-06, + "loss": 0.6434, + "step": 6925 + }, + { + "epoch": 0.44, + "grad_norm": 0.8358001112937927, + "learning_rate": 6.221773094509753e-06, + "loss": 0.6189, + "step": 6926 + }, + { + "epoch": 0.44, + "grad_norm": 0.8636587858200073, + "learning_rate": 6.220778177147747e-06, + "loss": 0.5856, + "step": 6927 + }, + { + "epoch": 0.44, + "grad_norm": 0.859220027923584, + "learning_rate": 6.219783208383021e-06, + "loss": 0.568, + "step": 6928 + }, + { + "epoch": 0.44, + "grad_norm": 0.9622043967247009, + "learning_rate": 6.218788188257465e-06, + "loss": 0.5914, + "step": 6929 + }, + { + "epoch": 0.44, + "grad_norm": 0.8458547592163086, + "learning_rate": 6.217793116812979e-06, + "loss": 0.554, + "step": 6930 + }, + { + "epoch": 0.44, + "grad_norm": 0.8938260078430176, + "learning_rate": 6.216797994091462e-06, + "loss": 0.6286, + "step": 6931 + }, + { + "epoch": 0.44, + "grad_norm": 0.8333603739738464, + "learning_rate": 6.215802820134814e-06, + "loss": 0.5297, + "step": 6932 + }, + { + "epoch": 0.44, + "grad_norm": 0.8489833474159241, + "learning_rate": 6.214807594984939e-06, + "loss": 0.6139, + "step": 6933 + }, + { + "epoch": 0.44, + "grad_norm": 0.9169575572013855, + "learning_rate": 6.213812318683741e-06, + "loss": 0.6339, + "step": 6934 + }, + { + "epoch": 0.44, + "grad_norm": 0.8761480450630188, + "learning_rate": 6.2128169912731295e-06, + "loss": 0.6299, + "step": 6935 + }, + { + "epoch": 0.44, + "grad_norm": 0.948622465133667, + "learning_rate": 6.211821612795014e-06, + "loss": 0.5962, + "step": 6936 + }, + { + "epoch": 0.44, + "grad_norm": 0.874839186668396, + "learning_rate": 6.210826183291305e-06, + "loss": 0.5761, + "step": 6937 + }, + { + "epoch": 0.44, + "grad_norm": 0.8137356638908386, + "learning_rate": 6.209830702803918e-06, + "loss": 0.5615, + "step": 6938 + }, + { + "epoch": 0.44, + "grad_norm": 0.8627551794052124, + "learning_rate": 6.208835171374769e-06, + "loss": 0.6144, + "step": 6939 + }, + { + "epoch": 0.44, + "grad_norm": 0.9175261855125427, + "learning_rate": 6.207839589045777e-06, + "loss": 0.578, + "step": 6940 + }, + { + "epoch": 0.44, + "grad_norm": 0.8478714823722839, + "learning_rate": 6.20684395585886e-06, + "loss": 0.5606, + "step": 6941 + }, + { + "epoch": 0.44, + "grad_norm": 0.8721091747283936, + "learning_rate": 6.205848271855943e-06, + "loss": 0.5986, + "step": 6942 + }, + { + "epoch": 0.44, + "grad_norm": 0.9747815132141113, + "learning_rate": 6.204852537078952e-06, + "loss": 0.5936, + "step": 6943 + }, + { + "epoch": 0.44, + "grad_norm": 0.9325621724128723, + "learning_rate": 6.203856751569809e-06, + "loss": 0.6673, + "step": 6944 + }, + { + "epoch": 0.44, + "grad_norm": 0.8681188821792603, + "learning_rate": 6.202860915370447e-06, + "loss": 0.6053, + "step": 6945 + }, + { + "epoch": 0.44, + "grad_norm": 0.8858415484428406, + "learning_rate": 6.201865028522798e-06, + "loss": 0.5277, + "step": 6946 + }, + { + "epoch": 0.44, + "grad_norm": 0.9327991604804993, + "learning_rate": 6.200869091068791e-06, + "loss": 0.5919, + "step": 6947 + }, + { + "epoch": 0.44, + "grad_norm": 0.8470887541770935, + "learning_rate": 6.1998731030503655e-06, + "loss": 0.5295, + "step": 6948 + }, + { + "epoch": 0.44, + "grad_norm": 0.8615082502365112, + "learning_rate": 6.198877064509458e-06, + "loss": 0.5833, + "step": 6949 + }, + { + "epoch": 0.44, + "grad_norm": 0.9464468359947205, + "learning_rate": 6.1978809754880076e-06, + "loss": 0.6293, + "step": 6950 + }, + { + "epoch": 0.44, + "grad_norm": 0.8637322783470154, + "learning_rate": 6.196884836027957e-06, + "loss": 0.5632, + "step": 6951 + }, + { + "epoch": 0.44, + "grad_norm": 0.8578369617462158, + "learning_rate": 6.195888646171247e-06, + "loss": 0.5286, + "step": 6952 + }, + { + "epoch": 0.44, + "grad_norm": 0.8746157288551331, + "learning_rate": 6.194892405959829e-06, + "loss": 0.597, + "step": 6953 + }, + { + "epoch": 0.44, + "grad_norm": 0.8710793852806091, + "learning_rate": 6.193896115435648e-06, + "loss": 0.603, + "step": 6954 + }, + { + "epoch": 0.44, + "grad_norm": 0.8585920929908752, + "learning_rate": 6.192899774640655e-06, + "loss": 0.6162, + "step": 6955 + }, + { + "epoch": 0.44, + "grad_norm": 0.8971749544143677, + "learning_rate": 6.191903383616801e-06, + "loss": 0.5941, + "step": 6956 + }, + { + "epoch": 0.44, + "grad_norm": 0.8642129898071289, + "learning_rate": 6.190906942406043e-06, + "loss": 0.5589, + "step": 6957 + }, + { + "epoch": 0.44, + "grad_norm": 0.9450658559799194, + "learning_rate": 6.189910451050336e-06, + "loss": 0.5831, + "step": 6958 + }, + { + "epoch": 0.44, + "grad_norm": 0.9805313348770142, + "learning_rate": 6.1889139095916395e-06, + "loss": 0.6607, + "step": 6959 + }, + { + "epoch": 0.44, + "grad_norm": 0.8744614124298096, + "learning_rate": 6.187917318071914e-06, + "loss": 0.6163, + "step": 6960 + }, + { + "epoch": 0.44, + "grad_norm": 0.8556416034698486, + "learning_rate": 6.1869206765331234e-06, + "loss": 0.5893, + "step": 6961 + }, + { + "epoch": 0.44, + "grad_norm": 0.8845242261886597, + "learning_rate": 6.1859239850172325e-06, + "loss": 0.5842, + "step": 6962 + }, + { + "epoch": 0.44, + "grad_norm": 0.8819428086280823, + "learning_rate": 6.1849272435662065e-06, + "loss": 0.5875, + "step": 6963 + }, + { + "epoch": 0.44, + "grad_norm": 0.9113361239433289, + "learning_rate": 6.183930452222017e-06, + "loss": 0.5681, + "step": 6964 + }, + { + "epoch": 0.44, + "grad_norm": 0.8745653629302979, + "learning_rate": 6.1829336110266356e-06, + "loss": 0.5826, + "step": 6965 + }, + { + "epoch": 0.44, + "grad_norm": 0.8720842599868774, + "learning_rate": 6.181936720022033e-06, + "loss": 0.6105, + "step": 6966 + }, + { + "epoch": 0.44, + "grad_norm": 0.9576596617698669, + "learning_rate": 6.180939779250188e-06, + "loss": 0.6231, + "step": 6967 + }, + { + "epoch": 0.44, + "grad_norm": 0.8861308097839355, + "learning_rate": 6.179942788753077e-06, + "loss": 0.6204, + "step": 6968 + }, + { + "epoch": 0.44, + "grad_norm": 0.9912355542182922, + "learning_rate": 6.178945748572681e-06, + "loss": 0.6036, + "step": 6969 + }, + { + "epoch": 0.44, + "grad_norm": 0.9015969634056091, + "learning_rate": 6.177948658750979e-06, + "loss": 0.5217, + "step": 6970 + }, + { + "epoch": 0.44, + "grad_norm": 0.8962372541427612, + "learning_rate": 6.176951519329958e-06, + "loss": 0.5973, + "step": 6971 + }, + { + "epoch": 0.44, + "grad_norm": 0.9305719137191772, + "learning_rate": 6.1759543303516025e-06, + "loss": 0.5845, + "step": 6972 + }, + { + "epoch": 0.44, + "grad_norm": 0.9429194331169128, + "learning_rate": 6.174957091857901e-06, + "loss": 0.6336, + "step": 6973 + }, + { + "epoch": 0.44, + "grad_norm": 0.8551360368728638, + "learning_rate": 6.173959803890843e-06, + "loss": 0.5911, + "step": 6974 + }, + { + "epoch": 0.44, + "grad_norm": 0.9430440068244934, + "learning_rate": 6.172962466492423e-06, + "loss": 0.612, + "step": 6975 + }, + { + "epoch": 0.44, + "grad_norm": 0.8664399981498718, + "learning_rate": 6.171965079704634e-06, + "loss": 0.6186, + "step": 6976 + }, + { + "epoch": 0.44, + "grad_norm": 0.9083961248397827, + "learning_rate": 6.17096764356947e-06, + "loss": 0.5966, + "step": 6977 + }, + { + "epoch": 0.44, + "grad_norm": 0.8894690275192261, + "learning_rate": 6.169970158128935e-06, + "loss": 0.6315, + "step": 6978 + }, + { + "epoch": 0.44, + "grad_norm": 0.9232130646705627, + "learning_rate": 6.168972623425023e-06, + "loss": 0.5961, + "step": 6979 + }, + { + "epoch": 0.44, + "grad_norm": 0.8410488963127136, + "learning_rate": 6.167975039499744e-06, + "loss": 0.5795, + "step": 6980 + }, + { + "epoch": 0.44, + "grad_norm": 0.8683662414550781, + "learning_rate": 6.1669774063950985e-06, + "loss": 0.6774, + "step": 6981 + }, + { + "epoch": 0.44, + "grad_norm": 1.0402193069458008, + "learning_rate": 6.165979724153094e-06, + "loss": 0.636, + "step": 6982 + }, + { + "epoch": 0.44, + "grad_norm": 0.8895815014839172, + "learning_rate": 6.164981992815737e-06, + "loss": 0.5795, + "step": 6983 + }, + { + "epoch": 0.44, + "grad_norm": 0.8773569464683533, + "learning_rate": 6.163984212425043e-06, + "loss": 0.5905, + "step": 6984 + }, + { + "epoch": 0.44, + "grad_norm": 0.8979213833808899, + "learning_rate": 6.162986383023023e-06, + "loss": 0.6089, + "step": 6985 + }, + { + "epoch": 0.44, + "grad_norm": 0.953054666519165, + "learning_rate": 6.161988504651692e-06, + "loss": 0.6058, + "step": 6986 + }, + { + "epoch": 0.44, + "grad_norm": 0.8674301505088806, + "learning_rate": 6.160990577353066e-06, + "loss": 0.6171, + "step": 6987 + }, + { + "epoch": 0.44, + "grad_norm": 0.8784220814704895, + "learning_rate": 6.1599926011691695e-06, + "loss": 0.5925, + "step": 6988 + }, + { + "epoch": 0.44, + "grad_norm": 0.9530996084213257, + "learning_rate": 6.1589945761420166e-06, + "loss": 0.6118, + "step": 6989 + }, + { + "epoch": 0.44, + "grad_norm": 0.892207145690918, + "learning_rate": 6.157996502313635e-06, + "loss": 0.5923, + "step": 6990 + }, + { + "epoch": 0.44, + "grad_norm": 0.803701639175415, + "learning_rate": 6.156998379726048e-06, + "loss": 0.549, + "step": 6991 + }, + { + "epoch": 0.44, + "grad_norm": 0.9421709775924683, + "learning_rate": 6.1560002084212845e-06, + "loss": 0.5709, + "step": 6992 + }, + { + "epoch": 0.44, + "grad_norm": 0.8066360354423523, + "learning_rate": 6.155001988441375e-06, + "loss": 0.5746, + "step": 6993 + }, + { + "epoch": 0.44, + "grad_norm": 0.8635882139205933, + "learning_rate": 6.154003719828349e-06, + "loss": 0.5782, + "step": 6994 + }, + { + "epoch": 0.44, + "grad_norm": 0.8454831838607788, + "learning_rate": 6.1530054026242405e-06, + "loss": 0.5256, + "step": 6995 + }, + { + "epoch": 0.44, + "grad_norm": 0.890565037727356, + "learning_rate": 6.152007036871085e-06, + "loss": 0.5331, + "step": 6996 + }, + { + "epoch": 0.44, + "grad_norm": 0.8612550497055054, + "learning_rate": 6.151008622610921e-06, + "loss": 0.6093, + "step": 6997 + }, + { + "epoch": 0.44, + "grad_norm": 0.8857212066650391, + "learning_rate": 6.150010159885789e-06, + "loss": 0.5336, + "step": 6998 + }, + { + "epoch": 0.44, + "grad_norm": 0.9954962730407715, + "learning_rate": 6.149011648737728e-06, + "loss": 0.6686, + "step": 6999 + }, + { + "epoch": 0.44, + "grad_norm": 0.9537789225578308, + "learning_rate": 6.148013089208784e-06, + "loss": 0.6394, + "step": 7000 + }, + { + "epoch": 0.44, + "grad_norm": 0.872518002986908, + "learning_rate": 6.1470144813410045e-06, + "loss": 0.6226, + "step": 7001 + }, + { + "epoch": 0.44, + "grad_norm": 0.8990280032157898, + "learning_rate": 6.146015825176432e-06, + "loss": 0.5554, + "step": 7002 + }, + { + "epoch": 0.44, + "grad_norm": 0.8978776931762695, + "learning_rate": 6.145017120757123e-06, + "loss": 0.6101, + "step": 7003 + }, + { + "epoch": 0.44, + "grad_norm": 0.9086971879005432, + "learning_rate": 6.144018368125124e-06, + "loss": 0.5648, + "step": 7004 + }, + { + "epoch": 0.44, + "grad_norm": 0.8578811287879944, + "learning_rate": 6.143019567322493e-06, + "loss": 0.5522, + "step": 7005 + }, + { + "epoch": 0.44, + "grad_norm": 0.9005651473999023, + "learning_rate": 6.1420207183912824e-06, + "loss": 0.6659, + "step": 7006 + }, + { + "epoch": 0.44, + "grad_norm": 0.8784024715423584, + "learning_rate": 6.141021821373555e-06, + "loss": 0.5978, + "step": 7007 + }, + { + "epoch": 0.44, + "grad_norm": 0.9198904633522034, + "learning_rate": 6.140022876311367e-06, + "loss": 0.5903, + "step": 7008 + }, + { + "epoch": 0.44, + "grad_norm": 0.8121350407600403, + "learning_rate": 6.139023883246781e-06, + "loss": 0.5551, + "step": 7009 + }, + { + "epoch": 0.44, + "grad_norm": 0.8870401382446289, + "learning_rate": 6.1380248422218604e-06, + "loss": 0.521, + "step": 7010 + }, + { + "epoch": 0.44, + "grad_norm": 0.9249464869499207, + "learning_rate": 6.137025753278673e-06, + "loss": 0.6247, + "step": 7011 + }, + { + "epoch": 0.44, + "grad_norm": 0.8742251992225647, + "learning_rate": 6.1360266164592886e-06, + "loss": 0.6279, + "step": 7012 + }, + { + "epoch": 0.44, + "grad_norm": 0.9462286829948425, + "learning_rate": 6.135027431805774e-06, + "loss": 0.6258, + "step": 7013 + }, + { + "epoch": 0.44, + "grad_norm": 0.8585238456726074, + "learning_rate": 6.134028199360203e-06, + "loss": 0.5567, + "step": 7014 + }, + { + "epoch": 0.44, + "grad_norm": 0.8556974530220032, + "learning_rate": 6.133028919164647e-06, + "loss": 0.6523, + "step": 7015 + }, + { + "epoch": 0.44, + "grad_norm": 0.8407923579216003, + "learning_rate": 6.132029591261188e-06, + "loss": 0.5777, + "step": 7016 + }, + { + "epoch": 0.44, + "grad_norm": 0.9544344544410706, + "learning_rate": 6.1310302156919e-06, + "loss": 0.6063, + "step": 7017 + }, + { + "epoch": 0.44, + "grad_norm": 0.9588916301727295, + "learning_rate": 6.130030792498865e-06, + "loss": 0.6007, + "step": 7018 + }, + { + "epoch": 0.44, + "grad_norm": 0.8659964203834534, + "learning_rate": 6.129031321724163e-06, + "loss": 0.5848, + "step": 7019 + }, + { + "epoch": 0.44, + "grad_norm": 0.9121047854423523, + "learning_rate": 6.128031803409881e-06, + "loss": 0.5774, + "step": 7020 + }, + { + "epoch": 0.44, + "grad_norm": 0.9202248454093933, + "learning_rate": 6.127032237598102e-06, + "loss": 0.6225, + "step": 7021 + }, + { + "epoch": 0.44, + "grad_norm": 0.8752833604812622, + "learning_rate": 6.126032624330917e-06, + "loss": 0.5862, + "step": 7022 + }, + { + "epoch": 0.44, + "grad_norm": 0.8969300389289856, + "learning_rate": 6.125032963650417e-06, + "loss": 0.6297, + "step": 7023 + }, + { + "epoch": 0.45, + "grad_norm": 0.880032479763031, + "learning_rate": 6.124033255598691e-06, + "loss": 0.5669, + "step": 7024 + }, + { + "epoch": 0.45, + "grad_norm": 0.8935046195983887, + "learning_rate": 6.1230335002178345e-06, + "loss": 0.5929, + "step": 7025 + }, + { + "epoch": 0.45, + "grad_norm": 0.9103530645370483, + "learning_rate": 6.1220336975499435e-06, + "loss": 0.5772, + "step": 7026 + }, + { + "epoch": 0.45, + "grad_norm": 0.8994046449661255, + "learning_rate": 6.121033847637119e-06, + "loss": 0.63, + "step": 7027 + }, + { + "epoch": 0.45, + "grad_norm": 0.8760805726051331, + "learning_rate": 6.120033950521458e-06, + "loss": 0.568, + "step": 7028 + }, + { + "epoch": 0.45, + "grad_norm": 0.8472411632537842, + "learning_rate": 6.119034006245063e-06, + "loss": 0.5667, + "step": 7029 + }, + { + "epoch": 0.45, + "grad_norm": 0.9165576100349426, + "learning_rate": 6.118034014850039e-06, + "loss": 0.6196, + "step": 7030 + }, + { + "epoch": 0.45, + "grad_norm": 0.8201605677604675, + "learning_rate": 6.117033976378493e-06, + "loss": 0.5162, + "step": 7031 + }, + { + "epoch": 0.45, + "grad_norm": 0.9203435182571411, + "learning_rate": 6.116033890872531e-06, + "loss": 0.558, + "step": 7032 + }, + { + "epoch": 0.45, + "grad_norm": 0.8711187839508057, + "learning_rate": 6.115033758374265e-06, + "loss": 0.634, + "step": 7033 + }, + { + "epoch": 0.45, + "grad_norm": 0.8825559616088867, + "learning_rate": 6.114033578925805e-06, + "loss": 0.599, + "step": 7034 + }, + { + "epoch": 0.45, + "grad_norm": 0.8482293486595154, + "learning_rate": 6.1130333525692684e-06, + "loss": 0.583, + "step": 7035 + }, + { + "epoch": 0.45, + "grad_norm": 0.9043130278587341, + "learning_rate": 6.112033079346767e-06, + "loss": 0.575, + "step": 7036 + }, + { + "epoch": 0.45, + "grad_norm": 0.8233504295349121, + "learning_rate": 6.111032759300423e-06, + "loss": 0.6182, + "step": 7037 + }, + { + "epoch": 0.45, + "grad_norm": 0.8660386204719543, + "learning_rate": 6.110032392472354e-06, + "loss": 0.5851, + "step": 7038 + }, + { + "epoch": 0.45, + "grad_norm": 0.8297396302223206, + "learning_rate": 6.109031978904683e-06, + "loss": 0.5715, + "step": 7039 + }, + { + "epoch": 0.45, + "grad_norm": 0.8824520707130432, + "learning_rate": 6.108031518639532e-06, + "loss": 0.6218, + "step": 7040 + }, + { + "epoch": 0.45, + "grad_norm": 0.941839873790741, + "learning_rate": 6.107031011719029e-06, + "loss": 0.6708, + "step": 7041 + }, + { + "epoch": 0.45, + "grad_norm": 0.8961352705955505, + "learning_rate": 6.106030458185303e-06, + "loss": 0.5851, + "step": 7042 + }, + { + "epoch": 0.45, + "grad_norm": 0.9293150305747986, + "learning_rate": 6.105029858080479e-06, + "loss": 0.5899, + "step": 7043 + }, + { + "epoch": 0.45, + "grad_norm": 0.84063720703125, + "learning_rate": 6.1040292114466935e-06, + "loss": 0.5756, + "step": 7044 + }, + { + "epoch": 0.45, + "grad_norm": 0.8923290371894836, + "learning_rate": 6.103028518326077e-06, + "loss": 0.5649, + "step": 7045 + }, + { + "epoch": 0.45, + "grad_norm": 0.9170678853988647, + "learning_rate": 6.102027778760769e-06, + "loss": 0.6111, + "step": 7046 + }, + { + "epoch": 0.45, + "grad_norm": 0.9100625514984131, + "learning_rate": 6.101026992792904e-06, + "loss": 0.6542, + "step": 7047 + }, + { + "epoch": 0.45, + "grad_norm": 0.842936635017395, + "learning_rate": 6.100026160464621e-06, + "loss": 0.5677, + "step": 7048 + }, + { + "epoch": 0.45, + "grad_norm": 0.8089660406112671, + "learning_rate": 6.099025281818065e-06, + "loss": 0.5623, + "step": 7049 + }, + { + "epoch": 0.45, + "grad_norm": 0.8938851952552795, + "learning_rate": 6.098024356895378e-06, + "loss": 0.5826, + "step": 7050 + }, + { + "epoch": 0.45, + "grad_norm": 0.8716176152229309, + "learning_rate": 6.097023385738704e-06, + "loss": 0.5478, + "step": 7051 + }, + { + "epoch": 0.45, + "grad_norm": 0.9070749878883362, + "learning_rate": 6.096022368390191e-06, + "loss": 0.6217, + "step": 7052 + }, + { + "epoch": 0.45, + "grad_norm": 0.8996195197105408, + "learning_rate": 6.0950213048919895e-06, + "loss": 0.5936, + "step": 7053 + }, + { + "epoch": 0.45, + "grad_norm": 0.8879945278167725, + "learning_rate": 6.094020195286251e-06, + "loss": 0.57, + "step": 7054 + }, + { + "epoch": 0.45, + "grad_norm": 0.9025259017944336, + "learning_rate": 6.093019039615128e-06, + "loss": 0.6607, + "step": 7055 + }, + { + "epoch": 0.45, + "grad_norm": 0.8679327368736267, + "learning_rate": 6.092017837920773e-06, + "loss": 0.5673, + "step": 7056 + }, + { + "epoch": 0.45, + "grad_norm": 0.8866552114486694, + "learning_rate": 6.091016590245347e-06, + "loss": 0.5824, + "step": 7057 + }, + { + "epoch": 0.45, + "grad_norm": 0.885572075843811, + "learning_rate": 6.090015296631009e-06, + "loss": 0.5513, + "step": 7058 + }, + { + "epoch": 0.45, + "grad_norm": 0.9012414813041687, + "learning_rate": 6.089013957119918e-06, + "loss": 0.6531, + "step": 7059 + }, + { + "epoch": 0.45, + "grad_norm": 0.833052396774292, + "learning_rate": 6.088012571754236e-06, + "loss": 0.5201, + "step": 7060 + }, + { + "epoch": 0.45, + "grad_norm": 0.952321469783783, + "learning_rate": 6.087011140576132e-06, + "loss": 0.6119, + "step": 7061 + }, + { + "epoch": 0.45, + "grad_norm": 0.8854734301567078, + "learning_rate": 6.086009663627769e-06, + "loss": 0.5879, + "step": 7062 + }, + { + "epoch": 0.45, + "grad_norm": 0.899401068687439, + "learning_rate": 6.085008140951318e-06, + "loss": 0.5806, + "step": 7063 + }, + { + "epoch": 0.45, + "grad_norm": 0.945583164691925, + "learning_rate": 6.084006572588947e-06, + "loss": 0.6169, + "step": 7064 + }, + { + "epoch": 0.45, + "grad_norm": 0.8657901287078857, + "learning_rate": 6.083004958582832e-06, + "loss": 0.5392, + "step": 7065 + }, + { + "epoch": 0.45, + "grad_norm": 0.8929893374443054, + "learning_rate": 6.082003298975144e-06, + "loss": 0.6197, + "step": 7066 + }, + { + "epoch": 0.45, + "grad_norm": 0.8724581599235535, + "learning_rate": 6.081001593808063e-06, + "loss": 0.5692, + "step": 7067 + }, + { + "epoch": 0.45, + "grad_norm": 0.9062278866767883, + "learning_rate": 6.079999843123763e-06, + "loss": 0.6532, + "step": 7068 + }, + { + "epoch": 0.45, + "grad_norm": 0.8632785081863403, + "learning_rate": 6.07899804696443e-06, + "loss": 0.6221, + "step": 7069 + }, + { + "epoch": 0.45, + "grad_norm": 0.9054343104362488, + "learning_rate": 6.077996205372241e-06, + "loss": 0.5598, + "step": 7070 + }, + { + "epoch": 0.45, + "grad_norm": 0.8777742385864258, + "learning_rate": 6.07699431838938e-06, + "loss": 0.5767, + "step": 7071 + }, + { + "epoch": 0.45, + "grad_norm": 0.8978030681610107, + "learning_rate": 6.075992386058037e-06, + "loss": 0.5976, + "step": 7072 + }, + { + "epoch": 0.45, + "grad_norm": 0.9662826061248779, + "learning_rate": 6.074990408420397e-06, + "loss": 0.5744, + "step": 7073 + }, + { + "epoch": 0.45, + "grad_norm": 0.8414073586463928, + "learning_rate": 6.073988385518652e-06, + "loss": 0.543, + "step": 7074 + }, + { + "epoch": 0.45, + "grad_norm": 0.863689124584198, + "learning_rate": 6.07298631739499e-06, + "loss": 0.6149, + "step": 7075 + }, + { + "epoch": 0.45, + "grad_norm": 0.8683016896247864, + "learning_rate": 6.071984204091608e-06, + "loss": 0.6363, + "step": 7076 + }, + { + "epoch": 0.45, + "grad_norm": 0.9165253639221191, + "learning_rate": 6.0709820456507e-06, + "loss": 0.6226, + "step": 7077 + }, + { + "epoch": 0.45, + "grad_norm": 0.8738973140716553, + "learning_rate": 6.069979842114465e-06, + "loss": 0.5733, + "step": 7078 + }, + { + "epoch": 0.45, + "grad_norm": 0.954142153263092, + "learning_rate": 6.068977593525098e-06, + "loss": 0.625, + "step": 7079 + }, + { + "epoch": 0.45, + "grad_norm": 0.8756037354469299, + "learning_rate": 6.067975299924806e-06, + "loss": 0.6264, + "step": 7080 + }, + { + "epoch": 0.45, + "grad_norm": 0.8994114398956299, + "learning_rate": 6.066972961355788e-06, + "loss": 0.6367, + "step": 7081 + }, + { + "epoch": 0.45, + "grad_norm": 0.8711373805999756, + "learning_rate": 6.065970577860252e-06, + "loss": 0.5993, + "step": 7082 + }, + { + "epoch": 0.45, + "grad_norm": 0.9261775016784668, + "learning_rate": 6.0649681494804014e-06, + "loss": 0.619, + "step": 7083 + }, + { + "epoch": 0.45, + "grad_norm": 0.8910576701164246, + "learning_rate": 6.063965676258448e-06, + "loss": 0.594, + "step": 7084 + }, + { + "epoch": 0.45, + "grad_norm": 0.8395871520042419, + "learning_rate": 6.0629631582366015e-06, + "loss": 0.6077, + "step": 7085 + }, + { + "epoch": 0.45, + "grad_norm": 0.8894856572151184, + "learning_rate": 6.0619605954570726e-06, + "loss": 0.5605, + "step": 7086 + }, + { + "epoch": 0.45, + "grad_norm": 0.8762527704238892, + "learning_rate": 6.060957987962077e-06, + "loss": 0.5896, + "step": 7087 + }, + { + "epoch": 0.45, + "grad_norm": 0.8831724524497986, + "learning_rate": 6.059955335793832e-06, + "loss": 0.6159, + "step": 7088 + }, + { + "epoch": 0.45, + "grad_norm": 0.9416611790657043, + "learning_rate": 6.0589526389945576e-06, + "loss": 0.5841, + "step": 7089 + }, + { + "epoch": 0.45, + "grad_norm": 0.8676833510398865, + "learning_rate": 6.057949897606469e-06, + "loss": 0.5974, + "step": 7090 + }, + { + "epoch": 0.45, + "grad_norm": 0.8935969471931458, + "learning_rate": 6.05694711167179e-06, + "loss": 0.5916, + "step": 7091 + }, + { + "epoch": 0.45, + "grad_norm": 0.8953608870506287, + "learning_rate": 6.055944281232746e-06, + "loss": 0.6164, + "step": 7092 + }, + { + "epoch": 0.45, + "grad_norm": 0.9108656048774719, + "learning_rate": 6.0549414063315625e-06, + "loss": 0.6615, + "step": 7093 + }, + { + "epoch": 0.45, + "grad_norm": 0.8927263617515564, + "learning_rate": 6.053938487010464e-06, + "loss": 0.5843, + "step": 7094 + }, + { + "epoch": 0.45, + "grad_norm": 0.8488055467605591, + "learning_rate": 6.052935523311684e-06, + "loss": 0.5774, + "step": 7095 + }, + { + "epoch": 0.45, + "grad_norm": 0.861768901348114, + "learning_rate": 6.0519325152774515e-06, + "loss": 0.5815, + "step": 7096 + }, + { + "epoch": 0.45, + "grad_norm": 0.8488924503326416, + "learning_rate": 6.05092946295e-06, + "loss": 0.6129, + "step": 7097 + }, + { + "epoch": 0.45, + "grad_norm": 0.8515621423721313, + "learning_rate": 6.049926366371565e-06, + "loss": 0.6088, + "step": 7098 + }, + { + "epoch": 0.45, + "grad_norm": 0.8117412328720093, + "learning_rate": 6.048923225584383e-06, + "loss": 0.5814, + "step": 7099 + }, + { + "epoch": 0.45, + "grad_norm": 0.9428633451461792, + "learning_rate": 6.047920040630692e-06, + "loss": 0.5821, + "step": 7100 + }, + { + "epoch": 0.45, + "grad_norm": 0.8841315507888794, + "learning_rate": 6.046916811552735e-06, + "loss": 0.5685, + "step": 7101 + }, + { + "epoch": 0.45, + "grad_norm": 0.8266769051551819, + "learning_rate": 6.045913538392754e-06, + "loss": 0.5903, + "step": 7102 + }, + { + "epoch": 0.45, + "grad_norm": 0.9347844123840332, + "learning_rate": 6.04491022119299e-06, + "loss": 0.6212, + "step": 7103 + }, + { + "epoch": 0.45, + "grad_norm": 0.8492763042449951, + "learning_rate": 6.043906859995693e-06, + "loss": 0.5411, + "step": 7104 + }, + { + "epoch": 0.45, + "grad_norm": 0.8961597084999084, + "learning_rate": 6.042903454843109e-06, + "loss": 0.5772, + "step": 7105 + }, + { + "epoch": 0.45, + "grad_norm": 0.9451767802238464, + "learning_rate": 6.041900005777488e-06, + "loss": 0.6845, + "step": 7106 + }, + { + "epoch": 0.45, + "grad_norm": 0.8957589268684387, + "learning_rate": 6.040896512841083e-06, + "loss": 0.607, + "step": 7107 + }, + { + "epoch": 0.45, + "grad_norm": 0.9263405203819275, + "learning_rate": 6.039892976076147e-06, + "loss": 0.5677, + "step": 7108 + }, + { + "epoch": 0.45, + "grad_norm": 0.842929482460022, + "learning_rate": 6.038889395524935e-06, + "loss": 0.6016, + "step": 7109 + }, + { + "epoch": 0.45, + "grad_norm": 0.9199305772781372, + "learning_rate": 6.037885771229703e-06, + "loss": 0.6241, + "step": 7110 + }, + { + "epoch": 0.45, + "grad_norm": 0.8669660091400146, + "learning_rate": 6.036882103232714e-06, + "loss": 0.6024, + "step": 7111 + }, + { + "epoch": 0.45, + "grad_norm": 0.8775947690010071, + "learning_rate": 6.0358783915762265e-06, + "loss": 0.5895, + "step": 7112 + }, + { + "epoch": 0.45, + "grad_norm": 0.8761069178581238, + "learning_rate": 6.034874636302502e-06, + "loss": 0.5858, + "step": 7113 + }, + { + "epoch": 0.45, + "grad_norm": 0.9764485955238342, + "learning_rate": 6.033870837453808e-06, + "loss": 0.6528, + "step": 7114 + }, + { + "epoch": 0.45, + "grad_norm": 0.8723066449165344, + "learning_rate": 6.0328669950724096e-06, + "loss": 0.5737, + "step": 7115 + }, + { + "epoch": 0.45, + "grad_norm": 0.8759384751319885, + "learning_rate": 6.031863109200575e-06, + "loss": 0.6642, + "step": 7116 + }, + { + "epoch": 0.45, + "grad_norm": 0.8828076124191284, + "learning_rate": 6.030859179880574e-06, + "loss": 0.6082, + "step": 7117 + }, + { + "epoch": 0.45, + "grad_norm": 0.9559153318405151, + "learning_rate": 6.029855207154679e-06, + "loss": 0.599, + "step": 7118 + }, + { + "epoch": 0.45, + "grad_norm": 0.8825298547744751, + "learning_rate": 6.0288511910651644e-06, + "loss": 0.5871, + "step": 7119 + }, + { + "epoch": 0.45, + "grad_norm": 0.8728659152984619, + "learning_rate": 6.027847131654305e-06, + "loss": 0.5783, + "step": 7120 + }, + { + "epoch": 0.45, + "grad_norm": 0.9349566102027893, + "learning_rate": 6.026843028964378e-06, + "loss": 0.6797, + "step": 7121 + }, + { + "epoch": 0.45, + "grad_norm": 0.9346398711204529, + "learning_rate": 6.025838883037664e-06, + "loss": 0.5802, + "step": 7122 + }, + { + "epoch": 0.45, + "grad_norm": 1.0061968564987183, + "learning_rate": 6.024834693916443e-06, + "loss": 0.6027, + "step": 7123 + }, + { + "epoch": 0.45, + "grad_norm": 0.8729983568191528, + "learning_rate": 6.023830461642998e-06, + "loss": 0.5871, + "step": 7124 + }, + { + "epoch": 0.45, + "grad_norm": 0.8659200072288513, + "learning_rate": 6.022826186259614e-06, + "loss": 0.575, + "step": 7125 + }, + { + "epoch": 0.45, + "grad_norm": 0.8419411182403564, + "learning_rate": 6.021821867808576e-06, + "loss": 0.5713, + "step": 7126 + }, + { + "epoch": 0.45, + "grad_norm": 0.9160114526748657, + "learning_rate": 6.0208175063321765e-06, + "loss": 0.5911, + "step": 7127 + }, + { + "epoch": 0.45, + "grad_norm": 0.869117259979248, + "learning_rate": 6.019813101872701e-06, + "loss": 0.5719, + "step": 7128 + }, + { + "epoch": 0.45, + "grad_norm": 0.9373559355735779, + "learning_rate": 6.018808654472445e-06, + "loss": 0.5958, + "step": 7129 + }, + { + "epoch": 0.45, + "grad_norm": 0.9139472246170044, + "learning_rate": 6.017804164173698e-06, + "loss": 0.6223, + "step": 7130 + }, + { + "epoch": 0.45, + "grad_norm": 0.9792049527168274, + "learning_rate": 6.0167996310187615e-06, + "loss": 0.6056, + "step": 7131 + }, + { + "epoch": 0.45, + "grad_norm": 0.8391651511192322, + "learning_rate": 6.015795055049929e-06, + "loss": 0.5411, + "step": 7132 + }, + { + "epoch": 0.45, + "grad_norm": 0.9273377060890198, + "learning_rate": 6.014790436309499e-06, + "loss": 0.6351, + "step": 7133 + }, + { + "epoch": 0.45, + "grad_norm": 0.8438581228256226, + "learning_rate": 6.013785774839776e-06, + "loss": 0.5491, + "step": 7134 + }, + { + "epoch": 0.45, + "grad_norm": 0.858647882938385, + "learning_rate": 6.012781070683058e-06, + "loss": 0.6346, + "step": 7135 + }, + { + "epoch": 0.45, + "grad_norm": 0.8910332918167114, + "learning_rate": 6.011776323881654e-06, + "loss": 0.6019, + "step": 7136 + }, + { + "epoch": 0.45, + "grad_norm": 0.8092496991157532, + "learning_rate": 6.0107715344778684e-06, + "loss": 0.5526, + "step": 7137 + }, + { + "epoch": 0.45, + "grad_norm": 0.8764297962188721, + "learning_rate": 6.00976670251401e-06, + "loss": 0.6015, + "step": 7138 + }, + { + "epoch": 0.45, + "grad_norm": 0.8800103664398193, + "learning_rate": 6.008761828032389e-06, + "loss": 0.5809, + "step": 7139 + }, + { + "epoch": 0.45, + "grad_norm": 0.9022515416145325, + "learning_rate": 6.007756911075315e-06, + "loss": 0.5433, + "step": 7140 + }, + { + "epoch": 0.45, + "grad_norm": 0.8949940800666809, + "learning_rate": 6.006751951685104e-06, + "loss": 0.5678, + "step": 7141 + }, + { + "epoch": 0.45, + "grad_norm": 0.9681234955787659, + "learning_rate": 6.005746949904072e-06, + "loss": 0.6141, + "step": 7142 + }, + { + "epoch": 0.45, + "grad_norm": 0.863433301448822, + "learning_rate": 6.004741905774533e-06, + "loss": 0.6122, + "step": 7143 + }, + { + "epoch": 0.45, + "grad_norm": 0.9015092253684998, + "learning_rate": 6.003736819338808e-06, + "loss": 0.6173, + "step": 7144 + }, + { + "epoch": 0.45, + "grad_norm": 0.9383165836334229, + "learning_rate": 6.0027316906392165e-06, + "loss": 0.5596, + "step": 7145 + }, + { + "epoch": 0.45, + "grad_norm": 0.8968831896781921, + "learning_rate": 6.001726519718083e-06, + "loss": 0.5773, + "step": 7146 + }, + { + "epoch": 0.45, + "grad_norm": 0.9414469003677368, + "learning_rate": 6.000721306617731e-06, + "loss": 0.613, + "step": 7147 + }, + { + "epoch": 0.45, + "grad_norm": 0.8644320368766785, + "learning_rate": 5.999716051380484e-06, + "loss": 0.5843, + "step": 7148 + }, + { + "epoch": 0.45, + "grad_norm": 0.8745971322059631, + "learning_rate": 5.998710754048674e-06, + "loss": 0.6112, + "step": 7149 + }, + { + "epoch": 0.45, + "grad_norm": 0.9257605075836182, + "learning_rate": 5.997705414664627e-06, + "loss": 0.5828, + "step": 7150 + }, + { + "epoch": 0.45, + "grad_norm": 0.8571212887763977, + "learning_rate": 5.996700033270676e-06, + "loss": 0.5362, + "step": 7151 + }, + { + "epoch": 0.45, + "grad_norm": 0.895595908164978, + "learning_rate": 5.995694609909153e-06, + "loss": 0.5773, + "step": 7152 + }, + { + "epoch": 0.45, + "grad_norm": 0.8279756307601929, + "learning_rate": 5.9946891446223955e-06, + "loss": 0.5738, + "step": 7153 + }, + { + "epoch": 0.45, + "grad_norm": 0.8749321699142456, + "learning_rate": 5.993683637452736e-06, + "loss": 0.5683, + "step": 7154 + }, + { + "epoch": 0.45, + "grad_norm": 0.8859013319015503, + "learning_rate": 5.992678088442518e-06, + "loss": 0.6348, + "step": 7155 + }, + { + "epoch": 0.45, + "grad_norm": 0.8397629857063293, + "learning_rate": 5.991672497634076e-06, + "loss": 0.5788, + "step": 7156 + }, + { + "epoch": 0.45, + "grad_norm": 0.8651146292686462, + "learning_rate": 5.990666865069759e-06, + "loss": 0.5909, + "step": 7157 + }, + { + "epoch": 0.45, + "grad_norm": 0.8625426888465881, + "learning_rate": 5.9896611907919034e-06, + "loss": 0.6283, + "step": 7158 + }, + { + "epoch": 0.45, + "grad_norm": 0.8170305490493774, + "learning_rate": 5.98865547484286e-06, + "loss": 0.5283, + "step": 7159 + }, + { + "epoch": 0.45, + "grad_norm": 0.8646724820137024, + "learning_rate": 5.9876497172649704e-06, + "loss": 0.5889, + "step": 7160 + }, + { + "epoch": 0.45, + "grad_norm": 0.9071366786956787, + "learning_rate": 5.986643918100591e-06, + "loss": 0.6299, + "step": 7161 + }, + { + "epoch": 0.45, + "grad_norm": 0.8904988169670105, + "learning_rate": 5.985638077392066e-06, + "loss": 0.5889, + "step": 7162 + }, + { + "epoch": 0.45, + "grad_norm": 0.8769485950469971, + "learning_rate": 5.984632195181752e-06, + "loss": 0.5965, + "step": 7163 + }, + { + "epoch": 0.45, + "grad_norm": 0.8972844481468201, + "learning_rate": 5.983626271512e-06, + "loss": 0.6024, + "step": 7164 + }, + { + "epoch": 0.45, + "grad_norm": 0.92257159948349, + "learning_rate": 5.982620306425167e-06, + "loss": 0.6158, + "step": 7165 + }, + { + "epoch": 0.45, + "grad_norm": 0.8915507793426514, + "learning_rate": 5.981614299963614e-06, + "loss": 0.6001, + "step": 7166 + }, + { + "epoch": 0.45, + "grad_norm": 0.9429782032966614, + "learning_rate": 5.9806082521696936e-06, + "loss": 0.6177, + "step": 7167 + }, + { + "epoch": 0.45, + "grad_norm": 0.8852347135543823, + "learning_rate": 5.979602163085775e-06, + "loss": 0.5969, + "step": 7168 + }, + { + "epoch": 0.45, + "grad_norm": 0.880511999130249, + "learning_rate": 5.978596032754215e-06, + "loss": 0.5388, + "step": 7169 + }, + { + "epoch": 0.45, + "grad_norm": 0.8679192066192627, + "learning_rate": 5.977589861217381e-06, + "loss": 0.5925, + "step": 7170 + }, + { + "epoch": 0.45, + "grad_norm": 0.8661013245582581, + "learning_rate": 5.9765836485176376e-06, + "loss": 0.5717, + "step": 7171 + }, + { + "epoch": 0.45, + "grad_norm": 0.8547639846801758, + "learning_rate": 5.9755773946973546e-06, + "loss": 0.6167, + "step": 7172 + }, + { + "epoch": 0.45, + "grad_norm": 0.8579798936843872, + "learning_rate": 5.974571099798902e-06, + "loss": 0.584, + "step": 7173 + }, + { + "epoch": 0.45, + "grad_norm": 0.8616194128990173, + "learning_rate": 5.973564763864651e-06, + "loss": 0.6371, + "step": 7174 + }, + { + "epoch": 0.45, + "grad_norm": 0.8729208111763, + "learning_rate": 5.972558386936973e-06, + "loss": 0.6199, + "step": 7175 + }, + { + "epoch": 0.45, + "grad_norm": 0.8993321657180786, + "learning_rate": 5.971551969058246e-06, + "loss": 0.615, + "step": 7176 + }, + { + "epoch": 0.45, + "grad_norm": 0.8909555673599243, + "learning_rate": 5.970545510270845e-06, + "loss": 0.5902, + "step": 7177 + }, + { + "epoch": 0.45, + "grad_norm": 0.8814198970794678, + "learning_rate": 5.969539010617149e-06, + "loss": 0.5594, + "step": 7178 + }, + { + "epoch": 0.45, + "grad_norm": 0.8692460060119629, + "learning_rate": 5.968532470139537e-06, + "loss": 0.5863, + "step": 7179 + }, + { + "epoch": 0.45, + "grad_norm": 0.8972712755203247, + "learning_rate": 5.967525888880392e-06, + "loss": 0.5691, + "step": 7180 + }, + { + "epoch": 0.45, + "grad_norm": 0.8847329020500183, + "learning_rate": 5.966519266882099e-06, + "loss": 0.58, + "step": 7181 + }, + { + "epoch": 0.46, + "grad_norm": 0.9059928059577942, + "learning_rate": 5.965512604187041e-06, + "loss": 0.5482, + "step": 7182 + }, + { + "epoch": 0.46, + "grad_norm": 0.9167791604995728, + "learning_rate": 5.964505900837606e-06, + "loss": 0.5814, + "step": 7183 + }, + { + "epoch": 0.46, + "grad_norm": 0.9062039852142334, + "learning_rate": 5.963499156876182e-06, + "loss": 0.5696, + "step": 7184 + }, + { + "epoch": 0.46, + "grad_norm": 0.9285433888435364, + "learning_rate": 5.962492372345163e-06, + "loss": 0.5891, + "step": 7185 + }, + { + "epoch": 0.46, + "grad_norm": 0.8977758288383484, + "learning_rate": 5.961485547286936e-06, + "loss": 0.634, + "step": 7186 + }, + { + "epoch": 0.46, + "grad_norm": 0.8944379091262817, + "learning_rate": 5.960478681743897e-06, + "loss": 0.5478, + "step": 7187 + }, + { + "epoch": 0.46, + "grad_norm": 0.8805668950080872, + "learning_rate": 5.959471775758444e-06, + "loss": 0.6422, + "step": 7188 + }, + { + "epoch": 0.46, + "grad_norm": 0.8319005370140076, + "learning_rate": 5.9584648293729715e-06, + "loss": 0.5771, + "step": 7189 + }, + { + "epoch": 0.46, + "grad_norm": 0.834413468837738, + "learning_rate": 5.957457842629879e-06, + "loss": 0.5732, + "step": 7190 + }, + { + "epoch": 0.46, + "grad_norm": 0.8137858510017395, + "learning_rate": 5.956450815571567e-06, + "loss": 0.545, + "step": 7191 + }, + { + "epoch": 0.46, + "grad_norm": 0.8825639486312866, + "learning_rate": 5.955443748240439e-06, + "loss": 0.5388, + "step": 7192 + }, + { + "epoch": 0.46, + "grad_norm": 0.8979218006134033, + "learning_rate": 5.9544366406789e-06, + "loss": 0.6082, + "step": 7193 + }, + { + "epoch": 0.46, + "grad_norm": 0.8679836392402649, + "learning_rate": 5.953429492929352e-06, + "loss": 0.557, + "step": 7194 + }, + { + "epoch": 0.46, + "grad_norm": 0.9074422121047974, + "learning_rate": 5.952422305034206e-06, + "loss": 0.5523, + "step": 7195 + }, + { + "epoch": 0.46, + "grad_norm": 0.8395237326622009, + "learning_rate": 5.95141507703587e-06, + "loss": 0.5881, + "step": 7196 + }, + { + "epoch": 0.46, + "grad_norm": 0.8752167820930481, + "learning_rate": 5.9504078089767545e-06, + "loss": 0.6212, + "step": 7197 + }, + { + "epoch": 0.46, + "grad_norm": 0.9009909629821777, + "learning_rate": 5.949400500899272e-06, + "loss": 0.6038, + "step": 7198 + }, + { + "epoch": 0.46, + "grad_norm": 0.8272262215614319, + "learning_rate": 5.948393152845837e-06, + "loss": 0.492, + "step": 7199 + }, + { + "epoch": 0.46, + "grad_norm": 0.9322216510772705, + "learning_rate": 5.9473857648588665e-06, + "loss": 0.6591, + "step": 7200 + }, + { + "epoch": 0.46, + "grad_norm": 0.831906795501709, + "learning_rate": 5.9463783369807775e-06, + "loss": 0.601, + "step": 7201 + }, + { + "epoch": 0.46, + "grad_norm": 0.844941258430481, + "learning_rate": 5.945370869253987e-06, + "loss": 0.5914, + "step": 7202 + }, + { + "epoch": 0.46, + "grad_norm": 0.8600195050239563, + "learning_rate": 5.944363361720919e-06, + "loss": 0.6095, + "step": 7203 + }, + { + "epoch": 0.46, + "grad_norm": 0.8606268167495728, + "learning_rate": 5.943355814423996e-06, + "loss": 0.5522, + "step": 7204 + }, + { + "epoch": 0.46, + "grad_norm": 0.8842875361442566, + "learning_rate": 5.94234822740564e-06, + "loss": 0.6165, + "step": 7205 + }, + { + "epoch": 0.46, + "grad_norm": 0.8753437995910645, + "learning_rate": 5.941340600708279e-06, + "loss": 0.5917, + "step": 7206 + }, + { + "epoch": 0.46, + "grad_norm": 0.9226531386375427, + "learning_rate": 5.9403329343743385e-06, + "loss": 0.6226, + "step": 7207 + }, + { + "epoch": 0.46, + "grad_norm": 0.9380946755409241, + "learning_rate": 5.939325228446251e-06, + "loss": 0.6713, + "step": 7208 + }, + { + "epoch": 0.46, + "grad_norm": 0.8104608058929443, + "learning_rate": 5.938317482966446e-06, + "loss": 0.5834, + "step": 7209 + }, + { + "epoch": 0.46, + "grad_norm": 0.8702726364135742, + "learning_rate": 5.937309697977355e-06, + "loss": 0.5383, + "step": 7210 + }, + { + "epoch": 0.46, + "grad_norm": 0.8711553812026978, + "learning_rate": 5.936301873521414e-06, + "loss": 0.5848, + "step": 7211 + }, + { + "epoch": 0.46, + "grad_norm": 0.9385100603103638, + "learning_rate": 5.935294009641057e-06, + "loss": 0.6047, + "step": 7212 + }, + { + "epoch": 0.46, + "grad_norm": 0.8969570994377136, + "learning_rate": 5.934286106378724e-06, + "loss": 0.6154, + "step": 7213 + }, + { + "epoch": 0.46, + "grad_norm": 0.9219831824302673, + "learning_rate": 5.933278163776852e-06, + "loss": 0.62, + "step": 7214 + }, + { + "epoch": 0.46, + "grad_norm": 0.9561776518821716, + "learning_rate": 5.932270181877886e-06, + "loss": 0.6364, + "step": 7215 + }, + { + "epoch": 0.46, + "grad_norm": 0.8352993130683899, + "learning_rate": 5.9312621607242625e-06, + "loss": 0.5626, + "step": 7216 + }, + { + "epoch": 0.46, + "grad_norm": 0.8720530271530151, + "learning_rate": 5.93025410035843e-06, + "loss": 0.569, + "step": 7217 + }, + { + "epoch": 0.46, + "grad_norm": 0.8734807372093201, + "learning_rate": 5.929246000822835e-06, + "loss": 0.6127, + "step": 7218 + }, + { + "epoch": 0.46, + "grad_norm": 0.9370132088661194, + "learning_rate": 5.928237862159922e-06, + "loss": 0.6122, + "step": 7219 + }, + { + "epoch": 0.46, + "grad_norm": 0.918322741985321, + "learning_rate": 5.927229684412143e-06, + "loss": 0.6148, + "step": 7220 + }, + { + "epoch": 0.46, + "grad_norm": 0.8534547090530396, + "learning_rate": 5.926221467621945e-06, + "loss": 0.5618, + "step": 7221 + }, + { + "epoch": 0.46, + "grad_norm": 0.8477325439453125, + "learning_rate": 5.925213211831785e-06, + "loss": 0.5562, + "step": 7222 + }, + { + "epoch": 0.46, + "grad_norm": 0.881864070892334, + "learning_rate": 5.924204917084116e-06, + "loss": 0.5994, + "step": 7223 + }, + { + "epoch": 0.46, + "grad_norm": 0.8880230784416199, + "learning_rate": 5.923196583421392e-06, + "loss": 0.5846, + "step": 7224 + }, + { + "epoch": 0.46, + "grad_norm": 0.8969435691833496, + "learning_rate": 5.922188210886071e-06, + "loss": 0.57, + "step": 7225 + }, + { + "epoch": 0.46, + "grad_norm": 0.8515308499336243, + "learning_rate": 5.921179799520613e-06, + "loss": 0.6143, + "step": 7226 + }, + { + "epoch": 0.46, + "grad_norm": 0.9298543334007263, + "learning_rate": 5.920171349367478e-06, + "loss": 0.6082, + "step": 7227 + }, + { + "epoch": 0.46, + "grad_norm": 0.8796992301940918, + "learning_rate": 5.919162860469129e-06, + "loss": 0.5899, + "step": 7228 + }, + { + "epoch": 0.46, + "grad_norm": 0.8747338056564331, + "learning_rate": 5.9181543328680295e-06, + "loss": 0.6593, + "step": 7229 + }, + { + "epoch": 0.46, + "grad_norm": 0.9241954684257507, + "learning_rate": 5.917145766606645e-06, + "loss": 0.6421, + "step": 7230 + }, + { + "epoch": 0.46, + "grad_norm": 0.8967227935791016, + "learning_rate": 5.9161371617274425e-06, + "loss": 0.5775, + "step": 7231 + }, + { + "epoch": 0.46, + "grad_norm": 0.8867378830909729, + "learning_rate": 5.91512851827289e-06, + "loss": 0.5834, + "step": 7232 + }, + { + "epoch": 0.46, + "grad_norm": 0.8239137530326843, + "learning_rate": 5.914119836285461e-06, + "loss": 0.5708, + "step": 7233 + }, + { + "epoch": 0.46, + "grad_norm": 0.8044520020484924, + "learning_rate": 5.913111115807626e-06, + "loss": 0.6005, + "step": 7234 + }, + { + "epoch": 0.46, + "grad_norm": 0.8815402388572693, + "learning_rate": 5.912102356881857e-06, + "loss": 0.5627, + "step": 7235 + }, + { + "epoch": 0.46, + "grad_norm": 0.9280535578727722, + "learning_rate": 5.91109355955063e-06, + "loss": 0.5705, + "step": 7236 + }, + { + "epoch": 0.46, + "grad_norm": 0.8546304702758789, + "learning_rate": 5.910084723856424e-06, + "loss": 0.5808, + "step": 7237 + }, + { + "epoch": 0.46, + "grad_norm": 0.8888003826141357, + "learning_rate": 5.909075849841717e-06, + "loss": 0.596, + "step": 7238 + }, + { + "epoch": 0.46, + "grad_norm": 0.8333981037139893, + "learning_rate": 5.908066937548987e-06, + "loss": 0.5282, + "step": 7239 + }, + { + "epoch": 0.46, + "grad_norm": 0.8448134660720825, + "learning_rate": 5.907057987020717e-06, + "loss": 0.5851, + "step": 7240 + }, + { + "epoch": 0.46, + "grad_norm": 0.8403011560440063, + "learning_rate": 5.906048998299392e-06, + "loss": 0.5617, + "step": 7241 + }, + { + "epoch": 0.46, + "grad_norm": 0.8628389835357666, + "learning_rate": 5.905039971427494e-06, + "loss": 0.621, + "step": 7242 + }, + { + "epoch": 0.46, + "grad_norm": 0.9215841889381409, + "learning_rate": 5.9040309064475136e-06, + "loss": 0.6134, + "step": 7243 + }, + { + "epoch": 0.46, + "grad_norm": 0.858690083026886, + "learning_rate": 5.903021803401933e-06, + "loss": 0.5893, + "step": 7244 + }, + { + "epoch": 0.46, + "grad_norm": 0.8196396231651306, + "learning_rate": 5.902012662333248e-06, + "loss": 0.5492, + "step": 7245 + }, + { + "epoch": 0.46, + "grad_norm": 0.8949219584465027, + "learning_rate": 5.9010034832839466e-06, + "loss": 0.6423, + "step": 7246 + }, + { + "epoch": 0.46, + "grad_norm": 0.9517080187797546, + "learning_rate": 5.899994266296525e-06, + "loss": 0.6048, + "step": 7247 + }, + { + "epoch": 0.46, + "grad_norm": 0.8856121897697449, + "learning_rate": 5.898985011413473e-06, + "loss": 0.549, + "step": 7248 + }, + { + "epoch": 0.46, + "grad_norm": 0.8450676798820496, + "learning_rate": 5.897975718677291e-06, + "loss": 0.5636, + "step": 7249 + }, + { + "epoch": 0.46, + "grad_norm": 0.8568273782730103, + "learning_rate": 5.896966388130475e-06, + "loss": 0.5788, + "step": 7250 + }, + { + "epoch": 0.46, + "grad_norm": 0.8017422556877136, + "learning_rate": 5.895957019815526e-06, + "loss": 0.5543, + "step": 7251 + }, + { + "epoch": 0.46, + "grad_norm": 0.9004830718040466, + "learning_rate": 5.894947613774942e-06, + "loss": 0.5613, + "step": 7252 + }, + { + "epoch": 0.46, + "grad_norm": 0.7895128726959229, + "learning_rate": 5.8939381700512275e-06, + "loss": 0.5361, + "step": 7253 + }, + { + "epoch": 0.46, + "grad_norm": 0.8576557040214539, + "learning_rate": 5.892928688686887e-06, + "loss": 0.6323, + "step": 7254 + }, + { + "epoch": 0.46, + "grad_norm": 0.8445666432380676, + "learning_rate": 5.891919169724426e-06, + "loss": 0.5944, + "step": 7255 + }, + { + "epoch": 0.46, + "grad_norm": 0.8773793578147888, + "learning_rate": 5.890909613206351e-06, + "loss": 0.6197, + "step": 7256 + }, + { + "epoch": 0.46, + "grad_norm": 0.853339672088623, + "learning_rate": 5.889900019175171e-06, + "loss": 0.5519, + "step": 7257 + }, + { + "epoch": 0.46, + "grad_norm": 0.9311347007751465, + "learning_rate": 5.888890387673398e-06, + "loss": 0.5779, + "step": 7258 + }, + { + "epoch": 0.46, + "grad_norm": 0.823020339012146, + "learning_rate": 5.887880718743541e-06, + "loss": 0.5509, + "step": 7259 + }, + { + "epoch": 0.46, + "grad_norm": 0.8491629362106323, + "learning_rate": 5.886871012428117e-06, + "loss": 0.5738, + "step": 7260 + }, + { + "epoch": 0.46, + "grad_norm": 0.8535467982292175, + "learning_rate": 5.885861268769641e-06, + "loss": 0.5945, + "step": 7261 + }, + { + "epoch": 0.46, + "grad_norm": 0.8396848440170288, + "learning_rate": 5.8848514878106275e-06, + "loss": 0.5535, + "step": 7262 + }, + { + "epoch": 0.46, + "grad_norm": 0.9186087250709534, + "learning_rate": 5.883841669593595e-06, + "loss": 0.6172, + "step": 7263 + }, + { + "epoch": 0.46, + "grad_norm": 0.877123236656189, + "learning_rate": 5.882831814161065e-06, + "loss": 0.6176, + "step": 7264 + }, + { + "epoch": 0.46, + "grad_norm": 0.8525793552398682, + "learning_rate": 5.881821921555559e-06, + "loss": 0.5688, + "step": 7265 + }, + { + "epoch": 0.46, + "grad_norm": 0.8865832090377808, + "learning_rate": 5.880811991819601e-06, + "loss": 0.6026, + "step": 7266 + }, + { + "epoch": 0.46, + "grad_norm": 0.9039906859397888, + "learning_rate": 5.879802024995712e-06, + "loss": 0.6023, + "step": 7267 + }, + { + "epoch": 0.46, + "grad_norm": 0.9067119359970093, + "learning_rate": 5.878792021126421e-06, + "loss": 0.6153, + "step": 7268 + }, + { + "epoch": 0.46, + "grad_norm": 0.8938568830490112, + "learning_rate": 5.877781980254255e-06, + "loss": 0.6366, + "step": 7269 + }, + { + "epoch": 0.46, + "grad_norm": 0.8254221081733704, + "learning_rate": 5.876771902421743e-06, + "loss": 0.5323, + "step": 7270 + }, + { + "epoch": 0.46, + "grad_norm": 0.9549217820167542, + "learning_rate": 5.875761787671416e-06, + "loss": 0.6151, + "step": 7271 + }, + { + "epoch": 0.46, + "grad_norm": 0.8977713584899902, + "learning_rate": 5.874751636045808e-06, + "loss": 0.5451, + "step": 7272 + }, + { + "epoch": 0.46, + "grad_norm": 0.8796578645706177, + "learning_rate": 5.873741447587451e-06, + "loss": 0.5895, + "step": 7273 + }, + { + "epoch": 0.46, + "grad_norm": 0.8962649703025818, + "learning_rate": 5.8727312223388814e-06, + "loss": 0.632, + "step": 7274 + }, + { + "epoch": 0.46, + "grad_norm": 0.8637465238571167, + "learning_rate": 5.871720960342635e-06, + "loss": 0.6002, + "step": 7275 + }, + { + "epoch": 0.46, + "grad_norm": 0.8970744013786316, + "learning_rate": 5.870710661641252e-06, + "loss": 0.551, + "step": 7276 + }, + { + "epoch": 0.46, + "grad_norm": 0.8728744387626648, + "learning_rate": 5.869700326277273e-06, + "loss": 0.6214, + "step": 7277 + }, + { + "epoch": 0.46, + "grad_norm": 0.8638222217559814, + "learning_rate": 5.868689954293239e-06, + "loss": 0.583, + "step": 7278 + }, + { + "epoch": 0.46, + "grad_norm": 0.8397230505943298, + "learning_rate": 5.86767954573169e-06, + "loss": 0.5571, + "step": 7279 + }, + { + "epoch": 0.46, + "grad_norm": 0.921284556388855, + "learning_rate": 5.866669100635176e-06, + "loss": 0.6216, + "step": 7280 + }, + { + "epoch": 0.46, + "grad_norm": 0.9188320636749268, + "learning_rate": 5.865658619046242e-06, + "loss": 0.5926, + "step": 7281 + }, + { + "epoch": 0.46, + "grad_norm": 0.831984281539917, + "learning_rate": 5.864648101007433e-06, + "loss": 0.5531, + "step": 7282 + }, + { + "epoch": 0.46, + "grad_norm": 0.8987353444099426, + "learning_rate": 5.863637546561301e-06, + "loss": 0.5643, + "step": 7283 + }, + { + "epoch": 0.46, + "grad_norm": 0.9372497200965881, + "learning_rate": 5.862626955750397e-06, + "loss": 0.6315, + "step": 7284 + }, + { + "epoch": 0.46, + "grad_norm": 0.8938281536102295, + "learning_rate": 5.8616163286172726e-06, + "loss": 0.6466, + "step": 7285 + }, + { + "epoch": 0.46, + "grad_norm": 0.8691145777702332, + "learning_rate": 5.8606056652044805e-06, + "loss": 0.5714, + "step": 7286 + }, + { + "epoch": 0.46, + "grad_norm": 0.9212241172790527, + "learning_rate": 5.859594965554579e-06, + "loss": 0.6383, + "step": 7287 + }, + { + "epoch": 0.46, + "grad_norm": 0.8839470148086548, + "learning_rate": 5.858584229710124e-06, + "loss": 0.6086, + "step": 7288 + }, + { + "epoch": 0.46, + "grad_norm": 0.8715324401855469, + "learning_rate": 5.857573457713674e-06, + "loss": 0.5746, + "step": 7289 + }, + { + "epoch": 0.46, + "grad_norm": 0.8384736776351929, + "learning_rate": 5.856562649607788e-06, + "loss": 0.6008, + "step": 7290 + }, + { + "epoch": 0.46, + "grad_norm": 0.9453056454658508, + "learning_rate": 5.855551805435028e-06, + "loss": 0.589, + "step": 7291 + }, + { + "epoch": 0.46, + "grad_norm": 0.8720511198043823, + "learning_rate": 5.854540925237959e-06, + "loss": 0.622, + "step": 7292 + }, + { + "epoch": 0.46, + "grad_norm": 0.950019896030426, + "learning_rate": 5.853530009059144e-06, + "loss": 0.6021, + "step": 7293 + }, + { + "epoch": 0.46, + "grad_norm": 0.9160835146903992, + "learning_rate": 5.852519056941149e-06, + "loss": 0.5935, + "step": 7294 + }, + { + "epoch": 0.46, + "grad_norm": 0.8492597937583923, + "learning_rate": 5.851508068926542e-06, + "loss": 0.5688, + "step": 7295 + }, + { + "epoch": 0.46, + "grad_norm": 0.947134792804718, + "learning_rate": 5.850497045057895e-06, + "loss": 0.6288, + "step": 7296 + }, + { + "epoch": 0.46, + "grad_norm": 0.8388863205909729, + "learning_rate": 5.849485985377774e-06, + "loss": 0.6021, + "step": 7297 + }, + { + "epoch": 0.46, + "grad_norm": 0.886326014995575, + "learning_rate": 5.848474889928753e-06, + "loss": 0.5793, + "step": 7298 + }, + { + "epoch": 0.46, + "grad_norm": 0.9255046248435974, + "learning_rate": 5.8474637587534065e-06, + "loss": 0.5786, + "step": 7299 + }, + { + "epoch": 0.46, + "grad_norm": 0.9360898733139038, + "learning_rate": 5.84645259189431e-06, + "loss": 0.6297, + "step": 7300 + }, + { + "epoch": 0.46, + "grad_norm": 0.9143325686454773, + "learning_rate": 5.845441389394039e-06, + "loss": 0.631, + "step": 7301 + }, + { + "epoch": 0.46, + "grad_norm": 0.8604922294616699, + "learning_rate": 5.844430151295171e-06, + "loss": 0.5886, + "step": 7302 + }, + { + "epoch": 0.46, + "grad_norm": 0.8875581622123718, + "learning_rate": 5.843418877640289e-06, + "loss": 0.6584, + "step": 7303 + }, + { + "epoch": 0.46, + "grad_norm": 0.8978346586227417, + "learning_rate": 5.842407568471971e-06, + "loss": 0.6007, + "step": 7304 + }, + { + "epoch": 0.46, + "grad_norm": 0.8847493529319763, + "learning_rate": 5.8413962238328e-06, + "loss": 0.5763, + "step": 7305 + }, + { + "epoch": 0.46, + "grad_norm": 0.8711661696434021, + "learning_rate": 5.840384843765361e-06, + "loss": 0.5876, + "step": 7306 + }, + { + "epoch": 0.46, + "grad_norm": 1.0056418180465698, + "learning_rate": 5.839373428312242e-06, + "loss": 0.5685, + "step": 7307 + }, + { + "epoch": 0.46, + "grad_norm": 0.9755017161369324, + "learning_rate": 5.838361977516026e-06, + "loss": 0.6263, + "step": 7308 + }, + { + "epoch": 0.46, + "grad_norm": 0.9591142535209656, + "learning_rate": 5.837350491419304e-06, + "loss": 0.6817, + "step": 7309 + }, + { + "epoch": 0.46, + "grad_norm": 0.8851566314697266, + "learning_rate": 5.836338970064664e-06, + "loss": 0.5556, + "step": 7310 + }, + { + "epoch": 0.46, + "grad_norm": 0.9328012466430664, + "learning_rate": 5.835327413494702e-06, + "loss": 0.634, + "step": 7311 + }, + { + "epoch": 0.46, + "grad_norm": 0.8392686247825623, + "learning_rate": 5.834315821752008e-06, + "loss": 0.609, + "step": 7312 + }, + { + "epoch": 0.46, + "grad_norm": 0.9183607697486877, + "learning_rate": 5.833304194879176e-06, + "loss": 0.6487, + "step": 7313 + }, + { + "epoch": 0.46, + "grad_norm": 0.8311749696731567, + "learning_rate": 5.832292532918804e-06, + "loss": 0.5665, + "step": 7314 + }, + { + "epoch": 0.46, + "grad_norm": 0.7938551902770996, + "learning_rate": 5.831280835913489e-06, + "loss": 0.5891, + "step": 7315 + }, + { + "epoch": 0.46, + "grad_norm": 0.8665035963058472, + "learning_rate": 5.83026910390583e-06, + "loss": 0.6339, + "step": 7316 + }, + { + "epoch": 0.46, + "grad_norm": 0.8929190039634705, + "learning_rate": 5.829257336938427e-06, + "loss": 0.6195, + "step": 7317 + }, + { + "epoch": 0.46, + "grad_norm": 0.948819637298584, + "learning_rate": 5.8282455350538815e-06, + "loss": 0.6272, + "step": 7318 + }, + { + "epoch": 0.46, + "grad_norm": 0.8194432854652405, + "learning_rate": 5.827233698294799e-06, + "loss": 0.6038, + "step": 7319 + }, + { + "epoch": 0.46, + "grad_norm": 0.8802596926689148, + "learning_rate": 5.826221826703783e-06, + "loss": 0.6913, + "step": 7320 + }, + { + "epoch": 0.46, + "grad_norm": 0.8479204177856445, + "learning_rate": 5.825209920323438e-06, + "loss": 0.5725, + "step": 7321 + }, + { + "epoch": 0.46, + "grad_norm": 0.8597609400749207, + "learning_rate": 5.824197979196377e-06, + "loss": 0.595, + "step": 7322 + }, + { + "epoch": 0.46, + "grad_norm": 0.874144434928894, + "learning_rate": 5.823186003365205e-06, + "loss": 0.6125, + "step": 7323 + }, + { + "epoch": 0.46, + "grad_norm": 0.904408872127533, + "learning_rate": 5.822173992872534e-06, + "loss": 0.5542, + "step": 7324 + }, + { + "epoch": 0.46, + "grad_norm": 0.8976813554763794, + "learning_rate": 5.821161947760975e-06, + "loss": 0.5595, + "step": 7325 + }, + { + "epoch": 0.46, + "grad_norm": 0.9583491683006287, + "learning_rate": 5.820149868073145e-06, + "loss": 0.7058, + "step": 7326 + }, + { + "epoch": 0.46, + "grad_norm": 0.8859381675720215, + "learning_rate": 5.819137753851656e-06, + "loss": 0.6169, + "step": 7327 + }, + { + "epoch": 0.46, + "grad_norm": 0.8623588681221008, + "learning_rate": 5.8181256051391276e-06, + "loss": 0.5796, + "step": 7328 + }, + { + "epoch": 0.46, + "grad_norm": 0.9704835414886475, + "learning_rate": 5.817113421978173e-06, + "loss": 0.6314, + "step": 7329 + }, + { + "epoch": 0.46, + "grad_norm": 0.8964600563049316, + "learning_rate": 5.816101204411417e-06, + "loss": 0.5712, + "step": 7330 + }, + { + "epoch": 0.46, + "grad_norm": 0.8039814233779907, + "learning_rate": 5.815088952481478e-06, + "loss": 0.5073, + "step": 7331 + }, + { + "epoch": 0.46, + "grad_norm": 0.8570845723152161, + "learning_rate": 5.814076666230978e-06, + "loss": 0.613, + "step": 7332 + }, + { + "epoch": 0.46, + "grad_norm": 0.8780226707458496, + "learning_rate": 5.813064345702542e-06, + "loss": 0.5941, + "step": 7333 + }, + { + "epoch": 0.46, + "grad_norm": 0.9203137159347534, + "learning_rate": 5.812051990938794e-06, + "loss": 0.5627, + "step": 7334 + }, + { + "epoch": 0.46, + "grad_norm": 0.9094358682632446, + "learning_rate": 5.811039601982363e-06, + "loss": 0.6046, + "step": 7335 + }, + { + "epoch": 0.46, + "grad_norm": 0.8867512345314026, + "learning_rate": 5.810027178875875e-06, + "loss": 0.5973, + "step": 7336 + }, + { + "epoch": 0.46, + "grad_norm": 0.8854659795761108, + "learning_rate": 5.809014721661961e-06, + "loss": 0.6152, + "step": 7337 + }, + { + "epoch": 0.46, + "grad_norm": 0.8991286158561707, + "learning_rate": 5.808002230383249e-06, + "loss": 0.6124, + "step": 7338 + }, + { + "epoch": 0.46, + "grad_norm": 0.8840140104293823, + "learning_rate": 5.806989705082377e-06, + "loss": 0.5494, + "step": 7339 + }, + { + "epoch": 0.47, + "grad_norm": 0.8739617466926575, + "learning_rate": 5.805977145801975e-06, + "loss": 0.6322, + "step": 7340 + }, + { + "epoch": 0.47, + "grad_norm": 0.8306631445884705, + "learning_rate": 5.8049645525846785e-06, + "loss": 0.526, + "step": 7341 + }, + { + "epoch": 0.47, + "grad_norm": 0.8292911648750305, + "learning_rate": 5.8039519254731245e-06, + "loss": 0.5709, + "step": 7342 + }, + { + "epoch": 0.47, + "grad_norm": 0.8418349623680115, + "learning_rate": 5.802939264509954e-06, + "loss": 0.6032, + "step": 7343 + }, + { + "epoch": 0.47, + "grad_norm": 0.906843364238739, + "learning_rate": 5.801926569737802e-06, + "loss": 0.5835, + "step": 7344 + }, + { + "epoch": 0.47, + "grad_norm": 0.8574205040931702, + "learning_rate": 5.800913841199312e-06, + "loss": 0.612, + "step": 7345 + }, + { + "epoch": 0.47, + "grad_norm": 0.9067574739456177, + "learning_rate": 5.799901078937127e-06, + "loss": 0.5773, + "step": 7346 + }, + { + "epoch": 0.47, + "grad_norm": 0.894777238368988, + "learning_rate": 5.798888282993891e-06, + "loss": 0.5373, + "step": 7347 + }, + { + "epoch": 0.47, + "grad_norm": 0.9085848331451416, + "learning_rate": 5.7978754534122465e-06, + "loss": 0.5839, + "step": 7348 + }, + { + "epoch": 0.47, + "grad_norm": 0.8634418249130249, + "learning_rate": 5.7968625902348445e-06, + "loss": 0.5919, + "step": 7349 + }, + { + "epoch": 0.47, + "grad_norm": 0.8625919818878174, + "learning_rate": 5.7958496935043296e-06, + "loss": 0.5809, + "step": 7350 + }, + { + "epoch": 0.47, + "grad_norm": 0.8647257089614868, + "learning_rate": 5.794836763263353e-06, + "loss": 0.6084, + "step": 7351 + }, + { + "epoch": 0.47, + "grad_norm": 0.877373456954956, + "learning_rate": 5.793823799554564e-06, + "loss": 0.5515, + "step": 7352 + }, + { + "epoch": 0.47, + "grad_norm": 0.8372183442115784, + "learning_rate": 5.792810802420618e-06, + "loss": 0.6313, + "step": 7353 + }, + { + "epoch": 0.47, + "grad_norm": 0.9380940198898315, + "learning_rate": 5.791797771904168e-06, + "loss": 0.5946, + "step": 7354 + }, + { + "epoch": 0.47, + "grad_norm": 0.9767922759056091, + "learning_rate": 5.790784708047866e-06, + "loss": 0.6452, + "step": 7355 + }, + { + "epoch": 0.47, + "grad_norm": 0.8654528856277466, + "learning_rate": 5.789771610894371e-06, + "loss": 0.5671, + "step": 7356 + }, + { + "epoch": 0.47, + "grad_norm": 0.8537338972091675, + "learning_rate": 5.7887584804863414e-06, + "loss": 0.5607, + "step": 7357 + }, + { + "epoch": 0.47, + "grad_norm": 0.879467785358429, + "learning_rate": 5.787745316866438e-06, + "loss": 0.5736, + "step": 7358 + }, + { + "epoch": 0.47, + "grad_norm": 0.8996354341506958, + "learning_rate": 5.786732120077318e-06, + "loss": 0.5326, + "step": 7359 + }, + { + "epoch": 0.47, + "grad_norm": 0.9491006135940552, + "learning_rate": 5.7857188901616444e-06, + "loss": 0.617, + "step": 7360 + }, + { + "epoch": 0.47, + "grad_norm": 0.9167372584342957, + "learning_rate": 5.7847056271620815e-06, + "loss": 0.5909, + "step": 7361 + }, + { + "epoch": 0.47, + "grad_norm": 0.88127201795578, + "learning_rate": 5.783692331121296e-06, + "loss": 0.6109, + "step": 7362 + }, + { + "epoch": 0.47, + "grad_norm": 0.9358324408531189, + "learning_rate": 5.7826790020819525e-06, + "loss": 0.6228, + "step": 7363 + }, + { + "epoch": 0.47, + "grad_norm": 0.8748879432678223, + "learning_rate": 5.781665640086719e-06, + "loss": 0.5388, + "step": 7364 + }, + { + "epoch": 0.47, + "grad_norm": 0.9955645203590393, + "learning_rate": 5.780652245178263e-06, + "loss": 0.5945, + "step": 7365 + }, + { + "epoch": 0.47, + "grad_norm": 1.0095912218093872, + "learning_rate": 5.779638817399259e-06, + "loss": 0.5464, + "step": 7366 + }, + { + "epoch": 0.47, + "grad_norm": 0.8746544718742371, + "learning_rate": 5.778625356792376e-06, + "loss": 0.5783, + "step": 7367 + }, + { + "epoch": 0.47, + "grad_norm": 0.8243803381919861, + "learning_rate": 5.7776118634002865e-06, + "loss": 0.4783, + "step": 7368 + }, + { + "epoch": 0.47, + "grad_norm": 0.9888680577278137, + "learning_rate": 5.776598337265668e-06, + "loss": 0.5734, + "step": 7369 + }, + { + "epoch": 0.47, + "grad_norm": 0.8647724986076355, + "learning_rate": 5.775584778431194e-06, + "loss": 0.6255, + "step": 7370 + }, + { + "epoch": 0.47, + "grad_norm": 0.9031562805175781, + "learning_rate": 5.774571186939543e-06, + "loss": 0.5906, + "step": 7371 + }, + { + "epoch": 0.47, + "grad_norm": 0.9837010502815247, + "learning_rate": 5.773557562833394e-06, + "loss": 0.6282, + "step": 7372 + }, + { + "epoch": 0.47, + "grad_norm": 0.9058972597122192, + "learning_rate": 5.772543906155429e-06, + "loss": 0.6202, + "step": 7373 + }, + { + "epoch": 0.47, + "grad_norm": 0.8994501233100891, + "learning_rate": 5.7715302169483254e-06, + "loss": 0.5639, + "step": 7374 + }, + { + "epoch": 0.47, + "grad_norm": 0.8851544260978699, + "learning_rate": 5.770516495254769e-06, + "loss": 0.6659, + "step": 7375 + }, + { + "epoch": 0.47, + "grad_norm": 0.93473881483078, + "learning_rate": 5.769502741117443e-06, + "loss": 0.6339, + "step": 7376 + }, + { + "epoch": 0.47, + "grad_norm": 0.8682790398597717, + "learning_rate": 5.7684889545790346e-06, + "loss": 0.5733, + "step": 7377 + }, + { + "epoch": 0.47, + "grad_norm": 0.9060890078544617, + "learning_rate": 5.767475135682228e-06, + "loss": 0.604, + "step": 7378 + }, + { + "epoch": 0.47, + "grad_norm": 0.862415075302124, + "learning_rate": 5.766461284469714e-06, + "loss": 0.6114, + "step": 7379 + }, + { + "epoch": 0.47, + "grad_norm": 0.9211068153381348, + "learning_rate": 5.765447400984182e-06, + "loss": 0.6212, + "step": 7380 + }, + { + "epoch": 0.47, + "grad_norm": 0.9584636092185974, + "learning_rate": 5.7644334852683236e-06, + "loss": 0.6299, + "step": 7381 + }, + { + "epoch": 0.47, + "grad_norm": 0.8537229299545288, + "learning_rate": 5.763419537364828e-06, + "loss": 0.5601, + "step": 7382 + }, + { + "epoch": 0.47, + "grad_norm": 0.9321445822715759, + "learning_rate": 5.762405557316393e-06, + "loss": 0.598, + "step": 7383 + }, + { + "epoch": 0.47, + "grad_norm": 0.9108582735061646, + "learning_rate": 5.761391545165713e-06, + "loss": 0.5775, + "step": 7384 + }, + { + "epoch": 0.47, + "grad_norm": 0.903030514717102, + "learning_rate": 5.760377500955483e-06, + "loss": 0.622, + "step": 7385 + }, + { + "epoch": 0.47, + "grad_norm": 0.88475102186203, + "learning_rate": 5.759363424728401e-06, + "loss": 0.6028, + "step": 7386 + }, + { + "epoch": 0.47, + "grad_norm": 0.952921986579895, + "learning_rate": 5.758349316527166e-06, + "loss": 0.6262, + "step": 7387 + }, + { + "epoch": 0.47, + "grad_norm": 0.867470383644104, + "learning_rate": 5.7573351763944815e-06, + "loss": 0.5876, + "step": 7388 + }, + { + "epoch": 0.47, + "grad_norm": 0.8601709604263306, + "learning_rate": 5.756321004373047e-06, + "loss": 0.5715, + "step": 7389 + }, + { + "epoch": 0.47, + "grad_norm": 0.910184919834137, + "learning_rate": 5.755306800505564e-06, + "loss": 0.6218, + "step": 7390 + }, + { + "epoch": 0.47, + "grad_norm": 0.9151275157928467, + "learning_rate": 5.754292564834741e-06, + "loss": 0.6137, + "step": 7391 + }, + { + "epoch": 0.47, + "grad_norm": 0.9013386964797974, + "learning_rate": 5.753278297403282e-06, + "loss": 0.6175, + "step": 7392 + }, + { + "epoch": 0.47, + "grad_norm": 0.8471426367759705, + "learning_rate": 5.752263998253893e-06, + "loss": 0.5546, + "step": 7393 + }, + { + "epoch": 0.47, + "grad_norm": 0.9509868025779724, + "learning_rate": 5.751249667429285e-06, + "loss": 0.6276, + "step": 7394 + }, + { + "epoch": 0.47, + "grad_norm": 0.9001954793930054, + "learning_rate": 5.7502353049721674e-06, + "loss": 0.595, + "step": 7395 + }, + { + "epoch": 0.47, + "grad_norm": 0.8812659382820129, + "learning_rate": 5.74922091092525e-06, + "loss": 0.5687, + "step": 7396 + }, + { + "epoch": 0.47, + "grad_norm": 0.8915801644325256, + "learning_rate": 5.748206485331247e-06, + "loss": 0.6137, + "step": 7397 + }, + { + "epoch": 0.47, + "grad_norm": 0.8607172966003418, + "learning_rate": 5.747192028232872e-06, + "loss": 0.5964, + "step": 7398 + }, + { + "epoch": 0.47, + "grad_norm": 0.9319098591804504, + "learning_rate": 5.746177539672841e-06, + "loss": 0.5956, + "step": 7399 + }, + { + "epoch": 0.47, + "grad_norm": 0.9447979927062988, + "learning_rate": 5.745163019693867e-06, + "loss": 0.5742, + "step": 7400 + }, + { + "epoch": 0.47, + "grad_norm": 0.9158990979194641, + "learning_rate": 5.744148468338671e-06, + "loss": 0.6162, + "step": 7401 + }, + { + "epoch": 0.47, + "grad_norm": 0.9429414868354797, + "learning_rate": 5.743133885649972e-06, + "loss": 0.6195, + "step": 7402 + }, + { + "epoch": 0.47, + "grad_norm": 0.8871979117393494, + "learning_rate": 5.742119271670491e-06, + "loss": 0.5716, + "step": 7403 + }, + { + "epoch": 0.47, + "grad_norm": 0.9028074741363525, + "learning_rate": 5.741104626442948e-06, + "loss": 0.6184, + "step": 7404 + }, + { + "epoch": 0.47, + "grad_norm": 0.8605913519859314, + "learning_rate": 5.740089950010068e-06, + "loss": 0.5238, + "step": 7405 + }, + { + "epoch": 0.47, + "grad_norm": 0.8414510488510132, + "learning_rate": 5.739075242414575e-06, + "loss": 0.5995, + "step": 7406 + }, + { + "epoch": 0.47, + "grad_norm": 0.851617157459259, + "learning_rate": 5.738060503699194e-06, + "loss": 0.5505, + "step": 7407 + }, + { + "epoch": 0.47, + "grad_norm": 0.9584022164344788, + "learning_rate": 5.737045733906653e-06, + "loss": 0.5901, + "step": 7408 + }, + { + "epoch": 0.47, + "grad_norm": 0.9968786239624023, + "learning_rate": 5.7360309330796805e-06, + "loss": 0.6653, + "step": 7409 + }, + { + "epoch": 0.47, + "grad_norm": 0.9742656350135803, + "learning_rate": 5.735016101261005e-06, + "loss": 0.6075, + "step": 7410 + }, + { + "epoch": 0.47, + "grad_norm": 0.8474623560905457, + "learning_rate": 5.7340012384933595e-06, + "loss": 0.6144, + "step": 7411 + }, + { + "epoch": 0.47, + "grad_norm": 0.9446218609809875, + "learning_rate": 5.732986344819475e-06, + "loss": 0.6079, + "step": 7412 + }, + { + "epoch": 0.47, + "grad_norm": 0.842605710029602, + "learning_rate": 5.731971420282085e-06, + "loss": 0.5098, + "step": 7413 + }, + { + "epoch": 0.47, + "grad_norm": 0.9913969039916992, + "learning_rate": 5.730956464923926e-06, + "loss": 0.594, + "step": 7414 + }, + { + "epoch": 0.47, + "grad_norm": 0.8629537224769592, + "learning_rate": 5.729941478787732e-06, + "loss": 0.5961, + "step": 7415 + }, + { + "epoch": 0.47, + "grad_norm": 0.897515058517456, + "learning_rate": 5.728926461916242e-06, + "loss": 0.5481, + "step": 7416 + }, + { + "epoch": 0.47, + "grad_norm": 0.8936898708343506, + "learning_rate": 5.727911414352192e-06, + "loss": 0.5766, + "step": 7417 + }, + { + "epoch": 0.47, + "grad_norm": 0.936795711517334, + "learning_rate": 5.726896336138328e-06, + "loss": 0.6159, + "step": 7418 + }, + { + "epoch": 0.47, + "grad_norm": 0.83265620470047, + "learning_rate": 5.725881227317386e-06, + "loss": 0.5623, + "step": 7419 + }, + { + "epoch": 0.47, + "grad_norm": 0.8322146534919739, + "learning_rate": 5.724866087932113e-06, + "loss": 0.5664, + "step": 7420 + }, + { + "epoch": 0.47, + "grad_norm": 0.8756260871887207, + "learning_rate": 5.723850918025246e-06, + "loss": 0.5767, + "step": 7421 + }, + { + "epoch": 0.47, + "grad_norm": 0.9313575029373169, + "learning_rate": 5.722835717639539e-06, + "loss": 0.5878, + "step": 7422 + }, + { + "epoch": 0.47, + "grad_norm": 0.9568567872047424, + "learning_rate": 5.721820486817733e-06, + "loss": 0.5955, + "step": 7423 + }, + { + "epoch": 0.47, + "grad_norm": 0.8222607970237732, + "learning_rate": 5.720805225602579e-06, + "loss": 0.5706, + "step": 7424 + }, + { + "epoch": 0.47, + "grad_norm": 0.8587454557418823, + "learning_rate": 5.719789934036821e-06, + "loss": 0.5963, + "step": 7425 + }, + { + "epoch": 0.47, + "grad_norm": 0.8979743123054504, + "learning_rate": 5.718774612163216e-06, + "loss": 0.5787, + "step": 7426 + }, + { + "epoch": 0.47, + "grad_norm": 0.8900598883628845, + "learning_rate": 5.717759260024511e-06, + "loss": 0.6332, + "step": 7427 + }, + { + "epoch": 0.47, + "grad_norm": 0.8718476891517639, + "learning_rate": 5.716743877663462e-06, + "loss": 0.5851, + "step": 7428 + }, + { + "epoch": 0.47, + "grad_norm": 0.9177529215812683, + "learning_rate": 5.715728465122821e-06, + "loss": 0.6083, + "step": 7429 + }, + { + "epoch": 0.47, + "grad_norm": 0.9315070509910583, + "learning_rate": 5.714713022445344e-06, + "loss": 0.6009, + "step": 7430 + }, + { + "epoch": 0.47, + "grad_norm": 0.8802310228347778, + "learning_rate": 5.713697549673788e-06, + "loss": 0.5769, + "step": 7431 + }, + { + "epoch": 0.47, + "grad_norm": 0.9262186884880066, + "learning_rate": 5.712682046850909e-06, + "loss": 0.5989, + "step": 7432 + }, + { + "epoch": 0.47, + "grad_norm": 0.939360499382019, + "learning_rate": 5.711666514019472e-06, + "loss": 0.6666, + "step": 7433 + }, + { + "epoch": 0.47, + "grad_norm": 0.8618243336677551, + "learning_rate": 5.710650951222231e-06, + "loss": 0.5733, + "step": 7434 + }, + { + "epoch": 0.47, + "grad_norm": 0.8601074814796448, + "learning_rate": 5.709635358501952e-06, + "loss": 0.5871, + "step": 7435 + }, + { + "epoch": 0.47, + "grad_norm": 0.9362636804580688, + "learning_rate": 5.708619735901394e-06, + "loss": 0.573, + "step": 7436 + }, + { + "epoch": 0.47, + "grad_norm": 0.8923870325088501, + "learning_rate": 5.707604083463327e-06, + "loss": 0.5884, + "step": 7437 + }, + { + "epoch": 0.47, + "grad_norm": 0.9044986367225647, + "learning_rate": 5.706588401230513e-06, + "loss": 0.573, + "step": 7438 + }, + { + "epoch": 0.47, + "grad_norm": 0.8761860728263855, + "learning_rate": 5.70557268924572e-06, + "loss": 0.6136, + "step": 7439 + }, + { + "epoch": 0.47, + "grad_norm": 0.8322923183441162, + "learning_rate": 5.7045569475517126e-06, + "loss": 0.5395, + "step": 7440 + }, + { + "epoch": 0.47, + "grad_norm": 0.9440213441848755, + "learning_rate": 5.703541176191266e-06, + "loss": 0.6287, + "step": 7441 + }, + { + "epoch": 0.47, + "grad_norm": 0.815701425075531, + "learning_rate": 5.702525375207147e-06, + "loss": 0.536, + "step": 7442 + }, + { + "epoch": 0.47, + "grad_norm": 0.9086024761199951, + "learning_rate": 5.70150954464213e-06, + "loss": 0.537, + "step": 7443 + }, + { + "epoch": 0.47, + "grad_norm": 0.9420557618141174, + "learning_rate": 5.700493684538984e-06, + "loss": 0.5822, + "step": 7444 + }, + { + "epoch": 0.47, + "grad_norm": 0.8448433876037598, + "learning_rate": 5.699477794940487e-06, + "loss": 0.599, + "step": 7445 + }, + { + "epoch": 0.47, + "grad_norm": 0.8653044104576111, + "learning_rate": 5.698461875889414e-06, + "loss": 0.5989, + "step": 7446 + }, + { + "epoch": 0.47, + "grad_norm": 0.8957589864730835, + "learning_rate": 5.6974459274285395e-06, + "loss": 0.6335, + "step": 7447 + }, + { + "epoch": 0.47, + "grad_norm": 0.8396274447441101, + "learning_rate": 5.696429949600643e-06, + "loss": 0.5672, + "step": 7448 + }, + { + "epoch": 0.47, + "grad_norm": 0.900188148021698, + "learning_rate": 5.695413942448505e-06, + "loss": 0.5975, + "step": 7449 + }, + { + "epoch": 0.47, + "grad_norm": 0.8910940289497375, + "learning_rate": 5.694397906014907e-06, + "loss": 0.5868, + "step": 7450 + }, + { + "epoch": 0.47, + "grad_norm": 0.8663356900215149, + "learning_rate": 5.693381840342626e-06, + "loss": 0.5789, + "step": 7451 + }, + { + "epoch": 0.47, + "grad_norm": 0.8898122310638428, + "learning_rate": 5.692365745474448e-06, + "loss": 0.595, + "step": 7452 + }, + { + "epoch": 0.47, + "grad_norm": 0.7971364259719849, + "learning_rate": 5.691349621453158e-06, + "loss": 0.5529, + "step": 7453 + }, + { + "epoch": 0.47, + "grad_norm": 0.813381552696228, + "learning_rate": 5.6903334683215416e-06, + "loss": 0.5635, + "step": 7454 + }, + { + "epoch": 0.47, + "grad_norm": 0.9315560460090637, + "learning_rate": 5.689317286122382e-06, + "loss": 0.6557, + "step": 7455 + }, + { + "epoch": 0.47, + "grad_norm": 0.9134712815284729, + "learning_rate": 5.68830107489847e-06, + "loss": 0.5853, + "step": 7456 + }, + { + "epoch": 0.47, + "grad_norm": 0.966414749622345, + "learning_rate": 5.687284834692595e-06, + "loss": 0.5889, + "step": 7457 + }, + { + "epoch": 0.47, + "grad_norm": 0.9134582281112671, + "learning_rate": 5.686268565547547e-06, + "loss": 0.6798, + "step": 7458 + }, + { + "epoch": 0.47, + "grad_norm": 0.8649297952651978, + "learning_rate": 5.685252267506116e-06, + "loss": 0.5932, + "step": 7459 + }, + { + "epoch": 0.47, + "grad_norm": 0.94404536485672, + "learning_rate": 5.6842359406110945e-06, + "loss": 0.6098, + "step": 7460 + }, + { + "epoch": 0.47, + "grad_norm": 0.8642643094062805, + "learning_rate": 5.683219584905281e-06, + "loss": 0.5948, + "step": 7461 + }, + { + "epoch": 0.47, + "grad_norm": 0.9007630944252014, + "learning_rate": 5.682203200431465e-06, + "loss": 0.5938, + "step": 7462 + }, + { + "epoch": 0.47, + "grad_norm": 0.901642918586731, + "learning_rate": 5.6811867872324465e-06, + "loss": 0.6043, + "step": 7463 + }, + { + "epoch": 0.47, + "grad_norm": 0.9297770857810974, + "learning_rate": 5.680170345351021e-06, + "loss": 0.6118, + "step": 7464 + }, + { + "epoch": 0.47, + "grad_norm": 0.9477009177207947, + "learning_rate": 5.67915387482999e-06, + "loss": 0.6211, + "step": 7465 + }, + { + "epoch": 0.47, + "grad_norm": 0.9416295289993286, + "learning_rate": 5.678137375712152e-06, + "loss": 0.6327, + "step": 7466 + }, + { + "epoch": 0.47, + "grad_norm": 0.8348994851112366, + "learning_rate": 5.6771208480403065e-06, + "loss": 0.5896, + "step": 7467 + }, + { + "epoch": 0.47, + "grad_norm": 0.8662605285644531, + "learning_rate": 5.6761042918572585e-06, + "loss": 0.6347, + "step": 7468 + }, + { + "epoch": 0.47, + "grad_norm": 0.9073401689529419, + "learning_rate": 5.675087707205811e-06, + "loss": 0.5953, + "step": 7469 + }, + { + "epoch": 0.47, + "grad_norm": 0.8502627015113831, + "learning_rate": 5.674071094128768e-06, + "loss": 0.56, + "step": 7470 + }, + { + "epoch": 0.47, + "grad_norm": 0.8814505934715271, + "learning_rate": 5.673054452668936e-06, + "loss": 0.5533, + "step": 7471 + }, + { + "epoch": 0.47, + "grad_norm": 0.8160227537155151, + "learning_rate": 5.672037782869123e-06, + "loss": 0.5823, + "step": 7472 + }, + { + "epoch": 0.47, + "grad_norm": 0.9062105417251587, + "learning_rate": 5.671021084772137e-06, + "loss": 0.629, + "step": 7473 + }, + { + "epoch": 0.47, + "grad_norm": 0.8528299927711487, + "learning_rate": 5.670004358420786e-06, + "loss": 0.5908, + "step": 7474 + }, + { + "epoch": 0.47, + "grad_norm": 0.8880297541618347, + "learning_rate": 5.668987603857884e-06, + "loss": 0.5729, + "step": 7475 + }, + { + "epoch": 0.47, + "grad_norm": 0.8712710738182068, + "learning_rate": 5.6679708211262415e-06, + "loss": 0.5703, + "step": 7476 + }, + { + "epoch": 0.47, + "grad_norm": 0.8730039596557617, + "learning_rate": 5.66695401026867e-06, + "loss": 0.5824, + "step": 7477 + }, + { + "epoch": 0.47, + "grad_norm": 0.9115322232246399, + "learning_rate": 5.665937171327985e-06, + "loss": 0.6206, + "step": 7478 + }, + { + "epoch": 0.47, + "grad_norm": 0.8727670311927795, + "learning_rate": 5.664920304347004e-06, + "loss": 0.5998, + "step": 7479 + }, + { + "epoch": 0.47, + "grad_norm": 0.8780418634414673, + "learning_rate": 5.6639034093685416e-06, + "loss": 0.6111, + "step": 7480 + }, + { + "epoch": 0.47, + "grad_norm": 0.902927577495575, + "learning_rate": 5.662886486435415e-06, + "loss": 0.5843, + "step": 7481 + }, + { + "epoch": 0.47, + "grad_norm": 0.9045441746711731, + "learning_rate": 5.6618695355904456e-06, + "loss": 0.5906, + "step": 7482 + }, + { + "epoch": 0.47, + "grad_norm": 0.8809554576873779, + "learning_rate": 5.660852556876452e-06, + "loss": 0.5882, + "step": 7483 + }, + { + "epoch": 0.47, + "grad_norm": 0.8581720590591431, + "learning_rate": 5.659835550336257e-06, + "loss": 0.5565, + "step": 7484 + }, + { + "epoch": 0.47, + "grad_norm": 0.836743175983429, + "learning_rate": 5.658818516012681e-06, + "loss": 0.6018, + "step": 7485 + }, + { + "epoch": 0.47, + "grad_norm": 0.8696823716163635, + "learning_rate": 5.65780145394855e-06, + "loss": 0.5717, + "step": 7486 + }, + { + "epoch": 0.47, + "grad_norm": 0.8720874786376953, + "learning_rate": 5.656784364186687e-06, + "loss": 0.6145, + "step": 7487 + }, + { + "epoch": 0.47, + "grad_norm": 0.8161273002624512, + "learning_rate": 5.655767246769921e-06, + "loss": 0.5921, + "step": 7488 + }, + { + "epoch": 0.47, + "grad_norm": 0.9067077040672302, + "learning_rate": 5.6547501017410765e-06, + "loss": 0.5551, + "step": 7489 + }, + { + "epoch": 0.47, + "grad_norm": 0.8600305318832397, + "learning_rate": 5.6537329291429835e-06, + "loss": 0.5712, + "step": 7490 + }, + { + "epoch": 0.47, + "grad_norm": 0.9493293762207031, + "learning_rate": 5.652715729018471e-06, + "loss": 0.5631, + "step": 7491 + }, + { + "epoch": 0.47, + "grad_norm": 0.9080491065979004, + "learning_rate": 5.65169850141037e-06, + "loss": 0.6021, + "step": 7492 + }, + { + "epoch": 0.47, + "grad_norm": 0.8665675520896912, + "learning_rate": 5.650681246361511e-06, + "loss": 0.5662, + "step": 7493 + }, + { + "epoch": 0.47, + "grad_norm": 0.8644539713859558, + "learning_rate": 5.649663963914729e-06, + "loss": 0.5776, + "step": 7494 + }, + { + "epoch": 0.47, + "grad_norm": 0.8949490785598755, + "learning_rate": 5.6486466541128575e-06, + "loss": 0.6025, + "step": 7495 + }, + { + "epoch": 0.47, + "grad_norm": 0.8618309497833252, + "learning_rate": 5.6476293169987314e-06, + "loss": 0.5809, + "step": 7496 + }, + { + "epoch": 0.47, + "grad_norm": 0.9121665954589844, + "learning_rate": 5.646611952615188e-06, + "loss": 0.5776, + "step": 7497 + }, + { + "epoch": 0.48, + "grad_norm": 0.9601690173149109, + "learning_rate": 5.645594561005064e-06, + "loss": 0.6445, + "step": 7498 + }, + { + "epoch": 0.48, + "grad_norm": 0.8770933151245117, + "learning_rate": 5.6445771422112005e-06, + "loss": 0.5561, + "step": 7499 + }, + { + "epoch": 0.48, + "grad_norm": 0.9160272479057312, + "learning_rate": 5.643559696276435e-06, + "loss": 0.5923, + "step": 7500 + }, + { + "epoch": 0.48, + "grad_norm": 0.8537315726280212, + "learning_rate": 5.6425422232436085e-06, + "loss": 0.5297, + "step": 7501 + }, + { + "epoch": 0.48, + "grad_norm": 0.9248793125152588, + "learning_rate": 5.641524723155566e-06, + "loss": 0.6002, + "step": 7502 + }, + { + "epoch": 0.48, + "grad_norm": 0.8903481960296631, + "learning_rate": 5.6405071960551485e-06, + "loss": 0.5629, + "step": 7503 + }, + { + "epoch": 0.48, + "grad_norm": 0.8650072813034058, + "learning_rate": 5.639489641985201e-06, + "loss": 0.6217, + "step": 7504 + }, + { + "epoch": 0.48, + "grad_norm": 0.9362791180610657, + "learning_rate": 5.638472060988569e-06, + "loss": 0.6735, + "step": 7505 + }, + { + "epoch": 0.48, + "grad_norm": 0.8435890078544617, + "learning_rate": 5.6374544531081e-06, + "loss": 0.5357, + "step": 7506 + }, + { + "epoch": 0.48, + "grad_norm": 0.8968993425369263, + "learning_rate": 5.636436818386641e-06, + "loss": 0.6284, + "step": 7507 + }, + { + "epoch": 0.48, + "grad_norm": 0.8205461502075195, + "learning_rate": 5.635419156867043e-06, + "loss": 0.5537, + "step": 7508 + }, + { + "epoch": 0.48, + "grad_norm": 0.8897349238395691, + "learning_rate": 5.634401468592152e-06, + "loss": 0.5937, + "step": 7509 + }, + { + "epoch": 0.48, + "grad_norm": 0.8807753920555115, + "learning_rate": 5.6333837536048255e-06, + "loss": 0.5546, + "step": 7510 + }, + { + "epoch": 0.48, + "grad_norm": 0.8418203592300415, + "learning_rate": 5.63236601194791e-06, + "loss": 0.5596, + "step": 7511 + }, + { + "epoch": 0.48, + "grad_norm": 0.849711000919342, + "learning_rate": 5.631348243664263e-06, + "loss": 0.6252, + "step": 7512 + }, + { + "epoch": 0.48, + "grad_norm": 0.9313555955886841, + "learning_rate": 5.630330448796736e-06, + "loss": 0.6325, + "step": 7513 + }, + { + "epoch": 0.48, + "grad_norm": 0.9534391164779663, + "learning_rate": 5.629312627388188e-06, + "loss": 0.6615, + "step": 7514 + }, + { + "epoch": 0.48, + "grad_norm": 0.8927303552627563, + "learning_rate": 5.628294779481474e-06, + "loss": 0.5842, + "step": 7515 + }, + { + "epoch": 0.48, + "grad_norm": 0.8985401391983032, + "learning_rate": 5.6272769051194535e-06, + "loss": 0.6764, + "step": 7516 + }, + { + "epoch": 0.48, + "grad_norm": 0.8968629240989685, + "learning_rate": 5.626259004344983e-06, + "loss": 0.5754, + "step": 7517 + }, + { + "epoch": 0.48, + "grad_norm": 0.8578152656555176, + "learning_rate": 5.625241077200926e-06, + "loss": 0.6018, + "step": 7518 + }, + { + "epoch": 0.48, + "grad_norm": 0.8742191791534424, + "learning_rate": 5.624223123730141e-06, + "loss": 0.5841, + "step": 7519 + }, + { + "epoch": 0.48, + "grad_norm": 0.8463053703308105, + "learning_rate": 5.6232051439754935e-06, + "loss": 0.5883, + "step": 7520 + }, + { + "epoch": 0.48, + "grad_norm": 0.8800484538078308, + "learning_rate": 5.622187137979843e-06, + "loss": 0.544, + "step": 7521 + }, + { + "epoch": 0.48, + "grad_norm": 0.8594365119934082, + "learning_rate": 5.621169105786057e-06, + "loss": 0.5719, + "step": 7522 + }, + { + "epoch": 0.48, + "grad_norm": 0.8680949807167053, + "learning_rate": 5.620151047437004e-06, + "loss": 0.6466, + "step": 7523 + }, + { + "epoch": 0.48, + "grad_norm": 0.8772831559181213, + "learning_rate": 5.619132962975544e-06, + "loss": 0.6038, + "step": 7524 + }, + { + "epoch": 0.48, + "grad_norm": 0.9212350845336914, + "learning_rate": 5.6181148524445506e-06, + "loss": 0.586, + "step": 7525 + }, + { + "epoch": 0.48, + "grad_norm": 0.9081183671951294, + "learning_rate": 5.617096715886889e-06, + "loss": 0.6006, + "step": 7526 + }, + { + "epoch": 0.48, + "grad_norm": 0.9265548586845398, + "learning_rate": 5.616078553345434e-06, + "loss": 0.6323, + "step": 7527 + }, + { + "epoch": 0.48, + "grad_norm": 0.8656793236732483, + "learning_rate": 5.615060364863053e-06, + "loss": 0.5746, + "step": 7528 + }, + { + "epoch": 0.48, + "grad_norm": 0.8917694091796875, + "learning_rate": 5.6140421504826205e-06, + "loss": 0.5804, + "step": 7529 + }, + { + "epoch": 0.48, + "grad_norm": 0.9244688153266907, + "learning_rate": 5.6130239102470075e-06, + "loss": 0.6397, + "step": 7530 + }, + { + "epoch": 0.48, + "grad_norm": 0.873084306716919, + "learning_rate": 5.612005644199092e-06, + "loss": 0.5719, + "step": 7531 + }, + { + "epoch": 0.48, + "grad_norm": 0.8609447479248047, + "learning_rate": 5.610987352381747e-06, + "loss": 0.5613, + "step": 7532 + }, + { + "epoch": 0.48, + "grad_norm": 0.9368327856063843, + "learning_rate": 5.60996903483785e-06, + "loss": 0.6308, + "step": 7533 + }, + { + "epoch": 0.48, + "grad_norm": 0.8809864521026611, + "learning_rate": 5.608950691610279e-06, + "loss": 0.5802, + "step": 7534 + }, + { + "epoch": 0.48, + "grad_norm": 0.926231324672699, + "learning_rate": 5.607932322741912e-06, + "loss": 0.6366, + "step": 7535 + }, + { + "epoch": 0.48, + "grad_norm": 0.9103180766105652, + "learning_rate": 5.60691392827563e-06, + "loss": 0.6085, + "step": 7536 + }, + { + "epoch": 0.48, + "grad_norm": 0.9746480584144592, + "learning_rate": 5.605895508254315e-06, + "loss": 0.5465, + "step": 7537 + }, + { + "epoch": 0.48, + "grad_norm": 0.9505468606948853, + "learning_rate": 5.604877062720848e-06, + "loss": 0.6671, + "step": 7538 + }, + { + "epoch": 0.48, + "grad_norm": 0.8591948747634888, + "learning_rate": 5.603858591718111e-06, + "loss": 0.5763, + "step": 7539 + }, + { + "epoch": 0.48, + "grad_norm": 0.9409388303756714, + "learning_rate": 5.602840095288989e-06, + "loss": 0.6302, + "step": 7540 + }, + { + "epoch": 0.48, + "grad_norm": 0.8928959965705872, + "learning_rate": 5.6018215734763685e-06, + "loss": 0.6324, + "step": 7541 + }, + { + "epoch": 0.48, + "grad_norm": 0.8751816749572754, + "learning_rate": 5.600803026323136e-06, + "loss": 0.5362, + "step": 7542 + }, + { + "epoch": 0.48, + "grad_norm": 0.9378029704093933, + "learning_rate": 5.599784453872177e-06, + "loss": 0.5904, + "step": 7543 + }, + { + "epoch": 0.48, + "grad_norm": 0.926987886428833, + "learning_rate": 5.5987658561663805e-06, + "loss": 0.6353, + "step": 7544 + }, + { + "epoch": 0.48, + "grad_norm": 0.8623868823051453, + "learning_rate": 5.597747233248637e-06, + "loss": 0.5966, + "step": 7545 + }, + { + "epoch": 0.48, + "grad_norm": 0.8656692504882812, + "learning_rate": 5.596728585161838e-06, + "loss": 0.5803, + "step": 7546 + }, + { + "epoch": 0.48, + "grad_norm": 0.8522694110870361, + "learning_rate": 5.595709911948873e-06, + "loss": 0.6405, + "step": 7547 + }, + { + "epoch": 0.48, + "grad_norm": 0.8766559958457947, + "learning_rate": 5.5946912136526365e-06, + "loss": 0.6045, + "step": 7548 + }, + { + "epoch": 0.48, + "grad_norm": 0.8855379223823547, + "learning_rate": 5.593672490316022e-06, + "loss": 0.6335, + "step": 7549 + }, + { + "epoch": 0.48, + "grad_norm": 0.8828189969062805, + "learning_rate": 5.5926537419819234e-06, + "loss": 0.6326, + "step": 7550 + }, + { + "epoch": 0.48, + "grad_norm": 0.8517670035362244, + "learning_rate": 5.591634968693238e-06, + "loss": 0.6034, + "step": 7551 + }, + { + "epoch": 0.48, + "grad_norm": 0.8935458064079285, + "learning_rate": 5.590616170492862e-06, + "loss": 0.628, + "step": 7552 + }, + { + "epoch": 0.48, + "grad_norm": 0.9340348839759827, + "learning_rate": 5.589597347423696e-06, + "loss": 0.6073, + "step": 7553 + }, + { + "epoch": 0.48, + "grad_norm": 0.8754671812057495, + "learning_rate": 5.588578499528633e-06, + "loss": 0.584, + "step": 7554 + }, + { + "epoch": 0.48, + "grad_norm": 0.8489634990692139, + "learning_rate": 5.587559626850578e-06, + "loss": 0.5701, + "step": 7555 + }, + { + "epoch": 0.48, + "grad_norm": 0.871929407119751, + "learning_rate": 5.586540729432431e-06, + "loss": 0.5916, + "step": 7556 + }, + { + "epoch": 0.48, + "grad_norm": 0.905117928981781, + "learning_rate": 5.585521807317097e-06, + "loss": 0.6647, + "step": 7557 + }, + { + "epoch": 0.48, + "grad_norm": 0.9271255731582642, + "learning_rate": 5.584502860547474e-06, + "loss": 0.6214, + "step": 7558 + }, + { + "epoch": 0.48, + "grad_norm": 0.8846172094345093, + "learning_rate": 5.5834838891664685e-06, + "loss": 0.6159, + "step": 7559 + }, + { + "epoch": 0.48, + "grad_norm": 0.8657467365264893, + "learning_rate": 5.582464893216987e-06, + "loss": 0.5704, + "step": 7560 + }, + { + "epoch": 0.48, + "grad_norm": 0.8502207398414612, + "learning_rate": 5.5814458727419365e-06, + "loss": 0.5878, + "step": 7561 + }, + { + "epoch": 0.48, + "grad_norm": 0.8335548639297485, + "learning_rate": 5.580426827784221e-06, + "loss": 0.612, + "step": 7562 + }, + { + "epoch": 0.48, + "grad_norm": 0.8756707310676575, + "learning_rate": 5.579407758386751e-06, + "loss": 0.6283, + "step": 7563 + }, + { + "epoch": 0.48, + "grad_norm": 0.9669787883758545, + "learning_rate": 5.578388664592435e-06, + "loss": 0.5999, + "step": 7564 + }, + { + "epoch": 0.48, + "grad_norm": 0.9120867848396301, + "learning_rate": 5.577369546444188e-06, + "loss": 0.6249, + "step": 7565 + }, + { + "epoch": 0.48, + "grad_norm": 0.9110515117645264, + "learning_rate": 5.576350403984915e-06, + "loss": 0.6314, + "step": 7566 + }, + { + "epoch": 0.48, + "grad_norm": 0.9408080577850342, + "learning_rate": 5.575331237257532e-06, + "loss": 0.5728, + "step": 7567 + }, + { + "epoch": 0.48, + "grad_norm": 0.9744350910186768, + "learning_rate": 5.574312046304954e-06, + "loss": 0.6502, + "step": 7568 + }, + { + "epoch": 0.48, + "grad_norm": 0.9024521112442017, + "learning_rate": 5.5732928311700906e-06, + "loss": 0.5861, + "step": 7569 + }, + { + "epoch": 0.48, + "grad_norm": 0.8772839903831482, + "learning_rate": 5.5722735918958614e-06, + "loss": 0.5825, + "step": 7570 + }, + { + "epoch": 0.48, + "grad_norm": 0.9152007699012756, + "learning_rate": 5.571254328525183e-06, + "loss": 0.5854, + "step": 7571 + }, + { + "epoch": 0.48, + "grad_norm": 0.9742832183837891, + "learning_rate": 5.570235041100972e-06, + "loss": 0.6213, + "step": 7572 + }, + { + "epoch": 0.48, + "grad_norm": 0.8699829578399658, + "learning_rate": 5.569215729666146e-06, + "loss": 0.5945, + "step": 7573 + }, + { + "epoch": 0.48, + "grad_norm": 0.8996490240097046, + "learning_rate": 5.568196394263626e-06, + "loss": 0.6015, + "step": 7574 + }, + { + "epoch": 0.48, + "grad_norm": 0.8309650421142578, + "learning_rate": 5.567177034936333e-06, + "loss": 0.5423, + "step": 7575 + }, + { + "epoch": 0.48, + "grad_norm": 0.884103000164032, + "learning_rate": 5.566157651727189e-06, + "loss": 0.6507, + "step": 7576 + }, + { + "epoch": 0.48, + "grad_norm": 0.8329902291297913, + "learning_rate": 5.5651382446791134e-06, + "loss": 0.5838, + "step": 7577 + }, + { + "epoch": 0.48, + "grad_norm": 0.8965960741043091, + "learning_rate": 5.564118813835033e-06, + "loss": 0.5781, + "step": 7578 + }, + { + "epoch": 0.48, + "grad_norm": 0.8552922010421753, + "learning_rate": 5.563099359237872e-06, + "loss": 0.5883, + "step": 7579 + }, + { + "epoch": 0.48, + "grad_norm": 0.8796671628952026, + "learning_rate": 5.5620798809305575e-06, + "loss": 0.6016, + "step": 7580 + }, + { + "epoch": 0.48, + "grad_norm": 0.9553985595703125, + "learning_rate": 5.561060378956014e-06, + "loss": 0.6185, + "step": 7581 + }, + { + "epoch": 0.48, + "grad_norm": 0.836025059223175, + "learning_rate": 5.560040853357168e-06, + "loss": 0.5931, + "step": 7582 + }, + { + "epoch": 0.48, + "grad_norm": 0.9648067355155945, + "learning_rate": 5.55902130417695e-06, + "loss": 0.6206, + "step": 7583 + }, + { + "epoch": 0.48, + "grad_norm": 0.8633977174758911, + "learning_rate": 5.558001731458293e-06, + "loss": 0.6114, + "step": 7584 + }, + { + "epoch": 0.48, + "grad_norm": 0.885905921459198, + "learning_rate": 5.556982135244121e-06, + "loss": 0.6113, + "step": 7585 + }, + { + "epoch": 0.48, + "grad_norm": 0.8822622299194336, + "learning_rate": 5.5559625155773685e-06, + "loss": 0.5788, + "step": 7586 + }, + { + "epoch": 0.48, + "grad_norm": 0.8463605046272278, + "learning_rate": 5.554942872500971e-06, + "loss": 0.5707, + "step": 7587 + }, + { + "epoch": 0.48, + "grad_norm": 0.8903326392173767, + "learning_rate": 5.5539232060578574e-06, + "loss": 0.6062, + "step": 7588 + }, + { + "epoch": 0.48, + "grad_norm": 0.8166199922561646, + "learning_rate": 5.552903516290966e-06, + "loss": 0.5806, + "step": 7589 + }, + { + "epoch": 0.48, + "grad_norm": 0.8574814200401306, + "learning_rate": 5.55188380324323e-06, + "loss": 0.586, + "step": 7590 + }, + { + "epoch": 0.48, + "grad_norm": 0.8767586946487427, + "learning_rate": 5.550864066957587e-06, + "loss": 0.6098, + "step": 7591 + }, + { + "epoch": 0.48, + "grad_norm": 0.8981362581253052, + "learning_rate": 5.549844307476975e-06, + "loss": 0.6253, + "step": 7592 + }, + { + "epoch": 0.48, + "grad_norm": 0.9011292457580566, + "learning_rate": 5.548824524844333e-06, + "loss": 0.6296, + "step": 7593 + }, + { + "epoch": 0.48, + "grad_norm": 0.8577702045440674, + "learning_rate": 5.547804719102596e-06, + "loss": 0.5661, + "step": 7594 + }, + { + "epoch": 0.48, + "grad_norm": 0.9309691190719604, + "learning_rate": 5.546784890294712e-06, + "loss": 0.646, + "step": 7595 + }, + { + "epoch": 0.48, + "grad_norm": 0.9005495309829712, + "learning_rate": 5.545765038463615e-06, + "loss": 0.634, + "step": 7596 + }, + { + "epoch": 0.48, + "grad_norm": 0.9504218697547913, + "learning_rate": 5.544745163652253e-06, + "loss": 0.5943, + "step": 7597 + }, + { + "epoch": 0.48, + "grad_norm": 0.880858302116394, + "learning_rate": 5.543725265903565e-06, + "loss": 0.5743, + "step": 7598 + }, + { + "epoch": 0.48, + "grad_norm": 0.9303281307220459, + "learning_rate": 5.5427053452605004e-06, + "loss": 0.6134, + "step": 7599 + }, + { + "epoch": 0.48, + "grad_norm": 0.8957832455635071, + "learning_rate": 5.541685401766001e-06, + "loss": 0.6142, + "step": 7600 + }, + { + "epoch": 0.48, + "grad_norm": 0.860815703868866, + "learning_rate": 5.540665435463013e-06, + "loss": 0.5398, + "step": 7601 + }, + { + "epoch": 0.48, + "grad_norm": 0.8271889090538025, + "learning_rate": 5.539645446394485e-06, + "loss": 0.6002, + "step": 7602 + }, + { + "epoch": 0.48, + "grad_norm": 0.8800649046897888, + "learning_rate": 5.538625434603363e-06, + "loss": 0.6247, + "step": 7603 + }, + { + "epoch": 0.48, + "grad_norm": 0.8922380208969116, + "learning_rate": 5.5376054001326e-06, + "loss": 0.6069, + "step": 7604 + }, + { + "epoch": 0.48, + "grad_norm": 0.8567295074462891, + "learning_rate": 5.53658534302514e-06, + "loss": 0.6585, + "step": 7605 + }, + { + "epoch": 0.48, + "grad_norm": 0.9114384651184082, + "learning_rate": 5.535565263323942e-06, + "loss": 0.5325, + "step": 7606 + }, + { + "epoch": 0.48, + "grad_norm": 0.8971738219261169, + "learning_rate": 5.534545161071951e-06, + "loss": 0.6266, + "step": 7607 + }, + { + "epoch": 0.48, + "grad_norm": 0.8661735653877258, + "learning_rate": 5.533525036312125e-06, + "loss": 0.5861, + "step": 7608 + }, + { + "epoch": 0.48, + "grad_norm": 0.9209964275360107, + "learning_rate": 5.532504889087413e-06, + "loss": 0.608, + "step": 7609 + }, + { + "epoch": 0.48, + "grad_norm": 0.9354657530784607, + "learning_rate": 5.531484719440776e-06, + "loss": 0.6116, + "step": 7610 + }, + { + "epoch": 0.48, + "grad_norm": 0.8302944898605347, + "learning_rate": 5.530464527415164e-06, + "loss": 0.5349, + "step": 7611 + }, + { + "epoch": 0.48, + "grad_norm": 0.9335947632789612, + "learning_rate": 5.529444313053538e-06, + "loss": 0.5976, + "step": 7612 + }, + { + "epoch": 0.48, + "grad_norm": 0.9276868104934692, + "learning_rate": 5.528424076398851e-06, + "loss": 0.6024, + "step": 7613 + }, + { + "epoch": 0.48, + "grad_norm": 0.8259304165840149, + "learning_rate": 5.527403817494067e-06, + "loss": 0.6018, + "step": 7614 + }, + { + "epoch": 0.48, + "grad_norm": 0.8607040643692017, + "learning_rate": 5.526383536382142e-06, + "loss": 0.5221, + "step": 7615 + }, + { + "epoch": 0.48, + "grad_norm": 0.8541266918182373, + "learning_rate": 5.525363233106037e-06, + "loss": 0.5861, + "step": 7616 + }, + { + "epoch": 0.48, + "grad_norm": 0.8392813801765442, + "learning_rate": 5.524342907708714e-06, + "loss": 0.6229, + "step": 7617 + }, + { + "epoch": 0.48, + "grad_norm": 0.8519023060798645, + "learning_rate": 5.5233225602331355e-06, + "loss": 0.5533, + "step": 7618 + }, + { + "epoch": 0.48, + "grad_norm": 0.9389131665229797, + "learning_rate": 5.522302190722264e-06, + "loss": 0.5698, + "step": 7619 + }, + { + "epoch": 0.48, + "grad_norm": 0.8677113056182861, + "learning_rate": 5.5212817992190644e-06, + "loss": 0.6011, + "step": 7620 + }, + { + "epoch": 0.48, + "grad_norm": 1.0327938795089722, + "learning_rate": 5.5202613857665025e-06, + "loss": 0.622, + "step": 7621 + }, + { + "epoch": 0.48, + "grad_norm": 0.8638737797737122, + "learning_rate": 5.5192409504075416e-06, + "loss": 0.5677, + "step": 7622 + }, + { + "epoch": 0.48, + "grad_norm": 0.9155073761940002, + "learning_rate": 5.518220493185153e-06, + "loss": 0.5872, + "step": 7623 + }, + { + "epoch": 0.48, + "grad_norm": 0.8531370162963867, + "learning_rate": 5.517200014142301e-06, + "loss": 0.5778, + "step": 7624 + }, + { + "epoch": 0.48, + "grad_norm": 0.865264356136322, + "learning_rate": 5.516179513321955e-06, + "loss": 0.5836, + "step": 7625 + }, + { + "epoch": 0.48, + "grad_norm": 0.8751364946365356, + "learning_rate": 5.5151589907670856e-06, + "loss": 0.572, + "step": 7626 + }, + { + "epoch": 0.48, + "grad_norm": 0.8969213962554932, + "learning_rate": 5.514138446520664e-06, + "loss": 0.6152, + "step": 7627 + }, + { + "epoch": 0.48, + "grad_norm": 0.8782392740249634, + "learning_rate": 5.51311788062566e-06, + "loss": 0.5778, + "step": 7628 + }, + { + "epoch": 0.48, + "grad_norm": 0.8104063868522644, + "learning_rate": 5.512097293125047e-06, + "loss": 0.5285, + "step": 7629 + }, + { + "epoch": 0.48, + "grad_norm": 0.8759908676147461, + "learning_rate": 5.511076684061799e-06, + "loss": 0.5613, + "step": 7630 + }, + { + "epoch": 0.48, + "grad_norm": 0.8901430368423462, + "learning_rate": 5.51005605347889e-06, + "loss": 0.6212, + "step": 7631 + }, + { + "epoch": 0.48, + "grad_norm": 0.8541998863220215, + "learning_rate": 5.509035401419296e-06, + "loss": 0.5491, + "step": 7632 + }, + { + "epoch": 0.48, + "grad_norm": 0.938401460647583, + "learning_rate": 5.50801472792599e-06, + "loss": 0.5846, + "step": 7633 + }, + { + "epoch": 0.48, + "grad_norm": 0.8890235424041748, + "learning_rate": 5.5069940330419525e-06, + "loss": 0.5504, + "step": 7634 + }, + { + "epoch": 0.48, + "grad_norm": 0.9128061532974243, + "learning_rate": 5.5059733168101596e-06, + "loss": 0.6344, + "step": 7635 + }, + { + "epoch": 0.48, + "grad_norm": 0.9124163389205933, + "learning_rate": 5.504952579273589e-06, + "loss": 0.5604, + "step": 7636 + }, + { + "epoch": 0.48, + "grad_norm": 0.9557987451553345, + "learning_rate": 5.503931820475223e-06, + "loss": 0.6476, + "step": 7637 + }, + { + "epoch": 0.48, + "grad_norm": 0.8558072447776794, + "learning_rate": 5.502911040458042e-06, + "loss": 0.6132, + "step": 7638 + }, + { + "epoch": 0.48, + "grad_norm": 0.8189815878868103, + "learning_rate": 5.501890239265025e-06, + "loss": 0.5687, + "step": 7639 + }, + { + "epoch": 0.48, + "grad_norm": 0.8753035664558411, + "learning_rate": 5.500869416939156e-06, + "loss": 0.6288, + "step": 7640 + }, + { + "epoch": 0.48, + "grad_norm": 0.8629709482192993, + "learning_rate": 5.49984857352342e-06, + "loss": 0.5766, + "step": 7641 + }, + { + "epoch": 0.48, + "grad_norm": 0.9087205529212952, + "learning_rate": 5.4988277090607986e-06, + "loss": 0.5434, + "step": 7642 + }, + { + "epoch": 0.48, + "grad_norm": 0.9986720085144043, + "learning_rate": 5.4978068235942775e-06, + "loss": 0.6495, + "step": 7643 + }, + { + "epoch": 0.48, + "grad_norm": 0.957332193851471, + "learning_rate": 5.496785917166843e-06, + "loss": 0.6054, + "step": 7644 + }, + { + "epoch": 0.48, + "grad_norm": 0.8836731910705566, + "learning_rate": 5.49576498982148e-06, + "loss": 0.5649, + "step": 7645 + }, + { + "epoch": 0.48, + "grad_norm": 0.8434025049209595, + "learning_rate": 5.49474404160118e-06, + "loss": 0.5556, + "step": 7646 + }, + { + "epoch": 0.48, + "grad_norm": 0.89605712890625, + "learning_rate": 5.4937230725489285e-06, + "loss": 0.6331, + "step": 7647 + }, + { + "epoch": 0.48, + "grad_norm": 0.8855474591255188, + "learning_rate": 5.492702082707716e-06, + "loss": 0.5908, + "step": 7648 + }, + { + "epoch": 0.48, + "grad_norm": 0.8697336316108704, + "learning_rate": 5.491681072120534e-06, + "loss": 0.6265, + "step": 7649 + }, + { + "epoch": 0.48, + "grad_norm": 0.9648655652999878, + "learning_rate": 5.4906600408303715e-06, + "loss": 0.6178, + "step": 7650 + }, + { + "epoch": 0.48, + "grad_norm": 0.901523768901825, + "learning_rate": 5.489638988880222e-06, + "loss": 0.562, + "step": 7651 + }, + { + "epoch": 0.48, + "grad_norm": 0.9213078618049622, + "learning_rate": 5.488617916313077e-06, + "loss": 0.5578, + "step": 7652 + }, + { + "epoch": 0.48, + "grad_norm": 0.8305292725563049, + "learning_rate": 5.487596823171932e-06, + "loss": 0.5244, + "step": 7653 + }, + { + "epoch": 0.48, + "grad_norm": 0.8835660219192505, + "learning_rate": 5.486575709499782e-06, + "loss": 0.6254, + "step": 7654 + }, + { + "epoch": 0.48, + "grad_norm": 0.9087215065956116, + "learning_rate": 5.48555457533962e-06, + "loss": 0.6071, + "step": 7655 + }, + { + "epoch": 0.49, + "grad_norm": 0.8418556451797485, + "learning_rate": 5.484533420734444e-06, + "loss": 0.5622, + "step": 7656 + }, + { + "epoch": 0.49, + "grad_norm": 0.8559536337852478, + "learning_rate": 5.483512245727252e-06, + "loss": 0.625, + "step": 7657 + }, + { + "epoch": 0.49, + "grad_norm": 0.8496268391609192, + "learning_rate": 5.482491050361041e-06, + "loss": 0.5712, + "step": 7658 + }, + { + "epoch": 0.49, + "grad_norm": 0.8907086253166199, + "learning_rate": 5.48146983467881e-06, + "loss": 0.618, + "step": 7659 + }, + { + "epoch": 0.49, + "grad_norm": 0.9585722088813782, + "learning_rate": 5.480448598723559e-06, + "loss": 0.6301, + "step": 7660 + }, + { + "epoch": 0.49, + "grad_norm": 0.9126084446907043, + "learning_rate": 5.47942734253829e-06, + "loss": 0.6009, + "step": 7661 + }, + { + "epoch": 0.49, + "grad_norm": 0.8798913955688477, + "learning_rate": 5.478406066166003e-06, + "loss": 0.5604, + "step": 7662 + }, + { + "epoch": 0.49, + "grad_norm": 0.8995177745819092, + "learning_rate": 5.477384769649701e-06, + "loss": 0.6143, + "step": 7663 + }, + { + "epoch": 0.49, + "grad_norm": 0.9048047661781311, + "learning_rate": 5.476363453032387e-06, + "loss": 0.5813, + "step": 7664 + }, + { + "epoch": 0.49, + "grad_norm": 0.808437168598175, + "learning_rate": 5.475342116357064e-06, + "loss": 0.6184, + "step": 7665 + }, + { + "epoch": 0.49, + "grad_norm": 0.9141887426376343, + "learning_rate": 5.474320759666739e-06, + "loss": 0.5828, + "step": 7666 + }, + { + "epoch": 0.49, + "grad_norm": 0.885321855545044, + "learning_rate": 5.473299383004417e-06, + "loss": 0.6431, + "step": 7667 + }, + { + "epoch": 0.49, + "grad_norm": 0.828567385673523, + "learning_rate": 5.472277986413104e-06, + "loss": 0.5438, + "step": 7668 + }, + { + "epoch": 0.49, + "grad_norm": 0.8383506536483765, + "learning_rate": 5.471256569935809e-06, + "loss": 0.5765, + "step": 7669 + }, + { + "epoch": 0.49, + "grad_norm": 0.8781369924545288, + "learning_rate": 5.470235133615538e-06, + "loss": 0.5844, + "step": 7670 + }, + { + "epoch": 0.49, + "grad_norm": 0.9070538282394409, + "learning_rate": 5.4692136774953004e-06, + "loss": 0.5773, + "step": 7671 + }, + { + "epoch": 0.49, + "grad_norm": 0.8789056539535522, + "learning_rate": 5.46819220161811e-06, + "loss": 0.5749, + "step": 7672 + }, + { + "epoch": 0.49, + "grad_norm": 0.8679722547531128, + "learning_rate": 5.467170706026973e-06, + "loss": 0.5518, + "step": 7673 + }, + { + "epoch": 0.49, + "grad_norm": 0.8629012107849121, + "learning_rate": 5.466149190764902e-06, + "loss": 0.5732, + "step": 7674 + }, + { + "epoch": 0.49, + "grad_norm": 0.8968449831008911, + "learning_rate": 5.465127655874911e-06, + "loss": 0.6644, + "step": 7675 + }, + { + "epoch": 0.49, + "grad_norm": 0.9048300385475159, + "learning_rate": 5.464106101400013e-06, + "loss": 0.5802, + "step": 7676 + }, + { + "epoch": 0.49, + "grad_norm": 0.8762057423591614, + "learning_rate": 5.463084527383222e-06, + "loss": 0.5703, + "step": 7677 + }, + { + "epoch": 0.49, + "grad_norm": 0.8548493385314941, + "learning_rate": 5.4620629338675505e-06, + "loss": 0.5622, + "step": 7678 + }, + { + "epoch": 0.49, + "grad_norm": 0.9109390377998352, + "learning_rate": 5.461041320896019e-06, + "loss": 0.6231, + "step": 7679 + }, + { + "epoch": 0.49, + "grad_norm": 0.8898347616195679, + "learning_rate": 5.460019688511639e-06, + "loss": 0.5709, + "step": 7680 + }, + { + "epoch": 0.49, + "grad_norm": 0.9044870138168335, + "learning_rate": 5.458998036757431e-06, + "loss": 0.6593, + "step": 7681 + }, + { + "epoch": 0.49, + "grad_norm": 0.8833417296409607, + "learning_rate": 5.4579763656764115e-06, + "loss": 0.6153, + "step": 7682 + }, + { + "epoch": 0.49, + "grad_norm": 0.9275777339935303, + "learning_rate": 5.456954675311602e-06, + "loss": 0.5869, + "step": 7683 + }, + { + "epoch": 0.49, + "grad_norm": 0.9542369246482849, + "learning_rate": 5.45593296570602e-06, + "loss": 0.6183, + "step": 7684 + }, + { + "epoch": 0.49, + "grad_norm": 0.9259552359580994, + "learning_rate": 5.454911236902687e-06, + "loss": 0.6372, + "step": 7685 + }, + { + "epoch": 0.49, + "grad_norm": 0.9347798824310303, + "learning_rate": 5.453889488944623e-06, + "loss": 0.5539, + "step": 7686 + }, + { + "epoch": 0.49, + "grad_norm": 0.9027972221374512, + "learning_rate": 5.452867721874854e-06, + "loss": 0.6572, + "step": 7687 + }, + { + "epoch": 0.49, + "grad_norm": 0.904701828956604, + "learning_rate": 5.4518459357364e-06, + "loss": 0.6378, + "step": 7688 + }, + { + "epoch": 0.49, + "grad_norm": 0.8021993637084961, + "learning_rate": 5.4508241305722856e-06, + "loss": 0.6006, + "step": 7689 + }, + { + "epoch": 0.49, + "grad_norm": 0.8704695701599121, + "learning_rate": 5.449802306425532e-06, + "loss": 0.5669, + "step": 7690 + }, + { + "epoch": 0.49, + "grad_norm": 0.8994425535202026, + "learning_rate": 5.448780463339172e-06, + "loss": 0.6242, + "step": 7691 + }, + { + "epoch": 0.49, + "grad_norm": 0.8955239653587341, + "learning_rate": 5.447758601356226e-06, + "loss": 0.6172, + "step": 7692 + }, + { + "epoch": 0.49, + "grad_norm": 0.8894906044006348, + "learning_rate": 5.446736720519725e-06, + "loss": 0.592, + "step": 7693 + }, + { + "epoch": 0.49, + "grad_norm": 0.9087505340576172, + "learning_rate": 5.445714820872693e-06, + "loss": 0.5644, + "step": 7694 + }, + { + "epoch": 0.49, + "grad_norm": 0.8230986595153809, + "learning_rate": 5.4446929024581606e-06, + "loss": 0.5527, + "step": 7695 + }, + { + "epoch": 0.49, + "grad_norm": 0.9331679344177246, + "learning_rate": 5.4436709653191575e-06, + "loss": 0.5922, + "step": 7696 + }, + { + "epoch": 0.49, + "grad_norm": 0.8441013097763062, + "learning_rate": 5.442649009498713e-06, + "loss": 0.5654, + "step": 7697 + }, + { + "epoch": 0.49, + "grad_norm": 0.9754238128662109, + "learning_rate": 5.441627035039859e-06, + "loss": 0.6082, + "step": 7698 + }, + { + "epoch": 0.49, + "grad_norm": 0.8756945133209229, + "learning_rate": 5.440605041985626e-06, + "loss": 0.5359, + "step": 7699 + }, + { + "epoch": 0.49, + "grad_norm": 0.7946870923042297, + "learning_rate": 5.439583030379049e-06, + "loss": 0.5282, + "step": 7700 + }, + { + "epoch": 0.49, + "grad_norm": 0.9179619550704956, + "learning_rate": 5.438561000263157e-06, + "loss": 0.5532, + "step": 7701 + }, + { + "epoch": 0.49, + "grad_norm": 0.8969404697418213, + "learning_rate": 5.4375389516809895e-06, + "loss": 0.6416, + "step": 7702 + }, + { + "epoch": 0.49, + "grad_norm": 0.9204484820365906, + "learning_rate": 5.436516884675579e-06, + "loss": 0.6281, + "step": 7703 + }, + { + "epoch": 0.49, + "grad_norm": 0.9213035702705383, + "learning_rate": 5.43549479928996e-06, + "loss": 0.5883, + "step": 7704 + }, + { + "epoch": 0.49, + "grad_norm": 0.8417788743972778, + "learning_rate": 5.434472695567169e-06, + "loss": 0.5565, + "step": 7705 + }, + { + "epoch": 0.49, + "grad_norm": 0.8737784028053284, + "learning_rate": 5.433450573550246e-06, + "loss": 0.5677, + "step": 7706 + }, + { + "epoch": 0.49, + "grad_norm": 0.8927813768386841, + "learning_rate": 5.432428433282226e-06, + "loss": 0.5841, + "step": 7707 + }, + { + "epoch": 0.49, + "grad_norm": 0.9724695086479187, + "learning_rate": 5.43140627480615e-06, + "loss": 0.6503, + "step": 7708 + }, + { + "epoch": 0.49, + "grad_norm": 0.854300856590271, + "learning_rate": 5.4303840981650565e-06, + "loss": 0.5347, + "step": 7709 + }, + { + "epoch": 0.49, + "grad_norm": 0.8913581371307373, + "learning_rate": 5.429361903401985e-06, + "loss": 0.6083, + "step": 7710 + }, + { + "epoch": 0.49, + "grad_norm": 0.8786452412605286, + "learning_rate": 5.4283396905599785e-06, + "loss": 0.5958, + "step": 7711 + }, + { + "epoch": 0.49, + "grad_norm": 0.9056409001350403, + "learning_rate": 5.427317459682076e-06, + "loss": 0.6015, + "step": 7712 + }, + { + "epoch": 0.49, + "grad_norm": 0.8648980855941772, + "learning_rate": 5.426295210811323e-06, + "loss": 0.5982, + "step": 7713 + }, + { + "epoch": 0.49, + "grad_norm": 0.8439405560493469, + "learning_rate": 5.425272943990761e-06, + "loss": 0.5646, + "step": 7714 + }, + { + "epoch": 0.49, + "grad_norm": 0.9676143527030945, + "learning_rate": 5.4242506592634354e-06, + "loss": 0.5852, + "step": 7715 + }, + { + "epoch": 0.49, + "grad_norm": 0.8673433661460876, + "learning_rate": 5.423228356672391e-06, + "loss": 0.5583, + "step": 7716 + }, + { + "epoch": 0.49, + "grad_norm": 0.878349244594574, + "learning_rate": 5.422206036260671e-06, + "loss": 0.5877, + "step": 7717 + }, + { + "epoch": 0.49, + "grad_norm": 0.863304853439331, + "learning_rate": 5.421183698071325e-06, + "loss": 0.633, + "step": 7718 + }, + { + "epoch": 0.49, + "grad_norm": 0.8977344036102295, + "learning_rate": 5.420161342147399e-06, + "loss": 0.5786, + "step": 7719 + }, + { + "epoch": 0.49, + "grad_norm": 0.9075315594673157, + "learning_rate": 5.4191389685319395e-06, + "loss": 0.6107, + "step": 7720 + }, + { + "epoch": 0.49, + "grad_norm": 0.8574069738388062, + "learning_rate": 5.4181165772679955e-06, + "loss": 0.5664, + "step": 7721 + }, + { + "epoch": 0.49, + "grad_norm": 0.8526822328567505, + "learning_rate": 5.417094168398618e-06, + "loss": 0.5912, + "step": 7722 + }, + { + "epoch": 0.49, + "grad_norm": 0.8845213055610657, + "learning_rate": 5.416071741966856e-06, + "loss": 0.6056, + "step": 7723 + }, + { + "epoch": 0.49, + "grad_norm": 0.8884814977645874, + "learning_rate": 5.41504929801576e-06, + "loss": 0.6047, + "step": 7724 + }, + { + "epoch": 0.49, + "grad_norm": 0.8512783050537109, + "learning_rate": 5.414026836588382e-06, + "loss": 0.5831, + "step": 7725 + }, + { + "epoch": 0.49, + "grad_norm": 0.8551806807518005, + "learning_rate": 5.413004357727775e-06, + "loss": 0.5865, + "step": 7726 + }, + { + "epoch": 0.49, + "grad_norm": 0.9021192789077759, + "learning_rate": 5.411981861476991e-06, + "loss": 0.5534, + "step": 7727 + }, + { + "epoch": 0.49, + "grad_norm": 0.8763885498046875, + "learning_rate": 5.4109593478790825e-06, + "loss": 0.544, + "step": 7728 + }, + { + "epoch": 0.49, + "grad_norm": 0.9107353687286377, + "learning_rate": 5.409936816977106e-06, + "loss": 0.654, + "step": 7729 + }, + { + "epoch": 0.49, + "grad_norm": 0.8590700626373291, + "learning_rate": 5.408914268814117e-06, + "loss": 0.594, + "step": 7730 + }, + { + "epoch": 0.49, + "grad_norm": 0.883228063583374, + "learning_rate": 5.4078917034331705e-06, + "loss": 0.5545, + "step": 7731 + }, + { + "epoch": 0.49, + "grad_norm": 0.8624158501625061, + "learning_rate": 5.4068691208773225e-06, + "loss": 0.5184, + "step": 7732 + }, + { + "epoch": 0.49, + "grad_norm": 0.8744218945503235, + "learning_rate": 5.405846521189632e-06, + "loss": 0.6158, + "step": 7733 + }, + { + "epoch": 0.49, + "grad_norm": 0.9129903316497803, + "learning_rate": 5.404823904413157e-06, + "loss": 0.5886, + "step": 7734 + }, + { + "epoch": 0.49, + "grad_norm": 0.8881204128265381, + "learning_rate": 5.403801270590955e-06, + "loss": 0.5861, + "step": 7735 + }, + { + "epoch": 0.49, + "grad_norm": 0.938651442527771, + "learning_rate": 5.402778619766086e-06, + "loss": 0.6119, + "step": 7736 + }, + { + "epoch": 0.49, + "grad_norm": 0.8968808650970459, + "learning_rate": 5.40175595198161e-06, + "loss": 0.6051, + "step": 7737 + }, + { + "epoch": 0.49, + "grad_norm": 0.8762619495391846, + "learning_rate": 5.400733267280589e-06, + "loss": 0.5904, + "step": 7738 + }, + { + "epoch": 0.49, + "grad_norm": 0.863160252571106, + "learning_rate": 5.399710565706084e-06, + "loss": 0.6209, + "step": 7739 + }, + { + "epoch": 0.49, + "grad_norm": 0.8458674550056458, + "learning_rate": 5.3986878473011585e-06, + "loss": 0.5892, + "step": 7740 + }, + { + "epoch": 0.49, + "grad_norm": 0.8740219473838806, + "learning_rate": 5.397665112108874e-06, + "loss": 0.5339, + "step": 7741 + }, + { + "epoch": 0.49, + "grad_norm": 0.8719754815101624, + "learning_rate": 5.3966423601722955e-06, + "loss": 0.5679, + "step": 7742 + }, + { + "epoch": 0.49, + "grad_norm": 1.0123850107192993, + "learning_rate": 5.3956195915344855e-06, + "loss": 0.6039, + "step": 7743 + }, + { + "epoch": 0.49, + "grad_norm": 0.9461089372634888, + "learning_rate": 5.394596806238511e-06, + "loss": 0.6337, + "step": 7744 + }, + { + "epoch": 0.49, + "grad_norm": 0.8724687695503235, + "learning_rate": 5.39357400432744e-06, + "loss": 0.524, + "step": 7745 + }, + { + "epoch": 0.49, + "grad_norm": 0.8834648728370667, + "learning_rate": 5.392551185844334e-06, + "loss": 0.6, + "step": 7746 + }, + { + "epoch": 0.49, + "grad_norm": 0.8506259322166443, + "learning_rate": 5.391528350832265e-06, + "loss": 0.5867, + "step": 7747 + }, + { + "epoch": 0.49, + "grad_norm": 0.9510423541069031, + "learning_rate": 5.3905054993342985e-06, + "loss": 0.6107, + "step": 7748 + }, + { + "epoch": 0.49, + "grad_norm": 0.8999704122543335, + "learning_rate": 5.389482631393504e-06, + "loss": 0.6091, + "step": 7749 + }, + { + "epoch": 0.49, + "grad_norm": 0.8855018019676208, + "learning_rate": 5.388459747052951e-06, + "loss": 0.5748, + "step": 7750 + }, + { + "epoch": 0.49, + "grad_norm": 0.8717223405838013, + "learning_rate": 5.387436846355709e-06, + "loss": 0.5826, + "step": 7751 + }, + { + "epoch": 0.49, + "grad_norm": 0.8774482607841492, + "learning_rate": 5.386413929344849e-06, + "loss": 0.5812, + "step": 7752 + }, + { + "epoch": 0.49, + "grad_norm": 0.96014004945755, + "learning_rate": 5.3853909960634446e-06, + "loss": 0.5428, + "step": 7753 + }, + { + "epoch": 0.49, + "grad_norm": 0.8995871543884277, + "learning_rate": 5.3843680465545635e-06, + "loss": 0.6025, + "step": 7754 + }, + { + "epoch": 0.49, + "grad_norm": 0.9755268096923828, + "learning_rate": 5.3833450808612816e-06, + "loss": 0.6194, + "step": 7755 + }, + { + "epoch": 0.49, + "grad_norm": 0.8455917835235596, + "learning_rate": 5.382322099026673e-06, + "loss": 0.5979, + "step": 7756 + }, + { + "epoch": 0.49, + "grad_norm": 0.85719895362854, + "learning_rate": 5.38129910109381e-06, + "loss": 0.5648, + "step": 7757 + }, + { + "epoch": 0.49, + "grad_norm": 0.8735889196395874, + "learning_rate": 5.380276087105769e-06, + "loss": 0.5679, + "step": 7758 + }, + { + "epoch": 0.49, + "grad_norm": 0.8770782351493835, + "learning_rate": 5.379253057105623e-06, + "loss": 0.5705, + "step": 7759 + }, + { + "epoch": 0.49, + "grad_norm": 0.8773306012153625, + "learning_rate": 5.378230011136453e-06, + "loss": 0.6444, + "step": 7760 + }, + { + "epoch": 0.49, + "grad_norm": 0.8813656568527222, + "learning_rate": 5.37720694924133e-06, + "loss": 0.5294, + "step": 7761 + }, + { + "epoch": 0.49, + "grad_norm": 0.8682379126548767, + "learning_rate": 5.376183871463336e-06, + "loss": 0.5526, + "step": 7762 + }, + { + "epoch": 0.49, + "grad_norm": 0.881206214427948, + "learning_rate": 5.375160777845548e-06, + "loss": 0.587, + "step": 7763 + }, + { + "epoch": 0.49, + "grad_norm": 0.8982335329055786, + "learning_rate": 5.3741376684310455e-06, + "loss": 0.5905, + "step": 7764 + }, + { + "epoch": 0.49, + "grad_norm": 0.9069334864616394, + "learning_rate": 5.3731145432629065e-06, + "loss": 0.6072, + "step": 7765 + }, + { + "epoch": 0.49, + "grad_norm": 0.900351881980896, + "learning_rate": 5.3720914023842105e-06, + "loss": 0.5809, + "step": 7766 + }, + { + "epoch": 0.49, + "grad_norm": 0.8970122337341309, + "learning_rate": 5.371068245838042e-06, + "loss": 0.6318, + "step": 7767 + }, + { + "epoch": 0.49, + "grad_norm": 0.854917049407959, + "learning_rate": 5.37004507366748e-06, + "loss": 0.595, + "step": 7768 + }, + { + "epoch": 0.49, + "grad_norm": 0.904750645160675, + "learning_rate": 5.369021885915607e-06, + "loss": 0.6008, + "step": 7769 + }, + { + "epoch": 0.49, + "grad_norm": 0.9690344333648682, + "learning_rate": 5.367998682625506e-06, + "loss": 0.6083, + "step": 7770 + }, + { + "epoch": 0.49, + "grad_norm": 0.829422116279602, + "learning_rate": 5.366975463840262e-06, + "loss": 0.5196, + "step": 7771 + }, + { + "epoch": 0.49, + "grad_norm": 0.8954147696495056, + "learning_rate": 5.365952229602956e-06, + "loss": 0.5388, + "step": 7772 + }, + { + "epoch": 0.49, + "grad_norm": 0.8637533187866211, + "learning_rate": 5.3649289799566766e-06, + "loss": 0.6015, + "step": 7773 + }, + { + "epoch": 0.49, + "grad_norm": 0.9479194283485413, + "learning_rate": 5.363905714944505e-06, + "loss": 0.5729, + "step": 7774 + }, + { + "epoch": 0.49, + "grad_norm": 0.9531112313270569, + "learning_rate": 5.362882434609531e-06, + "loss": 0.6434, + "step": 7775 + }, + { + "epoch": 0.49, + "grad_norm": 0.8943924307823181, + "learning_rate": 5.36185913899484e-06, + "loss": 0.576, + "step": 7776 + }, + { + "epoch": 0.49, + "grad_norm": 0.9110773801803589, + "learning_rate": 5.36083582814352e-06, + "loss": 0.6053, + "step": 7777 + }, + { + "epoch": 0.49, + "grad_norm": 0.8563951849937439, + "learning_rate": 5.359812502098657e-06, + "loss": 0.5905, + "step": 7778 + }, + { + "epoch": 0.49, + "grad_norm": 0.8632087707519531, + "learning_rate": 5.358789160903343e-06, + "loss": 0.5819, + "step": 7779 + }, + { + "epoch": 0.49, + "grad_norm": 0.8864629864692688, + "learning_rate": 5.357765804600664e-06, + "loss": 0.5689, + "step": 7780 + }, + { + "epoch": 0.49, + "grad_norm": 0.9528976678848267, + "learning_rate": 5.3567424332337125e-06, + "loss": 0.5989, + "step": 7781 + }, + { + "epoch": 0.49, + "grad_norm": 0.92073655128479, + "learning_rate": 5.355719046845577e-06, + "loss": 0.6029, + "step": 7782 + }, + { + "epoch": 0.49, + "grad_norm": 0.8833118677139282, + "learning_rate": 5.354695645479352e-06, + "loss": 0.5792, + "step": 7783 + }, + { + "epoch": 0.49, + "grad_norm": 0.8913655877113342, + "learning_rate": 5.353672229178125e-06, + "loss": 0.564, + "step": 7784 + }, + { + "epoch": 0.49, + "grad_norm": 0.8653914332389832, + "learning_rate": 5.352648797984993e-06, + "loss": 0.5835, + "step": 7785 + }, + { + "epoch": 0.49, + "grad_norm": 0.8871545791625977, + "learning_rate": 5.351625351943044e-06, + "loss": 0.6209, + "step": 7786 + }, + { + "epoch": 0.49, + "grad_norm": 0.888781726360321, + "learning_rate": 5.350601891095377e-06, + "loss": 0.5891, + "step": 7787 + }, + { + "epoch": 0.49, + "grad_norm": 0.7901937961578369, + "learning_rate": 5.349578415485085e-06, + "loss": 0.4945, + "step": 7788 + }, + { + "epoch": 0.49, + "grad_norm": 0.9174894094467163, + "learning_rate": 5.34855492515526e-06, + "loss": 0.5817, + "step": 7789 + }, + { + "epoch": 0.49, + "grad_norm": 0.9078687429428101, + "learning_rate": 5.347531420148999e-06, + "loss": 0.6333, + "step": 7790 + }, + { + "epoch": 0.49, + "grad_norm": 0.8857147097587585, + "learning_rate": 5.3465079005094e-06, + "loss": 0.5257, + "step": 7791 + }, + { + "epoch": 0.49, + "grad_norm": 0.8582876920700073, + "learning_rate": 5.34548436627956e-06, + "loss": 0.6155, + "step": 7792 + }, + { + "epoch": 0.49, + "grad_norm": 0.9454988837242126, + "learning_rate": 5.344460817502573e-06, + "loss": 0.6312, + "step": 7793 + }, + { + "epoch": 0.49, + "grad_norm": 0.8494389057159424, + "learning_rate": 5.34343725422154e-06, + "loss": 0.5916, + "step": 7794 + }, + { + "epoch": 0.49, + "grad_norm": 0.8725386261940002, + "learning_rate": 5.342413676479559e-06, + "loss": 0.5982, + "step": 7795 + }, + { + "epoch": 0.49, + "grad_norm": 0.890536904335022, + "learning_rate": 5.34139008431973e-06, + "loss": 0.5657, + "step": 7796 + }, + { + "epoch": 0.49, + "grad_norm": 0.8989502787590027, + "learning_rate": 5.34036647778515e-06, + "loss": 0.6311, + "step": 7797 + }, + { + "epoch": 0.49, + "grad_norm": 0.8554948568344116, + "learning_rate": 5.3393428569189235e-06, + "loss": 0.5428, + "step": 7798 + }, + { + "epoch": 0.49, + "grad_norm": 0.8485680818557739, + "learning_rate": 5.338319221764149e-06, + "loss": 0.5813, + "step": 7799 + }, + { + "epoch": 0.49, + "grad_norm": 0.8714006543159485, + "learning_rate": 5.33729557236393e-06, + "loss": 0.6127, + "step": 7800 + }, + { + "epoch": 0.49, + "grad_norm": 0.8680577874183655, + "learning_rate": 5.336271908761367e-06, + "loss": 0.6045, + "step": 7801 + }, + { + "epoch": 0.49, + "grad_norm": 0.8042650818824768, + "learning_rate": 5.335248230999565e-06, + "loss": 0.5189, + "step": 7802 + }, + { + "epoch": 0.49, + "grad_norm": 0.9379438757896423, + "learning_rate": 5.334224539121625e-06, + "loss": 0.5976, + "step": 7803 + }, + { + "epoch": 0.49, + "grad_norm": 0.8921198844909668, + "learning_rate": 5.333200833170652e-06, + "loss": 0.5507, + "step": 7804 + }, + { + "epoch": 0.49, + "grad_norm": 0.8879731893539429, + "learning_rate": 5.332177113189751e-06, + "loss": 0.6268, + "step": 7805 + }, + { + "epoch": 0.49, + "grad_norm": 0.8605756759643555, + "learning_rate": 5.331153379222028e-06, + "loss": 0.6194, + "step": 7806 + }, + { + "epoch": 0.49, + "grad_norm": 0.9379689693450928, + "learning_rate": 5.330129631310589e-06, + "loss": 0.6294, + "step": 7807 + }, + { + "epoch": 0.49, + "grad_norm": 0.8884453177452087, + "learning_rate": 5.3291058694985385e-06, + "loss": 0.6167, + "step": 7808 + }, + { + "epoch": 0.49, + "grad_norm": 0.8566985726356506, + "learning_rate": 5.328082093828984e-06, + "loss": 0.6185, + "step": 7809 + }, + { + "epoch": 0.49, + "grad_norm": 0.8915068507194519, + "learning_rate": 5.327058304345035e-06, + "loss": 0.5716, + "step": 7810 + }, + { + "epoch": 0.49, + "grad_norm": 0.8683719635009766, + "learning_rate": 5.3260345010898e-06, + "loss": 0.5944, + "step": 7811 + }, + { + "epoch": 0.49, + "grad_norm": 0.9614280462265015, + "learning_rate": 5.325010684106384e-06, + "loss": 0.5791, + "step": 7812 + }, + { + "epoch": 0.49, + "grad_norm": 0.8619272708892822, + "learning_rate": 5.323986853437899e-06, + "loss": 0.5457, + "step": 7813 + }, + { + "epoch": 0.5, + "grad_norm": 0.9838071465492249, + "learning_rate": 5.322963009127454e-06, + "loss": 0.618, + "step": 7814 + }, + { + "epoch": 0.5, + "grad_norm": 0.8671537637710571, + "learning_rate": 5.321939151218163e-06, + "loss": 0.5641, + "step": 7815 + }, + { + "epoch": 0.5, + "grad_norm": 0.8439149856567383, + "learning_rate": 5.320915279753132e-06, + "loss": 0.592, + "step": 7816 + }, + { + "epoch": 0.5, + "grad_norm": 0.8067424297332764, + "learning_rate": 5.319891394775475e-06, + "loss": 0.5262, + "step": 7817 + }, + { + "epoch": 0.5, + "grad_norm": 0.979844331741333, + "learning_rate": 5.3188674963283064e-06, + "loss": 0.5845, + "step": 7818 + }, + { + "epoch": 0.5, + "grad_norm": 0.8906669020652771, + "learning_rate": 5.317843584454734e-06, + "loss": 0.5639, + "step": 7819 + }, + { + "epoch": 0.5, + "grad_norm": 0.9191656112670898, + "learning_rate": 5.316819659197875e-06, + "loss": 0.5816, + "step": 7820 + }, + { + "epoch": 0.5, + "grad_norm": 0.917048990726471, + "learning_rate": 5.315795720600842e-06, + "loss": 0.5991, + "step": 7821 + }, + { + "epoch": 0.5, + "grad_norm": 0.8632756471633911, + "learning_rate": 5.314771768706751e-06, + "loss": 0.5967, + "step": 7822 + }, + { + "epoch": 0.5, + "grad_norm": 0.9136775732040405, + "learning_rate": 5.313747803558714e-06, + "loss": 0.5945, + "step": 7823 + }, + { + "epoch": 0.5, + "grad_norm": 0.84110426902771, + "learning_rate": 5.312723825199849e-06, + "loss": 0.6218, + "step": 7824 + }, + { + "epoch": 0.5, + "grad_norm": 0.9239146113395691, + "learning_rate": 5.311699833673273e-06, + "loss": 0.6421, + "step": 7825 + }, + { + "epoch": 0.5, + "grad_norm": 0.9365952014923096, + "learning_rate": 5.310675829022101e-06, + "loss": 0.5668, + "step": 7826 + }, + { + "epoch": 0.5, + "grad_norm": 0.9483537673950195, + "learning_rate": 5.309651811289449e-06, + "loss": 0.5903, + "step": 7827 + }, + { + "epoch": 0.5, + "grad_norm": 0.9416823387145996, + "learning_rate": 5.308627780518437e-06, + "loss": 0.5688, + "step": 7828 + }, + { + "epoch": 0.5, + "grad_norm": 0.9334666728973389, + "learning_rate": 5.307603736752183e-06, + "loss": 0.5561, + "step": 7829 + }, + { + "epoch": 0.5, + "grad_norm": 0.8541433215141296, + "learning_rate": 5.306579680033807e-06, + "loss": 0.592, + "step": 7830 + }, + { + "epoch": 0.5, + "grad_norm": 0.9449893832206726, + "learning_rate": 5.305555610406425e-06, + "loss": 0.6002, + "step": 7831 + }, + { + "epoch": 0.5, + "grad_norm": 0.8886929154396057, + "learning_rate": 5.30453152791316e-06, + "loss": 0.6695, + "step": 7832 + }, + { + "epoch": 0.5, + "grad_norm": 0.8141634464263916, + "learning_rate": 5.303507432597134e-06, + "loss": 0.5946, + "step": 7833 + }, + { + "epoch": 0.5, + "grad_norm": 0.8616921901702881, + "learning_rate": 5.302483324501463e-06, + "loss": 0.6024, + "step": 7834 + }, + { + "epoch": 0.5, + "grad_norm": 0.8720713257789612, + "learning_rate": 5.3014592036692715e-06, + "loss": 0.5934, + "step": 7835 + }, + { + "epoch": 0.5, + "grad_norm": 0.954289436340332, + "learning_rate": 5.300435070143683e-06, + "loss": 0.5998, + "step": 7836 + }, + { + "epoch": 0.5, + "grad_norm": 0.8757979273796082, + "learning_rate": 5.2994109239678185e-06, + "loss": 0.6295, + "step": 7837 + }, + { + "epoch": 0.5, + "grad_norm": 0.9314550161361694, + "learning_rate": 5.298386765184801e-06, + "loss": 0.6031, + "step": 7838 + }, + { + "epoch": 0.5, + "grad_norm": 0.8398404121398926, + "learning_rate": 5.297362593837755e-06, + "loss": 0.5565, + "step": 7839 + }, + { + "epoch": 0.5, + "grad_norm": 0.8812541365623474, + "learning_rate": 5.296338409969805e-06, + "loss": 0.5657, + "step": 7840 + }, + { + "epoch": 0.5, + "grad_norm": 0.8774970173835754, + "learning_rate": 5.295314213624076e-06, + "loss": 0.5786, + "step": 7841 + }, + { + "epoch": 0.5, + "grad_norm": 0.8290955424308777, + "learning_rate": 5.2942900048436914e-06, + "loss": 0.5563, + "step": 7842 + }, + { + "epoch": 0.5, + "grad_norm": 0.9258725047111511, + "learning_rate": 5.293265783671778e-06, + "loss": 0.5809, + "step": 7843 + }, + { + "epoch": 0.5, + "grad_norm": 0.8245546817779541, + "learning_rate": 5.292241550151465e-06, + "loss": 0.6013, + "step": 7844 + }, + { + "epoch": 0.5, + "grad_norm": 0.9611520767211914, + "learning_rate": 5.291217304325875e-06, + "loss": 0.6349, + "step": 7845 + }, + { + "epoch": 0.5, + "grad_norm": 0.89043790102005, + "learning_rate": 5.290193046238139e-06, + "loss": 0.6121, + "step": 7846 + }, + { + "epoch": 0.5, + "grad_norm": 0.9023299217224121, + "learning_rate": 5.289168775931381e-06, + "loss": 0.5966, + "step": 7847 + }, + { + "epoch": 0.5, + "grad_norm": 0.881334125995636, + "learning_rate": 5.288144493448733e-06, + "loss": 0.5643, + "step": 7848 + }, + { + "epoch": 0.5, + "grad_norm": 0.9082907438278198, + "learning_rate": 5.287120198833324e-06, + "loss": 0.5767, + "step": 7849 + }, + { + "epoch": 0.5, + "grad_norm": 0.9419313073158264, + "learning_rate": 5.286095892128282e-06, + "loss": 0.6172, + "step": 7850 + }, + { + "epoch": 0.5, + "grad_norm": 0.9367068409919739, + "learning_rate": 5.285071573376735e-06, + "loss": 0.6101, + "step": 7851 + }, + { + "epoch": 0.5, + "grad_norm": 0.9079290628433228, + "learning_rate": 5.2840472426218185e-06, + "loss": 0.589, + "step": 7852 + }, + { + "epoch": 0.5, + "grad_norm": 0.948851466178894, + "learning_rate": 5.283022899906659e-06, + "loss": 0.6335, + "step": 7853 + }, + { + "epoch": 0.5, + "grad_norm": 0.921149492263794, + "learning_rate": 5.28199854527439e-06, + "loss": 0.5849, + "step": 7854 + }, + { + "epoch": 0.5, + "grad_norm": 0.8445439338684082, + "learning_rate": 5.280974178768144e-06, + "loss": 0.54, + "step": 7855 + }, + { + "epoch": 0.5, + "grad_norm": 0.9971843361854553, + "learning_rate": 5.279949800431052e-06, + "loss": 0.6041, + "step": 7856 + }, + { + "epoch": 0.5, + "grad_norm": 0.8367643356323242, + "learning_rate": 5.278925410306248e-06, + "loss": 0.5955, + "step": 7857 + }, + { + "epoch": 0.5, + "grad_norm": 0.8564116358757019, + "learning_rate": 5.277901008436865e-06, + "loss": 0.5757, + "step": 7858 + }, + { + "epoch": 0.5, + "grad_norm": 0.8856030702590942, + "learning_rate": 5.276876594866037e-06, + "loss": 0.5963, + "step": 7859 + }, + { + "epoch": 0.5, + "grad_norm": 0.8912267088890076, + "learning_rate": 5.2758521696369e-06, + "loss": 0.5597, + "step": 7860 + }, + { + "epoch": 0.5, + "grad_norm": 0.8481583595275879, + "learning_rate": 5.274827732792587e-06, + "loss": 0.5526, + "step": 7861 + }, + { + "epoch": 0.5, + "grad_norm": 0.9108606576919556, + "learning_rate": 5.273803284376234e-06, + "loss": 0.6079, + "step": 7862 + }, + { + "epoch": 0.5, + "grad_norm": 0.9559755921363831, + "learning_rate": 5.272778824430977e-06, + "loss": 0.6008, + "step": 7863 + }, + { + "epoch": 0.5, + "grad_norm": 0.8783113360404968, + "learning_rate": 5.271754352999953e-06, + "loss": 0.6102, + "step": 7864 + }, + { + "epoch": 0.5, + "grad_norm": 0.8379794359207153, + "learning_rate": 5.2707298701263e-06, + "loss": 0.5744, + "step": 7865 + }, + { + "epoch": 0.5, + "grad_norm": 0.8685166835784912, + "learning_rate": 5.269705375853151e-06, + "loss": 0.604, + "step": 7866 + }, + { + "epoch": 0.5, + "grad_norm": 0.875748336315155, + "learning_rate": 5.26868087022365e-06, + "loss": 0.6116, + "step": 7867 + }, + { + "epoch": 0.5, + "grad_norm": 0.890408992767334, + "learning_rate": 5.26765635328093e-06, + "loss": 0.567, + "step": 7868 + }, + { + "epoch": 0.5, + "grad_norm": 0.9582130312919617, + "learning_rate": 5.266631825068134e-06, + "loss": 0.6553, + "step": 7869 + }, + { + "epoch": 0.5, + "grad_norm": 0.888396143913269, + "learning_rate": 5.265607285628397e-06, + "loss": 0.611, + "step": 7870 + }, + { + "epoch": 0.5, + "grad_norm": 0.869216799736023, + "learning_rate": 5.264582735004863e-06, + "loss": 0.5906, + "step": 7871 + }, + { + "epoch": 0.5, + "grad_norm": 0.8864418864250183, + "learning_rate": 5.26355817324067e-06, + "loss": 0.6488, + "step": 7872 + }, + { + "epoch": 0.5, + "grad_norm": 0.8367258906364441, + "learning_rate": 5.26253360037896e-06, + "loss": 0.6189, + "step": 7873 + }, + { + "epoch": 0.5, + "grad_norm": 0.8717927932739258, + "learning_rate": 5.2615090164628705e-06, + "loss": 0.6071, + "step": 7874 + }, + { + "epoch": 0.5, + "grad_norm": 0.8906144499778748, + "learning_rate": 5.2604844215355484e-06, + "loss": 0.5616, + "step": 7875 + }, + { + "epoch": 0.5, + "grad_norm": 0.9154402017593384, + "learning_rate": 5.259459815640133e-06, + "loss": 0.6081, + "step": 7876 + }, + { + "epoch": 0.5, + "grad_norm": 0.9059274792671204, + "learning_rate": 5.258435198819768e-06, + "loss": 0.6212, + "step": 7877 + }, + { + "epoch": 0.5, + "grad_norm": 0.9382339715957642, + "learning_rate": 5.257410571117594e-06, + "loss": 0.6418, + "step": 7878 + }, + { + "epoch": 0.5, + "grad_norm": 0.8434200882911682, + "learning_rate": 5.256385932576759e-06, + "loss": 0.5638, + "step": 7879 + }, + { + "epoch": 0.5, + "grad_norm": 0.8744908571243286, + "learning_rate": 5.255361283240402e-06, + "loss": 0.5436, + "step": 7880 + }, + { + "epoch": 0.5, + "grad_norm": 0.8957458138465881, + "learning_rate": 5.254336623151672e-06, + "loss": 0.6203, + "step": 7881 + }, + { + "epoch": 0.5, + "grad_norm": 0.8486526608467102, + "learning_rate": 5.253311952353708e-06, + "loss": 0.5835, + "step": 7882 + }, + { + "epoch": 0.5, + "grad_norm": 0.9576562643051147, + "learning_rate": 5.252287270889661e-06, + "loss": 0.6513, + "step": 7883 + }, + { + "epoch": 0.5, + "grad_norm": 0.85997474193573, + "learning_rate": 5.251262578802675e-06, + "loss": 0.5634, + "step": 7884 + }, + { + "epoch": 0.5, + "grad_norm": 0.87550950050354, + "learning_rate": 5.250237876135895e-06, + "loss": 0.6243, + "step": 7885 + }, + { + "epoch": 0.5, + "grad_norm": 0.8734540343284607, + "learning_rate": 5.2492131629324695e-06, + "loss": 0.5659, + "step": 7886 + }, + { + "epoch": 0.5, + "grad_norm": 0.8869773745536804, + "learning_rate": 5.248188439235544e-06, + "loss": 0.578, + "step": 7887 + }, + { + "epoch": 0.5, + "grad_norm": 0.8749696016311646, + "learning_rate": 5.247163705088267e-06, + "loss": 0.564, + "step": 7888 + }, + { + "epoch": 0.5, + "grad_norm": 0.8944323658943176, + "learning_rate": 5.246138960533786e-06, + "loss": 0.6297, + "step": 7889 + }, + { + "epoch": 0.5, + "grad_norm": 0.9447425603866577, + "learning_rate": 5.245114205615249e-06, + "loss": 0.5535, + "step": 7890 + }, + { + "epoch": 0.5, + "grad_norm": 0.8836696743965149, + "learning_rate": 5.244089440375807e-06, + "loss": 0.5838, + "step": 7891 + }, + { + "epoch": 0.5, + "grad_norm": 0.8536423444747925, + "learning_rate": 5.243064664858607e-06, + "loss": 0.5484, + "step": 7892 + }, + { + "epoch": 0.5, + "grad_norm": 0.9463775157928467, + "learning_rate": 5.242039879106799e-06, + "loss": 0.631, + "step": 7893 + }, + { + "epoch": 0.5, + "grad_norm": 0.9138554334640503, + "learning_rate": 5.241015083163534e-06, + "loss": 0.6952, + "step": 7894 + }, + { + "epoch": 0.5, + "grad_norm": 0.8552803993225098, + "learning_rate": 5.239990277071962e-06, + "loss": 0.625, + "step": 7895 + }, + { + "epoch": 0.5, + "grad_norm": 0.894889235496521, + "learning_rate": 5.238965460875236e-06, + "loss": 0.5667, + "step": 7896 + }, + { + "epoch": 0.5, + "grad_norm": 0.8741210699081421, + "learning_rate": 5.237940634616504e-06, + "loss": 0.5868, + "step": 7897 + }, + { + "epoch": 0.5, + "grad_norm": 0.8499166965484619, + "learning_rate": 5.2369157983389205e-06, + "loss": 0.6187, + "step": 7898 + }, + { + "epoch": 0.5, + "grad_norm": 0.9158671498298645, + "learning_rate": 5.235890952085637e-06, + "loss": 0.5634, + "step": 7899 + }, + { + "epoch": 0.5, + "grad_norm": 0.8855353593826294, + "learning_rate": 5.234866095899806e-06, + "loss": 0.5651, + "step": 7900 + }, + { + "epoch": 0.5, + "grad_norm": 0.9134857654571533, + "learning_rate": 5.23384122982458e-06, + "loss": 0.588, + "step": 7901 + }, + { + "epoch": 0.5, + "grad_norm": 0.9252248406410217, + "learning_rate": 5.232816353903113e-06, + "loss": 0.6017, + "step": 7902 + }, + { + "epoch": 0.5, + "grad_norm": 0.8008279800415039, + "learning_rate": 5.231791468178561e-06, + "loss": 0.5136, + "step": 7903 + }, + { + "epoch": 0.5, + "grad_norm": 0.8626922965049744, + "learning_rate": 5.230766572694075e-06, + "loss": 0.5724, + "step": 7904 + }, + { + "epoch": 0.5, + "grad_norm": 0.9324626326560974, + "learning_rate": 5.229741667492811e-06, + "loss": 0.6267, + "step": 7905 + }, + { + "epoch": 0.5, + "grad_norm": 0.8620643615722656, + "learning_rate": 5.228716752617926e-06, + "loss": 0.5924, + "step": 7906 + }, + { + "epoch": 0.5, + "grad_norm": 0.8927160501480103, + "learning_rate": 5.2276918281125744e-06, + "loss": 0.6103, + "step": 7907 + }, + { + "epoch": 0.5, + "grad_norm": 0.8659266233444214, + "learning_rate": 5.22666689401991e-06, + "loss": 0.5934, + "step": 7908 + }, + { + "epoch": 0.5, + "grad_norm": 0.8656795620918274, + "learning_rate": 5.225641950383094e-06, + "loss": 0.6328, + "step": 7909 + }, + { + "epoch": 0.5, + "grad_norm": 0.881079375743866, + "learning_rate": 5.2246169972452775e-06, + "loss": 0.6129, + "step": 7910 + }, + { + "epoch": 0.5, + "grad_norm": 0.8573868870735168, + "learning_rate": 5.223592034649624e-06, + "loss": 0.5608, + "step": 7911 + }, + { + "epoch": 0.5, + "grad_norm": 0.8543702960014343, + "learning_rate": 5.2225670626392845e-06, + "loss": 0.5469, + "step": 7912 + }, + { + "epoch": 0.5, + "grad_norm": 0.8963991403579712, + "learning_rate": 5.221542081257421e-06, + "loss": 0.6221, + "step": 7913 + }, + { + "epoch": 0.5, + "grad_norm": 0.9009084105491638, + "learning_rate": 5.220517090547194e-06, + "loss": 0.5719, + "step": 7914 + }, + { + "epoch": 0.5, + "grad_norm": 0.9578242301940918, + "learning_rate": 5.219492090551757e-06, + "loss": 0.6152, + "step": 7915 + }, + { + "epoch": 0.5, + "grad_norm": 0.9097537398338318, + "learning_rate": 5.21846708131427e-06, + "loss": 0.6263, + "step": 7916 + }, + { + "epoch": 0.5, + "grad_norm": 0.932669997215271, + "learning_rate": 5.217442062877897e-06, + "loss": 0.5925, + "step": 7917 + }, + { + "epoch": 0.5, + "grad_norm": 0.8461833000183105, + "learning_rate": 5.216417035285795e-06, + "loss": 0.6158, + "step": 7918 + }, + { + "epoch": 0.5, + "grad_norm": 0.9005031585693359, + "learning_rate": 5.215391998581123e-06, + "loss": 0.6002, + "step": 7919 + }, + { + "epoch": 0.5, + "grad_norm": 0.8439646363258362, + "learning_rate": 5.214366952807043e-06, + "loss": 0.5732, + "step": 7920 + }, + { + "epoch": 0.5, + "grad_norm": 0.839756190776825, + "learning_rate": 5.213341898006718e-06, + "loss": 0.5291, + "step": 7921 + }, + { + "epoch": 0.5, + "grad_norm": 0.8541595935821533, + "learning_rate": 5.212316834223307e-06, + "loss": 0.6199, + "step": 7922 + }, + { + "epoch": 0.5, + "grad_norm": 0.8544859886169434, + "learning_rate": 5.211291761499973e-06, + "loss": 0.558, + "step": 7923 + }, + { + "epoch": 0.5, + "grad_norm": 0.8676169514656067, + "learning_rate": 5.210266679879877e-06, + "loss": 0.5531, + "step": 7924 + }, + { + "epoch": 0.5, + "grad_norm": 0.9017534255981445, + "learning_rate": 5.209241589406183e-06, + "loss": 0.5912, + "step": 7925 + }, + { + "epoch": 0.5, + "grad_norm": 0.8735457062721252, + "learning_rate": 5.208216490122055e-06, + "loss": 0.5727, + "step": 7926 + }, + { + "epoch": 0.5, + "grad_norm": 0.9029328227043152, + "learning_rate": 5.207191382070653e-06, + "loss": 0.5819, + "step": 7927 + }, + { + "epoch": 0.5, + "grad_norm": 0.9156153202056885, + "learning_rate": 5.206166265295143e-06, + "loss": 0.5943, + "step": 7928 + }, + { + "epoch": 0.5, + "grad_norm": 0.8806928396224976, + "learning_rate": 5.205141139838691e-06, + "loss": 0.5618, + "step": 7929 + }, + { + "epoch": 0.5, + "grad_norm": 0.903069257736206, + "learning_rate": 5.204116005744456e-06, + "loss": 0.5822, + "step": 7930 + }, + { + "epoch": 0.5, + "grad_norm": 0.9287469983100891, + "learning_rate": 5.2030908630556075e-06, + "loss": 0.6082, + "step": 7931 + }, + { + "epoch": 0.5, + "grad_norm": 0.8750594258308411, + "learning_rate": 5.202065711815309e-06, + "loss": 0.5648, + "step": 7932 + }, + { + "epoch": 0.5, + "grad_norm": 0.8411305546760559, + "learning_rate": 5.201040552066727e-06, + "loss": 0.5076, + "step": 7933 + }, + { + "epoch": 0.5, + "grad_norm": 0.9401187896728516, + "learning_rate": 5.200015383853026e-06, + "loss": 0.5915, + "step": 7934 + }, + { + "epoch": 0.5, + "grad_norm": 0.8993878364562988, + "learning_rate": 5.1989902072173735e-06, + "loss": 0.6175, + "step": 7935 + }, + { + "epoch": 0.5, + "grad_norm": 0.9325996041297913, + "learning_rate": 5.197965022202935e-06, + "loss": 0.5977, + "step": 7936 + }, + { + "epoch": 0.5, + "grad_norm": 0.8501147627830505, + "learning_rate": 5.196939828852879e-06, + "loss": 0.5955, + "step": 7937 + }, + { + "epoch": 0.5, + "grad_norm": 0.8839433789253235, + "learning_rate": 5.195914627210372e-06, + "loss": 0.5685, + "step": 7938 + }, + { + "epoch": 0.5, + "grad_norm": 0.8878698945045471, + "learning_rate": 5.19488941731858e-06, + "loss": 0.6122, + "step": 7939 + }, + { + "epoch": 0.5, + "grad_norm": 0.8705379366874695, + "learning_rate": 5.193864199220674e-06, + "loss": 0.5531, + "step": 7940 + }, + { + "epoch": 0.5, + "grad_norm": 0.8977400064468384, + "learning_rate": 5.192838972959821e-06, + "loss": 0.6069, + "step": 7941 + }, + { + "epoch": 0.5, + "grad_norm": 0.8894720673561096, + "learning_rate": 5.19181373857919e-06, + "loss": 0.5976, + "step": 7942 + }, + { + "epoch": 0.5, + "grad_norm": 0.8529515862464905, + "learning_rate": 5.190788496121948e-06, + "loss": 0.5842, + "step": 7943 + }, + { + "epoch": 0.5, + "grad_norm": 0.8185912370681763, + "learning_rate": 5.189763245631268e-06, + "loss": 0.5169, + "step": 7944 + }, + { + "epoch": 0.5, + "grad_norm": 0.8798929452896118, + "learning_rate": 5.188737987150316e-06, + "loss": 0.6247, + "step": 7945 + }, + { + "epoch": 0.5, + "grad_norm": 0.8841909766197205, + "learning_rate": 5.1877127207222666e-06, + "loss": 0.5642, + "step": 7946 + }, + { + "epoch": 0.5, + "grad_norm": 0.8578714728355408, + "learning_rate": 5.186687446390284e-06, + "loss": 0.5656, + "step": 7947 + }, + { + "epoch": 0.5, + "grad_norm": 0.8991813659667969, + "learning_rate": 5.185662164197546e-06, + "loss": 0.5925, + "step": 7948 + }, + { + "epoch": 0.5, + "grad_norm": 0.8656896352767944, + "learning_rate": 5.184636874187218e-06, + "loss": 0.5762, + "step": 7949 + }, + { + "epoch": 0.5, + "grad_norm": 0.9480549097061157, + "learning_rate": 5.183611576402474e-06, + "loss": 0.5916, + "step": 7950 + }, + { + "epoch": 0.5, + "grad_norm": 0.8683533072471619, + "learning_rate": 5.182586270886485e-06, + "loss": 0.6007, + "step": 7951 + }, + { + "epoch": 0.5, + "grad_norm": 0.8761510848999023, + "learning_rate": 5.181560957682423e-06, + "loss": 0.5939, + "step": 7952 + }, + { + "epoch": 0.5, + "grad_norm": 0.8311535716056824, + "learning_rate": 5.180535636833462e-06, + "loss": 0.5621, + "step": 7953 + }, + { + "epoch": 0.5, + "grad_norm": 0.859836995601654, + "learning_rate": 5.179510308382773e-06, + "loss": 0.5844, + "step": 7954 + }, + { + "epoch": 0.5, + "grad_norm": 0.897769033908844, + "learning_rate": 5.178484972373528e-06, + "loss": 0.6163, + "step": 7955 + }, + { + "epoch": 0.5, + "grad_norm": 0.8741475343704224, + "learning_rate": 5.177459628848903e-06, + "loss": 0.6248, + "step": 7956 + }, + { + "epoch": 0.5, + "grad_norm": 0.8983214497566223, + "learning_rate": 5.17643427785207e-06, + "loss": 0.6236, + "step": 7957 + }, + { + "epoch": 0.5, + "grad_norm": 0.8339930772781372, + "learning_rate": 5.175408919426204e-06, + "loss": 0.5713, + "step": 7958 + }, + { + "epoch": 0.5, + "grad_norm": 0.890082061290741, + "learning_rate": 5.174383553614478e-06, + "loss": 0.5438, + "step": 7959 + }, + { + "epoch": 0.5, + "grad_norm": 0.8514465689659119, + "learning_rate": 5.1733581804600674e-06, + "loss": 0.5949, + "step": 7960 + }, + { + "epoch": 0.5, + "grad_norm": 0.9061854481697083, + "learning_rate": 5.172332800006147e-06, + "loss": 0.6432, + "step": 7961 + }, + { + "epoch": 0.5, + "grad_norm": 0.8398959636688232, + "learning_rate": 5.171307412295892e-06, + "loss": 0.5946, + "step": 7962 + }, + { + "epoch": 0.5, + "grad_norm": 0.8187358379364014, + "learning_rate": 5.1702820173724766e-06, + "loss": 0.5614, + "step": 7963 + }, + { + "epoch": 0.5, + "grad_norm": 0.9887537360191345, + "learning_rate": 5.169256615279078e-06, + "loss": 0.6059, + "step": 7964 + }, + { + "epoch": 0.5, + "grad_norm": 0.8645609617233276, + "learning_rate": 5.168231206058874e-06, + "loss": 0.5553, + "step": 7965 + }, + { + "epoch": 0.5, + "grad_norm": 0.8241131901741028, + "learning_rate": 5.167205789755037e-06, + "loss": 0.5472, + "step": 7966 + }, + { + "epoch": 0.5, + "grad_norm": 0.8981542587280273, + "learning_rate": 5.1661803664107465e-06, + "loss": 0.5675, + "step": 7967 + }, + { + "epoch": 0.5, + "grad_norm": 0.8625651001930237, + "learning_rate": 5.16515493606918e-06, + "loss": 0.5916, + "step": 7968 + }, + { + "epoch": 0.5, + "grad_norm": 0.8327503800392151, + "learning_rate": 5.164129498773513e-06, + "loss": 0.5837, + "step": 7969 + }, + { + "epoch": 0.5, + "grad_norm": 0.8808488845825195, + "learning_rate": 5.163104054566922e-06, + "loss": 0.6029, + "step": 7970 + }, + { + "epoch": 0.51, + "grad_norm": 0.8956292867660522, + "learning_rate": 5.16207860349259e-06, + "loss": 0.5893, + "step": 7971 + }, + { + "epoch": 0.51, + "grad_norm": 0.8336197137832642, + "learning_rate": 5.16105314559369e-06, + "loss": 0.5671, + "step": 7972 + }, + { + "epoch": 0.51, + "grad_norm": 0.8766692280769348, + "learning_rate": 5.160027680913402e-06, + "loss": 0.5806, + "step": 7973 + }, + { + "epoch": 0.51, + "grad_norm": 0.8673431873321533, + "learning_rate": 5.159002209494905e-06, + "loss": 0.6534, + "step": 7974 + }, + { + "epoch": 0.51, + "grad_norm": 0.8875123858451843, + "learning_rate": 5.157976731381379e-06, + "loss": 0.5969, + "step": 7975 + }, + { + "epoch": 0.51, + "grad_norm": 0.9223279356956482, + "learning_rate": 5.1569512466160025e-06, + "loss": 0.6288, + "step": 7976 + }, + { + "epoch": 0.51, + "grad_norm": 0.9694954752922058, + "learning_rate": 5.155925755241954e-06, + "loss": 0.6392, + "step": 7977 + }, + { + "epoch": 0.51, + "grad_norm": 0.9013630151748657, + "learning_rate": 5.1549002573024144e-06, + "loss": 0.5671, + "step": 7978 + }, + { + "epoch": 0.51, + "grad_norm": 0.8672821521759033, + "learning_rate": 5.153874752840564e-06, + "loss": 0.5583, + "step": 7979 + }, + { + "epoch": 0.51, + "grad_norm": 1.076423168182373, + "learning_rate": 5.152849241899585e-06, + "loss": 0.5713, + "step": 7980 + }, + { + "epoch": 0.51, + "grad_norm": 0.9117089509963989, + "learning_rate": 5.151823724522653e-06, + "loss": 0.5954, + "step": 7981 + }, + { + "epoch": 0.51, + "grad_norm": 0.8285648226737976, + "learning_rate": 5.150798200752953e-06, + "loss": 0.5856, + "step": 7982 + }, + { + "epoch": 0.51, + "grad_norm": 0.8754099607467651, + "learning_rate": 5.149772670633666e-06, + "loss": 0.5748, + "step": 7983 + }, + { + "epoch": 0.51, + "grad_norm": 0.8837385177612305, + "learning_rate": 5.148747134207974e-06, + "loss": 0.5615, + "step": 7984 + }, + { + "epoch": 0.51, + "grad_norm": 0.8902435302734375, + "learning_rate": 5.147721591519056e-06, + "loss": 0.5814, + "step": 7985 + }, + { + "epoch": 0.51, + "grad_norm": 0.8963085412979126, + "learning_rate": 5.146696042610095e-06, + "loss": 0.5477, + "step": 7986 + }, + { + "epoch": 0.51, + "grad_norm": 0.9228818416595459, + "learning_rate": 5.145670487524276e-06, + "loss": 0.6119, + "step": 7987 + }, + { + "epoch": 0.51, + "grad_norm": 0.9034307599067688, + "learning_rate": 5.144644926304778e-06, + "loss": 0.6482, + "step": 7988 + }, + { + "epoch": 0.51, + "grad_norm": 0.9602980017662048, + "learning_rate": 5.1436193589947855e-06, + "loss": 0.5889, + "step": 7989 + }, + { + "epoch": 0.51, + "grad_norm": 0.9697549939155579, + "learning_rate": 5.1425937856374816e-06, + "loss": 0.6406, + "step": 7990 + }, + { + "epoch": 0.51, + "grad_norm": 0.8972442746162415, + "learning_rate": 5.141568206276051e-06, + "loss": 0.6258, + "step": 7991 + }, + { + "epoch": 0.51, + "grad_norm": 0.8347691297531128, + "learning_rate": 5.140542620953675e-06, + "loss": 0.5349, + "step": 7992 + }, + { + "epoch": 0.51, + "grad_norm": 1.0211116075515747, + "learning_rate": 5.139517029713537e-06, + "loss": 0.665, + "step": 7993 + }, + { + "epoch": 0.51, + "grad_norm": 0.8696901202201843, + "learning_rate": 5.138491432598822e-06, + "loss": 0.5532, + "step": 7994 + }, + { + "epoch": 0.51, + "grad_norm": 0.9338617920875549, + "learning_rate": 5.137465829652716e-06, + "loss": 0.5866, + "step": 7995 + }, + { + "epoch": 0.51, + "grad_norm": 0.9527667760848999, + "learning_rate": 5.136440220918401e-06, + "loss": 0.5829, + "step": 7996 + }, + { + "epoch": 0.51, + "grad_norm": 0.9329034686088562, + "learning_rate": 5.135414606439063e-06, + "loss": 0.6293, + "step": 7997 + }, + { + "epoch": 0.51, + "grad_norm": 0.924534797668457, + "learning_rate": 5.134388986257887e-06, + "loss": 0.5911, + "step": 7998 + }, + { + "epoch": 0.51, + "grad_norm": 0.8370699286460876, + "learning_rate": 5.133363360418059e-06, + "loss": 0.6032, + "step": 7999 + }, + { + "epoch": 0.51, + "grad_norm": 0.8892449736595154, + "learning_rate": 5.132337728962763e-06, + "loss": 0.6089, + "step": 8000 + }, + { + "epoch": 0.51, + "grad_norm": 0.8967301249504089, + "learning_rate": 5.131312091935186e-06, + "loss": 0.5924, + "step": 8001 + }, + { + "epoch": 0.51, + "grad_norm": 0.8810504674911499, + "learning_rate": 5.130286449378513e-06, + "loss": 0.6515, + "step": 8002 + }, + { + "epoch": 0.51, + "grad_norm": 0.9304781556129456, + "learning_rate": 5.129260801335932e-06, + "loss": 0.6081, + "step": 8003 + }, + { + "epoch": 0.51, + "grad_norm": 0.8867761492729187, + "learning_rate": 5.128235147850629e-06, + "loss": 0.6011, + "step": 8004 + }, + { + "epoch": 0.51, + "grad_norm": 0.9013170003890991, + "learning_rate": 5.127209488965787e-06, + "loss": 0.5825, + "step": 8005 + }, + { + "epoch": 0.51, + "grad_norm": 0.8430556654930115, + "learning_rate": 5.1261838247246e-06, + "loss": 0.5425, + "step": 8006 + }, + { + "epoch": 0.51, + "grad_norm": 0.8869624733924866, + "learning_rate": 5.125158155170248e-06, + "loss": 0.5767, + "step": 8007 + }, + { + "epoch": 0.51, + "grad_norm": 0.9233295321464539, + "learning_rate": 5.124132480345922e-06, + "loss": 0.5623, + "step": 8008 + }, + { + "epoch": 0.51, + "grad_norm": 0.9272169470787048, + "learning_rate": 5.123106800294809e-06, + "loss": 0.596, + "step": 8009 + }, + { + "epoch": 0.51, + "grad_norm": 0.8874875903129578, + "learning_rate": 5.122081115060098e-06, + "loss": 0.5913, + "step": 8010 + }, + { + "epoch": 0.51, + "grad_norm": 0.8825517296791077, + "learning_rate": 5.121055424684975e-06, + "loss": 0.5532, + "step": 8011 + }, + { + "epoch": 0.51, + "grad_norm": 0.8856724500656128, + "learning_rate": 5.12002972921263e-06, + "loss": 0.5788, + "step": 8012 + }, + { + "epoch": 0.51, + "grad_norm": 0.9288915395736694, + "learning_rate": 5.119004028686249e-06, + "loss": 0.5705, + "step": 8013 + }, + { + "epoch": 0.51, + "grad_norm": 0.897471010684967, + "learning_rate": 5.117978323149025e-06, + "loss": 0.6277, + "step": 8014 + }, + { + "epoch": 0.51, + "grad_norm": 0.8995818495750427, + "learning_rate": 5.116952612644141e-06, + "loss": 0.5288, + "step": 8015 + }, + { + "epoch": 0.51, + "grad_norm": 0.9045858979225159, + "learning_rate": 5.1159268972147915e-06, + "loss": 0.6051, + "step": 8016 + }, + { + "epoch": 0.51, + "grad_norm": 0.913692057132721, + "learning_rate": 5.114901176904164e-06, + "loss": 0.5748, + "step": 8017 + }, + { + "epoch": 0.51, + "grad_norm": 0.865149736404419, + "learning_rate": 5.113875451755447e-06, + "loss": 0.6055, + "step": 8018 + }, + { + "epoch": 0.51, + "grad_norm": 0.828730046749115, + "learning_rate": 5.11284972181183e-06, + "loss": 0.5581, + "step": 8019 + }, + { + "epoch": 0.51, + "grad_norm": 0.8808106184005737, + "learning_rate": 5.111823987116504e-06, + "loss": 0.5795, + "step": 8020 + }, + { + "epoch": 0.51, + "grad_norm": 0.8963019847869873, + "learning_rate": 5.110798247712661e-06, + "loss": 0.5901, + "step": 8021 + }, + { + "epoch": 0.51, + "grad_norm": 0.9240871667861938, + "learning_rate": 5.109772503643486e-06, + "loss": 0.6433, + "step": 8022 + }, + { + "epoch": 0.51, + "grad_norm": 0.8749609589576721, + "learning_rate": 5.108746754952177e-06, + "loss": 0.5391, + "step": 8023 + }, + { + "epoch": 0.51, + "grad_norm": 0.906970202922821, + "learning_rate": 5.107721001681915e-06, + "loss": 0.6189, + "step": 8024 + }, + { + "epoch": 0.51, + "grad_norm": 0.8912851214408875, + "learning_rate": 5.1066952438759e-06, + "loss": 0.5633, + "step": 8025 + }, + { + "epoch": 0.51, + "grad_norm": 0.8463259339332581, + "learning_rate": 5.105669481577319e-06, + "loss": 0.6057, + "step": 8026 + }, + { + "epoch": 0.51, + "grad_norm": 0.8847749829292297, + "learning_rate": 5.104643714829362e-06, + "loss": 0.6348, + "step": 8027 + }, + { + "epoch": 0.51, + "grad_norm": 0.8036050796508789, + "learning_rate": 5.103617943675224e-06, + "loss": 0.586, + "step": 8028 + }, + { + "epoch": 0.51, + "grad_norm": 0.8839384913444519, + "learning_rate": 5.102592168158095e-06, + "loss": 0.5924, + "step": 8029 + }, + { + "epoch": 0.51, + "grad_norm": 0.9251484870910645, + "learning_rate": 5.101566388321165e-06, + "loss": 0.572, + "step": 8030 + }, + { + "epoch": 0.51, + "grad_norm": 0.8279865980148315, + "learning_rate": 5.100540604207629e-06, + "loss": 0.5797, + "step": 8031 + }, + { + "epoch": 0.51, + "grad_norm": 0.9217899441719055, + "learning_rate": 5.099514815860678e-06, + "loss": 0.5839, + "step": 8032 + }, + { + "epoch": 0.51, + "grad_norm": 0.903213381767273, + "learning_rate": 5.098489023323504e-06, + "loss": 0.5239, + "step": 8033 + }, + { + "epoch": 0.51, + "grad_norm": 0.8775154948234558, + "learning_rate": 5.0974632266393e-06, + "loss": 0.591, + "step": 8034 + }, + { + "epoch": 0.51, + "grad_norm": 0.8651240468025208, + "learning_rate": 5.0964374258512585e-06, + "loss": 0.5767, + "step": 8035 + }, + { + "epoch": 0.51, + "grad_norm": 0.975160539150238, + "learning_rate": 5.0954116210025725e-06, + "loss": 0.6185, + "step": 8036 + }, + { + "epoch": 0.51, + "grad_norm": 0.9247754812240601, + "learning_rate": 5.094385812136435e-06, + "loss": 0.5795, + "step": 8037 + }, + { + "epoch": 0.51, + "grad_norm": 0.8965883255004883, + "learning_rate": 5.09335999929604e-06, + "loss": 0.577, + "step": 8038 + }, + { + "epoch": 0.51, + "grad_norm": 0.8666002750396729, + "learning_rate": 5.092334182524578e-06, + "loss": 0.5766, + "step": 8039 + }, + { + "epoch": 0.51, + "grad_norm": 0.94881272315979, + "learning_rate": 5.091308361865247e-06, + "loss": 0.6627, + "step": 8040 + }, + { + "epoch": 0.51, + "grad_norm": 0.8409824371337891, + "learning_rate": 5.090282537361237e-06, + "loss": 0.5406, + "step": 8041 + }, + { + "epoch": 0.51, + "grad_norm": 0.9426827430725098, + "learning_rate": 5.089256709055745e-06, + "loss": 0.6425, + "step": 8042 + }, + { + "epoch": 0.51, + "grad_norm": 0.925849199295044, + "learning_rate": 5.088230876991962e-06, + "loss": 0.5865, + "step": 8043 + }, + { + "epoch": 0.51, + "grad_norm": 0.8730261325836182, + "learning_rate": 5.087205041213085e-06, + "loss": 0.6125, + "step": 8044 + }, + { + "epoch": 0.51, + "grad_norm": 0.9450942277908325, + "learning_rate": 5.086179201762306e-06, + "loss": 0.6118, + "step": 8045 + }, + { + "epoch": 0.51, + "grad_norm": 0.9059416055679321, + "learning_rate": 5.085153358682822e-06, + "loss": 0.5707, + "step": 8046 + }, + { + "epoch": 0.51, + "grad_norm": 0.867950975894928, + "learning_rate": 5.084127512017823e-06, + "loss": 0.5792, + "step": 8047 + }, + { + "epoch": 0.51, + "grad_norm": 0.8605546951293945, + "learning_rate": 5.083101661810511e-06, + "loss": 0.5895, + "step": 8048 + }, + { + "epoch": 0.51, + "grad_norm": 0.9312983155250549, + "learning_rate": 5.082075808104075e-06, + "loss": 0.5799, + "step": 8049 + }, + { + "epoch": 0.51, + "grad_norm": 0.8603020310401917, + "learning_rate": 5.081049950941713e-06, + "loss": 0.6026, + "step": 8050 + }, + { + "epoch": 0.51, + "grad_norm": 0.8669036626815796, + "learning_rate": 5.080024090366618e-06, + "loss": 0.6017, + "step": 8051 + }, + { + "epoch": 0.51, + "grad_norm": 0.9047536253929138, + "learning_rate": 5.078998226421989e-06, + "loss": 0.6222, + "step": 8052 + }, + { + "epoch": 0.51, + "grad_norm": 0.9225742816925049, + "learning_rate": 5.07797235915102e-06, + "loss": 0.6049, + "step": 8053 + }, + { + "epoch": 0.51, + "grad_norm": 0.9004045724868774, + "learning_rate": 5.076946488596905e-06, + "loss": 0.6042, + "step": 8054 + }, + { + "epoch": 0.51, + "grad_norm": 0.9334387183189392, + "learning_rate": 5.07592061480284e-06, + "loss": 0.6317, + "step": 8055 + }, + { + "epoch": 0.51, + "grad_norm": 0.890455424785614, + "learning_rate": 5.074894737812023e-06, + "loss": 0.5758, + "step": 8056 + }, + { + "epoch": 0.51, + "grad_norm": 0.8868134021759033, + "learning_rate": 5.07386885766765e-06, + "loss": 0.6435, + "step": 8057 + }, + { + "epoch": 0.51, + "grad_norm": 0.8172594904899597, + "learning_rate": 5.072842974412916e-06, + "loss": 0.5172, + "step": 8058 + }, + { + "epoch": 0.51, + "grad_norm": 0.8145936727523804, + "learning_rate": 5.071817088091017e-06, + "loss": 0.5327, + "step": 8059 + }, + { + "epoch": 0.51, + "grad_norm": 0.8804033994674683, + "learning_rate": 5.0707911987451496e-06, + "loss": 0.6371, + "step": 8060 + }, + { + "epoch": 0.51, + "grad_norm": 0.9473575353622437, + "learning_rate": 5.0697653064185125e-06, + "loss": 0.6165, + "step": 8061 + }, + { + "epoch": 0.51, + "grad_norm": 0.894706130027771, + "learning_rate": 5.068739411154301e-06, + "loss": 0.5932, + "step": 8062 + }, + { + "epoch": 0.51, + "grad_norm": 0.8926814198493958, + "learning_rate": 5.0677135129957115e-06, + "loss": 0.6044, + "step": 8063 + }, + { + "epoch": 0.51, + "grad_norm": 0.8846773505210876, + "learning_rate": 5.066687611985941e-06, + "loss": 0.5754, + "step": 8064 + }, + { + "epoch": 0.51, + "grad_norm": 0.8815335631370544, + "learning_rate": 5.065661708168188e-06, + "loss": 0.5586, + "step": 8065 + }, + { + "epoch": 0.51, + "grad_norm": 0.916784942150116, + "learning_rate": 5.064635801585649e-06, + "loss": 0.5575, + "step": 8066 + }, + { + "epoch": 0.51, + "grad_norm": 0.8539628982543945, + "learning_rate": 5.06360989228152e-06, + "loss": 0.5518, + "step": 8067 + }, + { + "epoch": 0.51, + "grad_norm": 0.9269511103630066, + "learning_rate": 5.062583980299002e-06, + "loss": 0.5955, + "step": 8068 + }, + { + "epoch": 0.51, + "grad_norm": 0.856561541557312, + "learning_rate": 5.061558065681288e-06, + "loss": 0.5672, + "step": 8069 + }, + { + "epoch": 0.51, + "grad_norm": 0.8841691017150879, + "learning_rate": 5.060532148471578e-06, + "loss": 0.6283, + "step": 8070 + }, + { + "epoch": 0.51, + "grad_norm": 0.832876980304718, + "learning_rate": 5.059506228713071e-06, + "loss": 0.5384, + "step": 8071 + }, + { + "epoch": 0.51, + "grad_norm": 0.8955254554748535, + "learning_rate": 5.058480306448965e-06, + "loss": 0.6119, + "step": 8072 + }, + { + "epoch": 0.51, + "grad_norm": 0.8828347325325012, + "learning_rate": 5.057454381722455e-06, + "loss": 0.5756, + "step": 8073 + }, + { + "epoch": 0.51, + "grad_norm": 0.9125185012817383, + "learning_rate": 5.056428454576741e-06, + "loss": 0.5958, + "step": 8074 + }, + { + "epoch": 0.51, + "grad_norm": 0.8723667860031128, + "learning_rate": 5.0554025250550195e-06, + "loss": 0.6055, + "step": 8075 + }, + { + "epoch": 0.51, + "grad_norm": 0.8208953738212585, + "learning_rate": 5.054376593200493e-06, + "loss": 0.5141, + "step": 8076 + }, + { + "epoch": 0.51, + "grad_norm": 0.8895772695541382, + "learning_rate": 5.053350659056356e-06, + "loss": 0.5693, + "step": 8077 + }, + { + "epoch": 0.51, + "grad_norm": 0.9235116243362427, + "learning_rate": 5.052324722665809e-06, + "loss": 0.6041, + "step": 8078 + }, + { + "epoch": 0.51, + "grad_norm": 0.9034695625305176, + "learning_rate": 5.0512987840720495e-06, + "loss": 0.5887, + "step": 8079 + }, + { + "epoch": 0.51, + "grad_norm": 0.9228042960166931, + "learning_rate": 5.0502728433182765e-06, + "loss": 0.5828, + "step": 8080 + }, + { + "epoch": 0.51, + "grad_norm": 0.927101731300354, + "learning_rate": 5.049246900447689e-06, + "loss": 0.6073, + "step": 8081 + }, + { + "epoch": 0.51, + "grad_norm": 0.8888689279556274, + "learning_rate": 5.048220955503487e-06, + "loss": 0.581, + "step": 8082 + }, + { + "epoch": 0.51, + "grad_norm": 0.8097081184387207, + "learning_rate": 5.047195008528868e-06, + "loss": 0.5485, + "step": 8083 + }, + { + "epoch": 0.51, + "grad_norm": 0.8781763315200806, + "learning_rate": 5.04616905956703e-06, + "loss": 0.555, + "step": 8084 + }, + { + "epoch": 0.51, + "grad_norm": 0.8688362836837769, + "learning_rate": 5.045143108661174e-06, + "loss": 0.5991, + "step": 8085 + }, + { + "epoch": 0.51, + "grad_norm": 0.8414211869239807, + "learning_rate": 5.044117155854499e-06, + "loss": 0.6283, + "step": 8086 + }, + { + "epoch": 0.51, + "grad_norm": 0.7982466816902161, + "learning_rate": 5.043091201190204e-06, + "loss": 0.5701, + "step": 8087 + }, + { + "epoch": 0.51, + "grad_norm": 0.8860836029052734, + "learning_rate": 5.042065244711488e-06, + "loss": 0.5306, + "step": 8088 + }, + { + "epoch": 0.51, + "grad_norm": 0.8865799903869629, + "learning_rate": 5.041039286461552e-06, + "loss": 0.5947, + "step": 8089 + }, + { + "epoch": 0.51, + "grad_norm": 0.8962934017181396, + "learning_rate": 5.040013326483593e-06, + "loss": 0.596, + "step": 8090 + }, + { + "epoch": 0.51, + "grad_norm": 1.0645703077316284, + "learning_rate": 5.038987364820813e-06, + "loss": 0.65, + "step": 8091 + }, + { + "epoch": 0.51, + "grad_norm": 0.8976729512214661, + "learning_rate": 5.037961401516411e-06, + "loss": 0.5364, + "step": 8092 + }, + { + "epoch": 0.51, + "grad_norm": 0.8850423097610474, + "learning_rate": 5.036935436613586e-06, + "loss": 0.5901, + "step": 8093 + }, + { + "epoch": 0.51, + "grad_norm": 0.9318758845329285, + "learning_rate": 5.0359094701555375e-06, + "loss": 0.5786, + "step": 8094 + }, + { + "epoch": 0.51, + "grad_norm": 0.8748635649681091, + "learning_rate": 5.034883502185467e-06, + "loss": 0.5917, + "step": 8095 + }, + { + "epoch": 0.51, + "grad_norm": 0.894095242023468, + "learning_rate": 5.033857532746573e-06, + "loss": 0.5896, + "step": 8096 + }, + { + "epoch": 0.51, + "grad_norm": 0.8149279952049255, + "learning_rate": 5.032831561882057e-06, + "loss": 0.5542, + "step": 8097 + }, + { + "epoch": 0.51, + "grad_norm": 0.852733850479126, + "learning_rate": 5.0318055896351185e-06, + "loss": 0.5888, + "step": 8098 + }, + { + "epoch": 0.51, + "grad_norm": 0.7920023202896118, + "learning_rate": 5.030779616048955e-06, + "loss": 0.513, + "step": 8099 + }, + { + "epoch": 0.51, + "grad_norm": 0.9614823460578918, + "learning_rate": 5.02975364116677e-06, + "loss": 0.594, + "step": 8100 + }, + { + "epoch": 0.51, + "grad_norm": 0.9171684980392456, + "learning_rate": 5.0287276650317626e-06, + "loss": 0.5822, + "step": 8101 + }, + { + "epoch": 0.51, + "grad_norm": 0.8981472849845886, + "learning_rate": 5.027701687687135e-06, + "loss": 0.601, + "step": 8102 + }, + { + "epoch": 0.51, + "grad_norm": 0.8312231302261353, + "learning_rate": 5.026675709176084e-06, + "loss": 0.5113, + "step": 8103 + }, + { + "epoch": 0.51, + "grad_norm": 0.915739893913269, + "learning_rate": 5.0256497295418115e-06, + "loss": 0.5998, + "step": 8104 + }, + { + "epoch": 0.51, + "grad_norm": 0.9062038660049438, + "learning_rate": 5.0246237488275185e-06, + "loss": 0.5541, + "step": 8105 + }, + { + "epoch": 0.51, + "grad_norm": 0.8854556679725647, + "learning_rate": 5.0235977670764055e-06, + "loss": 0.5467, + "step": 8106 + }, + { + "epoch": 0.51, + "grad_norm": 0.8781667947769165, + "learning_rate": 5.022571784331672e-06, + "loss": 0.6031, + "step": 8107 + }, + { + "epoch": 0.51, + "grad_norm": 0.8494471311569214, + "learning_rate": 5.021545800636519e-06, + "loss": 0.5906, + "step": 8108 + }, + { + "epoch": 0.51, + "grad_norm": 0.8764198422431946, + "learning_rate": 5.020519816034148e-06, + "loss": 0.5826, + "step": 8109 + }, + { + "epoch": 0.51, + "grad_norm": 0.9023407101631165, + "learning_rate": 5.019493830567758e-06, + "loss": 0.605, + "step": 8110 + }, + { + "epoch": 0.51, + "grad_norm": 0.8451856374740601, + "learning_rate": 5.018467844280553e-06, + "loss": 0.5689, + "step": 8111 + }, + { + "epoch": 0.51, + "grad_norm": 0.8786736130714417, + "learning_rate": 5.0174418572157276e-06, + "loss": 0.5584, + "step": 8112 + }, + { + "epoch": 0.51, + "grad_norm": 0.8404189348220825, + "learning_rate": 5.0164158694164884e-06, + "loss": 0.5621, + "step": 8113 + }, + { + "epoch": 0.51, + "grad_norm": 0.9702364802360535, + "learning_rate": 5.015389880926035e-06, + "loss": 0.614, + "step": 8114 + }, + { + "epoch": 0.51, + "grad_norm": 0.8589154481887817, + "learning_rate": 5.014363891787567e-06, + "loss": 0.5671, + "step": 8115 + }, + { + "epoch": 0.51, + "grad_norm": 0.9409849047660828, + "learning_rate": 5.013337902044283e-06, + "loss": 0.5915, + "step": 8116 + }, + { + "epoch": 0.51, + "grad_norm": 1.0001648664474487, + "learning_rate": 5.0123119117393894e-06, + "loss": 0.5942, + "step": 8117 + }, + { + "epoch": 0.51, + "grad_norm": 0.86786288022995, + "learning_rate": 5.011285920916082e-06, + "loss": 0.6033, + "step": 8118 + }, + { + "epoch": 0.51, + "grad_norm": 0.8556507229804993, + "learning_rate": 5.010259929617565e-06, + "loss": 0.6032, + "step": 8119 + }, + { + "epoch": 0.51, + "grad_norm": 0.9855061769485474, + "learning_rate": 5.009233937887036e-06, + "loss": 0.5679, + "step": 8120 + }, + { + "epoch": 0.51, + "grad_norm": 0.8764082789421082, + "learning_rate": 5.0082079457677e-06, + "loss": 0.558, + "step": 8121 + }, + { + "epoch": 0.51, + "grad_norm": 0.9630783796310425, + "learning_rate": 5.007181953302755e-06, + "loss": 0.6147, + "step": 8122 + }, + { + "epoch": 0.51, + "grad_norm": 0.882135808467865, + "learning_rate": 5.006155960535405e-06, + "loss": 0.606, + "step": 8123 + }, + { + "epoch": 0.51, + "grad_norm": 0.8694536685943604, + "learning_rate": 5.005129967508845e-06, + "loss": 0.6031, + "step": 8124 + }, + { + "epoch": 0.51, + "grad_norm": 0.8778092265129089, + "learning_rate": 5.004103974266284e-06, + "loss": 0.5793, + "step": 8125 + }, + { + "epoch": 0.51, + "grad_norm": 0.869263768196106, + "learning_rate": 5.0030779808509155e-06, + "loss": 0.5959, + "step": 8126 + }, + { + "epoch": 0.51, + "grad_norm": 0.8371315598487854, + "learning_rate": 5.002051987305947e-06, + "loss": 0.5804, + "step": 8127 + }, + { + "epoch": 0.51, + "grad_norm": 0.8696556091308594, + "learning_rate": 5.0010259936745735e-06, + "loss": 0.544, + "step": 8128 + }, + { + "epoch": 0.52, + "grad_norm": 0.8770456314086914, + "learning_rate": 5e-06, + "loss": 0.6189, + "step": 8129 + }, + { + "epoch": 0.52, + "grad_norm": 0.8599352240562439, + "learning_rate": 4.998974006325428e-06, + "loss": 0.5789, + "step": 8130 + }, + { + "epoch": 0.52, + "grad_norm": 0.9081400036811829, + "learning_rate": 4.997948012694056e-06, + "loss": 0.5858, + "step": 8131 + }, + { + "epoch": 0.52, + "grad_norm": 0.9235000014305115, + "learning_rate": 4.9969220191490845e-06, + "loss": 0.6132, + "step": 8132 + }, + { + "epoch": 0.52, + "grad_norm": 0.8584170341491699, + "learning_rate": 4.995896025733719e-06, + "loss": 0.6016, + "step": 8133 + }, + { + "epoch": 0.52, + "grad_norm": 0.9502587914466858, + "learning_rate": 4.994870032491156e-06, + "loss": 0.5802, + "step": 8134 + }, + { + "epoch": 0.52, + "grad_norm": 0.8924700021743774, + "learning_rate": 4.993844039464598e-06, + "loss": 0.6686, + "step": 8135 + }, + { + "epoch": 0.52, + "grad_norm": 0.8873922824859619, + "learning_rate": 4.992818046697245e-06, + "loss": 0.5937, + "step": 8136 + }, + { + "epoch": 0.52, + "grad_norm": 0.9198696613311768, + "learning_rate": 4.991792054232301e-06, + "loss": 0.5419, + "step": 8137 + }, + { + "epoch": 0.52, + "grad_norm": 0.8334248661994934, + "learning_rate": 4.990766062112966e-06, + "loss": 0.5722, + "step": 8138 + }, + { + "epoch": 0.52, + "grad_norm": 0.8983075022697449, + "learning_rate": 4.989740070382438e-06, + "loss": 0.588, + "step": 8139 + }, + { + "epoch": 0.52, + "grad_norm": 0.8269035220146179, + "learning_rate": 4.988714079083918e-06, + "loss": 0.5973, + "step": 8140 + }, + { + "epoch": 0.52, + "grad_norm": 1.0999228954315186, + "learning_rate": 4.987688088260613e-06, + "loss": 0.5795, + "step": 8141 + }, + { + "epoch": 0.52, + "grad_norm": 0.9255691766738892, + "learning_rate": 4.986662097955718e-06, + "loss": 0.5924, + "step": 8142 + }, + { + "epoch": 0.52, + "grad_norm": 0.8680478930473328, + "learning_rate": 4.985636108212435e-06, + "loss": 0.6024, + "step": 8143 + }, + { + "epoch": 0.52, + "grad_norm": 0.844215452671051, + "learning_rate": 4.984610119073965e-06, + "loss": 0.5356, + "step": 8144 + }, + { + "epoch": 0.52, + "grad_norm": 0.8514224886894226, + "learning_rate": 4.9835841305835115e-06, + "loss": 0.5734, + "step": 8145 + }, + { + "epoch": 0.52, + "grad_norm": 0.8678837418556213, + "learning_rate": 4.982558142784273e-06, + "loss": 0.6142, + "step": 8146 + }, + { + "epoch": 0.52, + "grad_norm": 0.8894163966178894, + "learning_rate": 4.98153215571945e-06, + "loss": 0.6196, + "step": 8147 + }, + { + "epoch": 0.52, + "grad_norm": 0.9071709513664246, + "learning_rate": 4.980506169432243e-06, + "loss": 0.5878, + "step": 8148 + }, + { + "epoch": 0.52, + "grad_norm": 0.9013687372207642, + "learning_rate": 4.979480183965852e-06, + "loss": 0.6077, + "step": 8149 + }, + { + "epoch": 0.52, + "grad_norm": 0.8970010876655579, + "learning_rate": 4.9784541993634824e-06, + "loss": 0.5885, + "step": 8150 + }, + { + "epoch": 0.52, + "grad_norm": 0.9553268551826477, + "learning_rate": 4.977428215668329e-06, + "loss": 0.6642, + "step": 8151 + }, + { + "epoch": 0.52, + "grad_norm": 0.8925964832305908, + "learning_rate": 4.976402232923597e-06, + "loss": 0.6099, + "step": 8152 + }, + { + "epoch": 0.52, + "grad_norm": 0.9235319495201111, + "learning_rate": 4.9753762511724815e-06, + "loss": 0.548, + "step": 8153 + }, + { + "epoch": 0.52, + "grad_norm": 0.8916828036308289, + "learning_rate": 4.974350270458189e-06, + "loss": 0.6115, + "step": 8154 + }, + { + "epoch": 0.52, + "grad_norm": 0.8752048015594482, + "learning_rate": 4.9733242908239175e-06, + "loss": 0.6241, + "step": 8155 + }, + { + "epoch": 0.52, + "grad_norm": 0.9507616758346558, + "learning_rate": 4.972298312312867e-06, + "loss": 0.6082, + "step": 8156 + }, + { + "epoch": 0.52, + "grad_norm": 0.9458578824996948, + "learning_rate": 4.9712723349682365e-06, + "loss": 0.5945, + "step": 8157 + }, + { + "epoch": 0.52, + "grad_norm": 0.8483637571334839, + "learning_rate": 4.970246358833231e-06, + "loss": 0.5418, + "step": 8158 + }, + { + "epoch": 0.52, + "grad_norm": 0.8706662654876709, + "learning_rate": 4.969220383951046e-06, + "loss": 0.5383, + "step": 8159 + }, + { + "epoch": 0.52, + "grad_norm": 0.843956470489502, + "learning_rate": 4.968194410364884e-06, + "loss": 0.5799, + "step": 8160 + }, + { + "epoch": 0.52, + "grad_norm": 0.887324333190918, + "learning_rate": 4.967168438117945e-06, + "loss": 0.5922, + "step": 8161 + }, + { + "epoch": 0.52, + "grad_norm": 0.8479996919631958, + "learning_rate": 4.966142467253428e-06, + "loss": 0.5402, + "step": 8162 + }, + { + "epoch": 0.52, + "grad_norm": 0.906588077545166, + "learning_rate": 4.965116497814534e-06, + "loss": 0.6009, + "step": 8163 + }, + { + "epoch": 0.52, + "grad_norm": 0.8441720008850098, + "learning_rate": 4.964090529844464e-06, + "loss": 0.5834, + "step": 8164 + }, + { + "epoch": 0.52, + "grad_norm": 0.8537503480911255, + "learning_rate": 4.963064563386416e-06, + "loss": 0.5948, + "step": 8165 + }, + { + "epoch": 0.52, + "grad_norm": 0.9106093049049377, + "learning_rate": 4.96203859848359e-06, + "loss": 0.5898, + "step": 8166 + }, + { + "epoch": 0.52, + "grad_norm": 0.9316169619560242, + "learning_rate": 4.961012635179188e-06, + "loss": 0.5515, + "step": 8167 + }, + { + "epoch": 0.52, + "grad_norm": 0.8743208646774292, + "learning_rate": 4.959986673516408e-06, + "loss": 0.5791, + "step": 8168 + }, + { + "epoch": 0.52, + "grad_norm": 0.878601610660553, + "learning_rate": 4.95896071353845e-06, + "loss": 0.5654, + "step": 8169 + }, + { + "epoch": 0.52, + "grad_norm": 0.9046491384506226, + "learning_rate": 4.9579347552885125e-06, + "loss": 0.6205, + "step": 8170 + }, + { + "epoch": 0.52, + "grad_norm": 0.9015951156616211, + "learning_rate": 4.956908798809797e-06, + "loss": 0.6079, + "step": 8171 + }, + { + "epoch": 0.52, + "grad_norm": 0.9551298022270203, + "learning_rate": 4.955882844145503e-06, + "loss": 0.6354, + "step": 8172 + }, + { + "epoch": 0.52, + "grad_norm": 0.9143627882003784, + "learning_rate": 4.954856891338827e-06, + "loss": 0.6224, + "step": 8173 + }, + { + "epoch": 0.52, + "grad_norm": 0.9006348252296448, + "learning_rate": 4.95383094043297e-06, + "loss": 0.5597, + "step": 8174 + }, + { + "epoch": 0.52, + "grad_norm": 0.8101087808609009, + "learning_rate": 4.952804991471134e-06, + "loss": 0.5693, + "step": 8175 + }, + { + "epoch": 0.52, + "grad_norm": 0.847748339176178, + "learning_rate": 4.951779044496515e-06, + "loss": 0.5625, + "step": 8176 + }, + { + "epoch": 0.52, + "grad_norm": 0.950564980506897, + "learning_rate": 4.9507530995523115e-06, + "loss": 0.5894, + "step": 8177 + }, + { + "epoch": 0.52, + "grad_norm": 0.8164709806442261, + "learning_rate": 4.949727156681726e-06, + "loss": 0.6046, + "step": 8178 + }, + { + "epoch": 0.52, + "grad_norm": 0.887380838394165, + "learning_rate": 4.948701215927951e-06, + "loss": 0.5241, + "step": 8179 + }, + { + "epoch": 0.52, + "grad_norm": 0.8414967060089111, + "learning_rate": 4.947675277334193e-06, + "loss": 0.5771, + "step": 8180 + }, + { + "epoch": 0.52, + "grad_norm": 0.9173058867454529, + "learning_rate": 4.946649340943645e-06, + "loss": 0.6376, + "step": 8181 + }, + { + "epoch": 0.52, + "grad_norm": 0.9363717436790466, + "learning_rate": 4.9456234067995094e-06, + "loss": 0.6236, + "step": 8182 + }, + { + "epoch": 0.52, + "grad_norm": 0.8463205099105835, + "learning_rate": 4.9445974749449805e-06, + "loss": 0.555, + "step": 8183 + }, + { + "epoch": 0.52, + "grad_norm": 0.8751280307769775, + "learning_rate": 4.9435715454232615e-06, + "loss": 0.584, + "step": 8184 + }, + { + "epoch": 0.52, + "grad_norm": 0.9037527441978455, + "learning_rate": 4.942545618277547e-06, + "loss": 0.614, + "step": 8185 + }, + { + "epoch": 0.52, + "grad_norm": 0.8870174884796143, + "learning_rate": 4.9415196935510375e-06, + "loss": 0.5755, + "step": 8186 + }, + { + "epoch": 0.52, + "grad_norm": 0.9150660037994385, + "learning_rate": 4.940493771286929e-06, + "loss": 0.5779, + "step": 8187 + }, + { + "epoch": 0.52, + "grad_norm": 0.8672343492507935, + "learning_rate": 4.939467851528423e-06, + "loss": 0.5905, + "step": 8188 + }, + { + "epoch": 0.52, + "grad_norm": 0.8112956881523132, + "learning_rate": 4.938441934318713e-06, + "loss": 0.5317, + "step": 8189 + }, + { + "epoch": 0.52, + "grad_norm": 0.8447852730751038, + "learning_rate": 4.937416019701e-06, + "loss": 0.5753, + "step": 8190 + }, + { + "epoch": 0.52, + "grad_norm": 0.8433228135108948, + "learning_rate": 4.93639010771848e-06, + "loss": 0.5283, + "step": 8191 + }, + { + "epoch": 0.52, + "grad_norm": 0.8930540084838867, + "learning_rate": 4.9353641984143526e-06, + "loss": 0.5907, + "step": 8192 + }, + { + "epoch": 0.52, + "grad_norm": 0.8250675201416016, + "learning_rate": 4.934338291831813e-06, + "loss": 0.5775, + "step": 8193 + }, + { + "epoch": 0.52, + "grad_norm": 0.8587763905525208, + "learning_rate": 4.93331238801406e-06, + "loss": 0.5706, + "step": 8194 + }, + { + "epoch": 0.52, + "grad_norm": 0.9937714338302612, + "learning_rate": 4.932286487004291e-06, + "loss": 0.6685, + "step": 8195 + }, + { + "epoch": 0.52, + "grad_norm": 0.8941221833229065, + "learning_rate": 4.931260588845701e-06, + "loss": 0.5856, + "step": 8196 + }, + { + "epoch": 0.52, + "grad_norm": 0.8236309885978699, + "learning_rate": 4.930234693581489e-06, + "loss": 0.595, + "step": 8197 + }, + { + "epoch": 0.52, + "grad_norm": 0.8598278760910034, + "learning_rate": 4.929208801254851e-06, + "loss": 0.5957, + "step": 8198 + }, + { + "epoch": 0.52, + "grad_norm": 0.9491175413131714, + "learning_rate": 4.928182911908987e-06, + "loss": 0.6515, + "step": 8199 + }, + { + "epoch": 0.52, + "grad_norm": 0.847444474697113, + "learning_rate": 4.927157025587086e-06, + "loss": 0.5541, + "step": 8200 + }, + { + "epoch": 0.52, + "grad_norm": 0.9040679335594177, + "learning_rate": 4.926131142332351e-06, + "loss": 0.6053, + "step": 8201 + }, + { + "epoch": 0.52, + "grad_norm": 0.8832661509513855, + "learning_rate": 4.925105262187978e-06, + "loss": 0.6243, + "step": 8202 + }, + { + "epoch": 0.52, + "grad_norm": 0.8717993497848511, + "learning_rate": 4.924079385197162e-06, + "loss": 0.563, + "step": 8203 + }, + { + "epoch": 0.52, + "grad_norm": 0.8877679705619812, + "learning_rate": 4.923053511403096e-06, + "loss": 0.6599, + "step": 8204 + }, + { + "epoch": 0.52, + "grad_norm": 0.8722405433654785, + "learning_rate": 4.922027640848981e-06, + "loss": 0.5793, + "step": 8205 + }, + { + "epoch": 0.52, + "grad_norm": 0.9440850019454956, + "learning_rate": 4.921001773578012e-06, + "loss": 0.6429, + "step": 8206 + }, + { + "epoch": 0.52, + "grad_norm": 0.9616214632987976, + "learning_rate": 4.9199759096333825e-06, + "loss": 0.6532, + "step": 8207 + }, + { + "epoch": 0.52, + "grad_norm": 0.8866004943847656, + "learning_rate": 4.918950049058289e-06, + "loss": 0.5907, + "step": 8208 + }, + { + "epoch": 0.52, + "grad_norm": 0.8617315888404846, + "learning_rate": 4.9179241918959255e-06, + "loss": 0.6039, + "step": 8209 + }, + { + "epoch": 0.52, + "grad_norm": 0.8040612936019897, + "learning_rate": 4.916898338189491e-06, + "loss": 0.5269, + "step": 8210 + }, + { + "epoch": 0.52, + "grad_norm": 0.8695709705352783, + "learning_rate": 4.9158724879821775e-06, + "loss": 0.5651, + "step": 8211 + }, + { + "epoch": 0.52, + "grad_norm": 0.8399918675422668, + "learning_rate": 4.914846641317181e-06, + "loss": 0.5193, + "step": 8212 + }, + { + "epoch": 0.52, + "grad_norm": 0.8823307752609253, + "learning_rate": 4.913820798237695e-06, + "loss": 0.5814, + "step": 8213 + }, + { + "epoch": 0.52, + "grad_norm": 0.9517965912818909, + "learning_rate": 4.912794958786917e-06, + "loss": 0.5904, + "step": 8214 + }, + { + "epoch": 0.52, + "grad_norm": 0.9135156273841858, + "learning_rate": 4.91176912300804e-06, + "loss": 0.5795, + "step": 8215 + }, + { + "epoch": 0.52, + "grad_norm": 1.0179460048675537, + "learning_rate": 4.9107432909442575e-06, + "loss": 0.5925, + "step": 8216 + }, + { + "epoch": 0.52, + "grad_norm": 0.91028892993927, + "learning_rate": 4.909717462638763e-06, + "loss": 0.625, + "step": 8217 + }, + { + "epoch": 0.52, + "grad_norm": 0.9520250558853149, + "learning_rate": 4.908691638134754e-06, + "loss": 0.6201, + "step": 8218 + }, + { + "epoch": 0.52, + "grad_norm": 0.897201418876648, + "learning_rate": 4.907665817475424e-06, + "loss": 0.5532, + "step": 8219 + }, + { + "epoch": 0.52, + "grad_norm": 0.8576155304908752, + "learning_rate": 4.906640000703963e-06, + "loss": 0.5918, + "step": 8220 + }, + { + "epoch": 0.52, + "grad_norm": 0.8770981431007385, + "learning_rate": 4.905614187863565e-06, + "loss": 0.6275, + "step": 8221 + }, + { + "epoch": 0.52, + "grad_norm": 1.008365273475647, + "learning_rate": 4.904588378997428e-06, + "loss": 0.6307, + "step": 8222 + }, + { + "epoch": 0.52, + "grad_norm": 0.8657634258270264, + "learning_rate": 4.903562574148744e-06, + "loss": 0.6345, + "step": 8223 + }, + { + "epoch": 0.52, + "grad_norm": 0.9766127467155457, + "learning_rate": 4.902536773360702e-06, + "loss": 0.5598, + "step": 8224 + }, + { + "epoch": 0.52, + "grad_norm": 0.8664228916168213, + "learning_rate": 4.9015109766764985e-06, + "loss": 0.6031, + "step": 8225 + }, + { + "epoch": 0.52, + "grad_norm": 0.8865102529525757, + "learning_rate": 4.900485184139323e-06, + "loss": 0.5766, + "step": 8226 + }, + { + "epoch": 0.52, + "grad_norm": 0.9038271307945251, + "learning_rate": 4.899459395792373e-06, + "loss": 0.6025, + "step": 8227 + }, + { + "epoch": 0.52, + "grad_norm": 0.8609294295310974, + "learning_rate": 4.8984336116788355e-06, + "loss": 0.5279, + "step": 8228 + }, + { + "epoch": 0.52, + "grad_norm": 0.846961259841919, + "learning_rate": 4.897407831841908e-06, + "loss": 0.5631, + "step": 8229 + }, + { + "epoch": 0.52, + "grad_norm": 0.8961449861526489, + "learning_rate": 4.8963820563247765e-06, + "loss": 0.5892, + "step": 8230 + }, + { + "epoch": 0.52, + "grad_norm": 0.9013886451721191, + "learning_rate": 4.8953562851706385e-06, + "loss": 0.5458, + "step": 8231 + }, + { + "epoch": 0.52, + "grad_norm": 0.8823043704032898, + "learning_rate": 4.894330518422683e-06, + "loss": 0.5935, + "step": 8232 + }, + { + "epoch": 0.52, + "grad_norm": 0.8829339742660522, + "learning_rate": 4.893304756124102e-06, + "loss": 0.5716, + "step": 8233 + }, + { + "epoch": 0.52, + "grad_norm": 0.8946317434310913, + "learning_rate": 4.8922789983180854e-06, + "loss": 0.6174, + "step": 8234 + }, + { + "epoch": 0.52, + "grad_norm": 0.8930938839912415, + "learning_rate": 4.891253245047826e-06, + "loss": 0.5584, + "step": 8235 + }, + { + "epoch": 0.52, + "grad_norm": 0.8877846002578735, + "learning_rate": 4.890227496356515e-06, + "loss": 0.5851, + "step": 8236 + }, + { + "epoch": 0.52, + "grad_norm": 0.8552438616752625, + "learning_rate": 4.889201752287342e-06, + "loss": 0.5844, + "step": 8237 + }, + { + "epoch": 0.52, + "grad_norm": 0.9162623882293701, + "learning_rate": 4.888176012883496e-06, + "loss": 0.6057, + "step": 8238 + }, + { + "epoch": 0.52, + "grad_norm": 0.8288585543632507, + "learning_rate": 4.88715027818817e-06, + "loss": 0.5871, + "step": 8239 + }, + { + "epoch": 0.52, + "grad_norm": 0.8896382451057434, + "learning_rate": 4.886124548244555e-06, + "loss": 0.6483, + "step": 8240 + }, + { + "epoch": 0.52, + "grad_norm": 0.9036986231803894, + "learning_rate": 4.885098823095838e-06, + "loss": 0.609, + "step": 8241 + }, + { + "epoch": 0.52, + "grad_norm": 0.828501284122467, + "learning_rate": 4.884073102785209e-06, + "loss": 0.5929, + "step": 8242 + }, + { + "epoch": 0.52, + "grad_norm": 0.8982778191566467, + "learning_rate": 4.883047387355858e-06, + "loss": 0.5726, + "step": 8243 + }, + { + "epoch": 0.52, + "grad_norm": 0.9407196640968323, + "learning_rate": 4.882021676850977e-06, + "loss": 0.5888, + "step": 8244 + }, + { + "epoch": 0.52, + "grad_norm": 0.9057026505470276, + "learning_rate": 4.880995971313752e-06, + "loss": 0.5436, + "step": 8245 + }, + { + "epoch": 0.52, + "grad_norm": 0.8921209573745728, + "learning_rate": 4.879970270787372e-06, + "loss": 0.6365, + "step": 8246 + }, + { + "epoch": 0.52, + "grad_norm": 0.9471856951713562, + "learning_rate": 4.878944575315025e-06, + "loss": 0.5888, + "step": 8247 + }, + { + "epoch": 0.52, + "grad_norm": 0.8575695753097534, + "learning_rate": 4.877918884939903e-06, + "loss": 0.599, + "step": 8248 + }, + { + "epoch": 0.52, + "grad_norm": 0.9358868598937988, + "learning_rate": 4.8768931997051925e-06, + "loss": 0.5986, + "step": 8249 + }, + { + "epoch": 0.52, + "grad_norm": 0.8470869660377502, + "learning_rate": 4.8758675196540795e-06, + "loss": 0.5713, + "step": 8250 + }, + { + "epoch": 0.52, + "grad_norm": 0.8792859315872192, + "learning_rate": 4.874841844829753e-06, + "loss": 0.5646, + "step": 8251 + }, + { + "epoch": 0.52, + "grad_norm": 0.870421826839447, + "learning_rate": 4.873816175275402e-06, + "loss": 0.5701, + "step": 8252 + }, + { + "epoch": 0.52, + "grad_norm": 0.882820188999176, + "learning_rate": 4.8727905110342135e-06, + "loss": 0.6186, + "step": 8253 + }, + { + "epoch": 0.52, + "grad_norm": 0.8869359493255615, + "learning_rate": 4.871764852149373e-06, + "loss": 0.6131, + "step": 8254 + }, + { + "epoch": 0.52, + "grad_norm": 0.870141327381134, + "learning_rate": 4.87073919866407e-06, + "loss": 0.5999, + "step": 8255 + }, + { + "epoch": 0.52, + "grad_norm": 0.8610088229179382, + "learning_rate": 4.869713550621487e-06, + "loss": 0.5949, + "step": 8256 + }, + { + "epoch": 0.52, + "grad_norm": 0.8822341561317444, + "learning_rate": 4.868687908064815e-06, + "loss": 0.5805, + "step": 8257 + }, + { + "epoch": 0.52, + "grad_norm": 0.8881772756576538, + "learning_rate": 4.867662271037238e-06, + "loss": 0.5319, + "step": 8258 + }, + { + "epoch": 0.52, + "grad_norm": 0.9677478075027466, + "learning_rate": 4.866636639581943e-06, + "loss": 0.5925, + "step": 8259 + }, + { + "epoch": 0.52, + "grad_norm": 0.8670486211776733, + "learning_rate": 4.865611013742114e-06, + "loss": 0.5811, + "step": 8260 + }, + { + "epoch": 0.52, + "grad_norm": 0.8827394247055054, + "learning_rate": 4.864585393560939e-06, + "loss": 0.5945, + "step": 8261 + }, + { + "epoch": 0.52, + "grad_norm": 0.9279113411903381, + "learning_rate": 4.863559779081601e-06, + "loss": 0.5824, + "step": 8262 + }, + { + "epoch": 0.52, + "grad_norm": 0.8230646848678589, + "learning_rate": 4.862534170347287e-06, + "loss": 0.5946, + "step": 8263 + }, + { + "epoch": 0.52, + "grad_norm": 0.8288192749023438, + "learning_rate": 4.861508567401179e-06, + "loss": 0.5486, + "step": 8264 + }, + { + "epoch": 0.52, + "grad_norm": 0.882305383682251, + "learning_rate": 4.860482970286465e-06, + "loss": 0.5531, + "step": 8265 + }, + { + "epoch": 0.52, + "grad_norm": 0.881271481513977, + "learning_rate": 4.859457379046327e-06, + "loss": 0.577, + "step": 8266 + }, + { + "epoch": 0.52, + "grad_norm": 0.8755255937576294, + "learning_rate": 4.858431793723952e-06, + "loss": 0.5614, + "step": 8267 + }, + { + "epoch": 0.52, + "grad_norm": 0.8271751999855042, + "learning_rate": 4.857406214362518e-06, + "loss": 0.5615, + "step": 8268 + }, + { + "epoch": 0.52, + "grad_norm": 0.9304192066192627, + "learning_rate": 4.856380641005215e-06, + "loss": 0.5808, + "step": 8269 + }, + { + "epoch": 0.52, + "grad_norm": 0.8733910918235779, + "learning_rate": 4.855355073695223e-06, + "loss": 0.6571, + "step": 8270 + }, + { + "epoch": 0.52, + "grad_norm": 0.944700300693512, + "learning_rate": 4.8543295124757265e-06, + "loss": 0.5915, + "step": 8271 + }, + { + "epoch": 0.52, + "grad_norm": 0.9210183024406433, + "learning_rate": 4.8533039573899075e-06, + "loss": 0.6014, + "step": 8272 + }, + { + "epoch": 0.52, + "grad_norm": 0.8870010375976562, + "learning_rate": 4.852278408480946e-06, + "loss": 0.5976, + "step": 8273 + }, + { + "epoch": 0.52, + "grad_norm": 1.0010098218917847, + "learning_rate": 4.8512528657920275e-06, + "loss": 0.5804, + "step": 8274 + }, + { + "epoch": 0.52, + "grad_norm": 0.9052338600158691, + "learning_rate": 4.850227329366335e-06, + "loss": 0.6216, + "step": 8275 + }, + { + "epoch": 0.52, + "grad_norm": 0.8478895425796509, + "learning_rate": 4.849201799247049e-06, + "loss": 0.5468, + "step": 8276 + }, + { + "epoch": 0.52, + "grad_norm": 0.8541980981826782, + "learning_rate": 4.848176275477348e-06, + "loss": 0.5529, + "step": 8277 + }, + { + "epoch": 0.52, + "grad_norm": 0.881534218788147, + "learning_rate": 4.847150758100418e-06, + "loss": 0.581, + "step": 8278 + }, + { + "epoch": 0.52, + "grad_norm": 0.8824727535247803, + "learning_rate": 4.846125247159437e-06, + "loss": 0.5844, + "step": 8279 + }, + { + "epoch": 0.52, + "grad_norm": 0.861589252948761, + "learning_rate": 4.845099742697588e-06, + "loss": 0.5607, + "step": 8280 + }, + { + "epoch": 0.52, + "grad_norm": 0.8586124777793884, + "learning_rate": 4.844074244758047e-06, + "loss": 0.5151, + "step": 8281 + }, + { + "epoch": 0.52, + "grad_norm": 0.9040012955665588, + "learning_rate": 4.843048753383998e-06, + "loss": 0.586, + "step": 8282 + }, + { + "epoch": 0.52, + "grad_norm": 0.8967165350914001, + "learning_rate": 4.8420232686186226e-06, + "loss": 0.5654, + "step": 8283 + }, + { + "epoch": 0.52, + "grad_norm": 0.8572660684585571, + "learning_rate": 4.840997790505097e-06, + "loss": 0.5538, + "step": 8284 + }, + { + "epoch": 0.52, + "grad_norm": 0.859514594078064, + "learning_rate": 4.8399723190866e-06, + "loss": 0.5347, + "step": 8285 + }, + { + "epoch": 0.52, + "grad_norm": 0.8236177563667297, + "learning_rate": 4.838946854406311e-06, + "loss": 0.5735, + "step": 8286 + }, + { + "epoch": 0.53, + "grad_norm": 0.8584608435630798, + "learning_rate": 4.8379213965074125e-06, + "loss": 0.5974, + "step": 8287 + }, + { + "epoch": 0.53, + "grad_norm": 0.8580573797225952, + "learning_rate": 4.83689594543308e-06, + "loss": 0.5857, + "step": 8288 + }, + { + "epoch": 0.53, + "grad_norm": 0.898115873336792, + "learning_rate": 4.835870501226489e-06, + "loss": 0.6063, + "step": 8289 + }, + { + "epoch": 0.53, + "grad_norm": 0.8824769258499146, + "learning_rate": 4.834845063930821e-06, + "loss": 0.5794, + "step": 8290 + }, + { + "epoch": 0.53, + "grad_norm": 0.7949787378311157, + "learning_rate": 4.833819633589254e-06, + "loss": 0.5864, + "step": 8291 + }, + { + "epoch": 0.53, + "grad_norm": 0.8064171671867371, + "learning_rate": 4.832794210244965e-06, + "loss": 0.5185, + "step": 8292 + }, + { + "epoch": 0.53, + "grad_norm": 0.9789409041404724, + "learning_rate": 4.831768793941129e-06, + "loss": 0.6399, + "step": 8293 + }, + { + "epoch": 0.53, + "grad_norm": 0.8709642887115479, + "learning_rate": 4.830743384720922e-06, + "loss": 0.5817, + "step": 8294 + }, + { + "epoch": 0.53, + "grad_norm": 0.9149221181869507, + "learning_rate": 4.829717982627525e-06, + "loss": 0.5949, + "step": 8295 + }, + { + "epoch": 0.53, + "grad_norm": 0.8690757751464844, + "learning_rate": 4.82869258770411e-06, + "loss": 0.6369, + "step": 8296 + }, + { + "epoch": 0.53, + "grad_norm": 0.8303024172782898, + "learning_rate": 4.827667199993855e-06, + "loss": 0.5615, + "step": 8297 + }, + { + "epoch": 0.53, + "grad_norm": 0.8637316226959229, + "learning_rate": 4.826641819539933e-06, + "loss": 0.557, + "step": 8298 + }, + { + "epoch": 0.53, + "grad_norm": 0.8349990844726562, + "learning_rate": 4.825616446385523e-06, + "loss": 0.5814, + "step": 8299 + }, + { + "epoch": 0.53, + "grad_norm": 0.8609099388122559, + "learning_rate": 4.824591080573797e-06, + "loss": 0.5872, + "step": 8300 + }, + { + "epoch": 0.53, + "grad_norm": 0.92775958776474, + "learning_rate": 4.823565722147932e-06, + "loss": 0.6211, + "step": 8301 + }, + { + "epoch": 0.53, + "grad_norm": 0.8916222453117371, + "learning_rate": 4.8225403711511e-06, + "loss": 0.5705, + "step": 8302 + }, + { + "epoch": 0.53, + "grad_norm": 0.8630041480064392, + "learning_rate": 4.821515027626473e-06, + "loss": 0.5799, + "step": 8303 + }, + { + "epoch": 0.53, + "grad_norm": 0.8404906988143921, + "learning_rate": 4.8204896916172285e-06, + "loss": 0.5419, + "step": 8304 + }, + { + "epoch": 0.53, + "grad_norm": 0.8835939168930054, + "learning_rate": 4.819464363166539e-06, + "loss": 0.5335, + "step": 8305 + }, + { + "epoch": 0.53, + "grad_norm": 0.9106584191322327, + "learning_rate": 4.818439042317578e-06, + "loss": 0.5901, + "step": 8306 + }, + { + "epoch": 0.53, + "grad_norm": 0.8627772331237793, + "learning_rate": 4.817413729113516e-06, + "loss": 0.5799, + "step": 8307 + }, + { + "epoch": 0.53, + "grad_norm": 0.9338002800941467, + "learning_rate": 4.816388423597527e-06, + "loss": 0.5736, + "step": 8308 + }, + { + "epoch": 0.53, + "grad_norm": 0.9331300258636475, + "learning_rate": 4.815363125812784e-06, + "loss": 0.6421, + "step": 8309 + }, + { + "epoch": 0.53, + "grad_norm": 0.8660625219345093, + "learning_rate": 4.814337835802457e-06, + "loss": 0.614, + "step": 8310 + }, + { + "epoch": 0.53, + "grad_norm": 0.8572609424591064, + "learning_rate": 4.813312553609716e-06, + "loss": 0.5237, + "step": 8311 + }, + { + "epoch": 0.53, + "grad_norm": 0.8259177207946777, + "learning_rate": 4.812287279277735e-06, + "loss": 0.5701, + "step": 8312 + }, + { + "epoch": 0.53, + "grad_norm": 0.853283703327179, + "learning_rate": 4.811262012849685e-06, + "loss": 0.5947, + "step": 8313 + }, + { + "epoch": 0.53, + "grad_norm": 0.885016143321991, + "learning_rate": 4.810236754368735e-06, + "loss": 0.6032, + "step": 8314 + }, + { + "epoch": 0.53, + "grad_norm": 0.8650339841842651, + "learning_rate": 4.8092115038780525e-06, + "loss": 0.6111, + "step": 8315 + }, + { + "epoch": 0.53, + "grad_norm": 0.8747149109840393, + "learning_rate": 4.808186261420811e-06, + "loss": 0.5894, + "step": 8316 + }, + { + "epoch": 0.53, + "grad_norm": 0.8412078619003296, + "learning_rate": 4.80716102704018e-06, + "loss": 0.581, + "step": 8317 + }, + { + "epoch": 0.53, + "grad_norm": 0.917317271232605, + "learning_rate": 4.806135800779328e-06, + "loss": 0.5797, + "step": 8318 + }, + { + "epoch": 0.53, + "grad_norm": 0.8281989693641663, + "learning_rate": 4.805110582681421e-06, + "loss": 0.5697, + "step": 8319 + }, + { + "epoch": 0.53, + "grad_norm": 0.9350634217262268, + "learning_rate": 4.804085372789629e-06, + "loss": 0.6051, + "step": 8320 + }, + { + "epoch": 0.53, + "grad_norm": 0.9457853436470032, + "learning_rate": 4.803060171147122e-06, + "loss": 0.6187, + "step": 8321 + }, + { + "epoch": 0.53, + "grad_norm": 0.9334213733673096, + "learning_rate": 4.802034977797066e-06, + "loss": 0.6349, + "step": 8322 + }, + { + "epoch": 0.53, + "grad_norm": 0.8959923982620239, + "learning_rate": 4.801009792782627e-06, + "loss": 0.5949, + "step": 8323 + }, + { + "epoch": 0.53, + "grad_norm": 0.8187436461448669, + "learning_rate": 4.799984616146974e-06, + "loss": 0.5693, + "step": 8324 + }, + { + "epoch": 0.53, + "grad_norm": 0.896421492099762, + "learning_rate": 4.798959447933274e-06, + "loss": 0.6583, + "step": 8325 + }, + { + "epoch": 0.53, + "grad_norm": 0.9043596386909485, + "learning_rate": 4.797934288184692e-06, + "loss": 0.5758, + "step": 8326 + }, + { + "epoch": 0.53, + "grad_norm": 0.959918200969696, + "learning_rate": 4.796909136944394e-06, + "loss": 0.6453, + "step": 8327 + }, + { + "epoch": 0.53, + "grad_norm": 0.855787992477417, + "learning_rate": 4.795883994255544e-06, + "loss": 0.5633, + "step": 8328 + }, + { + "epoch": 0.53, + "grad_norm": 0.9476739764213562, + "learning_rate": 4.794858860161311e-06, + "loss": 0.674, + "step": 8329 + }, + { + "epoch": 0.53, + "grad_norm": 0.8766798973083496, + "learning_rate": 4.793833734704858e-06, + "loss": 0.6058, + "step": 8330 + }, + { + "epoch": 0.53, + "grad_norm": 0.8943171501159668, + "learning_rate": 4.792808617929348e-06, + "loss": 0.59, + "step": 8331 + }, + { + "epoch": 0.53, + "grad_norm": 0.8322863578796387, + "learning_rate": 4.791783509877948e-06, + "loss": 0.5921, + "step": 8332 + }, + { + "epoch": 0.53, + "grad_norm": 0.9394057393074036, + "learning_rate": 4.790758410593818e-06, + "loss": 0.6143, + "step": 8333 + }, + { + "epoch": 0.53, + "grad_norm": 0.9464383721351624, + "learning_rate": 4.789733320120124e-06, + "loss": 0.5695, + "step": 8334 + }, + { + "epoch": 0.53, + "grad_norm": 0.8929427266120911, + "learning_rate": 4.788708238500029e-06, + "loss": 0.5768, + "step": 8335 + }, + { + "epoch": 0.53, + "grad_norm": 0.8872730731964111, + "learning_rate": 4.787683165776695e-06, + "loss": 0.5809, + "step": 8336 + }, + { + "epoch": 0.53, + "grad_norm": 0.8962015509605408, + "learning_rate": 4.786658101993283e-06, + "loss": 0.6007, + "step": 8337 + }, + { + "epoch": 0.53, + "grad_norm": 0.8641744256019592, + "learning_rate": 4.785633047192959e-06, + "loss": 0.5726, + "step": 8338 + }, + { + "epoch": 0.53, + "grad_norm": 0.9444864988327026, + "learning_rate": 4.7846080014188786e-06, + "loss": 0.6105, + "step": 8339 + }, + { + "epoch": 0.53, + "grad_norm": 0.8568362593650818, + "learning_rate": 4.783582964714209e-06, + "loss": 0.6058, + "step": 8340 + }, + { + "epoch": 0.53, + "grad_norm": 0.8523517847061157, + "learning_rate": 4.782557937122104e-06, + "loss": 0.5627, + "step": 8341 + }, + { + "epoch": 0.53, + "grad_norm": 0.9169915914535522, + "learning_rate": 4.781532918685731e-06, + "loss": 0.556, + "step": 8342 + }, + { + "epoch": 0.53, + "grad_norm": 0.9116235375404358, + "learning_rate": 4.780507909448246e-06, + "loss": 0.6041, + "step": 8343 + }, + { + "epoch": 0.53, + "grad_norm": 0.9121682047843933, + "learning_rate": 4.77948290945281e-06, + "loss": 0.5696, + "step": 8344 + }, + { + "epoch": 0.53, + "grad_norm": 0.9193983674049377, + "learning_rate": 4.778457918742579e-06, + "loss": 0.5995, + "step": 8345 + }, + { + "epoch": 0.53, + "grad_norm": 0.8698511123657227, + "learning_rate": 4.777432937360716e-06, + "loss": 0.6134, + "step": 8346 + }, + { + "epoch": 0.53, + "grad_norm": 0.9621423482894897, + "learning_rate": 4.776407965350378e-06, + "loss": 0.5889, + "step": 8347 + }, + { + "epoch": 0.53, + "grad_norm": 0.9202246069908142, + "learning_rate": 4.775383002754723e-06, + "loss": 0.6282, + "step": 8348 + }, + { + "epoch": 0.53, + "grad_norm": 0.8784829378128052, + "learning_rate": 4.7743580496169095e-06, + "loss": 0.6325, + "step": 8349 + }, + { + "epoch": 0.53, + "grad_norm": 0.8858938813209534, + "learning_rate": 4.773333105980091e-06, + "loss": 0.5691, + "step": 8350 + }, + { + "epoch": 0.53, + "grad_norm": 0.8641536831855774, + "learning_rate": 4.772308171887427e-06, + "loss": 0.5179, + "step": 8351 + }, + { + "epoch": 0.53, + "grad_norm": 0.9512357115745544, + "learning_rate": 4.771283247382076e-06, + "loss": 0.6028, + "step": 8352 + }, + { + "epoch": 0.53, + "grad_norm": 0.852192223072052, + "learning_rate": 4.770258332507191e-06, + "loss": 0.5482, + "step": 8353 + }, + { + "epoch": 0.53, + "grad_norm": 0.8949208855628967, + "learning_rate": 4.7692334273059265e-06, + "loss": 0.6007, + "step": 8354 + }, + { + "epoch": 0.53, + "grad_norm": 0.9022393822669983, + "learning_rate": 4.768208531821441e-06, + "loss": 0.5518, + "step": 8355 + }, + { + "epoch": 0.53, + "grad_norm": 0.8701500296592712, + "learning_rate": 4.767183646096889e-06, + "loss": 0.5991, + "step": 8356 + }, + { + "epoch": 0.53, + "grad_norm": 0.8898680806159973, + "learning_rate": 4.766158770175422e-06, + "loss": 0.6007, + "step": 8357 + }, + { + "epoch": 0.53, + "grad_norm": 0.8867197036743164, + "learning_rate": 4.765133904100196e-06, + "loss": 0.5388, + "step": 8358 + }, + { + "epoch": 0.53, + "grad_norm": 0.9473680257797241, + "learning_rate": 4.764109047914365e-06, + "loss": 0.582, + "step": 8359 + }, + { + "epoch": 0.53, + "grad_norm": 0.9777132272720337, + "learning_rate": 4.763084201661081e-06, + "loss": 0.5981, + "step": 8360 + }, + { + "epoch": 0.53, + "grad_norm": 0.9255326390266418, + "learning_rate": 4.762059365383497e-06, + "loss": 0.6236, + "step": 8361 + }, + { + "epoch": 0.53, + "grad_norm": 0.834649920463562, + "learning_rate": 4.761034539124765e-06, + "loss": 0.5596, + "step": 8362 + }, + { + "epoch": 0.53, + "grad_norm": 0.970477819442749, + "learning_rate": 4.760009722928038e-06, + "loss": 0.6285, + "step": 8363 + }, + { + "epoch": 0.53, + "grad_norm": 0.9015950560569763, + "learning_rate": 4.7589849168364675e-06, + "loss": 0.5778, + "step": 8364 + }, + { + "epoch": 0.53, + "grad_norm": 0.9307251572608948, + "learning_rate": 4.7579601208932015e-06, + "loss": 0.6193, + "step": 8365 + }, + { + "epoch": 0.53, + "grad_norm": 0.8972157835960388, + "learning_rate": 4.756935335141395e-06, + "loss": 0.5971, + "step": 8366 + }, + { + "epoch": 0.53, + "grad_norm": 0.8648176193237305, + "learning_rate": 4.755910559624194e-06, + "loss": 0.5711, + "step": 8367 + }, + { + "epoch": 0.53, + "grad_norm": 0.8773466348648071, + "learning_rate": 4.754885794384752e-06, + "loss": 0.5989, + "step": 8368 + }, + { + "epoch": 0.53, + "grad_norm": 0.9213095903396606, + "learning_rate": 4.7538610394662156e-06, + "loss": 0.5402, + "step": 8369 + }, + { + "epoch": 0.53, + "grad_norm": 0.9090799689292908, + "learning_rate": 4.7528362949117355e-06, + "loss": 0.5578, + "step": 8370 + }, + { + "epoch": 0.53, + "grad_norm": 0.8713887929916382, + "learning_rate": 4.751811560764457e-06, + "loss": 0.5654, + "step": 8371 + }, + { + "epoch": 0.53, + "grad_norm": 0.8781121373176575, + "learning_rate": 4.750786837067532e-06, + "loss": 0.5971, + "step": 8372 + }, + { + "epoch": 0.53, + "grad_norm": 0.955092191696167, + "learning_rate": 4.7497621238641055e-06, + "loss": 0.612, + "step": 8373 + }, + { + "epoch": 0.53, + "grad_norm": 0.9178210496902466, + "learning_rate": 4.7487374211973266e-06, + "loss": 0.6232, + "step": 8374 + }, + { + "epoch": 0.53, + "grad_norm": 0.8522612452507019, + "learning_rate": 4.747712729110339e-06, + "loss": 0.5371, + "step": 8375 + }, + { + "epoch": 0.53, + "grad_norm": 0.8494625687599182, + "learning_rate": 4.746688047646293e-06, + "loss": 0.5617, + "step": 8376 + }, + { + "epoch": 0.53, + "grad_norm": 0.8461270332336426, + "learning_rate": 4.745663376848331e-06, + "loss": 0.5576, + "step": 8377 + }, + { + "epoch": 0.53, + "grad_norm": 0.9114232659339905, + "learning_rate": 4.744638716759599e-06, + "loss": 0.6225, + "step": 8378 + }, + { + "epoch": 0.53, + "grad_norm": 0.8230855464935303, + "learning_rate": 4.743614067423245e-06, + "loss": 0.6225, + "step": 8379 + }, + { + "epoch": 0.53, + "grad_norm": 0.8851600885391235, + "learning_rate": 4.742589428882406e-06, + "loss": 0.5544, + "step": 8380 + }, + { + "epoch": 0.53, + "grad_norm": 0.8462688326835632, + "learning_rate": 4.7415648011802335e-06, + "loss": 0.6182, + "step": 8381 + }, + { + "epoch": 0.53, + "grad_norm": 0.8912700414657593, + "learning_rate": 4.7405401843598686e-06, + "loss": 0.5913, + "step": 8382 + }, + { + "epoch": 0.53, + "grad_norm": 0.8330943584442139, + "learning_rate": 4.739515578464454e-06, + "loss": 0.5526, + "step": 8383 + }, + { + "epoch": 0.53, + "grad_norm": 0.9202174544334412, + "learning_rate": 4.73849098353713e-06, + "loss": 0.6407, + "step": 8384 + }, + { + "epoch": 0.53, + "grad_norm": 0.8993576169013977, + "learning_rate": 4.737466399621043e-06, + "loss": 0.5996, + "step": 8385 + }, + { + "epoch": 0.53, + "grad_norm": 0.9567261934280396, + "learning_rate": 4.736441826759332e-06, + "loss": 0.5523, + "step": 8386 + }, + { + "epoch": 0.53, + "grad_norm": 0.8996643424034119, + "learning_rate": 4.73541726499514e-06, + "loss": 0.5853, + "step": 8387 + }, + { + "epoch": 0.53, + "grad_norm": 0.8818598389625549, + "learning_rate": 4.734392714371603e-06, + "loss": 0.6365, + "step": 8388 + }, + { + "epoch": 0.53, + "grad_norm": 0.8730207681655884, + "learning_rate": 4.733368174931867e-06, + "loss": 0.5728, + "step": 8389 + }, + { + "epoch": 0.53, + "grad_norm": 0.870194673538208, + "learning_rate": 4.7323436467190705e-06, + "loss": 0.549, + "step": 8390 + }, + { + "epoch": 0.53, + "grad_norm": 0.9448916912078857, + "learning_rate": 4.7313191297763524e-06, + "loss": 0.5897, + "step": 8391 + }, + { + "epoch": 0.53, + "grad_norm": 0.859308123588562, + "learning_rate": 4.730294624146849e-06, + "loss": 0.5922, + "step": 8392 + }, + { + "epoch": 0.53, + "grad_norm": 0.9392966628074646, + "learning_rate": 4.729270129873701e-06, + "loss": 0.5768, + "step": 8393 + }, + { + "epoch": 0.53, + "grad_norm": 0.919874370098114, + "learning_rate": 4.728245647000047e-06, + "loss": 0.5809, + "step": 8394 + }, + { + "epoch": 0.53, + "grad_norm": 0.8396472930908203, + "learning_rate": 4.7272211755690245e-06, + "loss": 0.6086, + "step": 8395 + }, + { + "epoch": 0.53, + "grad_norm": 0.8304100632667542, + "learning_rate": 4.7261967156237676e-06, + "loss": 0.5668, + "step": 8396 + }, + { + "epoch": 0.53, + "grad_norm": 0.856200098991394, + "learning_rate": 4.725172267207413e-06, + "loss": 0.5228, + "step": 8397 + }, + { + "epoch": 0.53, + "grad_norm": 0.8551792502403259, + "learning_rate": 4.724147830363101e-06, + "loss": 0.5902, + "step": 8398 + }, + { + "epoch": 0.53, + "grad_norm": 0.8715303540229797, + "learning_rate": 4.723123405133965e-06, + "loss": 0.6097, + "step": 8399 + }, + { + "epoch": 0.53, + "grad_norm": 0.8818318843841553, + "learning_rate": 4.722098991563137e-06, + "loss": 0.584, + "step": 8400 + }, + { + "epoch": 0.53, + "grad_norm": 0.8724188804626465, + "learning_rate": 4.721074589693753e-06, + "loss": 0.5802, + "step": 8401 + }, + { + "epoch": 0.53, + "grad_norm": 0.8455575108528137, + "learning_rate": 4.72005019956895e-06, + "loss": 0.5751, + "step": 8402 + }, + { + "epoch": 0.53, + "grad_norm": 0.8873419165611267, + "learning_rate": 4.719025821231859e-06, + "loss": 0.5904, + "step": 8403 + }, + { + "epoch": 0.53, + "grad_norm": 0.9218294620513916, + "learning_rate": 4.718001454725612e-06, + "loss": 0.5189, + "step": 8404 + }, + { + "epoch": 0.53, + "grad_norm": 0.9355472326278687, + "learning_rate": 4.716977100093342e-06, + "loss": 0.6187, + "step": 8405 + }, + { + "epoch": 0.53, + "grad_norm": 0.881987988948822, + "learning_rate": 4.715952757378183e-06, + "loss": 0.5762, + "step": 8406 + }, + { + "epoch": 0.53, + "grad_norm": 0.9212351441383362, + "learning_rate": 4.714928426623266e-06, + "loss": 0.5961, + "step": 8407 + }, + { + "epoch": 0.53, + "grad_norm": 0.890076220035553, + "learning_rate": 4.71390410787172e-06, + "loss": 0.6016, + "step": 8408 + }, + { + "epoch": 0.53, + "grad_norm": 0.9456012845039368, + "learning_rate": 4.712879801166676e-06, + "loss": 0.5956, + "step": 8409 + }, + { + "epoch": 0.53, + "grad_norm": 0.9153468012809753, + "learning_rate": 4.711855506551267e-06, + "loss": 0.6155, + "step": 8410 + }, + { + "epoch": 0.53, + "grad_norm": 0.9316279292106628, + "learning_rate": 4.71083122406862e-06, + "loss": 0.5859, + "step": 8411 + }, + { + "epoch": 0.53, + "grad_norm": 0.879622220993042, + "learning_rate": 4.709806953761863e-06, + "loss": 0.6248, + "step": 8412 + }, + { + "epoch": 0.53, + "grad_norm": 0.8345575928688049, + "learning_rate": 4.7087826956741266e-06, + "loss": 0.6002, + "step": 8413 + }, + { + "epoch": 0.53, + "grad_norm": 0.8680174946784973, + "learning_rate": 4.707758449848536e-06, + "loss": 0.5105, + "step": 8414 + }, + { + "epoch": 0.53, + "grad_norm": 0.9455978870391846, + "learning_rate": 4.7067342163282225e-06, + "loss": 0.571, + "step": 8415 + }, + { + "epoch": 0.53, + "grad_norm": 0.828173816204071, + "learning_rate": 4.70570999515631e-06, + "loss": 0.5763, + "step": 8416 + }, + { + "epoch": 0.53, + "grad_norm": 0.9333354234695435, + "learning_rate": 4.704685786375927e-06, + "loss": 0.5924, + "step": 8417 + }, + { + "epoch": 0.53, + "grad_norm": 0.9066340923309326, + "learning_rate": 4.703661590030196e-06, + "loss": 0.5833, + "step": 8418 + }, + { + "epoch": 0.53, + "grad_norm": 0.8967267274856567, + "learning_rate": 4.702637406162247e-06, + "loss": 0.6445, + "step": 8419 + }, + { + "epoch": 0.53, + "grad_norm": 0.8336849808692932, + "learning_rate": 4.7016132348152e-06, + "loss": 0.5238, + "step": 8420 + }, + { + "epoch": 0.53, + "grad_norm": 1.0905916690826416, + "learning_rate": 4.700589076032184e-06, + "loss": 0.5929, + "step": 8421 + }, + { + "epoch": 0.53, + "grad_norm": 0.8906887173652649, + "learning_rate": 4.699564929856318e-06, + "loss": 0.6375, + "step": 8422 + }, + { + "epoch": 0.53, + "grad_norm": 0.8552356362342834, + "learning_rate": 4.698540796330729e-06, + "loss": 0.5987, + "step": 8423 + }, + { + "epoch": 0.53, + "grad_norm": 0.8900651931762695, + "learning_rate": 4.697516675498538e-06, + "loss": 0.5935, + "step": 8424 + }, + { + "epoch": 0.53, + "grad_norm": 0.9135156869888306, + "learning_rate": 4.69649256740287e-06, + "loss": 0.5729, + "step": 8425 + }, + { + "epoch": 0.53, + "grad_norm": 0.9399777054786682, + "learning_rate": 4.695468472086841e-06, + "loss": 0.6642, + "step": 8426 + }, + { + "epoch": 0.53, + "grad_norm": 0.9039340019226074, + "learning_rate": 4.694444389593576e-06, + "loss": 0.5794, + "step": 8427 + }, + { + "epoch": 0.53, + "grad_norm": 0.8976691961288452, + "learning_rate": 4.693420319966195e-06, + "loss": 0.6221, + "step": 8428 + }, + { + "epoch": 0.53, + "grad_norm": 0.8583334684371948, + "learning_rate": 4.692396263247818e-06, + "loss": 0.6189, + "step": 8429 + }, + { + "epoch": 0.53, + "grad_norm": 0.8833329677581787, + "learning_rate": 4.691372219481564e-06, + "loss": 0.5687, + "step": 8430 + }, + { + "epoch": 0.53, + "grad_norm": 0.8784264326095581, + "learning_rate": 4.690348188710552e-06, + "loss": 0.6358, + "step": 8431 + }, + { + "epoch": 0.53, + "grad_norm": 0.8706404566764832, + "learning_rate": 4.689324170977901e-06, + "loss": 0.5894, + "step": 8432 + }, + { + "epoch": 0.53, + "grad_norm": 0.82457035779953, + "learning_rate": 4.688300166326729e-06, + "loss": 0.5753, + "step": 8433 + }, + { + "epoch": 0.53, + "grad_norm": 0.8955191969871521, + "learning_rate": 4.6872761748001515e-06, + "loss": 0.5895, + "step": 8434 + }, + { + "epoch": 0.53, + "grad_norm": 0.8949235081672668, + "learning_rate": 4.6862521964412865e-06, + "loss": 0.6284, + "step": 8435 + }, + { + "epoch": 0.53, + "grad_norm": 0.9302405714988708, + "learning_rate": 4.6852282312932505e-06, + "loss": 0.6205, + "step": 8436 + }, + { + "epoch": 0.53, + "grad_norm": 0.9242597818374634, + "learning_rate": 4.684204279399159e-06, + "loss": 0.6397, + "step": 8437 + }, + { + "epoch": 0.53, + "grad_norm": 0.907974898815155, + "learning_rate": 4.683180340802126e-06, + "loss": 0.6082, + "step": 8438 + }, + { + "epoch": 0.53, + "grad_norm": 0.8790863752365112, + "learning_rate": 4.682156415545266e-06, + "loss": 0.5588, + "step": 8439 + }, + { + "epoch": 0.53, + "grad_norm": 0.8933101892471313, + "learning_rate": 4.681132503671696e-06, + "loss": 0.5786, + "step": 8440 + }, + { + "epoch": 0.53, + "grad_norm": 0.9507586359977722, + "learning_rate": 4.680108605224526e-06, + "loss": 0.5959, + "step": 8441 + }, + { + "epoch": 0.53, + "grad_norm": 0.8982723355293274, + "learning_rate": 4.679084720246869e-06, + "loss": 0.587, + "step": 8442 + }, + { + "epoch": 0.53, + "grad_norm": 0.8750470280647278, + "learning_rate": 4.67806084878184e-06, + "loss": 0.5799, + "step": 8443 + }, + { + "epoch": 0.53, + "grad_norm": 0.859678328037262, + "learning_rate": 4.677036990872546e-06, + "loss": 0.565, + "step": 8444 + }, + { + "epoch": 0.54, + "grad_norm": 0.8528252243995667, + "learning_rate": 4.676013146562103e-06, + "loss": 0.5786, + "step": 8445 + }, + { + "epoch": 0.54, + "grad_norm": 1.003036379814148, + "learning_rate": 4.674989315893618e-06, + "loss": 0.6294, + "step": 8446 + }, + { + "epoch": 0.54, + "grad_norm": 0.849640429019928, + "learning_rate": 4.6739654989102034e-06, + "loss": 0.5834, + "step": 8447 + }, + { + "epoch": 0.54, + "grad_norm": 0.9035535454750061, + "learning_rate": 4.672941695654965e-06, + "loss": 0.5989, + "step": 8448 + }, + { + "epoch": 0.54, + "grad_norm": 0.8059716820716858, + "learning_rate": 4.6719179061710164e-06, + "loss": 0.5307, + "step": 8449 + }, + { + "epoch": 0.54, + "grad_norm": 0.8504626154899597, + "learning_rate": 4.670894130501462e-06, + "loss": 0.5043, + "step": 8450 + }, + { + "epoch": 0.54, + "grad_norm": 0.9729040861129761, + "learning_rate": 4.669870368689414e-06, + "loss": 0.5991, + "step": 8451 + }, + { + "epoch": 0.54, + "grad_norm": 0.9525192975997925, + "learning_rate": 4.668846620777972e-06, + "loss": 0.6014, + "step": 8452 + }, + { + "epoch": 0.54, + "grad_norm": 0.8673512935638428, + "learning_rate": 4.6678228868102495e-06, + "loss": 0.6118, + "step": 8453 + }, + { + "epoch": 0.54, + "grad_norm": 0.8499407172203064, + "learning_rate": 4.666799166829349e-06, + "loss": 0.5924, + "step": 8454 + }, + { + "epoch": 0.54, + "grad_norm": 0.930546224117279, + "learning_rate": 4.665775460878377e-06, + "loss": 0.5947, + "step": 8455 + }, + { + "epoch": 0.54, + "grad_norm": 0.8475348949432373, + "learning_rate": 4.664751769000436e-06, + "loss": 0.5222, + "step": 8456 + }, + { + "epoch": 0.54, + "grad_norm": 0.9042877554893494, + "learning_rate": 4.663728091238634e-06, + "loss": 0.58, + "step": 8457 + }, + { + "epoch": 0.54, + "grad_norm": 0.8196518421173096, + "learning_rate": 4.662704427636071e-06, + "loss": 0.5491, + "step": 8458 + }, + { + "epoch": 0.54, + "grad_norm": 0.9162909984588623, + "learning_rate": 4.661680778235852e-06, + "loss": 0.5964, + "step": 8459 + }, + { + "epoch": 0.54, + "grad_norm": 0.907317042350769, + "learning_rate": 4.660657143081079e-06, + "loss": 0.6112, + "step": 8460 + }, + { + "epoch": 0.54, + "grad_norm": 0.9425126314163208, + "learning_rate": 4.65963352221485e-06, + "loss": 0.591, + "step": 8461 + }, + { + "epoch": 0.54, + "grad_norm": 0.8683360815048218, + "learning_rate": 4.658609915680272e-06, + "loss": 0.5176, + "step": 8462 + }, + { + "epoch": 0.54, + "grad_norm": 0.8283640742301941, + "learning_rate": 4.657586323520443e-06, + "loss": 0.5724, + "step": 8463 + }, + { + "epoch": 0.54, + "grad_norm": 0.873866617679596, + "learning_rate": 4.6565627457784625e-06, + "loss": 0.5928, + "step": 8464 + }, + { + "epoch": 0.54, + "grad_norm": 0.8793148398399353, + "learning_rate": 4.655539182497428e-06, + "loss": 0.5796, + "step": 8465 + }, + { + "epoch": 0.54, + "grad_norm": 0.8088488578796387, + "learning_rate": 4.654515633720442e-06, + "loss": 0.4934, + "step": 8466 + }, + { + "epoch": 0.54, + "grad_norm": 0.8243443369865417, + "learning_rate": 4.653492099490601e-06, + "loss": 0.5183, + "step": 8467 + }, + { + "epoch": 0.54, + "grad_norm": 0.9187846779823303, + "learning_rate": 4.6524685798510025e-06, + "loss": 0.6225, + "step": 8468 + }, + { + "epoch": 0.54, + "grad_norm": 0.8890441060066223, + "learning_rate": 4.651445074844742e-06, + "loss": 0.5769, + "step": 8469 + }, + { + "epoch": 0.54, + "grad_norm": 0.9298631548881531, + "learning_rate": 4.650421584514917e-06, + "loss": 0.5943, + "step": 8470 + }, + { + "epoch": 0.54, + "grad_norm": 0.8094522356987, + "learning_rate": 4.649398108904624e-06, + "loss": 0.5371, + "step": 8471 + }, + { + "epoch": 0.54, + "grad_norm": 0.8985278606414795, + "learning_rate": 4.648374648056957e-06, + "loss": 0.5962, + "step": 8472 + }, + { + "epoch": 0.54, + "grad_norm": 0.870664656162262, + "learning_rate": 4.64735120201501e-06, + "loss": 0.5929, + "step": 8473 + }, + { + "epoch": 0.54, + "grad_norm": 0.8985655307769775, + "learning_rate": 4.646327770821875e-06, + "loss": 0.6354, + "step": 8474 + }, + { + "epoch": 0.54, + "grad_norm": 0.8748487830162048, + "learning_rate": 4.64530435452065e-06, + "loss": 0.5795, + "step": 8475 + }, + { + "epoch": 0.54, + "grad_norm": 0.8997553586959839, + "learning_rate": 4.644280953154424e-06, + "loss": 0.6348, + "step": 8476 + }, + { + "epoch": 0.54, + "grad_norm": 0.9536176323890686, + "learning_rate": 4.643257566766289e-06, + "loss": 0.5866, + "step": 8477 + }, + { + "epoch": 0.54, + "grad_norm": 0.8656853437423706, + "learning_rate": 4.642234195399336e-06, + "loss": 0.5839, + "step": 8478 + }, + { + "epoch": 0.54, + "grad_norm": 0.8885663151741028, + "learning_rate": 4.641210839096659e-06, + "loss": 0.5734, + "step": 8479 + }, + { + "epoch": 0.54, + "grad_norm": 0.9137561917304993, + "learning_rate": 4.6401874979013455e-06, + "loss": 0.5855, + "step": 8480 + }, + { + "epoch": 0.54, + "grad_norm": 0.8827475905418396, + "learning_rate": 4.639164171856483e-06, + "loss": 0.6211, + "step": 8481 + }, + { + "epoch": 0.54, + "grad_norm": 0.9084077477455139, + "learning_rate": 4.6381408610051605e-06, + "loss": 0.5965, + "step": 8482 + }, + { + "epoch": 0.54, + "grad_norm": 0.9235100746154785, + "learning_rate": 4.63711756539047e-06, + "loss": 0.6093, + "step": 8483 + }, + { + "epoch": 0.54, + "grad_norm": 0.8328654170036316, + "learning_rate": 4.636094285055497e-06, + "loss": 0.5547, + "step": 8484 + }, + { + "epoch": 0.54, + "grad_norm": 0.8300716280937195, + "learning_rate": 4.635071020043326e-06, + "loss": 0.534, + "step": 8485 + }, + { + "epoch": 0.54, + "grad_norm": 0.914543628692627, + "learning_rate": 4.634047770397044e-06, + "loss": 0.5686, + "step": 8486 + }, + { + "epoch": 0.54, + "grad_norm": 0.9101009964942932, + "learning_rate": 4.633024536159739e-06, + "loss": 0.5694, + "step": 8487 + }, + { + "epoch": 0.54, + "grad_norm": 0.8731689453125, + "learning_rate": 4.632001317374495e-06, + "loss": 0.5888, + "step": 8488 + }, + { + "epoch": 0.54, + "grad_norm": 0.8360764384269714, + "learning_rate": 4.630978114084394e-06, + "loss": 0.5559, + "step": 8489 + }, + { + "epoch": 0.54, + "grad_norm": 0.956150233745575, + "learning_rate": 4.629954926332522e-06, + "loss": 0.6186, + "step": 8490 + }, + { + "epoch": 0.54, + "grad_norm": 0.9069817066192627, + "learning_rate": 4.628931754161959e-06, + "loss": 0.5812, + "step": 8491 + }, + { + "epoch": 0.54, + "grad_norm": 0.8588123917579651, + "learning_rate": 4.62790859761579e-06, + "loss": 0.5806, + "step": 8492 + }, + { + "epoch": 0.54, + "grad_norm": 0.9087151288986206, + "learning_rate": 4.626885456737095e-06, + "loss": 0.6061, + "step": 8493 + }, + { + "epoch": 0.54, + "grad_norm": 0.9100707173347473, + "learning_rate": 4.625862331568957e-06, + "loss": 0.5807, + "step": 8494 + }, + { + "epoch": 0.54, + "grad_norm": 0.9260814785957336, + "learning_rate": 4.624839222154453e-06, + "loss": 0.5917, + "step": 8495 + }, + { + "epoch": 0.54, + "grad_norm": 0.9124268293380737, + "learning_rate": 4.623816128536665e-06, + "loss": 0.5771, + "step": 8496 + }, + { + "epoch": 0.54, + "grad_norm": 0.9149200320243835, + "learning_rate": 4.6227930507586705e-06, + "loss": 0.5622, + "step": 8497 + }, + { + "epoch": 0.54, + "grad_norm": 0.9365261197090149, + "learning_rate": 4.62176998886355e-06, + "loss": 0.6093, + "step": 8498 + }, + { + "epoch": 0.54, + "grad_norm": 0.8503932952880859, + "learning_rate": 4.620746942894377e-06, + "loss": 0.6117, + "step": 8499 + }, + { + "epoch": 0.54, + "grad_norm": 0.8979615569114685, + "learning_rate": 4.619723912894232e-06, + "loss": 0.5852, + "step": 8500 + }, + { + "epoch": 0.54, + "grad_norm": 0.8997284770011902, + "learning_rate": 4.618700898906191e-06, + "loss": 0.5506, + "step": 8501 + }, + { + "epoch": 0.54, + "grad_norm": 0.8395345211029053, + "learning_rate": 4.6176779009733295e-06, + "loss": 0.5371, + "step": 8502 + }, + { + "epoch": 0.54, + "grad_norm": 0.8006191253662109, + "learning_rate": 4.616654919138719e-06, + "loss": 0.5285, + "step": 8503 + }, + { + "epoch": 0.54, + "grad_norm": 0.9190979599952698, + "learning_rate": 4.6156319534454365e-06, + "loss": 0.6529, + "step": 8504 + }, + { + "epoch": 0.54, + "grad_norm": 0.875033438205719, + "learning_rate": 4.614609003936558e-06, + "loss": 0.5774, + "step": 8505 + }, + { + "epoch": 0.54, + "grad_norm": 0.8436771035194397, + "learning_rate": 4.613586070655152e-06, + "loss": 0.5751, + "step": 8506 + }, + { + "epoch": 0.54, + "grad_norm": 0.8874161243438721, + "learning_rate": 4.612563153644292e-06, + "loss": 0.581, + "step": 8507 + }, + { + "epoch": 0.54, + "grad_norm": 0.8197293281555176, + "learning_rate": 4.6115402529470495e-06, + "loss": 0.6048, + "step": 8508 + }, + { + "epoch": 0.54, + "grad_norm": 0.901355504989624, + "learning_rate": 4.610517368606497e-06, + "loss": 0.5648, + "step": 8509 + }, + { + "epoch": 0.54, + "grad_norm": 0.8736656308174133, + "learning_rate": 4.609494500665703e-06, + "loss": 0.5775, + "step": 8510 + }, + { + "epoch": 0.54, + "grad_norm": 0.9123381972312927, + "learning_rate": 4.608471649167737e-06, + "loss": 0.5824, + "step": 8511 + }, + { + "epoch": 0.54, + "grad_norm": 0.895682156085968, + "learning_rate": 4.6074488141556656e-06, + "loss": 0.6338, + "step": 8512 + }, + { + "epoch": 0.54, + "grad_norm": 0.8595967292785645, + "learning_rate": 4.606425995672562e-06, + "loss": 0.5794, + "step": 8513 + }, + { + "epoch": 0.54, + "grad_norm": 0.8449206948280334, + "learning_rate": 4.605403193761489e-06, + "loss": 0.5957, + "step": 8514 + }, + { + "epoch": 0.54, + "grad_norm": 0.8243349194526672, + "learning_rate": 4.604380408465516e-06, + "loss": 0.55, + "step": 8515 + }, + { + "epoch": 0.54, + "grad_norm": 0.8854864239692688, + "learning_rate": 4.603357639827705e-06, + "loss": 0.6184, + "step": 8516 + }, + { + "epoch": 0.54, + "grad_norm": 0.9017980098724365, + "learning_rate": 4.602334887891127e-06, + "loss": 0.6258, + "step": 8517 + }, + { + "epoch": 0.54, + "grad_norm": 0.9609394669532776, + "learning_rate": 4.601312152698843e-06, + "loss": 0.6221, + "step": 8518 + }, + { + "epoch": 0.54, + "grad_norm": 0.9184016585350037, + "learning_rate": 4.600289434293917e-06, + "loss": 0.5955, + "step": 8519 + }, + { + "epoch": 0.54, + "grad_norm": 0.9105634093284607, + "learning_rate": 4.599266732719413e-06, + "loss": 0.5936, + "step": 8520 + }, + { + "epoch": 0.54, + "grad_norm": 0.8601149320602417, + "learning_rate": 4.598244048018391e-06, + "loss": 0.5765, + "step": 8521 + }, + { + "epoch": 0.54, + "grad_norm": 0.8680559396743774, + "learning_rate": 4.5972213802339165e-06, + "loss": 0.6048, + "step": 8522 + }, + { + "epoch": 0.54, + "grad_norm": 0.8764021396636963, + "learning_rate": 4.596198729409047e-06, + "loss": 0.6259, + "step": 8523 + }, + { + "epoch": 0.54, + "grad_norm": 1.0313016176223755, + "learning_rate": 4.5951760955868455e-06, + "loss": 0.5857, + "step": 8524 + }, + { + "epoch": 0.54, + "grad_norm": 0.9298897385597229, + "learning_rate": 4.594153478810368e-06, + "loss": 0.603, + "step": 8525 + }, + { + "epoch": 0.54, + "grad_norm": 0.904453456401825, + "learning_rate": 4.593130879122678e-06, + "loss": 0.5608, + "step": 8526 + }, + { + "epoch": 0.54, + "grad_norm": 0.9432054162025452, + "learning_rate": 4.59210829656683e-06, + "loss": 0.5982, + "step": 8527 + }, + { + "epoch": 0.54, + "grad_norm": 0.8348836302757263, + "learning_rate": 4.591085731185885e-06, + "loss": 0.5458, + "step": 8528 + }, + { + "epoch": 0.54, + "grad_norm": 0.9127042293548584, + "learning_rate": 4.590063183022894e-06, + "loss": 0.5765, + "step": 8529 + }, + { + "epoch": 0.54, + "grad_norm": 0.8295519351959229, + "learning_rate": 4.589040652120919e-06, + "loss": 0.5914, + "step": 8530 + }, + { + "epoch": 0.54, + "grad_norm": 0.9889672994613647, + "learning_rate": 4.588018138523011e-06, + "loss": 0.5926, + "step": 8531 + }, + { + "epoch": 0.54, + "grad_norm": 0.8858618140220642, + "learning_rate": 4.5869956422722274e-06, + "loss": 0.5637, + "step": 8532 + }, + { + "epoch": 0.54, + "grad_norm": 0.8900063633918762, + "learning_rate": 4.585973163411618e-06, + "loss": 0.5885, + "step": 8533 + }, + { + "epoch": 0.54, + "grad_norm": 0.8373422026634216, + "learning_rate": 4.584950701984241e-06, + "loss": 0.56, + "step": 8534 + }, + { + "epoch": 0.54, + "grad_norm": 0.8420644998550415, + "learning_rate": 4.583928258033145e-06, + "loss": 0.5761, + "step": 8535 + }, + { + "epoch": 0.54, + "grad_norm": 0.8812116980552673, + "learning_rate": 4.5829058316013835e-06, + "loss": 0.5584, + "step": 8536 + }, + { + "epoch": 0.54, + "grad_norm": 0.967736542224884, + "learning_rate": 4.581883422732007e-06, + "loss": 0.615, + "step": 8537 + }, + { + "epoch": 0.54, + "grad_norm": 0.8752156496047974, + "learning_rate": 4.580861031468062e-06, + "loss": 0.5622, + "step": 8538 + }, + { + "epoch": 0.54, + "grad_norm": 0.845308780670166, + "learning_rate": 4.579838657852603e-06, + "loss": 0.5925, + "step": 8539 + }, + { + "epoch": 0.54, + "grad_norm": 0.8537322282791138, + "learning_rate": 4.578816301928677e-06, + "loss": 0.5804, + "step": 8540 + }, + { + "epoch": 0.54, + "grad_norm": 0.8931176066398621, + "learning_rate": 4.577793963739331e-06, + "loss": 0.5816, + "step": 8541 + }, + { + "epoch": 0.54, + "grad_norm": 0.855497419834137, + "learning_rate": 4.576771643327611e-06, + "loss": 0.5514, + "step": 8542 + }, + { + "epoch": 0.54, + "grad_norm": 0.8563072681427002, + "learning_rate": 4.575749340736565e-06, + "loss": 0.5706, + "step": 8543 + }, + { + "epoch": 0.54, + "grad_norm": 0.8625338673591614, + "learning_rate": 4.57472705600924e-06, + "loss": 0.5584, + "step": 8544 + }, + { + "epoch": 0.54, + "grad_norm": 0.9388693571090698, + "learning_rate": 4.573704789188679e-06, + "loss": 0.6424, + "step": 8545 + }, + { + "epoch": 0.54, + "grad_norm": 0.8577854633331299, + "learning_rate": 4.5726825403179245e-06, + "loss": 0.6327, + "step": 8546 + }, + { + "epoch": 0.54, + "grad_norm": 0.8097984194755554, + "learning_rate": 4.571660309440022e-06, + "loss": 0.5456, + "step": 8547 + }, + { + "epoch": 0.54, + "grad_norm": 0.9322377443313599, + "learning_rate": 4.570638096598016e-06, + "loss": 0.6238, + "step": 8548 + }, + { + "epoch": 0.54, + "grad_norm": 0.9196782112121582, + "learning_rate": 4.569615901834946e-06, + "loss": 0.578, + "step": 8549 + }, + { + "epoch": 0.54, + "grad_norm": 0.9435470700263977, + "learning_rate": 4.568593725193852e-06, + "loss": 0.5887, + "step": 8550 + }, + { + "epoch": 0.54, + "grad_norm": 0.8405277132987976, + "learning_rate": 4.567571566717774e-06, + "loss": 0.5792, + "step": 8551 + }, + { + "epoch": 0.54, + "grad_norm": 0.8100456595420837, + "learning_rate": 4.566549426449755e-06, + "loss": 0.5389, + "step": 8552 + }, + { + "epoch": 0.54, + "grad_norm": 0.8953537940979004, + "learning_rate": 4.565527304432833e-06, + "loss": 0.5842, + "step": 8553 + }, + { + "epoch": 0.54, + "grad_norm": 0.8631918430328369, + "learning_rate": 4.564505200710042e-06, + "loss": 0.5341, + "step": 8554 + }, + { + "epoch": 0.54, + "grad_norm": 0.8625524640083313, + "learning_rate": 4.5634831153244215e-06, + "loss": 0.5662, + "step": 8555 + }, + { + "epoch": 0.54, + "grad_norm": 0.8663583993911743, + "learning_rate": 4.562461048319011e-06, + "loss": 0.563, + "step": 8556 + }, + { + "epoch": 0.54, + "grad_norm": 0.9523765444755554, + "learning_rate": 4.561438999736844e-06, + "loss": 0.6671, + "step": 8557 + }, + { + "epoch": 0.54, + "grad_norm": 0.9273942708969116, + "learning_rate": 4.5604169696209535e-06, + "loss": 0.6043, + "step": 8558 + }, + { + "epoch": 0.54, + "grad_norm": 0.8723426461219788, + "learning_rate": 4.559394958014375e-06, + "loss": 0.6176, + "step": 8559 + }, + { + "epoch": 0.54, + "grad_norm": 0.9040724635124207, + "learning_rate": 4.558372964960142e-06, + "loss": 0.594, + "step": 8560 + }, + { + "epoch": 0.54, + "grad_norm": 0.8431350588798523, + "learning_rate": 4.557350990501288e-06, + "loss": 0.5618, + "step": 8561 + }, + { + "epoch": 0.54, + "grad_norm": 0.8898385763168335, + "learning_rate": 4.556329034680845e-06, + "loss": 0.5719, + "step": 8562 + }, + { + "epoch": 0.54, + "grad_norm": 0.9010729789733887, + "learning_rate": 4.55530709754184e-06, + "loss": 0.6032, + "step": 8563 + }, + { + "epoch": 0.54, + "grad_norm": 0.9148017764091492, + "learning_rate": 4.5542851791273085e-06, + "loss": 0.6184, + "step": 8564 + }, + { + "epoch": 0.54, + "grad_norm": 0.8323689103126526, + "learning_rate": 4.5532632794802766e-06, + "loss": 0.5297, + "step": 8565 + }, + { + "epoch": 0.54, + "grad_norm": 0.8298326134681702, + "learning_rate": 4.5522413986437745e-06, + "loss": 0.5933, + "step": 8566 + }, + { + "epoch": 0.54, + "grad_norm": 0.9563860893249512, + "learning_rate": 4.55121953666083e-06, + "loss": 0.5663, + "step": 8567 + }, + { + "epoch": 0.54, + "grad_norm": 0.8822575211524963, + "learning_rate": 4.550197693574468e-06, + "loss": 0.5713, + "step": 8568 + }, + { + "epoch": 0.54, + "grad_norm": 0.9293971657752991, + "learning_rate": 4.549175869427717e-06, + "loss": 0.5852, + "step": 8569 + }, + { + "epoch": 0.54, + "grad_norm": 0.846694827079773, + "learning_rate": 4.548154064263603e-06, + "loss": 0.558, + "step": 8570 + }, + { + "epoch": 0.54, + "grad_norm": 1.0178662538528442, + "learning_rate": 4.547132278125149e-06, + "loss": 0.5941, + "step": 8571 + }, + { + "epoch": 0.54, + "grad_norm": 0.9423683881759644, + "learning_rate": 4.546110511055377e-06, + "loss": 0.5525, + "step": 8572 + }, + { + "epoch": 0.54, + "grad_norm": 0.8660984039306641, + "learning_rate": 4.545088763097314e-06, + "loss": 0.5657, + "step": 8573 + }, + { + "epoch": 0.54, + "grad_norm": 0.8998304605484009, + "learning_rate": 4.544067034293982e-06, + "loss": 0.5886, + "step": 8574 + }, + { + "epoch": 0.54, + "grad_norm": 0.8911488056182861, + "learning_rate": 4.543045324688401e-06, + "loss": 0.55, + "step": 8575 + }, + { + "epoch": 0.54, + "grad_norm": 0.8904201984405518, + "learning_rate": 4.542023634323589e-06, + "loss": 0.5812, + "step": 8576 + }, + { + "epoch": 0.54, + "grad_norm": 0.8831244111061096, + "learning_rate": 4.54100196324257e-06, + "loss": 0.5895, + "step": 8577 + }, + { + "epoch": 0.54, + "grad_norm": 0.877057671546936, + "learning_rate": 4.539980311488363e-06, + "loss": 0.5412, + "step": 8578 + }, + { + "epoch": 0.54, + "grad_norm": 0.8660837411880493, + "learning_rate": 4.538958679103984e-06, + "loss": 0.5673, + "step": 8579 + }, + { + "epoch": 0.54, + "grad_norm": 0.8540948033332825, + "learning_rate": 4.5379370661324495e-06, + "loss": 0.5545, + "step": 8580 + }, + { + "epoch": 0.54, + "grad_norm": 0.8788366317749023, + "learning_rate": 4.536915472616779e-06, + "loss": 0.577, + "step": 8581 + }, + { + "epoch": 0.54, + "grad_norm": 0.8697229623794556, + "learning_rate": 4.535893898599988e-06, + "loss": 0.5517, + "step": 8582 + }, + { + "epoch": 0.54, + "grad_norm": 0.9252592921257019, + "learning_rate": 4.53487234412509e-06, + "loss": 0.5577, + "step": 8583 + }, + { + "epoch": 0.54, + "grad_norm": 0.8896933197975159, + "learning_rate": 4.533850809235099e-06, + "loss": 0.6227, + "step": 8584 + }, + { + "epoch": 0.54, + "grad_norm": 0.9034548401832581, + "learning_rate": 4.532829293973028e-06, + "loss": 0.6235, + "step": 8585 + }, + { + "epoch": 0.54, + "grad_norm": 0.9039227962493896, + "learning_rate": 4.531807798381892e-06, + "loss": 0.5804, + "step": 8586 + }, + { + "epoch": 0.54, + "grad_norm": 0.8506115674972534, + "learning_rate": 4.5307863225047e-06, + "loss": 0.5546, + "step": 8587 + }, + { + "epoch": 0.54, + "grad_norm": 0.9655522108078003, + "learning_rate": 4.529764866384464e-06, + "loss": 0.6156, + "step": 8588 + }, + { + "epoch": 0.54, + "grad_norm": 0.8476807475090027, + "learning_rate": 4.528743430064192e-06, + "loss": 0.5422, + "step": 8589 + }, + { + "epoch": 0.54, + "grad_norm": 0.9019517302513123, + "learning_rate": 4.527722013586897e-06, + "loss": 0.5895, + "step": 8590 + }, + { + "epoch": 0.54, + "grad_norm": 0.9369930624961853, + "learning_rate": 4.5267006169955855e-06, + "loss": 0.6462, + "step": 8591 + }, + { + "epoch": 0.54, + "grad_norm": 0.9092143774032593, + "learning_rate": 4.525679240333262e-06, + "loss": 0.5999, + "step": 8592 + }, + { + "epoch": 0.54, + "grad_norm": 0.8883408904075623, + "learning_rate": 4.524657883642936e-06, + "loss": 0.629, + "step": 8593 + }, + { + "epoch": 0.54, + "grad_norm": 0.8866313099861145, + "learning_rate": 4.5236365469676144e-06, + "loss": 0.5621, + "step": 8594 + }, + { + "epoch": 0.54, + "grad_norm": 0.8671311736106873, + "learning_rate": 4.522615230350302e-06, + "loss": 0.633, + "step": 8595 + }, + { + "epoch": 0.54, + "grad_norm": 0.8295920491218567, + "learning_rate": 4.521593933833998e-06, + "loss": 0.5545, + "step": 8596 + }, + { + "epoch": 0.54, + "grad_norm": 0.8872106671333313, + "learning_rate": 4.520572657461712e-06, + "loss": 0.5561, + "step": 8597 + }, + { + "epoch": 0.54, + "grad_norm": 0.9319486021995544, + "learning_rate": 4.519551401276441e-06, + "loss": 0.5819, + "step": 8598 + }, + { + "epoch": 0.54, + "grad_norm": 0.8339968323707581, + "learning_rate": 4.518530165321192e-06, + "loss": 0.5802, + "step": 8599 + }, + { + "epoch": 0.54, + "grad_norm": 0.910203218460083, + "learning_rate": 4.517508949638961e-06, + "loss": 0.5946, + "step": 8600 + }, + { + "epoch": 0.54, + "grad_norm": 0.8642169833183289, + "learning_rate": 4.516487754272751e-06, + "loss": 0.6044, + "step": 8601 + }, + { + "epoch": 0.54, + "grad_norm": 0.8620786070823669, + "learning_rate": 4.515466579265557e-06, + "loss": 0.5943, + "step": 8602 + }, + { + "epoch": 0.55, + "grad_norm": 0.865822970867157, + "learning_rate": 4.5144454246603816e-06, + "loss": 0.5797, + "step": 8603 + }, + { + "epoch": 0.55, + "grad_norm": 0.9090112447738647, + "learning_rate": 4.51342429050022e-06, + "loss": 0.5848, + "step": 8604 + }, + { + "epoch": 0.55, + "grad_norm": 0.8540524244308472, + "learning_rate": 4.51240317682807e-06, + "loss": 0.6534, + "step": 8605 + }, + { + "epoch": 0.55, + "grad_norm": 0.8696991801261902, + "learning_rate": 4.5113820836869234e-06, + "loss": 0.5707, + "step": 8606 + }, + { + "epoch": 0.55, + "grad_norm": 0.9889391660690308, + "learning_rate": 4.51036101111978e-06, + "loss": 0.6631, + "step": 8607 + }, + { + "epoch": 0.55, + "grad_norm": 0.8236830234527588, + "learning_rate": 4.509339959169629e-06, + "loss": 0.55, + "step": 8608 + }, + { + "epoch": 0.55, + "grad_norm": 0.9895977973937988, + "learning_rate": 4.508318927879468e-06, + "loss": 0.6829, + "step": 8609 + }, + { + "epoch": 0.55, + "grad_norm": 0.8496975302696228, + "learning_rate": 4.507297917292284e-06, + "loss": 0.537, + "step": 8610 + }, + { + "epoch": 0.55, + "grad_norm": 0.8107864260673523, + "learning_rate": 4.506276927451072e-06, + "loss": 0.5522, + "step": 8611 + }, + { + "epoch": 0.55, + "grad_norm": 0.8916783928871155, + "learning_rate": 4.505255958398821e-06, + "loss": 0.6091, + "step": 8612 + }, + { + "epoch": 0.55, + "grad_norm": 0.9526239037513733, + "learning_rate": 4.504235010178521e-06, + "loss": 0.5811, + "step": 8613 + }, + { + "epoch": 0.55, + "grad_norm": 0.9013256430625916, + "learning_rate": 4.503214082833161e-06, + "loss": 0.5874, + "step": 8614 + }, + { + "epoch": 0.55, + "grad_norm": 0.8835528492927551, + "learning_rate": 4.502193176405724e-06, + "loss": 0.5748, + "step": 8615 + }, + { + "epoch": 0.55, + "grad_norm": 0.8880060315132141, + "learning_rate": 4.501172290939203e-06, + "loss": 0.559, + "step": 8616 + }, + { + "epoch": 0.55, + "grad_norm": 0.8688036799430847, + "learning_rate": 4.5001514264765826e-06, + "loss": 0.5764, + "step": 8617 + }, + { + "epoch": 0.55, + "grad_norm": 0.8470838069915771, + "learning_rate": 4.499130583060845e-06, + "loss": 0.558, + "step": 8618 + }, + { + "epoch": 0.55, + "grad_norm": 0.8374934196472168, + "learning_rate": 4.4981097607349764e-06, + "loss": 0.5753, + "step": 8619 + }, + { + "epoch": 0.55, + "grad_norm": 0.8633030652999878, + "learning_rate": 4.49708895954196e-06, + "loss": 0.5417, + "step": 8620 + }, + { + "epoch": 0.55, + "grad_norm": 0.9553747177124023, + "learning_rate": 4.496068179524778e-06, + "loss": 0.6072, + "step": 8621 + }, + { + "epoch": 0.55, + "grad_norm": 0.9089109301567078, + "learning_rate": 4.495047420726412e-06, + "loss": 0.6185, + "step": 8622 + }, + { + "epoch": 0.55, + "grad_norm": 0.881161630153656, + "learning_rate": 4.494026683189843e-06, + "loss": 0.6265, + "step": 8623 + }, + { + "epoch": 0.55, + "grad_norm": 0.943405032157898, + "learning_rate": 4.493005966958049e-06, + "loss": 0.5904, + "step": 8624 + }, + { + "epoch": 0.55, + "grad_norm": 0.8863667845726013, + "learning_rate": 4.4919852720740115e-06, + "loss": 0.5585, + "step": 8625 + }, + { + "epoch": 0.55, + "grad_norm": 0.9154953956604004, + "learning_rate": 4.490964598580706e-06, + "loss": 0.6232, + "step": 8626 + }, + { + "epoch": 0.55, + "grad_norm": 0.885014533996582, + "learning_rate": 4.489943946521111e-06, + "loss": 0.5734, + "step": 8627 + }, + { + "epoch": 0.55, + "grad_norm": 0.9125478267669678, + "learning_rate": 4.4889233159382e-06, + "loss": 0.6038, + "step": 8628 + }, + { + "epoch": 0.55, + "grad_norm": 0.9338325262069702, + "learning_rate": 4.487902706874954e-06, + "loss": 0.6203, + "step": 8629 + }, + { + "epoch": 0.55, + "grad_norm": 0.98078852891922, + "learning_rate": 4.486882119374341e-06, + "loss": 0.6177, + "step": 8630 + }, + { + "epoch": 0.55, + "grad_norm": 0.9384462833404541, + "learning_rate": 4.485861553479338e-06, + "loss": 0.6567, + "step": 8631 + }, + { + "epoch": 0.55, + "grad_norm": 0.9040749669075012, + "learning_rate": 4.484841009232914e-06, + "loss": 0.6106, + "step": 8632 + }, + { + "epoch": 0.55, + "grad_norm": 0.8856215476989746, + "learning_rate": 4.483820486678047e-06, + "loss": 0.5848, + "step": 8633 + }, + { + "epoch": 0.55, + "grad_norm": 0.9175398349761963, + "learning_rate": 4.482799985857701e-06, + "loss": 0.6391, + "step": 8634 + }, + { + "epoch": 0.55, + "grad_norm": 0.8846474885940552, + "learning_rate": 4.48177950681485e-06, + "loss": 0.568, + "step": 8635 + }, + { + "epoch": 0.55, + "grad_norm": 0.8936927318572998, + "learning_rate": 4.480759049592458e-06, + "loss": 0.6202, + "step": 8636 + }, + { + "epoch": 0.55, + "grad_norm": 0.8895360231399536, + "learning_rate": 4.4797386142335e-06, + "loss": 0.5881, + "step": 8637 + }, + { + "epoch": 0.55, + "grad_norm": 0.8970397710800171, + "learning_rate": 4.478718200780936e-06, + "loss": 0.622, + "step": 8638 + }, + { + "epoch": 0.55, + "grad_norm": 0.8260961771011353, + "learning_rate": 4.477697809277738e-06, + "loss": 0.5248, + "step": 8639 + }, + { + "epoch": 0.55, + "grad_norm": 0.8553221821784973, + "learning_rate": 4.476677439766865e-06, + "loss": 0.5772, + "step": 8640 + }, + { + "epoch": 0.55, + "grad_norm": 0.9180177450180054, + "learning_rate": 4.475657092291287e-06, + "loss": 0.6119, + "step": 8641 + }, + { + "epoch": 0.55, + "grad_norm": 0.9326549172401428, + "learning_rate": 4.4746367668939646e-06, + "loss": 0.6103, + "step": 8642 + }, + { + "epoch": 0.55, + "grad_norm": 0.9045451879501343, + "learning_rate": 4.4736164636178605e-06, + "loss": 0.5519, + "step": 8643 + }, + { + "epoch": 0.55, + "grad_norm": 0.8994148969650269, + "learning_rate": 4.472596182505936e-06, + "loss": 0.5811, + "step": 8644 + }, + { + "epoch": 0.55, + "grad_norm": 0.9174748659133911, + "learning_rate": 4.47157592360115e-06, + "loss": 0.5954, + "step": 8645 + }, + { + "epoch": 0.55, + "grad_norm": 0.881083607673645, + "learning_rate": 4.470555686946464e-06, + "loss": 0.5622, + "step": 8646 + }, + { + "epoch": 0.55, + "grad_norm": 0.9171141982078552, + "learning_rate": 4.469535472584837e-06, + "loss": 0.5954, + "step": 8647 + }, + { + "epoch": 0.55, + "grad_norm": 0.9388046264648438, + "learning_rate": 4.468515280559227e-06, + "loss": 0.598, + "step": 8648 + }, + { + "epoch": 0.55, + "grad_norm": 0.8907988667488098, + "learning_rate": 4.467495110912587e-06, + "loss": 0.6237, + "step": 8649 + }, + { + "epoch": 0.55, + "grad_norm": 0.8721764087677002, + "learning_rate": 4.466474963687876e-06, + "loss": 0.6328, + "step": 8650 + }, + { + "epoch": 0.55, + "grad_norm": 0.8680887222290039, + "learning_rate": 4.46545483892805e-06, + "loss": 0.6172, + "step": 8651 + }, + { + "epoch": 0.55, + "grad_norm": 1.012911081314087, + "learning_rate": 4.464434736676061e-06, + "loss": 0.5685, + "step": 8652 + }, + { + "epoch": 0.55, + "grad_norm": 0.9347690343856812, + "learning_rate": 4.46341465697486e-06, + "loss": 0.6096, + "step": 8653 + }, + { + "epoch": 0.55, + "grad_norm": 0.7885945439338684, + "learning_rate": 4.462394599867402e-06, + "loss": 0.5374, + "step": 8654 + }, + { + "epoch": 0.55, + "grad_norm": 0.8694291114807129, + "learning_rate": 4.461374565396638e-06, + "loss": 0.561, + "step": 8655 + }, + { + "epoch": 0.55, + "grad_norm": 0.8917961716651917, + "learning_rate": 4.460354553605518e-06, + "loss": 0.6115, + "step": 8656 + }, + { + "epoch": 0.55, + "grad_norm": 0.8918887376785278, + "learning_rate": 4.459334564536988e-06, + "loss": 0.6392, + "step": 8657 + }, + { + "epoch": 0.55, + "grad_norm": 0.8710853457450867, + "learning_rate": 4.458314598234e-06, + "loss": 0.579, + "step": 8658 + }, + { + "epoch": 0.55, + "grad_norm": 0.8684004545211792, + "learning_rate": 4.4572946547395e-06, + "loss": 0.5717, + "step": 8659 + }, + { + "epoch": 0.55, + "grad_norm": 0.8856059908866882, + "learning_rate": 4.456274734096436e-06, + "loss": 0.5882, + "step": 8660 + }, + { + "epoch": 0.55, + "grad_norm": 0.9372711181640625, + "learning_rate": 4.455254836347749e-06, + "loss": 0.6284, + "step": 8661 + }, + { + "epoch": 0.55, + "grad_norm": 0.8738864064216614, + "learning_rate": 4.454234961536384e-06, + "loss": 0.5937, + "step": 8662 + }, + { + "epoch": 0.55, + "grad_norm": 0.8427755832672119, + "learning_rate": 4.45321510970529e-06, + "loss": 0.5822, + "step": 8663 + }, + { + "epoch": 0.55, + "grad_norm": 0.8868844509124756, + "learning_rate": 4.452195280897405e-06, + "loss": 0.5819, + "step": 8664 + }, + { + "epoch": 0.55, + "grad_norm": 0.8627145290374756, + "learning_rate": 4.451175475155669e-06, + "loss": 0.5566, + "step": 8665 + }, + { + "epoch": 0.55, + "grad_norm": 0.9276217222213745, + "learning_rate": 4.450155692523025e-06, + "loss": 0.5756, + "step": 8666 + }, + { + "epoch": 0.55, + "grad_norm": 0.8811351656913757, + "learning_rate": 4.449135933042414e-06, + "loss": 0.5945, + "step": 8667 + }, + { + "epoch": 0.55, + "grad_norm": 0.9855297803878784, + "learning_rate": 4.448116196756771e-06, + "loss": 0.6194, + "step": 8668 + }, + { + "epoch": 0.55, + "grad_norm": 0.813127338886261, + "learning_rate": 4.447096483709035e-06, + "loss": 0.5267, + "step": 8669 + }, + { + "epoch": 0.55, + "grad_norm": 0.8596179485321045, + "learning_rate": 4.4460767939421425e-06, + "loss": 0.5878, + "step": 8670 + }, + { + "epoch": 0.55, + "grad_norm": 0.9059436917304993, + "learning_rate": 4.44505712749903e-06, + "loss": 0.5663, + "step": 8671 + }, + { + "epoch": 0.55, + "grad_norm": 0.9185728430747986, + "learning_rate": 4.444037484422632e-06, + "loss": 0.6195, + "step": 8672 + }, + { + "epoch": 0.55, + "grad_norm": 0.8514521718025208, + "learning_rate": 4.44301786475588e-06, + "loss": 0.5572, + "step": 8673 + }, + { + "epoch": 0.55, + "grad_norm": 0.8482089042663574, + "learning_rate": 4.441998268541708e-06, + "loss": 0.5652, + "step": 8674 + }, + { + "epoch": 0.55, + "grad_norm": 0.8797301054000854, + "learning_rate": 4.440978695823049e-06, + "loss": 0.5766, + "step": 8675 + }, + { + "epoch": 0.55, + "grad_norm": 0.8916350603103638, + "learning_rate": 4.439959146642833e-06, + "loss": 0.5874, + "step": 8676 + }, + { + "epoch": 0.55, + "grad_norm": 0.9491859078407288, + "learning_rate": 4.4389396210439886e-06, + "loss": 0.5796, + "step": 8677 + }, + { + "epoch": 0.55, + "grad_norm": 0.8709646463394165, + "learning_rate": 4.437920119069445e-06, + "loss": 0.6025, + "step": 8678 + }, + { + "epoch": 0.55, + "grad_norm": 0.9544169306755066, + "learning_rate": 4.436900640762128e-06, + "loss": 0.6039, + "step": 8679 + }, + { + "epoch": 0.55, + "grad_norm": 0.9094875454902649, + "learning_rate": 4.435881186164968e-06, + "loss": 0.595, + "step": 8680 + }, + { + "epoch": 0.55, + "grad_norm": 0.9330776333808899, + "learning_rate": 4.434861755320888e-06, + "loss": 0.5634, + "step": 8681 + }, + { + "epoch": 0.55, + "grad_norm": 0.8398060202598572, + "learning_rate": 4.433842348272815e-06, + "loss": 0.573, + "step": 8682 + }, + { + "epoch": 0.55, + "grad_norm": 0.8635173439979553, + "learning_rate": 4.4328229650636676e-06, + "loss": 0.6207, + "step": 8683 + }, + { + "epoch": 0.55, + "grad_norm": 0.8932275176048279, + "learning_rate": 4.431803605736376e-06, + "loss": 0.5626, + "step": 8684 + }, + { + "epoch": 0.55, + "grad_norm": 0.8580974340438843, + "learning_rate": 4.430784270333855e-06, + "loss": 0.6231, + "step": 8685 + }, + { + "epoch": 0.55, + "grad_norm": 0.8090934753417969, + "learning_rate": 4.429764958899031e-06, + "loss": 0.524, + "step": 8686 + }, + { + "epoch": 0.55, + "grad_norm": 0.8802515864372253, + "learning_rate": 4.428745671474818e-06, + "loss": 0.5884, + "step": 8687 + }, + { + "epoch": 0.55, + "grad_norm": 0.9674070477485657, + "learning_rate": 4.427726408104139e-06, + "loss": 0.6131, + "step": 8688 + }, + { + "epoch": 0.55, + "grad_norm": 0.8582731485366821, + "learning_rate": 4.42670716882991e-06, + "loss": 0.6055, + "step": 8689 + }, + { + "epoch": 0.55, + "grad_norm": 0.87099689245224, + "learning_rate": 4.4256879536950495e-06, + "loss": 0.5777, + "step": 8690 + }, + { + "epoch": 0.55, + "grad_norm": 0.8034923076629639, + "learning_rate": 4.4246687627424686e-06, + "loss": 0.5599, + "step": 8691 + }, + { + "epoch": 0.55, + "grad_norm": 0.9542539119720459, + "learning_rate": 4.423649596015086e-06, + "loss": 0.6035, + "step": 8692 + }, + { + "epoch": 0.55, + "grad_norm": 0.8825756907463074, + "learning_rate": 4.422630453555814e-06, + "loss": 0.5617, + "step": 8693 + }, + { + "epoch": 0.55, + "grad_norm": 0.9769675135612488, + "learning_rate": 4.4216113354075654e-06, + "loss": 0.5966, + "step": 8694 + }, + { + "epoch": 0.55, + "grad_norm": 0.8933631777763367, + "learning_rate": 4.420592241613251e-06, + "loss": 0.6053, + "step": 8695 + }, + { + "epoch": 0.55, + "grad_norm": 0.9478098154067993, + "learning_rate": 4.4195731722157805e-06, + "loss": 0.5765, + "step": 8696 + }, + { + "epoch": 0.55, + "grad_norm": 0.8842805027961731, + "learning_rate": 4.418554127258066e-06, + "loss": 0.6167, + "step": 8697 + }, + { + "epoch": 0.55, + "grad_norm": 0.9180606007575989, + "learning_rate": 4.417535106783015e-06, + "loss": 0.6466, + "step": 8698 + }, + { + "epoch": 0.55, + "grad_norm": 0.8405973315238953, + "learning_rate": 4.416516110833533e-06, + "loss": 0.5296, + "step": 8699 + }, + { + "epoch": 0.55, + "grad_norm": 0.8814043998718262, + "learning_rate": 4.415497139452528e-06, + "loss": 0.6272, + "step": 8700 + }, + { + "epoch": 0.55, + "grad_norm": 0.9587448835372925, + "learning_rate": 4.414478192682905e-06, + "loss": 0.6149, + "step": 8701 + }, + { + "epoch": 0.55, + "grad_norm": 0.8646758198738098, + "learning_rate": 4.41345927056757e-06, + "loss": 0.53, + "step": 8702 + }, + { + "epoch": 0.55, + "grad_norm": 0.917334794998169, + "learning_rate": 4.4124403731494235e-06, + "loss": 0.6441, + "step": 8703 + }, + { + "epoch": 0.55, + "grad_norm": 0.8852252960205078, + "learning_rate": 4.4114215004713665e-06, + "loss": 0.5874, + "step": 8704 + }, + { + "epoch": 0.55, + "grad_norm": 0.8766316771507263, + "learning_rate": 4.410402652576307e-06, + "loss": 0.5342, + "step": 8705 + }, + { + "epoch": 0.55, + "grad_norm": 0.904898464679718, + "learning_rate": 4.409383829507139e-06, + "loss": 0.5368, + "step": 8706 + }, + { + "epoch": 0.55, + "grad_norm": 0.877981960773468, + "learning_rate": 4.408365031306763e-06, + "loss": 0.5601, + "step": 8707 + }, + { + "epoch": 0.55, + "grad_norm": 0.9853324890136719, + "learning_rate": 4.407346258018078e-06, + "loss": 0.6007, + "step": 8708 + }, + { + "epoch": 0.55, + "grad_norm": 0.8749459981918335, + "learning_rate": 4.4063275096839785e-06, + "loss": 0.5521, + "step": 8709 + }, + { + "epoch": 0.55, + "grad_norm": 0.9380457997322083, + "learning_rate": 4.405308786347365e-06, + "loss": 0.5872, + "step": 8710 + }, + { + "epoch": 0.55, + "grad_norm": 0.9295644164085388, + "learning_rate": 4.404290088051128e-06, + "loss": 0.5885, + "step": 8711 + }, + { + "epoch": 0.55, + "grad_norm": 0.8839128613471985, + "learning_rate": 4.403271414838164e-06, + "loss": 0.5699, + "step": 8712 + }, + { + "epoch": 0.55, + "grad_norm": 0.9140700101852417, + "learning_rate": 4.402252766751363e-06, + "loss": 0.5752, + "step": 8713 + }, + { + "epoch": 0.55, + "grad_norm": 0.8850396871566772, + "learning_rate": 4.401234143833621e-06, + "loss": 0.5811, + "step": 8714 + }, + { + "epoch": 0.55, + "grad_norm": 0.8859695196151733, + "learning_rate": 4.400215546127825e-06, + "loss": 0.5739, + "step": 8715 + }, + { + "epoch": 0.55, + "grad_norm": 0.8375210762023926, + "learning_rate": 4.399196973676867e-06, + "loss": 0.5541, + "step": 8716 + }, + { + "epoch": 0.55, + "grad_norm": 0.9590082168579102, + "learning_rate": 4.398178426523632e-06, + "loss": 0.6474, + "step": 8717 + }, + { + "epoch": 0.55, + "grad_norm": 0.9406694769859314, + "learning_rate": 4.3971599047110116e-06, + "loss": 0.5898, + "step": 8718 + }, + { + "epoch": 0.55, + "grad_norm": 0.8761373162269592, + "learning_rate": 4.3961414082818904e-06, + "loss": 0.5739, + "step": 8719 + }, + { + "epoch": 0.55, + "grad_norm": 0.9185157418251038, + "learning_rate": 4.395122937279154e-06, + "loss": 0.6258, + "step": 8720 + }, + { + "epoch": 0.55, + "grad_norm": 0.8924233317375183, + "learning_rate": 4.394104491745686e-06, + "loss": 0.5819, + "step": 8721 + }, + { + "epoch": 0.55, + "grad_norm": 0.7826727032661438, + "learning_rate": 4.393086071724371e-06, + "loss": 0.5175, + "step": 8722 + }, + { + "epoch": 0.55, + "grad_norm": 0.9112175107002258, + "learning_rate": 4.392067677258089e-06, + "loss": 0.644, + "step": 8723 + }, + { + "epoch": 0.55, + "grad_norm": 0.8790960907936096, + "learning_rate": 4.391049308389723e-06, + "loss": 0.5537, + "step": 8724 + }, + { + "epoch": 0.55, + "grad_norm": 0.8678017854690552, + "learning_rate": 4.390030965162153e-06, + "loss": 0.5639, + "step": 8725 + }, + { + "epoch": 0.55, + "grad_norm": 0.8150790929794312, + "learning_rate": 4.389012647618255e-06, + "loss": 0.5596, + "step": 8726 + }, + { + "epoch": 0.55, + "grad_norm": 0.9205260872840881, + "learning_rate": 4.387994355800909e-06, + "loss": 0.6169, + "step": 8727 + }, + { + "epoch": 0.55, + "grad_norm": 0.8948895931243896, + "learning_rate": 4.386976089752994e-06, + "loss": 0.5627, + "step": 8728 + }, + { + "epoch": 0.55, + "grad_norm": 0.9792115688323975, + "learning_rate": 4.385957849517383e-06, + "loss": 0.6722, + "step": 8729 + }, + { + "epoch": 0.55, + "grad_norm": 0.8200992941856384, + "learning_rate": 4.384939635136948e-06, + "loss": 0.5813, + "step": 8730 + }, + { + "epoch": 0.55, + "grad_norm": 0.8706687092781067, + "learning_rate": 4.383921446654567e-06, + "loss": 0.5613, + "step": 8731 + }, + { + "epoch": 0.55, + "grad_norm": 0.9445881247520447, + "learning_rate": 4.3829032841131116e-06, + "loss": 0.5922, + "step": 8732 + }, + { + "epoch": 0.55, + "grad_norm": 0.9358285665512085, + "learning_rate": 4.381885147555453e-06, + "loss": 0.6582, + "step": 8733 + }, + { + "epoch": 0.55, + "grad_norm": 0.9091733694076538, + "learning_rate": 4.380867037024457e-06, + "loss": 0.5389, + "step": 8734 + }, + { + "epoch": 0.55, + "grad_norm": 0.8815348744392395, + "learning_rate": 4.379848952562999e-06, + "loss": 0.6216, + "step": 8735 + }, + { + "epoch": 0.55, + "grad_norm": 0.859954833984375, + "learning_rate": 4.3788308942139435e-06, + "loss": 0.568, + "step": 8736 + }, + { + "epoch": 0.55, + "grad_norm": 0.838782548904419, + "learning_rate": 4.3778128620201595e-06, + "loss": 0.546, + "step": 8737 + }, + { + "epoch": 0.55, + "grad_norm": 0.8781520128250122, + "learning_rate": 4.376794856024509e-06, + "loss": 0.6458, + "step": 8738 + }, + { + "epoch": 0.55, + "grad_norm": 0.908060610294342, + "learning_rate": 4.37577687626986e-06, + "loss": 0.5873, + "step": 8739 + }, + { + "epoch": 0.55, + "grad_norm": 0.8939605355262756, + "learning_rate": 4.374758922799076e-06, + "loss": 0.5933, + "step": 8740 + }, + { + "epoch": 0.55, + "grad_norm": 0.9307653307914734, + "learning_rate": 4.373740995655019e-06, + "loss": 0.6065, + "step": 8741 + }, + { + "epoch": 0.55, + "grad_norm": 0.9014899134635925, + "learning_rate": 4.372723094880549e-06, + "loss": 0.667, + "step": 8742 + }, + { + "epoch": 0.55, + "grad_norm": 0.9142792820930481, + "learning_rate": 4.371705220518526e-06, + "loss": 0.5868, + "step": 8743 + }, + { + "epoch": 0.55, + "grad_norm": 0.8495569229125977, + "learning_rate": 4.3706873726118135e-06, + "loss": 0.5737, + "step": 8744 + }, + { + "epoch": 0.55, + "grad_norm": 0.8663256168365479, + "learning_rate": 4.369669551203266e-06, + "loss": 0.5857, + "step": 8745 + }, + { + "epoch": 0.55, + "grad_norm": 0.8795154690742493, + "learning_rate": 4.368651756335739e-06, + "loss": 0.601, + "step": 8746 + }, + { + "epoch": 0.55, + "grad_norm": 0.9166411757469177, + "learning_rate": 4.36763398805209e-06, + "loss": 0.5774, + "step": 8747 + }, + { + "epoch": 0.55, + "grad_norm": 0.9235051870346069, + "learning_rate": 4.366616246395177e-06, + "loss": 0.5795, + "step": 8748 + }, + { + "epoch": 0.55, + "grad_norm": 0.9201914072036743, + "learning_rate": 4.365598531407849e-06, + "loss": 0.6128, + "step": 8749 + }, + { + "epoch": 0.55, + "grad_norm": 0.9426406025886536, + "learning_rate": 4.364580843132959e-06, + "loss": 0.5561, + "step": 8750 + }, + { + "epoch": 0.55, + "grad_norm": 0.8633618354797363, + "learning_rate": 4.363563181613359e-06, + "loss": 0.5643, + "step": 8751 + }, + { + "epoch": 0.55, + "grad_norm": 0.8648019433021545, + "learning_rate": 4.362545546891901e-06, + "loss": 0.5829, + "step": 8752 + }, + { + "epoch": 0.55, + "grad_norm": 0.9268175363540649, + "learning_rate": 4.361527939011433e-06, + "loss": 0.5497, + "step": 8753 + }, + { + "epoch": 0.55, + "grad_norm": 0.9331282377243042, + "learning_rate": 4.360510358014801e-06, + "loss": 0.6365, + "step": 8754 + }, + { + "epoch": 0.55, + "grad_norm": 0.9760857820510864, + "learning_rate": 4.359492803944854e-06, + "loss": 0.617, + "step": 8755 + }, + { + "epoch": 0.55, + "grad_norm": 0.8864888548851013, + "learning_rate": 4.358475276844435e-06, + "loss": 0.5794, + "step": 8756 + }, + { + "epoch": 0.55, + "grad_norm": 0.8550977110862732, + "learning_rate": 4.357457776756392e-06, + "loss": 0.6164, + "step": 8757 + }, + { + "epoch": 0.55, + "grad_norm": 0.8886324167251587, + "learning_rate": 4.3564403037235666e-06, + "loss": 0.5582, + "step": 8758 + }, + { + "epoch": 0.55, + "grad_norm": 0.8365561962127686, + "learning_rate": 4.355422857788802e-06, + "loss": 0.5514, + "step": 8759 + }, + { + "epoch": 0.55, + "grad_norm": 0.8547555208206177, + "learning_rate": 4.3544054389949366e-06, + "loss": 0.579, + "step": 8760 + }, + { + "epoch": 0.56, + "grad_norm": 0.9086821675300598, + "learning_rate": 4.353388047384813e-06, + "loss": 0.5918, + "step": 8761 + }, + { + "epoch": 0.56, + "grad_norm": 0.8336657881736755, + "learning_rate": 4.35237068300127e-06, + "loss": 0.5914, + "step": 8762 + }, + { + "epoch": 0.56, + "grad_norm": 1.0008983612060547, + "learning_rate": 4.351353345887145e-06, + "loss": 0.6075, + "step": 8763 + }, + { + "epoch": 0.56, + "grad_norm": 0.9368928074836731, + "learning_rate": 4.350336036085272e-06, + "loss": 0.6175, + "step": 8764 + }, + { + "epoch": 0.56, + "grad_norm": 0.9230781197547913, + "learning_rate": 4.349318753638491e-06, + "loss": 0.6663, + "step": 8765 + }, + { + "epoch": 0.56, + "grad_norm": 0.8669142127037048, + "learning_rate": 4.348301498589632e-06, + "loss": 0.5702, + "step": 8766 + }, + { + "epoch": 0.56, + "grad_norm": 0.908332347869873, + "learning_rate": 4.347284270981531e-06, + "loss": 0.6157, + "step": 8767 + }, + { + "epoch": 0.56, + "grad_norm": 0.8867782950401306, + "learning_rate": 4.346267070857017e-06, + "loss": 0.5932, + "step": 8768 + }, + { + "epoch": 0.56, + "grad_norm": 0.8559575080871582, + "learning_rate": 4.3452498982589234e-06, + "loss": 0.5792, + "step": 8769 + }, + { + "epoch": 0.56, + "grad_norm": 0.8476456999778748, + "learning_rate": 4.34423275323008e-06, + "loss": 0.5813, + "step": 8770 + }, + { + "epoch": 0.56, + "grad_norm": 0.8500044941902161, + "learning_rate": 4.343215635813314e-06, + "loss": 0.5623, + "step": 8771 + }, + { + "epoch": 0.56, + "grad_norm": 0.8590050935745239, + "learning_rate": 4.3421985460514515e-06, + "loss": 0.5822, + "step": 8772 + }, + { + "epoch": 0.56, + "grad_norm": 0.882090151309967, + "learning_rate": 4.341181483987319e-06, + "loss": 0.5638, + "step": 8773 + }, + { + "epoch": 0.56, + "grad_norm": 0.8285457491874695, + "learning_rate": 4.340164449663745e-06, + "loss": 0.5572, + "step": 8774 + }, + { + "epoch": 0.56, + "grad_norm": 0.8915181159973145, + "learning_rate": 4.33914744312355e-06, + "loss": 0.5627, + "step": 8775 + }, + { + "epoch": 0.56, + "grad_norm": 0.9251353740692139, + "learning_rate": 4.338130464409556e-06, + "loss": 0.5431, + "step": 8776 + }, + { + "epoch": 0.56, + "grad_norm": 0.9160726070404053, + "learning_rate": 4.3371135135645845e-06, + "loss": 0.6369, + "step": 8777 + }, + { + "epoch": 0.56, + "grad_norm": 0.9499028325080872, + "learning_rate": 4.33609659063146e-06, + "loss": 0.5955, + "step": 8778 + }, + { + "epoch": 0.56, + "grad_norm": 0.8448708653450012, + "learning_rate": 4.335079695652998e-06, + "loss": 0.6101, + "step": 8779 + }, + { + "epoch": 0.56, + "grad_norm": 1.0195928812026978, + "learning_rate": 4.334062828672016e-06, + "loss": 0.6204, + "step": 8780 + }, + { + "epoch": 0.56, + "grad_norm": 0.9017850756645203, + "learning_rate": 4.3330459897313305e-06, + "loss": 0.5725, + "step": 8781 + }, + { + "epoch": 0.56, + "grad_norm": 0.8847092390060425, + "learning_rate": 4.33202917887376e-06, + "loss": 0.5784, + "step": 8782 + }, + { + "epoch": 0.56, + "grad_norm": 0.864553689956665, + "learning_rate": 4.331012396142117e-06, + "loss": 0.5691, + "step": 8783 + }, + { + "epoch": 0.56, + "grad_norm": 0.8894702792167664, + "learning_rate": 4.3299956415792145e-06, + "loss": 0.6365, + "step": 8784 + }, + { + "epoch": 0.56, + "grad_norm": 0.8423247337341309, + "learning_rate": 4.328978915227866e-06, + "loss": 0.5971, + "step": 8785 + }, + { + "epoch": 0.56, + "grad_norm": 0.9544634819030762, + "learning_rate": 4.327962217130878e-06, + "loss": 0.6287, + "step": 8786 + }, + { + "epoch": 0.56, + "grad_norm": 0.9328646659851074, + "learning_rate": 4.326945547331065e-06, + "loss": 0.5529, + "step": 8787 + }, + { + "epoch": 0.56, + "grad_norm": 0.921759843826294, + "learning_rate": 4.325928905871233e-06, + "loss": 0.6143, + "step": 8788 + }, + { + "epoch": 0.56, + "grad_norm": 0.8561935424804688, + "learning_rate": 4.324912292794192e-06, + "loss": 0.6107, + "step": 8789 + }, + { + "epoch": 0.56, + "grad_norm": 0.8923735618591309, + "learning_rate": 4.323895708142742e-06, + "loss": 0.5331, + "step": 8790 + }, + { + "epoch": 0.56, + "grad_norm": 0.8794368505477905, + "learning_rate": 4.322879151959695e-06, + "loss": 0.5809, + "step": 8791 + }, + { + "epoch": 0.56, + "grad_norm": 0.8946419358253479, + "learning_rate": 4.321862624287851e-06, + "loss": 0.5801, + "step": 8792 + }, + { + "epoch": 0.56, + "grad_norm": 0.9291636943817139, + "learning_rate": 4.320846125170012e-06, + "loss": 0.6148, + "step": 8793 + }, + { + "epoch": 0.56, + "grad_norm": 0.8626858592033386, + "learning_rate": 4.31982965464898e-06, + "loss": 0.5753, + "step": 8794 + }, + { + "epoch": 0.56, + "grad_norm": 0.9002351760864258, + "learning_rate": 4.318813212767555e-06, + "loss": 0.5691, + "step": 8795 + }, + { + "epoch": 0.56, + "grad_norm": 0.8788061141967773, + "learning_rate": 4.317796799568536e-06, + "loss": 0.6002, + "step": 8796 + }, + { + "epoch": 0.56, + "grad_norm": 0.8354102373123169, + "learning_rate": 4.316780415094722e-06, + "loss": 0.5693, + "step": 8797 + }, + { + "epoch": 0.56, + "grad_norm": 0.89030921459198, + "learning_rate": 4.315764059388905e-06, + "loss": 0.5916, + "step": 8798 + }, + { + "epoch": 0.56, + "grad_norm": 0.856412947177887, + "learning_rate": 4.314747732493886e-06, + "loss": 0.563, + "step": 8799 + }, + { + "epoch": 0.56, + "grad_norm": 0.832213282585144, + "learning_rate": 4.313731434452455e-06, + "loss": 0.5612, + "step": 8800 + }, + { + "epoch": 0.56, + "grad_norm": 0.8731396794319153, + "learning_rate": 4.312715165307407e-06, + "loss": 0.6631, + "step": 8801 + }, + { + "epoch": 0.56, + "grad_norm": 0.9026145935058594, + "learning_rate": 4.311698925101532e-06, + "loss": 0.5776, + "step": 8802 + }, + { + "epoch": 0.56, + "grad_norm": 0.8666503429412842, + "learning_rate": 4.310682713877619e-06, + "loss": 0.5579, + "step": 8803 + }, + { + "epoch": 0.56, + "grad_norm": 0.9560415744781494, + "learning_rate": 4.30966653167846e-06, + "loss": 0.6415, + "step": 8804 + }, + { + "epoch": 0.56, + "grad_norm": 0.8633235096931458, + "learning_rate": 4.308650378546843e-06, + "loss": 0.5844, + "step": 8805 + }, + { + "epoch": 0.56, + "grad_norm": 0.8731099367141724, + "learning_rate": 4.3076342545255535e-06, + "loss": 0.5678, + "step": 8806 + }, + { + "epoch": 0.56, + "grad_norm": 0.8647497296333313, + "learning_rate": 4.306618159657375e-06, + "loss": 0.5964, + "step": 8807 + }, + { + "epoch": 0.56, + "grad_norm": 0.867325484752655, + "learning_rate": 4.305602093985095e-06, + "loss": 0.597, + "step": 8808 + }, + { + "epoch": 0.56, + "grad_norm": 0.8929917812347412, + "learning_rate": 4.3045860575514955e-06, + "loss": 0.5933, + "step": 8809 + }, + { + "epoch": 0.56, + "grad_norm": 0.8812966346740723, + "learning_rate": 4.303570050399358e-06, + "loss": 0.6035, + "step": 8810 + }, + { + "epoch": 0.56, + "grad_norm": 0.8197950124740601, + "learning_rate": 4.302554072571461e-06, + "loss": 0.5531, + "step": 8811 + }, + { + "epoch": 0.56, + "grad_norm": 0.8902185559272766, + "learning_rate": 4.301538124110588e-06, + "loss": 0.6078, + "step": 8812 + }, + { + "epoch": 0.56, + "grad_norm": 0.8458168506622314, + "learning_rate": 4.300522205059515e-06, + "loss": 0.5865, + "step": 8813 + }, + { + "epoch": 0.56, + "grad_norm": 0.9090011119842529, + "learning_rate": 4.299506315461018e-06, + "loss": 0.5848, + "step": 8814 + }, + { + "epoch": 0.56, + "grad_norm": 0.8977993726730347, + "learning_rate": 4.2984904553578725e-06, + "loss": 0.607, + "step": 8815 + }, + { + "epoch": 0.56, + "grad_norm": 0.888264000415802, + "learning_rate": 4.297474624792853e-06, + "loss": 0.5694, + "step": 8816 + }, + { + "epoch": 0.56, + "grad_norm": 0.8837360739707947, + "learning_rate": 4.296458823808735e-06, + "loss": 0.5943, + "step": 8817 + }, + { + "epoch": 0.56, + "grad_norm": 0.9838821887969971, + "learning_rate": 4.295443052448288e-06, + "loss": 0.6391, + "step": 8818 + }, + { + "epoch": 0.56, + "grad_norm": 0.8406442999839783, + "learning_rate": 4.294427310754283e-06, + "loss": 0.5576, + "step": 8819 + }, + { + "epoch": 0.56, + "grad_norm": 0.9092972278594971, + "learning_rate": 4.293411598769487e-06, + "loss": 0.6397, + "step": 8820 + }, + { + "epoch": 0.56, + "grad_norm": 0.8684690594673157, + "learning_rate": 4.292395916536674e-06, + "loss": 0.5996, + "step": 8821 + }, + { + "epoch": 0.56, + "grad_norm": 0.938960075378418, + "learning_rate": 4.291380264098607e-06, + "loss": 0.6217, + "step": 8822 + }, + { + "epoch": 0.56, + "grad_norm": 0.9801902174949646, + "learning_rate": 4.290364641498051e-06, + "loss": 0.5536, + "step": 8823 + }, + { + "epoch": 0.56, + "grad_norm": 0.8995389938354492, + "learning_rate": 4.28934904877777e-06, + "loss": 0.5696, + "step": 8824 + }, + { + "epoch": 0.56, + "grad_norm": 0.9216705560684204, + "learning_rate": 4.288333485980531e-06, + "loss": 0.5857, + "step": 8825 + }, + { + "epoch": 0.56, + "grad_norm": 0.8806384205818176, + "learning_rate": 4.287317953149092e-06, + "loss": 0.592, + "step": 8826 + }, + { + "epoch": 0.56, + "grad_norm": 0.8204985857009888, + "learning_rate": 4.2863024503262146e-06, + "loss": 0.4938, + "step": 8827 + }, + { + "epoch": 0.56, + "grad_norm": 0.9380052089691162, + "learning_rate": 4.285286977554657e-06, + "loss": 0.6195, + "step": 8828 + }, + { + "epoch": 0.56, + "grad_norm": 0.9966148734092712, + "learning_rate": 4.284271534877181e-06, + "loss": 0.6133, + "step": 8829 + }, + { + "epoch": 0.56, + "grad_norm": 0.9887740015983582, + "learning_rate": 4.283256122336539e-06, + "loss": 0.6519, + "step": 8830 + }, + { + "epoch": 0.56, + "grad_norm": 0.8869448900222778, + "learning_rate": 4.28224073997549e-06, + "loss": 0.6057, + "step": 8831 + }, + { + "epoch": 0.56, + "grad_norm": 0.8631427884101868, + "learning_rate": 4.281225387836786e-06, + "loss": 0.552, + "step": 8832 + }, + { + "epoch": 0.56, + "grad_norm": 0.8739815354347229, + "learning_rate": 4.280210065963179e-06, + "loss": 0.5631, + "step": 8833 + }, + { + "epoch": 0.56, + "grad_norm": 0.8833276629447937, + "learning_rate": 4.279194774397422e-06, + "loss": 0.5637, + "step": 8834 + }, + { + "epoch": 0.56, + "grad_norm": 0.8888053894042969, + "learning_rate": 4.278179513182268e-06, + "loss": 0.5978, + "step": 8835 + }, + { + "epoch": 0.56, + "grad_norm": 0.9251044392585754, + "learning_rate": 4.2771642823604635e-06, + "loss": 0.5844, + "step": 8836 + }, + { + "epoch": 0.56, + "grad_norm": 0.9090611934661865, + "learning_rate": 4.276149081974754e-06, + "loss": 0.5963, + "step": 8837 + }, + { + "epoch": 0.56, + "grad_norm": 0.8514662384986877, + "learning_rate": 4.275133912067889e-06, + "loss": 0.5199, + "step": 8838 + }, + { + "epoch": 0.56, + "grad_norm": 0.9209812879562378, + "learning_rate": 4.274118772682615e-06, + "loss": 0.5519, + "step": 8839 + }, + { + "epoch": 0.56, + "grad_norm": 0.9110792279243469, + "learning_rate": 4.273103663861675e-06, + "loss": 0.582, + "step": 8840 + }, + { + "epoch": 0.56, + "grad_norm": 0.8700342774391174, + "learning_rate": 4.272088585647808e-06, + "loss": 0.5325, + "step": 8841 + }, + { + "epoch": 0.56, + "grad_norm": 0.8676977157592773, + "learning_rate": 4.27107353808376e-06, + "loss": 0.5971, + "step": 8842 + }, + { + "epoch": 0.56, + "grad_norm": 0.8846719861030579, + "learning_rate": 4.2700585212122705e-06, + "loss": 0.5739, + "step": 8843 + }, + { + "epoch": 0.56, + "grad_norm": 0.9149625301361084, + "learning_rate": 4.269043535076077e-06, + "loss": 0.5704, + "step": 8844 + }, + { + "epoch": 0.56, + "grad_norm": 0.8597497344017029, + "learning_rate": 4.2680285797179155e-06, + "loss": 0.5798, + "step": 8845 + }, + { + "epoch": 0.56, + "grad_norm": 0.8914947509765625, + "learning_rate": 4.267013655180526e-06, + "loss": 0.6291, + "step": 8846 + }, + { + "epoch": 0.56, + "grad_norm": 0.8733804821968079, + "learning_rate": 4.265998761506641e-06, + "loss": 0.62, + "step": 8847 + }, + { + "epoch": 0.56, + "grad_norm": 0.8868311047554016, + "learning_rate": 4.264983898738996e-06, + "loss": 0.5277, + "step": 8848 + }, + { + "epoch": 0.56, + "grad_norm": 0.9130145907402039, + "learning_rate": 4.263969066920321e-06, + "loss": 0.5653, + "step": 8849 + }, + { + "epoch": 0.56, + "grad_norm": 0.9107689261436462, + "learning_rate": 4.262954266093347e-06, + "loss": 0.603, + "step": 8850 + }, + { + "epoch": 0.56, + "grad_norm": 0.8246250748634338, + "learning_rate": 4.261939496300807e-06, + "loss": 0.545, + "step": 8851 + }, + { + "epoch": 0.56, + "grad_norm": 0.8571567535400391, + "learning_rate": 4.260924757585427e-06, + "loss": 0.5724, + "step": 8852 + }, + { + "epoch": 0.56, + "grad_norm": 0.8799802660942078, + "learning_rate": 4.259910049989933e-06, + "loss": 0.5336, + "step": 8853 + }, + { + "epoch": 0.56, + "grad_norm": 0.8496525287628174, + "learning_rate": 4.258895373557051e-06, + "loss": 0.5659, + "step": 8854 + }, + { + "epoch": 0.56, + "grad_norm": 0.8902441263198853, + "learning_rate": 4.25788072832951e-06, + "loss": 0.6043, + "step": 8855 + }, + { + "epoch": 0.56, + "grad_norm": 0.8644466400146484, + "learning_rate": 4.256866114350029e-06, + "loss": 0.5516, + "step": 8856 + }, + { + "epoch": 0.56, + "grad_norm": 0.9049948453903198, + "learning_rate": 4.25585153166133e-06, + "loss": 0.669, + "step": 8857 + }, + { + "epoch": 0.56, + "grad_norm": 0.9145426750183105, + "learning_rate": 4.254836980306134e-06, + "loss": 0.5955, + "step": 8858 + }, + { + "epoch": 0.56, + "grad_norm": 0.8813319206237793, + "learning_rate": 4.253822460327162e-06, + "loss": 0.5738, + "step": 8859 + }, + { + "epoch": 0.56, + "grad_norm": 0.994666576385498, + "learning_rate": 4.25280797176713e-06, + "loss": 0.5967, + "step": 8860 + }, + { + "epoch": 0.56, + "grad_norm": 0.9201557636260986, + "learning_rate": 4.251793514668754e-06, + "loss": 0.5901, + "step": 8861 + }, + { + "epoch": 0.56, + "grad_norm": 0.8030241131782532, + "learning_rate": 4.250779089074752e-06, + "loss": 0.5547, + "step": 8862 + }, + { + "epoch": 0.56, + "grad_norm": 0.8798213601112366, + "learning_rate": 4.249764695027833e-06, + "loss": 0.6044, + "step": 8863 + }, + { + "epoch": 0.56, + "grad_norm": 0.8339963555335999, + "learning_rate": 4.248750332570716e-06, + "loss": 0.6645, + "step": 8864 + }, + { + "epoch": 0.56, + "grad_norm": 0.9063261151313782, + "learning_rate": 4.247736001746108e-06, + "loss": 0.5697, + "step": 8865 + }, + { + "epoch": 0.56, + "grad_norm": 0.9362940192222595, + "learning_rate": 4.246721702596721e-06, + "loss": 0.5943, + "step": 8866 + }, + { + "epoch": 0.56, + "grad_norm": 0.904593288898468, + "learning_rate": 4.24570743516526e-06, + "loss": 0.626, + "step": 8867 + }, + { + "epoch": 0.56, + "grad_norm": 0.9311546683311462, + "learning_rate": 4.2446931994944375e-06, + "loss": 0.5865, + "step": 8868 + }, + { + "epoch": 0.56, + "grad_norm": 0.9218093156814575, + "learning_rate": 4.243678995626955e-06, + "loss": 0.5746, + "step": 8869 + }, + { + "epoch": 0.56, + "grad_norm": 0.8506073355674744, + "learning_rate": 4.242664823605521e-06, + "loss": 0.5585, + "step": 8870 + }, + { + "epoch": 0.56, + "grad_norm": 0.9345227479934692, + "learning_rate": 4.241650683472834e-06, + "loss": 0.5323, + "step": 8871 + }, + { + "epoch": 0.56, + "grad_norm": 0.8558427095413208, + "learning_rate": 4.240636575271601e-06, + "loss": 0.4936, + "step": 8872 + }, + { + "epoch": 0.56, + "grad_norm": 0.8252081871032715, + "learning_rate": 4.239622499044519e-06, + "loss": 0.55, + "step": 8873 + }, + { + "epoch": 0.56, + "grad_norm": 0.8779731392860413, + "learning_rate": 4.23860845483429e-06, + "loss": 0.5766, + "step": 8874 + }, + { + "epoch": 0.56, + "grad_norm": 0.9093831181526184, + "learning_rate": 4.237594442683607e-06, + "loss": 0.6104, + "step": 8875 + }, + { + "epoch": 0.56, + "grad_norm": 0.8918717503547668, + "learning_rate": 4.236580462635173e-06, + "loss": 0.5824, + "step": 8876 + }, + { + "epoch": 0.56, + "grad_norm": 0.8895564675331116, + "learning_rate": 4.235566514731678e-06, + "loss": 0.6093, + "step": 8877 + }, + { + "epoch": 0.56, + "grad_norm": 0.9440225958824158, + "learning_rate": 4.23455259901582e-06, + "loss": 0.617, + "step": 8878 + }, + { + "epoch": 0.56, + "grad_norm": 0.8962016105651855, + "learning_rate": 4.2335387155302885e-06, + "loss": 0.5789, + "step": 8879 + }, + { + "epoch": 0.56, + "grad_norm": 0.8680998682975769, + "learning_rate": 4.232524864317773e-06, + "loss": 0.549, + "step": 8880 + }, + { + "epoch": 0.56, + "grad_norm": 0.8944227695465088, + "learning_rate": 4.231511045420967e-06, + "loss": 0.6008, + "step": 8881 + }, + { + "epoch": 0.56, + "grad_norm": 0.8181406259536743, + "learning_rate": 4.230497258882559e-06, + "loss": 0.5423, + "step": 8882 + }, + { + "epoch": 0.56, + "grad_norm": 0.9459832906723022, + "learning_rate": 4.229483504745233e-06, + "loss": 0.6108, + "step": 8883 + }, + { + "epoch": 0.56, + "grad_norm": 0.8519952297210693, + "learning_rate": 4.228469783051676e-06, + "loss": 0.5936, + "step": 8884 + }, + { + "epoch": 0.56, + "grad_norm": 0.8907895088195801, + "learning_rate": 4.227456093844573e-06, + "loss": 0.5805, + "step": 8885 + }, + { + "epoch": 0.56, + "grad_norm": 1.0412497520446777, + "learning_rate": 4.226442437166607e-06, + "loss": 0.5508, + "step": 8886 + }, + { + "epoch": 0.56, + "grad_norm": 0.8901419639587402, + "learning_rate": 4.225428813060459e-06, + "loss": 0.5822, + "step": 8887 + }, + { + "epoch": 0.56, + "grad_norm": 0.8553881049156189, + "learning_rate": 4.224415221568807e-06, + "loss": 0.5854, + "step": 8888 + }, + { + "epoch": 0.56, + "grad_norm": 0.8875113129615784, + "learning_rate": 4.223401662734333e-06, + "loss": 0.5348, + "step": 8889 + }, + { + "epoch": 0.56, + "grad_norm": 0.8207681179046631, + "learning_rate": 4.222388136599715e-06, + "loss": 0.5878, + "step": 8890 + }, + { + "epoch": 0.56, + "grad_norm": 0.8918472528457642, + "learning_rate": 4.221374643207626e-06, + "loss": 0.5744, + "step": 8891 + }, + { + "epoch": 0.56, + "grad_norm": 0.8446689248085022, + "learning_rate": 4.220361182600742e-06, + "loss": 0.6045, + "step": 8892 + }, + { + "epoch": 0.56, + "grad_norm": 0.883139967918396, + "learning_rate": 4.219347754821737e-06, + "loss": 0.5616, + "step": 8893 + }, + { + "epoch": 0.56, + "grad_norm": 0.9404736161231995, + "learning_rate": 4.218334359913283e-06, + "loss": 0.565, + "step": 8894 + }, + { + "epoch": 0.56, + "grad_norm": 0.8452960848808289, + "learning_rate": 4.217320997918048e-06, + "loss": 0.5913, + "step": 8895 + }, + { + "epoch": 0.56, + "grad_norm": 0.9132777452468872, + "learning_rate": 4.216307668878706e-06, + "loss": 0.6401, + "step": 8896 + }, + { + "epoch": 0.56, + "grad_norm": 0.8934757113456726, + "learning_rate": 4.2152943728379185e-06, + "loss": 0.618, + "step": 8897 + }, + { + "epoch": 0.56, + "grad_norm": 0.9089536070823669, + "learning_rate": 4.214281109838357e-06, + "loss": 0.545, + "step": 8898 + }, + { + "epoch": 0.56, + "grad_norm": 0.8987053036689758, + "learning_rate": 4.213267879922685e-06, + "loss": 0.6345, + "step": 8899 + }, + { + "epoch": 0.56, + "grad_norm": 0.8274092078208923, + "learning_rate": 4.212254683133565e-06, + "loss": 0.5152, + "step": 8900 + }, + { + "epoch": 0.56, + "grad_norm": 0.8774511814117432, + "learning_rate": 4.2112415195136585e-06, + "loss": 0.5542, + "step": 8901 + }, + { + "epoch": 0.56, + "grad_norm": 0.9276379942893982, + "learning_rate": 4.21022838910563e-06, + "loss": 0.5648, + "step": 8902 + }, + { + "epoch": 0.56, + "grad_norm": 0.8499544858932495, + "learning_rate": 4.209215291952135e-06, + "loss": 0.5782, + "step": 8903 + }, + { + "epoch": 0.56, + "grad_norm": 0.8849813938140869, + "learning_rate": 4.208202228095835e-06, + "loss": 0.5964, + "step": 8904 + }, + { + "epoch": 0.56, + "grad_norm": 0.9096781611442566, + "learning_rate": 4.207189197579382e-06, + "loss": 0.5476, + "step": 8905 + }, + { + "epoch": 0.56, + "grad_norm": 0.7918185591697693, + "learning_rate": 4.2061762004454365e-06, + "loss": 0.5343, + "step": 8906 + }, + { + "epoch": 0.56, + "grad_norm": 0.9129202365875244, + "learning_rate": 4.2051632367366485e-06, + "loss": 0.6456, + "step": 8907 + }, + { + "epoch": 0.56, + "grad_norm": 0.8676325082778931, + "learning_rate": 4.204150306495672e-06, + "loss": 0.6489, + "step": 8908 + }, + { + "epoch": 0.56, + "grad_norm": 0.8340794444084167, + "learning_rate": 4.203137409765159e-06, + "loss": 0.5279, + "step": 8909 + }, + { + "epoch": 0.56, + "grad_norm": 0.8473523855209351, + "learning_rate": 4.202124546587754e-06, + "loss": 0.5894, + "step": 8910 + }, + { + "epoch": 0.56, + "grad_norm": 0.8485411405563354, + "learning_rate": 4.201111717006111e-06, + "loss": 0.6015, + "step": 8911 + }, + { + "epoch": 0.56, + "grad_norm": 1.0016659498214722, + "learning_rate": 4.200098921062875e-06, + "loss": 0.5994, + "step": 8912 + }, + { + "epoch": 0.56, + "grad_norm": 0.8975883722305298, + "learning_rate": 4.19908615880069e-06, + "loss": 0.6251, + "step": 8913 + }, + { + "epoch": 0.56, + "grad_norm": 0.9073837399482727, + "learning_rate": 4.198073430262199e-06, + "loss": 0.617, + "step": 8914 + }, + { + "epoch": 0.56, + "grad_norm": 0.8681656122207642, + "learning_rate": 4.197060735490048e-06, + "loss": 0.5584, + "step": 8915 + }, + { + "epoch": 0.56, + "grad_norm": 0.8720282912254333, + "learning_rate": 4.196048074526876e-06, + "loss": 0.6311, + "step": 8916 + }, + { + "epoch": 0.56, + "grad_norm": 0.9659051299095154, + "learning_rate": 4.195035447415324e-06, + "loss": 0.625, + "step": 8917 + }, + { + "epoch": 0.57, + "grad_norm": 0.8648727536201477, + "learning_rate": 4.194022854198026e-06, + "loss": 0.6112, + "step": 8918 + }, + { + "epoch": 0.57, + "grad_norm": 0.8482996225357056, + "learning_rate": 4.193010294917624e-06, + "loss": 0.5382, + "step": 8919 + }, + { + "epoch": 0.57, + "grad_norm": 0.877569317817688, + "learning_rate": 4.1919977696167515e-06, + "loss": 0.5412, + "step": 8920 + }, + { + "epoch": 0.57, + "grad_norm": 0.8534306287765503, + "learning_rate": 4.190985278338042e-06, + "loss": 0.5792, + "step": 8921 + }, + { + "epoch": 0.57, + "grad_norm": 0.9521181583404541, + "learning_rate": 4.189972821124126e-06, + "loss": 0.6252, + "step": 8922 + }, + { + "epoch": 0.57, + "grad_norm": 0.8283462524414062, + "learning_rate": 4.188960398017638e-06, + "loss": 0.5668, + "step": 8923 + }, + { + "epoch": 0.57, + "grad_norm": 0.8727411031723022, + "learning_rate": 4.187948009061207e-06, + "loss": 0.5511, + "step": 8924 + }, + { + "epoch": 0.57, + "grad_norm": 0.8867582678794861, + "learning_rate": 4.186935654297461e-06, + "loss": 0.59, + "step": 8925 + }, + { + "epoch": 0.57, + "grad_norm": 0.859950602054596, + "learning_rate": 4.1859233337690245e-06, + "loss": 0.5337, + "step": 8926 + }, + { + "epoch": 0.57, + "grad_norm": 0.9106714129447937, + "learning_rate": 4.1849110475185225e-06, + "loss": 0.5833, + "step": 8927 + }, + { + "epoch": 0.57, + "grad_norm": 0.9669057130813599, + "learning_rate": 4.183898795588584e-06, + "loss": 0.5751, + "step": 8928 + }, + { + "epoch": 0.57, + "grad_norm": 0.8599669933319092, + "learning_rate": 4.1828865780218285e-06, + "loss": 0.5511, + "step": 8929 + }, + { + "epoch": 0.57, + "grad_norm": 0.8547632694244385, + "learning_rate": 4.181874394860875e-06, + "loss": 0.5715, + "step": 8930 + }, + { + "epoch": 0.57, + "grad_norm": 0.8837994337081909, + "learning_rate": 4.180862246148344e-06, + "loss": 0.5981, + "step": 8931 + }, + { + "epoch": 0.57, + "grad_norm": 0.8469040989875793, + "learning_rate": 4.1798501319268565e-06, + "loss": 0.5338, + "step": 8932 + }, + { + "epoch": 0.57, + "grad_norm": 0.9077805876731873, + "learning_rate": 4.178838052239027e-06, + "loss": 0.5999, + "step": 8933 + }, + { + "epoch": 0.57, + "grad_norm": 0.8736510276794434, + "learning_rate": 4.177826007127468e-06, + "loss": 0.561, + "step": 8934 + }, + { + "epoch": 0.57, + "grad_norm": 0.8860734701156616, + "learning_rate": 4.176813996634796e-06, + "loss": 0.5776, + "step": 8935 + }, + { + "epoch": 0.57, + "grad_norm": 0.8876895904541016, + "learning_rate": 4.175802020803624e-06, + "loss": 0.5732, + "step": 8936 + }, + { + "epoch": 0.57, + "grad_norm": 0.9345043301582336, + "learning_rate": 4.174790079676563e-06, + "loss": 0.5576, + "step": 8937 + }, + { + "epoch": 0.57, + "grad_norm": 0.8652613162994385, + "learning_rate": 4.173778173296219e-06, + "loss": 0.5698, + "step": 8938 + }, + { + "epoch": 0.57, + "grad_norm": 0.990037739276886, + "learning_rate": 4.172766301705202e-06, + "loss": 0.6013, + "step": 8939 + }, + { + "epoch": 0.57, + "grad_norm": 0.9194901585578918, + "learning_rate": 4.171754464946119e-06, + "loss": 0.6357, + "step": 8940 + }, + { + "epoch": 0.57, + "grad_norm": 0.8597732782363892, + "learning_rate": 4.170742663061575e-06, + "loss": 0.5582, + "step": 8941 + }, + { + "epoch": 0.57, + "grad_norm": 0.9255541563034058, + "learning_rate": 4.169730896094172e-06, + "loss": 0.5464, + "step": 8942 + }, + { + "epoch": 0.57, + "grad_norm": 0.9251505732536316, + "learning_rate": 4.1687191640865135e-06, + "loss": 0.5523, + "step": 8943 + }, + { + "epoch": 0.57, + "grad_norm": 0.8354572653770447, + "learning_rate": 4.167707467081197e-06, + "loss": 0.5892, + "step": 8944 + }, + { + "epoch": 0.57, + "grad_norm": 0.9447482228279114, + "learning_rate": 4.166695805120825e-06, + "loss": 0.7007, + "step": 8945 + }, + { + "epoch": 0.57, + "grad_norm": 0.8327589631080627, + "learning_rate": 4.165684178247993e-06, + "loss": 0.5409, + "step": 8946 + }, + { + "epoch": 0.57, + "grad_norm": 0.9203556180000305, + "learning_rate": 4.1646725865053005e-06, + "loss": 0.6141, + "step": 8947 + }, + { + "epoch": 0.57, + "grad_norm": 0.9170238375663757, + "learning_rate": 4.163661029935336e-06, + "loss": 0.545, + "step": 8948 + }, + { + "epoch": 0.57, + "grad_norm": 0.8779581785202026, + "learning_rate": 4.162649508580698e-06, + "loss": 0.5778, + "step": 8949 + }, + { + "epoch": 0.57, + "grad_norm": 0.8849088549613953, + "learning_rate": 4.161638022483976e-06, + "loss": 0.5548, + "step": 8950 + }, + { + "epoch": 0.57, + "grad_norm": 0.8839111924171448, + "learning_rate": 4.160626571687761e-06, + "loss": 0.5711, + "step": 8951 + }, + { + "epoch": 0.57, + "grad_norm": 0.8382406234741211, + "learning_rate": 4.159615156234639e-06, + "loss": 0.5591, + "step": 8952 + }, + { + "epoch": 0.57, + "grad_norm": 0.8632530570030212, + "learning_rate": 4.158603776167201e-06, + "loss": 0.5473, + "step": 8953 + }, + { + "epoch": 0.57, + "grad_norm": 0.8566288352012634, + "learning_rate": 4.157592431528031e-06, + "loss": 0.5317, + "step": 8954 + }, + { + "epoch": 0.57, + "grad_norm": 0.8643941283226013, + "learning_rate": 4.156581122359714e-06, + "loss": 0.6105, + "step": 8955 + }, + { + "epoch": 0.57, + "grad_norm": 0.8673588633537292, + "learning_rate": 4.15556984870483e-06, + "loss": 0.5787, + "step": 8956 + }, + { + "epoch": 0.57, + "grad_norm": 0.8547856211662292, + "learning_rate": 4.1545586106059636e-06, + "loss": 0.5776, + "step": 8957 + }, + { + "epoch": 0.57, + "grad_norm": 0.8713629841804504, + "learning_rate": 4.153547408105691e-06, + "loss": 0.605, + "step": 8958 + }, + { + "epoch": 0.57, + "grad_norm": 0.8869353532791138, + "learning_rate": 4.152536241246595e-06, + "loss": 0.5782, + "step": 8959 + }, + { + "epoch": 0.57, + "grad_norm": 0.9460669755935669, + "learning_rate": 4.151525110071248e-06, + "loss": 0.5934, + "step": 8960 + }, + { + "epoch": 0.57, + "grad_norm": 0.8945161700248718, + "learning_rate": 4.1505140146222276e-06, + "loss": 0.6079, + "step": 8961 + }, + { + "epoch": 0.57, + "grad_norm": 0.8501721024513245, + "learning_rate": 4.149502954942107e-06, + "loss": 0.58, + "step": 8962 + }, + { + "epoch": 0.57, + "grad_norm": 0.8856709599494934, + "learning_rate": 4.148491931073459e-06, + "loss": 0.5962, + "step": 8963 + }, + { + "epoch": 0.57, + "grad_norm": 0.8691068887710571, + "learning_rate": 4.147480943058852e-06, + "loss": 0.6176, + "step": 8964 + }, + { + "epoch": 0.57, + "grad_norm": 0.9234523177146912, + "learning_rate": 4.146469990940858e-06, + "loss": 0.5897, + "step": 8965 + }, + { + "epoch": 0.57, + "grad_norm": 0.8816432356834412, + "learning_rate": 4.1454590747620424e-06, + "loss": 0.6297, + "step": 8966 + }, + { + "epoch": 0.57, + "grad_norm": 0.8956805467605591, + "learning_rate": 4.144448194564973e-06, + "loss": 0.5716, + "step": 8967 + }, + { + "epoch": 0.57, + "grad_norm": 0.9496785998344421, + "learning_rate": 4.1434373503922145e-06, + "loss": 0.5964, + "step": 8968 + }, + { + "epoch": 0.57, + "grad_norm": 0.8747205138206482, + "learning_rate": 4.142426542286329e-06, + "loss": 0.6098, + "step": 8969 + }, + { + "epoch": 0.57, + "grad_norm": 0.9270417094230652, + "learning_rate": 4.141415770289877e-06, + "loss": 0.5876, + "step": 8970 + }, + { + "epoch": 0.57, + "grad_norm": 0.8411609530448914, + "learning_rate": 4.140405034445423e-06, + "loss": 0.5882, + "step": 8971 + }, + { + "epoch": 0.57, + "grad_norm": 0.926416277885437, + "learning_rate": 4.13939433479552e-06, + "loss": 0.5833, + "step": 8972 + }, + { + "epoch": 0.57, + "grad_norm": 0.8865971565246582, + "learning_rate": 4.13838367138273e-06, + "loss": 0.5632, + "step": 8973 + }, + { + "epoch": 0.57, + "grad_norm": 0.9375487565994263, + "learning_rate": 4.137373044249604e-06, + "loss": 0.5751, + "step": 8974 + }, + { + "epoch": 0.57, + "grad_norm": 0.9372237324714661, + "learning_rate": 4.1363624534387e-06, + "loss": 0.5975, + "step": 8975 + }, + { + "epoch": 0.57, + "grad_norm": 0.8482964634895325, + "learning_rate": 4.135351898992568e-06, + "loss": 0.5627, + "step": 8976 + }, + { + "epoch": 0.57, + "grad_norm": 0.9388363361358643, + "learning_rate": 4.134341380953761e-06, + "loss": 0.5835, + "step": 8977 + }, + { + "epoch": 0.57, + "grad_norm": 0.8574067950248718, + "learning_rate": 4.133330899364824e-06, + "loss": 0.5772, + "step": 8978 + }, + { + "epoch": 0.57, + "grad_norm": 0.907927393913269, + "learning_rate": 4.1323204542683105e-06, + "loss": 0.6602, + "step": 8979 + }, + { + "epoch": 0.57, + "grad_norm": 0.926572322845459, + "learning_rate": 4.131310045706763e-06, + "loss": 0.6016, + "step": 8980 + }, + { + "epoch": 0.57, + "grad_norm": 0.9103202223777771, + "learning_rate": 4.130299673722729e-06, + "loss": 0.6473, + "step": 8981 + }, + { + "epoch": 0.57, + "grad_norm": 0.9199764728546143, + "learning_rate": 4.129289338358748e-06, + "loss": 0.5697, + "step": 8982 + }, + { + "epoch": 0.57, + "grad_norm": 0.904728889465332, + "learning_rate": 4.128279039657366e-06, + "loss": 0.627, + "step": 8983 + }, + { + "epoch": 0.57, + "grad_norm": 0.8930543065071106, + "learning_rate": 4.127268777661119e-06, + "loss": 0.6144, + "step": 8984 + }, + { + "epoch": 0.57, + "grad_norm": 0.8956807255744934, + "learning_rate": 4.126258552412551e-06, + "loss": 0.5895, + "step": 8985 + }, + { + "epoch": 0.57, + "grad_norm": 0.8710659742355347, + "learning_rate": 4.125248363954192e-06, + "loss": 0.5893, + "step": 8986 + }, + { + "epoch": 0.57, + "grad_norm": 0.852942943572998, + "learning_rate": 4.124238212328585e-06, + "loss": 0.5658, + "step": 8987 + }, + { + "epoch": 0.57, + "grad_norm": 0.9231775403022766, + "learning_rate": 4.123228097578258e-06, + "loss": 0.6293, + "step": 8988 + }, + { + "epoch": 0.57, + "grad_norm": 0.9329462647438049, + "learning_rate": 4.122218019745748e-06, + "loss": 0.6025, + "step": 8989 + }, + { + "epoch": 0.57, + "grad_norm": 0.9070497751235962, + "learning_rate": 4.121207978873582e-06, + "loss": 0.537, + "step": 8990 + }, + { + "epoch": 0.57, + "grad_norm": 0.8512255549430847, + "learning_rate": 4.12019797500429e-06, + "loss": 0.5603, + "step": 8991 + }, + { + "epoch": 0.57, + "grad_norm": 0.9133707880973816, + "learning_rate": 4.119188008180401e-06, + "loss": 0.5676, + "step": 8992 + }, + { + "epoch": 0.57, + "grad_norm": 0.9240803122520447, + "learning_rate": 4.118178078444442e-06, + "loss": 0.6035, + "step": 8993 + }, + { + "epoch": 0.57, + "grad_norm": 0.951643705368042, + "learning_rate": 4.117168185838936e-06, + "loss": 0.5913, + "step": 8994 + }, + { + "epoch": 0.57, + "grad_norm": 0.8662564754486084, + "learning_rate": 4.1161583304064055e-06, + "loss": 0.5592, + "step": 8995 + }, + { + "epoch": 0.57, + "grad_norm": 0.8806678056716919, + "learning_rate": 4.115148512189374e-06, + "loss": 0.621, + "step": 8996 + }, + { + "epoch": 0.57, + "grad_norm": 0.9231857657432556, + "learning_rate": 4.114138731230362e-06, + "loss": 0.6269, + "step": 8997 + }, + { + "epoch": 0.57, + "grad_norm": 0.8965012431144714, + "learning_rate": 4.113128987571885e-06, + "loss": 0.6336, + "step": 8998 + }, + { + "epoch": 0.57, + "grad_norm": 0.8867535591125488, + "learning_rate": 4.1121192812564595e-06, + "loss": 0.5398, + "step": 8999 + }, + { + "epoch": 0.57, + "grad_norm": 0.9486203789710999, + "learning_rate": 4.111109612326603e-06, + "loss": 0.6183, + "step": 9000 + }, + { + "epoch": 0.57, + "grad_norm": 0.9139353632926941, + "learning_rate": 4.110099980824831e-06, + "loss": 0.5937, + "step": 9001 + }, + { + "epoch": 0.57, + "grad_norm": 0.8802381753921509, + "learning_rate": 4.109090386793652e-06, + "loss": 0.56, + "step": 9002 + }, + { + "epoch": 0.57, + "grad_norm": 0.880913496017456, + "learning_rate": 4.108080830275576e-06, + "loss": 0.6136, + "step": 9003 + }, + { + "epoch": 0.57, + "grad_norm": 0.9269407987594604, + "learning_rate": 4.107071311313113e-06, + "loss": 0.5653, + "step": 9004 + }, + { + "epoch": 0.57, + "grad_norm": 0.9228689670562744, + "learning_rate": 4.106061829948773e-06, + "loss": 0.6108, + "step": 9005 + }, + { + "epoch": 0.57, + "grad_norm": 0.921231746673584, + "learning_rate": 4.10505238622506e-06, + "loss": 0.5842, + "step": 9006 + }, + { + "epoch": 0.57, + "grad_norm": 0.8868432641029358, + "learning_rate": 4.104042980184476e-06, + "loss": 0.6736, + "step": 9007 + }, + { + "epoch": 0.57, + "grad_norm": 0.8439784646034241, + "learning_rate": 4.103033611869525e-06, + "loss": 0.5566, + "step": 9008 + }, + { + "epoch": 0.57, + "grad_norm": 0.8885878920555115, + "learning_rate": 4.1020242813227096e-06, + "loss": 0.5977, + "step": 9009 + }, + { + "epoch": 0.57, + "grad_norm": 0.8744617700576782, + "learning_rate": 4.101014988586528e-06, + "loss": 0.5894, + "step": 9010 + }, + { + "epoch": 0.57, + "grad_norm": 0.8461993932723999, + "learning_rate": 4.100005733703477e-06, + "loss": 0.5497, + "step": 9011 + }, + { + "epoch": 0.57, + "grad_norm": 0.8714662790298462, + "learning_rate": 4.0989965167160526e-06, + "loss": 0.5959, + "step": 9012 + }, + { + "epoch": 0.57, + "grad_norm": 0.883986234664917, + "learning_rate": 4.097987337666753e-06, + "loss": 0.5854, + "step": 9013 + }, + { + "epoch": 0.57, + "grad_norm": 0.8724504113197327, + "learning_rate": 4.096978196598068e-06, + "loss": 0.5916, + "step": 9014 + }, + { + "epoch": 0.57, + "grad_norm": 0.9002840518951416, + "learning_rate": 4.09596909355249e-06, + "loss": 0.5975, + "step": 9015 + }, + { + "epoch": 0.57, + "grad_norm": 0.8268336057662964, + "learning_rate": 4.094960028572506e-06, + "loss": 0.5894, + "step": 9016 + }, + { + "epoch": 0.57, + "grad_norm": 0.8918128609657288, + "learning_rate": 4.0939510017006095e-06, + "loss": 0.5321, + "step": 9017 + }, + { + "epoch": 0.57, + "grad_norm": 0.8402930498123169, + "learning_rate": 4.092942012979285e-06, + "loss": 0.5594, + "step": 9018 + }, + { + "epoch": 0.57, + "grad_norm": 0.9471001029014587, + "learning_rate": 4.091933062451015e-06, + "loss": 0.5805, + "step": 9019 + }, + { + "epoch": 0.57, + "grad_norm": 0.9170734286308289, + "learning_rate": 4.0909241501582865e-06, + "loss": 0.6064, + "step": 9020 + }, + { + "epoch": 0.57, + "grad_norm": 0.9813190698623657, + "learning_rate": 4.089915276143577e-06, + "loss": 0.6328, + "step": 9021 + }, + { + "epoch": 0.57, + "grad_norm": 0.9079948663711548, + "learning_rate": 4.088906440449371e-06, + "loss": 0.5542, + "step": 9022 + }, + { + "epoch": 0.57, + "grad_norm": 0.855984091758728, + "learning_rate": 4.087897643118145e-06, + "loss": 0.5719, + "step": 9023 + }, + { + "epoch": 0.57, + "grad_norm": 0.8991562724113464, + "learning_rate": 4.086888884192377e-06, + "loss": 0.5942, + "step": 9024 + }, + { + "epoch": 0.57, + "grad_norm": 0.9502757787704468, + "learning_rate": 4.0858801637145395e-06, + "loss": 0.5597, + "step": 9025 + }, + { + "epoch": 0.57, + "grad_norm": 0.910291314125061, + "learning_rate": 4.084871481727111e-06, + "loss": 0.5413, + "step": 9026 + }, + { + "epoch": 0.57, + "grad_norm": 0.8761973977088928, + "learning_rate": 4.083862838272559e-06, + "loss": 0.588, + "step": 9027 + }, + { + "epoch": 0.57, + "grad_norm": 0.935142993927002, + "learning_rate": 4.082854233393358e-06, + "loss": 0.5596, + "step": 9028 + }, + { + "epoch": 0.57, + "grad_norm": 0.9134296178817749, + "learning_rate": 4.081845667131971e-06, + "loss": 0.6049, + "step": 9029 + }, + { + "epoch": 0.57, + "grad_norm": 0.9424194693565369, + "learning_rate": 4.080837139530872e-06, + "loss": 0.5892, + "step": 9030 + }, + { + "epoch": 0.57, + "grad_norm": 0.8782743215560913, + "learning_rate": 4.0798286506325225e-06, + "loss": 0.5253, + "step": 9031 + }, + { + "epoch": 0.57, + "grad_norm": 0.8680989742279053, + "learning_rate": 4.078820200479389e-06, + "loss": 0.588, + "step": 9032 + }, + { + "epoch": 0.57, + "grad_norm": 0.905407726764679, + "learning_rate": 4.077811789113929e-06, + "loss": 0.6169, + "step": 9033 + }, + { + "epoch": 0.57, + "grad_norm": 0.9795319437980652, + "learning_rate": 4.076803416578608e-06, + "loss": 0.627, + "step": 9034 + }, + { + "epoch": 0.57, + "grad_norm": 0.8296229243278503, + "learning_rate": 4.0757950829158855e-06, + "loss": 0.5553, + "step": 9035 + }, + { + "epoch": 0.57, + "grad_norm": 0.8955538272857666, + "learning_rate": 4.074786788168216e-06, + "loss": 0.6238, + "step": 9036 + }, + { + "epoch": 0.57, + "grad_norm": 0.8968479037284851, + "learning_rate": 4.073778532378056e-06, + "loss": 0.619, + "step": 9037 + }, + { + "epoch": 0.57, + "grad_norm": 0.9429267644882202, + "learning_rate": 4.072770315587858e-06, + "loss": 0.5866, + "step": 9038 + }, + { + "epoch": 0.57, + "grad_norm": 0.8396599292755127, + "learning_rate": 4.071762137840079e-06, + "loss": 0.5358, + "step": 9039 + }, + { + "epoch": 0.57, + "grad_norm": 0.8964661955833435, + "learning_rate": 4.070753999177167e-06, + "loss": 0.5945, + "step": 9040 + }, + { + "epoch": 0.57, + "grad_norm": 0.8709607720375061, + "learning_rate": 4.069745899641571e-06, + "loss": 0.5417, + "step": 9041 + }, + { + "epoch": 0.57, + "grad_norm": 0.9348841309547424, + "learning_rate": 4.0687378392757374e-06, + "loss": 0.6048, + "step": 9042 + }, + { + "epoch": 0.57, + "grad_norm": 0.8811603784561157, + "learning_rate": 4.0677298181221155e-06, + "loss": 0.5896, + "step": 9043 + }, + { + "epoch": 0.57, + "grad_norm": 0.8758918046951294, + "learning_rate": 4.066721836223149e-06, + "loss": 0.5902, + "step": 9044 + }, + { + "epoch": 0.57, + "grad_norm": 0.9369110465049744, + "learning_rate": 4.065713893621278e-06, + "loss": 0.6596, + "step": 9045 + }, + { + "epoch": 0.57, + "grad_norm": 0.8938471674919128, + "learning_rate": 4.064705990358943e-06, + "loss": 0.5437, + "step": 9046 + }, + { + "epoch": 0.57, + "grad_norm": 0.8944480419158936, + "learning_rate": 4.063698126478587e-06, + "loss": 0.5737, + "step": 9047 + }, + { + "epoch": 0.57, + "grad_norm": 0.9232917428016663, + "learning_rate": 4.062690302022647e-06, + "loss": 0.5618, + "step": 9048 + }, + { + "epoch": 0.57, + "grad_norm": 0.8441494107246399, + "learning_rate": 4.0616825170335565e-06, + "loss": 0.5753, + "step": 9049 + }, + { + "epoch": 0.57, + "grad_norm": 0.8715497255325317, + "learning_rate": 4.060674771553751e-06, + "loss": 0.5928, + "step": 9050 + }, + { + "epoch": 0.57, + "grad_norm": 0.863179087638855, + "learning_rate": 4.059667065625662e-06, + "loss": 0.5379, + "step": 9051 + }, + { + "epoch": 0.57, + "grad_norm": 0.8319960236549377, + "learning_rate": 4.058659399291724e-06, + "loss": 0.5393, + "step": 9052 + }, + { + "epoch": 0.57, + "grad_norm": 0.9125126004219055, + "learning_rate": 4.057651772594362e-06, + "loss": 0.6624, + "step": 9053 + }, + { + "epoch": 0.57, + "grad_norm": 0.8907890915870667, + "learning_rate": 4.056644185576007e-06, + "loss": 0.6439, + "step": 9054 + }, + { + "epoch": 0.57, + "grad_norm": 0.8446599841117859, + "learning_rate": 4.055636638279082e-06, + "loss": 0.537, + "step": 9055 + }, + { + "epoch": 0.57, + "grad_norm": 0.9353048801422119, + "learning_rate": 4.054629130746015e-06, + "loss": 0.6079, + "step": 9056 + }, + { + "epoch": 0.57, + "grad_norm": 0.9255784749984741, + "learning_rate": 4.053621663019225e-06, + "loss": 0.5508, + "step": 9057 + }, + { + "epoch": 0.57, + "grad_norm": 0.8582807183265686, + "learning_rate": 4.052614235141136e-06, + "loss": 0.5567, + "step": 9058 + }, + { + "epoch": 0.57, + "grad_norm": 0.8390825390815735, + "learning_rate": 4.051606847154164e-06, + "loss": 0.5467, + "step": 9059 + }, + { + "epoch": 0.57, + "grad_norm": 0.8816949129104614, + "learning_rate": 4.05059949910073e-06, + "loss": 0.6016, + "step": 9060 + }, + { + "epoch": 0.57, + "grad_norm": 0.875059187412262, + "learning_rate": 4.049592191023247e-06, + "loss": 0.5172, + "step": 9061 + }, + { + "epoch": 0.57, + "grad_norm": 0.9037113189697266, + "learning_rate": 4.0485849229641325e-06, + "loss": 0.5985, + "step": 9062 + }, + { + "epoch": 0.57, + "grad_norm": 0.8470078110694885, + "learning_rate": 4.047577694965794e-06, + "loss": 0.5989, + "step": 9063 + }, + { + "epoch": 0.57, + "grad_norm": 0.9105969071388245, + "learning_rate": 4.046570507070649e-06, + "loss": 0.6467, + "step": 9064 + }, + { + "epoch": 0.57, + "grad_norm": 0.8537124395370483, + "learning_rate": 4.045563359321102e-06, + "loss": 0.553, + "step": 9065 + }, + { + "epoch": 0.57, + "grad_norm": 0.917719304561615, + "learning_rate": 4.044556251759562e-06, + "loss": 0.6008, + "step": 9066 + }, + { + "epoch": 0.57, + "grad_norm": 0.8611663579940796, + "learning_rate": 4.043549184428434e-06, + "loss": 0.582, + "step": 9067 + }, + { + "epoch": 0.57, + "grad_norm": 0.9147241115570068, + "learning_rate": 4.042542157370122e-06, + "loss": 0.6259, + "step": 9068 + }, + { + "epoch": 0.57, + "grad_norm": 0.9152358770370483, + "learning_rate": 4.041535170627029e-06, + "loss": 0.5821, + "step": 9069 + }, + { + "epoch": 0.57, + "grad_norm": 0.8824336528778076, + "learning_rate": 4.040528224241558e-06, + "loss": 0.5594, + "step": 9070 + }, + { + "epoch": 0.57, + "grad_norm": 0.805570662021637, + "learning_rate": 4.039521318256104e-06, + "loss": 0.4743, + "step": 9071 + }, + { + "epoch": 0.57, + "grad_norm": 0.9145142436027527, + "learning_rate": 4.038514452713065e-06, + "loss": 0.569, + "step": 9072 + }, + { + "epoch": 0.57, + "grad_norm": 0.8494529128074646, + "learning_rate": 4.037507627654838e-06, + "loss": 0.5572, + "step": 9073 + }, + { + "epoch": 0.57, + "grad_norm": 0.897002637386322, + "learning_rate": 4.0365008431238184e-06, + "loss": 0.6247, + "step": 9074 + }, + { + "epoch": 0.57, + "grad_norm": 0.9091158509254456, + "learning_rate": 4.035494099162396e-06, + "loss": 0.6193, + "step": 9075 + }, + { + "epoch": 0.58, + "grad_norm": 0.8396137952804565, + "learning_rate": 4.03448739581296e-06, + "loss": 0.5578, + "step": 9076 + }, + { + "epoch": 0.58, + "grad_norm": 0.8554840683937073, + "learning_rate": 4.033480733117902e-06, + "loss": 0.5752, + "step": 9077 + }, + { + "epoch": 0.58, + "grad_norm": 0.8953068852424622, + "learning_rate": 4.032474111119609e-06, + "loss": 0.6019, + "step": 9078 + }, + { + "epoch": 0.58, + "grad_norm": 0.9367779493331909, + "learning_rate": 4.031467529860466e-06, + "loss": 0.6226, + "step": 9079 + }, + { + "epoch": 0.58, + "grad_norm": 0.8574473857879639, + "learning_rate": 4.030460989382853e-06, + "loss": 0.5676, + "step": 9080 + }, + { + "epoch": 0.58, + "grad_norm": 0.8605020046234131, + "learning_rate": 4.029454489729156e-06, + "loss": 0.5517, + "step": 9081 + }, + { + "epoch": 0.58, + "grad_norm": 0.890446126461029, + "learning_rate": 4.028448030941756e-06, + "loss": 0.5869, + "step": 9082 + }, + { + "epoch": 0.58, + "grad_norm": 0.8330382704734802, + "learning_rate": 4.027441613063029e-06, + "loss": 0.582, + "step": 9083 + }, + { + "epoch": 0.58, + "grad_norm": 0.8821123242378235, + "learning_rate": 4.026435236135351e-06, + "loss": 0.6191, + "step": 9084 + }, + { + "epoch": 0.58, + "grad_norm": 0.9941993355751038, + "learning_rate": 4.025428900201098e-06, + "loss": 0.6336, + "step": 9085 + }, + { + "epoch": 0.58, + "grad_norm": 0.9467496275901794, + "learning_rate": 4.024422605302646e-06, + "loss": 0.629, + "step": 9086 + }, + { + "epoch": 0.58, + "grad_norm": 0.8554012179374695, + "learning_rate": 4.023416351482364e-06, + "loss": 0.533, + "step": 9087 + }, + { + "epoch": 0.58, + "grad_norm": 0.8936252593994141, + "learning_rate": 4.022410138782621e-06, + "loss": 0.6261, + "step": 9088 + }, + { + "epoch": 0.58, + "grad_norm": 0.8527365922927856, + "learning_rate": 4.021403967245786e-06, + "loss": 0.54, + "step": 9089 + }, + { + "epoch": 0.58, + "grad_norm": 0.8804008364677429, + "learning_rate": 4.020397836914227e-06, + "loss": 0.5582, + "step": 9090 + }, + { + "epoch": 0.58, + "grad_norm": 0.8681939840316772, + "learning_rate": 4.019391747830307e-06, + "loss": 0.567, + "step": 9091 + }, + { + "epoch": 0.58, + "grad_norm": 0.9508828520774841, + "learning_rate": 4.018385700036389e-06, + "loss": 0.611, + "step": 9092 + }, + { + "epoch": 0.58, + "grad_norm": 0.8974758982658386, + "learning_rate": 4.017379693574833e-06, + "loss": 0.5596, + "step": 9093 + }, + { + "epoch": 0.58, + "grad_norm": 0.8851727247238159, + "learning_rate": 4.016373728488002e-06, + "loss": 0.6184, + "step": 9094 + }, + { + "epoch": 0.58, + "grad_norm": 0.8081380724906921, + "learning_rate": 4.01536780481825e-06, + "loss": 0.5798, + "step": 9095 + }, + { + "epoch": 0.58, + "grad_norm": 0.8617830276489258, + "learning_rate": 4.014361922607936e-06, + "loss": 0.6021, + "step": 9096 + }, + { + "epoch": 0.58, + "grad_norm": 0.895240306854248, + "learning_rate": 4.013356081899412e-06, + "loss": 0.5756, + "step": 9097 + }, + { + "epoch": 0.58, + "grad_norm": 0.8988040089607239, + "learning_rate": 4.0123502827350295e-06, + "loss": 0.5726, + "step": 9098 + }, + { + "epoch": 0.58, + "grad_norm": 0.932150661945343, + "learning_rate": 4.011344525157141e-06, + "loss": 0.6415, + "step": 9099 + }, + { + "epoch": 0.58, + "grad_norm": 0.9083916544914246, + "learning_rate": 4.010338809208098e-06, + "loss": 0.6164, + "step": 9100 + }, + { + "epoch": 0.58, + "grad_norm": 0.8748500943183899, + "learning_rate": 4.009333134930244e-06, + "loss": 0.5558, + "step": 9101 + }, + { + "epoch": 0.58, + "grad_norm": 0.905542254447937, + "learning_rate": 4.0083275023659236e-06, + "loss": 0.6417, + "step": 9102 + }, + { + "epoch": 0.58, + "grad_norm": 0.8471083045005798, + "learning_rate": 4.007321911557483e-06, + "loss": 0.6012, + "step": 9103 + }, + { + "epoch": 0.58, + "grad_norm": 0.8830850124359131, + "learning_rate": 4.0063163625472645e-06, + "loss": 0.5628, + "step": 9104 + }, + { + "epoch": 0.58, + "grad_norm": 0.8656706213951111, + "learning_rate": 4.005310855377608e-06, + "loss": 0.6062, + "step": 9105 + }, + { + "epoch": 0.58, + "grad_norm": 0.8246329426765442, + "learning_rate": 4.004305390090848e-06, + "loss": 0.5715, + "step": 9106 + }, + { + "epoch": 0.58, + "grad_norm": 0.9124095439910889, + "learning_rate": 4.003299966729325e-06, + "loss": 0.5901, + "step": 9107 + }, + { + "epoch": 0.58, + "grad_norm": 0.9204335808753967, + "learning_rate": 4.002294585335375e-06, + "loss": 0.5478, + "step": 9108 + }, + { + "epoch": 0.58, + "grad_norm": 0.9215397834777832, + "learning_rate": 4.001289245951329e-06, + "loss": 0.5763, + "step": 9109 + }, + { + "epoch": 0.58, + "grad_norm": 0.9767603278160095, + "learning_rate": 4.000283948619517e-06, + "loss": 0.5892, + "step": 9110 + }, + { + "epoch": 0.58, + "grad_norm": 0.8919650316238403, + "learning_rate": 3.99927869338227e-06, + "loss": 0.551, + "step": 9111 + }, + { + "epoch": 0.58, + "grad_norm": 0.9372937679290771, + "learning_rate": 3.998273480281919e-06, + "loss": 0.5895, + "step": 9112 + }, + { + "epoch": 0.58, + "grad_norm": 1.0142635107040405, + "learning_rate": 3.997268309360785e-06, + "loss": 0.5965, + "step": 9113 + }, + { + "epoch": 0.58, + "grad_norm": 0.8675452470779419, + "learning_rate": 3.996263180661194e-06, + "loss": 0.5961, + "step": 9114 + }, + { + "epoch": 0.58, + "grad_norm": 0.847707211971283, + "learning_rate": 3.995258094225468e-06, + "loss": 0.5375, + "step": 9115 + }, + { + "epoch": 0.58, + "grad_norm": 0.8482071161270142, + "learning_rate": 3.99425305009593e-06, + "loss": 0.5372, + "step": 9116 + }, + { + "epoch": 0.58, + "grad_norm": 0.9109798669815063, + "learning_rate": 3.993248048314897e-06, + "loss": 0.5797, + "step": 9117 + }, + { + "epoch": 0.58, + "grad_norm": 0.9148001670837402, + "learning_rate": 3.992243088924686e-06, + "loss": 0.575, + "step": 9118 + }, + { + "epoch": 0.58, + "grad_norm": 0.8979611396789551, + "learning_rate": 3.991238171967612e-06, + "loss": 0.5755, + "step": 9119 + }, + { + "epoch": 0.58, + "grad_norm": 0.9315516352653503, + "learning_rate": 3.9902332974859906e-06, + "loss": 0.6237, + "step": 9120 + }, + { + "epoch": 0.58, + "grad_norm": 0.8877137899398804, + "learning_rate": 3.989228465522133e-06, + "loss": 0.6188, + "step": 9121 + }, + { + "epoch": 0.58, + "grad_norm": 0.9431552886962891, + "learning_rate": 3.9882236761183476e-06, + "loss": 0.6138, + "step": 9122 + }, + { + "epoch": 0.58, + "grad_norm": 0.877837061882019, + "learning_rate": 3.987218929316942e-06, + "loss": 0.6009, + "step": 9123 + }, + { + "epoch": 0.58, + "grad_norm": 0.8428844809532166, + "learning_rate": 3.986214225160226e-06, + "loss": 0.565, + "step": 9124 + }, + { + "epoch": 0.58, + "grad_norm": 0.9297831654548645, + "learning_rate": 3.9852095636905026e-06, + "loss": 0.6212, + "step": 9125 + }, + { + "epoch": 0.58, + "grad_norm": 0.9488120079040527, + "learning_rate": 3.984204944950073e-06, + "loss": 0.63, + "step": 9126 + }, + { + "epoch": 0.58, + "grad_norm": 0.9093875288963318, + "learning_rate": 3.983200368981241e-06, + "loss": 0.6243, + "step": 9127 + }, + { + "epoch": 0.58, + "grad_norm": 0.8893300294876099, + "learning_rate": 3.982195835826302e-06, + "loss": 0.5697, + "step": 9128 + }, + { + "epoch": 0.58, + "grad_norm": 0.8863883018493652, + "learning_rate": 3.981191345527558e-06, + "loss": 0.6158, + "step": 9129 + }, + { + "epoch": 0.58, + "grad_norm": 0.98292475938797, + "learning_rate": 3.9801868981273e-06, + "loss": 0.6121, + "step": 9130 + }, + { + "epoch": 0.58, + "grad_norm": 0.8693172335624695, + "learning_rate": 3.979182493667826e-06, + "loss": 0.5718, + "step": 9131 + }, + { + "epoch": 0.58, + "grad_norm": 0.9281517863273621, + "learning_rate": 3.978178132191424e-06, + "loss": 0.5888, + "step": 9132 + }, + { + "epoch": 0.58, + "grad_norm": 0.9215491414070129, + "learning_rate": 3.9771738137403885e-06, + "loss": 0.5927, + "step": 9133 + }, + { + "epoch": 0.58, + "grad_norm": 0.9008755683898926, + "learning_rate": 3.976169538357004e-06, + "loss": 0.5982, + "step": 9134 + }, + { + "epoch": 0.58, + "grad_norm": 0.9302978515625, + "learning_rate": 3.97516530608356e-06, + "loss": 0.6214, + "step": 9135 + }, + { + "epoch": 0.58, + "grad_norm": 0.8978670239448547, + "learning_rate": 3.974161116962337e-06, + "loss": 0.6003, + "step": 9136 + }, + { + "epoch": 0.58, + "grad_norm": 0.851223886013031, + "learning_rate": 3.973156971035623e-06, + "loss": 0.5936, + "step": 9137 + }, + { + "epoch": 0.58, + "grad_norm": 0.8626120686531067, + "learning_rate": 3.9721528683456966e-06, + "loss": 0.5732, + "step": 9138 + }, + { + "epoch": 0.58, + "grad_norm": 0.9312442541122437, + "learning_rate": 3.971148808934838e-06, + "loss": 0.6243, + "step": 9139 + }, + { + "epoch": 0.58, + "grad_norm": 0.8899321556091309, + "learning_rate": 3.970144792845322e-06, + "loss": 0.5334, + "step": 9140 + }, + { + "epoch": 0.58, + "grad_norm": 0.8440714478492737, + "learning_rate": 3.9691408201194275e-06, + "loss": 0.6048, + "step": 9141 + }, + { + "epoch": 0.58, + "grad_norm": 0.8218972086906433, + "learning_rate": 3.968136890799426e-06, + "loss": 0.5506, + "step": 9142 + }, + { + "epoch": 0.58, + "grad_norm": 0.8766604661941528, + "learning_rate": 3.967133004927592e-06, + "loss": 0.5832, + "step": 9143 + }, + { + "epoch": 0.58, + "grad_norm": 0.9405858516693115, + "learning_rate": 3.9661291625461945e-06, + "loss": 0.6241, + "step": 9144 + }, + { + "epoch": 0.58, + "grad_norm": 0.8780211806297302, + "learning_rate": 3.965125363697499e-06, + "loss": 0.599, + "step": 9145 + }, + { + "epoch": 0.58, + "grad_norm": 0.8795492649078369, + "learning_rate": 3.964121608423775e-06, + "loss": 0.6152, + "step": 9146 + }, + { + "epoch": 0.58, + "grad_norm": 0.8778110146522522, + "learning_rate": 3.963117896767288e-06, + "loss": 0.6185, + "step": 9147 + }, + { + "epoch": 0.58, + "grad_norm": 0.8248224258422852, + "learning_rate": 3.962114228770299e-06, + "loss": 0.5828, + "step": 9148 + }, + { + "epoch": 0.58, + "grad_norm": 0.8475858569145203, + "learning_rate": 3.961110604475067e-06, + "loss": 0.5455, + "step": 9149 + }, + { + "epoch": 0.58, + "grad_norm": 0.8509166240692139, + "learning_rate": 3.960107023923855e-06, + "loss": 0.6004, + "step": 9150 + }, + { + "epoch": 0.58, + "grad_norm": 0.8684119582176208, + "learning_rate": 3.959103487158919e-06, + "loss": 0.5847, + "step": 9151 + }, + { + "epoch": 0.58, + "grad_norm": 0.799341082572937, + "learning_rate": 3.958099994222515e-06, + "loss": 0.5715, + "step": 9152 + }, + { + "epoch": 0.58, + "grad_norm": 0.8934925198554993, + "learning_rate": 3.957096545156893e-06, + "loss": 0.6094, + "step": 9153 + }, + { + "epoch": 0.58, + "grad_norm": 0.9568246603012085, + "learning_rate": 3.956093140004308e-06, + "loss": 0.5888, + "step": 9154 + }, + { + "epoch": 0.58, + "grad_norm": 0.9072986245155334, + "learning_rate": 3.955089778807012e-06, + "loss": 0.597, + "step": 9155 + }, + { + "epoch": 0.58, + "grad_norm": 0.8377887010574341, + "learning_rate": 3.954086461607248e-06, + "loss": 0.5394, + "step": 9156 + }, + { + "epoch": 0.58, + "grad_norm": 0.8361679315567017, + "learning_rate": 3.9530831884472655e-06, + "loss": 0.5454, + "step": 9157 + }, + { + "epoch": 0.58, + "grad_norm": 0.9167816638946533, + "learning_rate": 3.952079959369308e-06, + "loss": 0.5748, + "step": 9158 + }, + { + "epoch": 0.58, + "grad_norm": 1.0050288438796997, + "learning_rate": 3.951076774415619e-06, + "loss": 0.603, + "step": 9159 + }, + { + "epoch": 0.58, + "grad_norm": 0.8782682418823242, + "learning_rate": 3.950073633628436e-06, + "loss": 0.6133, + "step": 9160 + }, + { + "epoch": 0.58, + "grad_norm": 0.8905148506164551, + "learning_rate": 3.949070537050002e-06, + "loss": 0.6327, + "step": 9161 + }, + { + "epoch": 0.58, + "grad_norm": 0.9059675335884094, + "learning_rate": 3.948067484722549e-06, + "loss": 0.638, + "step": 9162 + }, + { + "epoch": 0.58, + "grad_norm": 0.8948028087615967, + "learning_rate": 3.947064476688318e-06, + "loss": 0.5723, + "step": 9163 + }, + { + "epoch": 0.58, + "grad_norm": 0.9257702231407166, + "learning_rate": 3.946061512989537e-06, + "loss": 0.6301, + "step": 9164 + }, + { + "epoch": 0.58, + "grad_norm": 0.8525533676147461, + "learning_rate": 3.94505859366844e-06, + "loss": 0.5937, + "step": 9165 + }, + { + "epoch": 0.58, + "grad_norm": 0.8956154584884644, + "learning_rate": 3.944055718767255e-06, + "loss": 0.5908, + "step": 9166 + }, + { + "epoch": 0.58, + "grad_norm": 0.8894206881523132, + "learning_rate": 3.943052888328211e-06, + "loss": 0.5536, + "step": 9167 + }, + { + "epoch": 0.58, + "grad_norm": 0.8910138010978699, + "learning_rate": 3.942050102393533e-06, + "loss": 0.6453, + "step": 9168 + }, + { + "epoch": 0.58, + "grad_norm": 0.8522058129310608, + "learning_rate": 3.941047361005445e-06, + "loss": 0.5562, + "step": 9169 + }, + { + "epoch": 0.58, + "grad_norm": 0.9396253228187561, + "learning_rate": 3.940044664206168e-06, + "loss": 0.6234, + "step": 9170 + }, + { + "epoch": 0.58, + "grad_norm": 0.914997398853302, + "learning_rate": 3.939042012037924e-06, + "loss": 0.5733, + "step": 9171 + }, + { + "epoch": 0.58, + "grad_norm": 0.8962453603744507, + "learning_rate": 3.938039404542929e-06, + "loss": 0.5559, + "step": 9172 + }, + { + "epoch": 0.58, + "grad_norm": 0.9689484238624573, + "learning_rate": 3.937036841763401e-06, + "loss": 0.5797, + "step": 9173 + }, + { + "epoch": 0.58, + "grad_norm": 0.8645898103713989, + "learning_rate": 3.936034323741555e-06, + "loss": 0.5752, + "step": 9174 + }, + { + "epoch": 0.58, + "grad_norm": 0.9438555240631104, + "learning_rate": 3.935031850519599e-06, + "loss": 0.5716, + "step": 9175 + }, + { + "epoch": 0.58, + "grad_norm": 1.1834338903427124, + "learning_rate": 3.934029422139749e-06, + "loss": 0.5701, + "step": 9176 + }, + { + "epoch": 0.58, + "grad_norm": 0.954289972782135, + "learning_rate": 3.933027038644213e-06, + "loss": 0.5488, + "step": 9177 + }, + { + "epoch": 0.58, + "grad_norm": 0.9095122218132019, + "learning_rate": 3.932024700075196e-06, + "loss": 0.5646, + "step": 9178 + }, + { + "epoch": 0.58, + "grad_norm": 0.8213743567466736, + "learning_rate": 3.931022406474902e-06, + "loss": 0.5665, + "step": 9179 + }, + { + "epoch": 0.58, + "grad_norm": 0.9303811192512512, + "learning_rate": 3.930020157885537e-06, + "loss": 0.5638, + "step": 9180 + }, + { + "epoch": 0.58, + "grad_norm": 0.8715723156929016, + "learning_rate": 3.929017954349301e-06, + "loss": 0.5983, + "step": 9181 + }, + { + "epoch": 0.58, + "grad_norm": 0.9098017811775208, + "learning_rate": 3.928015795908394e-06, + "loss": 0.566, + "step": 9182 + }, + { + "epoch": 0.58, + "grad_norm": 0.8678735494613647, + "learning_rate": 3.927013682605011e-06, + "loss": 0.5558, + "step": 9183 + }, + { + "epoch": 0.58, + "grad_norm": 0.870380699634552, + "learning_rate": 3.9260116144813495e-06, + "loss": 0.6038, + "step": 9184 + }, + { + "epoch": 0.58, + "grad_norm": 0.8809983730316162, + "learning_rate": 3.925009591579604e-06, + "loss": 0.5607, + "step": 9185 + }, + { + "epoch": 0.58, + "grad_norm": 0.8992043137550354, + "learning_rate": 3.9240076139419655e-06, + "loss": 0.5617, + "step": 9186 + }, + { + "epoch": 0.58, + "grad_norm": 0.884871244430542, + "learning_rate": 3.92300568161062e-06, + "loss": 0.5958, + "step": 9187 + }, + { + "epoch": 0.58, + "grad_norm": 0.9241304993629456, + "learning_rate": 3.9220037946277606e-06, + "loss": 0.5833, + "step": 9188 + }, + { + "epoch": 0.58, + "grad_norm": 0.837876558303833, + "learning_rate": 3.921001953035573e-06, + "loss": 0.5648, + "step": 9189 + }, + { + "epoch": 0.58, + "grad_norm": 0.8822311162948608, + "learning_rate": 3.920000156876238e-06, + "loss": 0.5887, + "step": 9190 + }, + { + "epoch": 0.58, + "grad_norm": 0.8584680557250977, + "learning_rate": 3.91899840619194e-06, + "loss": 0.5667, + "step": 9191 + }, + { + "epoch": 0.58, + "grad_norm": 0.9705455303192139, + "learning_rate": 3.9179967010248556e-06, + "loss": 0.6367, + "step": 9192 + }, + { + "epoch": 0.58, + "grad_norm": 0.8703861236572266, + "learning_rate": 3.91699504141717e-06, + "loss": 0.5526, + "step": 9193 + }, + { + "epoch": 0.58, + "grad_norm": 0.8837298154830933, + "learning_rate": 3.915993427411054e-06, + "loss": 0.6264, + "step": 9194 + }, + { + "epoch": 0.58, + "grad_norm": 0.9295274615287781, + "learning_rate": 3.914991859048684e-06, + "loss": 0.5771, + "step": 9195 + }, + { + "epoch": 0.58, + "grad_norm": 0.8776589035987854, + "learning_rate": 3.913990336372231e-06, + "loss": 0.562, + "step": 9196 + }, + { + "epoch": 0.58, + "grad_norm": 0.9219998717308044, + "learning_rate": 3.912988859423869e-06, + "loss": 0.5872, + "step": 9197 + }, + { + "epoch": 0.58, + "grad_norm": 0.9244682788848877, + "learning_rate": 3.911987428245765e-06, + "loss": 0.5853, + "step": 9198 + }, + { + "epoch": 0.58, + "grad_norm": 0.908510148525238, + "learning_rate": 3.9109860428800845e-06, + "loss": 0.5842, + "step": 9199 + }, + { + "epoch": 0.58, + "grad_norm": 0.8179001212120056, + "learning_rate": 3.909984703368992e-06, + "loss": 0.5537, + "step": 9200 + }, + { + "epoch": 0.58, + "grad_norm": 0.8670381903648376, + "learning_rate": 3.9089834097546534e-06, + "loss": 0.5738, + "step": 9201 + }, + { + "epoch": 0.58, + "grad_norm": 0.9407733678817749, + "learning_rate": 3.907982162079229e-06, + "loss": 0.5843, + "step": 9202 + }, + { + "epoch": 0.58, + "grad_norm": 0.8975993394851685, + "learning_rate": 3.906980960384875e-06, + "loss": 0.592, + "step": 9203 + }, + { + "epoch": 0.58, + "grad_norm": 0.9089202880859375, + "learning_rate": 3.90597980471375e-06, + "loss": 0.5626, + "step": 9204 + }, + { + "epoch": 0.58, + "grad_norm": 0.9043983221054077, + "learning_rate": 3.904978695108011e-06, + "loss": 0.6234, + "step": 9205 + }, + { + "epoch": 0.58, + "grad_norm": 0.8751869201660156, + "learning_rate": 3.9039776316098104e-06, + "loss": 0.584, + "step": 9206 + }, + { + "epoch": 0.58, + "grad_norm": 0.9038695693016052, + "learning_rate": 3.902976614261298e-06, + "loss": 0.5378, + "step": 9207 + }, + { + "epoch": 0.58, + "grad_norm": 0.920074462890625, + "learning_rate": 3.901975643104625e-06, + "loss": 0.5855, + "step": 9208 + }, + { + "epoch": 0.58, + "grad_norm": 0.8642706871032715, + "learning_rate": 3.9009747181819355e-06, + "loss": 0.5913, + "step": 9209 + }, + { + "epoch": 0.58, + "grad_norm": 0.9230958223342896, + "learning_rate": 3.8999738395353795e-06, + "loss": 0.6275, + "step": 9210 + }, + { + "epoch": 0.58, + "grad_norm": 0.8580319285392761, + "learning_rate": 3.898973007207097e-06, + "loss": 0.5848, + "step": 9211 + }, + { + "epoch": 0.58, + "grad_norm": 0.9039139747619629, + "learning_rate": 3.897972221239233e-06, + "loss": 0.6364, + "step": 9212 + }, + { + "epoch": 0.58, + "grad_norm": 0.8444435596466064, + "learning_rate": 3.896971481673923e-06, + "loss": 0.5956, + "step": 9213 + }, + { + "epoch": 0.58, + "grad_norm": 0.832820475101471, + "learning_rate": 3.895970788553308e-06, + "loss": 0.5442, + "step": 9214 + }, + { + "epoch": 0.58, + "grad_norm": 0.9238991141319275, + "learning_rate": 3.894970141919522e-06, + "loss": 0.5961, + "step": 9215 + }, + { + "epoch": 0.58, + "grad_norm": 0.8753307461738586, + "learning_rate": 3.8939695418147e-06, + "loss": 0.5827, + "step": 9216 + }, + { + "epoch": 0.58, + "grad_norm": 0.8677202463150024, + "learning_rate": 3.892968988280971e-06, + "loss": 0.6043, + "step": 9217 + }, + { + "epoch": 0.58, + "grad_norm": 0.8751778602600098, + "learning_rate": 3.891968481360469e-06, + "loss": 0.6046, + "step": 9218 + }, + { + "epoch": 0.58, + "grad_norm": 0.8408955931663513, + "learning_rate": 3.890968021095318e-06, + "loss": 0.5691, + "step": 9219 + }, + { + "epoch": 0.58, + "grad_norm": 0.8872222900390625, + "learning_rate": 3.889967607527648e-06, + "loss": 0.5943, + "step": 9220 + }, + { + "epoch": 0.58, + "grad_norm": 0.8292039036750793, + "learning_rate": 3.888967240699578e-06, + "loss": 0.5651, + "step": 9221 + }, + { + "epoch": 0.58, + "grad_norm": 0.8514560461044312, + "learning_rate": 3.887966920653234e-06, + "loss": 0.5667, + "step": 9222 + }, + { + "epoch": 0.58, + "grad_norm": 0.8210045695304871, + "learning_rate": 3.886966647430733e-06, + "loss": 0.4967, + "step": 9223 + }, + { + "epoch": 0.58, + "grad_norm": 0.9208805561065674, + "learning_rate": 3.8859664210741965e-06, + "loss": 0.537, + "step": 9224 + }, + { + "epoch": 0.58, + "grad_norm": 1.0000219345092773, + "learning_rate": 3.884966241625737e-06, + "loss": 0.6296, + "step": 9225 + }, + { + "epoch": 0.58, + "grad_norm": 0.9203490018844604, + "learning_rate": 3.88396610912747e-06, + "loss": 0.5744, + "step": 9226 + }, + { + "epoch": 0.58, + "grad_norm": 0.8076592087745667, + "learning_rate": 3.882966023621509e-06, + "loss": 0.5488, + "step": 9227 + }, + { + "epoch": 0.58, + "grad_norm": 0.8537278771400452, + "learning_rate": 3.881965985149962e-06, + "loss": 0.5721, + "step": 9228 + }, + { + "epoch": 0.58, + "grad_norm": 0.9415518641471863, + "learning_rate": 3.880965993754939e-06, + "loss": 0.6237, + "step": 9229 + }, + { + "epoch": 0.58, + "grad_norm": 0.8622970581054688, + "learning_rate": 3.879966049478544e-06, + "loss": 0.5502, + "step": 9230 + }, + { + "epoch": 0.58, + "grad_norm": 0.9145261645317078, + "learning_rate": 3.878966152362882e-06, + "loss": 0.5996, + "step": 9231 + }, + { + "epoch": 0.58, + "grad_norm": 0.8953229784965515, + "learning_rate": 3.877966302450057e-06, + "loss": 0.5741, + "step": 9232 + }, + { + "epoch": 0.58, + "grad_norm": 0.8525993824005127, + "learning_rate": 3.876966499782168e-06, + "loss": 0.5531, + "step": 9233 + }, + { + "epoch": 0.59, + "grad_norm": 0.8899672627449036, + "learning_rate": 3.875966744401311e-06, + "loss": 0.6231, + "step": 9234 + }, + { + "epoch": 0.59, + "grad_norm": 0.9033567905426025, + "learning_rate": 3.874967036349585e-06, + "loss": 0.5987, + "step": 9235 + }, + { + "epoch": 0.59, + "grad_norm": 0.8257200717926025, + "learning_rate": 3.8739673756690845e-06, + "loss": 0.5333, + "step": 9236 + }, + { + "epoch": 0.59, + "grad_norm": 0.9355111718177795, + "learning_rate": 3.872967762401899e-06, + "loss": 0.5712, + "step": 9237 + }, + { + "epoch": 0.59, + "grad_norm": 0.9281190037727356, + "learning_rate": 3.8719681965901225e-06, + "loss": 0.6097, + "step": 9238 + }, + { + "epoch": 0.59, + "grad_norm": 0.8676934242248535, + "learning_rate": 3.870968678275838e-06, + "loss": 0.6332, + "step": 9239 + }, + { + "epoch": 0.59, + "grad_norm": 0.8591299057006836, + "learning_rate": 3.869969207501138e-06, + "loss": 0.616, + "step": 9240 + }, + { + "epoch": 0.59, + "grad_norm": 0.9023558497428894, + "learning_rate": 3.868969784308101e-06, + "loss": 0.6094, + "step": 9241 + }, + { + "epoch": 0.59, + "grad_norm": 0.8794646859169006, + "learning_rate": 3.867970408738814e-06, + "loss": 0.566, + "step": 9242 + }, + { + "epoch": 0.59, + "grad_norm": 0.8649892210960388, + "learning_rate": 3.866971080835352e-06, + "loss": 0.5934, + "step": 9243 + }, + { + "epoch": 0.59, + "grad_norm": 0.9679709672927856, + "learning_rate": 3.8659718006398e-06, + "loss": 0.6249, + "step": 9244 + }, + { + "epoch": 0.59, + "grad_norm": 0.8926246166229248, + "learning_rate": 3.864972568194227e-06, + "loss": 0.5787, + "step": 9245 + }, + { + "epoch": 0.59, + "grad_norm": 0.8640733957290649, + "learning_rate": 3.863973383540714e-06, + "loss": 0.6095, + "step": 9246 + }, + { + "epoch": 0.59, + "grad_norm": 0.8310282230377197, + "learning_rate": 3.8629742467213266e-06, + "loss": 0.5379, + "step": 9247 + }, + { + "epoch": 0.59, + "grad_norm": 0.8928051590919495, + "learning_rate": 3.86197515777814e-06, + "loss": 0.538, + "step": 9248 + }, + { + "epoch": 0.59, + "grad_norm": 0.9271872639656067, + "learning_rate": 3.860976116753221e-06, + "loss": 0.5781, + "step": 9249 + }, + { + "epoch": 0.59, + "grad_norm": 0.8660386204719543, + "learning_rate": 3.859977123688636e-06, + "loss": 0.6074, + "step": 9250 + }, + { + "epoch": 0.59, + "grad_norm": 0.8669256567955017, + "learning_rate": 3.858978178626446e-06, + "loss": 0.59, + "step": 9251 + }, + { + "epoch": 0.59, + "grad_norm": 0.8511551022529602, + "learning_rate": 3.8579792816087175e-06, + "loss": 0.5496, + "step": 9252 + }, + { + "epoch": 0.59, + "grad_norm": 0.8670158386230469, + "learning_rate": 3.856980432677508e-06, + "loss": 0.5846, + "step": 9253 + }, + { + "epoch": 0.59, + "grad_norm": 0.9106800556182861, + "learning_rate": 3.855981631874877e-06, + "loss": 0.6221, + "step": 9254 + }, + { + "epoch": 0.59, + "grad_norm": 0.9327592253684998, + "learning_rate": 3.85498287924288e-06, + "loss": 0.6351, + "step": 9255 + }, + { + "epoch": 0.59, + "grad_norm": 0.8513845801353455, + "learning_rate": 3.853984174823568e-06, + "loss": 0.5642, + "step": 9256 + }, + { + "epoch": 0.59, + "grad_norm": 0.9598379135131836, + "learning_rate": 3.852985518658997e-06, + "loss": 0.5821, + "step": 9257 + }, + { + "epoch": 0.59, + "grad_norm": 0.9493588209152222, + "learning_rate": 3.851986910791217e-06, + "loss": 0.575, + "step": 9258 + }, + { + "epoch": 0.59, + "grad_norm": 0.8645276427268982, + "learning_rate": 3.850988351262274e-06, + "loss": 0.5513, + "step": 9259 + }, + { + "epoch": 0.59, + "grad_norm": 0.8836858868598938, + "learning_rate": 3.849989840114213e-06, + "loss": 0.6298, + "step": 9260 + }, + { + "epoch": 0.59, + "grad_norm": 0.8826265931129456, + "learning_rate": 3.84899137738908e-06, + "loss": 0.5903, + "step": 9261 + }, + { + "epoch": 0.59, + "grad_norm": 0.8552426695823669, + "learning_rate": 3.847992963128917e-06, + "loss": 0.578, + "step": 9262 + }, + { + "epoch": 0.59, + "grad_norm": 0.8744699954986572, + "learning_rate": 3.846994597375763e-06, + "loss": 0.5525, + "step": 9263 + }, + { + "epoch": 0.59, + "grad_norm": 0.9690203070640564, + "learning_rate": 3.845996280171653e-06, + "loss": 0.633, + "step": 9264 + }, + { + "epoch": 0.59, + "grad_norm": 0.9128517508506775, + "learning_rate": 3.844998011558626e-06, + "loss": 0.6275, + "step": 9265 + }, + { + "epoch": 0.59, + "grad_norm": 0.8809550404548645, + "learning_rate": 3.843999791578716e-06, + "loss": 0.5585, + "step": 9266 + }, + { + "epoch": 0.59, + "grad_norm": 0.8937491178512573, + "learning_rate": 3.843001620273954e-06, + "loss": 0.5942, + "step": 9267 + }, + { + "epoch": 0.59, + "grad_norm": 0.8887850046157837, + "learning_rate": 3.842003497686367e-06, + "loss": 0.5823, + "step": 9268 + }, + { + "epoch": 0.59, + "grad_norm": 0.8240920305252075, + "learning_rate": 3.841005423857984e-06, + "loss": 0.561, + "step": 9269 + }, + { + "epoch": 0.59, + "grad_norm": 0.889115035533905, + "learning_rate": 3.840007398830833e-06, + "loss": 0.5673, + "step": 9270 + }, + { + "epoch": 0.59, + "grad_norm": 0.9033503532409668, + "learning_rate": 3.839009422646935e-06, + "loss": 0.6556, + "step": 9271 + }, + { + "epoch": 0.59, + "grad_norm": 0.8702270984649658, + "learning_rate": 3.8380114953483095e-06, + "loss": 0.6081, + "step": 9272 + }, + { + "epoch": 0.59, + "grad_norm": 0.8264375329017639, + "learning_rate": 3.837013616976977e-06, + "loss": 0.5227, + "step": 9273 + }, + { + "epoch": 0.59, + "grad_norm": 0.9013060927391052, + "learning_rate": 3.8360157875749575e-06, + "loss": 0.6422, + "step": 9274 + }, + { + "epoch": 0.59, + "grad_norm": 0.8687025904655457, + "learning_rate": 3.835018007184265e-06, + "loss": 0.6144, + "step": 9275 + }, + { + "epoch": 0.59, + "grad_norm": 0.8397945761680603, + "learning_rate": 3.834020275846909e-06, + "loss": 0.5611, + "step": 9276 + }, + { + "epoch": 0.59, + "grad_norm": 0.8652381896972656, + "learning_rate": 3.833022593604902e-06, + "loss": 0.5984, + "step": 9277 + }, + { + "epoch": 0.59, + "grad_norm": 0.8577977418899536, + "learning_rate": 3.832024960500257e-06, + "loss": 0.5211, + "step": 9278 + }, + { + "epoch": 0.59, + "grad_norm": 0.9175687432289124, + "learning_rate": 3.8310273765749774e-06, + "loss": 0.6194, + "step": 9279 + }, + { + "epoch": 0.59, + "grad_norm": 0.8669849038124084, + "learning_rate": 3.830029841871067e-06, + "loss": 0.5919, + "step": 9280 + }, + { + "epoch": 0.59, + "grad_norm": 0.9313320517539978, + "learning_rate": 3.82903235643053e-06, + "loss": 0.6049, + "step": 9281 + }, + { + "epoch": 0.59, + "grad_norm": 0.8776915669441223, + "learning_rate": 3.828034920295368e-06, + "loss": 0.5875, + "step": 9282 + }, + { + "epoch": 0.59, + "grad_norm": 0.9610856175422668, + "learning_rate": 3.827037533507579e-06, + "loss": 0.5978, + "step": 9283 + }, + { + "epoch": 0.59, + "grad_norm": 0.8849360942840576, + "learning_rate": 3.826040196109158e-06, + "loss": 0.5807, + "step": 9284 + }, + { + "epoch": 0.59, + "grad_norm": 0.9191281795501709, + "learning_rate": 3.825042908142102e-06, + "loss": 0.5998, + "step": 9285 + }, + { + "epoch": 0.59, + "grad_norm": 0.8342413306236267, + "learning_rate": 3.824045669648398e-06, + "loss": 0.5753, + "step": 9286 + }, + { + "epoch": 0.59, + "grad_norm": 0.8650674819946289, + "learning_rate": 3.823048480670044e-06, + "loss": 0.5979, + "step": 9287 + }, + { + "epoch": 0.59, + "grad_norm": 0.8660332560539246, + "learning_rate": 3.8220513412490215e-06, + "loss": 0.5876, + "step": 9288 + }, + { + "epoch": 0.59, + "grad_norm": 0.9197229743003845, + "learning_rate": 3.821054251427321e-06, + "loss": 0.622, + "step": 9289 + }, + { + "epoch": 0.59, + "grad_norm": 0.8805333375930786, + "learning_rate": 3.820057211246923e-06, + "loss": 0.5349, + "step": 9290 + }, + { + "epoch": 0.59, + "grad_norm": 1.0064138174057007, + "learning_rate": 3.819060220749813e-06, + "loss": 0.6236, + "step": 9291 + }, + { + "epoch": 0.59, + "grad_norm": 0.9283258318901062, + "learning_rate": 3.8180632799779675e-06, + "loss": 0.56, + "step": 9292 + }, + { + "epoch": 0.59, + "grad_norm": 0.9393151998519897, + "learning_rate": 3.817066388973367e-06, + "loss": 0.5407, + "step": 9293 + }, + { + "epoch": 0.59, + "grad_norm": 0.87945157289505, + "learning_rate": 3.816069547777983e-06, + "loss": 0.6007, + "step": 9294 + }, + { + "epoch": 0.59, + "grad_norm": 0.9038872718811035, + "learning_rate": 3.815072756433794e-06, + "loss": 0.6051, + "step": 9295 + }, + { + "epoch": 0.59, + "grad_norm": 0.8580070734024048, + "learning_rate": 3.814076014982769e-06, + "loss": 0.5844, + "step": 9296 + }, + { + "epoch": 0.59, + "grad_norm": 0.9397634863853455, + "learning_rate": 3.8130793234668782e-06, + "loss": 0.6006, + "step": 9297 + }, + { + "epoch": 0.59, + "grad_norm": 0.9336340427398682, + "learning_rate": 3.812082681928086e-06, + "loss": 0.6343, + "step": 9298 + }, + { + "epoch": 0.59, + "grad_norm": 0.8567546606063843, + "learning_rate": 3.81108609040836e-06, + "loss": 0.5669, + "step": 9299 + }, + { + "epoch": 0.59, + "grad_norm": 0.9026763439178467, + "learning_rate": 3.810089548949665e-06, + "loss": 0.6, + "step": 9300 + }, + { + "epoch": 0.59, + "grad_norm": 0.8796485066413879, + "learning_rate": 3.8090930575939588e-06, + "loss": 0.56, + "step": 9301 + }, + { + "epoch": 0.59, + "grad_norm": 0.9571773409843445, + "learning_rate": 3.8080966163832e-06, + "loss": 0.6081, + "step": 9302 + }, + { + "epoch": 0.59, + "grad_norm": 0.8654407262802124, + "learning_rate": 3.807100225359346e-06, + "loss": 0.5752, + "step": 9303 + }, + { + "epoch": 0.59, + "grad_norm": 0.942138135433197, + "learning_rate": 3.8061038845643535e-06, + "loss": 0.6286, + "step": 9304 + }, + { + "epoch": 0.59, + "grad_norm": 0.9119827747344971, + "learning_rate": 3.8051075940401727e-06, + "loss": 0.5855, + "step": 9305 + }, + { + "epoch": 0.59, + "grad_norm": 0.908536970615387, + "learning_rate": 3.8041113538287537e-06, + "loss": 0.6099, + "step": 9306 + }, + { + "epoch": 0.59, + "grad_norm": 0.9727365970611572, + "learning_rate": 3.803115163972044e-06, + "loss": 0.5992, + "step": 9307 + }, + { + "epoch": 0.59, + "grad_norm": 0.9480968713760376, + "learning_rate": 3.8021190245119937e-06, + "loss": 0.6685, + "step": 9308 + }, + { + "epoch": 0.59, + "grad_norm": 0.9626975655555725, + "learning_rate": 3.8011229354905445e-06, + "loss": 0.633, + "step": 9309 + }, + { + "epoch": 0.59, + "grad_norm": 0.9095605611801147, + "learning_rate": 3.8001268969496357e-06, + "loss": 0.5897, + "step": 9310 + }, + { + "epoch": 0.59, + "grad_norm": 0.9230700135231018, + "learning_rate": 3.799130908931209e-06, + "loss": 0.6167, + "step": 9311 + }, + { + "epoch": 0.59, + "grad_norm": 0.8941061496734619, + "learning_rate": 3.7981349714772044e-06, + "loss": 0.5678, + "step": 9312 + }, + { + "epoch": 0.59, + "grad_norm": 0.9019367098808289, + "learning_rate": 3.7971390846295546e-06, + "loss": 0.6376, + "step": 9313 + }, + { + "epoch": 0.59, + "grad_norm": 0.9550539255142212, + "learning_rate": 3.7961432484301925e-06, + "loss": 0.6213, + "step": 9314 + }, + { + "epoch": 0.59, + "grad_norm": 0.8824061155319214, + "learning_rate": 3.7951474629210517e-06, + "loss": 0.5651, + "step": 9315 + }, + { + "epoch": 0.59, + "grad_norm": 0.9145764708518982, + "learning_rate": 3.7941517281440577e-06, + "loss": 0.5492, + "step": 9316 + }, + { + "epoch": 0.59, + "grad_norm": 0.8980282545089722, + "learning_rate": 3.7931560441411413e-06, + "loss": 0.5882, + "step": 9317 + }, + { + "epoch": 0.59, + "grad_norm": 0.8780221343040466, + "learning_rate": 3.792160410954225e-06, + "loss": 0.5892, + "step": 9318 + }, + { + "epoch": 0.59, + "grad_norm": 0.8798972368240356, + "learning_rate": 3.791164828625233e-06, + "loss": 0.5706, + "step": 9319 + }, + { + "epoch": 0.59, + "grad_norm": 0.8012358546257019, + "learning_rate": 3.7901692971960823e-06, + "loss": 0.5133, + "step": 9320 + }, + { + "epoch": 0.59, + "grad_norm": 0.9223332405090332, + "learning_rate": 3.7891738167086968e-06, + "loss": 0.5617, + "step": 9321 + }, + { + "epoch": 0.59, + "grad_norm": 0.8617243766784668, + "learning_rate": 3.7881783872049875e-06, + "loss": 0.6052, + "step": 9322 + }, + { + "epoch": 0.59, + "grad_norm": 0.8890591859817505, + "learning_rate": 3.7871830087268726e-06, + "loss": 0.5505, + "step": 9323 + }, + { + "epoch": 0.59, + "grad_norm": 0.9087486267089844, + "learning_rate": 3.7861876813162596e-06, + "loss": 0.5906, + "step": 9324 + }, + { + "epoch": 0.59, + "grad_norm": 0.9168681502342224, + "learning_rate": 3.7851924050150633e-06, + "loss": 0.6217, + "step": 9325 + }, + { + "epoch": 0.59, + "grad_norm": 0.8641383051872253, + "learning_rate": 3.7841971798651876e-06, + "loss": 0.5495, + "step": 9326 + }, + { + "epoch": 0.59, + "grad_norm": 0.9064701795578003, + "learning_rate": 3.78320200590854e-06, + "loss": 0.6339, + "step": 9327 + }, + { + "epoch": 0.59, + "grad_norm": 0.844735324382782, + "learning_rate": 3.782206883187021e-06, + "loss": 0.6285, + "step": 9328 + }, + { + "epoch": 0.59, + "grad_norm": 0.953070878982544, + "learning_rate": 3.7812118117425363e-06, + "loss": 0.5556, + "step": 9329 + }, + { + "epoch": 0.59, + "grad_norm": 0.8404299020767212, + "learning_rate": 3.7802167916169808e-06, + "loss": 0.6154, + "step": 9330 + }, + { + "epoch": 0.59, + "grad_norm": 0.8700167536735535, + "learning_rate": 3.7792218228522536e-06, + "loss": 0.6355, + "step": 9331 + }, + { + "epoch": 0.59, + "grad_norm": 0.8953800797462463, + "learning_rate": 3.7782269054902493e-06, + "loss": 0.5983, + "step": 9332 + }, + { + "epoch": 0.59, + "grad_norm": 0.8571730256080627, + "learning_rate": 3.777232039572858e-06, + "loss": 0.5895, + "step": 9333 + }, + { + "epoch": 0.59, + "grad_norm": 0.9391055107116699, + "learning_rate": 3.7762372251419722e-06, + "loss": 0.64, + "step": 9334 + }, + { + "epoch": 0.59, + "grad_norm": 0.8176417350769043, + "learning_rate": 3.7752424622394807e-06, + "loss": 0.5433, + "step": 9335 + }, + { + "epoch": 0.59, + "grad_norm": 0.8655744194984436, + "learning_rate": 3.7742477509072684e-06, + "loss": 0.5325, + "step": 9336 + }, + { + "epoch": 0.59, + "grad_norm": 0.9132146835327148, + "learning_rate": 3.7732530911872177e-06, + "loss": 0.5687, + "step": 9337 + }, + { + "epoch": 0.59, + "grad_norm": 0.9163744449615479, + "learning_rate": 3.7722584831212127e-06, + "loss": 0.5955, + "step": 9338 + }, + { + "epoch": 0.59, + "grad_norm": 0.9320352673530579, + "learning_rate": 3.771263926751133e-06, + "loss": 0.5999, + "step": 9339 + }, + { + "epoch": 0.59, + "grad_norm": 0.8946950435638428, + "learning_rate": 3.7702694221188548e-06, + "loss": 0.599, + "step": 9340 + }, + { + "epoch": 0.59, + "grad_norm": 0.8757476210594177, + "learning_rate": 3.769274969266251e-06, + "loss": 0.5752, + "step": 9341 + }, + { + "epoch": 0.59, + "grad_norm": 0.9394705295562744, + "learning_rate": 3.768280568235198e-06, + "loss": 0.607, + "step": 9342 + }, + { + "epoch": 0.59, + "grad_norm": 0.8652751445770264, + "learning_rate": 3.767286219067566e-06, + "loss": 0.5999, + "step": 9343 + }, + { + "epoch": 0.59, + "grad_norm": 0.9048978686332703, + "learning_rate": 3.766291921805224e-06, + "loss": 0.5745, + "step": 9344 + }, + { + "epoch": 0.59, + "grad_norm": 0.8632850050926208, + "learning_rate": 3.765297676490035e-06, + "loss": 0.5459, + "step": 9345 + }, + { + "epoch": 0.59, + "grad_norm": 0.8732088804244995, + "learning_rate": 3.764303483163867e-06, + "loss": 0.6096, + "step": 9346 + }, + { + "epoch": 0.59, + "grad_norm": 0.914079487323761, + "learning_rate": 3.7633093418685806e-06, + "loss": 0.5492, + "step": 9347 + }, + { + "epoch": 0.59, + "grad_norm": 0.8865966200828552, + "learning_rate": 3.7623152526460365e-06, + "loss": 0.5839, + "step": 9348 + }, + { + "epoch": 0.59, + "grad_norm": 0.9960510730743408, + "learning_rate": 3.7613212155380907e-06, + "loss": 0.6053, + "step": 9349 + }, + { + "epoch": 0.59, + "grad_norm": 0.8940538763999939, + "learning_rate": 3.760327230586598e-06, + "loss": 0.56, + "step": 9350 + }, + { + "epoch": 0.59, + "grad_norm": 0.9300123453140259, + "learning_rate": 3.7593332978334153e-06, + "loss": 0.5792, + "step": 9351 + }, + { + "epoch": 0.59, + "grad_norm": 0.8757148385047913, + "learning_rate": 3.7583394173203913e-06, + "loss": 0.5871, + "step": 9352 + }, + { + "epoch": 0.59, + "grad_norm": 0.9180594682693481, + "learning_rate": 3.757345589089374e-06, + "loss": 0.5897, + "step": 9353 + }, + { + "epoch": 0.59, + "grad_norm": 0.9074809551239014, + "learning_rate": 3.75635181318221e-06, + "loss": 0.6202, + "step": 9354 + }, + { + "epoch": 0.59, + "grad_norm": 0.9342314600944519, + "learning_rate": 3.755358089640747e-06, + "loss": 0.6069, + "step": 9355 + }, + { + "epoch": 0.59, + "grad_norm": 0.9455806016921997, + "learning_rate": 3.754364418506825e-06, + "loss": 0.5676, + "step": 9356 + }, + { + "epoch": 0.59, + "grad_norm": 0.8812072277069092, + "learning_rate": 3.7533707998222835e-06, + "loss": 0.6217, + "step": 9357 + }, + { + "epoch": 0.59, + "grad_norm": 0.8865488767623901, + "learning_rate": 3.7523772336289594e-06, + "loss": 0.5901, + "step": 9358 + }, + { + "epoch": 0.59, + "grad_norm": 0.8491330146789551, + "learning_rate": 3.751383719968692e-06, + "loss": 0.5475, + "step": 9359 + }, + { + "epoch": 0.59, + "grad_norm": 0.8442015647888184, + "learning_rate": 3.7503902588833124e-06, + "loss": 0.5699, + "step": 9360 + }, + { + "epoch": 0.59, + "grad_norm": 0.8833118081092834, + "learning_rate": 3.7493968504146513e-06, + "loss": 0.5281, + "step": 9361 + }, + { + "epoch": 0.59, + "grad_norm": 0.9113878011703491, + "learning_rate": 3.748403494604539e-06, + "loss": 0.5694, + "step": 9362 + }, + { + "epoch": 0.59, + "grad_norm": 0.8851996064186096, + "learning_rate": 3.747410191494799e-06, + "loss": 0.6256, + "step": 9363 + }, + { + "epoch": 0.59, + "grad_norm": 0.9763572216033936, + "learning_rate": 3.74641694112726e-06, + "loss": 0.5549, + "step": 9364 + }, + { + "epoch": 0.59, + "grad_norm": 0.900982141494751, + "learning_rate": 3.745423743543744e-06, + "loss": 0.5617, + "step": 9365 + }, + { + "epoch": 0.59, + "grad_norm": 0.9159935116767883, + "learning_rate": 3.7444305987860698e-06, + "loss": 0.6144, + "step": 9366 + }, + { + "epoch": 0.59, + "grad_norm": 0.949503481388092, + "learning_rate": 3.7434375068960528e-06, + "loss": 0.583, + "step": 9367 + }, + { + "epoch": 0.59, + "grad_norm": 0.8828021287918091, + "learning_rate": 3.7424444679155126e-06, + "loss": 0.6385, + "step": 9368 + }, + { + "epoch": 0.59, + "grad_norm": 0.8816289901733398, + "learning_rate": 3.7414514818862613e-06, + "loss": 0.6269, + "step": 9369 + }, + { + "epoch": 0.59, + "grad_norm": 0.9142639636993408, + "learning_rate": 3.7404585488501106e-06, + "loss": 0.6134, + "step": 9370 + }, + { + "epoch": 0.59, + "grad_norm": 0.8727523684501648, + "learning_rate": 3.7394656688488663e-06, + "loss": 0.5525, + "step": 9371 + }, + { + "epoch": 0.59, + "grad_norm": 0.9459042549133301, + "learning_rate": 3.7384728419243386e-06, + "loss": 0.5931, + "step": 9372 + }, + { + "epoch": 0.59, + "grad_norm": 0.916693389415741, + "learning_rate": 3.7374800681183334e-06, + "loss": 0.5991, + "step": 9373 + }, + { + "epoch": 0.59, + "grad_norm": 0.9326224327087402, + "learning_rate": 3.736487347472649e-06, + "loss": 0.6123, + "step": 9374 + }, + { + "epoch": 0.59, + "grad_norm": 0.962369978427887, + "learning_rate": 3.735494680029086e-06, + "loss": 0.6034, + "step": 9375 + }, + { + "epoch": 0.59, + "grad_norm": 0.8627373576164246, + "learning_rate": 3.734502065829443e-06, + "loss": 0.5739, + "step": 9376 + }, + { + "epoch": 0.59, + "grad_norm": 0.9000149369239807, + "learning_rate": 3.7335095049155173e-06, + "loss": 0.5426, + "step": 9377 + }, + { + "epoch": 0.59, + "grad_norm": 0.9197530150413513, + "learning_rate": 3.732516997329101e-06, + "loss": 0.6075, + "step": 9378 + }, + { + "epoch": 0.59, + "grad_norm": 0.8494321703910828, + "learning_rate": 3.731524543111983e-06, + "loss": 0.6488, + "step": 9379 + }, + { + "epoch": 0.59, + "grad_norm": 0.9430056810379028, + "learning_rate": 3.7305321423059526e-06, + "loss": 0.5836, + "step": 9380 + }, + { + "epoch": 0.59, + "grad_norm": 0.8478774428367615, + "learning_rate": 3.7295397949528e-06, + "loss": 0.565, + "step": 9381 + }, + { + "epoch": 0.59, + "grad_norm": 0.9042559266090393, + "learning_rate": 3.7285475010943067e-06, + "loss": 0.5992, + "step": 9382 + }, + { + "epoch": 0.59, + "grad_norm": 0.8787109851837158, + "learning_rate": 3.7275552607722544e-06, + "loss": 0.5903, + "step": 9383 + }, + { + "epoch": 0.59, + "grad_norm": 0.9351126551628113, + "learning_rate": 3.726563074028422e-06, + "loss": 0.5989, + "step": 9384 + }, + { + "epoch": 0.59, + "grad_norm": 0.8641285300254822, + "learning_rate": 3.7255709409045914e-06, + "loss": 0.5639, + "step": 9385 + }, + { + "epoch": 0.59, + "grad_norm": 0.8513421416282654, + "learning_rate": 3.724578861442535e-06, + "loss": 0.5561, + "step": 9386 + }, + { + "epoch": 0.59, + "grad_norm": 0.9056752324104309, + "learning_rate": 3.7235868356840244e-06, + "loss": 0.6007, + "step": 9387 + }, + { + "epoch": 0.59, + "grad_norm": 0.8228696584701538, + "learning_rate": 3.722594863670831e-06, + "loss": 0.5458, + "step": 9388 + }, + { + "epoch": 0.59, + "grad_norm": 0.8777575492858887, + "learning_rate": 3.7216029454447262e-06, + "loss": 0.5547, + "step": 9389 + }, + { + "epoch": 0.59, + "grad_norm": 0.8967665433883667, + "learning_rate": 3.720611081047474e-06, + "loss": 0.5842, + "step": 9390 + }, + { + "epoch": 0.59, + "grad_norm": 0.8031774759292603, + "learning_rate": 3.7196192705208378e-06, + "loss": 0.5461, + "step": 9391 + }, + { + "epoch": 0.6, + "grad_norm": 0.9115647077560425, + "learning_rate": 3.7186275139065807e-06, + "loss": 0.5559, + "step": 9392 + }, + { + "epoch": 0.6, + "grad_norm": 0.9450206756591797, + "learning_rate": 3.7176358112464593e-06, + "loss": 0.6226, + "step": 9393 + }, + { + "epoch": 0.6, + "grad_norm": 0.866363525390625, + "learning_rate": 3.716644162582235e-06, + "loss": 0.5949, + "step": 9394 + }, + { + "epoch": 0.6, + "grad_norm": 0.8881354331970215, + "learning_rate": 3.7156525679556597e-06, + "loss": 0.6358, + "step": 9395 + }, + { + "epoch": 0.6, + "grad_norm": 0.8575799465179443, + "learning_rate": 3.7146610274084875e-06, + "loss": 0.6055, + "step": 9396 + }, + { + "epoch": 0.6, + "grad_norm": 0.9607718586921692, + "learning_rate": 3.7136695409824665e-06, + "loss": 0.5996, + "step": 9397 + }, + { + "epoch": 0.6, + "grad_norm": 0.922845184803009, + "learning_rate": 3.712678108719348e-06, + "loss": 0.6158, + "step": 9398 + }, + { + "epoch": 0.6, + "grad_norm": 0.9459704756736755, + "learning_rate": 3.711686730660875e-06, + "loss": 0.5811, + "step": 9399 + }, + { + "epoch": 0.6, + "grad_norm": 0.8620908856391907, + "learning_rate": 3.710695406848794e-06, + "loss": 0.6261, + "step": 9400 + }, + { + "epoch": 0.6, + "grad_norm": 0.9615730047225952, + "learning_rate": 3.709704137324841e-06, + "loss": 0.6142, + "step": 9401 + }, + { + "epoch": 0.6, + "grad_norm": 0.8960047960281372, + "learning_rate": 3.7087129221307605e-06, + "loss": 0.5535, + "step": 9402 + }, + { + "epoch": 0.6, + "grad_norm": 0.9143571257591248, + "learning_rate": 3.7077217613082863e-06, + "loss": 0.5854, + "step": 9403 + }, + { + "epoch": 0.6, + "grad_norm": 0.9054227471351624, + "learning_rate": 3.7067306548991543e-06, + "loss": 0.6092, + "step": 9404 + }, + { + "epoch": 0.6, + "grad_norm": 0.9034886956214905, + "learning_rate": 3.7057396029450925e-06, + "loss": 0.6547, + "step": 9405 + }, + { + "epoch": 0.6, + "grad_norm": 0.862893283367157, + "learning_rate": 3.7047486054878367e-06, + "loss": 0.6051, + "step": 9406 + }, + { + "epoch": 0.6, + "grad_norm": 0.8760849237442017, + "learning_rate": 3.7037576625691095e-06, + "loss": 0.5925, + "step": 9407 + }, + { + "epoch": 0.6, + "grad_norm": 0.8564549684524536, + "learning_rate": 3.7027667742306393e-06, + "loss": 0.5745, + "step": 9408 + }, + { + "epoch": 0.6, + "grad_norm": 0.8729873895645142, + "learning_rate": 3.7017759405141476e-06, + "loss": 0.5559, + "step": 9409 + }, + { + "epoch": 0.6, + "grad_norm": 0.8863754868507385, + "learning_rate": 3.7007851614613522e-06, + "loss": 0.6388, + "step": 9410 + }, + { + "epoch": 0.6, + "grad_norm": 0.9098048210144043, + "learning_rate": 3.699794437113975e-06, + "loss": 0.6317, + "step": 9411 + }, + { + "epoch": 0.6, + "grad_norm": 0.8687307834625244, + "learning_rate": 3.698803767513732e-06, + "loss": 0.5728, + "step": 9412 + }, + { + "epoch": 0.6, + "grad_norm": 0.9310614466667175, + "learning_rate": 3.6978131527023363e-06, + "loss": 0.5108, + "step": 9413 + }, + { + "epoch": 0.6, + "grad_norm": 0.8796650767326355, + "learning_rate": 3.696822592721497e-06, + "loss": 0.6081, + "step": 9414 + }, + { + "epoch": 0.6, + "grad_norm": 0.8547250628471375, + "learning_rate": 3.695832087612925e-06, + "loss": 0.5948, + "step": 9415 + }, + { + "epoch": 0.6, + "grad_norm": 0.8584107756614685, + "learning_rate": 3.6948416374183287e-06, + "loss": 0.6096, + "step": 9416 + }, + { + "epoch": 0.6, + "grad_norm": 0.8845054507255554, + "learning_rate": 3.6938512421794103e-06, + "loss": 0.6239, + "step": 9417 + }, + { + "epoch": 0.6, + "grad_norm": 0.8871665596961975, + "learning_rate": 3.6928609019378702e-06, + "loss": 0.6064, + "step": 9418 + }, + { + "epoch": 0.6, + "grad_norm": 0.9024144411087036, + "learning_rate": 3.6918706167354125e-06, + "loss": 0.5947, + "step": 9419 + }, + { + "epoch": 0.6, + "grad_norm": 0.8818714618682861, + "learning_rate": 3.690880386613732e-06, + "loss": 0.5635, + "step": 9420 + }, + { + "epoch": 0.6, + "grad_norm": 0.8889881372451782, + "learning_rate": 3.689890211614525e-06, + "loss": 0.5769, + "step": 9421 + }, + { + "epoch": 0.6, + "grad_norm": 0.8806608319282532, + "learning_rate": 3.6889000917794816e-06, + "loss": 0.5825, + "step": 9422 + }, + { + "epoch": 0.6, + "grad_norm": 0.9338346719741821, + "learning_rate": 3.6879100271502953e-06, + "loss": 0.6212, + "step": 9423 + }, + { + "epoch": 0.6, + "grad_norm": 0.8248009085655212, + "learning_rate": 3.6869200177686543e-06, + "loss": 0.5429, + "step": 9424 + }, + { + "epoch": 0.6, + "grad_norm": 0.8343362212181091, + "learning_rate": 3.6859300636762423e-06, + "loss": 0.5304, + "step": 9425 + }, + { + "epoch": 0.6, + "grad_norm": 0.9038220643997192, + "learning_rate": 3.6849401649147453e-06, + "loss": 0.6251, + "step": 9426 + }, + { + "epoch": 0.6, + "grad_norm": 0.929233968257904, + "learning_rate": 3.683950321525841e-06, + "loss": 0.5699, + "step": 9427 + }, + { + "epoch": 0.6, + "grad_norm": 0.8593815565109253, + "learning_rate": 3.682960533551213e-06, + "loss": 0.5558, + "step": 9428 + }, + { + "epoch": 0.6, + "grad_norm": 0.8797299861907959, + "learning_rate": 3.681970801032534e-06, + "loss": 0.5799, + "step": 9429 + }, + { + "epoch": 0.6, + "grad_norm": 0.8691550493240356, + "learning_rate": 3.68098112401148e-06, + "loss": 0.5446, + "step": 9430 + }, + { + "epoch": 0.6, + "grad_norm": 0.8693859577178955, + "learning_rate": 3.6799915025297206e-06, + "loss": 0.5527, + "step": 9431 + }, + { + "epoch": 0.6, + "grad_norm": 0.8916878700256348, + "learning_rate": 3.6790019366289293e-06, + "loss": 0.566, + "step": 9432 + }, + { + "epoch": 0.6, + "grad_norm": 0.907010018825531, + "learning_rate": 3.67801242635077e-06, + "loss": 0.6145, + "step": 9433 + }, + { + "epoch": 0.6, + "grad_norm": 0.8580716848373413, + "learning_rate": 3.6770229717369086e-06, + "loss": 0.5711, + "step": 9434 + }, + { + "epoch": 0.6, + "grad_norm": 0.8871995806694031, + "learning_rate": 3.6760335728290062e-06, + "loss": 0.6209, + "step": 9435 + }, + { + "epoch": 0.6, + "grad_norm": 0.8500985503196716, + "learning_rate": 3.6750442296687272e-06, + "loss": 0.584, + "step": 9436 + }, + { + "epoch": 0.6, + "grad_norm": 0.871005654335022, + "learning_rate": 3.6740549422977244e-06, + "loss": 0.5876, + "step": 9437 + }, + { + "epoch": 0.6, + "grad_norm": 0.929032564163208, + "learning_rate": 3.6730657107576574e-06, + "loss": 0.6177, + "step": 9438 + }, + { + "epoch": 0.6, + "grad_norm": 0.9027160406112671, + "learning_rate": 3.6720765350901765e-06, + "loss": 0.5653, + "step": 9439 + }, + { + "epoch": 0.6, + "grad_norm": 0.939904510974884, + "learning_rate": 3.671087415336931e-06, + "loss": 0.6247, + "step": 9440 + }, + { + "epoch": 0.6, + "grad_norm": 0.8883887529373169, + "learning_rate": 3.6700983515395726e-06, + "loss": 0.5748, + "step": 9441 + }, + { + "epoch": 0.6, + "grad_norm": 0.9077379703521729, + "learning_rate": 3.669109343739747e-06, + "loss": 0.5666, + "step": 9442 + }, + { + "epoch": 0.6, + "grad_norm": 0.91063392162323, + "learning_rate": 3.668120391979098e-06, + "loss": 0.5704, + "step": 9443 + }, + { + "epoch": 0.6, + "grad_norm": 0.8031530380249023, + "learning_rate": 3.6671314962992634e-06, + "loss": 0.5456, + "step": 9444 + }, + { + "epoch": 0.6, + "grad_norm": 0.8312580585479736, + "learning_rate": 3.666142656741886e-06, + "loss": 0.5977, + "step": 9445 + }, + { + "epoch": 0.6, + "grad_norm": 0.8705939054489136, + "learning_rate": 3.6651538733486027e-06, + "loss": 0.5965, + "step": 9446 + }, + { + "epoch": 0.6, + "grad_norm": 0.9241865873336792, + "learning_rate": 3.664165146161045e-06, + "loss": 0.5973, + "step": 9447 + }, + { + "epoch": 0.6, + "grad_norm": 0.8538462519645691, + "learning_rate": 3.663176475220844e-06, + "loss": 0.5936, + "step": 9448 + }, + { + "epoch": 0.6, + "grad_norm": 0.8608808517456055, + "learning_rate": 3.6621878605696338e-06, + "loss": 0.5336, + "step": 9449 + }, + { + "epoch": 0.6, + "grad_norm": 0.9277994632720947, + "learning_rate": 3.6611993022490383e-06, + "loss": 0.5753, + "step": 9450 + }, + { + "epoch": 0.6, + "grad_norm": 0.8902243375778198, + "learning_rate": 3.660210800300683e-06, + "loss": 0.5836, + "step": 9451 + }, + { + "epoch": 0.6, + "grad_norm": 0.9182867407798767, + "learning_rate": 3.6592223547661888e-06, + "loss": 0.616, + "step": 9452 + }, + { + "epoch": 0.6, + "grad_norm": 0.8831941485404968, + "learning_rate": 3.6582339656871778e-06, + "loss": 0.5666, + "step": 9453 + }, + { + "epoch": 0.6, + "grad_norm": 0.9414946436882019, + "learning_rate": 3.6572456331052673e-06, + "loss": 0.5904, + "step": 9454 + }, + { + "epoch": 0.6, + "grad_norm": 0.8741511106491089, + "learning_rate": 3.656257357062073e-06, + "loss": 0.6501, + "step": 9455 + }, + { + "epoch": 0.6, + "grad_norm": 0.882854700088501, + "learning_rate": 3.6552691375992056e-06, + "loss": 0.661, + "step": 9456 + }, + { + "epoch": 0.6, + "grad_norm": 0.8658345937728882, + "learning_rate": 3.6542809747582755e-06, + "loss": 0.593, + "step": 9457 + }, + { + "epoch": 0.6, + "grad_norm": 0.856181800365448, + "learning_rate": 3.6532928685808937e-06, + "loss": 0.6158, + "step": 9458 + }, + { + "epoch": 0.6, + "grad_norm": 0.9174668788909912, + "learning_rate": 3.6523048191086654e-06, + "loss": 0.5738, + "step": 9459 + }, + { + "epoch": 0.6, + "grad_norm": 0.9109136462211609, + "learning_rate": 3.6513168263831913e-06, + "loss": 0.5629, + "step": 9460 + }, + { + "epoch": 0.6, + "grad_norm": 0.9288260340690613, + "learning_rate": 3.6503288904460725e-06, + "loss": 0.5806, + "step": 9461 + }, + { + "epoch": 0.6, + "grad_norm": 0.912398636341095, + "learning_rate": 3.6493410113389116e-06, + "loss": 0.6132, + "step": 9462 + }, + { + "epoch": 0.6, + "grad_norm": 0.9341295957565308, + "learning_rate": 3.648353189103302e-06, + "loss": 0.6314, + "step": 9463 + }, + { + "epoch": 0.6, + "grad_norm": 0.885158121585846, + "learning_rate": 3.6473654237808365e-06, + "loss": 0.5599, + "step": 9464 + }, + { + "epoch": 0.6, + "grad_norm": 0.8969646096229553, + "learning_rate": 3.6463777154131065e-06, + "loss": 0.6136, + "step": 9465 + }, + { + "epoch": 0.6, + "grad_norm": 0.8847031593322754, + "learning_rate": 3.645390064041704e-06, + "loss": 0.5568, + "step": 9466 + }, + { + "epoch": 0.6, + "grad_norm": 0.857172429561615, + "learning_rate": 3.6444024697082137e-06, + "loss": 0.5342, + "step": 9467 + }, + { + "epoch": 0.6, + "grad_norm": 0.8913065791130066, + "learning_rate": 3.6434149324542185e-06, + "loss": 0.6032, + "step": 9468 + }, + { + "epoch": 0.6, + "grad_norm": 0.856587290763855, + "learning_rate": 3.6424274523213e-06, + "loss": 0.5779, + "step": 9469 + }, + { + "epoch": 0.6, + "grad_norm": 0.9097947478294373, + "learning_rate": 3.641440029351041e-06, + "loss": 0.5989, + "step": 9470 + }, + { + "epoch": 0.6, + "grad_norm": 0.9367252588272095, + "learning_rate": 3.640452663585017e-06, + "loss": 0.6136, + "step": 9471 + }, + { + "epoch": 0.6, + "grad_norm": 0.8729375004768372, + "learning_rate": 3.6394653550647996e-06, + "loss": 0.5497, + "step": 9472 + }, + { + "epoch": 0.6, + "grad_norm": 0.8242816925048828, + "learning_rate": 3.638478103831965e-06, + "loss": 0.5366, + "step": 9473 + }, + { + "epoch": 0.6, + "grad_norm": 0.8420335054397583, + "learning_rate": 3.6374909099280786e-06, + "loss": 0.5695, + "step": 9474 + }, + { + "epoch": 0.6, + "grad_norm": 0.8642702698707581, + "learning_rate": 3.636503773394713e-06, + "loss": 0.6007, + "step": 9475 + }, + { + "epoch": 0.6, + "grad_norm": 0.9021347761154175, + "learning_rate": 3.635516694273428e-06, + "loss": 0.6437, + "step": 9476 + }, + { + "epoch": 0.6, + "grad_norm": 0.9246450662612915, + "learning_rate": 3.63452967260579e-06, + "loss": 0.5961, + "step": 9477 + }, + { + "epoch": 0.6, + "grad_norm": 0.8637540340423584, + "learning_rate": 3.633542708433355e-06, + "loss": 0.5337, + "step": 9478 + }, + { + "epoch": 0.6, + "grad_norm": 0.8642069697380066, + "learning_rate": 3.632555801797686e-06, + "loss": 0.5905, + "step": 9479 + }, + { + "epoch": 0.6, + "grad_norm": 0.9082743525505066, + "learning_rate": 3.631568952740333e-06, + "loss": 0.5878, + "step": 9480 + }, + { + "epoch": 0.6, + "grad_norm": 0.9280330538749695, + "learning_rate": 3.6305821613028524e-06, + "loss": 0.5967, + "step": 9481 + }, + { + "epoch": 0.6, + "grad_norm": 0.8534235954284668, + "learning_rate": 3.6295954275267914e-06, + "loss": 0.5841, + "step": 9482 + }, + { + "epoch": 0.6, + "grad_norm": 0.8653678297996521, + "learning_rate": 3.6286087514537017e-06, + "loss": 0.5635, + "step": 9483 + }, + { + "epoch": 0.6, + "grad_norm": 0.8563132882118225, + "learning_rate": 3.6276221331251253e-06, + "loss": 0.5565, + "step": 9484 + }, + { + "epoch": 0.6, + "grad_norm": 0.7956026196479797, + "learning_rate": 3.626635572582608e-06, + "loss": 0.5475, + "step": 9485 + }, + { + "epoch": 0.6, + "grad_norm": 0.9152180552482605, + "learning_rate": 3.6256490698676884e-06, + "loss": 0.5893, + "step": 9486 + }, + { + "epoch": 0.6, + "grad_norm": 0.8446356058120728, + "learning_rate": 3.6246626250219047e-06, + "loss": 0.5365, + "step": 9487 + }, + { + "epoch": 0.6, + "grad_norm": 0.8718549013137817, + "learning_rate": 3.623676238086794e-06, + "loss": 0.5374, + "step": 9488 + }, + { + "epoch": 0.6, + "grad_norm": 0.8933292031288147, + "learning_rate": 3.6226899091038896e-06, + "loss": 0.6108, + "step": 9489 + }, + { + "epoch": 0.6, + "grad_norm": 0.9709043502807617, + "learning_rate": 3.6217036381147216e-06, + "loss": 0.6381, + "step": 9490 + }, + { + "epoch": 0.6, + "grad_norm": 0.9374916553497314, + "learning_rate": 3.620717425160818e-06, + "loss": 0.5966, + "step": 9491 + }, + { + "epoch": 0.6, + "grad_norm": 0.906370997428894, + "learning_rate": 3.619731270283705e-06, + "loss": 0.6183, + "step": 9492 + }, + { + "epoch": 0.6, + "grad_norm": 0.9059337377548218, + "learning_rate": 3.6187451735249085e-06, + "loss": 0.5477, + "step": 9493 + }, + { + "epoch": 0.6, + "grad_norm": 0.9128062725067139, + "learning_rate": 3.6177591349259465e-06, + "loss": 0.638, + "step": 9494 + }, + { + "epoch": 0.6, + "grad_norm": 0.912139892578125, + "learning_rate": 3.616773154528339e-06, + "loss": 0.6009, + "step": 9495 + }, + { + "epoch": 0.6, + "grad_norm": 0.8537312150001526, + "learning_rate": 3.6157872323736017e-06, + "loss": 0.5281, + "step": 9496 + }, + { + "epoch": 0.6, + "grad_norm": 0.9160687923431396, + "learning_rate": 3.61480136850325e-06, + "loss": 0.5757, + "step": 9497 + }, + { + "epoch": 0.6, + "grad_norm": 0.8970745205879211, + "learning_rate": 3.6138155629587925e-06, + "loss": 0.5668, + "step": 9498 + }, + { + "epoch": 0.6, + "grad_norm": 0.8694158792495728, + "learning_rate": 3.61282981578174e-06, + "loss": 0.6049, + "step": 9499 + }, + { + "epoch": 0.6, + "grad_norm": 0.8502684235572815, + "learning_rate": 3.611844127013598e-06, + "loss": 0.5737, + "step": 9500 + }, + { + "epoch": 0.6, + "grad_norm": 0.9084693193435669, + "learning_rate": 3.6108584966958717e-06, + "loss": 0.5383, + "step": 9501 + }, + { + "epoch": 0.6, + "grad_norm": 0.9308486580848694, + "learning_rate": 3.6098729248700604e-06, + "loss": 0.6029, + "step": 9502 + }, + { + "epoch": 0.6, + "grad_norm": 0.8839983344078064, + "learning_rate": 3.6088874115776664e-06, + "loss": 0.5604, + "step": 9503 + }, + { + "epoch": 0.6, + "grad_norm": 0.8351526856422424, + "learning_rate": 3.6079019568601816e-06, + "loss": 0.5777, + "step": 9504 + }, + { + "epoch": 0.6, + "grad_norm": 0.8404673933982849, + "learning_rate": 3.606916560759104e-06, + "loss": 0.5518, + "step": 9505 + }, + { + "epoch": 0.6, + "grad_norm": 0.8732911348342896, + "learning_rate": 3.6059312233159237e-06, + "loss": 0.5913, + "step": 9506 + }, + { + "epoch": 0.6, + "grad_norm": 0.8823626637458801, + "learning_rate": 3.6049459445721303e-06, + "loss": 0.567, + "step": 9507 + }, + { + "epoch": 0.6, + "grad_norm": 0.8690586686134338, + "learning_rate": 3.6039607245692086e-06, + "loss": 0.5204, + "step": 9508 + }, + { + "epoch": 0.6, + "grad_norm": 0.8530438542366028, + "learning_rate": 3.6029755633486464e-06, + "loss": 0.6059, + "step": 9509 + }, + { + "epoch": 0.6, + "grad_norm": 0.8257336616516113, + "learning_rate": 3.601990460951922e-06, + "loss": 0.5089, + "step": 9510 + }, + { + "epoch": 0.6, + "grad_norm": 0.8363312482833862, + "learning_rate": 3.6010054174205167e-06, + "loss": 0.5395, + "step": 9511 + }, + { + "epoch": 0.6, + "grad_norm": 0.9421717524528503, + "learning_rate": 3.6000204327959055e-06, + "loss": 0.6033, + "step": 9512 + }, + { + "epoch": 0.6, + "grad_norm": 0.8582902550697327, + "learning_rate": 3.599035507119565e-06, + "loss": 0.5413, + "step": 9513 + }, + { + "epoch": 0.6, + "grad_norm": 0.8793736696243286, + "learning_rate": 3.5980506404329647e-06, + "loss": 0.6307, + "step": 9514 + }, + { + "epoch": 0.6, + "grad_norm": 0.9074476361274719, + "learning_rate": 3.597065832777576e-06, + "loss": 0.6315, + "step": 9515 + }, + { + "epoch": 0.6, + "grad_norm": 0.8699880242347717, + "learning_rate": 3.5960810841948622e-06, + "loss": 0.6043, + "step": 9516 + }, + { + "epoch": 0.6, + "grad_norm": 0.8704454302787781, + "learning_rate": 3.595096394726293e-06, + "loss": 0.5378, + "step": 9517 + }, + { + "epoch": 0.6, + "grad_norm": 0.8879233002662659, + "learning_rate": 3.594111764413326e-06, + "loss": 0.5705, + "step": 9518 + }, + { + "epoch": 0.6, + "grad_norm": 0.9449933171272278, + "learning_rate": 3.5931271932974227e-06, + "loss": 0.589, + "step": 9519 + }, + { + "epoch": 0.6, + "grad_norm": 0.8365026116371155, + "learning_rate": 3.592142681420039e-06, + "loss": 0.5869, + "step": 9520 + }, + { + "epoch": 0.6, + "grad_norm": 0.8679017424583435, + "learning_rate": 3.5911582288226275e-06, + "loss": 0.5936, + "step": 9521 + }, + { + "epoch": 0.6, + "grad_norm": 0.9098031520843506, + "learning_rate": 3.5901738355466433e-06, + "loss": 0.6482, + "step": 9522 + }, + { + "epoch": 0.6, + "grad_norm": 0.8863396048545837, + "learning_rate": 3.5891895016335347e-06, + "loss": 0.5826, + "step": 9523 + }, + { + "epoch": 0.6, + "grad_norm": 0.8773937225341797, + "learning_rate": 3.588205227124749e-06, + "loss": 0.5994, + "step": 9524 + }, + { + "epoch": 0.6, + "grad_norm": 0.9152101874351501, + "learning_rate": 3.587221012061728e-06, + "loss": 0.5859, + "step": 9525 + }, + { + "epoch": 0.6, + "grad_norm": 0.8691193461418152, + "learning_rate": 3.586236856485916e-06, + "loss": 0.5964, + "step": 9526 + }, + { + "epoch": 0.6, + "grad_norm": 0.8620315194129944, + "learning_rate": 3.5852527604387533e-06, + "loss": 0.569, + "step": 9527 + }, + { + "epoch": 0.6, + "grad_norm": 0.864154040813446, + "learning_rate": 3.5842687239616745e-06, + "loss": 0.5576, + "step": 9528 + }, + { + "epoch": 0.6, + "grad_norm": 0.8283834457397461, + "learning_rate": 3.583284747096114e-06, + "loss": 0.5909, + "step": 9529 + }, + { + "epoch": 0.6, + "grad_norm": 0.9094521403312683, + "learning_rate": 3.5823008298835044e-06, + "loss": 0.6439, + "step": 9530 + }, + { + "epoch": 0.6, + "grad_norm": 0.8895583748817444, + "learning_rate": 3.5813169723652763e-06, + "loss": 0.5486, + "step": 9531 + }, + { + "epoch": 0.6, + "grad_norm": 0.9506862759590149, + "learning_rate": 3.5803331745828558e-06, + "loss": 0.6163, + "step": 9532 + }, + { + "epoch": 0.6, + "grad_norm": 0.855083703994751, + "learning_rate": 3.579349436577665e-06, + "loss": 0.6066, + "step": 9533 + }, + { + "epoch": 0.6, + "grad_norm": 0.9323597550392151, + "learning_rate": 3.5783657583911268e-06, + "loss": 0.6049, + "step": 9534 + }, + { + "epoch": 0.6, + "grad_norm": 0.865875780582428, + "learning_rate": 3.5773821400646623e-06, + "loss": 0.5621, + "step": 9535 + }, + { + "epoch": 0.6, + "grad_norm": 0.846933901309967, + "learning_rate": 3.5763985816396873e-06, + "loss": 0.5495, + "step": 9536 + }, + { + "epoch": 0.6, + "grad_norm": 0.8472744822502136, + "learning_rate": 3.575415083157615e-06, + "loss": 0.5131, + "step": 9537 + }, + { + "epoch": 0.6, + "grad_norm": 0.8493777513504028, + "learning_rate": 3.5744316446598565e-06, + "loss": 0.5477, + "step": 9538 + }, + { + "epoch": 0.6, + "grad_norm": 0.8486292958259583, + "learning_rate": 3.5734482661878244e-06, + "loss": 0.5877, + "step": 9539 + }, + { + "epoch": 0.6, + "grad_norm": 0.9552314281463623, + "learning_rate": 3.5724649477829232e-06, + "loss": 0.6654, + "step": 9540 + }, + { + "epoch": 0.6, + "grad_norm": 0.8565940260887146, + "learning_rate": 3.5714816894865556e-06, + "loss": 0.532, + "step": 9541 + }, + { + "epoch": 0.6, + "grad_norm": 0.8645039200782776, + "learning_rate": 3.570498491340124e-06, + "loss": 0.5583, + "step": 9542 + }, + { + "epoch": 0.6, + "grad_norm": 0.8806138038635254, + "learning_rate": 3.5695153533850302e-06, + "loss": 0.5508, + "step": 9543 + }, + { + "epoch": 0.6, + "grad_norm": 0.8636948466300964, + "learning_rate": 3.5685322756626683e-06, + "loss": 0.5921, + "step": 9544 + }, + { + "epoch": 0.6, + "grad_norm": 0.8807823657989502, + "learning_rate": 3.5675492582144322e-06, + "loss": 0.62, + "step": 9545 + }, + { + "epoch": 0.6, + "grad_norm": 0.85367751121521, + "learning_rate": 3.566566301081712e-06, + "loss": 0.5503, + "step": 9546 + }, + { + "epoch": 0.6, + "grad_norm": 0.8549711108207703, + "learning_rate": 3.5655834043059e-06, + "loss": 0.5368, + "step": 9547 + }, + { + "epoch": 0.6, + "grad_norm": 0.9118362069129944, + "learning_rate": 3.5646005679283813e-06, + "loss": 0.585, + "step": 9548 + }, + { + "epoch": 0.6, + "grad_norm": 0.8719263672828674, + "learning_rate": 3.5636177919905385e-06, + "loss": 0.5321, + "step": 9549 + }, + { + "epoch": 0.61, + "grad_norm": 0.8751515746116638, + "learning_rate": 3.5626350765337546e-06, + "loss": 0.6133, + "step": 9550 + }, + { + "epoch": 0.61, + "grad_norm": 0.8825535774230957, + "learning_rate": 3.5616524215994052e-06, + "loss": 0.5868, + "step": 9551 + }, + { + "epoch": 0.61, + "grad_norm": 1.0655604600906372, + "learning_rate": 3.560669827228871e-06, + "loss": 0.6187, + "step": 9552 + }, + { + "epoch": 0.61, + "grad_norm": 0.8702925443649292, + "learning_rate": 3.559687293463522e-06, + "loss": 0.5919, + "step": 9553 + }, + { + "epoch": 0.61, + "grad_norm": 0.8378567695617676, + "learning_rate": 3.5587048203447314e-06, + "loss": 0.5988, + "step": 9554 + }, + { + "epoch": 0.61, + "grad_norm": 0.8837084770202637, + "learning_rate": 3.557722407913865e-06, + "loss": 0.5909, + "step": 9555 + }, + { + "epoch": 0.61, + "grad_norm": 0.8577612042427063, + "learning_rate": 3.5567400562122934e-06, + "loss": 0.4996, + "step": 9556 + }, + { + "epoch": 0.61, + "grad_norm": 0.8624299764633179, + "learning_rate": 3.5557577652813758e-06, + "loss": 0.5573, + "step": 9557 + }, + { + "epoch": 0.61, + "grad_norm": 0.9122274518013, + "learning_rate": 3.554775535162475e-06, + "loss": 0.6072, + "step": 9558 + }, + { + "epoch": 0.61, + "grad_norm": 0.8915830254554749, + "learning_rate": 3.5537933658969475e-06, + "loss": 0.5344, + "step": 9559 + }, + { + "epoch": 0.61, + "grad_norm": 1.072354793548584, + "learning_rate": 3.5528112575261525e-06, + "loss": 0.6077, + "step": 9560 + }, + { + "epoch": 0.61, + "grad_norm": 0.9154992699623108, + "learning_rate": 3.5518292100914396e-06, + "loss": 0.5795, + "step": 9561 + }, + { + "epoch": 0.61, + "grad_norm": 0.8368164896965027, + "learning_rate": 3.550847223634162e-06, + "loss": 0.6176, + "step": 9562 + }, + { + "epoch": 0.61, + "grad_norm": 0.8458346724510193, + "learning_rate": 3.549865298195665e-06, + "loss": 0.5834, + "step": 9563 + }, + { + "epoch": 0.61, + "grad_norm": 0.8798760771751404, + "learning_rate": 3.5488834338172974e-06, + "loss": 0.5863, + "step": 9564 + }, + { + "epoch": 0.61, + "grad_norm": 0.9277465343475342, + "learning_rate": 3.547901630540399e-06, + "loss": 0.5814, + "step": 9565 + }, + { + "epoch": 0.61, + "grad_norm": 0.9117797017097473, + "learning_rate": 3.546919888406313e-06, + "loss": 0.5731, + "step": 9566 + }, + { + "epoch": 0.61, + "grad_norm": 0.9465237855911255, + "learning_rate": 3.5459382074563737e-06, + "loss": 0.584, + "step": 9567 + }, + { + "epoch": 0.61, + "grad_norm": 0.8362681269645691, + "learning_rate": 3.5449565877319175e-06, + "loss": 0.5362, + "step": 9568 + }, + { + "epoch": 0.61, + "grad_norm": 0.8532899022102356, + "learning_rate": 3.54397502927428e-06, + "loss": 0.5545, + "step": 9569 + }, + { + "epoch": 0.61, + "grad_norm": 0.8592314124107361, + "learning_rate": 3.5429935321247887e-06, + "loss": 0.6031, + "step": 9570 + }, + { + "epoch": 0.61, + "grad_norm": 0.9131558537483215, + "learning_rate": 3.5420120963247706e-06, + "loss": 0.5733, + "step": 9571 + }, + { + "epoch": 0.61, + "grad_norm": 0.9123291373252869, + "learning_rate": 3.5410307219155495e-06, + "loss": 0.5963, + "step": 9572 + }, + { + "epoch": 0.61, + "grad_norm": 0.9059653282165527, + "learning_rate": 3.540049408938452e-06, + "loss": 0.5867, + "step": 9573 + }, + { + "epoch": 0.61, + "grad_norm": 0.8649680614471436, + "learning_rate": 3.539068157434794e-06, + "loss": 0.5413, + "step": 9574 + }, + { + "epoch": 0.61, + "grad_norm": 0.8861904144287109, + "learning_rate": 3.538086967445894e-06, + "loss": 0.5541, + "step": 9575 + }, + { + "epoch": 0.61, + "grad_norm": 0.8411959409713745, + "learning_rate": 3.5371058390130643e-06, + "loss": 0.6017, + "step": 9576 + }, + { + "epoch": 0.61, + "grad_norm": 0.8303496837615967, + "learning_rate": 3.536124772177621e-06, + "loss": 0.5385, + "step": 9577 + }, + { + "epoch": 0.61, + "grad_norm": 0.8831817507743835, + "learning_rate": 3.535143766980871e-06, + "loss": 0.5553, + "step": 9578 + }, + { + "epoch": 0.61, + "grad_norm": 0.8853132128715515, + "learning_rate": 3.53416282346412e-06, + "loss": 0.5881, + "step": 9579 + }, + { + "epoch": 0.61, + "grad_norm": 0.9052870869636536, + "learning_rate": 3.533181941668675e-06, + "loss": 0.5308, + "step": 9580 + }, + { + "epoch": 0.61, + "grad_norm": 0.836897611618042, + "learning_rate": 3.5322011216358325e-06, + "loss": 0.5369, + "step": 9581 + }, + { + "epoch": 0.61, + "grad_norm": 0.9233197569847107, + "learning_rate": 3.5312203634068977e-06, + "loss": 0.6164, + "step": 9582 + }, + { + "epoch": 0.61, + "grad_norm": 0.883269727230072, + "learning_rate": 3.5302396670231622e-06, + "loss": 0.6196, + "step": 9583 + }, + { + "epoch": 0.61, + "grad_norm": 0.8604480028152466, + "learning_rate": 3.529259032525923e-06, + "loss": 0.5845, + "step": 9584 + }, + { + "epoch": 0.61, + "grad_norm": 0.8859973549842834, + "learning_rate": 3.5282784599564667e-06, + "loss": 0.5217, + "step": 9585 + }, + { + "epoch": 0.61, + "grad_norm": 0.8377750515937805, + "learning_rate": 3.5272979493560877e-06, + "loss": 0.5654, + "step": 9586 + }, + { + "epoch": 0.61, + "grad_norm": 0.901696503162384, + "learning_rate": 3.5263175007660676e-06, + "loss": 0.6074, + "step": 9587 + }, + { + "epoch": 0.61, + "grad_norm": 0.8750433325767517, + "learning_rate": 3.5253371142276915e-06, + "loss": 0.5781, + "step": 9588 + }, + { + "epoch": 0.61, + "grad_norm": 0.988763689994812, + "learning_rate": 3.5243567897822382e-06, + "loss": 0.6265, + "step": 9589 + }, + { + "epoch": 0.61, + "grad_norm": 0.9134507179260254, + "learning_rate": 3.5233765274709885e-06, + "loss": 0.5861, + "step": 9590 + }, + { + "epoch": 0.61, + "grad_norm": 0.8977269530296326, + "learning_rate": 3.5223963273352157e-06, + "loss": 0.6029, + "step": 9591 + }, + { + "epoch": 0.61, + "grad_norm": 0.9446091651916504, + "learning_rate": 3.5214161894161948e-06, + "loss": 0.6057, + "step": 9592 + }, + { + "epoch": 0.61, + "grad_norm": 0.9235208630561829, + "learning_rate": 3.5204361137551924e-06, + "loss": 0.5999, + "step": 9593 + }, + { + "epoch": 0.61, + "grad_norm": 0.9178057312965393, + "learning_rate": 3.5194561003934798e-06, + "loss": 0.5572, + "step": 9594 + }, + { + "epoch": 0.61, + "grad_norm": 0.886055052280426, + "learning_rate": 3.5184761493723197e-06, + "loss": 0.5808, + "step": 9595 + }, + { + "epoch": 0.61, + "grad_norm": 0.9092001914978027, + "learning_rate": 3.5174962607329755e-06, + "loss": 0.5728, + "step": 9596 + }, + { + "epoch": 0.61, + "grad_norm": 0.830111026763916, + "learning_rate": 3.516516434516707e-06, + "loss": 0.5996, + "step": 9597 + }, + { + "epoch": 0.61, + "grad_norm": 0.9029525518417358, + "learning_rate": 3.5155366707647686e-06, + "loss": 0.566, + "step": 9598 + }, + { + "epoch": 0.61, + "grad_norm": 0.9275105595588684, + "learning_rate": 3.514556969518418e-06, + "loss": 0.6466, + "step": 9599 + }, + { + "epoch": 0.61, + "grad_norm": 0.8675131797790527, + "learning_rate": 3.513577330818907e-06, + "loss": 0.5825, + "step": 9600 + }, + { + "epoch": 0.61, + "grad_norm": 0.9288623929023743, + "learning_rate": 3.512597754707484e-06, + "loss": 0.6074, + "step": 9601 + }, + { + "epoch": 0.61, + "grad_norm": 0.8611968159675598, + "learning_rate": 3.511618241225393e-06, + "loss": 0.586, + "step": 9602 + }, + { + "epoch": 0.61, + "grad_norm": 0.8688474893569946, + "learning_rate": 3.5106387904138804e-06, + "loss": 0.5534, + "step": 9603 + }, + { + "epoch": 0.61, + "grad_norm": 0.8907158374786377, + "learning_rate": 3.5096594023141895e-06, + "loss": 0.5984, + "step": 9604 + }, + { + "epoch": 0.61, + "grad_norm": 0.9105345606803894, + "learning_rate": 3.508680076967556e-06, + "loss": 0.6486, + "step": 9605 + }, + { + "epoch": 0.61, + "grad_norm": 0.9449050426483154, + "learning_rate": 3.507700814415215e-06, + "loss": 0.563, + "step": 9606 + }, + { + "epoch": 0.61, + "grad_norm": 0.9659131169319153, + "learning_rate": 3.5067216146984016e-06, + "loss": 0.6047, + "step": 9607 + }, + { + "epoch": 0.61, + "grad_norm": 0.8895873427391052, + "learning_rate": 3.505742477858348e-06, + "loss": 0.5464, + "step": 9608 + }, + { + "epoch": 0.61, + "grad_norm": 0.875363826751709, + "learning_rate": 3.50476340393628e-06, + "loss": 0.534, + "step": 9609 + }, + { + "epoch": 0.61, + "grad_norm": 0.8972344398498535, + "learning_rate": 3.5037843929734216e-06, + "loss": 0.5933, + "step": 9610 + }, + { + "epoch": 0.61, + "grad_norm": 0.9015132784843445, + "learning_rate": 3.502805445010998e-06, + "loss": 0.5126, + "step": 9611 + }, + { + "epoch": 0.61, + "grad_norm": 0.8442445993423462, + "learning_rate": 3.5018265600902313e-06, + "loss": 0.5688, + "step": 9612 + }, + { + "epoch": 0.61, + "grad_norm": 0.9495238065719604, + "learning_rate": 3.5008477382523355e-06, + "loss": 0.5862, + "step": 9613 + }, + { + "epoch": 0.61, + "grad_norm": 0.8787839412689209, + "learning_rate": 3.4998689795385245e-06, + "loss": 0.5882, + "step": 9614 + }, + { + "epoch": 0.61, + "grad_norm": 0.9087364673614502, + "learning_rate": 3.4988902839900118e-06, + "loss": 0.6195, + "step": 9615 + }, + { + "epoch": 0.61, + "grad_norm": 0.9128758907318115, + "learning_rate": 3.4979116516480094e-06, + "loss": 0.5856, + "step": 9616 + }, + { + "epoch": 0.61, + "grad_norm": 0.9483041167259216, + "learning_rate": 3.496933082553722e-06, + "loss": 0.6421, + "step": 9617 + }, + { + "epoch": 0.61, + "grad_norm": 0.8916476964950562, + "learning_rate": 3.495954576748353e-06, + "loss": 0.5491, + "step": 9618 + }, + { + "epoch": 0.61, + "grad_norm": 0.895670473575592, + "learning_rate": 3.4949761342731025e-06, + "loss": 0.6149, + "step": 9619 + }, + { + "epoch": 0.61, + "grad_norm": 0.9284258484840393, + "learning_rate": 3.493997755169174e-06, + "loss": 0.6185, + "step": 9620 + }, + { + "epoch": 0.61, + "grad_norm": 0.8784351348876953, + "learning_rate": 3.4930194394777615e-06, + "loss": 0.5884, + "step": 9621 + }, + { + "epoch": 0.61, + "grad_norm": 0.8456250429153442, + "learning_rate": 3.492041187240056e-06, + "loss": 0.5612, + "step": 9622 + }, + { + "epoch": 0.61, + "grad_norm": 0.8818730115890503, + "learning_rate": 3.49106299849725e-06, + "loss": 0.5623, + "step": 9623 + }, + { + "epoch": 0.61, + "grad_norm": 0.9130449891090393, + "learning_rate": 3.4900848732905348e-06, + "loss": 0.6577, + "step": 9624 + }, + { + "epoch": 0.61, + "grad_norm": 0.9004276990890503, + "learning_rate": 3.4891068116610914e-06, + "loss": 0.6268, + "step": 9625 + }, + { + "epoch": 0.61, + "grad_norm": 0.8974410891532898, + "learning_rate": 3.4881288136501036e-06, + "loss": 0.6242, + "step": 9626 + }, + { + "epoch": 0.61, + "grad_norm": 0.9068264961242676, + "learning_rate": 3.487150879298753e-06, + "loss": 0.5797, + "step": 9627 + }, + { + "epoch": 0.61, + "grad_norm": 0.8939555883407593, + "learning_rate": 3.486173008648215e-06, + "loss": 0.5922, + "step": 9628 + }, + { + "epoch": 0.61, + "grad_norm": 0.8704434037208557, + "learning_rate": 3.485195201739665e-06, + "loss": 0.5675, + "step": 9629 + }, + { + "epoch": 0.61, + "grad_norm": 0.8694623112678528, + "learning_rate": 3.4842174586142772e-06, + "loss": 0.5603, + "step": 9630 + }, + { + "epoch": 0.61, + "grad_norm": 0.9141775965690613, + "learning_rate": 3.4832397793132187e-06, + "loss": 0.6373, + "step": 9631 + }, + { + "epoch": 0.61, + "grad_norm": 0.9108834862709045, + "learning_rate": 3.4822621638776555e-06, + "loss": 0.5756, + "step": 9632 + }, + { + "epoch": 0.61, + "grad_norm": 0.8546727895736694, + "learning_rate": 3.4812846123487532e-06, + "loss": 0.5232, + "step": 9633 + }, + { + "epoch": 0.61, + "grad_norm": 0.9427882432937622, + "learning_rate": 3.4803071247676735e-06, + "loss": 0.5932, + "step": 9634 + }, + { + "epoch": 0.61, + "grad_norm": 0.846181333065033, + "learning_rate": 3.4793297011755746e-06, + "loss": 0.5629, + "step": 9635 + }, + { + "epoch": 0.61, + "grad_norm": 0.8653784394264221, + "learning_rate": 3.4783523416136096e-06, + "loss": 0.6068, + "step": 9636 + }, + { + "epoch": 0.61, + "grad_norm": 0.8583688139915466, + "learning_rate": 3.477375046122935e-06, + "loss": 0.583, + "step": 9637 + }, + { + "epoch": 0.61, + "grad_norm": 0.9034234881401062, + "learning_rate": 3.476397814744702e-06, + "loss": 0.6093, + "step": 9638 + }, + { + "epoch": 0.61, + "grad_norm": 0.8735195994377136, + "learning_rate": 3.4754206475200556e-06, + "loss": 0.5769, + "step": 9639 + }, + { + "epoch": 0.61, + "grad_norm": 0.9497086405754089, + "learning_rate": 3.4744435444901412e-06, + "loss": 0.5695, + "step": 9640 + }, + { + "epoch": 0.61, + "grad_norm": 0.8908088207244873, + "learning_rate": 3.473466505696103e-06, + "loss": 0.5208, + "step": 9641 + }, + { + "epoch": 0.61, + "grad_norm": 0.9061847925186157, + "learning_rate": 3.4724895311790806e-06, + "loss": 0.5601, + "step": 9642 + }, + { + "epoch": 0.61, + "grad_norm": 0.8748029470443726, + "learning_rate": 3.4715126209802104e-06, + "loss": 0.5772, + "step": 9643 + }, + { + "epoch": 0.61, + "grad_norm": 0.8988456130027771, + "learning_rate": 3.4705357751406256e-06, + "loss": 0.596, + "step": 9644 + }, + { + "epoch": 0.61, + "grad_norm": 0.8625943064689636, + "learning_rate": 3.469558993701457e-06, + "loss": 0.5393, + "step": 9645 + }, + { + "epoch": 0.61, + "grad_norm": 0.915705680847168, + "learning_rate": 3.468582276703838e-06, + "loss": 0.5994, + "step": 9646 + }, + { + "epoch": 0.61, + "grad_norm": 0.9504239559173584, + "learning_rate": 3.467605624188891e-06, + "loss": 0.5692, + "step": 9647 + }, + { + "epoch": 0.61, + "grad_norm": 0.8617990016937256, + "learning_rate": 3.46662903619774e-06, + "loss": 0.572, + "step": 9648 + }, + { + "epoch": 0.61, + "grad_norm": 0.9895144104957581, + "learning_rate": 3.4656525127715045e-06, + "loss": 0.5718, + "step": 9649 + }, + { + "epoch": 0.61, + "grad_norm": 0.9112670421600342, + "learning_rate": 3.464676053951307e-06, + "loss": 0.5922, + "step": 9650 + }, + { + "epoch": 0.61, + "grad_norm": 0.8914951682090759, + "learning_rate": 3.463699659778259e-06, + "loss": 0.5979, + "step": 9651 + }, + { + "epoch": 0.61, + "grad_norm": 0.9271389245986938, + "learning_rate": 3.4627233302934737e-06, + "loss": 0.6252, + "step": 9652 + }, + { + "epoch": 0.61, + "grad_norm": 0.9418599009513855, + "learning_rate": 3.4617470655380597e-06, + "loss": 0.5709, + "step": 9653 + }, + { + "epoch": 0.61, + "grad_norm": 0.8820015788078308, + "learning_rate": 3.460770865553128e-06, + "loss": 0.5428, + "step": 9654 + }, + { + "epoch": 0.61, + "grad_norm": 0.9147693514823914, + "learning_rate": 3.4597947303797795e-06, + "loss": 0.636, + "step": 9655 + }, + { + "epoch": 0.61, + "grad_norm": 0.9040268063545227, + "learning_rate": 3.458818660059117e-06, + "loss": 0.5763, + "step": 9656 + }, + { + "epoch": 0.61, + "grad_norm": 0.893278956413269, + "learning_rate": 3.4578426546322403e-06, + "loss": 0.6656, + "step": 9657 + }, + { + "epoch": 0.61, + "grad_norm": 0.8358849883079529, + "learning_rate": 3.4568667141402425e-06, + "loss": 0.5922, + "step": 9658 + }, + { + "epoch": 0.61, + "grad_norm": 0.889038622379303, + "learning_rate": 3.4558908386242208e-06, + "loss": 0.5502, + "step": 9659 + }, + { + "epoch": 0.61, + "grad_norm": 0.8911066055297852, + "learning_rate": 3.4549150281252635e-06, + "loss": 0.6117, + "step": 9660 + }, + { + "epoch": 0.61, + "grad_norm": 0.9033711552619934, + "learning_rate": 3.4539392826844607e-06, + "loss": 0.6014, + "step": 9661 + }, + { + "epoch": 0.61, + "grad_norm": 0.935605525970459, + "learning_rate": 3.4529636023428935e-06, + "loss": 0.5503, + "step": 9662 + }, + { + "epoch": 0.61, + "grad_norm": 0.9369493126869202, + "learning_rate": 3.4519879871416505e-06, + "loss": 0.5497, + "step": 9663 + }, + { + "epoch": 0.61, + "grad_norm": 0.8504288792610168, + "learning_rate": 3.451012437121806e-06, + "loss": 0.5578, + "step": 9664 + }, + { + "epoch": 0.61, + "grad_norm": 0.9002702832221985, + "learning_rate": 3.4500369523244414e-06, + "loss": 0.5544, + "step": 9665 + }, + { + "epoch": 0.61, + "grad_norm": 0.8431366086006165, + "learning_rate": 3.4490615327906264e-06, + "loss": 0.5637, + "step": 9666 + }, + { + "epoch": 0.61, + "grad_norm": 0.8854979872703552, + "learning_rate": 3.448086178561436e-06, + "loss": 0.5606, + "step": 9667 + }, + { + "epoch": 0.61, + "grad_norm": 0.9128515720367432, + "learning_rate": 3.447110889677938e-06, + "loss": 0.6465, + "step": 9668 + }, + { + "epoch": 0.61, + "grad_norm": 0.9017817974090576, + "learning_rate": 3.4461356661811997e-06, + "loss": 0.5738, + "step": 9669 + }, + { + "epoch": 0.61, + "grad_norm": 0.831354558467865, + "learning_rate": 3.4451605081122797e-06, + "loss": 0.5026, + "step": 9670 + }, + { + "epoch": 0.61, + "grad_norm": 0.9157218933105469, + "learning_rate": 3.4441854155122446e-06, + "loss": 0.5886, + "step": 9671 + }, + { + "epoch": 0.61, + "grad_norm": 0.9166000485420227, + "learning_rate": 3.443210388422148e-06, + "loss": 0.5729, + "step": 9672 + }, + { + "epoch": 0.61, + "grad_norm": 0.8740888833999634, + "learning_rate": 3.4422354268830473e-06, + "loss": 0.6174, + "step": 9673 + }, + { + "epoch": 0.61, + "grad_norm": 0.8689664006233215, + "learning_rate": 3.441260530935994e-06, + "loss": 0.5786, + "step": 9674 + }, + { + "epoch": 0.61, + "grad_norm": 0.9302951693534851, + "learning_rate": 3.4402857006220353e-06, + "loss": 0.6591, + "step": 9675 + }, + { + "epoch": 0.61, + "grad_norm": 0.9434636235237122, + "learning_rate": 3.439310935982221e-06, + "loss": 0.6147, + "step": 9676 + }, + { + "epoch": 0.61, + "grad_norm": 0.9156984686851501, + "learning_rate": 3.4383362370575947e-06, + "loss": 0.5605, + "step": 9677 + }, + { + "epoch": 0.61, + "grad_norm": 0.866672933101654, + "learning_rate": 3.4373616038891966e-06, + "loss": 0.6085, + "step": 9678 + }, + { + "epoch": 0.61, + "grad_norm": 0.8342917561531067, + "learning_rate": 3.4363870365180634e-06, + "loss": 0.5376, + "step": 9679 + }, + { + "epoch": 0.61, + "grad_norm": 0.8971079587936401, + "learning_rate": 3.435412534985234e-06, + "loss": 0.5982, + "step": 9680 + }, + { + "epoch": 0.61, + "grad_norm": 0.8973367810249329, + "learning_rate": 3.4344380993317404e-06, + "loss": 0.601, + "step": 9681 + }, + { + "epoch": 0.61, + "grad_norm": 0.9082698822021484, + "learning_rate": 3.433463729598613e-06, + "loss": 0.6087, + "step": 9682 + }, + { + "epoch": 0.61, + "grad_norm": 0.8526648879051208, + "learning_rate": 3.432489425826876e-06, + "loss": 0.5689, + "step": 9683 + }, + { + "epoch": 0.61, + "grad_norm": 0.8531283736228943, + "learning_rate": 3.431515188057557e-06, + "loss": 0.5591, + "step": 9684 + }, + { + "epoch": 0.61, + "grad_norm": 0.8188491463661194, + "learning_rate": 3.4305410163316788e-06, + "loss": 0.5402, + "step": 9685 + }, + { + "epoch": 0.61, + "grad_norm": 0.8680586218833923, + "learning_rate": 3.429566910690258e-06, + "loss": 0.5776, + "step": 9686 + }, + { + "epoch": 0.61, + "grad_norm": 0.9405946731567383, + "learning_rate": 3.42859287117431e-06, + "loss": 0.611, + "step": 9687 + }, + { + "epoch": 0.61, + "grad_norm": 0.9357609152793884, + "learning_rate": 3.42761889782485e-06, + "loss": 0.621, + "step": 9688 + }, + { + "epoch": 0.61, + "grad_norm": 0.9674767255783081, + "learning_rate": 3.4266449906828897e-06, + "loss": 0.5803, + "step": 9689 + }, + { + "epoch": 0.61, + "grad_norm": 0.9120761752128601, + "learning_rate": 3.4256711497894346e-06, + "loss": 0.5951, + "step": 9690 + }, + { + "epoch": 0.61, + "grad_norm": 0.8053117394447327, + "learning_rate": 3.4246973751854917e-06, + "loss": 0.5611, + "step": 9691 + }, + { + "epoch": 0.61, + "grad_norm": 0.9436960220336914, + "learning_rate": 3.42372366691206e-06, + "loss": 0.5907, + "step": 9692 + }, + { + "epoch": 0.61, + "grad_norm": 0.9401655197143555, + "learning_rate": 3.422750025010143e-06, + "loss": 0.5508, + "step": 9693 + }, + { + "epoch": 0.61, + "grad_norm": 0.9254014492034912, + "learning_rate": 3.421776449520735e-06, + "loss": 0.5881, + "step": 9694 + }, + { + "epoch": 0.61, + "grad_norm": 0.9455965757369995, + "learning_rate": 3.4208029404848315e-06, + "loss": 0.5914, + "step": 9695 + }, + { + "epoch": 0.61, + "grad_norm": 0.8691204786300659, + "learning_rate": 3.4198294979434207e-06, + "loss": 0.5917, + "step": 9696 + }, + { + "epoch": 0.61, + "grad_norm": 0.9164184927940369, + "learning_rate": 3.418856121937494e-06, + "loss": 0.5272, + "step": 9697 + }, + { + "epoch": 0.61, + "grad_norm": 0.857101559638977, + "learning_rate": 3.4178828125080354e-06, + "loss": 0.6098, + "step": 9698 + }, + { + "epoch": 0.61, + "grad_norm": 0.9042714238166809, + "learning_rate": 3.4169095696960287e-06, + "loss": 0.6457, + "step": 9699 + }, + { + "epoch": 0.61, + "grad_norm": 0.87809157371521, + "learning_rate": 3.4159363935424505e-06, + "loss": 0.5905, + "step": 9700 + }, + { + "epoch": 0.61, + "grad_norm": 0.91737961769104, + "learning_rate": 3.4149632840882817e-06, + "loss": 0.5974, + "step": 9701 + }, + { + "epoch": 0.61, + "grad_norm": 0.8448576331138611, + "learning_rate": 3.413990241374495e-06, + "loss": 0.5751, + "step": 9702 + }, + { + "epoch": 0.61, + "grad_norm": 0.8721796274185181, + "learning_rate": 3.4130172654420623e-06, + "loss": 0.5554, + "step": 9703 + }, + { + "epoch": 0.61, + "grad_norm": 0.9579261541366577, + "learning_rate": 3.4120443563319527e-06, + "loss": 0.5359, + "step": 9704 + }, + { + "epoch": 0.61, + "grad_norm": 0.9207855463027954, + "learning_rate": 3.4110715140851286e-06, + "loss": 0.5518, + "step": 9705 + }, + { + "epoch": 0.61, + "grad_norm": 0.9483494758605957, + "learning_rate": 3.4100987387425566e-06, + "loss": 0.6161, + "step": 9706 + }, + { + "epoch": 0.61, + "grad_norm": 0.9150481820106506, + "learning_rate": 3.4091260303451967e-06, + "loss": 0.5725, + "step": 9707 + }, + { + "epoch": 0.62, + "grad_norm": 0.8735103607177734, + "learning_rate": 3.4081533889340056e-06, + "loss": 0.5568, + "step": 9708 + }, + { + "epoch": 0.62, + "grad_norm": 0.8997935652732849, + "learning_rate": 3.407180814549935e-06, + "loss": 0.5814, + "step": 9709 + }, + { + "epoch": 0.62, + "grad_norm": 0.9537015557289124, + "learning_rate": 3.4062083072339415e-06, + "loss": 0.6278, + "step": 9710 + }, + { + "epoch": 0.62, + "grad_norm": 0.9637327194213867, + "learning_rate": 3.4052358670269727e-06, + "loss": 0.6356, + "step": 9711 + }, + { + "epoch": 0.62, + "grad_norm": 0.8522918224334717, + "learning_rate": 3.4042634939699728e-06, + "loss": 0.5623, + "step": 9712 + }, + { + "epoch": 0.62, + "grad_norm": 0.9028704762458801, + "learning_rate": 3.4032911881038842e-06, + "loss": 0.6201, + "step": 9713 + }, + { + "epoch": 0.62, + "grad_norm": 0.9436061382293701, + "learning_rate": 3.4023189494696506e-06, + "loss": 0.6674, + "step": 9714 + }, + { + "epoch": 0.62, + "grad_norm": 0.9471274018287659, + "learning_rate": 3.401346778108209e-06, + "loss": 0.6199, + "step": 9715 + }, + { + "epoch": 0.62, + "grad_norm": 0.9044203162193298, + "learning_rate": 3.4003746740604925e-06, + "loss": 0.5994, + "step": 9716 + }, + { + "epoch": 0.62, + "grad_norm": 0.8830143213272095, + "learning_rate": 3.399402637367433e-06, + "loss": 0.5824, + "step": 9717 + }, + { + "epoch": 0.62, + "grad_norm": 0.8798513412475586, + "learning_rate": 3.398430668069961e-06, + "loss": 0.5765, + "step": 9718 + }, + { + "epoch": 0.62, + "grad_norm": 0.9042376279830933, + "learning_rate": 3.3974587662090026e-06, + "loss": 0.6328, + "step": 9719 + }, + { + "epoch": 0.62, + "grad_norm": 0.9810076355934143, + "learning_rate": 3.396486931825481e-06, + "loss": 0.6196, + "step": 9720 + }, + { + "epoch": 0.62, + "grad_norm": 0.8763681054115295, + "learning_rate": 3.395515164960316e-06, + "loss": 0.5587, + "step": 9721 + }, + { + "epoch": 0.62, + "grad_norm": 0.8596003651618958, + "learning_rate": 3.394543465654424e-06, + "loss": 0.6053, + "step": 9722 + }, + { + "epoch": 0.62, + "grad_norm": 0.9068569540977478, + "learning_rate": 3.393571833948724e-06, + "loss": 0.5969, + "step": 9723 + }, + { + "epoch": 0.62, + "grad_norm": 0.8864340782165527, + "learning_rate": 3.3926002698841253e-06, + "loss": 0.5411, + "step": 9724 + }, + { + "epoch": 0.62, + "grad_norm": 0.8910347819328308, + "learning_rate": 3.3916287735015375e-06, + "loss": 0.5857, + "step": 9725 + }, + { + "epoch": 0.62, + "grad_norm": 0.8630258440971375, + "learning_rate": 3.390657344841865e-06, + "loss": 0.5274, + "step": 9726 + }, + { + "epoch": 0.62, + "grad_norm": 0.8938130140304565, + "learning_rate": 3.3896859839460155e-06, + "loss": 0.5875, + "step": 9727 + }, + { + "epoch": 0.62, + "grad_norm": 0.9419970512390137, + "learning_rate": 3.3887146908548875e-06, + "loss": 0.6277, + "step": 9728 + }, + { + "epoch": 0.62, + "grad_norm": 0.8735063672065735, + "learning_rate": 3.3877434656093777e-06, + "loss": 0.5373, + "step": 9729 + }, + { + "epoch": 0.62, + "grad_norm": 0.9380084276199341, + "learning_rate": 3.3867723082503807e-06, + "loss": 0.664, + "step": 9730 + }, + { + "epoch": 0.62, + "grad_norm": 0.8645982146263123, + "learning_rate": 3.385801218818792e-06, + "loss": 0.5602, + "step": 9731 + }, + { + "epoch": 0.62, + "grad_norm": 0.9503861665725708, + "learning_rate": 3.384830197355499e-06, + "loss": 0.629, + "step": 9732 + }, + { + "epoch": 0.62, + "grad_norm": 0.875836968421936, + "learning_rate": 3.383859243901385e-06, + "loss": 0.5647, + "step": 9733 + }, + { + "epoch": 0.62, + "grad_norm": 0.841139018535614, + "learning_rate": 3.3828883584973364e-06, + "loss": 0.5904, + "step": 9734 + }, + { + "epoch": 0.62, + "grad_norm": 0.9259316325187683, + "learning_rate": 3.3819175411842353e-06, + "loss": 0.6008, + "step": 9735 + }, + { + "epoch": 0.62, + "grad_norm": 0.8969772458076477, + "learning_rate": 3.3809467920029574e-06, + "loss": 0.5846, + "step": 9736 + }, + { + "epoch": 0.62, + "grad_norm": 0.8963971138000488, + "learning_rate": 3.3799761109943775e-06, + "loss": 0.5553, + "step": 9737 + }, + { + "epoch": 0.62, + "grad_norm": 0.8801413178443909, + "learning_rate": 3.3790054981993683e-06, + "loss": 0.5912, + "step": 9738 + }, + { + "epoch": 0.62, + "grad_norm": 0.9127761125564575, + "learning_rate": 3.3780349536587965e-06, + "loss": 0.5684, + "step": 9739 + }, + { + "epoch": 0.62, + "grad_norm": 0.8743903636932373, + "learning_rate": 3.377064477413533e-06, + "loss": 0.6024, + "step": 9740 + }, + { + "epoch": 0.62, + "grad_norm": 0.8177317380905151, + "learning_rate": 3.376094069504437e-06, + "loss": 0.5123, + "step": 9741 + }, + { + "epoch": 0.62, + "grad_norm": 0.9717310070991516, + "learning_rate": 3.3751237299723715e-06, + "loss": 0.5859, + "step": 9742 + }, + { + "epoch": 0.62, + "grad_norm": 0.9141378402709961, + "learning_rate": 3.3741534588581915e-06, + "loss": 0.6222, + "step": 9743 + }, + { + "epoch": 0.62, + "grad_norm": 0.9228180050849915, + "learning_rate": 3.3731832562027555e-06, + "loss": 0.5765, + "step": 9744 + }, + { + "epoch": 0.62, + "grad_norm": 0.8938828110694885, + "learning_rate": 3.372213122046912e-06, + "loss": 0.5691, + "step": 9745 + }, + { + "epoch": 0.62, + "grad_norm": 0.8958885669708252, + "learning_rate": 3.3712430564315124e-06, + "loss": 0.5607, + "step": 9746 + }, + { + "epoch": 0.62, + "grad_norm": 0.9243308305740356, + "learning_rate": 3.3702730593974e-06, + "loss": 0.5898, + "step": 9747 + }, + { + "epoch": 0.62, + "grad_norm": 0.9135646224021912, + "learning_rate": 3.3693031309854214e-06, + "loss": 0.5836, + "step": 9748 + }, + { + "epoch": 0.62, + "grad_norm": 0.8542222380638123, + "learning_rate": 3.3683332712364138e-06, + "loss": 0.5579, + "step": 9749 + }, + { + "epoch": 0.62, + "grad_norm": 0.9198238849639893, + "learning_rate": 3.3673634801912175e-06, + "loss": 0.5955, + "step": 9750 + }, + { + "epoch": 0.62, + "grad_norm": 0.8537915349006653, + "learning_rate": 3.3663937578906642e-06, + "loss": 0.5355, + "step": 9751 + }, + { + "epoch": 0.62, + "grad_norm": 0.8687244057655334, + "learning_rate": 3.365424104375587e-06, + "loss": 0.5622, + "step": 9752 + }, + { + "epoch": 0.62, + "grad_norm": 0.8461039662361145, + "learning_rate": 3.3644545196868146e-06, + "loss": 0.594, + "step": 9753 + }, + { + "epoch": 0.62, + "grad_norm": 0.8379154801368713, + "learning_rate": 3.3634850038651734e-06, + "loss": 0.6012, + "step": 9754 + }, + { + "epoch": 0.62, + "grad_norm": 0.870664119720459, + "learning_rate": 3.362515556951485e-06, + "loss": 0.552, + "step": 9755 + }, + { + "epoch": 0.62, + "grad_norm": 0.8928505182266235, + "learning_rate": 3.36154617898657e-06, + "loss": 0.5454, + "step": 9756 + }, + { + "epoch": 0.62, + "grad_norm": 0.9355548620223999, + "learning_rate": 3.360576870011246e-06, + "loss": 0.5332, + "step": 9757 + }, + { + "epoch": 0.62, + "grad_norm": 0.910213828086853, + "learning_rate": 3.3596076300663273e-06, + "loss": 0.5907, + "step": 9758 + }, + { + "epoch": 0.62, + "grad_norm": 0.8772505521774292, + "learning_rate": 3.3586384591926235e-06, + "loss": 0.5826, + "step": 9759 + }, + { + "epoch": 0.62, + "grad_norm": 0.932346522808075, + "learning_rate": 3.3576693574309447e-06, + "loss": 0.6092, + "step": 9760 + }, + { + "epoch": 0.62, + "grad_norm": 0.8593097925186157, + "learning_rate": 3.3567003248220966e-06, + "loss": 0.6045, + "step": 9761 + }, + { + "epoch": 0.62, + "grad_norm": 0.8780187368392944, + "learning_rate": 3.355731361406882e-06, + "loss": 0.5656, + "step": 9762 + }, + { + "epoch": 0.62, + "grad_norm": 0.9253993630409241, + "learning_rate": 3.354762467226098e-06, + "loss": 0.5928, + "step": 9763 + }, + { + "epoch": 0.62, + "grad_norm": 0.9301041960716248, + "learning_rate": 3.3537936423205435e-06, + "loss": 0.5931, + "step": 9764 + }, + { + "epoch": 0.62, + "grad_norm": 0.8613686561584473, + "learning_rate": 3.352824886731012e-06, + "loss": 0.528, + "step": 9765 + }, + { + "epoch": 0.62, + "grad_norm": 0.9040000438690186, + "learning_rate": 3.351856200498296e-06, + "loss": 0.6182, + "step": 9766 + }, + { + "epoch": 0.62, + "grad_norm": 0.8548308610916138, + "learning_rate": 3.3508875836631806e-06, + "loss": 0.5758, + "step": 9767 + }, + { + "epoch": 0.62, + "grad_norm": 0.8682760000228882, + "learning_rate": 3.3499190362664523e-06, + "loss": 0.515, + "step": 9768 + }, + { + "epoch": 0.62, + "grad_norm": 0.8267245292663574, + "learning_rate": 3.3489505583488925e-06, + "loss": 0.5448, + "step": 9769 + }, + { + "epoch": 0.62, + "grad_norm": 0.9359412789344788, + "learning_rate": 3.3479821499512823e-06, + "loss": 0.6277, + "step": 9770 + }, + { + "epoch": 0.62, + "grad_norm": 0.9154059290885925, + "learning_rate": 3.347013811114396e-06, + "loss": 0.6081, + "step": 9771 + }, + { + "epoch": 0.62, + "grad_norm": 0.8913496136665344, + "learning_rate": 3.346045541879009e-06, + "loss": 0.5149, + "step": 9772 + }, + { + "epoch": 0.62, + "grad_norm": 0.8516811728477478, + "learning_rate": 3.3450773422858886e-06, + "loss": 0.605, + "step": 9773 + }, + { + "epoch": 0.62, + "grad_norm": 0.854370653629303, + "learning_rate": 3.3441092123758055e-06, + "loss": 0.5382, + "step": 9774 + }, + { + "epoch": 0.62, + "grad_norm": 0.9357313513755798, + "learning_rate": 3.3431411521895228e-06, + "loss": 0.5528, + "step": 9775 + }, + { + "epoch": 0.62, + "grad_norm": 0.9238491058349609, + "learning_rate": 3.342173161767803e-06, + "loss": 0.6366, + "step": 9776 + }, + { + "epoch": 0.62, + "grad_norm": 0.8327895402908325, + "learning_rate": 3.341205241151403e-06, + "loss": 0.5646, + "step": 9777 + }, + { + "epoch": 0.62, + "grad_norm": 0.9005405306816101, + "learning_rate": 3.3402373903810807e-06, + "loss": 0.5825, + "step": 9778 + }, + { + "epoch": 0.62, + "grad_norm": 0.8644494414329529, + "learning_rate": 3.3392696094975875e-06, + "loss": 0.5506, + "step": 9779 + }, + { + "epoch": 0.62, + "grad_norm": 0.8813982009887695, + "learning_rate": 3.338301898541675e-06, + "loss": 0.6004, + "step": 9780 + }, + { + "epoch": 0.62, + "grad_norm": 0.9440781474113464, + "learning_rate": 3.337334257554086e-06, + "loss": 0.6181, + "step": 9781 + }, + { + "epoch": 0.62, + "grad_norm": 0.8736399412155151, + "learning_rate": 3.3363666865755708e-06, + "loss": 0.5939, + "step": 9782 + }, + { + "epoch": 0.62, + "grad_norm": 0.929132342338562, + "learning_rate": 3.335399185646865e-06, + "loss": 0.6048, + "step": 9783 + }, + { + "epoch": 0.62, + "grad_norm": 0.8123360872268677, + "learning_rate": 3.33443175480871e-06, + "loss": 0.5391, + "step": 9784 + }, + { + "epoch": 0.62, + "grad_norm": 0.8572057485580444, + "learning_rate": 3.3334643941018398e-06, + "loss": 0.546, + "step": 9785 + }, + { + "epoch": 0.62, + "grad_norm": 0.8945849537849426, + "learning_rate": 3.3324971035669844e-06, + "loss": 0.6196, + "step": 9786 + }, + { + "epoch": 0.62, + "grad_norm": 0.8958498239517212, + "learning_rate": 3.3315298832448762e-06, + "loss": 0.5671, + "step": 9787 + }, + { + "epoch": 0.62, + "grad_norm": 0.9150652885437012, + "learning_rate": 3.330562733176242e-06, + "loss": 0.5584, + "step": 9788 + }, + { + "epoch": 0.62, + "grad_norm": 0.9838289022445679, + "learning_rate": 3.3295956534018033e-06, + "loss": 0.5973, + "step": 9789 + }, + { + "epoch": 0.62, + "grad_norm": 0.9296243786811829, + "learning_rate": 3.328628643962278e-06, + "loss": 0.5697, + "step": 9790 + }, + { + "epoch": 0.62, + "grad_norm": 0.8552951216697693, + "learning_rate": 3.3276617048983876e-06, + "loss": 0.5475, + "step": 9791 + }, + { + "epoch": 0.62, + "grad_norm": 0.958833634853363, + "learning_rate": 3.326694836250847e-06, + "loss": 0.615, + "step": 9792 + }, + { + "epoch": 0.62, + "grad_norm": 0.9096387028694153, + "learning_rate": 3.325728038060365e-06, + "loss": 0.5835, + "step": 9793 + }, + { + "epoch": 0.62, + "grad_norm": 0.9228499531745911, + "learning_rate": 3.324761310367649e-06, + "loss": 0.5948, + "step": 9794 + }, + { + "epoch": 0.62, + "grad_norm": 0.9237566590309143, + "learning_rate": 3.3237946532134077e-06, + "loss": 0.5998, + "step": 9795 + }, + { + "epoch": 0.62, + "grad_norm": 0.8698447942733765, + "learning_rate": 3.322828066638343e-06, + "loss": 0.5345, + "step": 9796 + }, + { + "epoch": 0.62, + "grad_norm": 0.9300669431686401, + "learning_rate": 3.321861550683154e-06, + "loss": 0.6291, + "step": 9797 + }, + { + "epoch": 0.62, + "grad_norm": 0.9495216012001038, + "learning_rate": 3.3208951053885367e-06, + "loss": 0.6271, + "step": 9798 + }, + { + "epoch": 0.62, + "grad_norm": 0.8946996331214905, + "learning_rate": 3.3199287307951844e-06, + "loss": 0.5963, + "step": 9799 + }, + { + "epoch": 0.62, + "grad_norm": 0.8824841976165771, + "learning_rate": 3.31896242694379e-06, + "loss": 0.5869, + "step": 9800 + }, + { + "epoch": 0.62, + "grad_norm": 0.9500332474708557, + "learning_rate": 3.317996193875041e-06, + "loss": 0.6316, + "step": 9801 + }, + { + "epoch": 0.62, + "grad_norm": 0.8856709599494934, + "learning_rate": 3.3170300316296194e-06, + "loss": 0.6012, + "step": 9802 + }, + { + "epoch": 0.62, + "grad_norm": 0.8249387741088867, + "learning_rate": 3.3160639402482077e-06, + "loss": 0.5547, + "step": 9803 + }, + { + "epoch": 0.62, + "grad_norm": 0.9641744494438171, + "learning_rate": 3.3150979197714874e-06, + "loss": 0.573, + "step": 9804 + }, + { + "epoch": 0.62, + "grad_norm": 0.9111761450767517, + "learning_rate": 3.314131970240132e-06, + "loss": 0.6098, + "step": 9805 + }, + { + "epoch": 0.62, + "grad_norm": 0.8709757924079895, + "learning_rate": 3.3131660916948137e-06, + "loss": 0.6311, + "step": 9806 + }, + { + "epoch": 0.62, + "grad_norm": 0.8643493056297302, + "learning_rate": 3.3122002841762023e-06, + "loss": 0.6367, + "step": 9807 + }, + { + "epoch": 0.62, + "grad_norm": 0.9116371870040894, + "learning_rate": 3.311234547724968e-06, + "loss": 0.6311, + "step": 9808 + }, + { + "epoch": 0.62, + "grad_norm": 0.8542858958244324, + "learning_rate": 3.310268882381772e-06, + "loss": 0.5615, + "step": 9809 + }, + { + "epoch": 0.62, + "grad_norm": 0.8505398035049438, + "learning_rate": 3.3093032881872738e-06, + "loss": 0.537, + "step": 9810 + }, + { + "epoch": 0.62, + "grad_norm": 0.9205043315887451, + "learning_rate": 3.3083377651821314e-06, + "loss": 0.5719, + "step": 9811 + }, + { + "epoch": 0.62, + "grad_norm": 0.8506627082824707, + "learning_rate": 3.3073723134070033e-06, + "loss": 0.5692, + "step": 9812 + }, + { + "epoch": 0.62, + "grad_norm": 0.8526927828788757, + "learning_rate": 3.3064069329025394e-06, + "loss": 0.602, + "step": 9813 + }, + { + "epoch": 0.62, + "grad_norm": 0.8983103036880493, + "learning_rate": 3.3054416237093863e-06, + "loss": 0.6207, + "step": 9814 + }, + { + "epoch": 0.62, + "grad_norm": 0.8063021898269653, + "learning_rate": 3.3044763858681928e-06, + "loss": 0.5113, + "step": 9815 + }, + { + "epoch": 0.62, + "grad_norm": 0.9379715323448181, + "learning_rate": 3.303511219419598e-06, + "loss": 0.603, + "step": 9816 + }, + { + "epoch": 0.62, + "grad_norm": 0.8616729378700256, + "learning_rate": 3.3025461244042462e-06, + "loss": 0.5416, + "step": 9817 + }, + { + "epoch": 0.62, + "grad_norm": 0.8878458142280579, + "learning_rate": 3.3015811008627707e-06, + "loss": 0.597, + "step": 9818 + }, + { + "epoch": 0.62, + "grad_norm": 0.9654011726379395, + "learning_rate": 3.3006161488358084e-06, + "loss": 0.5782, + "step": 9819 + }, + { + "epoch": 0.62, + "grad_norm": 0.8611358404159546, + "learning_rate": 3.299651268363986e-06, + "loss": 0.5884, + "step": 9820 + }, + { + "epoch": 0.62, + "grad_norm": 0.8918520212173462, + "learning_rate": 3.298686459487936e-06, + "loss": 0.5348, + "step": 9821 + }, + { + "epoch": 0.62, + "grad_norm": 0.8771730065345764, + "learning_rate": 3.2977217222482794e-06, + "loss": 0.5562, + "step": 9822 + }, + { + "epoch": 0.62, + "grad_norm": 0.8952116966247559, + "learning_rate": 3.29675705668564e-06, + "loss": 0.5843, + "step": 9823 + }, + { + "epoch": 0.62, + "grad_norm": 0.7943683862686157, + "learning_rate": 3.2957924628406347e-06, + "loss": 0.4923, + "step": 9824 + }, + { + "epoch": 0.62, + "grad_norm": 0.8794922828674316, + "learning_rate": 3.2948279407538813e-06, + "loss": 0.5498, + "step": 9825 + }, + { + "epoch": 0.62, + "grad_norm": 0.8752865195274353, + "learning_rate": 3.2938634904659903e-06, + "loss": 0.563, + "step": 9826 + }, + { + "epoch": 0.62, + "grad_norm": 0.8498966097831726, + "learning_rate": 3.2928991120175747e-06, + "loss": 0.5738, + "step": 9827 + }, + { + "epoch": 0.62, + "grad_norm": 0.9079285860061646, + "learning_rate": 3.2919348054492363e-06, + "loss": 0.5089, + "step": 9828 + }, + { + "epoch": 0.62, + "grad_norm": 0.9044334292411804, + "learning_rate": 3.2909705708015834e-06, + "loss": 0.6078, + "step": 9829 + }, + { + "epoch": 0.62, + "grad_norm": 0.8988425135612488, + "learning_rate": 3.290006408115213e-06, + "loss": 0.6227, + "step": 9830 + }, + { + "epoch": 0.62, + "grad_norm": 0.914106011390686, + "learning_rate": 3.289042317430726e-06, + "loss": 0.5588, + "step": 9831 + }, + { + "epoch": 0.62, + "grad_norm": 0.8243867754936218, + "learning_rate": 3.2880782987887128e-06, + "loss": 0.5413, + "step": 9832 + }, + { + "epoch": 0.62, + "grad_norm": 0.8767701983451843, + "learning_rate": 3.2871143522297672e-06, + "loss": 0.5734, + "step": 9833 + }, + { + "epoch": 0.62, + "grad_norm": 0.8820706605911255, + "learning_rate": 3.286150477794479e-06, + "loss": 0.5971, + "step": 9834 + }, + { + "epoch": 0.62, + "grad_norm": 0.8267252445220947, + "learning_rate": 3.2851866755234324e-06, + "loss": 0.584, + "step": 9835 + }, + { + "epoch": 0.62, + "grad_norm": 0.8427024483680725, + "learning_rate": 3.2842229454572084e-06, + "loss": 0.571, + "step": 9836 + }, + { + "epoch": 0.62, + "grad_norm": 0.9389393925666809, + "learning_rate": 3.2832592876363866e-06, + "loss": 0.6084, + "step": 9837 + }, + { + "epoch": 0.62, + "grad_norm": 0.9303972125053406, + "learning_rate": 3.2822957021015455e-06, + "loss": 0.5381, + "step": 9838 + }, + { + "epoch": 0.62, + "grad_norm": 0.9479062557220459, + "learning_rate": 3.2813321888932573e-06, + "loss": 0.5407, + "step": 9839 + }, + { + "epoch": 0.62, + "grad_norm": 0.808933436870575, + "learning_rate": 3.2803687480520905e-06, + "loss": 0.5186, + "step": 9840 + }, + { + "epoch": 0.62, + "grad_norm": 0.9015218019485474, + "learning_rate": 3.279405379618613e-06, + "loss": 0.5446, + "step": 9841 + }, + { + "epoch": 0.62, + "grad_norm": 0.8663600087165833, + "learning_rate": 3.278442083633392e-06, + "loss": 0.5311, + "step": 9842 + }, + { + "epoch": 0.62, + "grad_norm": 0.94172203540802, + "learning_rate": 3.277478860136985e-06, + "loss": 0.6051, + "step": 9843 + }, + { + "epoch": 0.62, + "grad_norm": 0.8853269219398499, + "learning_rate": 3.276515709169951e-06, + "loss": 0.5771, + "step": 9844 + }, + { + "epoch": 0.62, + "grad_norm": 0.8446550965309143, + "learning_rate": 3.2755526307728447e-06, + "loss": 0.5803, + "step": 9845 + }, + { + "epoch": 0.62, + "grad_norm": 0.8651383519172668, + "learning_rate": 3.2745896249862166e-06, + "loss": 0.5394, + "step": 9846 + }, + { + "epoch": 0.62, + "grad_norm": 0.923086941242218, + "learning_rate": 3.2736266918506195e-06, + "loss": 0.6304, + "step": 9847 + }, + { + "epoch": 0.62, + "grad_norm": 0.9140406847000122, + "learning_rate": 3.272663831406595e-06, + "loss": 0.6575, + "step": 9848 + }, + { + "epoch": 0.62, + "grad_norm": 0.8631356358528137, + "learning_rate": 3.2717010436946894e-06, + "loss": 0.577, + "step": 9849 + }, + { + "epoch": 0.62, + "grad_norm": 1.065402626991272, + "learning_rate": 3.270738328755438e-06, + "loss": 0.6418, + "step": 9850 + }, + { + "epoch": 0.62, + "grad_norm": 0.9519265294075012, + "learning_rate": 3.269775686629383e-06, + "loss": 0.6477, + "step": 9851 + }, + { + "epoch": 0.62, + "grad_norm": 0.9092915058135986, + "learning_rate": 3.2688131173570523e-06, + "loss": 0.6198, + "step": 9852 + }, + { + "epoch": 0.62, + "grad_norm": 0.9024431109428406, + "learning_rate": 3.2678506209789805e-06, + "loss": 0.6181, + "step": 9853 + }, + { + "epoch": 0.62, + "grad_norm": 0.8769482970237732, + "learning_rate": 3.2668881975356915e-06, + "loss": 0.5536, + "step": 9854 + }, + { + "epoch": 0.62, + "grad_norm": 0.9238791465759277, + "learning_rate": 3.2659258470677137e-06, + "loss": 0.6364, + "step": 9855 + }, + { + "epoch": 0.62, + "grad_norm": 0.7877947092056274, + "learning_rate": 3.2649635696155646e-06, + "loss": 0.4979, + "step": 9856 + }, + { + "epoch": 0.62, + "grad_norm": 0.84283447265625, + "learning_rate": 3.2640013652197654e-06, + "loss": 0.5688, + "step": 9857 + }, + { + "epoch": 0.62, + "grad_norm": 0.8760327100753784, + "learning_rate": 3.263039233920827e-06, + "loss": 0.616, + "step": 9858 + }, + { + "epoch": 0.62, + "grad_norm": 0.9331052303314209, + "learning_rate": 3.2620771757592663e-06, + "loss": 0.5612, + "step": 9859 + }, + { + "epoch": 0.62, + "grad_norm": 0.9472546577453613, + "learning_rate": 3.261115190775589e-06, + "loss": 0.5579, + "step": 9860 + }, + { + "epoch": 0.62, + "grad_norm": 0.9176338315010071, + "learning_rate": 3.2601532790103026e-06, + "loss": 0.5493, + "step": 9861 + }, + { + "epoch": 0.62, + "grad_norm": 0.9220177531242371, + "learning_rate": 3.259191440503909e-06, + "loss": 0.6003, + "step": 9862 + }, + { + "epoch": 0.62, + "grad_norm": 0.9254795908927917, + "learning_rate": 3.258229675296907e-06, + "loss": 0.607, + "step": 9863 + }, + { + "epoch": 0.62, + "grad_norm": 0.9141079187393188, + "learning_rate": 3.257267983429794e-06, + "loss": 0.6196, + "step": 9864 + }, + { + "epoch": 0.62, + "grad_norm": 0.8655530214309692, + "learning_rate": 3.2563063649430647e-06, + "loss": 0.5738, + "step": 9865 + }, + { + "epoch": 0.63, + "grad_norm": 0.9000252485275269, + "learning_rate": 3.25534481987721e-06, + "loss": 0.6145, + "step": 9866 + }, + { + "epoch": 0.63, + "grad_norm": 0.9283547401428223, + "learning_rate": 3.2543833482727123e-06, + "loss": 0.6134, + "step": 9867 + }, + { + "epoch": 0.63, + "grad_norm": 0.8959989547729492, + "learning_rate": 3.2534219501700603e-06, + "loss": 0.5565, + "step": 9868 + }, + { + "epoch": 0.63, + "grad_norm": 0.8951772451400757, + "learning_rate": 3.252460625609736e-06, + "loss": 0.5718, + "step": 9869 + }, + { + "epoch": 0.63, + "grad_norm": 0.9525763988494873, + "learning_rate": 3.251499374632214e-06, + "loss": 0.5916, + "step": 9870 + }, + { + "epoch": 0.63, + "grad_norm": 0.8921371102333069, + "learning_rate": 3.2505381972779704e-06, + "loss": 0.5674, + "step": 9871 + }, + { + "epoch": 0.63, + "grad_norm": 0.8959813714027405, + "learning_rate": 3.249577093587477e-06, + "loss": 0.5605, + "step": 9872 + }, + { + "epoch": 0.63, + "grad_norm": 0.9102842807769775, + "learning_rate": 3.2486160636012054e-06, + "loss": 0.6122, + "step": 9873 + }, + { + "epoch": 0.63, + "grad_norm": 0.8645837306976318, + "learning_rate": 3.2476551073596173e-06, + "loss": 0.5871, + "step": 9874 + }, + { + "epoch": 0.63, + "grad_norm": 0.9292948246002197, + "learning_rate": 3.246694224903175e-06, + "loss": 0.563, + "step": 9875 + }, + { + "epoch": 0.63, + "grad_norm": 0.9219769835472107, + "learning_rate": 3.245733416272341e-06, + "loss": 0.5999, + "step": 9876 + }, + { + "epoch": 0.63, + "grad_norm": 0.9211458563804626, + "learning_rate": 3.2447726815075715e-06, + "loss": 0.5903, + "step": 9877 + }, + { + "epoch": 0.63, + "grad_norm": 0.8986589312553406, + "learning_rate": 3.243812020649318e-06, + "loss": 0.5675, + "step": 9878 + }, + { + "epoch": 0.63, + "grad_norm": 0.9727473855018616, + "learning_rate": 3.24285143373803e-06, + "loss": 0.6265, + "step": 9879 + }, + { + "epoch": 0.63, + "grad_norm": 0.9438537955284119, + "learning_rate": 3.241890920814154e-06, + "loss": 0.5264, + "step": 9880 + }, + { + "epoch": 0.63, + "grad_norm": 0.9068976640701294, + "learning_rate": 3.2409304819181377e-06, + "loss": 0.5723, + "step": 9881 + }, + { + "epoch": 0.63, + "grad_norm": 0.8970639705657959, + "learning_rate": 3.2399701170904197e-06, + "loss": 0.5673, + "step": 9882 + }, + { + "epoch": 0.63, + "grad_norm": 0.8697466850280762, + "learning_rate": 3.239009826371436e-06, + "loss": 0.5677, + "step": 9883 + }, + { + "epoch": 0.63, + "grad_norm": 0.8627969622612, + "learning_rate": 3.238049609801621e-06, + "loss": 0.5892, + "step": 9884 + }, + { + "epoch": 0.63, + "grad_norm": 0.8970100283622742, + "learning_rate": 3.2370894674214102e-06, + "loss": 0.5913, + "step": 9885 + }, + { + "epoch": 0.63, + "grad_norm": 0.811985969543457, + "learning_rate": 3.2361293992712295e-06, + "loss": 0.4866, + "step": 9886 + }, + { + "epoch": 0.63, + "grad_norm": 0.8113346099853516, + "learning_rate": 3.2351694053915027e-06, + "loss": 0.5692, + "step": 9887 + }, + { + "epoch": 0.63, + "grad_norm": 0.9210705757141113, + "learning_rate": 3.2342094858226514e-06, + "loss": 0.6221, + "step": 9888 + }, + { + "epoch": 0.63, + "grad_norm": 0.8682329654693604, + "learning_rate": 3.233249640605098e-06, + "loss": 0.5479, + "step": 9889 + }, + { + "epoch": 0.63, + "grad_norm": 0.8741553425788879, + "learning_rate": 3.232289869779256e-06, + "loss": 0.5479, + "step": 9890 + }, + { + "epoch": 0.63, + "grad_norm": 0.8522763252258301, + "learning_rate": 3.231330173385537e-06, + "loss": 0.565, + "step": 9891 + }, + { + "epoch": 0.63, + "grad_norm": 0.8624328374862671, + "learning_rate": 3.2303705514643537e-06, + "loss": 0.6031, + "step": 9892 + }, + { + "epoch": 0.63, + "grad_norm": 0.952092707157135, + "learning_rate": 3.229411004056108e-06, + "loss": 0.5751, + "step": 9893 + }, + { + "epoch": 0.63, + "grad_norm": 0.9104188084602356, + "learning_rate": 3.2284515312012056e-06, + "loss": 0.5759, + "step": 9894 + }, + { + "epoch": 0.63, + "grad_norm": 0.8913605213165283, + "learning_rate": 3.2274921329400484e-06, + "loss": 0.6126, + "step": 9895 + }, + { + "epoch": 0.63, + "grad_norm": 0.8454800844192505, + "learning_rate": 3.226532809313031e-06, + "loss": 0.5386, + "step": 9896 + }, + { + "epoch": 0.63, + "grad_norm": 0.9178531765937805, + "learning_rate": 3.2255735603605454e-06, + "loss": 0.6037, + "step": 9897 + }, + { + "epoch": 0.63, + "grad_norm": 0.9100960493087769, + "learning_rate": 3.2246143861229857e-06, + "loss": 0.5579, + "step": 9898 + }, + { + "epoch": 0.63, + "grad_norm": 0.8924016952514648, + "learning_rate": 3.223655286640739e-06, + "loss": 0.5699, + "step": 9899 + }, + { + "epoch": 0.63, + "grad_norm": 0.9032720327377319, + "learning_rate": 3.2226962619541885e-06, + "loss": 0.5988, + "step": 9900 + }, + { + "epoch": 0.63, + "grad_norm": 0.8333504796028137, + "learning_rate": 3.221737312103714e-06, + "loss": 0.5045, + "step": 9901 + }, + { + "epoch": 0.63, + "grad_norm": 0.8808243274688721, + "learning_rate": 3.2207784371296957e-06, + "loss": 0.6074, + "step": 9902 + }, + { + "epoch": 0.63, + "grad_norm": 0.906588613986969, + "learning_rate": 3.2198196370725095e-06, + "loss": 0.6131, + "step": 9903 + }, + { + "epoch": 0.63, + "grad_norm": 0.9039662480354309, + "learning_rate": 3.218860911972525e-06, + "loss": 0.637, + "step": 9904 + }, + { + "epoch": 0.63, + "grad_norm": 0.9129331707954407, + "learning_rate": 3.2179022618701093e-06, + "loss": 0.624, + "step": 9905 + }, + { + "epoch": 0.63, + "grad_norm": 0.904314398765564, + "learning_rate": 3.2169436868056316e-06, + "loss": 0.5856, + "step": 9906 + }, + { + "epoch": 0.63, + "grad_norm": 0.8561462163925171, + "learning_rate": 3.215985186819453e-06, + "loss": 0.6, + "step": 9907 + }, + { + "epoch": 0.63, + "grad_norm": 0.8671022653579712, + "learning_rate": 3.2150267619519326e-06, + "loss": 0.5943, + "step": 9908 + }, + { + "epoch": 0.63, + "grad_norm": 0.895698070526123, + "learning_rate": 3.214068412243424e-06, + "loss": 0.5647, + "step": 9909 + }, + { + "epoch": 0.63, + "grad_norm": 0.8691821694374084, + "learning_rate": 3.213110137734281e-06, + "loss": 0.6168, + "step": 9910 + }, + { + "epoch": 0.63, + "grad_norm": 0.8884726166725159, + "learning_rate": 3.2121519384648558e-06, + "loss": 0.6183, + "step": 9911 + }, + { + "epoch": 0.63, + "grad_norm": 0.8356814980506897, + "learning_rate": 3.211193814475494e-06, + "loss": 0.5989, + "step": 9912 + }, + { + "epoch": 0.63, + "grad_norm": 0.8705270290374756, + "learning_rate": 3.2102357658065357e-06, + "loss": 0.6103, + "step": 9913 + }, + { + "epoch": 0.63, + "grad_norm": 0.9131333827972412, + "learning_rate": 3.2092777924983224e-06, + "loss": 0.6342, + "step": 9914 + }, + { + "epoch": 0.63, + "grad_norm": 0.9165261387825012, + "learning_rate": 3.208319894591194e-06, + "loss": 0.6076, + "step": 9915 + }, + { + "epoch": 0.63, + "grad_norm": 0.858545184135437, + "learning_rate": 3.207362072125482e-06, + "loss": 0.5796, + "step": 9916 + }, + { + "epoch": 0.63, + "grad_norm": 0.8806081414222717, + "learning_rate": 3.2064043251415166e-06, + "loss": 0.5396, + "step": 9917 + }, + { + "epoch": 0.63, + "grad_norm": 0.9189614057540894, + "learning_rate": 3.2054466536796236e-06, + "loss": 0.5817, + "step": 9918 + }, + { + "epoch": 0.63, + "grad_norm": 0.9014858603477478, + "learning_rate": 3.2044890577801317e-06, + "loss": 0.5843, + "step": 9919 + }, + { + "epoch": 0.63, + "grad_norm": 0.9093121886253357, + "learning_rate": 3.2035315374833596e-06, + "loss": 0.5669, + "step": 9920 + }, + { + "epoch": 0.63, + "grad_norm": 0.8906499743461609, + "learning_rate": 3.2025740928296235e-06, + "loss": 0.5766, + "step": 9921 + }, + { + "epoch": 0.63, + "grad_norm": 0.9178594350814819, + "learning_rate": 3.201616723859241e-06, + "loss": 0.6233, + "step": 9922 + }, + { + "epoch": 0.63, + "grad_norm": 0.8954256772994995, + "learning_rate": 3.20065943061252e-06, + "loss": 0.6048, + "step": 9923 + }, + { + "epoch": 0.63, + "grad_norm": 0.8136070370674133, + "learning_rate": 3.199702213129773e-06, + "loss": 0.5394, + "step": 9924 + }, + { + "epoch": 0.63, + "grad_norm": 0.8871577382087708, + "learning_rate": 3.1987450714513018e-06, + "loss": 0.5906, + "step": 9925 + }, + { + "epoch": 0.63, + "grad_norm": 0.8907647728919983, + "learning_rate": 3.1977880056174105e-06, + "loss": 0.5741, + "step": 9926 + }, + { + "epoch": 0.63, + "grad_norm": 0.8544868230819702, + "learning_rate": 3.196831015668396e-06, + "loss": 0.5634, + "step": 9927 + }, + { + "epoch": 0.63, + "grad_norm": 0.9441981911659241, + "learning_rate": 3.195874101644555e-06, + "loss": 0.5918, + "step": 9928 + }, + { + "epoch": 0.63, + "grad_norm": 0.8469243049621582, + "learning_rate": 3.194917263586179e-06, + "loss": 0.5407, + "step": 9929 + }, + { + "epoch": 0.63, + "grad_norm": 0.8396049737930298, + "learning_rate": 3.1939605015335588e-06, + "loss": 0.5383, + "step": 9930 + }, + { + "epoch": 0.63, + "grad_norm": 0.8566557168960571, + "learning_rate": 3.193003815526977e-06, + "loss": 0.5878, + "step": 9931 + }, + { + "epoch": 0.63, + "grad_norm": 0.9029106497764587, + "learning_rate": 3.192047205606721e-06, + "loss": 0.6431, + "step": 9932 + }, + { + "epoch": 0.63, + "grad_norm": 0.9187177419662476, + "learning_rate": 3.1910906718130665e-06, + "loss": 0.5857, + "step": 9933 + }, + { + "epoch": 0.63, + "grad_norm": 0.8693289756774902, + "learning_rate": 3.1901342141862917e-06, + "loss": 0.5984, + "step": 9934 + }, + { + "epoch": 0.63, + "grad_norm": 0.9296219944953918, + "learning_rate": 3.1891778327666673e-06, + "loss": 0.6454, + "step": 9935 + }, + { + "epoch": 0.63, + "grad_norm": 0.944770097732544, + "learning_rate": 3.1882215275944673e-06, + "loss": 0.6541, + "step": 9936 + }, + { + "epoch": 0.63, + "grad_norm": 0.8502100706100464, + "learning_rate": 3.187265298709954e-06, + "loss": 0.5676, + "step": 9937 + }, + { + "epoch": 0.63, + "grad_norm": 0.8540067076683044, + "learning_rate": 3.1863091461533945e-06, + "loss": 0.5624, + "step": 9938 + }, + { + "epoch": 0.63, + "grad_norm": 0.8409416079521179, + "learning_rate": 3.1853530699650483e-06, + "loss": 0.5844, + "step": 9939 + }, + { + "epoch": 0.63, + "grad_norm": 0.858970046043396, + "learning_rate": 3.184397070185169e-06, + "loss": 0.6213, + "step": 9940 + }, + { + "epoch": 0.63, + "grad_norm": 0.8982256054878235, + "learning_rate": 3.183441146854014e-06, + "loss": 0.5477, + "step": 9941 + }, + { + "epoch": 0.63, + "grad_norm": 0.924256443977356, + "learning_rate": 3.182485300011834e-06, + "loss": 0.6534, + "step": 9942 + }, + { + "epoch": 0.63, + "grad_norm": 0.8575473427772522, + "learning_rate": 3.181529529698875e-06, + "loss": 0.5467, + "step": 9943 + }, + { + "epoch": 0.63, + "grad_norm": 0.8267804980278015, + "learning_rate": 3.1805738359553796e-06, + "loss": 0.5687, + "step": 9944 + }, + { + "epoch": 0.63, + "grad_norm": 0.8258667588233948, + "learning_rate": 3.1796182188215917e-06, + "loss": 0.5367, + "step": 9945 + }, + { + "epoch": 0.63, + "grad_norm": 0.9159985184669495, + "learning_rate": 3.1786626783377494e-06, + "loss": 0.576, + "step": 9946 + }, + { + "epoch": 0.63, + "grad_norm": 0.8569443225860596, + "learning_rate": 3.177707214544086e-06, + "loss": 0.5744, + "step": 9947 + }, + { + "epoch": 0.63, + "grad_norm": 0.8471035957336426, + "learning_rate": 3.1767518274808298e-06, + "loss": 0.5411, + "step": 9948 + }, + { + "epoch": 0.63, + "grad_norm": 0.8953260779380798, + "learning_rate": 3.175796517188212e-06, + "loss": 0.6003, + "step": 9949 + }, + { + "epoch": 0.63, + "grad_norm": 0.868668258190155, + "learning_rate": 3.174841283706459e-06, + "loss": 0.5516, + "step": 9950 + }, + { + "epoch": 0.63, + "grad_norm": 0.8107344508171082, + "learning_rate": 3.17388612707579e-06, + "loss": 0.5666, + "step": 9951 + }, + { + "epoch": 0.63, + "grad_norm": 0.9241723418235779, + "learning_rate": 3.172931047336421e-06, + "loss": 0.5836, + "step": 9952 + }, + { + "epoch": 0.63, + "grad_norm": 0.8751961588859558, + "learning_rate": 3.1719760445285712e-06, + "loss": 0.5113, + "step": 9953 + }, + { + "epoch": 0.63, + "grad_norm": 0.9166142344474792, + "learning_rate": 3.1710211186924524e-06, + "loss": 0.555, + "step": 9954 + }, + { + "epoch": 0.63, + "grad_norm": 0.889083981513977, + "learning_rate": 3.170066269868271e-06, + "loss": 0.5905, + "step": 9955 + }, + { + "epoch": 0.63, + "grad_norm": 0.905120313167572, + "learning_rate": 3.169111498096232e-06, + "loss": 0.5809, + "step": 9956 + }, + { + "epoch": 0.63, + "grad_norm": 0.852555513381958, + "learning_rate": 3.1681568034165383e-06, + "loss": 0.5447, + "step": 9957 + }, + { + "epoch": 0.63, + "grad_norm": 0.8818122148513794, + "learning_rate": 3.167202185869391e-06, + "loss": 0.5774, + "step": 9958 + }, + { + "epoch": 0.63, + "grad_norm": 0.9433296918869019, + "learning_rate": 3.166247645494982e-06, + "loss": 0.6099, + "step": 9959 + }, + { + "epoch": 0.63, + "grad_norm": 0.9000284671783447, + "learning_rate": 3.1652931823335074e-06, + "loss": 0.5622, + "step": 9960 + }, + { + "epoch": 0.63, + "grad_norm": 0.9485234618186951, + "learning_rate": 3.164338796425152e-06, + "loss": 0.6129, + "step": 9961 + }, + { + "epoch": 0.63, + "grad_norm": 0.8851210474967957, + "learning_rate": 3.163384487810106e-06, + "loss": 0.542, + "step": 9962 + }, + { + "epoch": 0.63, + "grad_norm": 0.8798405528068542, + "learning_rate": 3.162430256528549e-06, + "loss": 0.5844, + "step": 9963 + }, + { + "epoch": 0.63, + "grad_norm": 0.921736478805542, + "learning_rate": 3.161476102620663e-06, + "loss": 0.6119, + "step": 9964 + }, + { + "epoch": 0.63, + "grad_norm": 0.9609774947166443, + "learning_rate": 3.16052202612662e-06, + "loss": 0.5531, + "step": 9965 + }, + { + "epoch": 0.63, + "grad_norm": 0.8847622275352478, + "learning_rate": 3.159568027086598e-06, + "loss": 0.6304, + "step": 9966 + }, + { + "epoch": 0.63, + "grad_norm": 0.9161363244056702, + "learning_rate": 3.1586141055407627e-06, + "loss": 0.6271, + "step": 9967 + }, + { + "epoch": 0.63, + "grad_norm": 0.8306808471679688, + "learning_rate": 3.157660261529283e-06, + "loss": 0.5713, + "step": 9968 + }, + { + "epoch": 0.63, + "grad_norm": 0.932395875453949, + "learning_rate": 3.15670649509232e-06, + "loss": 0.5708, + "step": 9969 + }, + { + "epoch": 0.63, + "grad_norm": 0.890895426273346, + "learning_rate": 3.155752806270033e-06, + "loss": 0.6783, + "step": 9970 + }, + { + "epoch": 0.63, + "grad_norm": 0.8662253618240356, + "learning_rate": 3.1547991951025795e-06, + "loss": 0.5452, + "step": 9971 + }, + { + "epoch": 0.63, + "grad_norm": 0.9375318884849548, + "learning_rate": 3.153845661630115e-06, + "loss": 0.6196, + "step": 9972 + }, + { + "epoch": 0.63, + "grad_norm": 0.8994795680046082, + "learning_rate": 3.152892205892787e-06, + "loss": 0.5902, + "step": 9973 + }, + { + "epoch": 0.63, + "grad_norm": 0.890771210193634, + "learning_rate": 3.15193882793074e-06, + "loss": 0.5948, + "step": 9974 + }, + { + "epoch": 0.63, + "grad_norm": 0.8573660254478455, + "learning_rate": 3.150985527784122e-06, + "loss": 0.5463, + "step": 9975 + }, + { + "epoch": 0.63, + "grad_norm": 0.8332209587097168, + "learning_rate": 3.1500323054930715e-06, + "loss": 0.5577, + "step": 9976 + }, + { + "epoch": 0.63, + "grad_norm": 0.9283886551856995, + "learning_rate": 3.149079161097725e-06, + "loss": 0.5936, + "step": 9977 + }, + { + "epoch": 0.63, + "grad_norm": 0.8500183820724487, + "learning_rate": 3.1481260946382143e-06, + "loss": 0.5424, + "step": 9978 + }, + { + "epoch": 0.63, + "grad_norm": 0.8809803128242493, + "learning_rate": 3.147173106154673e-06, + "loss": 0.6419, + "step": 9979 + }, + { + "epoch": 0.63, + "grad_norm": 0.8598153591156006, + "learning_rate": 3.146220195687227e-06, + "loss": 0.6031, + "step": 9980 + }, + { + "epoch": 0.63, + "grad_norm": 0.8905846476554871, + "learning_rate": 3.145267363276e-06, + "loss": 0.5879, + "step": 9981 + }, + { + "epoch": 0.63, + "grad_norm": 0.8749983906745911, + "learning_rate": 3.1443146089611102e-06, + "loss": 0.566, + "step": 9982 + }, + { + "epoch": 0.63, + "grad_norm": 0.8304601907730103, + "learning_rate": 3.143361932782678e-06, + "loss": 0.5731, + "step": 9983 + }, + { + "epoch": 0.63, + "grad_norm": 0.8867066502571106, + "learning_rate": 3.142409334780817e-06, + "loss": 0.5504, + "step": 9984 + }, + { + "epoch": 0.63, + "grad_norm": 0.8217571377754211, + "learning_rate": 3.1414568149956366e-06, + "loss": 0.4873, + "step": 9985 + }, + { + "epoch": 0.63, + "grad_norm": 0.8885734677314758, + "learning_rate": 3.1405043734672436e-06, + "loss": 0.5873, + "step": 9986 + }, + { + "epoch": 0.63, + "grad_norm": 0.8465083241462708, + "learning_rate": 3.1395520102357413e-06, + "loss": 0.5652, + "step": 9987 + }, + { + "epoch": 0.63, + "grad_norm": 0.9155653119087219, + "learning_rate": 3.1385997253412336e-06, + "loss": 0.5387, + "step": 9988 + }, + { + "epoch": 0.63, + "grad_norm": 0.9237584471702576, + "learning_rate": 3.137647518823817e-06, + "loss": 0.5978, + "step": 9989 + }, + { + "epoch": 0.63, + "grad_norm": 0.9412940144538879, + "learning_rate": 3.136695390723583e-06, + "loss": 0.6437, + "step": 9990 + }, + { + "epoch": 0.63, + "grad_norm": 0.9255321025848389, + "learning_rate": 3.135743341080624e-06, + "loss": 0.59, + "step": 9991 + }, + { + "epoch": 0.63, + "grad_norm": 0.9065369367599487, + "learning_rate": 3.1347913699350286e-06, + "loss": 0.5753, + "step": 9992 + }, + { + "epoch": 0.63, + "grad_norm": 0.8333830237388611, + "learning_rate": 3.1338394773268805e-06, + "loss": 0.5217, + "step": 9993 + }, + { + "epoch": 0.63, + "grad_norm": 0.9209916591644287, + "learning_rate": 3.132887663296259e-06, + "loss": 0.6099, + "step": 9994 + }, + { + "epoch": 0.63, + "grad_norm": 0.9044961929321289, + "learning_rate": 3.131935927883242e-06, + "loss": 0.566, + "step": 9995 + }, + { + "epoch": 0.63, + "grad_norm": 0.9058372378349304, + "learning_rate": 3.1309842711279066e-06, + "loss": 0.5774, + "step": 9996 + }, + { + "epoch": 0.63, + "grad_norm": 0.8610040545463562, + "learning_rate": 3.130032693070322e-06, + "loss": 0.5434, + "step": 9997 + }, + { + "epoch": 0.63, + "grad_norm": 0.894743800163269, + "learning_rate": 3.129081193750554e-06, + "loss": 0.5637, + "step": 9998 + }, + { + "epoch": 0.63, + "grad_norm": 0.9117133021354675, + "learning_rate": 3.1281297732086666e-06, + "loss": 0.5844, + "step": 9999 + }, + { + "epoch": 0.63, + "grad_norm": 0.9719625115394592, + "learning_rate": 3.1271784314847266e-06, + "loss": 0.6132, + "step": 10000 + }, + { + "epoch": 0.63, + "grad_norm": 0.8312113881111145, + "learning_rate": 3.126227168618786e-06, + "loss": 0.5298, + "step": 10001 + }, + { + "epoch": 0.63, + "grad_norm": 0.85428786277771, + "learning_rate": 3.1252759846509013e-06, + "loss": 0.5784, + "step": 10002 + }, + { + "epoch": 0.63, + "grad_norm": 0.8149659633636475, + "learning_rate": 3.1243248796211234e-06, + "loss": 0.5674, + "step": 10003 + }, + { + "epoch": 0.63, + "grad_norm": 0.8940887451171875, + "learning_rate": 3.123373853569498e-06, + "loss": 0.5869, + "step": 10004 + }, + { + "epoch": 0.63, + "grad_norm": 0.9396780133247375, + "learning_rate": 3.1224229065360734e-06, + "loss": 0.5875, + "step": 10005 + }, + { + "epoch": 0.63, + "grad_norm": 0.8760607838630676, + "learning_rate": 3.1214720385608875e-06, + "loss": 0.6323, + "step": 10006 + }, + { + "epoch": 0.63, + "grad_norm": 0.9258213043212891, + "learning_rate": 3.120521249683981e-06, + "loss": 0.6057, + "step": 10007 + }, + { + "epoch": 0.63, + "grad_norm": 0.9452094435691833, + "learning_rate": 3.1195705399453833e-06, + "loss": 0.547, + "step": 10008 + }, + { + "epoch": 0.63, + "grad_norm": 0.8690341711044312, + "learning_rate": 3.118619909385131e-06, + "loss": 0.6319, + "step": 10009 + }, + { + "epoch": 0.63, + "grad_norm": 0.8699579238891602, + "learning_rate": 3.117669358043248e-06, + "loss": 0.568, + "step": 10010 + }, + { + "epoch": 0.63, + "grad_norm": 0.9703599214553833, + "learning_rate": 3.116718885959762e-06, + "loss": 0.592, + "step": 10011 + }, + { + "epoch": 0.63, + "grad_norm": 0.8900342583656311, + "learning_rate": 3.1157684931746902e-06, + "loss": 0.5536, + "step": 10012 + }, + { + "epoch": 0.63, + "grad_norm": 0.8778373003005981, + "learning_rate": 3.1148181797280543e-06, + "loss": 0.6032, + "step": 10013 + }, + { + "epoch": 0.63, + "grad_norm": 0.8625448942184448, + "learning_rate": 3.1138679456598654e-06, + "loss": 0.5673, + "step": 10014 + }, + { + "epoch": 0.63, + "grad_norm": 0.8882395029067993, + "learning_rate": 3.112917791010137e-06, + "loss": 0.6069, + "step": 10015 + }, + { + "epoch": 0.63, + "grad_norm": 0.8981207013130188, + "learning_rate": 3.111967715818876e-06, + "loss": 0.579, + "step": 10016 + }, + { + "epoch": 0.63, + "grad_norm": 0.891898512840271, + "learning_rate": 3.1110177201260845e-06, + "loss": 0.5742, + "step": 10017 + }, + { + "epoch": 0.63, + "grad_norm": 0.9222726225852966, + "learning_rate": 3.1100678039717665e-06, + "loss": 0.553, + "step": 10018 + }, + { + "epoch": 0.63, + "grad_norm": 0.8938819766044617, + "learning_rate": 3.1091179673959194e-06, + "loss": 0.5761, + "step": 10019 + }, + { + "epoch": 0.63, + "grad_norm": 0.9439987540245056, + "learning_rate": 3.108168210438536e-06, + "loss": 0.6038, + "step": 10020 + }, + { + "epoch": 0.63, + "grad_norm": 0.9559965133666992, + "learning_rate": 3.1072185331396083e-06, + "loss": 0.5841, + "step": 10021 + }, + { + "epoch": 0.63, + "grad_norm": 0.912056565284729, + "learning_rate": 3.106268935539123e-06, + "loss": 0.6017, + "step": 10022 + }, + { + "epoch": 0.64, + "grad_norm": 0.9289038181304932, + "learning_rate": 3.1053194176770662e-06, + "loss": 0.6042, + "step": 10023 + }, + { + "epoch": 0.64, + "grad_norm": 0.864149808883667, + "learning_rate": 3.1043699795934172e-06, + "loss": 0.549, + "step": 10024 + }, + { + "epoch": 0.64, + "grad_norm": 0.9083261489868164, + "learning_rate": 3.1034206213281536e-06, + "loss": 0.6446, + "step": 10025 + }, + { + "epoch": 0.64, + "grad_norm": 0.8593977689743042, + "learning_rate": 3.10247134292125e-06, + "loss": 0.5199, + "step": 10026 + }, + { + "epoch": 0.64, + "grad_norm": 0.9130897521972656, + "learning_rate": 3.1015221444126776e-06, + "loss": 0.5645, + "step": 10027 + }, + { + "epoch": 0.64, + "grad_norm": 0.8100042939186096, + "learning_rate": 3.1005730258424025e-06, + "loss": 0.5811, + "step": 10028 + }, + { + "epoch": 0.64, + "grad_norm": 0.7985337376594543, + "learning_rate": 3.099623987250391e-06, + "loss": 0.5514, + "step": 10029 + }, + { + "epoch": 0.64, + "grad_norm": 0.8951230049133301, + "learning_rate": 3.098675028676601e-06, + "loss": 0.6081, + "step": 10030 + }, + { + "epoch": 0.64, + "grad_norm": 0.844353973865509, + "learning_rate": 3.0977261501609924e-06, + "loss": 0.5776, + "step": 10031 + }, + { + "epoch": 0.64, + "grad_norm": 0.9215499758720398, + "learning_rate": 3.0967773517435173e-06, + "loss": 0.5944, + "step": 10032 + }, + { + "epoch": 0.64, + "grad_norm": 0.8491506576538086, + "learning_rate": 3.0958286334641284e-06, + "loss": 0.5225, + "step": 10033 + }, + { + "epoch": 0.64, + "grad_norm": 0.9156690835952759, + "learning_rate": 3.0948799953627696e-06, + "loss": 0.5782, + "step": 10034 + }, + { + "epoch": 0.64, + "grad_norm": 0.8732212781906128, + "learning_rate": 3.093931437479388e-06, + "loss": 0.5373, + "step": 10035 + }, + { + "epoch": 0.64, + "grad_norm": 0.8561059236526489, + "learning_rate": 3.092982959853923e-06, + "loss": 0.5791, + "step": 10036 + }, + { + "epoch": 0.64, + "grad_norm": 0.9923532605171204, + "learning_rate": 3.092034562526312e-06, + "loss": 0.619, + "step": 10037 + }, + { + "epoch": 0.64, + "grad_norm": 0.8949557542800903, + "learning_rate": 3.0910862455364864e-06, + "loss": 0.5534, + "step": 10038 + }, + { + "epoch": 0.64, + "grad_norm": 0.9672521948814392, + "learning_rate": 3.09013800892438e-06, + "loss": 0.6306, + "step": 10039 + }, + { + "epoch": 0.64, + "grad_norm": 0.8525355458259583, + "learning_rate": 3.0891898527299167e-06, + "loss": 0.5502, + "step": 10040 + }, + { + "epoch": 0.64, + "grad_norm": 0.8738742470741272, + "learning_rate": 3.088241776993024e-06, + "loss": 0.5939, + "step": 10041 + }, + { + "epoch": 0.64, + "grad_norm": 0.9154573082923889, + "learning_rate": 3.0872937817536165e-06, + "loss": 0.6274, + "step": 10042 + }, + { + "epoch": 0.64, + "grad_norm": 0.8766052722930908, + "learning_rate": 3.0863458670516157e-06, + "loss": 0.5632, + "step": 10043 + }, + { + "epoch": 0.64, + "grad_norm": 0.9145663976669312, + "learning_rate": 3.085398032926933e-06, + "loss": 0.5808, + "step": 10044 + }, + { + "epoch": 0.64, + "grad_norm": 0.9256823062896729, + "learning_rate": 3.0844502794194795e-06, + "loss": 0.6116, + "step": 10045 + }, + { + "epoch": 0.64, + "grad_norm": 0.8836879730224609, + "learning_rate": 3.083502606569159e-06, + "loss": 0.5633, + "step": 10046 + }, + { + "epoch": 0.64, + "grad_norm": 0.8442484736442566, + "learning_rate": 3.0825550144158788e-06, + "loss": 0.5449, + "step": 10047 + }, + { + "epoch": 0.64, + "grad_norm": 0.8978825807571411, + "learning_rate": 3.081607502999536e-06, + "loss": 0.5596, + "step": 10048 + }, + { + "epoch": 0.64, + "grad_norm": 0.8920104503631592, + "learning_rate": 3.0806600723600275e-06, + "loss": 0.5789, + "step": 10049 + }, + { + "epoch": 0.64, + "grad_norm": 0.8824292421340942, + "learning_rate": 3.0797127225372477e-06, + "loss": 0.5769, + "step": 10050 + }, + { + "epoch": 0.64, + "grad_norm": 0.8434662818908691, + "learning_rate": 3.078765453571082e-06, + "loss": 0.5399, + "step": 10051 + }, + { + "epoch": 0.64, + "grad_norm": 0.8964858055114746, + "learning_rate": 3.077818265501421e-06, + "loss": 0.5802, + "step": 10052 + }, + { + "epoch": 0.64, + "grad_norm": 0.8828626871109009, + "learning_rate": 3.0768711583681475e-06, + "loss": 0.5715, + "step": 10053 + }, + { + "epoch": 0.64, + "grad_norm": 0.8482038974761963, + "learning_rate": 3.075924132211139e-06, + "loss": 0.5171, + "step": 10054 + }, + { + "epoch": 0.64, + "grad_norm": 0.8848569989204407, + "learning_rate": 3.07497718707027e-06, + "loss": 0.5214, + "step": 10055 + }, + { + "epoch": 0.64, + "grad_norm": 0.9475182294845581, + "learning_rate": 3.074030322985416e-06, + "loss": 0.5741, + "step": 10056 + }, + { + "epoch": 0.64, + "grad_norm": 0.8911900520324707, + "learning_rate": 3.073083539996446e-06, + "loss": 0.5845, + "step": 10057 + }, + { + "epoch": 0.64, + "grad_norm": 0.8566701412200928, + "learning_rate": 3.072136838143225e-06, + "loss": 0.5495, + "step": 10058 + }, + { + "epoch": 0.64, + "grad_norm": 0.8495940566062927, + "learning_rate": 3.0711902174656126e-06, + "loss": 0.5523, + "step": 10059 + }, + { + "epoch": 0.64, + "grad_norm": 0.9519007802009583, + "learning_rate": 3.070243678003472e-06, + "loss": 0.6046, + "step": 10060 + }, + { + "epoch": 0.64, + "grad_norm": 0.8953449726104736, + "learning_rate": 3.069297219796658e-06, + "loss": 0.6238, + "step": 10061 + }, + { + "epoch": 0.64, + "grad_norm": 0.8640437126159668, + "learning_rate": 3.068350842885022e-06, + "loss": 0.5685, + "step": 10062 + }, + { + "epoch": 0.64, + "grad_norm": 0.9119696021080017, + "learning_rate": 3.0674045473084103e-06, + "loss": 0.5888, + "step": 10063 + }, + { + "epoch": 0.64, + "grad_norm": 0.8524396419525146, + "learning_rate": 3.0664583331066695e-06, + "loss": 0.521, + "step": 10064 + }, + { + "epoch": 0.64, + "grad_norm": 0.896168053150177, + "learning_rate": 3.0655122003196443e-06, + "loss": 0.5385, + "step": 10065 + }, + { + "epoch": 0.64, + "grad_norm": 0.9223374724388123, + "learning_rate": 3.06456614898717e-06, + "loss": 0.6304, + "step": 10066 + }, + { + "epoch": 0.64, + "grad_norm": 0.877920389175415, + "learning_rate": 3.0636201791490823e-06, + "loss": 0.6141, + "step": 10067 + }, + { + "epoch": 0.64, + "grad_norm": 0.8867497444152832, + "learning_rate": 3.062674290845211e-06, + "loss": 0.54, + "step": 10068 + }, + { + "epoch": 0.64, + "grad_norm": 0.8688225746154785, + "learning_rate": 3.061728484115388e-06, + "loss": 0.5374, + "step": 10069 + }, + { + "epoch": 0.64, + "grad_norm": 0.8275909423828125, + "learning_rate": 3.0607827589994353e-06, + "loss": 0.5417, + "step": 10070 + }, + { + "epoch": 0.64, + "grad_norm": 0.8562379479408264, + "learning_rate": 3.0598371155371747e-06, + "loss": 0.5275, + "step": 10071 + }, + { + "epoch": 0.64, + "grad_norm": 0.8817172646522522, + "learning_rate": 3.058891553768422e-06, + "loss": 0.5717, + "step": 10072 + }, + { + "epoch": 0.64, + "grad_norm": 0.9288895130157471, + "learning_rate": 3.0579460737329958e-06, + "loss": 0.61, + "step": 10073 + }, + { + "epoch": 0.64, + "grad_norm": 0.9470510482788086, + "learning_rate": 3.0570006754707044e-06, + "loss": 0.6149, + "step": 10074 + }, + { + "epoch": 0.64, + "grad_norm": 0.9388991594314575, + "learning_rate": 3.056055359021354e-06, + "loss": 0.5868, + "step": 10075 + }, + { + "epoch": 0.64, + "grad_norm": 0.8381592035293579, + "learning_rate": 3.0551101244247494e-06, + "loss": 0.5587, + "step": 10076 + }, + { + "epoch": 0.64, + "grad_norm": 0.8484103679656982, + "learning_rate": 3.0541649717206933e-06, + "loss": 0.544, + "step": 10077 + }, + { + "epoch": 0.64, + "grad_norm": 0.9049462080001831, + "learning_rate": 3.0532199009489814e-06, + "loss": 0.5781, + "step": 10078 + }, + { + "epoch": 0.64, + "grad_norm": 0.9403258562088013, + "learning_rate": 3.052274912149406e-06, + "loss": 0.5796, + "step": 10079 + }, + { + "epoch": 0.64, + "grad_norm": 0.8975145220756531, + "learning_rate": 3.0513300053617595e-06, + "loss": 0.564, + "step": 10080 + }, + { + "epoch": 0.64, + "grad_norm": 0.9125024676322937, + "learning_rate": 3.0503851806258257e-06, + "loss": 0.5705, + "step": 10081 + }, + { + "epoch": 0.64, + "grad_norm": 0.8600341081619263, + "learning_rate": 3.0494404379813914e-06, + "loss": 0.6273, + "step": 10082 + }, + { + "epoch": 0.64, + "grad_norm": 0.8747133016586304, + "learning_rate": 3.048495777468234e-06, + "loss": 0.6381, + "step": 10083 + }, + { + "epoch": 0.64, + "grad_norm": 0.9442613124847412, + "learning_rate": 3.047551199126131e-06, + "loss": 0.5865, + "step": 10084 + }, + { + "epoch": 0.64, + "grad_norm": 0.8632836937904358, + "learning_rate": 3.046606702994854e-06, + "loss": 0.6283, + "step": 10085 + }, + { + "epoch": 0.64, + "grad_norm": 0.8757123351097107, + "learning_rate": 3.0456622891141748e-06, + "loss": 0.5375, + "step": 10086 + }, + { + "epoch": 0.64, + "grad_norm": 0.8973109126091003, + "learning_rate": 3.0447179575238565e-06, + "loss": 0.6009, + "step": 10087 + }, + { + "epoch": 0.64, + "grad_norm": 0.9889295101165771, + "learning_rate": 3.0437737082636647e-06, + "loss": 0.6492, + "step": 10088 + }, + { + "epoch": 0.64, + "grad_norm": 0.8877894282341003, + "learning_rate": 3.0428295413733546e-06, + "loss": 0.4994, + "step": 10089 + }, + { + "epoch": 0.64, + "grad_norm": 0.9340519309043884, + "learning_rate": 3.0418854568926866e-06, + "loss": 0.5923, + "step": 10090 + }, + { + "epoch": 0.64, + "grad_norm": 0.8820253610610962, + "learning_rate": 3.0409414548614086e-06, + "loss": 0.5531, + "step": 10091 + }, + { + "epoch": 0.64, + "grad_norm": 0.8349282145500183, + "learning_rate": 3.039997535319272e-06, + "loss": 0.572, + "step": 10092 + }, + { + "epoch": 0.64, + "grad_norm": 0.8930419087409973, + "learning_rate": 3.039053698306019e-06, + "loss": 0.5305, + "step": 10093 + }, + { + "epoch": 0.64, + "grad_norm": 0.8789426684379578, + "learning_rate": 3.0381099438613948e-06, + "loss": 0.5386, + "step": 10094 + }, + { + "epoch": 0.64, + "grad_norm": 0.8947481513023376, + "learning_rate": 3.037166272025135e-06, + "loss": 0.5672, + "step": 10095 + }, + { + "epoch": 0.64, + "grad_norm": 0.8884199261665344, + "learning_rate": 3.0362226828369767e-06, + "loss": 0.6096, + "step": 10096 + }, + { + "epoch": 0.64, + "grad_norm": 0.885449230670929, + "learning_rate": 3.0352791763366484e-06, + "loss": 0.5831, + "step": 10097 + }, + { + "epoch": 0.64, + "grad_norm": 0.836551308631897, + "learning_rate": 3.0343357525638787e-06, + "loss": 0.5132, + "step": 10098 + }, + { + "epoch": 0.64, + "grad_norm": 0.9031782150268555, + "learning_rate": 3.0333924115583935e-06, + "loss": 0.5898, + "step": 10099 + }, + { + "epoch": 0.64, + "grad_norm": 0.9216272234916687, + "learning_rate": 3.032449153359913e-06, + "loss": 0.5271, + "step": 10100 + }, + { + "epoch": 0.64, + "grad_norm": 0.8929412961006165, + "learning_rate": 3.031505978008153e-06, + "loss": 0.5852, + "step": 10101 + }, + { + "epoch": 0.64, + "grad_norm": 0.884545087814331, + "learning_rate": 3.030562885542827e-06, + "loss": 0.5595, + "step": 10102 + }, + { + "epoch": 0.64, + "grad_norm": 0.881131649017334, + "learning_rate": 3.0296198760036493e-06, + "loss": 0.5557, + "step": 10103 + }, + { + "epoch": 0.64, + "grad_norm": 0.9663098454475403, + "learning_rate": 3.0286769494303237e-06, + "loss": 0.5894, + "step": 10104 + }, + { + "epoch": 0.64, + "grad_norm": 0.936959445476532, + "learning_rate": 3.0277341058625537e-06, + "loss": 0.5987, + "step": 10105 + }, + { + "epoch": 0.64, + "grad_norm": 0.8869735598564148, + "learning_rate": 3.026791345340038e-06, + "loss": 0.5943, + "step": 10106 + }, + { + "epoch": 0.64, + "grad_norm": 0.8177929520606995, + "learning_rate": 3.0258486679024767e-06, + "loss": 0.528, + "step": 10107 + }, + { + "epoch": 0.64, + "grad_norm": 0.9447188377380371, + "learning_rate": 3.0249060735895603e-06, + "loss": 0.5911, + "step": 10108 + }, + { + "epoch": 0.64, + "grad_norm": 0.9261248707771301, + "learning_rate": 3.0239635624409767e-06, + "loss": 0.5625, + "step": 10109 + }, + { + "epoch": 0.64, + "grad_norm": 0.8987361788749695, + "learning_rate": 3.0230211344964154e-06, + "loss": 0.5519, + "step": 10110 + }, + { + "epoch": 0.64, + "grad_norm": 0.8797249794006348, + "learning_rate": 3.0220787897955544e-06, + "loss": 0.5839, + "step": 10111 + }, + { + "epoch": 0.64, + "grad_norm": 0.9020108580589294, + "learning_rate": 3.021136528378077e-06, + "loss": 0.5724, + "step": 10112 + }, + { + "epoch": 0.64, + "grad_norm": 1.0054893493652344, + "learning_rate": 3.020194350283655e-06, + "loss": 0.6108, + "step": 10113 + }, + { + "epoch": 0.64, + "grad_norm": 0.9611765146255493, + "learning_rate": 3.019252255551963e-06, + "loss": 0.6548, + "step": 10114 + }, + { + "epoch": 0.64, + "grad_norm": 0.8305823802947998, + "learning_rate": 3.0183102442226653e-06, + "loss": 0.5477, + "step": 10115 + }, + { + "epoch": 0.64, + "grad_norm": 0.8949651718139648, + "learning_rate": 3.017368316335432e-06, + "loss": 0.576, + "step": 10116 + }, + { + "epoch": 0.64, + "grad_norm": 0.919265627861023, + "learning_rate": 3.0164264719299207e-06, + "loss": 0.585, + "step": 10117 + }, + { + "epoch": 0.64, + "grad_norm": 0.8549671173095703, + "learning_rate": 3.0154847110457918e-06, + "loss": 0.5094, + "step": 10118 + }, + { + "epoch": 0.64, + "grad_norm": 0.9418630003929138, + "learning_rate": 3.0145430337226955e-06, + "loss": 0.5906, + "step": 10119 + }, + { + "epoch": 0.64, + "grad_norm": 0.9273284673690796, + "learning_rate": 3.013601440000288e-06, + "loss": 0.5946, + "step": 10120 + }, + { + "epoch": 0.64, + "grad_norm": 0.9151654243469238, + "learning_rate": 3.0126599299182114e-06, + "loss": 0.5878, + "step": 10121 + }, + { + "epoch": 0.64, + "grad_norm": 0.8435792922973633, + "learning_rate": 3.0117185035161135e-06, + "loss": 0.5951, + "step": 10122 + }, + { + "epoch": 0.64, + "grad_norm": 0.8737865090370178, + "learning_rate": 3.01077716083363e-06, + "loss": 0.5739, + "step": 10123 + }, + { + "epoch": 0.64, + "grad_norm": 0.8778201937675476, + "learning_rate": 3.009835901910403e-06, + "loss": 0.5487, + "step": 10124 + }, + { + "epoch": 0.64, + "grad_norm": 0.862269937992096, + "learning_rate": 3.008894726786062e-06, + "loss": 0.5627, + "step": 10125 + }, + { + "epoch": 0.64, + "grad_norm": 0.8969505429267883, + "learning_rate": 3.007953635500238e-06, + "loss": 0.5473, + "step": 10126 + }, + { + "epoch": 0.64, + "grad_norm": 0.9201652407646179, + "learning_rate": 3.0070126280925564e-06, + "loss": 0.6661, + "step": 10127 + }, + { + "epoch": 0.64, + "grad_norm": 0.882611870765686, + "learning_rate": 3.0060717046026387e-06, + "loss": 0.6069, + "step": 10128 + }, + { + "epoch": 0.64, + "grad_norm": 0.9005841016769409, + "learning_rate": 3.0051308650701054e-06, + "loss": 0.6415, + "step": 10129 + }, + { + "epoch": 0.64, + "grad_norm": 0.8846337795257568, + "learning_rate": 3.004190109534573e-06, + "loss": 0.5779, + "step": 10130 + }, + { + "epoch": 0.64, + "grad_norm": 0.9538823366165161, + "learning_rate": 3.0032494380356523e-06, + "loss": 0.5801, + "step": 10131 + }, + { + "epoch": 0.64, + "grad_norm": 0.8762175440788269, + "learning_rate": 3.002308850612949e-06, + "loss": 0.5717, + "step": 10132 + }, + { + "epoch": 0.64, + "grad_norm": 0.8317214846611023, + "learning_rate": 3.001368347306073e-06, + "loss": 0.5577, + "step": 10133 + }, + { + "epoch": 0.64, + "grad_norm": 0.8527503609657288, + "learning_rate": 3.0004279281546235e-06, + "loss": 0.5818, + "step": 10134 + }, + { + "epoch": 0.64, + "grad_norm": 0.861371636390686, + "learning_rate": 2.999487593198197e-06, + "loss": 0.5475, + "step": 10135 + }, + { + "epoch": 0.64, + "grad_norm": 0.8559701442718506, + "learning_rate": 2.9985473424763876e-06, + "loss": 0.5565, + "step": 10136 + }, + { + "epoch": 0.64, + "grad_norm": 0.9402846693992615, + "learning_rate": 2.9976071760287874e-06, + "loss": 0.6107, + "step": 10137 + }, + { + "epoch": 0.64, + "grad_norm": 0.8749223351478577, + "learning_rate": 2.9966670938949847e-06, + "loss": 0.5484, + "step": 10138 + }, + { + "epoch": 0.64, + "grad_norm": 0.8214702606201172, + "learning_rate": 2.995727096114561e-06, + "loss": 0.5719, + "step": 10139 + }, + { + "epoch": 0.64, + "grad_norm": 0.8834431767463684, + "learning_rate": 2.9947871827270956e-06, + "loss": 0.6228, + "step": 10140 + }, + { + "epoch": 0.64, + "grad_norm": 0.9178330302238464, + "learning_rate": 2.993847353772168e-06, + "loss": 0.5815, + "step": 10141 + }, + { + "epoch": 0.64, + "grad_norm": 0.8592966198921204, + "learning_rate": 2.9929076092893496e-06, + "loss": 0.5508, + "step": 10142 + }, + { + "epoch": 0.64, + "grad_norm": 0.8537743091583252, + "learning_rate": 2.991967949318209e-06, + "loss": 0.6015, + "step": 10143 + }, + { + "epoch": 0.64, + "grad_norm": 0.8182849884033203, + "learning_rate": 2.9910283738983125e-06, + "loss": 0.5648, + "step": 10144 + }, + { + "epoch": 0.64, + "grad_norm": 0.9029396772384644, + "learning_rate": 2.9900888830692208e-06, + "loss": 0.6084, + "step": 10145 + }, + { + "epoch": 0.64, + "grad_norm": 0.8994178175926208, + "learning_rate": 2.9891494768704964e-06, + "loss": 0.6156, + "step": 10146 + }, + { + "epoch": 0.64, + "grad_norm": 0.8991573452949524, + "learning_rate": 2.9882101553416932e-06, + "loss": 0.5458, + "step": 10147 + }, + { + "epoch": 0.64, + "grad_norm": 0.89846271276474, + "learning_rate": 2.9872709185223596e-06, + "loss": 0.6052, + "step": 10148 + }, + { + "epoch": 0.64, + "grad_norm": 0.8613349199295044, + "learning_rate": 2.9863317664520453e-06, + "loss": 0.5582, + "step": 10149 + }, + { + "epoch": 0.64, + "grad_norm": 0.9185076951980591, + "learning_rate": 2.9853926991702974e-06, + "loss": 0.6023, + "step": 10150 + }, + { + "epoch": 0.64, + "grad_norm": 0.9413586258888245, + "learning_rate": 2.984453716716655e-06, + "loss": 0.5681, + "step": 10151 + }, + { + "epoch": 0.64, + "grad_norm": 0.843997061252594, + "learning_rate": 2.9835148191306535e-06, + "loss": 0.5453, + "step": 10152 + }, + { + "epoch": 0.64, + "grad_norm": 0.8782387375831604, + "learning_rate": 2.9825760064518273e-06, + "loss": 0.5996, + "step": 10153 + }, + { + "epoch": 0.64, + "grad_norm": 0.7524551749229431, + "learning_rate": 2.981637278719709e-06, + "loss": 0.5087, + "step": 10154 + }, + { + "epoch": 0.64, + "grad_norm": 0.8854588270187378, + "learning_rate": 2.9806986359738244e-06, + "loss": 0.5919, + "step": 10155 + }, + { + "epoch": 0.64, + "grad_norm": 0.8053493499755859, + "learning_rate": 2.979760078253694e-06, + "loss": 0.5397, + "step": 10156 + }, + { + "epoch": 0.64, + "grad_norm": 0.9677163362503052, + "learning_rate": 2.9788216055988397e-06, + "loss": 0.6133, + "step": 10157 + }, + { + "epoch": 0.64, + "grad_norm": 0.8523488640785217, + "learning_rate": 2.977883218048775e-06, + "loss": 0.5826, + "step": 10158 + }, + { + "epoch": 0.64, + "grad_norm": 0.9211286902427673, + "learning_rate": 2.9769449156430147e-06, + "loss": 0.6244, + "step": 10159 + }, + { + "epoch": 0.64, + "grad_norm": 0.9199965596199036, + "learning_rate": 2.9760066984210655e-06, + "loss": 0.6112, + "step": 10160 + }, + { + "epoch": 0.64, + "grad_norm": 0.8444145321846008, + "learning_rate": 2.975068566422434e-06, + "loss": 0.5704, + "step": 10161 + }, + { + "epoch": 0.64, + "grad_norm": 0.907306969165802, + "learning_rate": 2.97413051968662e-06, + "loss": 0.6156, + "step": 10162 + }, + { + "epoch": 0.64, + "grad_norm": 0.8991623520851135, + "learning_rate": 2.9731925582531227e-06, + "loss": 0.5905, + "step": 10163 + }, + { + "epoch": 0.64, + "grad_norm": 0.8663104772567749, + "learning_rate": 2.9722546821614373e-06, + "loss": 0.5704, + "step": 10164 + }, + { + "epoch": 0.64, + "grad_norm": 0.8777760863304138, + "learning_rate": 2.9713168914510533e-06, + "loss": 0.5416, + "step": 10165 + }, + { + "epoch": 0.64, + "grad_norm": 0.8857688307762146, + "learning_rate": 2.970379186161455e-06, + "loss": 0.5669, + "step": 10166 + }, + { + "epoch": 0.64, + "grad_norm": 0.8899209499359131, + "learning_rate": 2.96944156633213e-06, + "loss": 0.6229, + "step": 10167 + }, + { + "epoch": 0.64, + "grad_norm": 0.8441648483276367, + "learning_rate": 2.9685040320025583e-06, + "loss": 0.5529, + "step": 10168 + }, + { + "epoch": 0.64, + "grad_norm": 0.8758301138877869, + "learning_rate": 2.9675665832122146e-06, + "loss": 0.6021, + "step": 10169 + }, + { + "epoch": 0.64, + "grad_norm": 0.8592897057533264, + "learning_rate": 2.966629220000569e-06, + "loss": 0.5656, + "step": 10170 + }, + { + "epoch": 0.64, + "grad_norm": 0.8968542814254761, + "learning_rate": 2.965691942407095e-06, + "loss": 0.5639, + "step": 10171 + }, + { + "epoch": 0.64, + "grad_norm": 0.8699895143508911, + "learning_rate": 2.9647547504712577e-06, + "loss": 0.6159, + "step": 10172 + }, + { + "epoch": 0.64, + "grad_norm": 0.8662521243095398, + "learning_rate": 2.9638176442325173e-06, + "loss": 0.5801, + "step": 10173 + }, + { + "epoch": 0.64, + "grad_norm": 0.8635749220848083, + "learning_rate": 2.962880623730332e-06, + "loss": 0.5819, + "step": 10174 + }, + { + "epoch": 0.64, + "grad_norm": 0.9355505108833313, + "learning_rate": 2.9619436890041555e-06, + "loss": 0.5975, + "step": 10175 + }, + { + "epoch": 0.64, + "grad_norm": 0.9172835350036621, + "learning_rate": 2.961006840093442e-06, + "loss": 0.5504, + "step": 10176 + }, + { + "epoch": 0.64, + "grad_norm": 0.814353346824646, + "learning_rate": 2.9600700770376384e-06, + "loss": 0.551, + "step": 10177 + }, + { + "epoch": 0.64, + "grad_norm": 0.8739163875579834, + "learning_rate": 2.959133399876186e-06, + "loss": 0.5662, + "step": 10178 + }, + { + "epoch": 0.64, + "grad_norm": 0.8901175856590271, + "learning_rate": 2.958196808648525e-06, + "loss": 0.6053, + "step": 10179 + }, + { + "epoch": 0.64, + "grad_norm": 0.9073649644851685, + "learning_rate": 2.957260303394096e-06, + "loss": 0.5715, + "step": 10180 + }, + { + "epoch": 0.65, + "grad_norm": 0.8607237935066223, + "learning_rate": 2.9563238841523293e-06, + "loss": 0.5847, + "step": 10181 + }, + { + "epoch": 0.65, + "grad_norm": 0.8739342093467712, + "learning_rate": 2.955387550962654e-06, + "loss": 0.5199, + "step": 10182 + }, + { + "epoch": 0.65, + "grad_norm": 0.8706129193305969, + "learning_rate": 2.954451303864494e-06, + "loss": 0.61, + "step": 10183 + }, + { + "epoch": 0.65, + "grad_norm": 0.9067039489746094, + "learning_rate": 2.9535151428972762e-06, + "loss": 0.5869, + "step": 10184 + }, + { + "epoch": 0.65, + "grad_norm": 0.856227695941925, + "learning_rate": 2.9525790681004172e-06, + "loss": 0.5495, + "step": 10185 + }, + { + "epoch": 0.65, + "grad_norm": 0.8174280524253845, + "learning_rate": 2.9516430795133294e-06, + "loss": 0.5439, + "step": 10186 + }, + { + "epoch": 0.65, + "grad_norm": 0.9031091332435608, + "learning_rate": 2.950707177175427e-06, + "loss": 0.5668, + "step": 10187 + }, + { + "epoch": 0.65, + "grad_norm": 0.8566731810569763, + "learning_rate": 2.9497713611261146e-06, + "loss": 0.5512, + "step": 10188 + }, + { + "epoch": 0.65, + "grad_norm": 0.9484649300575256, + "learning_rate": 2.9488356314047994e-06, + "loss": 0.6265, + "step": 10189 + }, + { + "epoch": 0.65, + "grad_norm": 0.8249022364616394, + "learning_rate": 2.94789998805088e-06, + "loss": 0.604, + "step": 10190 + }, + { + "epoch": 0.65, + "grad_norm": 0.880988597869873, + "learning_rate": 2.9469644311037545e-06, + "loss": 0.6227, + "step": 10191 + }, + { + "epoch": 0.65, + "grad_norm": 0.9004330635070801, + "learning_rate": 2.946028960602812e-06, + "loss": 0.58, + "step": 10192 + }, + { + "epoch": 0.65, + "grad_norm": 0.9128255844116211, + "learning_rate": 2.9450935765874474e-06, + "loss": 0.5868, + "step": 10193 + }, + { + "epoch": 0.65, + "grad_norm": 0.9160966873168945, + "learning_rate": 2.9441582790970425e-06, + "loss": 0.6127, + "step": 10194 + }, + { + "epoch": 0.65, + "grad_norm": 0.9519477486610413, + "learning_rate": 2.9432230681709815e-06, + "loss": 0.6271, + "step": 10195 + }, + { + "epoch": 0.65, + "grad_norm": 0.8393691778182983, + "learning_rate": 2.942287943848641e-06, + "loss": 0.552, + "step": 10196 + }, + { + "epoch": 0.65, + "grad_norm": 0.9617919921875, + "learning_rate": 2.941352906169398e-06, + "loss": 0.6222, + "step": 10197 + }, + { + "epoch": 0.65, + "grad_norm": 0.8588807582855225, + "learning_rate": 2.9404179551726214e-06, + "loss": 0.5407, + "step": 10198 + }, + { + "epoch": 0.65, + "grad_norm": 0.9286124110221863, + "learning_rate": 2.939483090897681e-06, + "loss": 0.6136, + "step": 10199 + }, + { + "epoch": 0.65, + "grad_norm": 0.9325358867645264, + "learning_rate": 2.9385483133839386e-06, + "loss": 0.5356, + "step": 10200 + }, + { + "epoch": 0.65, + "grad_norm": 0.9960110187530518, + "learning_rate": 2.937613622670756e-06, + "loss": 0.6458, + "step": 10201 + }, + { + "epoch": 0.65, + "grad_norm": 0.8421880006790161, + "learning_rate": 2.9366790187974897e-06, + "loss": 0.5576, + "step": 10202 + }, + { + "epoch": 0.65, + "grad_norm": 0.8421469330787659, + "learning_rate": 2.9357445018034926e-06, + "loss": 0.5855, + "step": 10203 + }, + { + "epoch": 0.65, + "grad_norm": 0.8795361518859863, + "learning_rate": 2.934810071728114e-06, + "loss": 0.5877, + "step": 10204 + }, + { + "epoch": 0.65, + "grad_norm": 0.9030759930610657, + "learning_rate": 2.9338757286106955e-06, + "loss": 0.5878, + "step": 10205 + }, + { + "epoch": 0.65, + "grad_norm": 0.8403552770614624, + "learning_rate": 2.9329414724905845e-06, + "loss": 0.5818, + "step": 10206 + }, + { + "epoch": 0.65, + "grad_norm": 0.8796659708023071, + "learning_rate": 2.9320073034071187e-06, + "loss": 0.5653, + "step": 10207 + }, + { + "epoch": 0.65, + "grad_norm": 0.8549631237983704, + "learning_rate": 2.9310732213996305e-06, + "loss": 0.5763, + "step": 10208 + }, + { + "epoch": 0.65, + "grad_norm": 0.8520306944847107, + "learning_rate": 2.9301392265074506e-06, + "loss": 0.5754, + "step": 10209 + }, + { + "epoch": 0.65, + "grad_norm": 0.8692139387130737, + "learning_rate": 2.9292053187699075e-06, + "loss": 0.5272, + "step": 10210 + }, + { + "epoch": 0.65, + "grad_norm": 0.8986145257949829, + "learning_rate": 2.9282714982263265e-06, + "loss": 0.5251, + "step": 10211 + }, + { + "epoch": 0.65, + "grad_norm": 0.9022727012634277, + "learning_rate": 2.927337764916025e-06, + "loss": 0.5878, + "step": 10212 + }, + { + "epoch": 0.65, + "grad_norm": 0.8935984969139099, + "learning_rate": 2.926404118878319e-06, + "loss": 0.6037, + "step": 10213 + }, + { + "epoch": 0.65, + "grad_norm": 0.8796955347061157, + "learning_rate": 2.925470560152522e-06, + "loss": 0.5815, + "step": 10214 + }, + { + "epoch": 0.65, + "grad_norm": 0.8789433240890503, + "learning_rate": 2.924537088777944e-06, + "loss": 0.6031, + "step": 10215 + }, + { + "epoch": 0.65, + "grad_norm": 0.8689199686050415, + "learning_rate": 2.9236037047938894e-06, + "loss": 0.5566, + "step": 10216 + }, + { + "epoch": 0.65, + "grad_norm": 0.850175678730011, + "learning_rate": 2.922670408239657e-06, + "loss": 0.5817, + "step": 10217 + }, + { + "epoch": 0.65, + "grad_norm": 0.8795483112335205, + "learning_rate": 2.921737199154549e-06, + "loss": 0.6259, + "step": 10218 + }, + { + "epoch": 0.65, + "grad_norm": 0.8465956449508667, + "learning_rate": 2.920804077577859e-06, + "loss": 0.5467, + "step": 10219 + }, + { + "epoch": 0.65, + "grad_norm": 0.8541370630264282, + "learning_rate": 2.919871043548875e-06, + "loss": 0.5509, + "step": 10220 + }, + { + "epoch": 0.65, + "grad_norm": 0.8528336882591248, + "learning_rate": 2.9189380971068864e-06, + "loss": 0.5237, + "step": 10221 + }, + { + "epoch": 0.65, + "grad_norm": 1.0272489786148071, + "learning_rate": 2.918005238291172e-06, + "loss": 0.5948, + "step": 10222 + }, + { + "epoch": 0.65, + "grad_norm": 0.8642032742500305, + "learning_rate": 2.9170724671410155e-06, + "loss": 0.605, + "step": 10223 + }, + { + "epoch": 0.65, + "grad_norm": 0.8577390313148499, + "learning_rate": 2.916139783695694e-06, + "loss": 0.5634, + "step": 10224 + }, + { + "epoch": 0.65, + "grad_norm": 0.935626208782196, + "learning_rate": 2.9152071879944743e-06, + "loss": 0.5815, + "step": 10225 + }, + { + "epoch": 0.65, + "grad_norm": 0.9437475800514221, + "learning_rate": 2.914274680076628e-06, + "loss": 0.6359, + "step": 10226 + }, + { + "epoch": 0.65, + "grad_norm": 0.8789603114128113, + "learning_rate": 2.913342259981419e-06, + "loss": 0.6031, + "step": 10227 + }, + { + "epoch": 0.65, + "grad_norm": 0.9114549160003662, + "learning_rate": 2.9124099277481088e-06, + "loss": 0.544, + "step": 10228 + }, + { + "epoch": 0.65, + "grad_norm": 0.8359835743904114, + "learning_rate": 2.9114776834159563e-06, + "loss": 0.5686, + "step": 10229 + }, + { + "epoch": 0.65, + "grad_norm": 0.9180512428283691, + "learning_rate": 2.910545527024209e-06, + "loss": 0.58, + "step": 10230 + }, + { + "epoch": 0.65, + "grad_norm": 0.9041998386383057, + "learning_rate": 2.9096134586121227e-06, + "loss": 0.6303, + "step": 10231 + }, + { + "epoch": 0.65, + "grad_norm": 0.8931963443756104, + "learning_rate": 2.908681478218944e-06, + "loss": 0.5667, + "step": 10232 + }, + { + "epoch": 0.65, + "grad_norm": 0.8502830266952515, + "learning_rate": 2.907749585883911e-06, + "loss": 0.5689, + "step": 10233 + }, + { + "epoch": 0.65, + "grad_norm": 0.8675402998924255, + "learning_rate": 2.906817781646264e-06, + "loss": 0.5585, + "step": 10234 + }, + { + "epoch": 0.65, + "grad_norm": 0.9389364719390869, + "learning_rate": 2.905886065545239e-06, + "loss": 0.6007, + "step": 10235 + }, + { + "epoch": 0.65, + "grad_norm": 0.8219680786132812, + "learning_rate": 2.9049544376200674e-06, + "loss": 0.5835, + "step": 10236 + }, + { + "epoch": 0.65, + "grad_norm": 0.9516189098358154, + "learning_rate": 2.9040228979099777e-06, + "loss": 0.5689, + "step": 10237 + }, + { + "epoch": 0.65, + "grad_norm": 0.8465138077735901, + "learning_rate": 2.9030914464541904e-06, + "loss": 0.5335, + "step": 10238 + }, + { + "epoch": 0.65, + "grad_norm": 0.8579193949699402, + "learning_rate": 2.902160083291926e-06, + "loss": 0.5572, + "step": 10239 + }, + { + "epoch": 0.65, + "grad_norm": 0.9451611042022705, + "learning_rate": 2.9012288084624065e-06, + "loss": 0.5445, + "step": 10240 + }, + { + "epoch": 0.65, + "grad_norm": 0.8656702637672424, + "learning_rate": 2.9002976220048383e-06, + "loss": 0.5438, + "step": 10241 + }, + { + "epoch": 0.65, + "grad_norm": 0.8685592412948608, + "learning_rate": 2.899366523958434e-06, + "loss": 0.6194, + "step": 10242 + }, + { + "epoch": 0.65, + "grad_norm": 0.8590168356895447, + "learning_rate": 2.898435514362397e-06, + "loss": 0.5964, + "step": 10243 + }, + { + "epoch": 0.65, + "grad_norm": 0.8453319668769836, + "learning_rate": 2.89750459325593e-06, + "loss": 0.5445, + "step": 10244 + }, + { + "epoch": 0.65, + "grad_norm": 0.8947049379348755, + "learning_rate": 2.896573760678232e-06, + "loss": 0.6004, + "step": 10245 + }, + { + "epoch": 0.65, + "grad_norm": 0.8810886144638062, + "learning_rate": 2.8956430166684945e-06, + "loss": 0.5597, + "step": 10246 + }, + { + "epoch": 0.65, + "grad_norm": 0.9045408964157104, + "learning_rate": 2.8947123612659068e-06, + "loss": 0.5975, + "step": 10247 + }, + { + "epoch": 0.65, + "grad_norm": 0.907370388507843, + "learning_rate": 2.8937817945096614e-06, + "loss": 0.546, + "step": 10248 + }, + { + "epoch": 0.65, + "grad_norm": 0.929260790348053, + "learning_rate": 2.8928513164389353e-06, + "loss": 0.6313, + "step": 10249 + }, + { + "epoch": 0.65, + "grad_norm": 0.8894972205162048, + "learning_rate": 2.8919209270929106e-06, + "loss": 0.6308, + "step": 10250 + }, + { + "epoch": 0.65, + "grad_norm": 0.8753820061683655, + "learning_rate": 2.8909906265107647e-06, + "loss": 0.5576, + "step": 10251 + }, + { + "epoch": 0.65, + "grad_norm": 0.9265826940536499, + "learning_rate": 2.890060414731662e-06, + "loss": 0.5626, + "step": 10252 + }, + { + "epoch": 0.65, + "grad_norm": 0.9352290034294128, + "learning_rate": 2.8891302917947794e-06, + "loss": 0.628, + "step": 10253 + }, + { + "epoch": 0.65, + "grad_norm": 0.9359737038612366, + "learning_rate": 2.8882002577392752e-06, + "loss": 0.6278, + "step": 10254 + }, + { + "epoch": 0.65, + "grad_norm": 0.9087960124015808, + "learning_rate": 2.8872703126043116e-06, + "loss": 0.6675, + "step": 10255 + }, + { + "epoch": 0.65, + "grad_norm": 0.9556131958961487, + "learning_rate": 2.8863404564290455e-06, + "loss": 0.5625, + "step": 10256 + }, + { + "epoch": 0.65, + "grad_norm": 0.8998469710350037, + "learning_rate": 2.88541068925263e-06, + "loss": 0.6218, + "step": 10257 + }, + { + "epoch": 0.65, + "grad_norm": 0.8599625825881958, + "learning_rate": 2.8844810111142143e-06, + "loss": 0.5521, + "step": 10258 + }, + { + "epoch": 0.65, + "grad_norm": 0.8799909353256226, + "learning_rate": 2.883551422052946e-06, + "loss": 0.5713, + "step": 10259 + }, + { + "epoch": 0.65, + "grad_norm": 0.864239513874054, + "learning_rate": 2.8826219221079597e-06, + "loss": 0.6036, + "step": 10260 + }, + { + "epoch": 0.65, + "grad_norm": 0.8341729044914246, + "learning_rate": 2.8816925113184034e-06, + "loss": 0.5587, + "step": 10261 + }, + { + "epoch": 0.65, + "grad_norm": 0.8841572403907776, + "learning_rate": 2.8807631897234045e-06, + "loss": 0.6003, + "step": 10262 + }, + { + "epoch": 0.65, + "grad_norm": 0.9406521320343018, + "learning_rate": 2.8798339573620953e-06, + "loss": 0.6259, + "step": 10263 + }, + { + "epoch": 0.65, + "grad_norm": 0.8605220913887024, + "learning_rate": 2.8789048142736026e-06, + "loss": 0.5397, + "step": 10264 + }, + { + "epoch": 0.65, + "grad_norm": 0.9191677570343018, + "learning_rate": 2.8779757604970495e-06, + "loss": 0.5754, + "step": 10265 + }, + { + "epoch": 0.65, + "grad_norm": 0.8478958010673523, + "learning_rate": 2.877046796071554e-06, + "loss": 0.5911, + "step": 10266 + }, + { + "epoch": 0.65, + "grad_norm": 0.909317135810852, + "learning_rate": 2.8761179210362365e-06, + "loss": 0.5999, + "step": 10267 + }, + { + "epoch": 0.65, + "grad_norm": 0.9130200743675232, + "learning_rate": 2.8751891354302018e-06, + "loss": 0.6098, + "step": 10268 + }, + { + "epoch": 0.65, + "grad_norm": 0.8185581564903259, + "learning_rate": 2.8742604392925587e-06, + "loss": 0.5674, + "step": 10269 + }, + { + "epoch": 0.65, + "grad_norm": 0.8762167692184448, + "learning_rate": 2.8733318326624182e-06, + "loss": 0.5917, + "step": 10270 + }, + { + "epoch": 0.65, + "grad_norm": 0.852927029132843, + "learning_rate": 2.8724033155788743e-06, + "loss": 0.587, + "step": 10271 + }, + { + "epoch": 0.65, + "grad_norm": 0.8949410915374756, + "learning_rate": 2.871474888081025e-06, + "loss": 0.6095, + "step": 10272 + }, + { + "epoch": 0.65, + "grad_norm": 0.8751702904701233, + "learning_rate": 2.870546550207964e-06, + "loss": 0.5567, + "step": 10273 + }, + { + "epoch": 0.65, + "grad_norm": 0.9688418507575989, + "learning_rate": 2.8696183019987796e-06, + "loss": 0.559, + "step": 10274 + }, + { + "epoch": 0.65, + "grad_norm": 0.9164302945137024, + "learning_rate": 2.868690143492559e-06, + "loss": 0.6014, + "step": 10275 + }, + { + "epoch": 0.65, + "grad_norm": 0.9164918065071106, + "learning_rate": 2.8677620747283807e-06, + "loss": 0.5787, + "step": 10276 + }, + { + "epoch": 0.65, + "grad_norm": 0.8945170044898987, + "learning_rate": 2.8668340957453224e-06, + "loss": 0.5649, + "step": 10277 + }, + { + "epoch": 0.65, + "grad_norm": 0.8914811015129089, + "learning_rate": 2.865906206582463e-06, + "loss": 0.5866, + "step": 10278 + }, + { + "epoch": 0.65, + "grad_norm": 0.8111115097999573, + "learning_rate": 2.8649784072788668e-06, + "loss": 0.534, + "step": 10279 + }, + { + "epoch": 0.65, + "grad_norm": 0.9475454092025757, + "learning_rate": 2.8640506978736027e-06, + "loss": 0.6491, + "step": 10280 + }, + { + "epoch": 0.65, + "grad_norm": 0.9642074704170227, + "learning_rate": 2.8631230784057362e-06, + "loss": 0.6173, + "step": 10281 + }, + { + "epoch": 0.65, + "grad_norm": 0.9231216907501221, + "learning_rate": 2.862195548914318e-06, + "loss": 0.6038, + "step": 10282 + }, + { + "epoch": 0.65, + "grad_norm": 0.9643025994300842, + "learning_rate": 2.8612681094384135e-06, + "loss": 0.5809, + "step": 10283 + }, + { + "epoch": 0.65, + "grad_norm": 0.8661615252494812, + "learning_rate": 2.8603407600170664e-06, + "loss": 0.5797, + "step": 10284 + }, + { + "epoch": 0.65, + "grad_norm": 0.8539398908615112, + "learning_rate": 2.8594135006893264e-06, + "loss": 0.595, + "step": 10285 + }, + { + "epoch": 0.65, + "grad_norm": 0.8886363506317139, + "learning_rate": 2.858486331494238e-06, + "loss": 0.5977, + "step": 10286 + }, + { + "epoch": 0.65, + "grad_norm": 0.8894230127334595, + "learning_rate": 2.8575592524708397e-06, + "loss": 0.5999, + "step": 10287 + }, + { + "epoch": 0.65, + "grad_norm": 0.8313820362091064, + "learning_rate": 2.856632263658169e-06, + "loss": 0.5703, + "step": 10288 + }, + { + "epoch": 0.65, + "grad_norm": 0.8702353239059448, + "learning_rate": 2.855705365095258e-06, + "loss": 0.6152, + "step": 10289 + }, + { + "epoch": 0.65, + "grad_norm": 0.8346042037010193, + "learning_rate": 2.854778556821132e-06, + "loss": 0.5277, + "step": 10290 + }, + { + "epoch": 0.65, + "grad_norm": 0.9115665555000305, + "learning_rate": 2.8538518388748214e-06, + "loss": 0.595, + "step": 10291 + }, + { + "epoch": 0.65, + "grad_norm": 0.9286834001541138, + "learning_rate": 2.8529252112953434e-06, + "loss": 0.6031, + "step": 10292 + }, + { + "epoch": 0.65, + "grad_norm": 0.9434182047843933, + "learning_rate": 2.8519986741217144e-06, + "loss": 0.5983, + "step": 10293 + }, + { + "epoch": 0.65, + "grad_norm": 0.8886797428131104, + "learning_rate": 2.8510722273929486e-06, + "loss": 0.595, + "step": 10294 + }, + { + "epoch": 0.65, + "grad_norm": 0.868736207485199, + "learning_rate": 2.8501458711480564e-06, + "loss": 0.5769, + "step": 10295 + }, + { + "epoch": 0.65, + "grad_norm": 0.8849626183509827, + "learning_rate": 2.8492196054260424e-06, + "loss": 0.6066, + "step": 10296 + }, + { + "epoch": 0.65, + "grad_norm": 0.860435426235199, + "learning_rate": 2.848293430265911e-06, + "loss": 0.5701, + "step": 10297 + }, + { + "epoch": 0.65, + "grad_norm": 0.9047563672065735, + "learning_rate": 2.8473673457066564e-06, + "loss": 0.5482, + "step": 10298 + }, + { + "epoch": 0.65, + "grad_norm": 0.8450853824615479, + "learning_rate": 2.8464413517872737e-06, + "loss": 0.5659, + "step": 10299 + }, + { + "epoch": 0.65, + "grad_norm": 0.8788303732872009, + "learning_rate": 2.845515448546754e-06, + "loss": 0.5781, + "step": 10300 + }, + { + "epoch": 0.65, + "grad_norm": 0.8010481595993042, + "learning_rate": 2.8445896360240845e-06, + "loss": 0.5364, + "step": 10301 + }, + { + "epoch": 0.65, + "grad_norm": 0.9223700761795044, + "learning_rate": 2.843663914258249e-06, + "loss": 0.5826, + "step": 10302 + }, + { + "epoch": 0.65, + "grad_norm": 0.8434270024299622, + "learning_rate": 2.8427382832882207e-06, + "loss": 0.5676, + "step": 10303 + }, + { + "epoch": 0.65, + "grad_norm": 0.9163960218429565, + "learning_rate": 2.8418127431529807e-06, + "loss": 0.5913, + "step": 10304 + }, + { + "epoch": 0.65, + "grad_norm": 0.8485933542251587, + "learning_rate": 2.8408872938915e-06, + "loss": 0.5494, + "step": 10305 + }, + { + "epoch": 0.65, + "grad_norm": 0.9408286213874817, + "learning_rate": 2.8399619355427427e-06, + "loss": 0.6158, + "step": 10306 + }, + { + "epoch": 0.65, + "grad_norm": 0.8759029507637024, + "learning_rate": 2.839036668145674e-06, + "loss": 0.6119, + "step": 10307 + }, + { + "epoch": 0.65, + "grad_norm": 0.8358346819877625, + "learning_rate": 2.8381114917392538e-06, + "loss": 0.5738, + "step": 10308 + }, + { + "epoch": 0.65, + "grad_norm": 0.8680429458618164, + "learning_rate": 2.8371864063624375e-06, + "loss": 0.5452, + "step": 10309 + }, + { + "epoch": 0.65, + "grad_norm": 0.9013274312019348, + "learning_rate": 2.836261412054181e-06, + "loss": 0.5305, + "step": 10310 + }, + { + "epoch": 0.65, + "grad_norm": 0.8434852361679077, + "learning_rate": 2.8353365088534247e-06, + "loss": 0.6082, + "step": 10311 + }, + { + "epoch": 0.65, + "grad_norm": 0.8728095889091492, + "learning_rate": 2.8344116967991197e-06, + "loss": 0.549, + "step": 10312 + }, + { + "epoch": 0.65, + "grad_norm": 0.8872493505477905, + "learning_rate": 2.8334869759302064e-06, + "loss": 0.5777, + "step": 10313 + }, + { + "epoch": 0.65, + "grad_norm": 0.8925797343254089, + "learning_rate": 2.8325623462856176e-06, + "loss": 0.5751, + "step": 10314 + }, + { + "epoch": 0.65, + "grad_norm": 0.903728187084198, + "learning_rate": 2.8316378079042887e-06, + "loss": 0.6265, + "step": 10315 + }, + { + "epoch": 0.65, + "grad_norm": 0.8824670910835266, + "learning_rate": 2.8307133608251486e-06, + "loss": 0.5769, + "step": 10316 + }, + { + "epoch": 0.65, + "grad_norm": 0.8991369605064392, + "learning_rate": 2.8297890050871222e-06, + "loss": 0.5767, + "step": 10317 + }, + { + "epoch": 0.65, + "grad_norm": 0.8974249362945557, + "learning_rate": 2.8288647407291337e-06, + "loss": 0.6057, + "step": 10318 + }, + { + "epoch": 0.65, + "grad_norm": 0.859311580657959, + "learning_rate": 2.827940567790096e-06, + "loss": 0.5939, + "step": 10319 + }, + { + "epoch": 0.65, + "grad_norm": 0.9485636949539185, + "learning_rate": 2.8270164863089227e-06, + "loss": 0.6018, + "step": 10320 + }, + { + "epoch": 0.65, + "grad_norm": 0.8993692398071289, + "learning_rate": 2.82609249632453e-06, + "loss": 0.5957, + "step": 10321 + }, + { + "epoch": 0.65, + "grad_norm": 0.9110742807388306, + "learning_rate": 2.825168597875818e-06, + "loss": 0.5788, + "step": 10322 + }, + { + "epoch": 0.65, + "grad_norm": 0.9139736890792847, + "learning_rate": 2.82424479100169e-06, + "loss": 0.5432, + "step": 10323 + }, + { + "epoch": 0.65, + "grad_norm": 0.909750759601593, + "learning_rate": 2.8233210757410454e-06, + "loss": 0.6235, + "step": 10324 + }, + { + "epoch": 0.65, + "grad_norm": 0.8736597299575806, + "learning_rate": 2.8223974521327787e-06, + "loss": 0.5876, + "step": 10325 + }, + { + "epoch": 0.65, + "grad_norm": 0.886572003364563, + "learning_rate": 2.8214739202157794e-06, + "loss": 0.581, + "step": 10326 + }, + { + "epoch": 0.65, + "grad_norm": 0.8689284920692444, + "learning_rate": 2.820550480028937e-06, + "loss": 0.5974, + "step": 10327 + }, + { + "epoch": 0.65, + "grad_norm": 0.9559029936790466, + "learning_rate": 2.81962713161113e-06, + "loss": 0.6199, + "step": 10328 + }, + { + "epoch": 0.65, + "grad_norm": 0.854682445526123, + "learning_rate": 2.8187038750012396e-06, + "loss": 0.5861, + "step": 10329 + }, + { + "epoch": 0.65, + "grad_norm": 0.8388245105743408, + "learning_rate": 2.8177807102381404e-06, + "loss": 0.5608, + "step": 10330 + }, + { + "epoch": 0.65, + "grad_norm": 0.8935778737068176, + "learning_rate": 2.816857637360705e-06, + "loss": 0.5666, + "step": 10331 + }, + { + "epoch": 0.65, + "grad_norm": 0.876492440700531, + "learning_rate": 2.8159346564078006e-06, + "loss": 0.5852, + "step": 10332 + }, + { + "epoch": 0.65, + "grad_norm": 0.9023503661155701, + "learning_rate": 2.815011767418287e-06, + "loss": 0.6174, + "step": 10333 + }, + { + "epoch": 0.65, + "grad_norm": 0.9186480045318604, + "learning_rate": 2.8140889704310287e-06, + "loss": 0.5975, + "step": 10334 + }, + { + "epoch": 0.65, + "grad_norm": 0.8938761949539185, + "learning_rate": 2.8131662654848814e-06, + "loss": 0.5741, + "step": 10335 + }, + { + "epoch": 0.65, + "grad_norm": 0.8748285174369812, + "learning_rate": 2.8122436526186935e-06, + "loss": 0.6341, + "step": 10336 + }, + { + "epoch": 0.65, + "grad_norm": 0.8347454071044922, + "learning_rate": 2.8113211318713146e-06, + "loss": 0.6091, + "step": 10337 + }, + { + "epoch": 0.65, + "grad_norm": 0.8568246364593506, + "learning_rate": 2.810398703281589e-06, + "loss": 0.5535, + "step": 10338 + }, + { + "epoch": 0.66, + "grad_norm": 0.9079662561416626, + "learning_rate": 2.8094763668883567e-06, + "loss": 0.5603, + "step": 10339 + }, + { + "epoch": 0.66, + "grad_norm": 0.9075840711593628, + "learning_rate": 2.808554122730457e-06, + "loss": 0.5642, + "step": 10340 + }, + { + "epoch": 0.66, + "grad_norm": 0.8704594373703003, + "learning_rate": 2.8076319708467146e-06, + "loss": 0.5842, + "step": 10341 + }, + { + "epoch": 0.66, + "grad_norm": 0.9282211661338806, + "learning_rate": 2.8067099112759665e-06, + "loss": 0.5972, + "step": 10342 + }, + { + "epoch": 0.66, + "grad_norm": 0.8854076266288757, + "learning_rate": 2.8057879440570356e-06, + "loss": 0.5837, + "step": 10343 + }, + { + "epoch": 0.66, + "grad_norm": 0.8140289783477783, + "learning_rate": 2.804866069228739e-06, + "loss": 0.5405, + "step": 10344 + }, + { + "epoch": 0.66, + "grad_norm": 0.9335722923278809, + "learning_rate": 2.803944286829896e-06, + "loss": 0.5488, + "step": 10345 + }, + { + "epoch": 0.66, + "grad_norm": 0.8789125084877014, + "learning_rate": 2.8030225968993198e-06, + "loss": 0.5977, + "step": 10346 + }, + { + "epoch": 0.66, + "grad_norm": 0.9545979499816895, + "learning_rate": 2.802100999475819e-06, + "loss": 0.5622, + "step": 10347 + }, + { + "epoch": 0.66, + "grad_norm": 0.9649593830108643, + "learning_rate": 2.8011794945982013e-06, + "loss": 0.6229, + "step": 10348 + }, + { + "epoch": 0.66, + "grad_norm": 0.8558527827262878, + "learning_rate": 2.8002580823052638e-06, + "loss": 0.5659, + "step": 10349 + }, + { + "epoch": 0.66, + "grad_norm": 0.9221006631851196, + "learning_rate": 2.7993367626358047e-06, + "loss": 0.5421, + "step": 10350 + }, + { + "epoch": 0.66, + "grad_norm": 0.8340117335319519, + "learning_rate": 2.7984155356286224e-06, + "loss": 0.5119, + "step": 10351 + }, + { + "epoch": 0.66, + "grad_norm": 0.8941150903701782, + "learning_rate": 2.7974944013225013e-06, + "loss": 0.5676, + "step": 10352 + }, + { + "epoch": 0.66, + "grad_norm": 0.857522189617157, + "learning_rate": 2.796573359756229e-06, + "loss": 0.5856, + "step": 10353 + }, + { + "epoch": 0.66, + "grad_norm": 0.986824631690979, + "learning_rate": 2.7956524109685874e-06, + "loss": 0.6455, + "step": 10354 + }, + { + "epoch": 0.66, + "grad_norm": 0.8224316239356995, + "learning_rate": 2.7947315549983545e-06, + "loss": 0.584, + "step": 10355 + }, + { + "epoch": 0.66, + "grad_norm": 0.8816094994544983, + "learning_rate": 2.793810791884306e-06, + "loss": 0.5838, + "step": 10356 + }, + { + "epoch": 0.66, + "grad_norm": 0.8999599814414978, + "learning_rate": 2.792890121665208e-06, + "loss": 0.5797, + "step": 10357 + }, + { + "epoch": 0.66, + "grad_norm": 0.9199798703193665, + "learning_rate": 2.791969544379828e-06, + "loss": 0.606, + "step": 10358 + }, + { + "epoch": 0.66, + "grad_norm": 0.8767827153205872, + "learning_rate": 2.7910490600669327e-06, + "loss": 0.5771, + "step": 10359 + }, + { + "epoch": 0.66, + "grad_norm": 0.8857783675193787, + "learning_rate": 2.790128668765275e-06, + "loss": 0.6235, + "step": 10360 + }, + { + "epoch": 0.66, + "grad_norm": 0.873058021068573, + "learning_rate": 2.789208370513612e-06, + "loss": 0.5137, + "step": 10361 + }, + { + "epoch": 0.66, + "grad_norm": 0.9512156248092651, + "learning_rate": 2.7882881653506947e-06, + "loss": 0.6124, + "step": 10362 + }, + { + "epoch": 0.66, + "grad_norm": 0.8597283959388733, + "learning_rate": 2.787368053315266e-06, + "loss": 0.5774, + "step": 10363 + }, + { + "epoch": 0.66, + "grad_norm": 0.9025830626487732, + "learning_rate": 2.7864480344460743e-06, + "loss": 0.5343, + "step": 10364 + }, + { + "epoch": 0.66, + "grad_norm": 0.9169187545776367, + "learning_rate": 2.7855281087818543e-06, + "loss": 0.5986, + "step": 10365 + }, + { + "epoch": 0.66, + "grad_norm": 0.8351139426231384, + "learning_rate": 2.7846082763613412e-06, + "loss": 0.5107, + "step": 10366 + }, + { + "epoch": 0.66, + "grad_norm": 0.9500547647476196, + "learning_rate": 2.783688537223268e-06, + "loss": 0.6406, + "step": 10367 + }, + { + "epoch": 0.66, + "grad_norm": 0.9017059803009033, + "learning_rate": 2.7827688914063596e-06, + "loss": 0.6178, + "step": 10368 + }, + { + "epoch": 0.66, + "grad_norm": 0.9711951613426208, + "learning_rate": 2.78184933894934e-06, + "loss": 0.5575, + "step": 10369 + }, + { + "epoch": 0.66, + "grad_norm": 0.8450467586517334, + "learning_rate": 2.780929879890931e-06, + "loss": 0.5841, + "step": 10370 + }, + { + "epoch": 0.66, + "grad_norm": 0.9280916452407837, + "learning_rate": 2.780010514269841e-06, + "loss": 0.5718, + "step": 10371 + }, + { + "epoch": 0.66, + "grad_norm": 0.9153168201446533, + "learning_rate": 2.7790912421247883e-06, + "loss": 0.6029, + "step": 10372 + }, + { + "epoch": 0.66, + "grad_norm": 0.9410317540168762, + "learning_rate": 2.7781720634944766e-06, + "loss": 0.6092, + "step": 10373 + }, + { + "epoch": 0.66, + "grad_norm": 0.8701797723770142, + "learning_rate": 2.77725297841761e-06, + "loss": 0.5228, + "step": 10374 + }, + { + "epoch": 0.66, + "grad_norm": 0.8795192241668701, + "learning_rate": 2.7763339869328897e-06, + "loss": 0.5848, + "step": 10375 + }, + { + "epoch": 0.66, + "grad_norm": 0.920274555683136, + "learning_rate": 2.7754150890790067e-06, + "loss": 0.5968, + "step": 10376 + }, + { + "epoch": 0.66, + "grad_norm": 0.8954097032546997, + "learning_rate": 2.7744962848946565e-06, + "loss": 0.5599, + "step": 10377 + }, + { + "epoch": 0.66, + "grad_norm": 0.9171625375747681, + "learning_rate": 2.7735775744185276e-06, + "loss": 0.5803, + "step": 10378 + }, + { + "epoch": 0.66, + "grad_norm": 0.8371365666389465, + "learning_rate": 2.7726589576893004e-06, + "loss": 0.5397, + "step": 10379 + }, + { + "epoch": 0.66, + "grad_norm": 0.8804381489753723, + "learning_rate": 2.7717404347456567e-06, + "loss": 0.5273, + "step": 10380 + }, + { + "epoch": 0.66, + "grad_norm": 0.8363378643989563, + "learning_rate": 2.7708220056262706e-06, + "loss": 0.578, + "step": 10381 + }, + { + "epoch": 0.66, + "grad_norm": 0.9080025553703308, + "learning_rate": 2.7699036703698158e-06, + "loss": 0.5766, + "step": 10382 + }, + { + "epoch": 0.66, + "grad_norm": 0.9054446220397949, + "learning_rate": 2.7689854290149608e-06, + "loss": 0.6039, + "step": 10383 + }, + { + "epoch": 0.66, + "grad_norm": 0.8597883582115173, + "learning_rate": 2.768067281600365e-06, + "loss": 0.5887, + "step": 10384 + }, + { + "epoch": 0.66, + "grad_norm": 0.939932644367218, + "learning_rate": 2.7671492281646937e-06, + "loss": 0.5608, + "step": 10385 + }, + { + "epoch": 0.66, + "grad_norm": 0.8852954506874084, + "learning_rate": 2.7662312687466026e-06, + "loss": 0.5328, + "step": 10386 + }, + { + "epoch": 0.66, + "grad_norm": 0.8315883278846741, + "learning_rate": 2.7653134033847393e-06, + "loss": 0.5873, + "step": 10387 + }, + { + "epoch": 0.66, + "grad_norm": 0.8988177180290222, + "learning_rate": 2.7643956321177558e-06, + "loss": 0.5822, + "step": 10388 + }, + { + "epoch": 0.66, + "grad_norm": 0.8246173858642578, + "learning_rate": 2.763477954984295e-06, + "loss": 0.5949, + "step": 10389 + }, + { + "epoch": 0.66, + "grad_norm": 0.8828296065330505, + "learning_rate": 2.7625603720229964e-06, + "loss": 0.5875, + "step": 10390 + }, + { + "epoch": 0.66, + "grad_norm": 0.9308893084526062, + "learning_rate": 2.7616428832724983e-06, + "loss": 0.6503, + "step": 10391 + }, + { + "epoch": 0.66, + "grad_norm": 0.914340078830719, + "learning_rate": 2.760725488771433e-06, + "loss": 0.6379, + "step": 10392 + }, + { + "epoch": 0.66, + "grad_norm": 0.8978453874588013, + "learning_rate": 2.7598081885584237e-06, + "loss": 0.5879, + "step": 10393 + }, + { + "epoch": 0.66, + "grad_norm": 0.9084619283676147, + "learning_rate": 2.758890982672102e-06, + "loss": 0.5752, + "step": 10394 + }, + { + "epoch": 0.66, + "grad_norm": 0.8810911774635315, + "learning_rate": 2.757973871151083e-06, + "loss": 0.6372, + "step": 10395 + }, + { + "epoch": 0.66, + "grad_norm": 0.8374783992767334, + "learning_rate": 2.757056854033985e-06, + "loss": 0.5807, + "step": 10396 + }, + { + "epoch": 0.66, + "grad_norm": 0.9119501709938049, + "learning_rate": 2.7561399313594205e-06, + "loss": 0.586, + "step": 10397 + }, + { + "epoch": 0.66, + "grad_norm": 1.0351508855819702, + "learning_rate": 2.7552231031659972e-06, + "loss": 0.5944, + "step": 10398 + }, + { + "epoch": 0.66, + "grad_norm": 0.9276666045188904, + "learning_rate": 2.75430636949232e-06, + "loss": 0.6075, + "step": 10399 + }, + { + "epoch": 0.66, + "grad_norm": 0.851760745048523, + "learning_rate": 2.753389730376992e-06, + "loss": 0.5774, + "step": 10400 + }, + { + "epoch": 0.66, + "grad_norm": 0.8853036165237427, + "learning_rate": 2.752473185858603e-06, + "loss": 0.581, + "step": 10401 + }, + { + "epoch": 0.66, + "grad_norm": 0.9542864561080933, + "learning_rate": 2.7515567359757526e-06, + "loss": 0.6231, + "step": 10402 + }, + { + "epoch": 0.66, + "grad_norm": 0.8955079317092896, + "learning_rate": 2.750640380767025e-06, + "loss": 0.5894, + "step": 10403 + }, + { + "epoch": 0.66, + "grad_norm": 0.9134573340415955, + "learning_rate": 2.7497241202710056e-06, + "loss": 0.5966, + "step": 10404 + }, + { + "epoch": 0.66, + "grad_norm": 0.8695476651191711, + "learning_rate": 2.7488079545262757e-06, + "loss": 0.6017, + "step": 10405 + }, + { + "epoch": 0.66, + "grad_norm": 0.9271215796470642, + "learning_rate": 2.747891883571412e-06, + "loss": 0.6063, + "step": 10406 + }, + { + "epoch": 0.66, + "grad_norm": 0.8371964693069458, + "learning_rate": 2.746975907444986e-06, + "loss": 0.5274, + "step": 10407 + }, + { + "epoch": 0.66, + "grad_norm": 0.9001272320747375, + "learning_rate": 2.7460600261855687e-06, + "loss": 0.6252, + "step": 10408 + }, + { + "epoch": 0.66, + "grad_norm": 0.8996703028678894, + "learning_rate": 2.7451442398317206e-06, + "loss": 0.6346, + "step": 10409 + }, + { + "epoch": 0.66, + "grad_norm": 0.8631662130355835, + "learning_rate": 2.7442285484220055e-06, + "loss": 0.5648, + "step": 10410 + }, + { + "epoch": 0.66, + "grad_norm": 0.9067828059196472, + "learning_rate": 2.7433129519949784e-06, + "loss": 0.596, + "step": 10411 + }, + { + "epoch": 0.66, + "grad_norm": 0.8877487182617188, + "learning_rate": 2.742397450589193e-06, + "loss": 0.6091, + "step": 10412 + }, + { + "epoch": 0.66, + "grad_norm": 0.8369250893592834, + "learning_rate": 2.7414820442431976e-06, + "loss": 0.5867, + "step": 10413 + }, + { + "epoch": 0.66, + "grad_norm": 0.9432762265205383, + "learning_rate": 2.7405667329955344e-06, + "loss": 0.6184, + "step": 10414 + }, + { + "epoch": 0.66, + "grad_norm": 0.8785738348960876, + "learning_rate": 2.739651516884747e-06, + "loss": 0.5174, + "step": 10415 + }, + { + "epoch": 0.66, + "grad_norm": 0.8684585690498352, + "learning_rate": 2.7387363959493733e-06, + "loss": 0.5862, + "step": 10416 + }, + { + "epoch": 0.66, + "grad_norm": 0.8876842856407166, + "learning_rate": 2.737821370227942e-06, + "loss": 0.6136, + "step": 10417 + }, + { + "epoch": 0.66, + "grad_norm": 0.9569928050041199, + "learning_rate": 2.7369064397589828e-06, + "loss": 0.5912, + "step": 10418 + }, + { + "epoch": 0.66, + "grad_norm": 0.8708109259605408, + "learning_rate": 2.7359916045810207e-06, + "loss": 0.5547, + "step": 10419 + }, + { + "epoch": 0.66, + "grad_norm": 0.8798702359199524, + "learning_rate": 2.7350768647325766e-06, + "loss": 0.5619, + "step": 10420 + }, + { + "epoch": 0.66, + "grad_norm": 0.8539235591888428, + "learning_rate": 2.734162220252168e-06, + "loss": 0.5034, + "step": 10421 + }, + { + "epoch": 0.66, + "grad_norm": 0.9067310094833374, + "learning_rate": 2.7332476711783044e-06, + "loss": 0.6071, + "step": 10422 + }, + { + "epoch": 0.66, + "grad_norm": 0.8697945475578308, + "learning_rate": 2.732333217549494e-06, + "loss": 0.598, + "step": 10423 + }, + { + "epoch": 0.66, + "grad_norm": 0.8847575187683105, + "learning_rate": 2.7314188594042466e-06, + "loss": 0.5674, + "step": 10424 + }, + { + "epoch": 0.66, + "grad_norm": 0.9137183427810669, + "learning_rate": 2.7305045967810585e-06, + "loss": 0.5582, + "step": 10425 + }, + { + "epoch": 0.66, + "grad_norm": 0.8915376663208008, + "learning_rate": 2.7295904297184262e-06, + "loss": 0.5746, + "step": 10426 + }, + { + "epoch": 0.66, + "grad_norm": 0.8941647410392761, + "learning_rate": 2.7286763582548424e-06, + "loss": 0.5729, + "step": 10427 + }, + { + "epoch": 0.66, + "grad_norm": 0.9105641841888428, + "learning_rate": 2.7277623824287957e-06, + "loss": 0.5698, + "step": 10428 + }, + { + "epoch": 0.66, + "grad_norm": 0.8876394033432007, + "learning_rate": 2.726848502278773e-06, + "loss": 0.645, + "step": 10429 + }, + { + "epoch": 0.66, + "grad_norm": 0.9088033437728882, + "learning_rate": 2.7259347178432493e-06, + "loss": 0.5629, + "step": 10430 + }, + { + "epoch": 0.66, + "grad_norm": 0.910689115524292, + "learning_rate": 2.7250210291607026e-06, + "loss": 0.5819, + "step": 10431 + }, + { + "epoch": 0.66, + "grad_norm": 0.8679473996162415, + "learning_rate": 2.7241074362696108e-06, + "loss": 0.5183, + "step": 10432 + }, + { + "epoch": 0.66, + "grad_norm": 0.9289723634719849, + "learning_rate": 2.7231939392084347e-06, + "loss": 0.6181, + "step": 10433 + }, + { + "epoch": 0.66, + "grad_norm": 0.895182728767395, + "learning_rate": 2.7222805380156414e-06, + "loss": 0.6143, + "step": 10434 + }, + { + "epoch": 0.66, + "grad_norm": 0.8651720881462097, + "learning_rate": 2.7213672327296914e-06, + "loss": 0.58, + "step": 10435 + }, + { + "epoch": 0.66, + "grad_norm": 0.8684262037277222, + "learning_rate": 2.72045402338904e-06, + "loss": 0.5928, + "step": 10436 + }, + { + "epoch": 0.66, + "grad_norm": 0.9024814367294312, + "learning_rate": 2.719540910032142e-06, + "loss": 0.5643, + "step": 10437 + }, + { + "epoch": 0.66, + "grad_norm": 0.9180070161819458, + "learning_rate": 2.7186278926974406e-06, + "loss": 0.5972, + "step": 10438 + }, + { + "epoch": 0.66, + "grad_norm": 0.867103099822998, + "learning_rate": 2.717714971423383e-06, + "loss": 0.5539, + "step": 10439 + }, + { + "epoch": 0.66, + "grad_norm": 0.864017903804779, + "learning_rate": 2.7168021462484084e-06, + "loss": 0.5505, + "step": 10440 + }, + { + "epoch": 0.66, + "grad_norm": 0.8561496138572693, + "learning_rate": 2.715889417210953e-06, + "loss": 0.586, + "step": 10441 + }, + { + "epoch": 0.66, + "grad_norm": 0.874715268611908, + "learning_rate": 2.714976784349448e-06, + "loss": 0.5803, + "step": 10442 + }, + { + "epoch": 0.66, + "grad_norm": 0.9094971418380737, + "learning_rate": 2.7140642477023237e-06, + "loss": 0.5973, + "step": 10443 + }, + { + "epoch": 0.66, + "grad_norm": 0.9093654155731201, + "learning_rate": 2.7131518073079976e-06, + "loss": 0.6123, + "step": 10444 + }, + { + "epoch": 0.66, + "grad_norm": 0.8765634894371033, + "learning_rate": 2.7122394632048974e-06, + "loss": 0.5875, + "step": 10445 + }, + { + "epoch": 0.66, + "grad_norm": 0.9068828821182251, + "learning_rate": 2.7113272154314328e-06, + "loss": 0.6061, + "step": 10446 + }, + { + "epoch": 0.66, + "grad_norm": 0.8776718378067017, + "learning_rate": 2.710415064026018e-06, + "loss": 0.5145, + "step": 10447 + }, + { + "epoch": 0.66, + "grad_norm": 0.8980036377906799, + "learning_rate": 2.7095030090270596e-06, + "loss": 0.5571, + "step": 10448 + }, + { + "epoch": 0.66, + "grad_norm": 0.837546706199646, + "learning_rate": 2.7085910504729617e-06, + "loss": 0.6024, + "step": 10449 + }, + { + "epoch": 0.66, + "grad_norm": 0.8471895456314087, + "learning_rate": 2.7076791884021236e-06, + "loss": 0.5205, + "step": 10450 + }, + { + "epoch": 0.66, + "grad_norm": 0.882883608341217, + "learning_rate": 2.7067674228529417e-06, + "loss": 0.5888, + "step": 10451 + }, + { + "epoch": 0.66, + "grad_norm": 0.8597538471221924, + "learning_rate": 2.7058557538638026e-06, + "loss": 0.555, + "step": 10452 + }, + { + "epoch": 0.66, + "grad_norm": 0.8812461495399475, + "learning_rate": 2.7049441814731007e-06, + "loss": 0.5738, + "step": 10453 + }, + { + "epoch": 0.66, + "grad_norm": 0.8679195642471313, + "learning_rate": 2.704032705719214e-06, + "loss": 0.5855, + "step": 10454 + }, + { + "epoch": 0.66, + "grad_norm": 0.9121565818786621, + "learning_rate": 2.703121326640522e-06, + "loss": 0.6203, + "step": 10455 + }, + { + "epoch": 0.66, + "grad_norm": 0.8402708768844604, + "learning_rate": 2.702210044275401e-06, + "loss": 0.5068, + "step": 10456 + }, + { + "epoch": 0.66, + "grad_norm": 0.9229235053062439, + "learning_rate": 2.7012988586622224e-06, + "loss": 0.5719, + "step": 10457 + }, + { + "epoch": 0.66, + "grad_norm": 0.874308168888092, + "learning_rate": 2.7003877698393512e-06, + "loss": 0.5587, + "step": 10458 + }, + { + "epoch": 0.66, + "grad_norm": 0.8813081383705139, + "learning_rate": 2.6994767778451535e-06, + "loss": 0.5455, + "step": 10459 + }, + { + "epoch": 0.66, + "grad_norm": 0.8940520286560059, + "learning_rate": 2.6985658827179845e-06, + "loss": 0.5927, + "step": 10460 + }, + { + "epoch": 0.66, + "grad_norm": 0.8507505059242249, + "learning_rate": 2.6976550844961992e-06, + "loss": 0.5388, + "step": 10461 + }, + { + "epoch": 0.66, + "grad_norm": 0.9301406741142273, + "learning_rate": 2.6967443832181496e-06, + "loss": 0.57, + "step": 10462 + }, + { + "epoch": 0.66, + "grad_norm": 0.8438676595687866, + "learning_rate": 2.6958337789221813e-06, + "loss": 0.555, + "step": 10463 + }, + { + "epoch": 0.66, + "grad_norm": 0.9643988609313965, + "learning_rate": 2.694923271646637e-06, + "loss": 0.5663, + "step": 10464 + }, + { + "epoch": 0.66, + "grad_norm": 0.9135273098945618, + "learning_rate": 2.694012861429855e-06, + "loss": 0.603, + "step": 10465 + }, + { + "epoch": 0.66, + "grad_norm": 0.9375592470169067, + "learning_rate": 2.693102548310169e-06, + "loss": 0.5981, + "step": 10466 + }, + { + "epoch": 0.66, + "grad_norm": 0.8663008809089661, + "learning_rate": 2.6921923323259124e-06, + "loss": 0.5962, + "step": 10467 + }, + { + "epoch": 0.66, + "grad_norm": 0.9672373533248901, + "learning_rate": 2.691282213515406e-06, + "loss": 0.5845, + "step": 10468 + }, + { + "epoch": 0.66, + "grad_norm": 0.8220438957214355, + "learning_rate": 2.690372191916974e-06, + "loss": 0.57, + "step": 10469 + }, + { + "epoch": 0.66, + "grad_norm": 0.905386745929718, + "learning_rate": 2.6894622675689345e-06, + "loss": 0.6044, + "step": 10470 + }, + { + "epoch": 0.66, + "grad_norm": 0.860525906085968, + "learning_rate": 2.6885524405096007e-06, + "loss": 0.6008, + "step": 10471 + }, + { + "epoch": 0.66, + "grad_norm": 0.9347862005233765, + "learning_rate": 2.687642710777284e-06, + "loss": 0.6183, + "step": 10472 + }, + { + "epoch": 0.66, + "grad_norm": 0.8891615867614746, + "learning_rate": 2.6867330784102896e-06, + "loss": 0.5547, + "step": 10473 + }, + { + "epoch": 0.66, + "grad_norm": 0.9229059815406799, + "learning_rate": 2.6858235434469138e-06, + "loss": 0.6138, + "step": 10474 + }, + { + "epoch": 0.66, + "grad_norm": 0.8102059364318848, + "learning_rate": 2.684914105925463e-06, + "loss": 0.5539, + "step": 10475 + }, + { + "epoch": 0.66, + "grad_norm": 0.8751254081726074, + "learning_rate": 2.6840047658842226e-06, + "loss": 0.5189, + "step": 10476 + }, + { + "epoch": 0.66, + "grad_norm": 0.9399062991142273, + "learning_rate": 2.683095523361486e-06, + "loss": 0.6127, + "step": 10477 + }, + { + "epoch": 0.66, + "grad_norm": 0.9292119145393372, + "learning_rate": 2.682186378395536e-06, + "loss": 0.6257, + "step": 10478 + }, + { + "epoch": 0.66, + "grad_norm": 0.9873320460319519, + "learning_rate": 2.6812773310246547e-06, + "loss": 0.5942, + "step": 10479 + }, + { + "epoch": 0.66, + "grad_norm": 0.8316569328308105, + "learning_rate": 2.680368381287119e-06, + "loss": 0.5663, + "step": 10480 + }, + { + "epoch": 0.66, + "grad_norm": 0.893159031867981, + "learning_rate": 2.6794595292212035e-06, + "loss": 0.5561, + "step": 10481 + }, + { + "epoch": 0.66, + "grad_norm": 0.905292272567749, + "learning_rate": 2.67855077486517e-06, + "loss": 0.5821, + "step": 10482 + }, + { + "epoch": 0.66, + "grad_norm": 0.9160034656524658, + "learning_rate": 2.677642118257292e-06, + "loss": 0.6112, + "step": 10483 + }, + { + "epoch": 0.66, + "grad_norm": 0.88798987865448, + "learning_rate": 2.6767335594358234e-06, + "loss": 0.6043, + "step": 10484 + }, + { + "epoch": 0.66, + "grad_norm": 0.8431712985038757, + "learning_rate": 2.675825098439023e-06, + "loss": 0.5438, + "step": 10485 + }, + { + "epoch": 0.66, + "grad_norm": 0.9120664596557617, + "learning_rate": 2.6749167353051443e-06, + "loss": 0.6029, + "step": 10486 + }, + { + "epoch": 0.66, + "grad_norm": 0.8885997533798218, + "learning_rate": 2.674008470072429e-06, + "loss": 0.5532, + "step": 10487 + }, + { + "epoch": 0.66, + "grad_norm": 0.9783884286880493, + "learning_rate": 2.673100302779128e-06, + "loss": 0.5793, + "step": 10488 + }, + { + "epoch": 0.66, + "grad_norm": 0.8485262393951416, + "learning_rate": 2.6721922334634804e-06, + "loss": 0.5563, + "step": 10489 + }, + { + "epoch": 0.66, + "grad_norm": 0.896809458732605, + "learning_rate": 2.671284262163718e-06, + "loss": 0.5856, + "step": 10490 + }, + { + "epoch": 0.66, + "grad_norm": 0.8948637843132019, + "learning_rate": 2.6703763889180746e-06, + "loss": 0.5547, + "step": 10491 + }, + { + "epoch": 0.66, + "grad_norm": 0.8857586979866028, + "learning_rate": 2.6694686137647767e-06, + "loss": 0.5937, + "step": 10492 + }, + { + "epoch": 0.66, + "grad_norm": 0.8958655595779419, + "learning_rate": 2.668560936742048e-06, + "loss": 0.5439, + "step": 10493 + }, + { + "epoch": 0.66, + "grad_norm": 0.8610227704048157, + "learning_rate": 2.6676533578881102e-06, + "loss": 0.5449, + "step": 10494 + }, + { + "epoch": 0.66, + "grad_norm": 0.8370438814163208, + "learning_rate": 2.6667458772411724e-06, + "loss": 0.5593, + "step": 10495 + }, + { + "epoch": 0.66, + "grad_norm": 0.886195182800293, + "learning_rate": 2.66583849483945e-06, + "loss": 0.6025, + "step": 10496 + }, + { + "epoch": 0.67, + "grad_norm": 0.8641106486320496, + "learning_rate": 2.664931210721151e-06, + "loss": 0.5801, + "step": 10497 + }, + { + "epoch": 0.67, + "grad_norm": 0.8426538109779358, + "learning_rate": 2.6640240249244744e-06, + "loss": 0.5569, + "step": 10498 + }, + { + "epoch": 0.67, + "grad_norm": 0.8817174434661865, + "learning_rate": 2.6631169374876185e-06, + "loss": 0.562, + "step": 10499 + }, + { + "epoch": 0.67, + "grad_norm": 0.8603051900863647, + "learning_rate": 2.6622099484487794e-06, + "loss": 0.5917, + "step": 10500 + }, + { + "epoch": 0.67, + "grad_norm": 0.9094916582107544, + "learning_rate": 2.6613030578461476e-06, + "loss": 0.5641, + "step": 10501 + }, + { + "epoch": 0.67, + "grad_norm": 0.8471028804779053, + "learning_rate": 2.6603962657179094e-06, + "loss": 0.5664, + "step": 10502 + }, + { + "epoch": 0.67, + "grad_norm": 0.9582904577255249, + "learning_rate": 2.6594895721022436e-06, + "loss": 0.651, + "step": 10503 + }, + { + "epoch": 0.67, + "grad_norm": 0.8720226287841797, + "learning_rate": 2.6585829770373286e-06, + "loss": 0.5729, + "step": 10504 + }, + { + "epoch": 0.67, + "grad_norm": 1.0530695915222168, + "learning_rate": 2.657676480561342e-06, + "loss": 0.6192, + "step": 10505 + }, + { + "epoch": 0.67, + "grad_norm": 0.8338209986686707, + "learning_rate": 2.6567700827124494e-06, + "loss": 0.5738, + "step": 10506 + }, + { + "epoch": 0.67, + "grad_norm": 0.9002853631973267, + "learning_rate": 2.655863783528817e-06, + "loss": 0.5746, + "step": 10507 + }, + { + "epoch": 0.67, + "grad_norm": 0.9461910128593445, + "learning_rate": 2.6549575830486053e-06, + "loss": 0.6294, + "step": 10508 + }, + { + "epoch": 0.67, + "grad_norm": 0.883553683757782, + "learning_rate": 2.6540514813099728e-06, + "loss": 0.6029, + "step": 10509 + }, + { + "epoch": 0.67, + "grad_norm": 0.9209686517715454, + "learning_rate": 2.6531454783510736e-06, + "loss": 0.6247, + "step": 10510 + }, + { + "epoch": 0.67, + "grad_norm": 0.9430029988288879, + "learning_rate": 2.6522395742100514e-06, + "loss": 0.6145, + "step": 10511 + }, + { + "epoch": 0.67, + "grad_norm": 0.8918984532356262, + "learning_rate": 2.651333768925052e-06, + "loss": 0.5791, + "step": 10512 + }, + { + "epoch": 0.67, + "grad_norm": 0.9535161256790161, + "learning_rate": 2.6504280625342203e-06, + "loss": 0.6567, + "step": 10513 + }, + { + "epoch": 0.67, + "grad_norm": 0.8918493390083313, + "learning_rate": 2.6495224550756888e-06, + "loss": 0.6135, + "step": 10514 + }, + { + "epoch": 0.67, + "grad_norm": 0.8543890118598938, + "learning_rate": 2.6486169465875887e-06, + "loss": 0.5809, + "step": 10515 + }, + { + "epoch": 0.67, + "grad_norm": 0.9117350578308105, + "learning_rate": 2.647711537108052e-06, + "loss": 0.5975, + "step": 10516 + }, + { + "epoch": 0.67, + "grad_norm": 0.9220753908157349, + "learning_rate": 2.6468062266751955e-06, + "loss": 0.5826, + "step": 10517 + }, + { + "epoch": 0.67, + "grad_norm": 0.8443688750267029, + "learning_rate": 2.6459010153271456e-06, + "loss": 0.5437, + "step": 10518 + }, + { + "epoch": 0.67, + "grad_norm": 0.9096937775611877, + "learning_rate": 2.6449959031020134e-06, + "loss": 0.5755, + "step": 10519 + }, + { + "epoch": 0.67, + "grad_norm": 0.9416838884353638, + "learning_rate": 2.6440908900379115e-06, + "loss": 0.5631, + "step": 10520 + }, + { + "epoch": 0.67, + "grad_norm": 0.855556309223175, + "learning_rate": 2.6431859761729462e-06, + "loss": 0.596, + "step": 10521 + }, + { + "epoch": 0.67, + "grad_norm": 0.8807106018066406, + "learning_rate": 2.6422811615452205e-06, + "loss": 0.5545, + "step": 10522 + }, + { + "epoch": 0.67, + "grad_norm": 0.8947232365608215, + "learning_rate": 2.6413764461928335e-06, + "loss": 0.5682, + "step": 10523 + }, + { + "epoch": 0.67, + "grad_norm": 0.7953035235404968, + "learning_rate": 2.6404718301538814e-06, + "loss": 0.6003, + "step": 10524 + }, + { + "epoch": 0.67, + "grad_norm": 0.880653977394104, + "learning_rate": 2.639567313466448e-06, + "loss": 0.5458, + "step": 10525 + }, + { + "epoch": 0.67, + "grad_norm": 0.8574607968330383, + "learning_rate": 2.6386628961686277e-06, + "loss": 0.5645, + "step": 10526 + }, + { + "epoch": 0.67, + "grad_norm": 0.9444485306739807, + "learning_rate": 2.6377585782984972e-06, + "loss": 0.572, + "step": 10527 + }, + { + "epoch": 0.67, + "grad_norm": 0.942674994468689, + "learning_rate": 2.636854359894134e-06, + "loss": 0.5551, + "step": 10528 + }, + { + "epoch": 0.67, + "grad_norm": 0.8828451633453369, + "learning_rate": 2.635950240993614e-06, + "loss": 0.6088, + "step": 10529 + }, + { + "epoch": 0.67, + "grad_norm": 0.9297851920127869, + "learning_rate": 2.635046221635005e-06, + "loss": 0.6244, + "step": 10530 + }, + { + "epoch": 0.67, + "grad_norm": 0.8250426054000854, + "learning_rate": 2.6341423018563727e-06, + "loss": 0.5795, + "step": 10531 + }, + { + "epoch": 0.67, + "grad_norm": 0.8772184252738953, + "learning_rate": 2.633238481695779e-06, + "loss": 0.5588, + "step": 10532 + }, + { + "epoch": 0.67, + "grad_norm": 0.8727168440818787, + "learning_rate": 2.6323347611912786e-06, + "loss": 0.5697, + "step": 10533 + }, + { + "epoch": 0.67, + "grad_norm": 0.9432665705680847, + "learning_rate": 2.6314311403809224e-06, + "loss": 0.6384, + "step": 10534 + }, + { + "epoch": 0.67, + "grad_norm": 0.8159708380699158, + "learning_rate": 2.630527619302765e-06, + "loss": 0.5078, + "step": 10535 + }, + { + "epoch": 0.67, + "grad_norm": 0.8695153594017029, + "learning_rate": 2.6296241979948455e-06, + "loss": 0.5961, + "step": 10536 + }, + { + "epoch": 0.67, + "grad_norm": 0.9193745255470276, + "learning_rate": 2.6287208764952045e-06, + "loss": 0.6085, + "step": 10537 + }, + { + "epoch": 0.67, + "grad_norm": 0.8767115473747253, + "learning_rate": 2.6278176548418783e-06, + "loss": 0.5908, + "step": 10538 + }, + { + "epoch": 0.67, + "grad_norm": 0.9035547971725464, + "learning_rate": 2.6269145330728985e-06, + "loss": 0.5646, + "step": 10539 + }, + { + "epoch": 0.67, + "grad_norm": 0.8972700238227844, + "learning_rate": 2.626011511226294e-06, + "loss": 0.5871, + "step": 10540 + }, + { + "epoch": 0.67, + "grad_norm": 0.9117726683616638, + "learning_rate": 2.625108589340085e-06, + "loss": 0.5854, + "step": 10541 + }, + { + "epoch": 0.67, + "grad_norm": 0.8025404810905457, + "learning_rate": 2.624205767452289e-06, + "loss": 0.5042, + "step": 10542 + }, + { + "epoch": 0.67, + "grad_norm": 0.9173393249511719, + "learning_rate": 2.623303045600928e-06, + "loss": 0.527, + "step": 10543 + }, + { + "epoch": 0.67, + "grad_norm": 0.9533704519271851, + "learning_rate": 2.622400423824005e-06, + "loss": 0.6099, + "step": 10544 + }, + { + "epoch": 0.67, + "grad_norm": 0.9197595119476318, + "learning_rate": 2.62149790215953e-06, + "loss": 0.5371, + "step": 10545 + }, + { + "epoch": 0.67, + "grad_norm": 0.8468542098999023, + "learning_rate": 2.6205954806455057e-06, + "loss": 0.5773, + "step": 10546 + }, + { + "epoch": 0.67, + "grad_norm": 0.8749061226844788, + "learning_rate": 2.6196931593199247e-06, + "loss": 0.5984, + "step": 10547 + }, + { + "epoch": 0.67, + "grad_norm": 0.9503610134124756, + "learning_rate": 2.618790938220788e-06, + "loss": 0.5798, + "step": 10548 + }, + { + "epoch": 0.67, + "grad_norm": 0.8924551010131836, + "learning_rate": 2.617888817386079e-06, + "loss": 0.5438, + "step": 10549 + }, + { + "epoch": 0.67, + "grad_norm": 0.9542580246925354, + "learning_rate": 2.6169867968537856e-06, + "loss": 0.5912, + "step": 10550 + }, + { + "epoch": 0.67, + "grad_norm": 0.8375207781791687, + "learning_rate": 2.616084876661888e-06, + "loss": 0.5299, + "step": 10551 + }, + { + "epoch": 0.67, + "grad_norm": 0.8957962989807129, + "learning_rate": 2.6151830568483627e-06, + "loss": 0.5979, + "step": 10552 + }, + { + "epoch": 0.67, + "grad_norm": 0.9477977156639099, + "learning_rate": 2.614281337451183e-06, + "loss": 0.5815, + "step": 10553 + }, + { + "epoch": 0.67, + "grad_norm": 0.8835856914520264, + "learning_rate": 2.61337971850832e-06, + "loss": 0.6355, + "step": 10554 + }, + { + "epoch": 0.67, + "grad_norm": 0.8357982039451599, + "learning_rate": 2.6124782000577296e-06, + "loss": 0.5298, + "step": 10555 + }, + { + "epoch": 0.67, + "grad_norm": 0.8705008625984192, + "learning_rate": 2.6115767821373807e-06, + "loss": 0.5278, + "step": 10556 + }, + { + "epoch": 0.67, + "grad_norm": 0.8560452461242676, + "learning_rate": 2.610675464785223e-06, + "loss": 0.6022, + "step": 10557 + }, + { + "epoch": 0.67, + "grad_norm": 0.8124215006828308, + "learning_rate": 2.6097742480392097e-06, + "loss": 0.5591, + "step": 10558 + }, + { + "epoch": 0.67, + "grad_norm": 0.865249752998352, + "learning_rate": 2.6088731319372874e-06, + "loss": 0.5972, + "step": 10559 + }, + { + "epoch": 0.67, + "grad_norm": 0.9022778868675232, + "learning_rate": 2.6079721165173994e-06, + "loss": 0.5793, + "step": 10560 + }, + { + "epoch": 0.67, + "grad_norm": 0.9692482352256775, + "learning_rate": 2.6070712018174847e-06, + "loss": 0.5374, + "step": 10561 + }, + { + "epoch": 0.67, + "grad_norm": 0.8842456340789795, + "learning_rate": 2.6061703878754784e-06, + "loss": 0.592, + "step": 10562 + }, + { + "epoch": 0.67, + "grad_norm": 0.8886352777481079, + "learning_rate": 2.6052696747293087e-06, + "loss": 0.5585, + "step": 10563 + }, + { + "epoch": 0.67, + "grad_norm": 0.9306212663650513, + "learning_rate": 2.6043690624169014e-06, + "loss": 0.6042, + "step": 10564 + }, + { + "epoch": 0.67, + "grad_norm": 0.9206665754318237, + "learning_rate": 2.6034685509761803e-06, + "loss": 0.5784, + "step": 10565 + }, + { + "epoch": 0.67, + "grad_norm": 0.9081207513809204, + "learning_rate": 2.602568140445061e-06, + "loss": 0.5858, + "step": 10566 + }, + { + "epoch": 0.67, + "grad_norm": 0.9279916882514954, + "learning_rate": 2.6016678308614583e-06, + "loss": 0.5536, + "step": 10567 + }, + { + "epoch": 0.67, + "grad_norm": 0.8986056447029114, + "learning_rate": 2.600767622263277e-06, + "loss": 0.5619, + "step": 10568 + }, + { + "epoch": 0.67, + "grad_norm": 0.9222875833511353, + "learning_rate": 2.599867514688427e-06, + "loss": 0.587, + "step": 10569 + }, + { + "epoch": 0.67, + "grad_norm": 0.880499005317688, + "learning_rate": 2.598967508174808e-06, + "loss": 0.5327, + "step": 10570 + }, + { + "epoch": 0.67, + "grad_norm": 0.8843125700950623, + "learning_rate": 2.598067602760313e-06, + "loss": 0.5566, + "step": 10571 + }, + { + "epoch": 0.67, + "grad_norm": 0.8370311856269836, + "learning_rate": 2.597167798482835e-06, + "loss": 0.5875, + "step": 10572 + }, + { + "epoch": 0.67, + "grad_norm": 0.9187718033790588, + "learning_rate": 2.596268095380263e-06, + "loss": 0.5995, + "step": 10573 + }, + { + "epoch": 0.67, + "grad_norm": 0.8443053364753723, + "learning_rate": 2.5953684934904788e-06, + "loss": 0.5484, + "step": 10574 + }, + { + "epoch": 0.67, + "grad_norm": 0.8494760990142822, + "learning_rate": 2.5944689928513643e-06, + "loss": 0.5316, + "step": 10575 + }, + { + "epoch": 0.67, + "grad_norm": 0.9031586647033691, + "learning_rate": 2.593569593500789e-06, + "loss": 0.5691, + "step": 10576 + }, + { + "epoch": 0.67, + "grad_norm": 0.8875634074211121, + "learning_rate": 2.592670295476628e-06, + "loss": 0.5378, + "step": 10577 + }, + { + "epoch": 0.67, + "grad_norm": 0.8447946310043335, + "learning_rate": 2.591771098816749e-06, + "loss": 0.6219, + "step": 10578 + }, + { + "epoch": 0.67, + "grad_norm": 0.8940092921257019, + "learning_rate": 2.5908720035590085e-06, + "loss": 0.5784, + "step": 10579 + }, + { + "epoch": 0.67, + "grad_norm": 0.8719146251678467, + "learning_rate": 2.5899730097412678e-06, + "loss": 0.5754, + "step": 10580 + }, + { + "epoch": 0.67, + "grad_norm": 0.8950543403625488, + "learning_rate": 2.58907411740138e-06, + "loss": 0.5846, + "step": 10581 + }, + { + "epoch": 0.67, + "grad_norm": 0.8580577373504639, + "learning_rate": 2.5881753265771938e-06, + "loss": 0.5897, + "step": 10582 + }, + { + "epoch": 0.67, + "grad_norm": 0.8067103028297424, + "learning_rate": 2.587276637306556e-06, + "loss": 0.5128, + "step": 10583 + }, + { + "epoch": 0.67, + "grad_norm": 0.8843825459480286, + "learning_rate": 2.586378049627304e-06, + "loss": 0.5823, + "step": 10584 + }, + { + "epoch": 0.67, + "grad_norm": 0.8826472759246826, + "learning_rate": 2.5854795635772743e-06, + "loss": 0.5843, + "step": 10585 + }, + { + "epoch": 0.67, + "grad_norm": 0.8553101420402527, + "learning_rate": 2.584581179194304e-06, + "loss": 0.5796, + "step": 10586 + }, + { + "epoch": 0.67, + "grad_norm": 0.8672010898590088, + "learning_rate": 2.5836828965162167e-06, + "loss": 0.5931, + "step": 10587 + }, + { + "epoch": 0.67, + "grad_norm": 0.927105188369751, + "learning_rate": 2.582784715580836e-06, + "loss": 0.5827, + "step": 10588 + }, + { + "epoch": 0.67, + "grad_norm": 0.8936177492141724, + "learning_rate": 2.581886636425983e-06, + "loss": 0.5892, + "step": 10589 + }, + { + "epoch": 0.67, + "grad_norm": 0.9585930109024048, + "learning_rate": 2.580988659089471e-06, + "loss": 0.6104, + "step": 10590 + }, + { + "epoch": 0.67, + "grad_norm": 0.957203209400177, + "learning_rate": 2.580090783609114e-06, + "loss": 0.6265, + "step": 10591 + }, + { + "epoch": 0.67, + "grad_norm": 0.8555622696876526, + "learning_rate": 2.5791930100227133e-06, + "loss": 0.5533, + "step": 10592 + }, + { + "epoch": 0.67, + "grad_norm": 0.8494757413864136, + "learning_rate": 2.5782953383680733e-06, + "loss": 0.5352, + "step": 10593 + }, + { + "epoch": 0.67, + "grad_norm": 0.8753517270088196, + "learning_rate": 2.5773977686829928e-06, + "loss": 0.5861, + "step": 10594 + }, + { + "epoch": 0.67, + "grad_norm": 0.8782363533973694, + "learning_rate": 2.5765003010052643e-06, + "loss": 0.55, + "step": 10595 + }, + { + "epoch": 0.67, + "grad_norm": 0.9527836441993713, + "learning_rate": 2.5756029353726777e-06, + "loss": 0.5831, + "step": 10596 + }, + { + "epoch": 0.67, + "grad_norm": 0.9011462926864624, + "learning_rate": 2.574705671823019e-06, + "loss": 0.6172, + "step": 10597 + }, + { + "epoch": 0.67, + "grad_norm": 0.935152530670166, + "learning_rate": 2.5738085103940634e-06, + "loss": 0.6276, + "step": 10598 + }, + { + "epoch": 0.67, + "grad_norm": 0.8869521021842957, + "learning_rate": 2.572911451123594e-06, + "loss": 0.6076, + "step": 10599 + }, + { + "epoch": 0.67, + "grad_norm": 0.918903648853302, + "learning_rate": 2.572014494049382e-06, + "loss": 0.5971, + "step": 10600 + }, + { + "epoch": 0.67, + "grad_norm": 0.8759111166000366, + "learning_rate": 2.571117639209191e-06, + "loss": 0.6048, + "step": 10601 + }, + { + "epoch": 0.67, + "grad_norm": 0.8874173164367676, + "learning_rate": 2.5702208866407873e-06, + "loss": 0.5135, + "step": 10602 + }, + { + "epoch": 0.67, + "grad_norm": 0.8090372681617737, + "learning_rate": 2.5693242363819292e-06, + "loss": 0.5409, + "step": 10603 + }, + { + "epoch": 0.67, + "grad_norm": 0.8543607592582703, + "learning_rate": 2.5684276884703717e-06, + "loss": 0.5645, + "step": 10604 + }, + { + "epoch": 0.67, + "grad_norm": 0.9561940431594849, + "learning_rate": 2.567531242943867e-06, + "loss": 0.6001, + "step": 10605 + }, + { + "epoch": 0.67, + "grad_norm": 0.9354879260063171, + "learning_rate": 2.5666348998401565e-06, + "loss": 0.5695, + "step": 10606 + }, + { + "epoch": 0.67, + "grad_norm": 0.8753422498703003, + "learning_rate": 2.565738659196987e-06, + "loss": 0.556, + "step": 10607 + }, + { + "epoch": 0.67, + "grad_norm": 0.9323849678039551, + "learning_rate": 2.5648425210520967e-06, + "loss": 0.6462, + "step": 10608 + }, + { + "epoch": 0.67, + "grad_norm": 0.8231973052024841, + "learning_rate": 2.563946485443214e-06, + "loss": 0.5039, + "step": 10609 + }, + { + "epoch": 0.67, + "grad_norm": 0.8657350540161133, + "learning_rate": 2.5630505524080707e-06, + "loss": 0.5553, + "step": 10610 + }, + { + "epoch": 0.67, + "grad_norm": 0.8768414258956909, + "learning_rate": 2.5621547219843905e-06, + "loss": 0.5786, + "step": 10611 + }, + { + "epoch": 0.67, + "grad_norm": 0.9179761409759521, + "learning_rate": 2.5612589942098952e-06, + "loss": 0.5519, + "step": 10612 + }, + { + "epoch": 0.67, + "grad_norm": 0.8903763294219971, + "learning_rate": 2.560363369122301e-06, + "loss": 0.6134, + "step": 10613 + }, + { + "epoch": 0.67, + "grad_norm": 0.9475022554397583, + "learning_rate": 2.559467846759317e-06, + "loss": 0.5948, + "step": 10614 + }, + { + "epoch": 0.67, + "grad_norm": 0.9176366329193115, + "learning_rate": 2.5585724271586505e-06, + "loss": 0.5935, + "step": 10615 + }, + { + "epoch": 0.67, + "grad_norm": 0.9265202283859253, + "learning_rate": 2.557677110358009e-06, + "loss": 0.5969, + "step": 10616 + }, + { + "epoch": 0.67, + "grad_norm": 0.8886149525642395, + "learning_rate": 2.556781896395087e-06, + "loss": 0.571, + "step": 10617 + }, + { + "epoch": 0.67, + "grad_norm": 0.942081093788147, + "learning_rate": 2.55588678530758e-06, + "loss": 0.5508, + "step": 10618 + }, + { + "epoch": 0.67, + "grad_norm": 0.9063771367073059, + "learning_rate": 2.5549917771331767e-06, + "loss": 0.5414, + "step": 10619 + }, + { + "epoch": 0.67, + "grad_norm": 0.8666827082633972, + "learning_rate": 2.5540968719095656e-06, + "loss": 0.5374, + "step": 10620 + }, + { + "epoch": 0.67, + "grad_norm": 0.9030107259750366, + "learning_rate": 2.5532020696744277e-06, + "loss": 0.5554, + "step": 10621 + }, + { + "epoch": 0.67, + "grad_norm": 0.9230242967605591, + "learning_rate": 2.5523073704654374e-06, + "loss": 0.6067, + "step": 10622 + }, + { + "epoch": 0.67, + "grad_norm": 0.9134321808815002, + "learning_rate": 2.5514127743202668e-06, + "loss": 0.5651, + "step": 10623 + }, + { + "epoch": 0.67, + "grad_norm": 0.8783094882965088, + "learning_rate": 2.5505182812765894e-06, + "loss": 0.5925, + "step": 10624 + }, + { + "epoch": 0.67, + "grad_norm": 0.9257774949073792, + "learning_rate": 2.549623891372065e-06, + "loss": 0.6196, + "step": 10625 + }, + { + "epoch": 0.67, + "grad_norm": 0.924429714679718, + "learning_rate": 2.5487296046443537e-06, + "loss": 0.5452, + "step": 10626 + }, + { + "epoch": 0.67, + "grad_norm": 0.9063295125961304, + "learning_rate": 2.547835421131114e-06, + "loss": 0.6303, + "step": 10627 + }, + { + "epoch": 0.67, + "grad_norm": 0.8978242874145508, + "learning_rate": 2.5469413408699894e-06, + "loss": 0.5459, + "step": 10628 + }, + { + "epoch": 0.67, + "grad_norm": 0.9666255712509155, + "learning_rate": 2.546047363898636e-06, + "loss": 0.6131, + "step": 10629 + }, + { + "epoch": 0.67, + "grad_norm": 0.9179185032844543, + "learning_rate": 2.545153490254689e-06, + "loss": 0.6264, + "step": 10630 + }, + { + "epoch": 0.67, + "grad_norm": 0.8218669295310974, + "learning_rate": 2.5442597199757896e-06, + "loss": 0.5331, + "step": 10631 + }, + { + "epoch": 0.67, + "grad_norm": 0.8918872475624084, + "learning_rate": 2.5433660530995696e-06, + "loss": 0.6075, + "step": 10632 + }, + { + "epoch": 0.67, + "grad_norm": 0.8965834379196167, + "learning_rate": 2.54247248966366e-06, + "loss": 0.5411, + "step": 10633 + }, + { + "epoch": 0.67, + "grad_norm": 0.9281273484230042, + "learning_rate": 2.5415790297056843e-06, + "loss": 0.545, + "step": 10634 + }, + { + "epoch": 0.67, + "grad_norm": 0.882722795009613, + "learning_rate": 2.5406856732632647e-06, + "loss": 0.6063, + "step": 10635 + }, + { + "epoch": 0.67, + "grad_norm": 0.8930636048316956, + "learning_rate": 2.539792420374013e-06, + "loss": 0.6173, + "step": 10636 + }, + { + "epoch": 0.67, + "grad_norm": 0.8792672753334045, + "learning_rate": 2.5388992710755477e-06, + "loss": 0.5656, + "step": 10637 + }, + { + "epoch": 0.67, + "grad_norm": 0.8245922327041626, + "learning_rate": 2.5380062254054706e-06, + "loss": 0.5317, + "step": 10638 + }, + { + "epoch": 0.67, + "grad_norm": 0.964028537273407, + "learning_rate": 2.5371132834013867e-06, + "loss": 0.5868, + "step": 10639 + }, + { + "epoch": 0.67, + "grad_norm": 0.8802077770233154, + "learning_rate": 2.5362204451008963e-06, + "loss": 0.5438, + "step": 10640 + }, + { + "epoch": 0.67, + "grad_norm": 0.8937103152275085, + "learning_rate": 2.5353277105415887e-06, + "loss": 0.6146, + "step": 10641 + }, + { + "epoch": 0.67, + "grad_norm": 1.144944667816162, + "learning_rate": 2.5344350797610597e-06, + "loss": 0.5291, + "step": 10642 + }, + { + "epoch": 0.67, + "grad_norm": 0.8854457139968872, + "learning_rate": 2.533542552796893e-06, + "loss": 0.5663, + "step": 10643 + }, + { + "epoch": 0.67, + "grad_norm": 0.9482905864715576, + "learning_rate": 2.5326501296866677e-06, + "loss": 0.5978, + "step": 10644 + }, + { + "epoch": 0.67, + "grad_norm": 0.8801560401916504, + "learning_rate": 2.531757810467963e-06, + "loss": 0.5489, + "step": 10645 + }, + { + "epoch": 0.67, + "grad_norm": 0.8408117890357971, + "learning_rate": 2.53086559517835e-06, + "loss": 0.5729, + "step": 10646 + }, + { + "epoch": 0.67, + "grad_norm": 0.8972226977348328, + "learning_rate": 2.529973483855397e-06, + "loss": 0.507, + "step": 10647 + }, + { + "epoch": 0.67, + "grad_norm": 0.8918501734733582, + "learning_rate": 2.52908147653667e-06, + "loss": 0.5221, + "step": 10648 + }, + { + "epoch": 0.67, + "grad_norm": 0.8490516543388367, + "learning_rate": 2.5281895732597227e-06, + "loss": 0.5688, + "step": 10649 + }, + { + "epoch": 0.67, + "grad_norm": 0.9360247850418091, + "learning_rate": 2.527297774062115e-06, + "loss": 0.5614, + "step": 10650 + }, + { + "epoch": 0.67, + "grad_norm": 0.8709716796875, + "learning_rate": 2.5264060789813994e-06, + "loss": 0.5421, + "step": 10651 + }, + { + "epoch": 0.67, + "grad_norm": 0.8740971088409424, + "learning_rate": 2.525514488055116e-06, + "loss": 0.5832, + "step": 10652 + }, + { + "epoch": 0.67, + "grad_norm": 0.9189413785934448, + "learning_rate": 2.5246230013208093e-06, + "loss": 0.6433, + "step": 10653 + }, + { + "epoch": 0.67, + "grad_norm": 0.7839402556419373, + "learning_rate": 2.5237316188160165e-06, + "loss": 0.4975, + "step": 10654 + }, + { + "epoch": 0.68, + "grad_norm": 0.9069191217422485, + "learning_rate": 2.522840340578272e-06, + "loss": 0.5909, + "step": 10655 + }, + { + "epoch": 0.68, + "grad_norm": 0.9287664294242859, + "learning_rate": 2.521949166645102e-06, + "loss": 0.6464, + "step": 10656 + }, + { + "epoch": 0.68, + "grad_norm": 0.8806740045547485, + "learning_rate": 2.5210580970540354e-06, + "loss": 0.5462, + "step": 10657 + }, + { + "epoch": 0.68, + "grad_norm": 0.8837103247642517, + "learning_rate": 2.5201671318425834e-06, + "loss": 0.5808, + "step": 10658 + }, + { + "epoch": 0.68, + "grad_norm": 0.9099284410476685, + "learning_rate": 2.519276271048272e-06, + "loss": 0.5988, + "step": 10659 + }, + { + "epoch": 0.68, + "grad_norm": 0.9354240894317627, + "learning_rate": 2.5183855147086045e-06, + "loss": 0.6039, + "step": 10660 + }, + { + "epoch": 0.68, + "grad_norm": 0.8624934554100037, + "learning_rate": 2.51749486286109e-06, + "loss": 0.5186, + "step": 10661 + }, + { + "epoch": 0.68, + "grad_norm": 0.8302717208862305, + "learning_rate": 2.516604315543231e-06, + "loss": 0.578, + "step": 10662 + }, + { + "epoch": 0.68, + "grad_norm": 0.9564114212989807, + "learning_rate": 2.515713872792525e-06, + "loss": 0.6205, + "step": 10663 + }, + { + "epoch": 0.68, + "grad_norm": 0.9036477208137512, + "learning_rate": 2.5148235346464654e-06, + "loss": 0.5378, + "step": 10664 + }, + { + "epoch": 0.68, + "grad_norm": 0.850906252861023, + "learning_rate": 2.5139333011425435e-06, + "loss": 0.5776, + "step": 10665 + }, + { + "epoch": 0.68, + "grad_norm": 0.8165357708930969, + "learning_rate": 2.5130431723182386e-06, + "loss": 0.5129, + "step": 10666 + }, + { + "epoch": 0.68, + "grad_norm": 0.9315398931503296, + "learning_rate": 2.512153148211038e-06, + "loss": 0.602, + "step": 10667 + }, + { + "epoch": 0.68, + "grad_norm": 0.97512286901474, + "learning_rate": 2.5112632288584116e-06, + "loss": 0.5975, + "step": 10668 + }, + { + "epoch": 0.68, + "grad_norm": 0.9549464583396912, + "learning_rate": 2.5103734142978325e-06, + "loss": 0.6342, + "step": 10669 + }, + { + "epoch": 0.68, + "grad_norm": 0.9069748520851135, + "learning_rate": 2.5094837045667684e-06, + "loss": 0.5671, + "step": 10670 + }, + { + "epoch": 0.68, + "grad_norm": 0.8227144479751587, + "learning_rate": 2.508594099702682e-06, + "loss": 0.5805, + "step": 10671 + }, + { + "epoch": 0.68, + "grad_norm": 0.9068019390106201, + "learning_rate": 2.5077045997430304e-06, + "loss": 0.5906, + "step": 10672 + }, + { + "epoch": 0.68, + "grad_norm": 0.9003850221633911, + "learning_rate": 2.5068152047252702e-06, + "loss": 0.6073, + "step": 10673 + }, + { + "epoch": 0.68, + "grad_norm": 0.9493726491928101, + "learning_rate": 2.5059259146868474e-06, + "loss": 0.5954, + "step": 10674 + }, + { + "epoch": 0.68, + "grad_norm": 0.8758067488670349, + "learning_rate": 2.5050367296652075e-06, + "loss": 0.5644, + "step": 10675 + }, + { + "epoch": 0.68, + "grad_norm": 0.9231355786323547, + "learning_rate": 2.504147649697791e-06, + "loss": 0.6068, + "step": 10676 + }, + { + "epoch": 0.68, + "grad_norm": 0.8375126123428345, + "learning_rate": 2.5032586748220354e-06, + "loss": 0.5932, + "step": 10677 + }, + { + "epoch": 0.68, + "grad_norm": 0.886325478553772, + "learning_rate": 2.5023698050753732e-06, + "loss": 0.6023, + "step": 10678 + }, + { + "epoch": 0.68, + "grad_norm": 0.8187273740768433, + "learning_rate": 2.5014810404952262e-06, + "loss": 0.5291, + "step": 10679 + }, + { + "epoch": 0.68, + "grad_norm": 0.8535604476928711, + "learning_rate": 2.5005923811190226e-06, + "loss": 0.5721, + "step": 10680 + }, + { + "epoch": 0.68, + "grad_norm": 0.8962329030036926, + "learning_rate": 2.4997038269841804e-06, + "loss": 0.5637, + "step": 10681 + }, + { + "epoch": 0.68, + "grad_norm": 0.9370246529579163, + "learning_rate": 2.498815378128111e-06, + "loss": 0.61, + "step": 10682 + }, + { + "epoch": 0.68, + "grad_norm": 0.8292384743690491, + "learning_rate": 2.497927034588225e-06, + "loss": 0.5253, + "step": 10683 + }, + { + "epoch": 0.68, + "grad_norm": 0.8883755803108215, + "learning_rate": 2.497038796401927e-06, + "loss": 0.5684, + "step": 10684 + }, + { + "epoch": 0.68, + "grad_norm": 0.833806037902832, + "learning_rate": 2.4961506636066185e-06, + "loss": 0.623, + "step": 10685 + }, + { + "epoch": 0.68, + "grad_norm": 0.9147443771362305, + "learning_rate": 2.495262636239697e-06, + "loss": 0.6173, + "step": 10686 + }, + { + "epoch": 0.68, + "grad_norm": 0.8724647760391235, + "learning_rate": 2.4943747143385503e-06, + "loss": 0.6106, + "step": 10687 + }, + { + "epoch": 0.68, + "grad_norm": 0.8868297934532166, + "learning_rate": 2.4934868979405667e-06, + "loss": 0.5995, + "step": 10688 + }, + { + "epoch": 0.68, + "grad_norm": 0.8661412000656128, + "learning_rate": 2.492599187083134e-06, + "loss": 0.6492, + "step": 10689 + }, + { + "epoch": 0.68, + "grad_norm": 0.8457236289978027, + "learning_rate": 2.491711581803625e-06, + "loss": 0.5432, + "step": 10690 + }, + { + "epoch": 0.68, + "grad_norm": 0.864971935749054, + "learning_rate": 2.490824082139415e-06, + "loss": 0.5379, + "step": 10691 + }, + { + "epoch": 0.68, + "grad_norm": 0.9583846926689148, + "learning_rate": 2.489936688127875e-06, + "loss": 0.6022, + "step": 10692 + }, + { + "epoch": 0.68, + "grad_norm": 0.8547648787498474, + "learning_rate": 2.4890493998063685e-06, + "loss": 0.5668, + "step": 10693 + }, + { + "epoch": 0.68, + "grad_norm": 0.8871789574623108, + "learning_rate": 2.4881622172122595e-06, + "loss": 0.6422, + "step": 10694 + }, + { + "epoch": 0.68, + "grad_norm": 0.9174278974533081, + "learning_rate": 2.4872751403828986e-06, + "loss": 0.6206, + "step": 10695 + }, + { + "epoch": 0.68, + "grad_norm": 0.9678024649620056, + "learning_rate": 2.4863881693556393e-06, + "loss": 0.5992, + "step": 10696 + }, + { + "epoch": 0.68, + "grad_norm": 0.8451238870620728, + "learning_rate": 2.4855013041678335e-06, + "loss": 0.544, + "step": 10697 + }, + { + "epoch": 0.68, + "grad_norm": 0.8987723588943481, + "learning_rate": 2.484614544856819e-06, + "loss": 0.5851, + "step": 10698 + }, + { + "epoch": 0.68, + "grad_norm": 0.8522927761077881, + "learning_rate": 2.483727891459935e-06, + "loss": 0.5805, + "step": 10699 + }, + { + "epoch": 0.68, + "grad_norm": 0.9000210762023926, + "learning_rate": 2.482841344014516e-06, + "loss": 0.5587, + "step": 10700 + }, + { + "epoch": 0.68, + "grad_norm": 0.8868560791015625, + "learning_rate": 2.4819549025578917e-06, + "loss": 0.5957, + "step": 10701 + }, + { + "epoch": 0.68, + "grad_norm": 0.8964491486549377, + "learning_rate": 2.481068567127389e-06, + "loss": 0.5865, + "step": 10702 + }, + { + "epoch": 0.68, + "grad_norm": 0.8719425797462463, + "learning_rate": 2.4801823377603236e-06, + "loss": 0.5505, + "step": 10703 + }, + { + "epoch": 0.68, + "grad_norm": 0.9729426503181458, + "learning_rate": 2.4792962144940148e-06, + "loss": 0.5863, + "step": 10704 + }, + { + "epoch": 0.68, + "grad_norm": 0.906240701675415, + "learning_rate": 2.4784101973657724e-06, + "loss": 0.6275, + "step": 10705 + }, + { + "epoch": 0.68, + "grad_norm": 0.8632292747497559, + "learning_rate": 2.4775242864129055e-06, + "loss": 0.5586, + "step": 10706 + }, + { + "epoch": 0.68, + "grad_norm": 0.9146695733070374, + "learning_rate": 2.4766384816727164e-06, + "loss": 0.5932, + "step": 10707 + }, + { + "epoch": 0.68, + "grad_norm": 0.8897523880004883, + "learning_rate": 2.475752783182504e-06, + "loss": 0.5804, + "step": 10708 + }, + { + "epoch": 0.68, + "grad_norm": 0.870412290096283, + "learning_rate": 2.4748671909795568e-06, + "loss": 0.5777, + "step": 10709 + }, + { + "epoch": 0.68, + "grad_norm": 0.8530032634735107, + "learning_rate": 2.4739817051011717e-06, + "loss": 0.5358, + "step": 10710 + }, + { + "epoch": 0.68, + "grad_norm": 0.8868164420127869, + "learning_rate": 2.473096325584628e-06, + "loss": 0.5883, + "step": 10711 + }, + { + "epoch": 0.68, + "grad_norm": 0.9638000130653381, + "learning_rate": 2.4722110524672074e-06, + "loss": 0.6171, + "step": 10712 + }, + { + "epoch": 0.68, + "grad_norm": 0.945044755935669, + "learning_rate": 2.4713258857861856e-06, + "loss": 0.5875, + "step": 10713 + }, + { + "epoch": 0.68, + "grad_norm": 0.8738934993743896, + "learning_rate": 2.4704408255788342e-06, + "loss": 0.6121, + "step": 10714 + }, + { + "epoch": 0.68, + "grad_norm": 0.8028507828712463, + "learning_rate": 2.4695558718824204e-06, + "loss": 0.5547, + "step": 10715 + }, + { + "epoch": 0.68, + "grad_norm": 0.8492550253868103, + "learning_rate": 2.468671024734208e-06, + "loss": 0.5684, + "step": 10716 + }, + { + "epoch": 0.68, + "grad_norm": 0.875840961933136, + "learning_rate": 2.4677862841714485e-06, + "loss": 0.5418, + "step": 10717 + }, + { + "epoch": 0.68, + "grad_norm": 0.8230100870132446, + "learning_rate": 2.4669016502314038e-06, + "loss": 0.5823, + "step": 10718 + }, + { + "epoch": 0.68, + "grad_norm": 0.8757971525192261, + "learning_rate": 2.4660171229513165e-06, + "loss": 0.5821, + "step": 10719 + }, + { + "epoch": 0.68, + "grad_norm": 0.8600106239318848, + "learning_rate": 2.465132702368433e-06, + "loss": 0.5618, + "step": 10720 + }, + { + "epoch": 0.68, + "grad_norm": 0.8039467930793762, + "learning_rate": 2.4642483885199938e-06, + "loss": 0.5544, + "step": 10721 + }, + { + "epoch": 0.68, + "grad_norm": 0.8692030906677246, + "learning_rate": 2.463364181443233e-06, + "loss": 0.6235, + "step": 10722 + }, + { + "epoch": 0.68, + "grad_norm": 0.8942098617553711, + "learning_rate": 2.4624800811753826e-06, + "loss": 0.6117, + "step": 10723 + }, + { + "epoch": 0.68, + "grad_norm": 0.8640264272689819, + "learning_rate": 2.4615960877536706e-06, + "loss": 0.5282, + "step": 10724 + }, + { + "epoch": 0.68, + "grad_norm": 0.8637884259223938, + "learning_rate": 2.4607122012153146e-06, + "loss": 0.5566, + "step": 10725 + }, + { + "epoch": 0.68, + "grad_norm": 0.8512043356895447, + "learning_rate": 2.459828421597534e-06, + "loss": 0.5275, + "step": 10726 + }, + { + "epoch": 0.68, + "grad_norm": 0.9386641383171082, + "learning_rate": 2.458944748937543e-06, + "loss": 0.619, + "step": 10727 + }, + { + "epoch": 0.68, + "grad_norm": 0.9329386949539185, + "learning_rate": 2.4580611832725482e-06, + "loss": 0.555, + "step": 10728 + }, + { + "epoch": 0.68, + "grad_norm": 0.8880377411842346, + "learning_rate": 2.4571777246397543e-06, + "loss": 0.6331, + "step": 10729 + }, + { + "epoch": 0.68, + "grad_norm": 0.9043840765953064, + "learning_rate": 2.456294373076361e-06, + "loss": 0.5705, + "step": 10730 + }, + { + "epoch": 0.68, + "grad_norm": 0.8265879154205322, + "learning_rate": 2.455411128619562e-06, + "loss": 0.5232, + "step": 10731 + }, + { + "epoch": 0.68, + "grad_norm": 0.9159626364707947, + "learning_rate": 2.4545279913065513e-06, + "loss": 0.6011, + "step": 10732 + }, + { + "epoch": 0.68, + "grad_norm": 0.9764483571052551, + "learning_rate": 2.4536449611745087e-06, + "loss": 0.6575, + "step": 10733 + }, + { + "epoch": 0.68, + "grad_norm": 0.9006572365760803, + "learning_rate": 2.45276203826062e-06, + "loss": 0.5808, + "step": 10734 + }, + { + "epoch": 0.68, + "grad_norm": 0.8400965929031372, + "learning_rate": 2.451879222602059e-06, + "loss": 0.5425, + "step": 10735 + }, + { + "epoch": 0.68, + "grad_norm": 1.0129814147949219, + "learning_rate": 2.4509965142360013e-06, + "loss": 0.6147, + "step": 10736 + }, + { + "epoch": 0.68, + "grad_norm": 1.019564151763916, + "learning_rate": 2.4501139131996122e-06, + "loss": 0.5869, + "step": 10737 + }, + { + "epoch": 0.68, + "grad_norm": 0.9409759044647217, + "learning_rate": 2.4492314195300583e-06, + "loss": 0.5929, + "step": 10738 + }, + { + "epoch": 0.68, + "grad_norm": 0.924321711063385, + "learning_rate": 2.4483490332644918e-06, + "loss": 0.602, + "step": 10739 + }, + { + "epoch": 0.68, + "grad_norm": 0.8794954419136047, + "learning_rate": 2.4474667544400744e-06, + "loss": 0.5726, + "step": 10740 + }, + { + "epoch": 0.68, + "grad_norm": 0.9003675580024719, + "learning_rate": 2.4465845830939504e-06, + "loss": 0.6345, + "step": 10741 + }, + { + "epoch": 0.68, + "grad_norm": 0.8394778370857239, + "learning_rate": 2.4457025192632672e-06, + "loss": 0.5132, + "step": 10742 + }, + { + "epoch": 0.68, + "grad_norm": 0.8338208198547363, + "learning_rate": 2.444820562985165e-06, + "loss": 0.5275, + "step": 10743 + }, + { + "epoch": 0.68, + "grad_norm": 0.9329877495765686, + "learning_rate": 2.443938714296781e-06, + "loss": 0.5879, + "step": 10744 + }, + { + "epoch": 0.68, + "grad_norm": 0.8452143669128418, + "learning_rate": 2.4430569732352444e-06, + "loss": 0.5829, + "step": 10745 + }, + { + "epoch": 0.68, + "grad_norm": 0.9036096930503845, + "learning_rate": 2.4421753398376865e-06, + "loss": 0.6168, + "step": 10746 + }, + { + "epoch": 0.68, + "grad_norm": 0.8841857314109802, + "learning_rate": 2.441293814141223e-06, + "loss": 0.5878, + "step": 10747 + }, + { + "epoch": 0.68, + "grad_norm": 0.8716691732406616, + "learning_rate": 2.4404123961829795e-06, + "loss": 0.5744, + "step": 10748 + }, + { + "epoch": 0.68, + "grad_norm": 0.8817796111106873, + "learning_rate": 2.4395310860000644e-06, + "loss": 0.5486, + "step": 10749 + }, + { + "epoch": 0.68, + "grad_norm": 0.9194137454032898, + "learning_rate": 2.438649883629588e-06, + "loss": 0.5834, + "step": 10750 + }, + { + "epoch": 0.68, + "grad_norm": 0.9086952805519104, + "learning_rate": 2.437768789108656e-06, + "loss": 0.6189, + "step": 10751 + }, + { + "epoch": 0.68, + "grad_norm": 0.9165956377983093, + "learning_rate": 2.4368878024743638e-06, + "loss": 0.5459, + "step": 10752 + }, + { + "epoch": 0.68, + "grad_norm": 0.9193375110626221, + "learning_rate": 2.4360069237638114e-06, + "loss": 0.5939, + "step": 10753 + }, + { + "epoch": 0.68, + "grad_norm": 0.9131724238395691, + "learning_rate": 2.43512615301409e-06, + "loss": 0.5554, + "step": 10754 + }, + { + "epoch": 0.68, + "grad_norm": 0.9217658638954163, + "learning_rate": 2.434245490262282e-06, + "loss": 0.5851, + "step": 10755 + }, + { + "epoch": 0.68, + "grad_norm": 0.8388816714286804, + "learning_rate": 2.4333649355454704e-06, + "loss": 0.6016, + "step": 10756 + }, + { + "epoch": 0.68, + "grad_norm": 0.8548718690872192, + "learning_rate": 2.4324844889007328e-06, + "loss": 0.628, + "step": 10757 + }, + { + "epoch": 0.68, + "grad_norm": 0.8744621276855469, + "learning_rate": 2.4316041503651417e-06, + "loss": 0.5213, + "step": 10758 + }, + { + "epoch": 0.68, + "grad_norm": 0.9566894769668579, + "learning_rate": 2.430723919975767e-06, + "loss": 0.5779, + "step": 10759 + }, + { + "epoch": 0.68, + "grad_norm": 0.8871926665306091, + "learning_rate": 2.4298437977696658e-06, + "loss": 0.6031, + "step": 10760 + }, + { + "epoch": 0.68, + "grad_norm": 0.8938164114952087, + "learning_rate": 2.428963783783904e-06, + "loss": 0.5478, + "step": 10761 + }, + { + "epoch": 0.68, + "grad_norm": 0.9035009741783142, + "learning_rate": 2.4280838780555347e-06, + "loss": 0.5693, + "step": 10762 + }, + { + "epoch": 0.68, + "grad_norm": 0.9277425408363342, + "learning_rate": 2.427204080621605e-06, + "loss": 0.5697, + "step": 10763 + }, + { + "epoch": 0.68, + "grad_norm": 0.8887539505958557, + "learning_rate": 2.426324391519161e-06, + "loss": 0.5116, + "step": 10764 + }, + { + "epoch": 0.68, + "grad_norm": 0.8846824765205383, + "learning_rate": 2.4254448107852434e-06, + "loss": 0.6138, + "step": 10765 + }, + { + "epoch": 0.68, + "grad_norm": 0.8920591473579407, + "learning_rate": 2.424565338456889e-06, + "loss": 0.5854, + "step": 10766 + }, + { + "epoch": 0.68, + "grad_norm": 0.8790110945701599, + "learning_rate": 2.4236859745711305e-06, + "loss": 0.6154, + "step": 10767 + }, + { + "epoch": 0.68, + "grad_norm": 0.9227504730224609, + "learning_rate": 2.4228067191649917e-06, + "loss": 0.5714, + "step": 10768 + }, + { + "epoch": 0.68, + "grad_norm": 0.8249009847640991, + "learning_rate": 2.421927572275494e-06, + "loss": 0.5788, + "step": 10769 + }, + { + "epoch": 0.68, + "grad_norm": 0.8537331223487854, + "learning_rate": 2.4210485339396627e-06, + "loss": 0.5664, + "step": 10770 + }, + { + "epoch": 0.68, + "grad_norm": 0.9200884103775024, + "learning_rate": 2.4201696041945033e-06, + "loss": 0.6134, + "step": 10771 + }, + { + "epoch": 0.68, + "grad_norm": 0.8930040001869202, + "learning_rate": 2.419290783077028e-06, + "loss": 0.5332, + "step": 10772 + }, + { + "epoch": 0.68, + "grad_norm": 0.932697057723999, + "learning_rate": 2.41841207062424e-06, + "loss": 0.5486, + "step": 10773 + }, + { + "epoch": 0.68, + "grad_norm": 0.9301908016204834, + "learning_rate": 2.4175334668731383e-06, + "loss": 0.6119, + "step": 10774 + }, + { + "epoch": 0.68, + "grad_norm": 0.8337537050247192, + "learning_rate": 2.416654971860721e-06, + "loss": 0.5692, + "step": 10775 + }, + { + "epoch": 0.68, + "grad_norm": 0.881458580493927, + "learning_rate": 2.415776585623974e-06, + "loss": 0.5814, + "step": 10776 + }, + { + "epoch": 0.68, + "grad_norm": 0.9348959922790527, + "learning_rate": 2.4148983081998834e-06, + "loss": 0.5929, + "step": 10777 + }, + { + "epoch": 0.68, + "grad_norm": 0.8827102184295654, + "learning_rate": 2.414020139625436e-06, + "loss": 0.5264, + "step": 10778 + }, + { + "epoch": 0.68, + "grad_norm": 0.8925660848617554, + "learning_rate": 2.413142079937602e-06, + "loss": 0.6432, + "step": 10779 + }, + { + "epoch": 0.68, + "grad_norm": 0.9171149134635925, + "learning_rate": 2.4122641291733567e-06, + "loss": 0.5957, + "step": 10780 + }, + { + "epoch": 0.68, + "grad_norm": 0.9240100383758545, + "learning_rate": 2.4113862873696687e-06, + "loss": 0.6231, + "step": 10781 + }, + { + "epoch": 0.68, + "grad_norm": 0.8504339456558228, + "learning_rate": 2.410508554563495e-06, + "loss": 0.5734, + "step": 10782 + }, + { + "epoch": 0.68, + "grad_norm": 0.8871136903762817, + "learning_rate": 2.4096309307918013e-06, + "loss": 0.5962, + "step": 10783 + }, + { + "epoch": 0.68, + "grad_norm": 0.9689726829528809, + "learning_rate": 2.4087534160915364e-06, + "loss": 0.6037, + "step": 10784 + }, + { + "epoch": 0.68, + "grad_norm": 0.8371800780296326, + "learning_rate": 2.407876010499651e-06, + "loss": 0.6064, + "step": 10785 + }, + { + "epoch": 0.68, + "grad_norm": 0.9337158799171448, + "learning_rate": 2.4069987140530893e-06, + "loss": 0.5893, + "step": 10786 + }, + { + "epoch": 0.68, + "grad_norm": 0.8576418161392212, + "learning_rate": 2.4061215267887915e-06, + "loss": 0.5729, + "step": 10787 + }, + { + "epoch": 0.68, + "grad_norm": 0.859890341758728, + "learning_rate": 2.4052444487436925e-06, + "loss": 0.5478, + "step": 10788 + }, + { + "epoch": 0.68, + "grad_norm": 0.9343597292900085, + "learning_rate": 2.4043674799547252e-06, + "loss": 0.5488, + "step": 10789 + }, + { + "epoch": 0.68, + "grad_norm": 0.9209311008453369, + "learning_rate": 2.4034906204588104e-06, + "loss": 0.5923, + "step": 10790 + }, + { + "epoch": 0.68, + "grad_norm": 0.8784055709838867, + "learning_rate": 2.4026138702928763e-06, + "loss": 0.5471, + "step": 10791 + }, + { + "epoch": 0.68, + "grad_norm": 0.8925771117210388, + "learning_rate": 2.4017372294938347e-06, + "loss": 0.6078, + "step": 10792 + }, + { + "epoch": 0.68, + "grad_norm": 0.9076485633850098, + "learning_rate": 2.4008606980985994e-06, + "loss": 0.6088, + "step": 10793 + }, + { + "epoch": 0.68, + "grad_norm": 0.9135996699333191, + "learning_rate": 2.399984276144079e-06, + "loss": 0.6064, + "step": 10794 + }, + { + "epoch": 0.68, + "grad_norm": 0.8818286657333374, + "learning_rate": 2.3991079636671755e-06, + "loss": 0.5986, + "step": 10795 + }, + { + "epoch": 0.68, + "grad_norm": 0.8494421243667603, + "learning_rate": 2.398231760704788e-06, + "loss": 0.5909, + "step": 10796 + }, + { + "epoch": 0.68, + "grad_norm": 0.8972966074943542, + "learning_rate": 2.397355667293812e-06, + "loss": 0.5537, + "step": 10797 + }, + { + "epoch": 0.68, + "grad_norm": 0.884488582611084, + "learning_rate": 2.396479683471133e-06, + "loss": 0.6015, + "step": 10798 + }, + { + "epoch": 0.68, + "grad_norm": 0.8640215992927551, + "learning_rate": 2.395603809273635e-06, + "loss": 0.6178, + "step": 10799 + }, + { + "epoch": 0.68, + "grad_norm": 0.9510016441345215, + "learning_rate": 2.3947280447382055e-06, + "loss": 0.6046, + "step": 10800 + }, + { + "epoch": 0.68, + "grad_norm": 0.8891522288322449, + "learning_rate": 2.3938523899017124e-06, + "loss": 0.5477, + "step": 10801 + }, + { + "epoch": 0.68, + "grad_norm": 0.8761363625526428, + "learning_rate": 2.392976844801029e-06, + "loss": 0.5773, + "step": 10802 + }, + { + "epoch": 0.68, + "grad_norm": 0.8561110496520996, + "learning_rate": 2.3921014094730216e-06, + "loss": 0.5879, + "step": 10803 + }, + { + "epoch": 0.68, + "grad_norm": 0.8632552623748779, + "learning_rate": 2.3912260839545514e-06, + "loss": 0.5905, + "step": 10804 + }, + { + "epoch": 0.68, + "grad_norm": 0.8456622958183289, + "learning_rate": 2.390350868282478e-06, + "loss": 0.6465, + "step": 10805 + }, + { + "epoch": 0.68, + "grad_norm": 0.9606796503067017, + "learning_rate": 2.389475762493649e-06, + "loss": 0.5823, + "step": 10806 + }, + { + "epoch": 0.68, + "grad_norm": 0.8369455337524414, + "learning_rate": 2.3886007666249124e-06, + "loss": 0.5502, + "step": 10807 + }, + { + "epoch": 0.68, + "grad_norm": 0.8868955373764038, + "learning_rate": 2.387725880713117e-06, + "loss": 0.5796, + "step": 10808 + }, + { + "epoch": 0.68, + "grad_norm": 0.9382752180099487, + "learning_rate": 2.3868511047950955e-06, + "loss": 0.625, + "step": 10809 + }, + { + "epoch": 0.68, + "grad_norm": 0.9117169380187988, + "learning_rate": 2.3859764389076834e-06, + "loss": 0.5422, + "step": 10810 + }, + { + "epoch": 0.68, + "grad_norm": 0.8499246835708618, + "learning_rate": 2.3851018830877115e-06, + "loss": 0.5505, + "step": 10811 + }, + { + "epoch": 0.68, + "grad_norm": 0.9279850125312805, + "learning_rate": 2.3842274373719994e-06, + "loss": 0.5964, + "step": 10812 + }, + { + "epoch": 0.69, + "grad_norm": 0.9057535529136658, + "learning_rate": 2.383353101797374e-06, + "loss": 0.5497, + "step": 10813 + }, + { + "epoch": 0.69, + "grad_norm": 0.8557054996490479, + "learning_rate": 2.3824788764006446e-06, + "loss": 0.5935, + "step": 10814 + }, + { + "epoch": 0.69, + "grad_norm": 0.8106154203414917, + "learning_rate": 2.3816047612186243e-06, + "loss": 0.5335, + "step": 10815 + }, + { + "epoch": 0.69, + "grad_norm": 0.8800660371780396, + "learning_rate": 2.3807307562881188e-06, + "loss": 0.5915, + "step": 10816 + }, + { + "epoch": 0.69, + "grad_norm": 0.901800811290741, + "learning_rate": 2.3798568616459295e-06, + "loss": 0.6108, + "step": 10817 + }, + { + "epoch": 0.69, + "grad_norm": 0.9201937913894653, + "learning_rate": 2.378983077328853e-06, + "loss": 0.563, + "step": 10818 + }, + { + "epoch": 0.69, + "grad_norm": 0.8660761713981628, + "learning_rate": 2.378109403373683e-06, + "loss": 0.5706, + "step": 10819 + }, + { + "epoch": 0.69, + "grad_norm": 0.8939432501792908, + "learning_rate": 2.3772358398172013e-06, + "loss": 0.6153, + "step": 10820 + }, + { + "epoch": 0.69, + "grad_norm": 0.8241981863975525, + "learning_rate": 2.3763623866961984e-06, + "loss": 0.5213, + "step": 10821 + }, + { + "epoch": 0.69, + "grad_norm": 0.9262666702270508, + "learning_rate": 2.375489044047446e-06, + "loss": 0.5569, + "step": 10822 + }, + { + "epoch": 0.69, + "grad_norm": 0.9339314699172974, + "learning_rate": 2.37461581190772e-06, + "loss": 0.62, + "step": 10823 + }, + { + "epoch": 0.69, + "grad_norm": 0.8499922156333923, + "learning_rate": 2.37374269031379e-06, + "loss": 0.568, + "step": 10824 + }, + { + "epoch": 0.69, + "grad_norm": 0.8106879591941833, + "learning_rate": 2.3728696793024187e-06, + "loss": 0.5473, + "step": 10825 + }, + { + "epoch": 0.69, + "grad_norm": 0.9187078475952148, + "learning_rate": 2.371996778910366e-06, + "loss": 0.5985, + "step": 10826 + }, + { + "epoch": 0.69, + "grad_norm": 0.9437380433082581, + "learning_rate": 2.3711239891743886e-06, + "loss": 0.5821, + "step": 10827 + }, + { + "epoch": 0.69, + "grad_norm": 0.8216588497161865, + "learning_rate": 2.370251310131233e-06, + "loss": 0.4918, + "step": 10828 + }, + { + "epoch": 0.69, + "grad_norm": 0.8683214783668518, + "learning_rate": 2.369378741817647e-06, + "loss": 0.5689, + "step": 10829 + }, + { + "epoch": 0.69, + "grad_norm": 0.8889510035514832, + "learning_rate": 2.3685062842703697e-06, + "loss": 0.5838, + "step": 10830 + }, + { + "epoch": 0.69, + "grad_norm": 0.9381679892539978, + "learning_rate": 2.3676339375261394e-06, + "loss": 0.6048, + "step": 10831 + }, + { + "epoch": 0.69, + "grad_norm": 0.9227031469345093, + "learning_rate": 2.3667617016216885e-06, + "loss": 0.5528, + "step": 10832 + }, + { + "epoch": 0.69, + "grad_norm": 0.8656090497970581, + "learning_rate": 2.365889576593738e-06, + "loss": 0.5258, + "step": 10833 + }, + { + "epoch": 0.69, + "grad_norm": 0.9459344148635864, + "learning_rate": 2.365017562479016e-06, + "loss": 0.5723, + "step": 10834 + }, + { + "epoch": 0.69, + "grad_norm": 0.8714156150817871, + "learning_rate": 2.36414565931424e-06, + "loss": 0.5408, + "step": 10835 + }, + { + "epoch": 0.69, + "grad_norm": 0.9365402460098267, + "learning_rate": 2.3632738671361187e-06, + "loss": 0.5667, + "step": 10836 + }, + { + "epoch": 0.69, + "grad_norm": 0.952563464641571, + "learning_rate": 2.362402185981363e-06, + "loss": 0.5606, + "step": 10837 + }, + { + "epoch": 0.69, + "grad_norm": 0.8872142434120178, + "learning_rate": 2.3615306158866745e-06, + "loss": 0.6201, + "step": 10838 + }, + { + "epoch": 0.69, + "grad_norm": 0.9767509698867798, + "learning_rate": 2.360659156888754e-06, + "loss": 0.6305, + "step": 10839 + }, + { + "epoch": 0.69, + "grad_norm": 0.8186350464820862, + "learning_rate": 2.359787809024297e-06, + "loss": 0.5489, + "step": 10840 + }, + { + "epoch": 0.69, + "grad_norm": 0.891994297504425, + "learning_rate": 2.358916572329986e-06, + "loss": 0.5613, + "step": 10841 + }, + { + "epoch": 0.69, + "grad_norm": 0.8956696391105652, + "learning_rate": 2.3580454468425136e-06, + "loss": 0.558, + "step": 10842 + }, + { + "epoch": 0.69, + "grad_norm": 0.8858946561813354, + "learning_rate": 2.357174432598558e-06, + "loss": 0.5871, + "step": 10843 + }, + { + "epoch": 0.69, + "grad_norm": 0.9084068536758423, + "learning_rate": 2.356303529634791e-06, + "loss": 0.6036, + "step": 10844 + }, + { + "epoch": 0.69, + "grad_norm": 0.8893341422080994, + "learning_rate": 2.355432737987886e-06, + "loss": 0.6102, + "step": 10845 + }, + { + "epoch": 0.69, + "grad_norm": 0.8843010067939758, + "learning_rate": 2.3545620576945088e-06, + "loss": 0.5974, + "step": 10846 + }, + { + "epoch": 0.69, + "grad_norm": 0.9258445501327515, + "learning_rate": 2.3536914887913203e-06, + "loss": 0.5951, + "step": 10847 + }, + { + "epoch": 0.69, + "grad_norm": 0.8948039412498474, + "learning_rate": 2.3528210313149793e-06, + "loss": 0.556, + "step": 10848 + }, + { + "epoch": 0.69, + "grad_norm": 0.9238641262054443, + "learning_rate": 2.351950685302134e-06, + "loss": 0.6082, + "step": 10849 + }, + { + "epoch": 0.69, + "grad_norm": 0.9253204464912415, + "learning_rate": 2.351080450789431e-06, + "loss": 0.5741, + "step": 10850 + }, + { + "epoch": 0.69, + "grad_norm": 0.8795269727706909, + "learning_rate": 2.3502103278135203e-06, + "loss": 0.5695, + "step": 10851 + }, + { + "epoch": 0.69, + "grad_norm": 0.905057430267334, + "learning_rate": 2.349340316411032e-06, + "loss": 0.5569, + "step": 10852 + }, + { + "epoch": 0.69, + "grad_norm": 0.8453585505485535, + "learning_rate": 2.3484704166186024e-06, + "loss": 0.5595, + "step": 10853 + }, + { + "epoch": 0.69, + "grad_norm": 0.9011979699134827, + "learning_rate": 2.347600628472859e-06, + "loss": 0.5623, + "step": 10854 + }, + { + "epoch": 0.69, + "grad_norm": 0.874869704246521, + "learning_rate": 2.3467309520104265e-06, + "loss": 0.5435, + "step": 10855 + }, + { + "epoch": 0.69, + "grad_norm": 0.965836763381958, + "learning_rate": 2.3458613872679255e-06, + "loss": 0.6046, + "step": 10856 + }, + { + "epoch": 0.69, + "grad_norm": 0.8620368242263794, + "learning_rate": 2.344991934281966e-06, + "loss": 0.5409, + "step": 10857 + }, + { + "epoch": 0.69, + "grad_norm": 0.9628636240959167, + "learning_rate": 2.344122593089161e-06, + "loss": 0.6049, + "step": 10858 + }, + { + "epoch": 0.69, + "grad_norm": 0.8034865856170654, + "learning_rate": 2.3432533637261135e-06, + "loss": 0.5112, + "step": 10859 + }, + { + "epoch": 0.69, + "grad_norm": 0.8890984058380127, + "learning_rate": 2.3423842462294257e-06, + "loss": 0.5682, + "step": 10860 + }, + { + "epoch": 0.69, + "grad_norm": 0.9449944496154785, + "learning_rate": 2.341515240635691e-06, + "loss": 0.5434, + "step": 10861 + }, + { + "epoch": 0.69, + "grad_norm": 0.9405069947242737, + "learning_rate": 2.340646346981504e-06, + "loss": 0.5712, + "step": 10862 + }, + { + "epoch": 0.69, + "grad_norm": 0.9340393543243408, + "learning_rate": 2.339777565303444e-06, + "loss": 0.6118, + "step": 10863 + }, + { + "epoch": 0.69, + "grad_norm": 0.8473518490791321, + "learning_rate": 2.3389088956380982e-06, + "loss": 0.4942, + "step": 10864 + }, + { + "epoch": 0.69, + "grad_norm": 0.8445674777030945, + "learning_rate": 2.338040338022044e-06, + "loss": 0.548, + "step": 10865 + }, + { + "epoch": 0.69, + "grad_norm": 0.9048270583152771, + "learning_rate": 2.3371718924918487e-06, + "loss": 0.5564, + "step": 10866 + }, + { + "epoch": 0.69, + "grad_norm": 0.9229834675788879, + "learning_rate": 2.3363035590840814e-06, + "loss": 0.5522, + "step": 10867 + }, + { + "epoch": 0.69, + "grad_norm": 0.89118891954422, + "learning_rate": 2.3354353378353056e-06, + "loss": 0.5978, + "step": 10868 + }, + { + "epoch": 0.69, + "grad_norm": 0.8255208730697632, + "learning_rate": 2.334567228782078e-06, + "loss": 0.5616, + "step": 10869 + }, + { + "epoch": 0.69, + "grad_norm": 0.9787054061889648, + "learning_rate": 2.3336992319609534e-06, + "loss": 0.6014, + "step": 10870 + }, + { + "epoch": 0.69, + "grad_norm": 0.8942728638648987, + "learning_rate": 2.3328313474084755e-06, + "loss": 0.6, + "step": 10871 + }, + { + "epoch": 0.69, + "grad_norm": 0.8750494718551636, + "learning_rate": 2.3319635751611937e-06, + "loss": 0.5883, + "step": 10872 + }, + { + "epoch": 0.69, + "grad_norm": 0.8585361838340759, + "learning_rate": 2.3310959152556453e-06, + "loss": 0.6229, + "step": 10873 + }, + { + "epoch": 0.69, + "grad_norm": 0.9201778173446655, + "learning_rate": 2.3302283677283618e-06, + "loss": 0.596, + "step": 10874 + }, + { + "epoch": 0.69, + "grad_norm": 0.8352607488632202, + "learning_rate": 2.3293609326158745e-06, + "loss": 0.5342, + "step": 10875 + }, + { + "epoch": 0.69, + "grad_norm": 0.8795650005340576, + "learning_rate": 2.328493609954707e-06, + "loss": 0.5886, + "step": 10876 + }, + { + "epoch": 0.69, + "grad_norm": 0.9259792566299438, + "learning_rate": 2.3276263997813812e-06, + "loss": 0.5542, + "step": 10877 + }, + { + "epoch": 0.69, + "grad_norm": 0.933414876461029, + "learning_rate": 2.3267593021324127e-06, + "loss": 0.6461, + "step": 10878 + }, + { + "epoch": 0.69, + "grad_norm": 0.9607113599777222, + "learning_rate": 2.3258923170443087e-06, + "loss": 0.6609, + "step": 10879 + }, + { + "epoch": 0.69, + "grad_norm": 0.8267933130264282, + "learning_rate": 2.3250254445535743e-06, + "loss": 0.5456, + "step": 10880 + }, + { + "epoch": 0.69, + "grad_norm": 0.884596586227417, + "learning_rate": 2.324158684696717e-06, + "loss": 0.5046, + "step": 10881 + }, + { + "epoch": 0.69, + "grad_norm": 0.8587662577629089, + "learning_rate": 2.323292037510227e-06, + "loss": 0.4938, + "step": 10882 + }, + { + "epoch": 0.69, + "grad_norm": 0.9369049668312073, + "learning_rate": 2.3224255030305977e-06, + "loss": 0.5844, + "step": 10883 + }, + { + "epoch": 0.69, + "grad_norm": 0.830431342124939, + "learning_rate": 2.321559081294316e-06, + "loss": 0.5372, + "step": 10884 + }, + { + "epoch": 0.69, + "grad_norm": 0.9005808234214783, + "learning_rate": 2.3206927723378638e-06, + "loss": 0.5994, + "step": 10885 + }, + { + "epoch": 0.69, + "grad_norm": 0.9264594912528992, + "learning_rate": 2.3198265761977196e-06, + "loss": 0.5876, + "step": 10886 + }, + { + "epoch": 0.69, + "grad_norm": 0.871841549873352, + "learning_rate": 2.3189604929103533e-06, + "loss": 0.5646, + "step": 10887 + }, + { + "epoch": 0.69, + "grad_norm": 0.9147515296936035, + "learning_rate": 2.318094522512232e-06, + "loss": 0.6012, + "step": 10888 + }, + { + "epoch": 0.69, + "grad_norm": 0.8173208236694336, + "learning_rate": 2.3172286650398247e-06, + "loss": 0.5651, + "step": 10889 + }, + { + "epoch": 0.69, + "grad_norm": 0.8764269351959229, + "learning_rate": 2.3163629205295833e-06, + "loss": 0.5715, + "step": 10890 + }, + { + "epoch": 0.69, + "grad_norm": 0.8883751034736633, + "learning_rate": 2.3154972890179638e-06, + "loss": 0.563, + "step": 10891 + }, + { + "epoch": 0.69, + "grad_norm": 0.941449761390686, + "learning_rate": 2.3146317705414168e-06, + "loss": 0.5512, + "step": 10892 + }, + { + "epoch": 0.69, + "grad_norm": 0.8946614861488342, + "learning_rate": 2.31376636513638e-06, + "loss": 0.5787, + "step": 10893 + }, + { + "epoch": 0.69, + "grad_norm": 0.8711824417114258, + "learning_rate": 2.3129010728393012e-06, + "loss": 0.6297, + "step": 10894 + }, + { + "epoch": 0.69, + "grad_norm": 0.9191935658454895, + "learning_rate": 2.3120358936866084e-06, + "loss": 0.6161, + "step": 10895 + }, + { + "epoch": 0.69, + "grad_norm": 0.9106520414352417, + "learning_rate": 2.3111708277147333e-06, + "loss": 0.5355, + "step": 10896 + }, + { + "epoch": 0.69, + "grad_norm": 0.9305688142776489, + "learning_rate": 2.310305874960101e-06, + "loss": 0.6004, + "step": 10897 + }, + { + "epoch": 0.69, + "grad_norm": 0.9202895760536194, + "learning_rate": 2.3094410354591314e-06, + "loss": 0.6412, + "step": 10898 + }, + { + "epoch": 0.69, + "grad_norm": 0.9189572930335999, + "learning_rate": 2.30857630924824e-06, + "loss": 0.5783, + "step": 10899 + }, + { + "epoch": 0.69, + "grad_norm": 0.8673662543296814, + "learning_rate": 2.3077116963638396e-06, + "loss": 0.5999, + "step": 10900 + }, + { + "epoch": 0.69, + "grad_norm": 0.8311372399330139, + "learning_rate": 2.3068471968423296e-06, + "loss": 0.5998, + "step": 10901 + }, + { + "epoch": 0.69, + "grad_norm": 0.9261046051979065, + "learning_rate": 2.305982810720119e-06, + "loss": 0.6344, + "step": 10902 + }, + { + "epoch": 0.69, + "grad_norm": 0.8744479417800903, + "learning_rate": 2.3051185380335995e-06, + "loss": 0.556, + "step": 10903 + }, + { + "epoch": 0.69, + "grad_norm": 0.8244556784629822, + "learning_rate": 2.304254378819163e-06, + "loss": 0.5574, + "step": 10904 + }, + { + "epoch": 0.69, + "grad_norm": 0.8999570608139038, + "learning_rate": 2.3033903331131986e-06, + "loss": 0.609, + "step": 10905 + }, + { + "epoch": 0.69, + "grad_norm": 0.9004625082015991, + "learning_rate": 2.3025264009520833e-06, + "loss": 0.596, + "step": 10906 + }, + { + "epoch": 0.69, + "grad_norm": 0.8306798338890076, + "learning_rate": 2.3016625823721985e-06, + "loss": 0.5895, + "step": 10907 + }, + { + "epoch": 0.69, + "grad_norm": 0.9338074922561646, + "learning_rate": 2.300798877409918e-06, + "loss": 0.6051, + "step": 10908 + }, + { + "epoch": 0.69, + "grad_norm": 0.9324320554733276, + "learning_rate": 2.2999352861016042e-06, + "loss": 0.5728, + "step": 10909 + }, + { + "epoch": 0.69, + "grad_norm": 0.8564440608024597, + "learning_rate": 2.299071808483623e-06, + "loss": 0.5535, + "step": 10910 + }, + { + "epoch": 0.69, + "grad_norm": 0.9293292164802551, + "learning_rate": 2.2982084445923327e-06, + "loss": 0.625, + "step": 10911 + }, + { + "epoch": 0.69, + "grad_norm": 0.8664717078208923, + "learning_rate": 2.297345194464086e-06, + "loss": 0.5831, + "step": 10912 + }, + { + "epoch": 0.69, + "grad_norm": 0.8852226734161377, + "learning_rate": 2.2964820581352325e-06, + "loss": 0.6052, + "step": 10913 + }, + { + "epoch": 0.69, + "grad_norm": 0.8343265056610107, + "learning_rate": 2.295619035642111e-06, + "loss": 0.5413, + "step": 10914 + }, + { + "epoch": 0.69, + "grad_norm": 0.8824000358581543, + "learning_rate": 2.294756127021066e-06, + "loss": 0.5667, + "step": 10915 + }, + { + "epoch": 0.69, + "grad_norm": 0.9818698763847351, + "learning_rate": 2.2938933323084315e-06, + "loss": 0.6607, + "step": 10916 + }, + { + "epoch": 0.69, + "grad_norm": 0.8719751238822937, + "learning_rate": 2.293030651540534e-06, + "loss": 0.6078, + "step": 10917 + }, + { + "epoch": 0.69, + "grad_norm": 0.8655606508255005, + "learning_rate": 2.2921680847536976e-06, + "loss": 0.5596, + "step": 10918 + }, + { + "epoch": 0.69, + "grad_norm": 0.9295132160186768, + "learning_rate": 2.2913056319842436e-06, + "loss": 0.5886, + "step": 10919 + }, + { + "epoch": 0.69, + "grad_norm": 0.8664971590042114, + "learning_rate": 2.2904432932684865e-06, + "loss": 0.5728, + "step": 10920 + }, + { + "epoch": 0.69, + "grad_norm": 0.857593297958374, + "learning_rate": 2.289581068642737e-06, + "loss": 0.5657, + "step": 10921 + }, + { + "epoch": 0.69, + "grad_norm": 0.9390791058540344, + "learning_rate": 2.2887189581433016e-06, + "loss": 0.5587, + "step": 10922 + }, + { + "epoch": 0.69, + "grad_norm": 0.9358313679695129, + "learning_rate": 2.287856961806475e-06, + "loss": 0.6039, + "step": 10923 + }, + { + "epoch": 0.69, + "grad_norm": 0.862331211566925, + "learning_rate": 2.286995079668561e-06, + "loss": 0.5494, + "step": 10924 + }, + { + "epoch": 0.69, + "grad_norm": 0.841224193572998, + "learning_rate": 2.2861333117658442e-06, + "loss": 0.5516, + "step": 10925 + }, + { + "epoch": 0.69, + "grad_norm": 0.8751315474510193, + "learning_rate": 2.2852716581346124e-06, + "loss": 0.6054, + "step": 10926 + }, + { + "epoch": 0.69, + "grad_norm": 0.8290528655052185, + "learning_rate": 2.2844101188111477e-06, + "loss": 0.5849, + "step": 10927 + }, + { + "epoch": 0.69, + "grad_norm": 0.9079095125198364, + "learning_rate": 2.283548693831726e-06, + "loss": 0.5642, + "step": 10928 + }, + { + "epoch": 0.69, + "grad_norm": 0.9231603145599365, + "learning_rate": 2.2826873832326192e-06, + "loss": 0.5922, + "step": 10929 + }, + { + "epoch": 0.69, + "grad_norm": 0.9110752940177917, + "learning_rate": 2.2818261870500954e-06, + "loss": 0.5498, + "step": 10930 + }, + { + "epoch": 0.69, + "grad_norm": 0.8755868077278137, + "learning_rate": 2.280965105320411e-06, + "loss": 0.5596, + "step": 10931 + }, + { + "epoch": 0.69, + "grad_norm": 0.8865872621536255, + "learning_rate": 2.280104138079831e-06, + "loss": 0.6292, + "step": 10932 + }, + { + "epoch": 0.69, + "grad_norm": 0.8371679186820984, + "learning_rate": 2.2792432853646023e-06, + "loss": 0.5717, + "step": 10933 + }, + { + "epoch": 0.69, + "grad_norm": 0.9466820359230042, + "learning_rate": 2.2783825472109743e-06, + "loss": 0.5489, + "step": 10934 + }, + { + "epoch": 0.69, + "grad_norm": 0.8898562788963318, + "learning_rate": 2.277521923655189e-06, + "loss": 0.599, + "step": 10935 + }, + { + "epoch": 0.69, + "grad_norm": 0.904425323009491, + "learning_rate": 2.276661414733485e-06, + "loss": 0.5712, + "step": 10936 + }, + { + "epoch": 0.69, + "grad_norm": 0.936082661151886, + "learning_rate": 2.2758010204820945e-06, + "loss": 0.6113, + "step": 10937 + }, + { + "epoch": 0.69, + "grad_norm": 0.8919061422348022, + "learning_rate": 2.2749407409372487e-06, + "loss": 0.5971, + "step": 10938 + }, + { + "epoch": 0.69, + "grad_norm": 0.884014368057251, + "learning_rate": 2.2740805761351664e-06, + "loss": 0.5329, + "step": 10939 + }, + { + "epoch": 0.69, + "grad_norm": 0.9374119639396667, + "learning_rate": 2.273220526112068e-06, + "loss": 0.5739, + "step": 10940 + }, + { + "epoch": 0.69, + "grad_norm": 0.8741660714149475, + "learning_rate": 2.272360590904168e-06, + "loss": 0.5725, + "step": 10941 + }, + { + "epoch": 0.69, + "grad_norm": 0.8772330284118652, + "learning_rate": 2.2715007705476744e-06, + "loss": 0.5734, + "step": 10942 + }, + { + "epoch": 0.69, + "grad_norm": 0.9191374182701111, + "learning_rate": 2.2706410650787937e-06, + "loss": 0.5994, + "step": 10943 + }, + { + "epoch": 0.69, + "grad_norm": 0.8320372700691223, + "learning_rate": 2.2697814745337186e-06, + "loss": 0.5521, + "step": 10944 + }, + { + "epoch": 0.69, + "grad_norm": 0.835684597492218, + "learning_rate": 2.2689219989486506e-06, + "loss": 0.6022, + "step": 10945 + }, + { + "epoch": 0.69, + "grad_norm": 0.9576183557510376, + "learning_rate": 2.2680626383597782e-06, + "loss": 0.63, + "step": 10946 + }, + { + "epoch": 0.69, + "grad_norm": 0.877859354019165, + "learning_rate": 2.267203392803282e-06, + "loss": 0.5517, + "step": 10947 + }, + { + "epoch": 0.69, + "grad_norm": 0.9066639542579651, + "learning_rate": 2.266344262315345e-06, + "loss": 0.6017, + "step": 10948 + }, + { + "epoch": 0.69, + "grad_norm": 1.0002546310424805, + "learning_rate": 2.2654852469321405e-06, + "loss": 0.581, + "step": 10949 + }, + { + "epoch": 0.69, + "grad_norm": 1.0141836404800415, + "learning_rate": 2.26462634668984e-06, + "loss": 0.6754, + "step": 10950 + }, + { + "epoch": 0.69, + "grad_norm": 0.8675405979156494, + "learning_rate": 2.2637675616246103e-06, + "loss": 0.6055, + "step": 10951 + }, + { + "epoch": 0.69, + "grad_norm": 0.8971235752105713, + "learning_rate": 2.262908891772608e-06, + "loss": 0.6545, + "step": 10952 + }, + { + "epoch": 0.69, + "grad_norm": 0.8403980731964111, + "learning_rate": 2.2620503371699886e-06, + "loss": 0.5979, + "step": 10953 + }, + { + "epoch": 0.69, + "grad_norm": 0.948631227016449, + "learning_rate": 2.261191897852909e-06, + "loss": 0.5405, + "step": 10954 + }, + { + "epoch": 0.69, + "grad_norm": 0.8814859986305237, + "learning_rate": 2.260333573857509e-06, + "loss": 0.6071, + "step": 10955 + }, + { + "epoch": 0.69, + "grad_norm": 0.8945904970169067, + "learning_rate": 2.2594753652199313e-06, + "loss": 0.6018, + "step": 10956 + }, + { + "epoch": 0.69, + "grad_norm": 0.8850582838058472, + "learning_rate": 2.2586172719763126e-06, + "loss": 0.5572, + "step": 10957 + }, + { + "epoch": 0.69, + "grad_norm": 0.8527590036392212, + "learning_rate": 2.2577592941627842e-06, + "loss": 0.5759, + "step": 10958 + }, + { + "epoch": 0.69, + "grad_norm": 0.870134711265564, + "learning_rate": 2.2569014318154735e-06, + "loss": 0.576, + "step": 10959 + }, + { + "epoch": 0.69, + "grad_norm": 1.019551396369934, + "learning_rate": 2.2560436849704996e-06, + "loss": 0.6245, + "step": 10960 + }, + { + "epoch": 0.69, + "grad_norm": 0.9043488502502441, + "learning_rate": 2.255186053663979e-06, + "loss": 0.6191, + "step": 10961 + }, + { + "epoch": 0.69, + "grad_norm": 0.8273271918296814, + "learning_rate": 2.2543285379320283e-06, + "loss": 0.5481, + "step": 10962 + }, + { + "epoch": 0.69, + "grad_norm": 0.833625078201294, + "learning_rate": 2.2534711378107498e-06, + "loss": 0.5749, + "step": 10963 + }, + { + "epoch": 0.69, + "grad_norm": 0.9222172498703003, + "learning_rate": 2.2526138533362475e-06, + "loss": 0.5493, + "step": 10964 + }, + { + "epoch": 0.69, + "grad_norm": 0.9397459626197815, + "learning_rate": 2.2517566845446182e-06, + "loss": 0.5928, + "step": 10965 + }, + { + "epoch": 0.69, + "grad_norm": 0.8722630143165588, + "learning_rate": 2.2508996314719544e-06, + "loss": 0.5606, + "step": 10966 + }, + { + "epoch": 0.69, + "grad_norm": 0.870911717414856, + "learning_rate": 2.250042694154345e-06, + "loss": 0.5453, + "step": 10967 + }, + { + "epoch": 0.69, + "grad_norm": 0.8412066698074341, + "learning_rate": 2.2491858726278704e-06, + "loss": 0.5291, + "step": 10968 + }, + { + "epoch": 0.69, + "grad_norm": 0.8742692470550537, + "learning_rate": 2.248329166928609e-06, + "loss": 0.6004, + "step": 10969 + }, + { + "epoch": 0.7, + "grad_norm": 0.9923862814903259, + "learning_rate": 2.2474725770926337e-06, + "loss": 0.552, + "step": 10970 + }, + { + "epoch": 0.7, + "grad_norm": 0.9316403865814209, + "learning_rate": 2.2466161031560136e-06, + "loss": 0.6006, + "step": 10971 + }, + { + "epoch": 0.7, + "grad_norm": 0.8640490174293518, + "learning_rate": 2.2457597451548102e-06, + "loss": 0.521, + "step": 10972 + }, + { + "epoch": 0.7, + "grad_norm": 0.8956085443496704, + "learning_rate": 2.2449035031250847e-06, + "loss": 0.5854, + "step": 10973 + }, + { + "epoch": 0.7, + "grad_norm": 0.8568456768989563, + "learning_rate": 2.2440473771028855e-06, + "loss": 0.5973, + "step": 10974 + }, + { + "epoch": 0.7, + "grad_norm": 0.8945633769035339, + "learning_rate": 2.2431913671242666e-06, + "loss": 0.6168, + "step": 10975 + }, + { + "epoch": 0.7, + "grad_norm": 0.8528943657875061, + "learning_rate": 2.242335473225268e-06, + "loss": 0.5569, + "step": 10976 + }, + { + "epoch": 0.7, + "grad_norm": 0.8728605508804321, + "learning_rate": 2.2414796954419286e-06, + "loss": 0.5558, + "step": 10977 + }, + { + "epoch": 0.7, + "grad_norm": 0.8296922445297241, + "learning_rate": 2.2406240338102836e-06, + "loss": 0.5519, + "step": 10978 + }, + { + "epoch": 0.7, + "grad_norm": 0.9309175610542297, + "learning_rate": 2.239768488366361e-06, + "loss": 0.6021, + "step": 10979 + }, + { + "epoch": 0.7, + "grad_norm": 0.8594921827316284, + "learning_rate": 2.2389130591461855e-06, + "loss": 0.5878, + "step": 10980 + }, + { + "epoch": 0.7, + "grad_norm": 0.9349560737609863, + "learning_rate": 2.2380577461857777e-06, + "loss": 0.5937, + "step": 10981 + }, + { + "epoch": 0.7, + "grad_norm": 0.8946079611778259, + "learning_rate": 2.2372025495211465e-06, + "loss": 0.6016, + "step": 10982 + }, + { + "epoch": 0.7, + "grad_norm": 0.8826418519020081, + "learning_rate": 2.236347469188308e-06, + "loss": 0.6071, + "step": 10983 + }, + { + "epoch": 0.7, + "grad_norm": 0.9132988452911377, + "learning_rate": 2.2354925052232625e-06, + "loss": 0.5728, + "step": 10984 + }, + { + "epoch": 0.7, + "grad_norm": 0.8709650039672852, + "learning_rate": 2.2346376576620103e-06, + "loss": 0.5415, + "step": 10985 + }, + { + "epoch": 0.7, + "grad_norm": 0.982613205909729, + "learning_rate": 2.2337829265405466e-06, + "loss": 0.5818, + "step": 10986 + }, + { + "epoch": 0.7, + "grad_norm": 0.9030888676643372, + "learning_rate": 2.2329283118948604e-06, + "loss": 0.5771, + "step": 10987 + }, + { + "epoch": 0.7, + "grad_norm": 0.8313351273536682, + "learning_rate": 2.232073813760937e-06, + "loss": 0.6115, + "step": 10988 + }, + { + "epoch": 0.7, + "grad_norm": 0.8704630136489868, + "learning_rate": 2.2312194321747582e-06, + "loss": 0.5722, + "step": 10989 + }, + { + "epoch": 0.7, + "grad_norm": 0.9125388264656067, + "learning_rate": 2.230365167172296e-06, + "loss": 0.5761, + "step": 10990 + }, + { + "epoch": 0.7, + "grad_norm": 0.9968715906143188, + "learning_rate": 2.2295110187895215e-06, + "loss": 0.5553, + "step": 10991 + }, + { + "epoch": 0.7, + "grad_norm": 0.8663219809532166, + "learning_rate": 2.2286569870624e-06, + "loss": 0.5965, + "step": 10992 + }, + { + "epoch": 0.7, + "grad_norm": 0.899998664855957, + "learning_rate": 2.227803072026892e-06, + "loss": 0.5645, + "step": 10993 + }, + { + "epoch": 0.7, + "grad_norm": 0.9184356927871704, + "learning_rate": 2.226949273718953e-06, + "loss": 0.6109, + "step": 10994 + }, + { + "epoch": 0.7, + "grad_norm": 0.9209024906158447, + "learning_rate": 2.226095592174533e-06, + "loss": 0.6007, + "step": 10995 + }, + { + "epoch": 0.7, + "grad_norm": 0.812882125377655, + "learning_rate": 2.2252420274295782e-06, + "loss": 0.5157, + "step": 10996 + }, + { + "epoch": 0.7, + "grad_norm": 0.8180590271949768, + "learning_rate": 2.224388579520031e-06, + "loss": 0.5697, + "step": 10997 + }, + { + "epoch": 0.7, + "grad_norm": 0.9130131602287292, + "learning_rate": 2.2235352484818228e-06, + "loss": 0.5966, + "step": 10998 + }, + { + "epoch": 0.7, + "grad_norm": 0.8782884478569031, + "learning_rate": 2.222682034350887e-06, + "loss": 0.5733, + "step": 10999 + }, + { + "epoch": 0.7, + "grad_norm": 0.912164032459259, + "learning_rate": 2.221828937163149e-06, + "loss": 0.5988, + "step": 11000 + }, + { + "epoch": 0.7, + "grad_norm": 0.8906491994857788, + "learning_rate": 2.22097595695453e-06, + "loss": 0.5479, + "step": 11001 + }, + { + "epoch": 0.7, + "grad_norm": 0.8602820634841919, + "learning_rate": 2.220123093760946e-06, + "loss": 0.5901, + "step": 11002 + }, + { + "epoch": 0.7, + "grad_norm": 0.9242262244224548, + "learning_rate": 2.2192703476183093e-06, + "loss": 0.5587, + "step": 11003 + }, + { + "epoch": 0.7, + "grad_norm": 0.919808566570282, + "learning_rate": 2.2184177185625217e-06, + "loss": 0.6188, + "step": 11004 + }, + { + "epoch": 0.7, + "grad_norm": 0.8946382999420166, + "learning_rate": 2.217565206629491e-06, + "loss": 0.5634, + "step": 11005 + }, + { + "epoch": 0.7, + "grad_norm": 0.8390125632286072, + "learning_rate": 2.2167128118551084e-06, + "loss": 0.5738, + "step": 11006 + }, + { + "epoch": 0.7, + "grad_norm": 0.8909174203872681, + "learning_rate": 2.2158605342752667e-06, + "loss": 0.6098, + "step": 11007 + }, + { + "epoch": 0.7, + "grad_norm": 0.851243257522583, + "learning_rate": 2.2150083739258525e-06, + "loss": 0.5296, + "step": 11008 + }, + { + "epoch": 0.7, + "grad_norm": 0.866870641708374, + "learning_rate": 2.214156330842748e-06, + "loss": 0.5695, + "step": 11009 + }, + { + "epoch": 0.7, + "grad_norm": 0.8538164496421814, + "learning_rate": 2.2133044050618286e-06, + "loss": 0.5897, + "step": 11010 + }, + { + "epoch": 0.7, + "grad_norm": 0.8874875903129578, + "learning_rate": 2.2124525966189685e-06, + "loss": 0.5415, + "step": 11011 + }, + { + "epoch": 0.7, + "grad_norm": 0.9058599472045898, + "learning_rate": 2.211600905550029e-06, + "loss": 0.5472, + "step": 11012 + }, + { + "epoch": 0.7, + "grad_norm": 0.8872851729393005, + "learning_rate": 2.2107493318908785e-06, + "loss": 0.5927, + "step": 11013 + }, + { + "epoch": 0.7, + "grad_norm": 0.8519189953804016, + "learning_rate": 2.2098978756773687e-06, + "loss": 0.5767, + "step": 11014 + }, + { + "epoch": 0.7, + "grad_norm": 0.9283721446990967, + "learning_rate": 2.2090465369453533e-06, + "loss": 0.6112, + "step": 11015 + }, + { + "epoch": 0.7, + "grad_norm": 0.863922655582428, + "learning_rate": 2.208195315730681e-06, + "loss": 0.5684, + "step": 11016 + }, + { + "epoch": 0.7, + "grad_norm": 0.9413056969642639, + "learning_rate": 2.207344212069189e-06, + "loss": 0.6224, + "step": 11017 + }, + { + "epoch": 0.7, + "grad_norm": 0.92134690284729, + "learning_rate": 2.2064932259967188e-06, + "loss": 0.6007, + "step": 11018 + }, + { + "epoch": 0.7, + "grad_norm": 0.8620043396949768, + "learning_rate": 2.2056423575491026e-06, + "loss": 0.6119, + "step": 11019 + }, + { + "epoch": 0.7, + "grad_norm": 0.9397025108337402, + "learning_rate": 2.204791606762164e-06, + "loss": 0.6156, + "step": 11020 + }, + { + "epoch": 0.7, + "grad_norm": 0.8872060179710388, + "learning_rate": 2.2039409736717273e-06, + "loss": 0.5733, + "step": 11021 + }, + { + "epoch": 0.7, + "grad_norm": 0.8693039417266846, + "learning_rate": 2.2030904583136085e-06, + "loss": 0.587, + "step": 11022 + }, + { + "epoch": 0.7, + "grad_norm": 0.8837648034095764, + "learning_rate": 2.2022400607236214e-06, + "loss": 0.6031, + "step": 11023 + }, + { + "epoch": 0.7, + "grad_norm": 0.848081648349762, + "learning_rate": 2.2013897809375753e-06, + "loss": 0.6094, + "step": 11024 + }, + { + "epoch": 0.7, + "grad_norm": 0.945680558681488, + "learning_rate": 2.2005396189912647e-06, + "loss": 0.5531, + "step": 11025 + }, + { + "epoch": 0.7, + "grad_norm": 0.8282198309898376, + "learning_rate": 2.199689574920495e-06, + "loss": 0.5546, + "step": 11026 + }, + { + "epoch": 0.7, + "grad_norm": 0.8442526459693909, + "learning_rate": 2.198839648761057e-06, + "loss": 0.5722, + "step": 11027 + }, + { + "epoch": 0.7, + "grad_norm": 0.8687816858291626, + "learning_rate": 2.1979898405487354e-06, + "loss": 0.5748, + "step": 11028 + }, + { + "epoch": 0.7, + "grad_norm": 0.8658022284507751, + "learning_rate": 2.197140150319314e-06, + "loss": 0.5626, + "step": 11029 + }, + { + "epoch": 0.7, + "grad_norm": 0.905732274055481, + "learning_rate": 2.19629057810857e-06, + "loss": 0.6178, + "step": 11030 + }, + { + "epoch": 0.7, + "grad_norm": 0.8981587290763855, + "learning_rate": 2.195441123952277e-06, + "loss": 0.5984, + "step": 11031 + }, + { + "epoch": 0.7, + "grad_norm": 0.91184002161026, + "learning_rate": 2.1945917878862037e-06, + "loss": 0.5684, + "step": 11032 + }, + { + "epoch": 0.7, + "grad_norm": 0.8562777042388916, + "learning_rate": 2.193742569946109e-06, + "loss": 0.5981, + "step": 11033 + }, + { + "epoch": 0.7, + "grad_norm": 0.842792272567749, + "learning_rate": 2.1928934701677507e-06, + "loss": 0.5707, + "step": 11034 + }, + { + "epoch": 0.7, + "grad_norm": 0.8843762278556824, + "learning_rate": 2.1920444885868862e-06, + "loss": 0.5663, + "step": 11035 + }, + { + "epoch": 0.7, + "grad_norm": 0.8537455201148987, + "learning_rate": 2.1911956252392593e-06, + "loss": 0.5627, + "step": 11036 + }, + { + "epoch": 0.7, + "grad_norm": 0.9044625163078308, + "learning_rate": 2.1903468801606125e-06, + "loss": 0.5699, + "step": 11037 + }, + { + "epoch": 0.7, + "grad_norm": 0.8944157958030701, + "learning_rate": 2.1894982533866852e-06, + "loss": 0.5917, + "step": 11038 + }, + { + "epoch": 0.7, + "grad_norm": 0.8653507232666016, + "learning_rate": 2.188649744953209e-06, + "loss": 0.5735, + "step": 11039 + }, + { + "epoch": 0.7, + "grad_norm": 0.9325670003890991, + "learning_rate": 2.1878013548959145e-06, + "loss": 0.5949, + "step": 11040 + }, + { + "epoch": 0.7, + "grad_norm": 0.9095918536186218, + "learning_rate": 2.186953083250519e-06, + "loss": 0.632, + "step": 11041 + }, + { + "epoch": 0.7, + "grad_norm": 0.8838375806808472, + "learning_rate": 2.1861049300527426e-06, + "loss": 0.5716, + "step": 11042 + }, + { + "epoch": 0.7, + "grad_norm": 0.9739626049995422, + "learning_rate": 2.1852568953383025e-06, + "loss": 0.5902, + "step": 11043 + }, + { + "epoch": 0.7, + "grad_norm": 0.8509230017662048, + "learning_rate": 2.1844089791429002e-06, + "loss": 0.5383, + "step": 11044 + }, + { + "epoch": 0.7, + "grad_norm": 0.8938042521476746, + "learning_rate": 2.1835611815022412e-06, + "loss": 0.6017, + "step": 11045 + }, + { + "epoch": 0.7, + "grad_norm": 0.8988103866577148, + "learning_rate": 2.182713502452025e-06, + "loss": 0.5487, + "step": 11046 + }, + { + "epoch": 0.7, + "grad_norm": 0.8871778845787048, + "learning_rate": 2.181865942027939e-06, + "loss": 0.5941, + "step": 11047 + }, + { + "epoch": 0.7, + "grad_norm": 0.9358175992965698, + "learning_rate": 2.181018500265679e-06, + "loss": 0.6045, + "step": 11048 + }, + { + "epoch": 0.7, + "grad_norm": 0.8977616429328918, + "learning_rate": 2.1801711772009203e-06, + "loss": 0.5579, + "step": 11049 + }, + { + "epoch": 0.7, + "grad_norm": 0.8415703177452087, + "learning_rate": 2.179323972869345e-06, + "loss": 0.6, + "step": 11050 + }, + { + "epoch": 0.7, + "grad_norm": 0.8636377453804016, + "learning_rate": 2.1784768873066243e-06, + "loss": 0.5714, + "step": 11051 + }, + { + "epoch": 0.7, + "grad_norm": 0.9031801223754883, + "learning_rate": 2.1776299205484265e-06, + "loss": 0.5426, + "step": 11052 + }, + { + "epoch": 0.7, + "grad_norm": 0.9137712121009827, + "learning_rate": 2.176783072630414e-06, + "loss": 0.569, + "step": 11053 + }, + { + "epoch": 0.7, + "grad_norm": 0.8924576044082642, + "learning_rate": 2.1759363435882475e-06, + "loss": 0.5756, + "step": 11054 + }, + { + "epoch": 0.7, + "grad_norm": 0.8783155083656311, + "learning_rate": 2.1750897334575736e-06, + "loss": 0.5775, + "step": 11055 + }, + { + "epoch": 0.7, + "grad_norm": 0.8635226488113403, + "learning_rate": 2.174243242274047e-06, + "loss": 0.5589, + "step": 11056 + }, + { + "epoch": 0.7, + "grad_norm": 0.9452078938484192, + "learning_rate": 2.1733968700733066e-06, + "loss": 0.6036, + "step": 11057 + }, + { + "epoch": 0.7, + "grad_norm": 0.9600135684013367, + "learning_rate": 2.1725506168909903e-06, + "loss": 0.6068, + "step": 11058 + }, + { + "epoch": 0.7, + "grad_norm": 0.8594204187393188, + "learning_rate": 2.1717044827627314e-06, + "loss": 0.5309, + "step": 11059 + }, + { + "epoch": 0.7, + "grad_norm": 0.9110593199729919, + "learning_rate": 2.1708584677241586e-06, + "loss": 0.6443, + "step": 11060 + }, + { + "epoch": 0.7, + "grad_norm": 0.8541653156280518, + "learning_rate": 2.170012571810893e-06, + "loss": 0.532, + "step": 11061 + }, + { + "epoch": 0.7, + "grad_norm": 0.8981989622116089, + "learning_rate": 2.1691667950585552e-06, + "loss": 0.5661, + "step": 11062 + }, + { + "epoch": 0.7, + "grad_norm": 0.8796327710151672, + "learning_rate": 2.1683211375027543e-06, + "loss": 0.6167, + "step": 11063 + }, + { + "epoch": 0.7, + "grad_norm": 0.8893603086471558, + "learning_rate": 2.1674755991790976e-06, + "loss": 0.6118, + "step": 11064 + }, + { + "epoch": 0.7, + "grad_norm": 0.8742503523826599, + "learning_rate": 2.1666301801231937e-06, + "loss": 0.5821, + "step": 11065 + }, + { + "epoch": 0.7, + "grad_norm": 0.9033846259117126, + "learning_rate": 2.1657848803706344e-06, + "loss": 0.589, + "step": 11066 + }, + { + "epoch": 0.7, + "grad_norm": 0.8721830248832703, + "learning_rate": 2.1649396999570137e-06, + "loss": 0.5571, + "step": 11067 + }, + { + "epoch": 0.7, + "grad_norm": 0.8391421437263489, + "learning_rate": 2.1640946389179207e-06, + "loss": 0.5393, + "step": 11068 + }, + { + "epoch": 0.7, + "grad_norm": 0.8839281797409058, + "learning_rate": 2.1632496972889366e-06, + "loss": 0.5608, + "step": 11069 + }, + { + "epoch": 0.7, + "grad_norm": 0.9285804033279419, + "learning_rate": 2.162404875105641e-06, + "loss": 0.6456, + "step": 11070 + }, + { + "epoch": 0.7, + "grad_norm": 0.908049464225769, + "learning_rate": 2.1615601724036033e-06, + "loss": 0.5772, + "step": 11071 + }, + { + "epoch": 0.7, + "grad_norm": 0.8621760010719299, + "learning_rate": 2.1607155892183905e-06, + "loss": 0.566, + "step": 11072 + }, + { + "epoch": 0.7, + "grad_norm": 0.8190118670463562, + "learning_rate": 2.1598711255855713e-06, + "loss": 0.5507, + "step": 11073 + }, + { + "epoch": 0.7, + "grad_norm": 0.9111891984939575, + "learning_rate": 2.1590267815406968e-06, + "loss": 0.5911, + "step": 11074 + }, + { + "epoch": 0.7, + "grad_norm": 0.9198263883590698, + "learning_rate": 2.1581825571193216e-06, + "loss": 0.5695, + "step": 11075 + }, + { + "epoch": 0.7, + "grad_norm": 0.863237202167511, + "learning_rate": 2.1573384523569945e-06, + "loss": 0.5976, + "step": 11076 + }, + { + "epoch": 0.7, + "grad_norm": 0.914863109588623, + "learning_rate": 2.1564944672892524e-06, + "loss": 0.5644, + "step": 11077 + }, + { + "epoch": 0.7, + "grad_norm": 0.8742169141769409, + "learning_rate": 2.1556506019516405e-06, + "loss": 0.5602, + "step": 11078 + }, + { + "epoch": 0.7, + "grad_norm": 0.862916886806488, + "learning_rate": 2.1548068563796855e-06, + "loss": 0.5522, + "step": 11079 + }, + { + "epoch": 0.7, + "grad_norm": 0.8305811882019043, + "learning_rate": 2.1539632306089153e-06, + "loss": 0.543, + "step": 11080 + }, + { + "epoch": 0.7, + "grad_norm": 0.8726207613945007, + "learning_rate": 2.153119724674853e-06, + "loss": 0.5805, + "step": 11081 + }, + { + "epoch": 0.7, + "grad_norm": 0.9057608246803284, + "learning_rate": 2.1522763386130156e-06, + "loss": 0.5893, + "step": 11082 + }, + { + "epoch": 0.7, + "grad_norm": 0.8841626644134521, + "learning_rate": 2.1514330724589156e-06, + "loss": 0.5744, + "step": 11083 + }, + { + "epoch": 0.7, + "grad_norm": 0.9211553931236267, + "learning_rate": 2.1505899262480607e-06, + "loss": 0.6397, + "step": 11084 + }, + { + "epoch": 0.7, + "grad_norm": 1.0019750595092773, + "learning_rate": 2.149746900015948e-06, + "loss": 0.6107, + "step": 11085 + }, + { + "epoch": 0.7, + "grad_norm": 0.8821682929992676, + "learning_rate": 2.148903993798082e-06, + "loss": 0.5628, + "step": 11086 + }, + { + "epoch": 0.7, + "grad_norm": 0.874483048915863, + "learning_rate": 2.148061207629949e-06, + "loss": 0.6007, + "step": 11087 + }, + { + "epoch": 0.7, + "grad_norm": 0.8956743478775024, + "learning_rate": 2.1472185415470365e-06, + "loss": 0.5535, + "step": 11088 + }, + { + "epoch": 0.7, + "grad_norm": 0.9166892766952515, + "learning_rate": 2.1463759955848277e-06, + "loss": 0.551, + "step": 11089 + }, + { + "epoch": 0.7, + "grad_norm": 0.8699899911880493, + "learning_rate": 2.1455335697787987e-06, + "loss": 0.6144, + "step": 11090 + }, + { + "epoch": 0.7, + "grad_norm": 0.8954592347145081, + "learning_rate": 2.1446912641644206e-06, + "loss": 0.5944, + "step": 11091 + }, + { + "epoch": 0.7, + "grad_norm": 0.8515070080757141, + "learning_rate": 2.1438490787771634e-06, + "loss": 0.5716, + "step": 11092 + }, + { + "epoch": 0.7, + "grad_norm": 0.8956817388534546, + "learning_rate": 2.1430070136524826e-06, + "loss": 0.555, + "step": 11093 + }, + { + "epoch": 0.7, + "grad_norm": 0.9041964411735535, + "learning_rate": 2.1421650688258384e-06, + "loss": 0.5484, + "step": 11094 + }, + { + "epoch": 0.7, + "grad_norm": 0.8969672322273254, + "learning_rate": 2.1413232443326813e-06, + "loss": 0.6186, + "step": 11095 + }, + { + "epoch": 0.7, + "grad_norm": 0.9423562288284302, + "learning_rate": 2.140481540208458e-06, + "loss": 0.6008, + "step": 11096 + }, + { + "epoch": 0.7, + "grad_norm": 0.8758630156517029, + "learning_rate": 2.1396399564886113e-06, + "loss": 0.5685, + "step": 11097 + }, + { + "epoch": 0.7, + "grad_norm": 0.8483191728591919, + "learning_rate": 2.1387984932085714e-06, + "loss": 0.5524, + "step": 11098 + }, + { + "epoch": 0.7, + "grad_norm": 0.9557432532310486, + "learning_rate": 2.1379571504037754e-06, + "loss": 0.6017, + "step": 11099 + }, + { + "epoch": 0.7, + "grad_norm": 0.9311283230781555, + "learning_rate": 2.1371159281096497e-06, + "loss": 0.5575, + "step": 11100 + }, + { + "epoch": 0.7, + "grad_norm": 0.8768463134765625, + "learning_rate": 2.1362748263616112e-06, + "loss": 0.6389, + "step": 11101 + }, + { + "epoch": 0.7, + "grad_norm": 0.926892876625061, + "learning_rate": 2.1354338451950774e-06, + "loss": 0.5177, + "step": 11102 + }, + { + "epoch": 0.7, + "grad_norm": 0.8700109124183655, + "learning_rate": 2.1345929846454593e-06, + "loss": 0.5717, + "step": 11103 + }, + { + "epoch": 0.7, + "grad_norm": 0.9436931014060974, + "learning_rate": 2.133752244748163e-06, + "loss": 0.5981, + "step": 11104 + }, + { + "epoch": 0.7, + "grad_norm": 0.8545697927474976, + "learning_rate": 2.1329116255385902e-06, + "loss": 0.559, + "step": 11105 + }, + { + "epoch": 0.7, + "grad_norm": 1.0063272714614868, + "learning_rate": 2.132071127052131e-06, + "loss": 0.6422, + "step": 11106 + }, + { + "epoch": 0.7, + "grad_norm": 0.8825348019599915, + "learning_rate": 2.1312307493241825e-06, + "loss": 0.5503, + "step": 11107 + }, + { + "epoch": 0.7, + "grad_norm": 0.837097704410553, + "learning_rate": 2.1303904923901288e-06, + "loss": 0.5861, + "step": 11108 + }, + { + "epoch": 0.7, + "grad_norm": 0.8669401407241821, + "learning_rate": 2.1295503562853466e-06, + "loss": 0.5819, + "step": 11109 + }, + { + "epoch": 0.7, + "grad_norm": 0.9110631346702576, + "learning_rate": 2.1287103410452135e-06, + "loss": 0.5713, + "step": 11110 + }, + { + "epoch": 0.7, + "grad_norm": 0.8955477476119995, + "learning_rate": 2.1278704467050996e-06, + "loss": 0.6162, + "step": 11111 + }, + { + "epoch": 0.7, + "grad_norm": 0.8396604061126709, + "learning_rate": 2.1270306733003697e-06, + "loss": 0.5518, + "step": 11112 + }, + { + "epoch": 0.7, + "grad_norm": 0.9126778841018677, + "learning_rate": 2.126191020866386e-06, + "loss": 0.6153, + "step": 11113 + }, + { + "epoch": 0.7, + "grad_norm": 0.9138805866241455, + "learning_rate": 2.125351489438499e-06, + "loss": 0.5754, + "step": 11114 + }, + { + "epoch": 0.7, + "grad_norm": 0.8429421782493591, + "learning_rate": 2.124512079052059e-06, + "loss": 0.5369, + "step": 11115 + }, + { + "epoch": 0.7, + "grad_norm": 0.8413889408111572, + "learning_rate": 2.123672789742416e-06, + "loss": 0.5845, + "step": 11116 + }, + { + "epoch": 0.7, + "grad_norm": 0.9169177412986755, + "learning_rate": 2.1228336215449036e-06, + "loss": 0.5887, + "step": 11117 + }, + { + "epoch": 0.7, + "grad_norm": 0.8493983745574951, + "learning_rate": 2.1219945744948584e-06, + "loss": 0.5727, + "step": 11118 + }, + { + "epoch": 0.7, + "grad_norm": 0.8498938083648682, + "learning_rate": 2.12115564862761e-06, + "loss": 0.5381, + "step": 11119 + }, + { + "epoch": 0.7, + "grad_norm": 0.8709191083908081, + "learning_rate": 2.1203168439784828e-06, + "loss": 0.5768, + "step": 11120 + }, + { + "epoch": 0.7, + "grad_norm": 0.8937205076217651, + "learning_rate": 2.119478160582797e-06, + "loss": 0.6019, + "step": 11121 + }, + { + "epoch": 0.7, + "grad_norm": 0.8882603645324707, + "learning_rate": 2.1186395984758633e-06, + "loss": 0.5199, + "step": 11122 + }, + { + "epoch": 0.7, + "grad_norm": 0.8753896951675415, + "learning_rate": 2.117801157692993e-06, + "loss": 0.5835, + "step": 11123 + }, + { + "epoch": 0.7, + "grad_norm": 0.92037034034729, + "learning_rate": 2.1169628382694894e-06, + "loss": 0.5877, + "step": 11124 + }, + { + "epoch": 0.7, + "grad_norm": 0.8579007983207703, + "learning_rate": 2.1161246402406518e-06, + "loss": 0.5617, + "step": 11125 + }, + { + "epoch": 0.7, + "grad_norm": 0.8872489929199219, + "learning_rate": 2.1152865636417723e-06, + "loss": 0.5598, + "step": 11126 + }, + { + "epoch": 0.7, + "grad_norm": 0.9075922966003418, + "learning_rate": 2.114448608508143e-06, + "loss": 0.5544, + "step": 11127 + }, + { + "epoch": 0.71, + "grad_norm": 0.8997741937637329, + "learning_rate": 2.113610774875041e-06, + "loss": 0.5516, + "step": 11128 + }, + { + "epoch": 0.71, + "grad_norm": 0.8664461970329285, + "learning_rate": 2.1127730627777497e-06, + "loss": 0.5554, + "step": 11129 + }, + { + "epoch": 0.71, + "grad_norm": 0.9150891900062561, + "learning_rate": 2.111935472251543e-06, + "loss": 0.5794, + "step": 11130 + }, + { + "epoch": 0.71, + "grad_norm": 0.9180150628089905, + "learning_rate": 2.1110980033316846e-06, + "loss": 0.5755, + "step": 11131 + }, + { + "epoch": 0.71, + "grad_norm": 0.9424551129341125, + "learning_rate": 2.1102606560534393e-06, + "loss": 0.6034, + "step": 11132 + }, + { + "epoch": 0.71, + "grad_norm": 0.893530011177063, + "learning_rate": 2.1094234304520655e-06, + "loss": 0.6, + "step": 11133 + }, + { + "epoch": 0.71, + "grad_norm": 0.9302678108215332, + "learning_rate": 2.108586326562816e-06, + "loss": 0.6037, + "step": 11134 + }, + { + "epoch": 0.71, + "grad_norm": 0.9196210503578186, + "learning_rate": 2.1077493444209385e-06, + "loss": 0.6183, + "step": 11135 + }, + { + "epoch": 0.71, + "grad_norm": 0.9748576879501343, + "learning_rate": 2.1069124840616717e-06, + "loss": 0.5905, + "step": 11136 + }, + { + "epoch": 0.71, + "grad_norm": 0.8754902482032776, + "learning_rate": 2.1060757455202574e-06, + "loss": 0.5659, + "step": 11137 + }, + { + "epoch": 0.71, + "grad_norm": 0.9038988947868347, + "learning_rate": 2.1052391288319285e-06, + "loss": 0.6012, + "step": 11138 + }, + { + "epoch": 0.71, + "grad_norm": 0.9287976026535034, + "learning_rate": 2.1044026340319075e-06, + "loss": 0.5397, + "step": 11139 + }, + { + "epoch": 0.71, + "grad_norm": 0.9258267879486084, + "learning_rate": 2.1035662611554187e-06, + "loss": 0.586, + "step": 11140 + }, + { + "epoch": 0.71, + "grad_norm": 0.9034359455108643, + "learning_rate": 2.1027300102376787e-06, + "loss": 0.5757, + "step": 11141 + }, + { + "epoch": 0.71, + "grad_norm": 0.8997130393981934, + "learning_rate": 2.101893881313899e-06, + "loss": 0.5749, + "step": 11142 + }, + { + "epoch": 0.71, + "grad_norm": 0.8986077308654785, + "learning_rate": 2.1010578744192885e-06, + "loss": 0.6169, + "step": 11143 + }, + { + "epoch": 0.71, + "grad_norm": 0.9187172055244446, + "learning_rate": 2.1002219895890435e-06, + "loss": 0.6212, + "step": 11144 + }, + { + "epoch": 0.71, + "grad_norm": 0.8708627223968506, + "learning_rate": 2.099386226858362e-06, + "loss": 0.5887, + "step": 11145 + }, + { + "epoch": 0.71, + "grad_norm": 0.8534128665924072, + "learning_rate": 2.098550586262439e-06, + "loss": 0.597, + "step": 11146 + }, + { + "epoch": 0.71, + "grad_norm": 0.8883056044578552, + "learning_rate": 2.097715067836456e-06, + "loss": 0.5597, + "step": 11147 + }, + { + "epoch": 0.71, + "grad_norm": 0.9250147938728333, + "learning_rate": 2.096879671615595e-06, + "loss": 0.6114, + "step": 11148 + }, + { + "epoch": 0.71, + "grad_norm": 0.9054756164550781, + "learning_rate": 2.0960443976350315e-06, + "loss": 0.6134, + "step": 11149 + }, + { + "epoch": 0.71, + "grad_norm": 0.9181949496269226, + "learning_rate": 2.0952092459299366e-06, + "loss": 0.6393, + "step": 11150 + }, + { + "epoch": 0.71, + "grad_norm": 0.8578399419784546, + "learning_rate": 2.0943742165354776e-06, + "loss": 0.5453, + "step": 11151 + }, + { + "epoch": 0.71, + "grad_norm": 0.8718449473381042, + "learning_rate": 2.0935393094868094e-06, + "loss": 0.5702, + "step": 11152 + }, + { + "epoch": 0.71, + "grad_norm": 0.8640325665473938, + "learning_rate": 2.092704524819089e-06, + "loss": 0.529, + "step": 11153 + }, + { + "epoch": 0.71, + "grad_norm": 0.878528892993927, + "learning_rate": 2.091869862567471e-06, + "loss": 0.6061, + "step": 11154 + }, + { + "epoch": 0.71, + "grad_norm": 0.9092130064964294, + "learning_rate": 2.091035322767095e-06, + "loss": 0.5999, + "step": 11155 + }, + { + "epoch": 0.71, + "grad_norm": 0.8933854699134827, + "learning_rate": 2.0902009054531013e-06, + "loss": 0.6148, + "step": 11156 + }, + { + "epoch": 0.71, + "grad_norm": 0.9362192153930664, + "learning_rate": 2.089366610660627e-06, + "loss": 0.6049, + "step": 11157 + }, + { + "epoch": 0.71, + "grad_norm": 0.8995941281318665, + "learning_rate": 2.0885324384247956e-06, + "loss": 0.5718, + "step": 11158 + }, + { + "epoch": 0.71, + "grad_norm": 0.8591166734695435, + "learning_rate": 2.087698388780739e-06, + "loss": 0.5622, + "step": 11159 + }, + { + "epoch": 0.71, + "grad_norm": 0.8993247151374817, + "learning_rate": 2.0868644617635697e-06, + "loss": 0.5731, + "step": 11160 + }, + { + "epoch": 0.71, + "grad_norm": 0.9018330574035645, + "learning_rate": 2.0860306574084043e-06, + "loss": 0.5593, + "step": 11161 + }, + { + "epoch": 0.71, + "grad_norm": 0.889884889125824, + "learning_rate": 2.085196975750351e-06, + "loss": 0.5715, + "step": 11162 + }, + { + "epoch": 0.71, + "grad_norm": 0.8837161660194397, + "learning_rate": 2.084363416824513e-06, + "loss": 0.5673, + "step": 11163 + }, + { + "epoch": 0.71, + "grad_norm": 0.825923502445221, + "learning_rate": 2.0835299806659885e-06, + "loss": 0.5608, + "step": 11164 + }, + { + "epoch": 0.71, + "grad_norm": 0.9410537481307983, + "learning_rate": 2.0826966673098737e-06, + "loss": 0.6276, + "step": 11165 + }, + { + "epoch": 0.71, + "grad_norm": 0.8919404149055481, + "learning_rate": 2.0818634767912495e-06, + "loss": 0.6048, + "step": 11166 + }, + { + "epoch": 0.71, + "grad_norm": 0.9027935266494751, + "learning_rate": 2.081030409145206e-06, + "loss": 0.5912, + "step": 11167 + }, + { + "epoch": 0.71, + "grad_norm": 0.9064032435417175, + "learning_rate": 2.080197464406816e-06, + "loss": 0.6004, + "step": 11168 + }, + { + "epoch": 0.71, + "grad_norm": 0.8723354935646057, + "learning_rate": 2.0793646426111536e-06, + "loss": 0.5449, + "step": 11169 + }, + { + "epoch": 0.71, + "grad_norm": 0.9101514220237732, + "learning_rate": 2.078531943793288e-06, + "loss": 0.605, + "step": 11170 + }, + { + "epoch": 0.71, + "grad_norm": 0.9378718137741089, + "learning_rate": 2.0776993679882752e-06, + "loss": 0.5605, + "step": 11171 + }, + { + "epoch": 0.71, + "grad_norm": 0.8340771198272705, + "learning_rate": 2.076866915231178e-06, + "loss": 0.5802, + "step": 11172 + }, + { + "epoch": 0.71, + "grad_norm": 0.9529073238372803, + "learning_rate": 2.076034585557048e-06, + "loss": 0.5803, + "step": 11173 + }, + { + "epoch": 0.71, + "grad_norm": 0.9297928214073181, + "learning_rate": 2.075202379000928e-06, + "loss": 0.5573, + "step": 11174 + }, + { + "epoch": 0.71, + "grad_norm": 0.8736124038696289, + "learning_rate": 2.074370295597861e-06, + "loss": 0.5904, + "step": 11175 + }, + { + "epoch": 0.71, + "grad_norm": 0.947452962398529, + "learning_rate": 2.0735383353828843e-06, + "loss": 0.6098, + "step": 11176 + }, + { + "epoch": 0.71, + "grad_norm": 0.8607105612754822, + "learning_rate": 2.0727064983910266e-06, + "loss": 0.5983, + "step": 11177 + }, + { + "epoch": 0.71, + "grad_norm": 0.9094382524490356, + "learning_rate": 2.071874784657318e-06, + "loss": 0.6039, + "step": 11178 + }, + { + "epoch": 0.71, + "grad_norm": 0.868175745010376, + "learning_rate": 2.0710431942167713e-06, + "loss": 0.5535, + "step": 11179 + }, + { + "epoch": 0.71, + "grad_norm": 0.9550389647483826, + "learning_rate": 2.070211727104409e-06, + "loss": 0.5303, + "step": 11180 + }, + { + "epoch": 0.71, + "grad_norm": 0.939507246017456, + "learning_rate": 2.0693803833552407e-06, + "loss": 0.5674, + "step": 11181 + }, + { + "epoch": 0.71, + "grad_norm": 0.8654747009277344, + "learning_rate": 2.0685491630042677e-06, + "loss": 0.5973, + "step": 11182 + }, + { + "epoch": 0.71, + "grad_norm": 0.8690040111541748, + "learning_rate": 2.0677180660864916e-06, + "loss": 0.5844, + "step": 11183 + }, + { + "epoch": 0.71, + "grad_norm": 0.9559879302978516, + "learning_rate": 2.0668870926369068e-06, + "loss": 0.5674, + "step": 11184 + }, + { + "epoch": 0.71, + "grad_norm": 0.8681148290634155, + "learning_rate": 2.066056242690503e-06, + "loss": 0.6035, + "step": 11185 + }, + { + "epoch": 0.71, + "grad_norm": 0.8854528665542603, + "learning_rate": 2.0652255162822665e-06, + "loss": 0.5315, + "step": 11186 + }, + { + "epoch": 0.71, + "grad_norm": 0.866400957107544, + "learning_rate": 2.0643949134471726e-06, + "loss": 0.5466, + "step": 11187 + }, + { + "epoch": 0.71, + "grad_norm": 0.8909302949905396, + "learning_rate": 2.0635644342201942e-06, + "loss": 0.5889, + "step": 11188 + }, + { + "epoch": 0.71, + "grad_norm": 0.884699821472168, + "learning_rate": 2.0627340786363063e-06, + "loss": 0.6031, + "step": 11189 + }, + { + "epoch": 0.71, + "grad_norm": 0.930429995059967, + "learning_rate": 2.0619038467304663e-06, + "loss": 0.5815, + "step": 11190 + }, + { + "epoch": 0.71, + "grad_norm": 0.8738210201263428, + "learning_rate": 2.061073738537635e-06, + "loss": 0.57, + "step": 11191 + }, + { + "epoch": 0.71, + "grad_norm": 0.8566862344741821, + "learning_rate": 2.0602437540927644e-06, + "loss": 0.564, + "step": 11192 + }, + { + "epoch": 0.71, + "grad_norm": 0.9492089152336121, + "learning_rate": 2.0594138934308027e-06, + "loss": 0.6218, + "step": 11193 + }, + { + "epoch": 0.71, + "grad_norm": 0.929764986038208, + "learning_rate": 2.058584156586692e-06, + "loss": 0.6217, + "step": 11194 + }, + { + "epoch": 0.71, + "grad_norm": 0.9115621447563171, + "learning_rate": 2.0577545435953727e-06, + "loss": 0.5181, + "step": 11195 + }, + { + "epoch": 0.71, + "grad_norm": 0.8706603646278381, + "learning_rate": 2.05692505449177e-06, + "loss": 0.578, + "step": 11196 + }, + { + "epoch": 0.71, + "grad_norm": 0.958949863910675, + "learning_rate": 2.0560956893108188e-06, + "loss": 0.6523, + "step": 11197 + }, + { + "epoch": 0.71, + "grad_norm": 0.888208270072937, + "learning_rate": 2.0552664480874353e-06, + "loss": 0.5606, + "step": 11198 + }, + { + "epoch": 0.71, + "grad_norm": 1.0087759494781494, + "learning_rate": 2.0544373308565374e-06, + "loss": 0.6203, + "step": 11199 + }, + { + "epoch": 0.71, + "grad_norm": 0.8741673827171326, + "learning_rate": 2.0536083376530368e-06, + "loss": 0.5617, + "step": 11200 + }, + { + "epoch": 0.71, + "grad_norm": 0.86680668592453, + "learning_rate": 2.0527794685118397e-06, + "loss": 0.5924, + "step": 11201 + }, + { + "epoch": 0.71, + "grad_norm": 0.8506894111633301, + "learning_rate": 2.0519507234678464e-06, + "loss": 0.6104, + "step": 11202 + }, + { + "epoch": 0.71, + "grad_norm": 0.8975198864936829, + "learning_rate": 2.051122102555954e-06, + "loss": 0.5931, + "step": 11203 + }, + { + "epoch": 0.71, + "grad_norm": 0.9341747164726257, + "learning_rate": 2.0502936058110502e-06, + "loss": 0.6311, + "step": 11204 + }, + { + "epoch": 0.71, + "grad_norm": 0.8767626285552979, + "learning_rate": 2.049465233268021e-06, + "loss": 0.5843, + "step": 11205 + }, + { + "epoch": 0.71, + "grad_norm": 0.8813466429710388, + "learning_rate": 2.0486369849617467e-06, + "loss": 0.5672, + "step": 11206 + }, + { + "epoch": 0.71, + "grad_norm": 1.044753909111023, + "learning_rate": 2.0478088609271018e-06, + "loss": 0.5594, + "step": 11207 + }, + { + "epoch": 0.71, + "grad_norm": 0.9174667000770569, + "learning_rate": 2.0469808611989583e-06, + "loss": 0.6105, + "step": 11208 + }, + { + "epoch": 0.71, + "grad_norm": 0.9028404951095581, + "learning_rate": 2.0461529858121737e-06, + "loss": 0.5766, + "step": 11209 + }, + { + "epoch": 0.71, + "grad_norm": 0.9191893339157104, + "learning_rate": 2.0453252348016133e-06, + "loss": 0.592, + "step": 11210 + }, + { + "epoch": 0.71, + "grad_norm": 0.9252839088439941, + "learning_rate": 2.04449760820213e-06, + "loss": 0.5725, + "step": 11211 + }, + { + "epoch": 0.71, + "grad_norm": 0.8581644296646118, + "learning_rate": 2.04367010604857e-06, + "loss": 0.5989, + "step": 11212 + }, + { + "epoch": 0.71, + "grad_norm": 0.8583932518959045, + "learning_rate": 2.042842728375777e-06, + "loss": 0.5921, + "step": 11213 + }, + { + "epoch": 0.71, + "grad_norm": 0.8592872023582458, + "learning_rate": 2.0420154752185896e-06, + "loss": 0.5688, + "step": 11214 + }, + { + "epoch": 0.71, + "grad_norm": 0.8804487586021423, + "learning_rate": 2.0411883466118406e-06, + "loss": 0.6106, + "step": 11215 + }, + { + "epoch": 0.71, + "grad_norm": 0.8709439039230347, + "learning_rate": 2.0403613425903584e-06, + "loss": 0.5829, + "step": 11216 + }, + { + "epoch": 0.71, + "grad_norm": 0.9017482995986938, + "learning_rate": 2.0395344631889636e-06, + "loss": 0.5525, + "step": 11217 + }, + { + "epoch": 0.71, + "grad_norm": 0.8507391810417175, + "learning_rate": 2.038707708442471e-06, + "loss": 0.6201, + "step": 11218 + }, + { + "epoch": 0.71, + "grad_norm": 0.9192302227020264, + "learning_rate": 2.0378810783856996e-06, + "loss": 0.5915, + "step": 11219 + }, + { + "epoch": 0.71, + "grad_norm": 0.8469076156616211, + "learning_rate": 2.0370545730534493e-06, + "loss": 0.5402, + "step": 11220 + }, + { + "epoch": 0.71, + "grad_norm": 0.9404274225234985, + "learning_rate": 2.0362281924805238e-06, + "loss": 0.5853, + "step": 11221 + }, + { + "epoch": 0.71, + "grad_norm": 0.8778280019760132, + "learning_rate": 2.035401936701719e-06, + "loss": 0.5735, + "step": 11222 + }, + { + "epoch": 0.71, + "grad_norm": 0.8963788151741028, + "learning_rate": 2.034575805751825e-06, + "loss": 0.5796, + "step": 11223 + }, + { + "epoch": 0.71, + "grad_norm": 0.8657545447349548, + "learning_rate": 2.0337497996656303e-06, + "loss": 0.5865, + "step": 11224 + }, + { + "epoch": 0.71, + "grad_norm": 0.8633813261985779, + "learning_rate": 2.03292391847791e-06, + "loss": 0.568, + "step": 11225 + }, + { + "epoch": 0.71, + "grad_norm": 1.0021978616714478, + "learning_rate": 2.032098162223441e-06, + "loss": 0.5911, + "step": 11226 + }, + { + "epoch": 0.71, + "grad_norm": 0.8717944622039795, + "learning_rate": 2.031272530936997e-06, + "loss": 0.5538, + "step": 11227 + }, + { + "epoch": 0.71, + "grad_norm": 0.8620628118515015, + "learning_rate": 2.0304470246533377e-06, + "loss": 0.583, + "step": 11228 + }, + { + "epoch": 0.71, + "grad_norm": 0.8687443733215332, + "learning_rate": 2.0296216434072237e-06, + "loss": 0.5445, + "step": 11229 + }, + { + "epoch": 0.71, + "grad_norm": 0.9198354482650757, + "learning_rate": 2.0287963872334093e-06, + "loss": 0.5932, + "step": 11230 + }, + { + "epoch": 0.71, + "grad_norm": 0.83836430311203, + "learning_rate": 2.0279712561666425e-06, + "loss": 0.5579, + "step": 11231 + }, + { + "epoch": 0.71, + "grad_norm": 0.8580030202865601, + "learning_rate": 2.0271462502416694e-06, + "loss": 0.5878, + "step": 11232 + }, + { + "epoch": 0.71, + "grad_norm": 0.8928140997886658, + "learning_rate": 2.0263213694932238e-06, + "loss": 0.517, + "step": 11233 + }, + { + "epoch": 0.71, + "grad_norm": 0.8412920832633972, + "learning_rate": 2.0254966139560404e-06, + "loss": 0.5409, + "step": 11234 + }, + { + "epoch": 0.71, + "grad_norm": 0.9490091800689697, + "learning_rate": 2.0246719836648476e-06, + "loss": 0.5876, + "step": 11235 + }, + { + "epoch": 0.71, + "grad_norm": 0.9710505604743958, + "learning_rate": 2.0238474786543673e-06, + "loss": 0.5687, + "step": 11236 + }, + { + "epoch": 0.71, + "grad_norm": 0.8756168484687805, + "learning_rate": 2.0230230989593157e-06, + "loss": 0.5713, + "step": 11237 + }, + { + "epoch": 0.71, + "grad_norm": 0.9873241782188416, + "learning_rate": 2.0221988446144076e-06, + "loss": 0.6178, + "step": 11238 + }, + { + "epoch": 0.71, + "grad_norm": 0.9938790798187256, + "learning_rate": 2.0213747156543432e-06, + "loss": 0.565, + "step": 11239 + }, + { + "epoch": 0.71, + "grad_norm": 0.8901024460792542, + "learning_rate": 2.0205507121138316e-06, + "loss": 0.565, + "step": 11240 + }, + { + "epoch": 0.71, + "grad_norm": 0.923038899898529, + "learning_rate": 2.019726834027563e-06, + "loss": 0.5797, + "step": 11241 + }, + { + "epoch": 0.71, + "grad_norm": 0.925470769405365, + "learning_rate": 2.0189030814302295e-06, + "loss": 0.5736, + "step": 11242 + }, + { + "epoch": 0.71, + "grad_norm": 0.9232540130615234, + "learning_rate": 2.018079454356517e-06, + "loss": 0.5231, + "step": 11243 + }, + { + "epoch": 0.71, + "grad_norm": 0.9192769527435303, + "learning_rate": 2.017255952841105e-06, + "loss": 0.593, + "step": 11244 + }, + { + "epoch": 0.71, + "grad_norm": 0.9208205938339233, + "learning_rate": 2.016432576918669e-06, + "loss": 0.5463, + "step": 11245 + }, + { + "epoch": 0.71, + "grad_norm": 0.8803871870040894, + "learning_rate": 2.0156093266238795e-06, + "loss": 0.5155, + "step": 11246 + }, + { + "epoch": 0.71, + "grad_norm": 0.9068865776062012, + "learning_rate": 2.014786201991396e-06, + "loss": 0.613, + "step": 11247 + }, + { + "epoch": 0.71, + "grad_norm": 0.9430435299873352, + "learning_rate": 2.0139632030558844e-06, + "loss": 0.6024, + "step": 11248 + }, + { + "epoch": 0.71, + "grad_norm": 0.8339919447898865, + "learning_rate": 2.0131403298519927e-06, + "loss": 0.5304, + "step": 11249 + }, + { + "epoch": 0.71, + "grad_norm": 0.9149238467216492, + "learning_rate": 2.012317582414371e-06, + "loss": 0.5962, + "step": 11250 + }, + { + "epoch": 0.71, + "grad_norm": 0.8416147828102112, + "learning_rate": 2.011494960777663e-06, + "loss": 0.5355, + "step": 11251 + }, + { + "epoch": 0.71, + "grad_norm": 0.8826677203178406, + "learning_rate": 2.0106724649765055e-06, + "loss": 0.6031, + "step": 11252 + }, + { + "epoch": 0.71, + "grad_norm": 0.9082532525062561, + "learning_rate": 2.0098500950455313e-06, + "loss": 0.6503, + "step": 11253 + }, + { + "epoch": 0.71, + "grad_norm": 0.924066960811615, + "learning_rate": 2.00902785101937e-06, + "loss": 0.5527, + "step": 11254 + }, + { + "epoch": 0.71, + "grad_norm": 0.8147642016410828, + "learning_rate": 2.008205732932639e-06, + "loss": 0.5123, + "step": 11255 + }, + { + "epoch": 0.71, + "grad_norm": 0.8650907278060913, + "learning_rate": 2.0073837408199566e-06, + "loss": 0.5695, + "step": 11256 + }, + { + "epoch": 0.71, + "grad_norm": 0.9047468900680542, + "learning_rate": 2.0065618747159342e-06, + "loss": 0.556, + "step": 11257 + }, + { + "epoch": 0.71, + "grad_norm": 0.8059144020080566, + "learning_rate": 2.0057401346551785e-06, + "loss": 0.5433, + "step": 11258 + }, + { + "epoch": 0.71, + "grad_norm": 0.8873769044876099, + "learning_rate": 2.004918520672289e-06, + "loss": 0.6051, + "step": 11259 + }, + { + "epoch": 0.71, + "grad_norm": 0.8762856125831604, + "learning_rate": 2.0040970328018618e-06, + "loss": 0.6158, + "step": 11260 + }, + { + "epoch": 0.71, + "grad_norm": 0.8870139718055725, + "learning_rate": 2.0032756710784864e-06, + "loss": 0.5862, + "step": 11261 + }, + { + "epoch": 0.71, + "grad_norm": 0.9140628576278687, + "learning_rate": 2.0024544355367494e-06, + "loss": 0.5758, + "step": 11262 + }, + { + "epoch": 0.71, + "grad_norm": 0.9125930666923523, + "learning_rate": 2.001633326211227e-06, + "loss": 0.5277, + "step": 11263 + }, + { + "epoch": 0.71, + "grad_norm": 0.8611398339271545, + "learning_rate": 2.000812343136494e-06, + "loss": 0.5982, + "step": 11264 + }, + { + "epoch": 0.71, + "grad_norm": 0.9212353229522705, + "learning_rate": 1.99999148634712e-06, + "loss": 0.5773, + "step": 11265 + }, + { + "epoch": 0.71, + "grad_norm": 0.8876082897186279, + "learning_rate": 1.9991707558776686e-06, + "loss": 0.5718, + "step": 11266 + }, + { + "epoch": 0.71, + "grad_norm": 0.9632240533828735, + "learning_rate": 1.9983501517626976e-06, + "loss": 0.654, + "step": 11267 + }, + { + "epoch": 0.71, + "grad_norm": 0.8377987742424011, + "learning_rate": 1.997529674036761e-06, + "loss": 0.5421, + "step": 11268 + }, + { + "epoch": 0.71, + "grad_norm": 0.8990334272384644, + "learning_rate": 1.9967093227344013e-06, + "loss": 0.6147, + "step": 11269 + }, + { + "epoch": 0.71, + "grad_norm": 0.8912368416786194, + "learning_rate": 1.9958890978901685e-06, + "loss": 0.5817, + "step": 11270 + }, + { + "epoch": 0.71, + "grad_norm": 0.9113243818283081, + "learning_rate": 1.9950689995385936e-06, + "loss": 0.5697, + "step": 11271 + }, + { + "epoch": 0.71, + "grad_norm": 0.9373201727867126, + "learning_rate": 1.994249027714209e-06, + "loss": 0.6274, + "step": 11272 + }, + { + "epoch": 0.71, + "grad_norm": 0.8835095167160034, + "learning_rate": 1.9934291824515423e-06, + "loss": 0.57, + "step": 11273 + }, + { + "epoch": 0.71, + "grad_norm": 0.9121303558349609, + "learning_rate": 1.9926094637851135e-06, + "loss": 0.5945, + "step": 11274 + }, + { + "epoch": 0.71, + "grad_norm": 0.8844984769821167, + "learning_rate": 1.9917898717494377e-06, + "loss": 0.5921, + "step": 11275 + }, + { + "epoch": 0.71, + "grad_norm": 0.8488909006118774, + "learning_rate": 1.990970406379028e-06, + "loss": 0.5555, + "step": 11276 + }, + { + "epoch": 0.71, + "grad_norm": 0.9113077521324158, + "learning_rate": 1.990151067708383e-06, + "loss": 0.6299, + "step": 11277 + }, + { + "epoch": 0.71, + "grad_norm": 0.9047096967697144, + "learning_rate": 1.9893318557720093e-06, + "loss": 0.5794, + "step": 11278 + }, + { + "epoch": 0.71, + "grad_norm": 0.8998475670814514, + "learning_rate": 1.9885127706043966e-06, + "loss": 0.5783, + "step": 11279 + }, + { + "epoch": 0.71, + "grad_norm": 0.9234053492546082, + "learning_rate": 1.9876938122400348e-06, + "loss": 0.5779, + "step": 11280 + }, + { + "epoch": 0.71, + "grad_norm": 0.9886752367019653, + "learning_rate": 1.9868749807134087e-06, + "loss": 0.5843, + "step": 11281 + }, + { + "epoch": 0.71, + "grad_norm": 0.870291531085968, + "learning_rate": 1.9860562760589926e-06, + "loss": 0.5362, + "step": 11282 + }, + { + "epoch": 0.71, + "grad_norm": 0.9425791501998901, + "learning_rate": 1.9852376983112632e-06, + "loss": 0.6392, + "step": 11283 + }, + { + "epoch": 0.71, + "grad_norm": 0.8813480734825134, + "learning_rate": 1.9844192475046885e-06, + "loss": 0.5949, + "step": 11284 + }, + { + "epoch": 0.71, + "grad_norm": 0.8936353921890259, + "learning_rate": 1.983600923673727e-06, + "loss": 0.5256, + "step": 11285 + }, + { + "epoch": 0.72, + "grad_norm": 0.87742018699646, + "learning_rate": 1.9827827268528378e-06, + "loss": 0.544, + "step": 11286 + }, + { + "epoch": 0.72, + "grad_norm": 0.9305248260498047, + "learning_rate": 1.9819646570764712e-06, + "loss": 0.5996, + "step": 11287 + }, + { + "epoch": 0.72, + "grad_norm": 0.9481449723243713, + "learning_rate": 1.981146714379074e-06, + "loss": 0.617, + "step": 11288 + }, + { + "epoch": 0.72, + "grad_norm": 0.8779386281967163, + "learning_rate": 1.980328898795089e-06, + "loss": 0.5538, + "step": 11289 + }, + { + "epoch": 0.72, + "grad_norm": 0.9480637311935425, + "learning_rate": 1.979511210358946e-06, + "loss": 0.571, + "step": 11290 + }, + { + "epoch": 0.72, + "grad_norm": 0.8718064427375793, + "learning_rate": 1.9786936491050803e-06, + "loss": 0.5632, + "step": 11291 + }, + { + "epoch": 0.72, + "grad_norm": 0.8864429593086243, + "learning_rate": 1.9778762150679155e-06, + "loss": 0.5402, + "step": 11292 + }, + { + "epoch": 0.72, + "grad_norm": 0.861854076385498, + "learning_rate": 1.9770589082818694e-06, + "loss": 0.5858, + "step": 11293 + }, + { + "epoch": 0.72, + "grad_norm": 0.8690637350082397, + "learning_rate": 1.9762417287813557e-06, + "loss": 0.5755, + "step": 11294 + }, + { + "epoch": 0.72, + "grad_norm": 0.9482481479644775, + "learning_rate": 1.9754246766007847e-06, + "loss": 0.5812, + "step": 11295 + }, + { + "epoch": 0.72, + "grad_norm": 0.8821942806243896, + "learning_rate": 1.9746077517745582e-06, + "loss": 0.5189, + "step": 11296 + }, + { + "epoch": 0.72, + "grad_norm": 0.8461850881576538, + "learning_rate": 1.9737909543370764e-06, + "loss": 0.5615, + "step": 11297 + }, + { + "epoch": 0.72, + "grad_norm": 0.8924559950828552, + "learning_rate": 1.972974284322729e-06, + "loss": 0.5045, + "step": 11298 + }, + { + "epoch": 0.72, + "grad_norm": 0.8439890742301941, + "learning_rate": 1.9721577417659023e-06, + "loss": 0.5655, + "step": 11299 + }, + { + "epoch": 0.72, + "grad_norm": 0.8429501056671143, + "learning_rate": 1.9713413267009827e-06, + "loss": 0.5588, + "step": 11300 + }, + { + "epoch": 0.72, + "grad_norm": 0.8670548796653748, + "learning_rate": 1.970525039162343e-06, + "loss": 0.6076, + "step": 11301 + }, + { + "epoch": 0.72, + "grad_norm": 0.8741304874420166, + "learning_rate": 1.969708879184355e-06, + "loss": 0.6139, + "step": 11302 + }, + { + "epoch": 0.72, + "grad_norm": 0.8507691025733948, + "learning_rate": 1.9688928468013846e-06, + "loss": 0.5363, + "step": 11303 + }, + { + "epoch": 0.72, + "grad_norm": 0.8928526043891907, + "learning_rate": 1.968076942047791e-06, + "loss": 0.5258, + "step": 11304 + }, + { + "epoch": 0.72, + "grad_norm": 0.9080408215522766, + "learning_rate": 1.9672611649579332e-06, + "loss": 0.5913, + "step": 11305 + }, + { + "epoch": 0.72, + "grad_norm": 0.8880747556686401, + "learning_rate": 1.966445515566155e-06, + "loss": 0.5521, + "step": 11306 + }, + { + "epoch": 0.72, + "grad_norm": 0.9175702929496765, + "learning_rate": 1.965629993906802e-06, + "loss": 0.5995, + "step": 11307 + }, + { + "epoch": 0.72, + "grad_norm": 0.8689432144165039, + "learning_rate": 1.9648146000142173e-06, + "loss": 0.5661, + "step": 11308 + }, + { + "epoch": 0.72, + "grad_norm": 0.868963897228241, + "learning_rate": 1.963999333922729e-06, + "loss": 0.52, + "step": 11309 + }, + { + "epoch": 0.72, + "grad_norm": 0.8676355481147766, + "learning_rate": 1.963184195666668e-06, + "loss": 0.576, + "step": 11310 + }, + { + "epoch": 0.72, + "grad_norm": 0.8827881217002869, + "learning_rate": 1.9623691852803577e-06, + "loss": 0.5648, + "step": 11311 + }, + { + "epoch": 0.72, + "grad_norm": 0.8609069585800171, + "learning_rate": 1.9615543027981105e-06, + "loss": 0.5785, + "step": 11312 + }, + { + "epoch": 0.72, + "grad_norm": 0.8777223229408264, + "learning_rate": 1.9607395482542446e-06, + "loss": 0.5395, + "step": 11313 + }, + { + "epoch": 0.72, + "grad_norm": 0.9304616451263428, + "learning_rate": 1.9599249216830624e-06, + "loss": 0.6124, + "step": 11314 + }, + { + "epoch": 0.72, + "grad_norm": 0.945838987827301, + "learning_rate": 1.9591104231188656e-06, + "loss": 0.5889, + "step": 11315 + }, + { + "epoch": 0.72, + "grad_norm": 0.8509537577629089, + "learning_rate": 1.958296052595951e-06, + "loss": 0.558, + "step": 11316 + }, + { + "epoch": 0.72, + "grad_norm": 0.9135622978210449, + "learning_rate": 1.9574818101486075e-06, + "loss": 0.5564, + "step": 11317 + }, + { + "epoch": 0.72, + "grad_norm": 0.9122533202171326, + "learning_rate": 1.9566676958111214e-06, + "loss": 0.5926, + "step": 11318 + }, + { + "epoch": 0.72, + "grad_norm": 0.9296271800994873, + "learning_rate": 1.955853709617773e-06, + "loss": 0.597, + "step": 11319 + }, + { + "epoch": 0.72, + "grad_norm": 0.8591296672821045, + "learning_rate": 1.955039851602832e-06, + "loss": 0.6263, + "step": 11320 + }, + { + "epoch": 0.72, + "grad_norm": 0.9196903705596924, + "learning_rate": 1.9542261218005737e-06, + "loss": 0.5828, + "step": 11321 + }, + { + "epoch": 0.72, + "grad_norm": 0.9328646659851074, + "learning_rate": 1.9534125202452557e-06, + "loss": 0.5962, + "step": 11322 + }, + { + "epoch": 0.72, + "grad_norm": 0.9008827209472656, + "learning_rate": 1.952599046971139e-06, + "loss": 0.6416, + "step": 11323 + }, + { + "epoch": 0.72, + "grad_norm": 0.9060094952583313, + "learning_rate": 1.951785702012475e-06, + "loss": 0.5831, + "step": 11324 + }, + { + "epoch": 0.72, + "grad_norm": 0.8941810727119446, + "learning_rate": 1.9509724854035105e-06, + "loss": 0.5775, + "step": 11325 + }, + { + "epoch": 0.72, + "grad_norm": 0.9267244935035706, + "learning_rate": 1.950159397178488e-06, + "loss": 0.6576, + "step": 11326 + }, + { + "epoch": 0.72, + "grad_norm": 0.908926784992218, + "learning_rate": 1.9493464373716458e-06, + "loss": 0.5861, + "step": 11327 + }, + { + "epoch": 0.72, + "grad_norm": 0.8545692563056946, + "learning_rate": 1.9485336060172106e-06, + "loss": 0.5916, + "step": 11328 + }, + { + "epoch": 0.72, + "grad_norm": 0.9397866725921631, + "learning_rate": 1.9477209031494104e-06, + "loss": 0.5883, + "step": 11329 + }, + { + "epoch": 0.72, + "grad_norm": 0.8065406680107117, + "learning_rate": 1.9469083288024647e-06, + "loss": 0.5156, + "step": 11330 + }, + { + "epoch": 0.72, + "grad_norm": 0.8719428181648254, + "learning_rate": 1.9460958830105882e-06, + "loss": 0.61, + "step": 11331 + }, + { + "epoch": 0.72, + "grad_norm": 0.9234678149223328, + "learning_rate": 1.9452835658079905e-06, + "loss": 0.5471, + "step": 11332 + }, + { + "epoch": 0.72, + "grad_norm": 0.941314160823822, + "learning_rate": 1.9444713772288747e-06, + "loss": 0.6378, + "step": 11333 + }, + { + "epoch": 0.72, + "grad_norm": 0.9694302082061768, + "learning_rate": 1.94365931730744e-06, + "loss": 0.6152, + "step": 11334 + }, + { + "epoch": 0.72, + "grad_norm": 0.9199761152267456, + "learning_rate": 1.9428473860778817e-06, + "loss": 0.5797, + "step": 11335 + }, + { + "epoch": 0.72, + "grad_norm": 0.8677429556846619, + "learning_rate": 1.9420355835743826e-06, + "loss": 0.6189, + "step": 11336 + }, + { + "epoch": 0.72, + "grad_norm": 0.866114616394043, + "learning_rate": 1.941223909831125e-06, + "loss": 0.5872, + "step": 11337 + }, + { + "epoch": 0.72, + "grad_norm": 1.0049126148223877, + "learning_rate": 1.9404123648822924e-06, + "loss": 0.6329, + "step": 11338 + }, + { + "epoch": 0.72, + "grad_norm": 0.9035833477973938, + "learning_rate": 1.9396009487620494e-06, + "loss": 0.5539, + "step": 11339 + }, + { + "epoch": 0.72, + "grad_norm": 0.9090478420257568, + "learning_rate": 1.9387896615045636e-06, + "loss": 0.5676, + "step": 11340 + }, + { + "epoch": 0.72, + "grad_norm": 0.9102824330329895, + "learning_rate": 1.9379785031439985e-06, + "loss": 0.5586, + "step": 11341 + }, + { + "epoch": 0.72, + "grad_norm": 0.8439232707023621, + "learning_rate": 1.9371674737145023e-06, + "loss": 0.5545, + "step": 11342 + }, + { + "epoch": 0.72, + "grad_norm": 0.8065714836120605, + "learning_rate": 1.936356573250233e-06, + "loss": 0.5784, + "step": 11343 + }, + { + "epoch": 0.72, + "grad_norm": 0.8955844044685364, + "learning_rate": 1.935545801785329e-06, + "loss": 0.5508, + "step": 11344 + }, + { + "epoch": 0.72, + "grad_norm": 0.8924664258956909, + "learning_rate": 1.934735159353931e-06, + "loss": 0.5664, + "step": 11345 + }, + { + "epoch": 0.72, + "grad_norm": 0.8322812914848328, + "learning_rate": 1.9339246459901715e-06, + "loss": 0.5858, + "step": 11346 + }, + { + "epoch": 0.72, + "grad_norm": 0.8678402304649353, + "learning_rate": 1.93311426172818e-06, + "loss": 0.5856, + "step": 11347 + }, + { + "epoch": 0.72, + "grad_norm": 0.8565698266029358, + "learning_rate": 1.9323040066020774e-06, + "loss": 0.6008, + "step": 11348 + }, + { + "epoch": 0.72, + "grad_norm": 0.8522049188613892, + "learning_rate": 1.931493880645983e-06, + "loss": 0.5971, + "step": 11349 + }, + { + "epoch": 0.72, + "grad_norm": 0.8686321973800659, + "learning_rate": 1.9306838838940035e-06, + "loss": 0.552, + "step": 11350 + }, + { + "epoch": 0.72, + "grad_norm": 0.9111335873603821, + "learning_rate": 1.9298740163802523e-06, + "loss": 0.597, + "step": 11351 + }, + { + "epoch": 0.72, + "grad_norm": 0.8459984064102173, + "learning_rate": 1.929064278138823e-06, + "loss": 0.5981, + "step": 11352 + }, + { + "epoch": 0.72, + "grad_norm": 0.9223425388336182, + "learning_rate": 1.928254669203815e-06, + "loss": 0.6072, + "step": 11353 + }, + { + "epoch": 0.72, + "grad_norm": 0.9331545233726501, + "learning_rate": 1.9274451896093164e-06, + "loss": 0.6259, + "step": 11354 + }, + { + "epoch": 0.72, + "grad_norm": 0.8875550627708435, + "learning_rate": 1.926635839389413e-06, + "loss": 0.5238, + "step": 11355 + }, + { + "epoch": 0.72, + "grad_norm": 0.8523957133293152, + "learning_rate": 1.925826618578182e-06, + "loss": 0.5549, + "step": 11356 + }, + { + "epoch": 0.72, + "grad_norm": 0.8597106337547302, + "learning_rate": 1.9250175272097003e-06, + "loss": 0.5347, + "step": 11357 + }, + { + "epoch": 0.72, + "grad_norm": 0.865592360496521, + "learning_rate": 1.9242085653180314e-06, + "loss": 0.5577, + "step": 11358 + }, + { + "epoch": 0.72, + "grad_norm": 0.9318245649337769, + "learning_rate": 1.9233997329372402e-06, + "loss": 0.6011, + "step": 11359 + }, + { + "epoch": 0.72, + "grad_norm": 0.9316973090171814, + "learning_rate": 1.9225910301013834e-06, + "loss": 0.5626, + "step": 11360 + }, + { + "epoch": 0.72, + "grad_norm": 0.9204529523849487, + "learning_rate": 1.9217824568445125e-06, + "loss": 0.6112, + "step": 11361 + }, + { + "epoch": 0.72, + "grad_norm": 0.8521873950958252, + "learning_rate": 1.920974013200676e-06, + "loss": 0.5743, + "step": 11362 + }, + { + "epoch": 0.72, + "grad_norm": 0.8950269222259521, + "learning_rate": 1.9201656992039092e-06, + "loss": 0.5561, + "step": 11363 + }, + { + "epoch": 0.72, + "grad_norm": 0.9938862323760986, + "learning_rate": 1.9193575148882526e-06, + "loss": 0.6297, + "step": 11364 + }, + { + "epoch": 0.72, + "grad_norm": 0.9481446743011475, + "learning_rate": 1.918549460287736e-06, + "loss": 0.6234, + "step": 11365 + }, + { + "epoch": 0.72, + "grad_norm": 0.8465852737426758, + "learning_rate": 1.9177415354363802e-06, + "loss": 0.5314, + "step": 11366 + }, + { + "epoch": 0.72, + "grad_norm": 0.8947675824165344, + "learning_rate": 1.916933740368206e-06, + "loss": 0.5545, + "step": 11367 + }, + { + "epoch": 0.72, + "grad_norm": 0.8321127891540527, + "learning_rate": 1.916126075117227e-06, + "loss": 0.5376, + "step": 11368 + }, + { + "epoch": 0.72, + "grad_norm": 0.8513221740722656, + "learning_rate": 1.9153185397174506e-06, + "loss": 0.5966, + "step": 11369 + }, + { + "epoch": 0.72, + "grad_norm": 0.8549067974090576, + "learning_rate": 1.9145111342028817e-06, + "loss": 0.5757, + "step": 11370 + }, + { + "epoch": 0.72, + "grad_norm": 0.9566909074783325, + "learning_rate": 1.9137038586075117e-06, + "loss": 0.5956, + "step": 11371 + }, + { + "epoch": 0.72, + "grad_norm": 0.8595585823059082, + "learning_rate": 1.9128967129653375e-06, + "loss": 0.5711, + "step": 11372 + }, + { + "epoch": 0.72, + "grad_norm": 0.8773391842842102, + "learning_rate": 1.9120896973103453e-06, + "loss": 0.6042, + "step": 11373 + }, + { + "epoch": 0.72, + "grad_norm": 0.970403790473938, + "learning_rate": 1.911282811676512e-06, + "loss": 0.6186, + "step": 11374 + }, + { + "epoch": 0.72, + "grad_norm": 0.838644802570343, + "learning_rate": 1.9104760560978147e-06, + "loss": 0.5634, + "step": 11375 + }, + { + "epoch": 0.72, + "grad_norm": 0.8876333236694336, + "learning_rate": 1.909669430608223e-06, + "loss": 0.543, + "step": 11376 + }, + { + "epoch": 0.72, + "grad_norm": 0.9171946048736572, + "learning_rate": 1.908862935241701e-06, + "loss": 0.5908, + "step": 11377 + }, + { + "epoch": 0.72, + "grad_norm": 0.8265011310577393, + "learning_rate": 1.9080565700322095e-06, + "loss": 0.5305, + "step": 11378 + }, + { + "epoch": 0.72, + "grad_norm": 0.9624162912368774, + "learning_rate": 1.9072503350136979e-06, + "loss": 0.5419, + "step": 11379 + }, + { + "epoch": 0.72, + "grad_norm": 0.8564184308052063, + "learning_rate": 1.9064442302201136e-06, + "loss": 0.5412, + "step": 11380 + }, + { + "epoch": 0.72, + "grad_norm": 0.9162154793739319, + "learning_rate": 1.9056382556854053e-06, + "loss": 0.598, + "step": 11381 + }, + { + "epoch": 0.72, + "grad_norm": 0.8687850832939148, + "learning_rate": 1.9048324114435036e-06, + "loss": 0.5839, + "step": 11382 + }, + { + "epoch": 0.72, + "grad_norm": 0.8435238003730774, + "learning_rate": 1.9040266975283417e-06, + "loss": 0.5566, + "step": 11383 + }, + { + "epoch": 0.72, + "grad_norm": 0.8652970790863037, + "learning_rate": 1.9032211139738455e-06, + "loss": 0.5781, + "step": 11384 + }, + { + "epoch": 0.72, + "grad_norm": 0.863194465637207, + "learning_rate": 1.902415660813935e-06, + "loss": 0.6132, + "step": 11385 + }, + { + "epoch": 0.72, + "grad_norm": 0.9415022134780884, + "learning_rate": 1.9016103380825274e-06, + "loss": 0.5613, + "step": 11386 + }, + { + "epoch": 0.72, + "grad_norm": 0.9320086240768433, + "learning_rate": 1.900805145813528e-06, + "loss": 0.5869, + "step": 11387 + }, + { + "epoch": 0.72, + "grad_norm": 0.8881116509437561, + "learning_rate": 1.9000000840408421e-06, + "loss": 0.5784, + "step": 11388 + }, + { + "epoch": 0.72, + "grad_norm": 0.8864371180534363, + "learning_rate": 1.8991951527983694e-06, + "loss": 0.5878, + "step": 11389 + }, + { + "epoch": 0.72, + "grad_norm": 0.922127902507782, + "learning_rate": 1.8983903521200015e-06, + "loss": 0.5998, + "step": 11390 + }, + { + "epoch": 0.72, + "grad_norm": 0.8979513049125671, + "learning_rate": 1.8975856820396265e-06, + "loss": 0.5647, + "step": 11391 + }, + { + "epoch": 0.72, + "grad_norm": 0.8566264510154724, + "learning_rate": 1.8967811425911275e-06, + "loss": 0.6127, + "step": 11392 + }, + { + "epoch": 0.72, + "grad_norm": 0.9450397491455078, + "learning_rate": 1.8959767338083758e-06, + "loss": 0.5713, + "step": 11393 + }, + { + "epoch": 0.72, + "grad_norm": 0.8804638385772705, + "learning_rate": 1.8951724557252472e-06, + "loss": 0.5315, + "step": 11394 + }, + { + "epoch": 0.72, + "grad_norm": 0.8379377722740173, + "learning_rate": 1.8943683083756075e-06, + "loss": 0.5763, + "step": 11395 + }, + { + "epoch": 0.72, + "grad_norm": 0.9465924501419067, + "learning_rate": 1.8935642917933128e-06, + "loss": 0.6547, + "step": 11396 + }, + { + "epoch": 0.72, + "grad_norm": 0.9012244343757629, + "learning_rate": 1.8927604060122196e-06, + "loss": 0.5796, + "step": 11397 + }, + { + "epoch": 0.72, + "grad_norm": 0.9090456962585449, + "learning_rate": 1.8919566510661758e-06, + "loss": 0.5763, + "step": 11398 + }, + { + "epoch": 0.72, + "grad_norm": 0.8996036648750305, + "learning_rate": 1.891153026989026e-06, + "loss": 0.5953, + "step": 11399 + }, + { + "epoch": 0.72, + "grad_norm": 0.8748338222503662, + "learning_rate": 1.8903495338146089e-06, + "loss": 0.5869, + "step": 11400 + }, + { + "epoch": 0.72, + "grad_norm": 0.8042425513267517, + "learning_rate": 1.8895461715767517e-06, + "loss": 0.5118, + "step": 11401 + }, + { + "epoch": 0.72, + "grad_norm": 0.8664458990097046, + "learning_rate": 1.888742940309286e-06, + "loss": 0.5208, + "step": 11402 + }, + { + "epoch": 0.72, + "grad_norm": 0.9896268248558044, + "learning_rate": 1.8879398400460342e-06, + "loss": 0.6079, + "step": 11403 + }, + { + "epoch": 0.72, + "grad_norm": 0.849636435508728, + "learning_rate": 1.8871368708208076e-06, + "loss": 0.5507, + "step": 11404 + }, + { + "epoch": 0.72, + "grad_norm": 0.8900498151779175, + "learning_rate": 1.8863340326674184e-06, + "loss": 0.6023, + "step": 11405 + }, + { + "epoch": 0.72, + "grad_norm": 0.8602756261825562, + "learning_rate": 1.8855313256196722e-06, + "loss": 0.6087, + "step": 11406 + }, + { + "epoch": 0.72, + "grad_norm": 0.8602705001831055, + "learning_rate": 1.8847287497113664e-06, + "loss": 0.5247, + "step": 11407 + }, + { + "epoch": 0.72, + "grad_norm": 0.879084587097168, + "learning_rate": 1.883926304976298e-06, + "loss": 0.544, + "step": 11408 + }, + { + "epoch": 0.72, + "grad_norm": 0.8612745404243469, + "learning_rate": 1.8831239914482512e-06, + "loss": 0.5575, + "step": 11409 + }, + { + "epoch": 0.72, + "grad_norm": 0.8964210152626038, + "learning_rate": 1.8823218091610085e-06, + "loss": 0.6014, + "step": 11410 + }, + { + "epoch": 0.72, + "grad_norm": 0.891295313835144, + "learning_rate": 1.8815197581483523e-06, + "loss": 0.5591, + "step": 11411 + }, + { + "epoch": 0.72, + "grad_norm": 0.9350022673606873, + "learning_rate": 1.880717838444049e-06, + "loss": 0.5922, + "step": 11412 + }, + { + "epoch": 0.72, + "grad_norm": 0.8329875469207764, + "learning_rate": 1.879916050081866e-06, + "loss": 0.5386, + "step": 11413 + }, + { + "epoch": 0.72, + "grad_norm": 0.8741490244865417, + "learning_rate": 1.8791143930955641e-06, + "loss": 0.5588, + "step": 11414 + }, + { + "epoch": 0.72, + "grad_norm": 0.8966600894927979, + "learning_rate": 1.8783128675188988e-06, + "loss": 0.6352, + "step": 11415 + }, + { + "epoch": 0.72, + "grad_norm": 0.8930423855781555, + "learning_rate": 1.8775114733856203e-06, + "loss": 0.5551, + "step": 11416 + }, + { + "epoch": 0.72, + "grad_norm": 0.835688591003418, + "learning_rate": 1.87671021072947e-06, + "loss": 0.5729, + "step": 11417 + }, + { + "epoch": 0.72, + "grad_norm": 0.9322239756584167, + "learning_rate": 1.8759090795841856e-06, + "loss": 0.5968, + "step": 11418 + }, + { + "epoch": 0.72, + "grad_norm": 0.9086197018623352, + "learning_rate": 1.8751080799835059e-06, + "loss": 0.5331, + "step": 11419 + }, + { + "epoch": 0.72, + "grad_norm": 0.8828703165054321, + "learning_rate": 1.8743072119611522e-06, + "loss": 0.5389, + "step": 11420 + }, + { + "epoch": 0.72, + "grad_norm": 0.9224802255630493, + "learning_rate": 1.873506475550848e-06, + "loss": 0.6192, + "step": 11421 + }, + { + "epoch": 0.72, + "grad_norm": 0.8025329113006592, + "learning_rate": 1.8727058707863121e-06, + "loss": 0.5028, + "step": 11422 + }, + { + "epoch": 0.72, + "grad_norm": 0.8815546631813049, + "learning_rate": 1.871905397701249e-06, + "loss": 0.6344, + "step": 11423 + }, + { + "epoch": 0.72, + "grad_norm": 0.9033147692680359, + "learning_rate": 1.8711050563293714e-06, + "loss": 0.5781, + "step": 11424 + }, + { + "epoch": 0.72, + "grad_norm": 0.8625471591949463, + "learning_rate": 1.8703048467043732e-06, + "loss": 0.5251, + "step": 11425 + }, + { + "epoch": 0.72, + "grad_norm": 0.9039772152900696, + "learning_rate": 1.869504768859951e-06, + "loss": 0.5467, + "step": 11426 + }, + { + "epoch": 0.72, + "grad_norm": 0.9913069605827332, + "learning_rate": 1.8687048228297928e-06, + "loss": 0.6187, + "step": 11427 + }, + { + "epoch": 0.72, + "grad_norm": 0.9143205881118774, + "learning_rate": 1.8679050086475814e-06, + "loss": 0.5619, + "step": 11428 + }, + { + "epoch": 0.72, + "grad_norm": 0.9044589400291443, + "learning_rate": 1.867105326346994e-06, + "loss": 0.6366, + "step": 11429 + }, + { + "epoch": 0.72, + "grad_norm": 0.8686836361885071, + "learning_rate": 1.8663057759617048e-06, + "loss": 0.5955, + "step": 11430 + }, + { + "epoch": 0.72, + "grad_norm": 0.9669235348701477, + "learning_rate": 1.8655063575253746e-06, + "loss": 0.5954, + "step": 11431 + }, + { + "epoch": 0.72, + "grad_norm": 0.9471785426139832, + "learning_rate": 1.8647070710716709e-06, + "loss": 0.6246, + "step": 11432 + }, + { + "epoch": 0.72, + "grad_norm": 0.8249446153640747, + "learning_rate": 1.8639079166342438e-06, + "loss": 0.5258, + "step": 11433 + }, + { + "epoch": 0.72, + "grad_norm": 0.8744306564331055, + "learning_rate": 1.8631088942467452e-06, + "loss": 0.5818, + "step": 11434 + }, + { + "epoch": 0.72, + "grad_norm": 0.8946027159690857, + "learning_rate": 1.8623100039428194e-06, + "loss": 0.5504, + "step": 11435 + }, + { + "epoch": 0.72, + "grad_norm": 0.8908700346946716, + "learning_rate": 1.8615112457561013e-06, + "loss": 0.582, + "step": 11436 + }, + { + "epoch": 0.72, + "grad_norm": 0.86359703540802, + "learning_rate": 1.860712619720228e-06, + "loss": 0.6233, + "step": 11437 + }, + { + "epoch": 0.72, + "grad_norm": 0.92805016040802, + "learning_rate": 1.8599141258688274e-06, + "loss": 0.5893, + "step": 11438 + }, + { + "epoch": 0.72, + "grad_norm": 0.8947566151618958, + "learning_rate": 1.8591157642355179e-06, + "loss": 0.5218, + "step": 11439 + }, + { + "epoch": 0.72, + "grad_norm": 0.9130182266235352, + "learning_rate": 1.8583175348539173e-06, + "loss": 0.5615, + "step": 11440 + }, + { + "epoch": 0.72, + "grad_norm": 0.8366416096687317, + "learning_rate": 1.8575194377576355e-06, + "loss": 0.5131, + "step": 11441 + }, + { + "epoch": 0.72, + "grad_norm": 0.8980015516281128, + "learning_rate": 1.856721472980279e-06, + "loss": 0.5609, + "step": 11442 + }, + { + "epoch": 0.72, + "grad_norm": 0.8970168232917786, + "learning_rate": 1.855923640555448e-06, + "loss": 0.5611, + "step": 11443 + }, + { + "epoch": 0.73, + "grad_norm": 0.8903645873069763, + "learning_rate": 1.8551259405167315e-06, + "loss": 0.5523, + "step": 11444 + }, + { + "epoch": 0.73, + "grad_norm": 0.8101871609687805, + "learning_rate": 1.8543283728977234e-06, + "loss": 0.5412, + "step": 11445 + }, + { + "epoch": 0.73, + "grad_norm": 0.8400049209594727, + "learning_rate": 1.8535309377320059e-06, + "loss": 0.5745, + "step": 11446 + }, + { + "epoch": 0.73, + "grad_norm": 0.8555065393447876, + "learning_rate": 1.8527336350531532e-06, + "loss": 0.5557, + "step": 11447 + }, + { + "epoch": 0.73, + "grad_norm": 0.858265221118927, + "learning_rate": 1.851936464894739e-06, + "loss": 0.5744, + "step": 11448 + }, + { + "epoch": 0.73, + "grad_norm": 0.925983190536499, + "learning_rate": 1.8511394272903287e-06, + "loss": 0.6146, + "step": 11449 + }, + { + "epoch": 0.73, + "grad_norm": 0.9145652651786804, + "learning_rate": 1.8503425222734834e-06, + "loss": 0.5427, + "step": 11450 + }, + { + "epoch": 0.73, + "grad_norm": 0.8962170481681824, + "learning_rate": 1.8495457498777585e-06, + "loss": 0.6176, + "step": 11451 + }, + { + "epoch": 0.73, + "grad_norm": 0.8576472997665405, + "learning_rate": 1.8487491101367016e-06, + "loss": 0.5613, + "step": 11452 + }, + { + "epoch": 0.73, + "grad_norm": 0.9138413667678833, + "learning_rate": 1.8479526030838552e-06, + "loss": 0.6351, + "step": 11453 + }, + { + "epoch": 0.73, + "grad_norm": 0.8952200412750244, + "learning_rate": 1.8471562287527627e-06, + "loss": 0.5977, + "step": 11454 + }, + { + "epoch": 0.73, + "grad_norm": 0.9184353351593018, + "learning_rate": 1.8463599871769516e-06, + "loss": 0.6327, + "step": 11455 + }, + { + "epoch": 0.73, + "grad_norm": 0.9197295904159546, + "learning_rate": 1.8455638783899515e-06, + "loss": 0.6019, + "step": 11456 + }, + { + "epoch": 0.73, + "grad_norm": 0.8394375443458557, + "learning_rate": 1.8447679024252825e-06, + "loss": 0.5873, + "step": 11457 + }, + { + "epoch": 0.73, + "grad_norm": 0.8649691939353943, + "learning_rate": 1.8439720593164606e-06, + "loss": 0.5927, + "step": 11458 + }, + { + "epoch": 0.73, + "grad_norm": 0.8406863808631897, + "learning_rate": 1.8431763490969968e-06, + "loss": 0.5791, + "step": 11459 + }, + { + "epoch": 0.73, + "grad_norm": 0.8677908182144165, + "learning_rate": 1.8423807718003967e-06, + "loss": 0.5523, + "step": 11460 + }, + { + "epoch": 0.73, + "grad_norm": 0.8684352040290833, + "learning_rate": 1.8415853274601541e-06, + "loss": 0.5395, + "step": 11461 + }, + { + "epoch": 0.73, + "grad_norm": 0.9893013834953308, + "learning_rate": 1.8407900161097698e-06, + "loss": 0.5907, + "step": 11462 + }, + { + "epoch": 0.73, + "grad_norm": 0.9031257033348083, + "learning_rate": 1.839994837782726e-06, + "loss": 0.576, + "step": 11463 + }, + { + "epoch": 0.73, + "grad_norm": 0.8636795282363892, + "learning_rate": 1.8391997925125066e-06, + "loss": 0.6069, + "step": 11464 + }, + { + "epoch": 0.73, + "grad_norm": 0.8956241607666016, + "learning_rate": 1.8384048803325887e-06, + "loss": 0.5795, + "step": 11465 + }, + { + "epoch": 0.73, + "grad_norm": 0.857373058795929, + "learning_rate": 1.8376101012764424e-06, + "loss": 0.5387, + "step": 11466 + }, + { + "epoch": 0.73, + "grad_norm": 0.875593364238739, + "learning_rate": 1.8368154553775342e-06, + "loss": 0.5665, + "step": 11467 + }, + { + "epoch": 0.73, + "grad_norm": 0.90203857421875, + "learning_rate": 1.8360209426693242e-06, + "loss": 0.5414, + "step": 11468 + }, + { + "epoch": 0.73, + "grad_norm": 0.8598183393478394, + "learning_rate": 1.8352265631852645e-06, + "loss": 0.5466, + "step": 11469 + }, + { + "epoch": 0.73, + "grad_norm": 0.9762682914733887, + "learning_rate": 1.8344323169588045e-06, + "loss": 0.5858, + "step": 11470 + }, + { + "epoch": 0.73, + "grad_norm": 0.8744803071022034, + "learning_rate": 1.8336382040233874e-06, + "loss": 0.5745, + "step": 11471 + }, + { + "epoch": 0.73, + "grad_norm": 0.8582815527915955, + "learning_rate": 1.8328442244124506e-06, + "loss": 0.5103, + "step": 11472 + }, + { + "epoch": 0.73, + "grad_norm": 0.8855782747268677, + "learning_rate": 1.8320503781594273e-06, + "loss": 0.624, + "step": 11473 + }, + { + "epoch": 0.73, + "grad_norm": 0.8730263710021973, + "learning_rate": 1.8312566652977393e-06, + "loss": 0.5789, + "step": 11474 + }, + { + "epoch": 0.73, + "grad_norm": 0.926342248916626, + "learning_rate": 1.8304630858608107e-06, + "loss": 0.6076, + "step": 11475 + }, + { + "epoch": 0.73, + "grad_norm": 0.872226357460022, + "learning_rate": 1.8296696398820579e-06, + "loss": 0.5964, + "step": 11476 + }, + { + "epoch": 0.73, + "grad_norm": 0.9411280751228333, + "learning_rate": 1.828876327394886e-06, + "loss": 0.579, + "step": 11477 + }, + { + "epoch": 0.73, + "grad_norm": 0.8571912050247192, + "learning_rate": 1.8280831484327006e-06, + "loss": 0.5736, + "step": 11478 + }, + { + "epoch": 0.73, + "grad_norm": 0.8927587866783142, + "learning_rate": 1.8272901030288991e-06, + "loss": 0.6098, + "step": 11479 + }, + { + "epoch": 0.73, + "grad_norm": 0.845928966999054, + "learning_rate": 1.8264971912168744e-06, + "loss": 0.5546, + "step": 11480 + }, + { + "epoch": 0.73, + "grad_norm": 0.8704530000686646, + "learning_rate": 1.825704413030015e-06, + "loss": 0.5941, + "step": 11481 + }, + { + "epoch": 0.73, + "grad_norm": 0.9911707639694214, + "learning_rate": 1.8249117685016983e-06, + "loss": 0.5893, + "step": 11482 + }, + { + "epoch": 0.73, + "grad_norm": 0.9240842461585999, + "learning_rate": 1.8241192576653e-06, + "loss": 0.6098, + "step": 11483 + }, + { + "epoch": 0.73, + "grad_norm": 1.0558674335479736, + "learning_rate": 1.8233268805541953e-06, + "loss": 0.5122, + "step": 11484 + }, + { + "epoch": 0.73, + "grad_norm": 0.8903986215591431, + "learning_rate": 1.8225346372017432e-06, + "loss": 0.5622, + "step": 11485 + }, + { + "epoch": 0.73, + "grad_norm": 0.9144458174705505, + "learning_rate": 1.8217425276413037e-06, + "loss": 0.5719, + "step": 11486 + }, + { + "epoch": 0.73, + "grad_norm": 0.8681707978248596, + "learning_rate": 1.8209505519062299e-06, + "loss": 0.554, + "step": 11487 + }, + { + "epoch": 0.73, + "grad_norm": 0.8360025882720947, + "learning_rate": 1.8201587100298694e-06, + "loss": 0.5348, + "step": 11488 + }, + { + "epoch": 0.73, + "grad_norm": 0.8641213178634644, + "learning_rate": 1.8193670020455656e-06, + "loss": 0.5632, + "step": 11489 + }, + { + "epoch": 0.73, + "grad_norm": 0.9490131735801697, + "learning_rate": 1.8185754279866508e-06, + "loss": 0.5454, + "step": 11490 + }, + { + "epoch": 0.73, + "grad_norm": 0.9039450883865356, + "learning_rate": 1.8177839878864562e-06, + "loss": 0.5132, + "step": 11491 + }, + { + "epoch": 0.73, + "grad_norm": 0.8818166851997375, + "learning_rate": 1.8169926817783106e-06, + "loss": 0.5789, + "step": 11492 + }, + { + "epoch": 0.73, + "grad_norm": 0.9329283237457275, + "learning_rate": 1.8162015096955288e-06, + "loss": 0.593, + "step": 11493 + }, + { + "epoch": 0.73, + "grad_norm": 0.9215501546859741, + "learning_rate": 1.8154104716714254e-06, + "loss": 0.5819, + "step": 11494 + }, + { + "epoch": 0.73, + "grad_norm": 0.9087458252906799, + "learning_rate": 1.814619567739309e-06, + "loss": 0.563, + "step": 11495 + }, + { + "epoch": 0.73, + "grad_norm": 0.9145926237106323, + "learning_rate": 1.8138287979324815e-06, + "loss": 0.571, + "step": 11496 + }, + { + "epoch": 0.73, + "grad_norm": 0.8668627142906189, + "learning_rate": 1.8130381622842414e-06, + "loss": 0.4685, + "step": 11497 + }, + { + "epoch": 0.73, + "grad_norm": 0.8847333788871765, + "learning_rate": 1.8122476608278755e-06, + "loss": 0.5919, + "step": 11498 + }, + { + "epoch": 0.73, + "grad_norm": 0.857651948928833, + "learning_rate": 1.8114572935966713e-06, + "loss": 0.574, + "step": 11499 + }, + { + "epoch": 0.73, + "grad_norm": 0.8987635374069214, + "learning_rate": 1.8106670606239086e-06, + "loss": 0.5646, + "step": 11500 + }, + { + "epoch": 0.73, + "grad_norm": 0.8433480262756348, + "learning_rate": 1.8098769619428607e-06, + "loss": 0.5461, + "step": 11501 + }, + { + "epoch": 0.73, + "grad_norm": 0.8456798791885376, + "learning_rate": 1.8090869975867964e-06, + "loss": 0.5271, + "step": 11502 + }, + { + "epoch": 0.73, + "grad_norm": 0.9002053737640381, + "learning_rate": 1.8082971675889798e-06, + "loss": 0.6169, + "step": 11503 + }, + { + "epoch": 0.73, + "grad_norm": 0.8994352221488953, + "learning_rate": 1.8075074719826636e-06, + "loss": 0.5652, + "step": 11504 + }, + { + "epoch": 0.73, + "grad_norm": 0.8358734846115112, + "learning_rate": 1.8067179108011047e-06, + "loss": 0.5523, + "step": 11505 + }, + { + "epoch": 0.73, + "grad_norm": 0.8662353754043579, + "learning_rate": 1.8059284840775443e-06, + "loss": 0.5645, + "step": 11506 + }, + { + "epoch": 0.73, + "grad_norm": 0.8541300296783447, + "learning_rate": 1.8051391918452244e-06, + "loss": 0.5972, + "step": 11507 + }, + { + "epoch": 0.73, + "grad_norm": 0.9039734601974487, + "learning_rate": 1.8043500341373788e-06, + "loss": 0.5526, + "step": 11508 + }, + { + "epoch": 0.73, + "grad_norm": 0.8430119156837463, + "learning_rate": 1.8035610109872364e-06, + "loss": 0.588, + "step": 11509 + }, + { + "epoch": 0.73, + "grad_norm": 0.9176574349403381, + "learning_rate": 1.8027721224280204e-06, + "loss": 0.6203, + "step": 11510 + }, + { + "epoch": 0.73, + "grad_norm": 0.874259352684021, + "learning_rate": 1.8019833684929493e-06, + "loss": 0.5835, + "step": 11511 + }, + { + "epoch": 0.73, + "grad_norm": 0.8891341686248779, + "learning_rate": 1.8011947492152303e-06, + "loss": 0.59, + "step": 11512 + }, + { + "epoch": 0.73, + "grad_norm": 0.9052767753601074, + "learning_rate": 1.8004062646280762e-06, + "loss": 0.5455, + "step": 11513 + }, + { + "epoch": 0.73, + "grad_norm": 0.9022553563117981, + "learning_rate": 1.799617914764682e-06, + "loss": 0.5658, + "step": 11514 + }, + { + "epoch": 0.73, + "grad_norm": 0.9340382814407349, + "learning_rate": 1.7988296996582438e-06, + "loss": 0.6134, + "step": 11515 + }, + { + "epoch": 0.73, + "grad_norm": 0.9259970188140869, + "learning_rate": 1.7980416193419509e-06, + "loss": 0.561, + "step": 11516 + }, + { + "epoch": 0.73, + "grad_norm": 0.9039214849472046, + "learning_rate": 1.7972536738489865e-06, + "loss": 0.5975, + "step": 11517 + }, + { + "epoch": 0.73, + "grad_norm": 0.8830257654190063, + "learning_rate": 1.7964658632125286e-06, + "loss": 0.6171, + "step": 11518 + }, + { + "epoch": 0.73, + "grad_norm": 0.8530765771865845, + "learning_rate": 1.7956781874657508e-06, + "loss": 0.5773, + "step": 11519 + }, + { + "epoch": 0.73, + "grad_norm": 0.9169198274612427, + "learning_rate": 1.7948906466418154e-06, + "loss": 0.6076, + "step": 11520 + }, + { + "epoch": 0.73, + "grad_norm": 0.8221704959869385, + "learning_rate": 1.7941032407738857e-06, + "loss": 0.5213, + "step": 11521 + }, + { + "epoch": 0.73, + "grad_norm": 0.9299726486206055, + "learning_rate": 1.7933159698951153e-06, + "loss": 0.5747, + "step": 11522 + }, + { + "epoch": 0.73, + "grad_norm": 0.8379265666007996, + "learning_rate": 1.7925288340386543e-06, + "loss": 0.6024, + "step": 11523 + }, + { + "epoch": 0.73, + "grad_norm": 0.8032079339027405, + "learning_rate": 1.7917418332376463e-06, + "loss": 0.5009, + "step": 11524 + }, + { + "epoch": 0.73, + "grad_norm": 0.885210394859314, + "learning_rate": 1.7909549675252291e-06, + "loss": 0.5925, + "step": 11525 + }, + { + "epoch": 0.73, + "grad_norm": 0.8500308394432068, + "learning_rate": 1.7901682369345346e-06, + "loss": 0.5507, + "step": 11526 + }, + { + "epoch": 0.73, + "grad_norm": 0.8852202296257019, + "learning_rate": 1.7893816414986915e-06, + "loss": 0.5658, + "step": 11527 + }, + { + "epoch": 0.73, + "grad_norm": 0.9225091934204102, + "learning_rate": 1.7885951812508163e-06, + "loss": 0.5696, + "step": 11528 + }, + { + "epoch": 0.73, + "grad_norm": 0.9719336032867432, + "learning_rate": 1.787808856224027e-06, + "loss": 0.6464, + "step": 11529 + }, + { + "epoch": 0.73, + "grad_norm": 0.9021725654602051, + "learning_rate": 1.7870226664514318e-06, + "loss": 0.5704, + "step": 11530 + }, + { + "epoch": 0.73, + "grad_norm": 0.8923550844192505, + "learning_rate": 1.786236611966135e-06, + "loss": 0.605, + "step": 11531 + }, + { + "epoch": 0.73, + "grad_norm": 0.9258638620376587, + "learning_rate": 1.7854506928012349e-06, + "loss": 0.5321, + "step": 11532 + }, + { + "epoch": 0.73, + "grad_norm": 0.8562982082366943, + "learning_rate": 1.784664908989825e-06, + "loss": 0.5608, + "step": 11533 + }, + { + "epoch": 0.73, + "grad_norm": 0.9579175710678101, + "learning_rate": 1.7838792605649874e-06, + "loss": 0.6364, + "step": 11534 + }, + { + "epoch": 0.73, + "grad_norm": 0.8694881200790405, + "learning_rate": 1.7830937475598092e-06, + "loss": 0.563, + "step": 11535 + }, + { + "epoch": 0.73, + "grad_norm": 0.9621427655220032, + "learning_rate": 1.7823083700073607e-06, + "loss": 0.5745, + "step": 11536 + }, + { + "epoch": 0.73, + "grad_norm": 0.8175010085105896, + "learning_rate": 1.781523127940713e-06, + "loss": 0.5574, + "step": 11537 + }, + { + "epoch": 0.73, + "grad_norm": 0.9051305055618286, + "learning_rate": 1.7807380213929304e-06, + "loss": 0.5485, + "step": 11538 + }, + { + "epoch": 0.73, + "grad_norm": 0.9284529089927673, + "learning_rate": 1.7799530503970707e-06, + "loss": 0.5776, + "step": 11539 + }, + { + "epoch": 0.73, + "grad_norm": 0.9079828262329102, + "learning_rate": 1.7791682149861866e-06, + "loss": 0.5703, + "step": 11540 + }, + { + "epoch": 0.73, + "grad_norm": 0.8822880387306213, + "learning_rate": 1.778383515193326e-06, + "loss": 0.6083, + "step": 11541 + }, + { + "epoch": 0.73, + "grad_norm": 0.8300610184669495, + "learning_rate": 1.777598951051525e-06, + "loss": 0.5038, + "step": 11542 + }, + { + "epoch": 0.73, + "grad_norm": 0.8688510656356812, + "learning_rate": 1.7768145225938254e-06, + "loss": 0.5166, + "step": 11543 + }, + { + "epoch": 0.73, + "grad_norm": 0.8871831297874451, + "learning_rate": 1.7760302298532522e-06, + "loss": 0.5148, + "step": 11544 + }, + { + "epoch": 0.73, + "grad_norm": 0.8553435802459717, + "learning_rate": 1.7752460728628308e-06, + "loss": 0.5087, + "step": 11545 + }, + { + "epoch": 0.73, + "grad_norm": 0.8729947805404663, + "learning_rate": 1.7744620516555804e-06, + "loss": 0.5862, + "step": 11546 + }, + { + "epoch": 0.73, + "grad_norm": 0.9969896078109741, + "learning_rate": 1.7736781662645092e-06, + "loss": 0.6322, + "step": 11547 + }, + { + "epoch": 0.73, + "grad_norm": 0.9456208944320679, + "learning_rate": 1.7728944167226287e-06, + "loss": 0.5841, + "step": 11548 + }, + { + "epoch": 0.73, + "grad_norm": 0.9061382412910461, + "learning_rate": 1.772110803062939e-06, + "loss": 0.5942, + "step": 11549 + }, + { + "epoch": 0.73, + "grad_norm": 0.9012535810470581, + "learning_rate": 1.7713273253184331e-06, + "loss": 0.5462, + "step": 11550 + }, + { + "epoch": 0.73, + "grad_norm": 0.8942342400550842, + "learning_rate": 1.7705439835221022e-06, + "loss": 0.6231, + "step": 11551 + }, + { + "epoch": 0.73, + "grad_norm": 0.9196451902389526, + "learning_rate": 1.7697607777069291e-06, + "loss": 0.5851, + "step": 11552 + }, + { + "epoch": 0.73, + "grad_norm": 0.9161397814750671, + "learning_rate": 1.7689777079058929e-06, + "loss": 0.5397, + "step": 11553 + }, + { + "epoch": 0.73, + "grad_norm": 0.870907187461853, + "learning_rate": 1.7681947741519668e-06, + "loss": 0.5578, + "step": 11554 + }, + { + "epoch": 0.73, + "grad_norm": 0.9699699282646179, + "learning_rate": 1.7674119764781129e-06, + "loss": 0.639, + "step": 11555 + }, + { + "epoch": 0.73, + "grad_norm": 0.8969030380249023, + "learning_rate": 1.7666293149172969e-06, + "loss": 0.6149, + "step": 11556 + }, + { + "epoch": 0.73, + "grad_norm": 0.8886342644691467, + "learning_rate": 1.7658467895024744e-06, + "loss": 0.5669, + "step": 11557 + }, + { + "epoch": 0.73, + "grad_norm": 0.9235454797744751, + "learning_rate": 1.7650644002665906e-06, + "loss": 0.6232, + "step": 11558 + }, + { + "epoch": 0.73, + "grad_norm": 0.8997302055358887, + "learning_rate": 1.7642821472425918e-06, + "loss": 0.5862, + "step": 11559 + }, + { + "epoch": 0.73, + "grad_norm": 0.965051531791687, + "learning_rate": 1.7635000304634154e-06, + "loss": 0.569, + "step": 11560 + }, + { + "epoch": 0.73, + "grad_norm": 0.8321825861930847, + "learning_rate": 1.762718049961994e-06, + "loss": 0.5761, + "step": 11561 + }, + { + "epoch": 0.73, + "grad_norm": 0.8804091811180115, + "learning_rate": 1.7619362057712552e-06, + "loss": 0.5491, + "step": 11562 + }, + { + "epoch": 0.73, + "grad_norm": 0.8647125363349915, + "learning_rate": 1.761154497924117e-06, + "loss": 0.5474, + "step": 11563 + }, + { + "epoch": 0.73, + "grad_norm": 0.9047082662582397, + "learning_rate": 1.7603729264534936e-06, + "loss": 0.5333, + "step": 11564 + }, + { + "epoch": 0.73, + "grad_norm": 0.8758959174156189, + "learning_rate": 1.7595914913923001e-06, + "loss": 0.6063, + "step": 11565 + }, + { + "epoch": 0.73, + "grad_norm": 0.8838177919387817, + "learning_rate": 1.7588101927734346e-06, + "loss": 0.5993, + "step": 11566 + }, + { + "epoch": 0.73, + "grad_norm": 0.8192143440246582, + "learning_rate": 1.7580290306297965e-06, + "loss": 0.5965, + "step": 11567 + }, + { + "epoch": 0.73, + "grad_norm": 0.8735188245773315, + "learning_rate": 1.7572480049942781e-06, + "loss": 0.6107, + "step": 11568 + }, + { + "epoch": 0.73, + "grad_norm": 0.8884807229042053, + "learning_rate": 1.7564671158997653e-06, + "loss": 0.5726, + "step": 11569 + }, + { + "epoch": 0.73, + "grad_norm": 0.9197561144828796, + "learning_rate": 1.755686363379141e-06, + "loss": 0.5968, + "step": 11570 + }, + { + "epoch": 0.73, + "grad_norm": 0.9510713815689087, + "learning_rate": 1.7549057474652753e-06, + "loss": 0.6455, + "step": 11571 + }, + { + "epoch": 0.73, + "grad_norm": 0.9018495082855225, + "learning_rate": 1.7541252681910386e-06, + "loss": 0.5914, + "step": 11572 + }, + { + "epoch": 0.73, + "grad_norm": 0.8656198382377625, + "learning_rate": 1.7533449255892986e-06, + "loss": 0.5308, + "step": 11573 + }, + { + "epoch": 0.73, + "grad_norm": 0.9031473994255066, + "learning_rate": 1.7525647196929079e-06, + "loss": 0.5924, + "step": 11574 + }, + { + "epoch": 0.73, + "grad_norm": 0.966624915599823, + "learning_rate": 1.7517846505347197e-06, + "loss": 0.5615, + "step": 11575 + }, + { + "epoch": 0.73, + "grad_norm": 0.8910838961601257, + "learning_rate": 1.751004718147582e-06, + "loss": 0.5599, + "step": 11576 + }, + { + "epoch": 0.73, + "grad_norm": 0.8924875855445862, + "learning_rate": 1.7502249225643291e-06, + "loss": 0.5545, + "step": 11577 + }, + { + "epoch": 0.73, + "grad_norm": 0.8473634123802185, + "learning_rate": 1.7494452638178039e-06, + "loss": 0.5356, + "step": 11578 + }, + { + "epoch": 0.73, + "grad_norm": 0.8900013566017151, + "learning_rate": 1.7486657419408287e-06, + "loss": 0.5623, + "step": 11579 + }, + { + "epoch": 0.73, + "grad_norm": 0.859286367893219, + "learning_rate": 1.7478863569662286e-06, + "loss": 0.5538, + "step": 11580 + }, + { + "epoch": 0.73, + "grad_norm": 0.9663856029510498, + "learning_rate": 1.7471071089268204e-06, + "loss": 0.5877, + "step": 11581 + }, + { + "epoch": 0.73, + "grad_norm": 0.9221107959747314, + "learning_rate": 1.7463279978554166e-06, + "loss": 0.5999, + "step": 11582 + }, + { + "epoch": 0.73, + "grad_norm": 0.8356893062591553, + "learning_rate": 1.745549023784821e-06, + "loss": 0.5129, + "step": 11583 + }, + { + "epoch": 0.73, + "grad_norm": 0.9485192894935608, + "learning_rate": 1.7447701867478372e-06, + "loss": 0.5601, + "step": 11584 + }, + { + "epoch": 0.73, + "grad_norm": 0.9047239422798157, + "learning_rate": 1.7439914867772529e-06, + "loss": 0.5176, + "step": 11585 + }, + { + "epoch": 0.73, + "grad_norm": 0.9019331932067871, + "learning_rate": 1.7432129239058637e-06, + "loss": 0.5698, + "step": 11586 + }, + { + "epoch": 0.73, + "grad_norm": 0.8926165699958801, + "learning_rate": 1.7424344981664475e-06, + "loss": 0.6147, + "step": 11587 + }, + { + "epoch": 0.73, + "grad_norm": 0.9237696528434753, + "learning_rate": 1.7416562095917822e-06, + "loss": 0.5531, + "step": 11588 + }, + { + "epoch": 0.73, + "grad_norm": 0.8881582021713257, + "learning_rate": 1.7408780582146383e-06, + "loss": 0.5981, + "step": 11589 + }, + { + "epoch": 0.73, + "grad_norm": 0.8784075975418091, + "learning_rate": 1.7401000440677824e-06, + "loss": 0.5443, + "step": 11590 + }, + { + "epoch": 0.73, + "grad_norm": 0.8897961974143982, + "learning_rate": 1.7393221671839727e-06, + "loss": 0.5622, + "step": 11591 + }, + { + "epoch": 0.73, + "grad_norm": 0.9459214806556702, + "learning_rate": 1.7385444275959657e-06, + "loss": 0.5154, + "step": 11592 + }, + { + "epoch": 0.73, + "grad_norm": 0.8417472839355469, + "learning_rate": 1.7377668253365054e-06, + "loss": 0.5909, + "step": 11593 + }, + { + "epoch": 0.73, + "grad_norm": 0.8860768675804138, + "learning_rate": 1.7369893604383353e-06, + "loss": 0.5721, + "step": 11594 + }, + { + "epoch": 0.73, + "grad_norm": 0.8790547251701355, + "learning_rate": 1.736212032934192e-06, + "loss": 0.5747, + "step": 11595 + }, + { + "epoch": 0.73, + "grad_norm": 0.8245856165885925, + "learning_rate": 1.7354348428568063e-06, + "loss": 0.5412, + "step": 11596 + }, + { + "epoch": 0.73, + "grad_norm": 0.9556723237037659, + "learning_rate": 1.7346577902389028e-06, + "loss": 0.5856, + "step": 11597 + }, + { + "epoch": 0.73, + "grad_norm": 0.8890882730484009, + "learning_rate": 1.7338808751132002e-06, + "loss": 0.564, + "step": 11598 + }, + { + "epoch": 0.73, + "grad_norm": 0.8770986795425415, + "learning_rate": 1.7331040975124125e-06, + "loss": 0.5536, + "step": 11599 + }, + { + "epoch": 0.73, + "grad_norm": 0.8428150415420532, + "learning_rate": 1.7323274574692479e-06, + "loss": 0.529, + "step": 11600 + }, + { + "epoch": 0.73, + "grad_norm": 0.9159516096115112, + "learning_rate": 1.7315509550164044e-06, + "loss": 0.5696, + "step": 11601 + }, + { + "epoch": 0.74, + "grad_norm": 0.8555203676223755, + "learning_rate": 1.730774590186579e-06, + "loss": 0.6464, + "step": 11602 + }, + { + "epoch": 0.74, + "grad_norm": 0.911897599697113, + "learning_rate": 1.7299983630124663e-06, + "loss": 0.5728, + "step": 11603 + }, + { + "epoch": 0.74, + "grad_norm": 0.8873314261436462, + "learning_rate": 1.729222273526745e-06, + "loss": 0.5747, + "step": 11604 + }, + { + "epoch": 0.74, + "grad_norm": 0.8664464354515076, + "learning_rate": 1.7284463217620955e-06, + "loss": 0.551, + "step": 11605 + }, + { + "epoch": 0.74, + "grad_norm": 0.9194732308387756, + "learning_rate": 1.727670507751193e-06, + "loss": 0.5568, + "step": 11606 + }, + { + "epoch": 0.74, + "grad_norm": 0.9157373905181885, + "learning_rate": 1.7268948315266975e-06, + "loss": 0.6275, + "step": 11607 + }, + { + "epoch": 0.74, + "grad_norm": 0.9254802465438843, + "learning_rate": 1.7261192931212783e-06, + "loss": 0.604, + "step": 11608 + }, + { + "epoch": 0.74, + "grad_norm": 0.870588481426239, + "learning_rate": 1.7253438925675847e-06, + "loss": 0.526, + "step": 11609 + }, + { + "epoch": 0.74, + "grad_norm": 0.9618417024612427, + "learning_rate": 1.7245686298982678e-06, + "loss": 0.6359, + "step": 11610 + }, + { + "epoch": 0.74, + "grad_norm": 0.9000369310379028, + "learning_rate": 1.723793505145972e-06, + "loss": 0.6009, + "step": 11611 + }, + { + "epoch": 0.74, + "grad_norm": 0.8883331418037415, + "learning_rate": 1.7230185183433345e-06, + "loss": 0.5614, + "step": 11612 + }, + { + "epoch": 0.74, + "grad_norm": 0.8387264609336853, + "learning_rate": 1.722243669522987e-06, + "loss": 0.5603, + "step": 11613 + }, + { + "epoch": 0.74, + "grad_norm": 0.8569300770759583, + "learning_rate": 1.7214689587175582e-06, + "loss": 0.6069, + "step": 11614 + }, + { + "epoch": 0.74, + "grad_norm": 0.8399550318717957, + "learning_rate": 1.720694385959663e-06, + "loss": 0.5225, + "step": 11615 + }, + { + "epoch": 0.74, + "grad_norm": 0.8741680979728699, + "learning_rate": 1.7199199512819225e-06, + "loss": 0.5591, + "step": 11616 + }, + { + "epoch": 0.74, + "grad_norm": 0.9207227826118469, + "learning_rate": 1.7191456547169405e-06, + "loss": 0.6151, + "step": 11617 + }, + { + "epoch": 0.74, + "grad_norm": 0.8906126022338867, + "learning_rate": 1.718371496297322e-06, + "loss": 0.606, + "step": 11618 + }, + { + "epoch": 0.74, + "grad_norm": 0.9442402720451355, + "learning_rate": 1.717597476055664e-06, + "loss": 0.6094, + "step": 11619 + }, + { + "epoch": 0.74, + "grad_norm": 0.9012939929962158, + "learning_rate": 1.716823594024557e-06, + "loss": 0.5826, + "step": 11620 + }, + { + "epoch": 0.74, + "grad_norm": 0.880403995513916, + "learning_rate": 1.716049850236588e-06, + "loss": 0.5725, + "step": 11621 + }, + { + "epoch": 0.74, + "grad_norm": 0.9011920690536499, + "learning_rate": 1.7152762447243365e-06, + "loss": 0.5993, + "step": 11622 + }, + { + "epoch": 0.74, + "grad_norm": 0.8702940940856934, + "learning_rate": 1.7145027775203748e-06, + "loss": 0.5512, + "step": 11623 + }, + { + "epoch": 0.74, + "grad_norm": 0.8984467387199402, + "learning_rate": 1.7137294486572714e-06, + "loss": 0.5759, + "step": 11624 + }, + { + "epoch": 0.74, + "grad_norm": 0.9334822297096252, + "learning_rate": 1.7129562581675885e-06, + "loss": 0.5788, + "step": 11625 + }, + { + "epoch": 0.74, + "grad_norm": 0.8600862622261047, + "learning_rate": 1.7121832060838833e-06, + "loss": 0.5691, + "step": 11626 + }, + { + "epoch": 0.74, + "grad_norm": 0.9075511693954468, + "learning_rate": 1.711410292438707e-06, + "loss": 0.6158, + "step": 11627 + }, + { + "epoch": 0.74, + "grad_norm": 0.8380544185638428, + "learning_rate": 1.7106375172646e-06, + "loss": 0.5239, + "step": 11628 + }, + { + "epoch": 0.74, + "grad_norm": 0.8987744450569153, + "learning_rate": 1.709864880594106e-06, + "loss": 0.5677, + "step": 11629 + }, + { + "epoch": 0.74, + "grad_norm": 0.8771459460258484, + "learning_rate": 1.7090923824597578e-06, + "loss": 0.6342, + "step": 11630 + }, + { + "epoch": 0.74, + "grad_norm": 0.9104797840118408, + "learning_rate": 1.70832002289408e-06, + "loss": 0.6225, + "step": 11631 + }, + { + "epoch": 0.74, + "grad_norm": 0.8693386316299438, + "learning_rate": 1.7075478019295943e-06, + "loss": 0.5796, + "step": 11632 + }, + { + "epoch": 0.74, + "grad_norm": 0.8541246056556702, + "learning_rate": 1.7067757195988178e-06, + "loss": 0.5259, + "step": 11633 + }, + { + "epoch": 0.74, + "grad_norm": 0.9266880750656128, + "learning_rate": 1.706003775934259e-06, + "loss": 0.5968, + "step": 11634 + }, + { + "epoch": 0.74, + "grad_norm": 0.8783169388771057, + "learning_rate": 1.705231970968424e-06, + "loss": 0.5978, + "step": 11635 + }, + { + "epoch": 0.74, + "grad_norm": 0.9030970335006714, + "learning_rate": 1.704460304733806e-06, + "loss": 0.6016, + "step": 11636 + }, + { + "epoch": 0.74, + "grad_norm": 0.9360423684120178, + "learning_rate": 1.7036887772629012e-06, + "loss": 0.6177, + "step": 11637 + }, + { + "epoch": 0.74, + "grad_norm": 0.8983248472213745, + "learning_rate": 1.7029173885881973e-06, + "loss": 0.5724, + "step": 11638 + }, + { + "epoch": 0.74, + "grad_norm": 0.8801354765892029, + "learning_rate": 1.7021461387421705e-06, + "loss": 0.5717, + "step": 11639 + }, + { + "epoch": 0.74, + "grad_norm": 0.8851686120033264, + "learning_rate": 1.7013750277572977e-06, + "loss": 0.5777, + "step": 11640 + }, + { + "epoch": 0.74, + "grad_norm": 0.9012311100959778, + "learning_rate": 1.7006040556660468e-06, + "loss": 0.5995, + "step": 11641 + }, + { + "epoch": 0.74, + "grad_norm": 0.7989736199378967, + "learning_rate": 1.6998332225008817e-06, + "loss": 0.5581, + "step": 11642 + }, + { + "epoch": 0.74, + "grad_norm": 0.8802455067634583, + "learning_rate": 1.6990625282942607e-06, + "loss": 0.5862, + "step": 11643 + }, + { + "epoch": 0.74, + "grad_norm": 0.938679039478302, + "learning_rate": 1.6982919730786323e-06, + "loss": 0.5618, + "step": 11644 + }, + { + "epoch": 0.74, + "grad_norm": 0.9325195550918579, + "learning_rate": 1.697521556886441e-06, + "loss": 0.6066, + "step": 11645 + }, + { + "epoch": 0.74, + "grad_norm": 0.8584638237953186, + "learning_rate": 1.6967512797501317e-06, + "loss": 0.5666, + "step": 11646 + }, + { + "epoch": 0.74, + "grad_norm": 0.9101821780204773, + "learning_rate": 1.6959811417021338e-06, + "loss": 0.5435, + "step": 11647 + }, + { + "epoch": 0.74, + "grad_norm": 0.9440627694129944, + "learning_rate": 1.6952111427748758e-06, + "loss": 0.5705, + "step": 11648 + }, + { + "epoch": 0.74, + "grad_norm": 0.9121119379997253, + "learning_rate": 1.69444128300078e-06, + "loss": 0.538, + "step": 11649 + }, + { + "epoch": 0.74, + "grad_norm": 0.8702934980392456, + "learning_rate": 1.6936715624122623e-06, + "loss": 0.561, + "step": 11650 + }, + { + "epoch": 0.74, + "grad_norm": 0.872009813785553, + "learning_rate": 1.6929019810417352e-06, + "loss": 0.5535, + "step": 11651 + }, + { + "epoch": 0.74, + "grad_norm": 0.8676707744598389, + "learning_rate": 1.6921325389215993e-06, + "loss": 0.5736, + "step": 11652 + }, + { + "epoch": 0.74, + "grad_norm": 0.8695118427276611, + "learning_rate": 1.6913632360842553e-06, + "loss": 0.5864, + "step": 11653 + }, + { + "epoch": 0.74, + "grad_norm": 0.8931376934051514, + "learning_rate": 1.6905940725620951e-06, + "loss": 0.5808, + "step": 11654 + }, + { + "epoch": 0.74, + "grad_norm": 0.9246284365653992, + "learning_rate": 1.6898250483875063e-06, + "loss": 0.6203, + "step": 11655 + }, + { + "epoch": 0.74, + "grad_norm": 0.8424333333969116, + "learning_rate": 1.6890561635928692e-06, + "loss": 0.5167, + "step": 11656 + }, + { + "epoch": 0.74, + "grad_norm": 0.8894586563110352, + "learning_rate": 1.6882874182105613e-06, + "loss": 0.5479, + "step": 11657 + }, + { + "epoch": 0.74, + "grad_norm": 0.8806304931640625, + "learning_rate": 1.6875188122729458e-06, + "loss": 0.5519, + "step": 11658 + }, + { + "epoch": 0.74, + "grad_norm": 0.8468473553657532, + "learning_rate": 1.6867503458123913e-06, + "loss": 0.5492, + "step": 11659 + }, + { + "epoch": 0.74, + "grad_norm": 0.8903117775917053, + "learning_rate": 1.6859820188612557e-06, + "loss": 0.5855, + "step": 11660 + }, + { + "epoch": 0.74, + "grad_norm": 0.8897051215171814, + "learning_rate": 1.6852138314518873e-06, + "loss": 0.5357, + "step": 11661 + }, + { + "epoch": 0.74, + "grad_norm": 0.9220659732818604, + "learning_rate": 1.6844457836166329e-06, + "loss": 0.5354, + "step": 11662 + }, + { + "epoch": 0.74, + "grad_norm": 0.9147717356681824, + "learning_rate": 1.6836778753878324e-06, + "loss": 0.5965, + "step": 11663 + }, + { + "epoch": 0.74, + "grad_norm": 0.9581725597381592, + "learning_rate": 1.68291010679782e-06, + "loss": 0.556, + "step": 11664 + }, + { + "epoch": 0.74, + "grad_norm": 0.8362496495246887, + "learning_rate": 1.6821424778789252e-06, + "loss": 0.556, + "step": 11665 + }, + { + "epoch": 0.74, + "grad_norm": 0.8997658491134644, + "learning_rate": 1.6813749886634657e-06, + "loss": 0.5754, + "step": 11666 + }, + { + "epoch": 0.74, + "grad_norm": 0.8712424039840698, + "learning_rate": 1.6806076391837622e-06, + "loss": 0.5229, + "step": 11667 + }, + { + "epoch": 0.74, + "grad_norm": 0.9129472374916077, + "learning_rate": 1.6798404294721254e-06, + "loss": 0.5505, + "step": 11668 + }, + { + "epoch": 0.74, + "grad_norm": 0.8357523083686829, + "learning_rate": 1.6790733595608567e-06, + "loss": 0.5906, + "step": 11669 + }, + { + "epoch": 0.74, + "grad_norm": 0.9335298538208008, + "learning_rate": 1.6783064294822559e-06, + "loss": 0.5469, + "step": 11670 + }, + { + "epoch": 0.74, + "grad_norm": 0.8826762437820435, + "learning_rate": 1.677539639268616e-06, + "loss": 0.5969, + "step": 11671 + }, + { + "epoch": 0.74, + "grad_norm": 0.8778190612792969, + "learning_rate": 1.6767729889522239e-06, + "loss": 0.5918, + "step": 11672 + }, + { + "epoch": 0.74, + "grad_norm": 0.9307227730751038, + "learning_rate": 1.6760064785653624e-06, + "loss": 0.593, + "step": 11673 + }, + { + "epoch": 0.74, + "grad_norm": 0.9407718181610107, + "learning_rate": 1.675240108140303e-06, + "loss": 0.5833, + "step": 11674 + }, + { + "epoch": 0.74, + "grad_norm": 0.8945533633232117, + "learning_rate": 1.674473877709315e-06, + "loss": 0.5462, + "step": 11675 + }, + { + "epoch": 0.74, + "grad_norm": 0.9327276945114136, + "learning_rate": 1.6737077873046669e-06, + "loss": 0.6013, + "step": 11676 + }, + { + "epoch": 0.74, + "grad_norm": 0.9071036577224731, + "learning_rate": 1.672941836958611e-06, + "loss": 0.5889, + "step": 11677 + }, + { + "epoch": 0.74, + "grad_norm": 0.9192063212394714, + "learning_rate": 1.6721760267033998e-06, + "loss": 0.5819, + "step": 11678 + }, + { + "epoch": 0.74, + "grad_norm": 0.8941338062286377, + "learning_rate": 1.6714103565712798e-06, + "loss": 0.5678, + "step": 11679 + }, + { + "epoch": 0.74, + "grad_norm": 0.8962938785552979, + "learning_rate": 1.6706448265944902e-06, + "loss": 0.5746, + "step": 11680 + }, + { + "epoch": 0.74, + "grad_norm": 0.8910273313522339, + "learning_rate": 1.6698794368052669e-06, + "loss": 0.5355, + "step": 11681 + }, + { + "epoch": 0.74, + "grad_norm": 0.872856855392456, + "learning_rate": 1.6691141872358336e-06, + "loss": 0.5956, + "step": 11682 + }, + { + "epoch": 0.74, + "grad_norm": 0.8627503514289856, + "learning_rate": 1.668349077918413e-06, + "loss": 0.5305, + "step": 11683 + }, + { + "epoch": 0.74, + "grad_norm": 0.9181475043296814, + "learning_rate": 1.6675841088852268e-06, + "loss": 0.5133, + "step": 11684 + }, + { + "epoch": 0.74, + "grad_norm": 0.8350986242294312, + "learning_rate": 1.666819280168479e-06, + "loss": 0.5133, + "step": 11685 + }, + { + "epoch": 0.74, + "grad_norm": 0.8253143429756165, + "learning_rate": 1.6660545918003762e-06, + "loss": 0.5165, + "step": 11686 + }, + { + "epoch": 0.74, + "grad_norm": 0.9417761564254761, + "learning_rate": 1.6652900438131181e-06, + "loss": 0.6201, + "step": 11687 + }, + { + "epoch": 0.74, + "grad_norm": 0.9664661288261414, + "learning_rate": 1.6645256362388922e-06, + "loss": 0.569, + "step": 11688 + }, + { + "epoch": 0.74, + "grad_norm": 0.9326826333999634, + "learning_rate": 1.663761369109892e-06, + "loss": 0.6134, + "step": 11689 + }, + { + "epoch": 0.74, + "grad_norm": 0.9516881108283997, + "learning_rate": 1.662997242458293e-06, + "loss": 0.5727, + "step": 11690 + }, + { + "epoch": 0.74, + "grad_norm": 0.9566120505332947, + "learning_rate": 1.6622332563162714e-06, + "loss": 0.6568, + "step": 11691 + }, + { + "epoch": 0.74, + "grad_norm": 0.838861882686615, + "learning_rate": 1.6614694107159962e-06, + "loss": 0.5633, + "step": 11692 + }, + { + "epoch": 0.74, + "grad_norm": 0.8725398182868958, + "learning_rate": 1.6607057056896304e-06, + "loss": 0.537, + "step": 11693 + }, + { + "epoch": 0.74, + "grad_norm": 0.8720897436141968, + "learning_rate": 1.6599421412693307e-06, + "loss": 0.5786, + "step": 11694 + }, + { + "epoch": 0.74, + "grad_norm": 0.8999429941177368, + "learning_rate": 1.65917871748725e-06, + "loss": 0.5867, + "step": 11695 + }, + { + "epoch": 0.74, + "grad_norm": 0.9211562275886536, + "learning_rate": 1.6584154343755276e-06, + "loss": 0.5425, + "step": 11696 + }, + { + "epoch": 0.74, + "grad_norm": 0.856549859046936, + "learning_rate": 1.6576522919663107e-06, + "loss": 0.5257, + "step": 11697 + }, + { + "epoch": 0.74, + "grad_norm": 0.88596510887146, + "learning_rate": 1.6568892902917267e-06, + "loss": 0.564, + "step": 11698 + }, + { + "epoch": 0.74, + "grad_norm": 0.8725159168243408, + "learning_rate": 1.6561264293839051e-06, + "loss": 0.5379, + "step": 11699 + }, + { + "epoch": 0.74, + "grad_norm": 0.9307702779769897, + "learning_rate": 1.6553637092749685e-06, + "loss": 0.5769, + "step": 11700 + }, + { + "epoch": 0.74, + "grad_norm": 0.8692091107368469, + "learning_rate": 1.6546011299970276e-06, + "loss": 0.5311, + "step": 11701 + }, + { + "epoch": 0.74, + "grad_norm": 0.8620391488075256, + "learning_rate": 1.6538386915821975e-06, + "loss": 0.5296, + "step": 11702 + }, + { + "epoch": 0.74, + "grad_norm": 0.8763535022735596, + "learning_rate": 1.6530763940625805e-06, + "loss": 0.5436, + "step": 11703 + }, + { + "epoch": 0.74, + "grad_norm": 0.970310628414154, + "learning_rate": 1.6523142374702722e-06, + "loss": 0.607, + "step": 11704 + }, + { + "epoch": 0.74, + "grad_norm": 0.9026583433151245, + "learning_rate": 1.6515522218373658e-06, + "loss": 0.5318, + "step": 11705 + }, + { + "epoch": 0.74, + "grad_norm": 0.9070267081260681, + "learning_rate": 1.6507903471959468e-06, + "loss": 0.5649, + "step": 11706 + }, + { + "epoch": 0.74, + "grad_norm": 0.9056694507598877, + "learning_rate": 1.6500286135780951e-06, + "loss": 0.6071, + "step": 11707 + }, + { + "epoch": 0.74, + "grad_norm": 0.8863142132759094, + "learning_rate": 1.6492670210158863e-06, + "loss": 0.5855, + "step": 11708 + }, + { + "epoch": 0.74, + "grad_norm": 0.8576910495758057, + "learning_rate": 1.6485055695413838e-06, + "loss": 0.5702, + "step": 11709 + }, + { + "epoch": 0.74, + "grad_norm": 0.9119299650192261, + "learning_rate": 1.6477442591866544e-06, + "loss": 0.5564, + "step": 11710 + }, + { + "epoch": 0.74, + "grad_norm": 0.9486945867538452, + "learning_rate": 1.6469830899837547e-06, + "loss": 0.5378, + "step": 11711 + }, + { + "epoch": 0.74, + "grad_norm": 0.8819604516029358, + "learning_rate": 1.6462220619647306e-06, + "loss": 0.5903, + "step": 11712 + }, + { + "epoch": 0.74, + "grad_norm": 0.8723688125610352, + "learning_rate": 1.6454611751616283e-06, + "loss": 0.5676, + "step": 11713 + }, + { + "epoch": 0.74, + "grad_norm": 0.8502789735794067, + "learning_rate": 1.6447004296064867e-06, + "loss": 0.543, + "step": 11714 + }, + { + "epoch": 0.74, + "grad_norm": 0.8809540867805481, + "learning_rate": 1.6439398253313377e-06, + "loss": 0.5899, + "step": 11715 + }, + { + "epoch": 0.74, + "grad_norm": 0.8517667055130005, + "learning_rate": 1.6431793623682096e-06, + "loss": 0.5581, + "step": 11716 + }, + { + "epoch": 0.74, + "grad_norm": 0.9315950870513916, + "learning_rate": 1.642419040749119e-06, + "loss": 0.6287, + "step": 11717 + }, + { + "epoch": 0.74, + "grad_norm": 0.9184224605560303, + "learning_rate": 1.6416588605060812e-06, + "loss": 0.5907, + "step": 11718 + }, + { + "epoch": 0.74, + "grad_norm": 0.9085866808891296, + "learning_rate": 1.6408988216711092e-06, + "loss": 0.5375, + "step": 11719 + }, + { + "epoch": 0.74, + "grad_norm": 0.8682625889778137, + "learning_rate": 1.6401389242762006e-06, + "loss": 0.5492, + "step": 11720 + }, + { + "epoch": 0.74, + "grad_norm": 0.871749997138977, + "learning_rate": 1.639379168353354e-06, + "loss": 0.5566, + "step": 11721 + }, + { + "epoch": 0.74, + "grad_norm": 0.8846398591995239, + "learning_rate": 1.6386195539345596e-06, + "loss": 0.566, + "step": 11722 + }, + { + "epoch": 0.74, + "grad_norm": 0.8326940536499023, + "learning_rate": 1.6378600810518026e-06, + "loss": 0.6035, + "step": 11723 + }, + { + "epoch": 0.74, + "grad_norm": 0.8957687616348267, + "learning_rate": 1.6371007497370612e-06, + "loss": 0.5666, + "step": 11724 + }, + { + "epoch": 0.74, + "grad_norm": 0.9184751510620117, + "learning_rate": 1.6363415600223103e-06, + "loss": 0.6243, + "step": 11725 + }, + { + "epoch": 0.74, + "grad_norm": 0.8690382838249207, + "learning_rate": 1.6355825119395118e-06, + "loss": 0.5514, + "step": 11726 + }, + { + "epoch": 0.74, + "grad_norm": 0.8801531791687012, + "learning_rate": 1.634823605520633e-06, + "loss": 0.5549, + "step": 11727 + }, + { + "epoch": 0.74, + "grad_norm": 0.8974312543869019, + "learning_rate": 1.634064840797624e-06, + "loss": 0.5657, + "step": 11728 + }, + { + "epoch": 0.74, + "grad_norm": 0.8583878874778748, + "learning_rate": 1.6333062178024355e-06, + "loss": 0.5825, + "step": 11729 + }, + { + "epoch": 0.74, + "grad_norm": 0.8436487913131714, + "learning_rate": 1.63254773656701e-06, + "loss": 0.5514, + "step": 11730 + }, + { + "epoch": 0.74, + "grad_norm": 0.8887004852294922, + "learning_rate": 1.6317893971232852e-06, + "loss": 0.5557, + "step": 11731 + }, + { + "epoch": 0.74, + "grad_norm": 0.9396257400512695, + "learning_rate": 1.6310311995031913e-06, + "loss": 0.5989, + "step": 11732 + }, + { + "epoch": 0.74, + "grad_norm": 0.9944149851799011, + "learning_rate": 1.6302731437386555e-06, + "loss": 0.6509, + "step": 11733 + }, + { + "epoch": 0.74, + "grad_norm": 0.8768121600151062, + "learning_rate": 1.6295152298615936e-06, + "loss": 0.5258, + "step": 11734 + }, + { + "epoch": 0.74, + "grad_norm": 0.9114717245101929, + "learning_rate": 1.62875745790392e-06, + "loss": 0.5883, + "step": 11735 + }, + { + "epoch": 0.74, + "grad_norm": 0.9182329177856445, + "learning_rate": 1.6279998278975428e-06, + "loss": 0.6177, + "step": 11736 + }, + { + "epoch": 0.74, + "grad_norm": 0.8736885190010071, + "learning_rate": 1.627242339874362e-06, + "loss": 0.5619, + "step": 11737 + }, + { + "epoch": 0.74, + "grad_norm": 0.8888165950775146, + "learning_rate": 1.6264849938662753e-06, + "loss": 0.6056, + "step": 11738 + }, + { + "epoch": 0.74, + "grad_norm": 0.9017614126205444, + "learning_rate": 1.6257277899051666e-06, + "loss": 0.6036, + "step": 11739 + }, + { + "epoch": 0.74, + "grad_norm": 0.9094336628913879, + "learning_rate": 1.6249707280229237e-06, + "loss": 0.6252, + "step": 11740 + }, + { + "epoch": 0.74, + "grad_norm": 0.8804279565811157, + "learning_rate": 1.6242138082514247e-06, + "loss": 0.6023, + "step": 11741 + }, + { + "epoch": 0.74, + "grad_norm": 0.8932421207427979, + "learning_rate": 1.6234570306225366e-06, + "loss": 0.5898, + "step": 11742 + }, + { + "epoch": 0.74, + "grad_norm": 0.8643161058425903, + "learning_rate": 1.6227003951681276e-06, + "loss": 0.5163, + "step": 11743 + }, + { + "epoch": 0.74, + "grad_norm": 0.8888043165206909, + "learning_rate": 1.6219439019200557e-06, + "loss": 0.5626, + "step": 11744 + }, + { + "epoch": 0.74, + "grad_norm": 0.9490756988525391, + "learning_rate": 1.6211875509101744e-06, + "loss": 0.6331, + "step": 11745 + }, + { + "epoch": 0.74, + "grad_norm": 0.8775157928466797, + "learning_rate": 1.6204313421703332e-06, + "loss": 0.5488, + "step": 11746 + }, + { + "epoch": 0.74, + "grad_norm": 0.9012529850006104, + "learning_rate": 1.6196752757323698e-06, + "loss": 0.6517, + "step": 11747 + }, + { + "epoch": 0.74, + "grad_norm": 0.8449646830558777, + "learning_rate": 1.61891935162812e-06, + "loss": 0.5482, + "step": 11748 + }, + { + "epoch": 0.74, + "grad_norm": 0.8353961110115051, + "learning_rate": 1.6181635698894171e-06, + "loss": 0.5155, + "step": 11749 + }, + { + "epoch": 0.74, + "grad_norm": 0.9023754596710205, + "learning_rate": 1.61740793054808e-06, + "loss": 0.6113, + "step": 11750 + }, + { + "epoch": 0.74, + "grad_norm": 0.8526588082313538, + "learning_rate": 1.6166524336359285e-06, + "loss": 0.537, + "step": 11751 + }, + { + "epoch": 0.74, + "grad_norm": 0.9162303805351257, + "learning_rate": 1.6158970791847728e-06, + "loss": 0.6146, + "step": 11752 + }, + { + "epoch": 0.74, + "grad_norm": 0.8880906701087952, + "learning_rate": 1.6151418672264186e-06, + "loss": 0.5587, + "step": 11753 + }, + { + "epoch": 0.74, + "grad_norm": 0.8889377117156982, + "learning_rate": 1.614386797792667e-06, + "loss": 0.5929, + "step": 11754 + }, + { + "epoch": 0.74, + "grad_norm": 0.817284882068634, + "learning_rate": 1.6136318709153075e-06, + "loss": 0.5572, + "step": 11755 + }, + { + "epoch": 0.74, + "grad_norm": 0.785580039024353, + "learning_rate": 1.612877086626129e-06, + "loss": 0.6277, + "step": 11756 + }, + { + "epoch": 0.74, + "grad_norm": 0.8508361577987671, + "learning_rate": 1.612122444956916e-06, + "loss": 0.5669, + "step": 11757 + }, + { + "epoch": 0.74, + "grad_norm": 0.8702815175056458, + "learning_rate": 1.6113679459394398e-06, + "loss": 0.5907, + "step": 11758 + }, + { + "epoch": 0.74, + "grad_norm": 0.8659467697143555, + "learning_rate": 1.6106135896054714e-06, + "loss": 0.574, + "step": 11759 + }, + { + "epoch": 0.75, + "grad_norm": 0.8882265686988831, + "learning_rate": 1.6098593759867736e-06, + "loss": 0.5649, + "step": 11760 + }, + { + "epoch": 0.75, + "grad_norm": 0.8908340334892273, + "learning_rate": 1.609105305115104e-06, + "loss": 0.5496, + "step": 11761 + }, + { + "epoch": 0.75, + "grad_norm": 0.8687838315963745, + "learning_rate": 1.6083513770222158e-06, + "loss": 0.5963, + "step": 11762 + }, + { + "epoch": 0.75, + "grad_norm": 0.893989622592926, + "learning_rate": 1.6075975917398512e-06, + "loss": 0.5502, + "step": 11763 + }, + { + "epoch": 0.75, + "grad_norm": 0.8989611864089966, + "learning_rate": 1.60684394929975e-06, + "loss": 0.6383, + "step": 11764 + }, + { + "epoch": 0.75, + "grad_norm": 0.8593994975090027, + "learning_rate": 1.6060904497336465e-06, + "loss": 0.5928, + "step": 11765 + }, + { + "epoch": 0.75, + "grad_norm": 0.8752898573875427, + "learning_rate": 1.6053370930732676e-06, + "loss": 0.5127, + "step": 11766 + }, + { + "epoch": 0.75, + "grad_norm": 0.8868995904922485, + "learning_rate": 1.6045838793503342e-06, + "loss": 0.5745, + "step": 11767 + }, + { + "epoch": 0.75, + "grad_norm": 0.872316837310791, + "learning_rate": 1.6038308085965642e-06, + "loss": 0.5879, + "step": 11768 + }, + { + "epoch": 0.75, + "grad_norm": 0.9360784888267517, + "learning_rate": 1.6030778808436609e-06, + "loss": 0.5412, + "step": 11769 + }, + { + "epoch": 0.75, + "grad_norm": 0.8613805174827576, + "learning_rate": 1.6023250961233338e-06, + "loss": 0.5846, + "step": 11770 + }, + { + "epoch": 0.75, + "grad_norm": 0.9219672083854675, + "learning_rate": 1.6015724544672762e-06, + "loss": 0.5664, + "step": 11771 + }, + { + "epoch": 0.75, + "grad_norm": 0.9142691493034363, + "learning_rate": 1.6008199559071795e-06, + "loss": 0.602, + "step": 11772 + }, + { + "epoch": 0.75, + "grad_norm": 0.8564930558204651, + "learning_rate": 1.6000676004747306e-06, + "loss": 0.5597, + "step": 11773 + }, + { + "epoch": 0.75, + "grad_norm": 0.9067575335502625, + "learning_rate": 1.5993153882016065e-06, + "loss": 0.6287, + "step": 11774 + }, + { + "epoch": 0.75, + "grad_norm": 0.8603774309158325, + "learning_rate": 1.5985633191194821e-06, + "loss": 0.6032, + "step": 11775 + }, + { + "epoch": 0.75, + "grad_norm": 0.9035540819168091, + "learning_rate": 1.5978113932600248e-06, + "loss": 0.5868, + "step": 11776 + }, + { + "epoch": 0.75, + "grad_norm": 0.9324126839637756, + "learning_rate": 1.5970596106548913e-06, + "loss": 0.5675, + "step": 11777 + }, + { + "epoch": 0.75, + "grad_norm": 0.8501653671264648, + "learning_rate": 1.5963079713357432e-06, + "loss": 0.5974, + "step": 11778 + }, + { + "epoch": 0.75, + "grad_norm": 0.8585829734802246, + "learning_rate": 1.595556475334224e-06, + "loss": 0.5757, + "step": 11779 + }, + { + "epoch": 0.75, + "grad_norm": 0.8865067362785339, + "learning_rate": 1.5948051226819783e-06, + "loss": 0.5329, + "step": 11780 + }, + { + "epoch": 0.75, + "grad_norm": 0.8885084986686707, + "learning_rate": 1.5940539134106442e-06, + "loss": 0.5593, + "step": 11781 + }, + { + "epoch": 0.75, + "grad_norm": 0.8946758508682251, + "learning_rate": 1.5933028475518486e-06, + "loss": 0.5682, + "step": 11782 + }, + { + "epoch": 0.75, + "grad_norm": 0.9001892805099487, + "learning_rate": 1.5925519251372212e-06, + "loss": 0.4956, + "step": 11783 + }, + { + "epoch": 0.75, + "grad_norm": 0.8397232890129089, + "learning_rate": 1.5918011461983796e-06, + "loss": 0.5367, + "step": 11784 + }, + { + "epoch": 0.75, + "grad_norm": 0.9217719435691833, + "learning_rate": 1.5910505107669339e-06, + "loss": 0.5639, + "step": 11785 + }, + { + "epoch": 0.75, + "grad_norm": 0.8915478587150574, + "learning_rate": 1.5903000188744922e-06, + "loss": 0.5673, + "step": 11786 + }, + { + "epoch": 0.75, + "grad_norm": 0.8859308362007141, + "learning_rate": 1.589549670552656e-06, + "loss": 0.589, + "step": 11787 + }, + { + "epoch": 0.75, + "grad_norm": 0.8340456485748291, + "learning_rate": 1.588799465833018e-06, + "loss": 0.5375, + "step": 11788 + }, + { + "epoch": 0.75, + "grad_norm": 0.950278639793396, + "learning_rate": 1.5880494047471683e-06, + "loss": 0.6206, + "step": 11789 + }, + { + "epoch": 0.75, + "grad_norm": 0.8300553560256958, + "learning_rate": 1.587299487326689e-06, + "loss": 0.5973, + "step": 11790 + }, + { + "epoch": 0.75, + "grad_norm": 0.9166631698608398, + "learning_rate": 1.586549713603156e-06, + "loss": 0.6043, + "step": 11791 + }, + { + "epoch": 0.75, + "grad_norm": 0.8950029611587524, + "learning_rate": 1.5858000836081422e-06, + "loss": 0.5707, + "step": 11792 + }, + { + "epoch": 0.75, + "grad_norm": 0.9038580060005188, + "learning_rate": 1.5850505973732077e-06, + "loss": 0.5714, + "step": 11793 + }, + { + "epoch": 0.75, + "grad_norm": 0.8897790908813477, + "learning_rate": 1.5843012549299131e-06, + "loss": 0.5668, + "step": 11794 + }, + { + "epoch": 0.75, + "grad_norm": 0.9130045175552368, + "learning_rate": 1.58355205630981e-06, + "loss": 0.6087, + "step": 11795 + }, + { + "epoch": 0.75, + "grad_norm": 0.8878775835037231, + "learning_rate": 1.5828030015444451e-06, + "loss": 0.5306, + "step": 11796 + }, + { + "epoch": 0.75, + "grad_norm": 0.8759022951126099, + "learning_rate": 1.5820540906653581e-06, + "loss": 0.6105, + "step": 11797 + }, + { + "epoch": 0.75, + "grad_norm": 0.8387483358383179, + "learning_rate": 1.5813053237040849e-06, + "loss": 0.565, + "step": 11798 + }, + { + "epoch": 0.75, + "grad_norm": 0.8774323463439941, + "learning_rate": 1.580556700692148e-06, + "loss": 0.5536, + "step": 11799 + }, + { + "epoch": 0.75, + "grad_norm": 0.9281049370765686, + "learning_rate": 1.5798082216610766e-06, + "loss": 0.5497, + "step": 11800 + }, + { + "epoch": 0.75, + "grad_norm": 0.8957639932632446, + "learning_rate": 1.5790598866423818e-06, + "loss": 0.6225, + "step": 11801 + }, + { + "epoch": 0.75, + "grad_norm": 0.8487939834594727, + "learning_rate": 1.5783116956675742e-06, + "loss": 0.5618, + "step": 11802 + }, + { + "epoch": 0.75, + "grad_norm": 0.8974397778511047, + "learning_rate": 1.5775636487681579e-06, + "loss": 0.5703, + "step": 11803 + }, + { + "epoch": 0.75, + "grad_norm": 0.9455395340919495, + "learning_rate": 1.5768157459756307e-06, + "loss": 0.6104, + "step": 11804 + }, + { + "epoch": 0.75, + "grad_norm": 0.9057279825210571, + "learning_rate": 1.576067987321484e-06, + "loss": 0.553, + "step": 11805 + }, + { + "epoch": 0.75, + "grad_norm": 0.9172567129135132, + "learning_rate": 1.5753203728372052e-06, + "loss": 0.5335, + "step": 11806 + }, + { + "epoch": 0.75, + "grad_norm": 0.8080207705497742, + "learning_rate": 1.5745729025542684e-06, + "loss": 0.5155, + "step": 11807 + }, + { + "epoch": 0.75, + "grad_norm": 0.9321126937866211, + "learning_rate": 1.5738255765041537e-06, + "loss": 0.6143, + "step": 11808 + }, + { + "epoch": 0.75, + "grad_norm": 0.8731662631034851, + "learning_rate": 1.5730783947183237e-06, + "loss": 0.5733, + "step": 11809 + }, + { + "epoch": 0.75, + "grad_norm": 0.8786374926567078, + "learning_rate": 1.5723313572282412e-06, + "loss": 0.5162, + "step": 11810 + }, + { + "epoch": 0.75, + "grad_norm": 0.9051015973091125, + "learning_rate": 1.5715844640653627e-06, + "loss": 0.5836, + "step": 11811 + }, + { + "epoch": 0.75, + "grad_norm": 0.9382368922233582, + "learning_rate": 1.5708377152611326e-06, + "loss": 0.5949, + "step": 11812 + }, + { + "epoch": 0.75, + "grad_norm": 0.8949106931686401, + "learning_rate": 1.5700911108469986e-06, + "loss": 0.5708, + "step": 11813 + }, + { + "epoch": 0.75, + "grad_norm": 0.925713837146759, + "learning_rate": 1.569344650854398e-06, + "loss": 0.5473, + "step": 11814 + }, + { + "epoch": 0.75, + "grad_norm": 0.8979496955871582, + "learning_rate": 1.5685983353147582e-06, + "loss": 0.578, + "step": 11815 + }, + { + "epoch": 0.75, + "grad_norm": 0.8621270060539246, + "learning_rate": 1.5678521642595052e-06, + "loss": 0.5517, + "step": 11816 + }, + { + "epoch": 0.75, + "grad_norm": 0.8650081157684326, + "learning_rate": 1.567106137720058e-06, + "loss": 0.507, + "step": 11817 + }, + { + "epoch": 0.75, + "grad_norm": 0.9151085615158081, + "learning_rate": 1.5663602557278297e-06, + "loss": 0.565, + "step": 11818 + }, + { + "epoch": 0.75, + "grad_norm": 0.8362554311752319, + "learning_rate": 1.5656145183142274e-06, + "loss": 0.5517, + "step": 11819 + }, + { + "epoch": 0.75, + "grad_norm": 0.9083791971206665, + "learning_rate": 1.5648689255106474e-06, + "loss": 0.5975, + "step": 11820 + }, + { + "epoch": 0.75, + "grad_norm": 0.9755656123161316, + "learning_rate": 1.5641234773484887e-06, + "loss": 0.5784, + "step": 11821 + }, + { + "epoch": 0.75, + "grad_norm": 0.8774923086166382, + "learning_rate": 1.5633781738591392e-06, + "loss": 0.5766, + "step": 11822 + }, + { + "epoch": 0.75, + "grad_norm": 0.9168820977210999, + "learning_rate": 1.5626330150739776e-06, + "loss": 0.616, + "step": 11823 + }, + { + "epoch": 0.75, + "grad_norm": 0.8971782326698303, + "learning_rate": 1.5618880010243831e-06, + "loss": 0.5556, + "step": 11824 + }, + { + "epoch": 0.75, + "grad_norm": 0.9524270296096802, + "learning_rate": 1.5611431317417235e-06, + "loss": 0.5918, + "step": 11825 + }, + { + "epoch": 0.75, + "grad_norm": 0.9112175107002258, + "learning_rate": 1.5603984072573648e-06, + "loss": 0.5589, + "step": 11826 + }, + { + "epoch": 0.75, + "grad_norm": 0.856706440448761, + "learning_rate": 1.5596538276026641e-06, + "loss": 0.5309, + "step": 11827 + }, + { + "epoch": 0.75, + "grad_norm": 0.8865464329719543, + "learning_rate": 1.5589093928089715e-06, + "loss": 0.5807, + "step": 11828 + }, + { + "epoch": 0.75, + "grad_norm": 0.8657694458961487, + "learning_rate": 1.5581651029076322e-06, + "loss": 0.5807, + "step": 11829 + }, + { + "epoch": 0.75, + "grad_norm": 0.9261035919189453, + "learning_rate": 1.5574209579299903e-06, + "loss": 0.5876, + "step": 11830 + }, + { + "epoch": 0.75, + "grad_norm": 0.918413519859314, + "learning_rate": 1.5566769579073747e-06, + "loss": 0.5667, + "step": 11831 + }, + { + "epoch": 0.75, + "grad_norm": 0.9813733696937561, + "learning_rate": 1.555933102871114e-06, + "loss": 0.5733, + "step": 11832 + }, + { + "epoch": 0.75, + "grad_norm": 0.9484089016914368, + "learning_rate": 1.5551893928525285e-06, + "loss": 0.6259, + "step": 11833 + }, + { + "epoch": 0.75, + "grad_norm": 0.9082149267196655, + "learning_rate": 1.5544458278829344e-06, + "loss": 0.6183, + "step": 11834 + }, + { + "epoch": 0.75, + "grad_norm": 0.9003174304962158, + "learning_rate": 1.5537024079936425e-06, + "loss": 0.5506, + "step": 11835 + }, + { + "epoch": 0.75, + "grad_norm": 0.9653313755989075, + "learning_rate": 1.5529591332159511e-06, + "loss": 0.6133, + "step": 11836 + }, + { + "epoch": 0.75, + "grad_norm": 0.9120768904685974, + "learning_rate": 1.5522160035811578e-06, + "loss": 0.5768, + "step": 11837 + }, + { + "epoch": 0.75, + "grad_norm": 0.8551223278045654, + "learning_rate": 1.551473019120558e-06, + "loss": 0.5806, + "step": 11838 + }, + { + "epoch": 0.75, + "grad_norm": 0.9125446677207947, + "learning_rate": 1.5507301798654313e-06, + "loss": 0.5718, + "step": 11839 + }, + { + "epoch": 0.75, + "grad_norm": 0.8963059782981873, + "learning_rate": 1.549987485847057e-06, + "loss": 0.556, + "step": 11840 + }, + { + "epoch": 0.75, + "grad_norm": 0.8334496021270752, + "learning_rate": 1.54924493709671e-06, + "loss": 0.5308, + "step": 11841 + }, + { + "epoch": 0.75, + "grad_norm": 0.8776934742927551, + "learning_rate": 1.5485025336456511e-06, + "loss": 0.643, + "step": 11842 + }, + { + "epoch": 0.75, + "grad_norm": 0.8814354538917542, + "learning_rate": 1.547760275525147e-06, + "loss": 0.5543, + "step": 11843 + }, + { + "epoch": 0.75, + "grad_norm": 0.8887062072753906, + "learning_rate": 1.547018162766446e-06, + "loss": 0.6306, + "step": 11844 + }, + { + "epoch": 0.75, + "grad_norm": 0.9002584218978882, + "learning_rate": 1.5462761954007987e-06, + "loss": 0.5831, + "step": 11845 + }, + { + "epoch": 0.75, + "grad_norm": 0.9003365635871887, + "learning_rate": 1.5455343734594463e-06, + "loss": 0.5889, + "step": 11846 + }, + { + "epoch": 0.75, + "grad_norm": 0.8967679142951965, + "learning_rate": 1.5447926969736237e-06, + "loss": 0.599, + "step": 11847 + }, + { + "epoch": 0.75, + "grad_norm": 0.9296191334724426, + "learning_rate": 1.5440511659745611e-06, + "loss": 0.5842, + "step": 11848 + }, + { + "epoch": 0.75, + "grad_norm": 0.903057873249054, + "learning_rate": 1.5433097804934833e-06, + "loss": 0.5436, + "step": 11849 + }, + { + "epoch": 0.75, + "grad_norm": 0.9655782580375671, + "learning_rate": 1.5425685405616026e-06, + "loss": 0.603, + "step": 11850 + }, + { + "epoch": 0.75, + "grad_norm": 0.9100805521011353, + "learning_rate": 1.5418274462101358e-06, + "loss": 0.5269, + "step": 11851 + }, + { + "epoch": 0.75, + "grad_norm": 0.9555662274360657, + "learning_rate": 1.541086497470284e-06, + "loss": 0.5998, + "step": 11852 + }, + { + "epoch": 0.75, + "grad_norm": 0.9616516828536987, + "learning_rate": 1.540345694373247e-06, + "loss": 0.5653, + "step": 11853 + }, + { + "epoch": 0.75, + "grad_norm": 0.8906912207603455, + "learning_rate": 1.5396050369502175e-06, + "loss": 0.5835, + "step": 11854 + }, + { + "epoch": 0.75, + "grad_norm": 0.9743589162826538, + "learning_rate": 1.538864525232382e-06, + "loss": 0.5809, + "step": 11855 + }, + { + "epoch": 0.75, + "grad_norm": 0.9580129981040955, + "learning_rate": 1.538124159250921e-06, + "loss": 0.5397, + "step": 11856 + }, + { + "epoch": 0.75, + "grad_norm": 0.9036690592765808, + "learning_rate": 1.5373839390370098e-06, + "loss": 0.6106, + "step": 11857 + }, + { + "epoch": 0.75, + "grad_norm": 0.9009885191917419, + "learning_rate": 1.5366438646218146e-06, + "loss": 0.5968, + "step": 11858 + }, + { + "epoch": 0.75, + "grad_norm": 0.8845691084861755, + "learning_rate": 1.5359039360364975e-06, + "loss": 0.5573, + "step": 11859 + }, + { + "epoch": 0.75, + "grad_norm": 0.8430015444755554, + "learning_rate": 1.5351641533122153e-06, + "loss": 0.5701, + "step": 11860 + }, + { + "epoch": 0.75, + "grad_norm": 0.8827622532844543, + "learning_rate": 1.5344245164801174e-06, + "loss": 0.5712, + "step": 11861 + }, + { + "epoch": 0.75, + "grad_norm": 0.8615781664848328, + "learning_rate": 1.533685025571347e-06, + "loss": 0.5173, + "step": 11862 + }, + { + "epoch": 0.75, + "grad_norm": 0.8814289569854736, + "learning_rate": 1.5329456806170418e-06, + "loss": 0.6011, + "step": 11863 + }, + { + "epoch": 0.75, + "grad_norm": 0.9278409481048584, + "learning_rate": 1.5322064816483328e-06, + "loss": 0.6135, + "step": 11864 + }, + { + "epoch": 0.75, + "grad_norm": 0.875895082950592, + "learning_rate": 1.5314674286963471e-06, + "loss": 0.6038, + "step": 11865 + }, + { + "epoch": 0.75, + "grad_norm": 0.9008811712265015, + "learning_rate": 1.5307285217922003e-06, + "loss": 0.6005, + "step": 11866 + }, + { + "epoch": 0.75, + "grad_norm": 0.8702130913734436, + "learning_rate": 1.529989760967005e-06, + "loss": 0.5811, + "step": 11867 + }, + { + "epoch": 0.75, + "grad_norm": 0.8915956616401672, + "learning_rate": 1.5292511462518728e-06, + "loss": 0.5713, + "step": 11868 + }, + { + "epoch": 0.75, + "grad_norm": 0.8408598899841309, + "learning_rate": 1.528512677677899e-06, + "loss": 0.5299, + "step": 11869 + }, + { + "epoch": 0.75, + "grad_norm": 0.8335807919502258, + "learning_rate": 1.5277743552761809e-06, + "loss": 0.5533, + "step": 11870 + }, + { + "epoch": 0.75, + "grad_norm": 0.8974030613899231, + "learning_rate": 1.5270361790778065e-06, + "loss": 0.5777, + "step": 11871 + }, + { + "epoch": 0.75, + "grad_norm": 0.8503268957138062, + "learning_rate": 1.526298149113854e-06, + "loss": 0.5747, + "step": 11872 + }, + { + "epoch": 0.75, + "grad_norm": 0.9573015570640564, + "learning_rate": 1.5255602654154055e-06, + "loss": 0.5811, + "step": 11873 + }, + { + "epoch": 0.75, + "grad_norm": 0.9180850386619568, + "learning_rate": 1.5248225280135258e-06, + "loss": 0.5726, + "step": 11874 + }, + { + "epoch": 0.75, + "grad_norm": 0.8586685657501221, + "learning_rate": 1.5240849369392807e-06, + "loss": 0.5612, + "step": 11875 + }, + { + "epoch": 0.75, + "grad_norm": 0.9390682578086853, + "learning_rate": 1.5233474922237268e-06, + "loss": 0.5809, + "step": 11876 + }, + { + "epoch": 0.75, + "grad_norm": 0.8708896636962891, + "learning_rate": 1.5226101938979153e-06, + "loss": 0.5575, + "step": 11877 + }, + { + "epoch": 0.75, + "grad_norm": 0.89445960521698, + "learning_rate": 1.5218730419928917e-06, + "loss": 0.5099, + "step": 11878 + }, + { + "epoch": 0.75, + "grad_norm": 0.884432315826416, + "learning_rate": 1.5211360365396972e-06, + "loss": 0.6021, + "step": 11879 + }, + { + "epoch": 0.75, + "grad_norm": 0.9297842979431152, + "learning_rate": 1.5203991775693577e-06, + "loss": 0.6123, + "step": 11880 + }, + { + "epoch": 0.75, + "grad_norm": 0.9308014512062073, + "learning_rate": 1.5196624651129084e-06, + "loss": 0.5901, + "step": 11881 + }, + { + "epoch": 0.75, + "grad_norm": 0.9186192154884338, + "learning_rate": 1.5189258992013635e-06, + "loss": 0.5711, + "step": 11882 + }, + { + "epoch": 0.75, + "grad_norm": 0.9167851209640503, + "learning_rate": 1.5181894798657388e-06, + "loss": 0.5695, + "step": 11883 + }, + { + "epoch": 0.75, + "grad_norm": 0.878422200679779, + "learning_rate": 1.517453207137043e-06, + "loss": 0.5535, + "step": 11884 + }, + { + "epoch": 0.75, + "grad_norm": 0.8583741784095764, + "learning_rate": 1.5167170810462777e-06, + "loss": 0.5945, + "step": 11885 + }, + { + "epoch": 0.75, + "grad_norm": 0.8769426345825195, + "learning_rate": 1.5159811016244392e-06, + "loss": 0.5756, + "step": 11886 + }, + { + "epoch": 0.75, + "grad_norm": 0.9809277057647705, + "learning_rate": 1.5152452689025176e-06, + "loss": 0.5608, + "step": 11887 + }, + { + "epoch": 0.75, + "grad_norm": 0.8417267203330994, + "learning_rate": 1.5145095829114937e-06, + "loss": 0.56, + "step": 11888 + }, + { + "epoch": 0.75, + "grad_norm": 0.8778293132781982, + "learning_rate": 1.5137740436823462e-06, + "loss": 0.5685, + "step": 11889 + }, + { + "epoch": 0.75, + "grad_norm": 0.9264646768569946, + "learning_rate": 1.5130386512460454e-06, + "loss": 0.6045, + "step": 11890 + }, + { + "epoch": 0.75, + "grad_norm": 0.8687536120414734, + "learning_rate": 1.5123034056335572e-06, + "loss": 0.572, + "step": 11891 + }, + { + "epoch": 0.75, + "grad_norm": 0.8998939394950867, + "learning_rate": 1.5115683068758419e-06, + "loss": 0.5886, + "step": 11892 + }, + { + "epoch": 0.75, + "grad_norm": 0.9059341549873352, + "learning_rate": 1.5108333550038461e-06, + "loss": 0.581, + "step": 11893 + }, + { + "epoch": 0.75, + "grad_norm": 0.9004920721054077, + "learning_rate": 1.510098550048521e-06, + "loss": 0.5742, + "step": 11894 + }, + { + "epoch": 0.75, + "grad_norm": 0.8428323268890381, + "learning_rate": 1.5093638920408077e-06, + "loss": 0.5403, + "step": 11895 + }, + { + "epoch": 0.75, + "grad_norm": 0.8726648688316345, + "learning_rate": 1.508629381011636e-06, + "loss": 0.5946, + "step": 11896 + }, + { + "epoch": 0.75, + "grad_norm": 0.8611435294151306, + "learning_rate": 1.507895016991936e-06, + "loss": 0.5453, + "step": 11897 + }, + { + "epoch": 0.75, + "grad_norm": 0.9121397137641907, + "learning_rate": 1.507160800012628e-06, + "loss": 0.5912, + "step": 11898 + }, + { + "epoch": 0.75, + "grad_norm": 0.979377031326294, + "learning_rate": 1.5064267301046281e-06, + "loss": 0.5991, + "step": 11899 + }, + { + "epoch": 0.75, + "grad_norm": 0.9422827363014221, + "learning_rate": 1.5056928072988475e-06, + "loss": 0.5693, + "step": 11900 + }, + { + "epoch": 0.75, + "grad_norm": 0.9085085391998291, + "learning_rate": 1.504959031626183e-06, + "loss": 0.6133, + "step": 11901 + }, + { + "epoch": 0.75, + "grad_norm": 0.9609709978103638, + "learning_rate": 1.5042254031175373e-06, + "loss": 0.6043, + "step": 11902 + }, + { + "epoch": 0.75, + "grad_norm": 0.8676922917366028, + "learning_rate": 1.5034919218038007e-06, + "loss": 0.5184, + "step": 11903 + }, + { + "epoch": 0.75, + "grad_norm": 0.8704044818878174, + "learning_rate": 1.502758587715854e-06, + "loss": 0.5492, + "step": 11904 + }, + { + "epoch": 0.75, + "grad_norm": 0.9572499990463257, + "learning_rate": 1.5020254008845775e-06, + "loss": 0.6057, + "step": 11905 + }, + { + "epoch": 0.75, + "grad_norm": 0.895881712436676, + "learning_rate": 1.501292361340842e-06, + "loss": 0.5942, + "step": 11906 + }, + { + "epoch": 0.75, + "grad_norm": 0.8465459942817688, + "learning_rate": 1.500559469115515e-06, + "loss": 0.5872, + "step": 11907 + }, + { + "epoch": 0.75, + "grad_norm": 0.9170262217521667, + "learning_rate": 1.499826724239456e-06, + "loss": 0.5747, + "step": 11908 + }, + { + "epoch": 0.75, + "grad_norm": 0.8896523118019104, + "learning_rate": 1.499094126743516e-06, + "loss": 0.624, + "step": 11909 + }, + { + "epoch": 0.75, + "grad_norm": 0.8543857336044312, + "learning_rate": 1.4983616766585423e-06, + "loss": 0.5329, + "step": 11910 + }, + { + "epoch": 0.75, + "grad_norm": 0.9327712655067444, + "learning_rate": 1.4976293740153803e-06, + "loss": 0.6734, + "step": 11911 + }, + { + "epoch": 0.75, + "grad_norm": 0.9487749338150024, + "learning_rate": 1.4968972188448593e-06, + "loss": 0.551, + "step": 11912 + }, + { + "epoch": 0.75, + "grad_norm": 0.8792836666107178, + "learning_rate": 1.4961652111778103e-06, + "loss": 0.5467, + "step": 11913 + }, + { + "epoch": 0.75, + "grad_norm": 0.873479962348938, + "learning_rate": 1.4954333510450552e-06, + "loss": 0.5726, + "step": 11914 + }, + { + "epoch": 0.75, + "grad_norm": 0.8478526473045349, + "learning_rate": 1.4947016384774105e-06, + "loss": 0.5231, + "step": 11915 + }, + { + "epoch": 0.75, + "grad_norm": 0.8277620673179626, + "learning_rate": 1.4939700735056873e-06, + "loss": 0.5512, + "step": 11916 + }, + { + "epoch": 0.76, + "grad_norm": 0.9055673480033875, + "learning_rate": 1.493238656160686e-06, + "loss": 0.6138, + "step": 11917 + }, + { + "epoch": 0.76, + "grad_norm": 0.8501646518707275, + "learning_rate": 1.492507386473206e-06, + "loss": 0.5596, + "step": 11918 + }, + { + "epoch": 0.76, + "grad_norm": 0.9250453114509583, + "learning_rate": 1.4917762644740381e-06, + "loss": 0.5815, + "step": 11919 + }, + { + "epoch": 0.76, + "grad_norm": 0.8276757597923279, + "learning_rate": 1.4910452901939671e-06, + "loss": 0.525, + "step": 11920 + }, + { + "epoch": 0.76, + "grad_norm": 0.8669492602348328, + "learning_rate": 1.4903144636637723e-06, + "loss": 0.5612, + "step": 11921 + }, + { + "epoch": 0.76, + "grad_norm": 0.9456034302711487, + "learning_rate": 1.489583784914228e-06, + "loss": 0.5591, + "step": 11922 + }, + { + "epoch": 0.76, + "grad_norm": 0.971352756023407, + "learning_rate": 1.4888532539760958e-06, + "loss": 0.6341, + "step": 11923 + }, + { + "epoch": 0.76, + "grad_norm": 0.8353976607322693, + "learning_rate": 1.4881228708801409e-06, + "loss": 0.5194, + "step": 11924 + }, + { + "epoch": 0.76, + "grad_norm": 0.8877169489860535, + "learning_rate": 1.4873926356571144e-06, + "loss": 0.5992, + "step": 11925 + }, + { + "epoch": 0.76, + "grad_norm": 0.9067363142967224, + "learning_rate": 1.486662548337764e-06, + "loss": 0.6173, + "step": 11926 + }, + { + "epoch": 0.76, + "grad_norm": 0.891920268535614, + "learning_rate": 1.485932608952832e-06, + "loss": 0.5366, + "step": 11927 + }, + { + "epoch": 0.76, + "grad_norm": 0.8979024291038513, + "learning_rate": 1.485202817533053e-06, + "loss": 0.5402, + "step": 11928 + }, + { + "epoch": 0.76, + "grad_norm": 0.9207996129989624, + "learning_rate": 1.4844731741091561e-06, + "loss": 0.5968, + "step": 11929 + }, + { + "epoch": 0.76, + "grad_norm": 0.9184231162071228, + "learning_rate": 1.4837436787118665e-06, + "loss": 0.5759, + "step": 11930 + }, + { + "epoch": 0.76, + "grad_norm": 0.851150393486023, + "learning_rate": 1.4830143313718943e-06, + "loss": 0.5953, + "step": 11931 + }, + { + "epoch": 0.76, + "grad_norm": 0.8827084898948669, + "learning_rate": 1.482285132119956e-06, + "loss": 0.5872, + "step": 11932 + }, + { + "epoch": 0.76, + "grad_norm": 0.840436577796936, + "learning_rate": 1.4815560809867551e-06, + "loss": 0.5918, + "step": 11933 + }, + { + "epoch": 0.76, + "grad_norm": 0.903814971446991, + "learning_rate": 1.4808271780029864e-06, + "loss": 0.5741, + "step": 11934 + }, + { + "epoch": 0.76, + "grad_norm": 0.8789491057395935, + "learning_rate": 1.4800984231993432e-06, + "loss": 0.5188, + "step": 11935 + }, + { + "epoch": 0.76, + "grad_norm": 0.9306246638298035, + "learning_rate": 1.47936981660651e-06, + "loss": 0.5841, + "step": 11936 + }, + { + "epoch": 0.76, + "grad_norm": 0.9300113916397095, + "learning_rate": 1.4786413582551668e-06, + "loss": 0.6367, + "step": 11937 + }, + { + "epoch": 0.76, + "grad_norm": 0.8994358777999878, + "learning_rate": 1.4779130481759874e-06, + "loss": 0.5961, + "step": 11938 + }, + { + "epoch": 0.76, + "grad_norm": 0.872204065322876, + "learning_rate": 1.4771848863996353e-06, + "loss": 0.6409, + "step": 11939 + }, + { + "epoch": 0.76, + "grad_norm": 0.897193431854248, + "learning_rate": 1.4764568729567714e-06, + "loss": 0.5399, + "step": 11940 + }, + { + "epoch": 0.76, + "grad_norm": 0.8492984771728516, + "learning_rate": 1.4757290078780545e-06, + "loss": 0.5339, + "step": 11941 + }, + { + "epoch": 0.76, + "grad_norm": 0.942596971988678, + "learning_rate": 1.475001291194127e-06, + "loss": 0.6122, + "step": 11942 + }, + { + "epoch": 0.76, + "grad_norm": 0.9129643440246582, + "learning_rate": 1.4742737229356324e-06, + "loss": 0.6431, + "step": 11943 + }, + { + "epoch": 0.76, + "grad_norm": 0.9358230829238892, + "learning_rate": 1.473546303133207e-06, + "loss": 0.6236, + "step": 11944 + }, + { + "epoch": 0.76, + "grad_norm": 0.8448783159255981, + "learning_rate": 1.4728190318174785e-06, + "loss": 0.5691, + "step": 11945 + }, + { + "epoch": 0.76, + "grad_norm": 0.8805672526359558, + "learning_rate": 1.4720919090190723e-06, + "loss": 0.5347, + "step": 11946 + }, + { + "epoch": 0.76, + "grad_norm": 0.9884516596794128, + "learning_rate": 1.471364934768601e-06, + "loss": 0.5915, + "step": 11947 + }, + { + "epoch": 0.76, + "grad_norm": 0.9252734780311584, + "learning_rate": 1.470638109096676e-06, + "loss": 0.5798, + "step": 11948 + }, + { + "epoch": 0.76, + "grad_norm": 0.8685827851295471, + "learning_rate": 1.469911432033906e-06, + "loss": 0.5078, + "step": 11949 + }, + { + "epoch": 0.76, + "grad_norm": 0.8583547472953796, + "learning_rate": 1.469184903610883e-06, + "loss": 0.5571, + "step": 11950 + }, + { + "epoch": 0.76, + "grad_norm": 0.9142690300941467, + "learning_rate": 1.468458523858201e-06, + "loss": 0.5576, + "step": 11951 + }, + { + "epoch": 0.76, + "grad_norm": 0.8868157267570496, + "learning_rate": 1.467732292806447e-06, + "loss": 0.6154, + "step": 11952 + }, + { + "epoch": 0.76, + "grad_norm": 0.9219274520874023, + "learning_rate": 1.4670062104861948e-06, + "loss": 0.5348, + "step": 11953 + }, + { + "epoch": 0.76, + "grad_norm": 0.8708443641662598, + "learning_rate": 1.4662802769280244e-06, + "loss": 0.5806, + "step": 11954 + }, + { + "epoch": 0.76, + "grad_norm": 0.884825587272644, + "learning_rate": 1.4655544921624964e-06, + "loss": 0.5741, + "step": 11955 + }, + { + "epoch": 0.76, + "grad_norm": 0.9130429625511169, + "learning_rate": 1.464828856220174e-06, + "loss": 0.5545, + "step": 11956 + }, + { + "epoch": 0.76, + "grad_norm": 0.9295893907546997, + "learning_rate": 1.4641033691316104e-06, + "loss": 0.6212, + "step": 11957 + }, + { + "epoch": 0.76, + "grad_norm": 0.8640606999397278, + "learning_rate": 1.4633780309273532e-06, + "loss": 0.5754, + "step": 11958 + }, + { + "epoch": 0.76, + "grad_norm": 0.8814523220062256, + "learning_rate": 1.4626528416379438e-06, + "loss": 0.5948, + "step": 11959 + }, + { + "epoch": 0.76, + "grad_norm": 0.91228848695755, + "learning_rate": 1.4619278012939197e-06, + "loss": 0.5684, + "step": 11960 + }, + { + "epoch": 0.76, + "grad_norm": 0.8493878245353699, + "learning_rate": 1.4612029099258046e-06, + "loss": 0.5311, + "step": 11961 + }, + { + "epoch": 0.76, + "grad_norm": 0.8996723890304565, + "learning_rate": 1.4604781675641273e-06, + "loss": 0.5628, + "step": 11962 + }, + { + "epoch": 0.76, + "grad_norm": 0.9304405450820923, + "learning_rate": 1.4597535742393998e-06, + "loss": 0.5539, + "step": 11963 + }, + { + "epoch": 0.76, + "grad_norm": 0.8904679417610168, + "learning_rate": 1.459029129982134e-06, + "loss": 0.5766, + "step": 11964 + }, + { + "epoch": 0.76, + "grad_norm": 0.9363497495651245, + "learning_rate": 1.4583048348228345e-06, + "loss": 0.5826, + "step": 11965 + }, + { + "epoch": 0.76, + "grad_norm": 0.8508997559547424, + "learning_rate": 1.4575806887919951e-06, + "loss": 0.5896, + "step": 11966 + }, + { + "epoch": 0.76, + "grad_norm": 0.8754972219467163, + "learning_rate": 1.456856691920111e-06, + "loss": 0.579, + "step": 11967 + }, + { + "epoch": 0.76, + "grad_norm": 0.9017912745475769, + "learning_rate": 1.4561328442376678e-06, + "loss": 0.5713, + "step": 11968 + }, + { + "epoch": 0.76, + "grad_norm": 0.8693665862083435, + "learning_rate": 1.45540914577514e-06, + "loss": 0.5654, + "step": 11969 + }, + { + "epoch": 0.76, + "grad_norm": 0.9070661664009094, + "learning_rate": 1.454685596563003e-06, + "loss": 0.5468, + "step": 11970 + }, + { + "epoch": 0.76, + "grad_norm": 0.8635410666465759, + "learning_rate": 1.4539621966317219e-06, + "loss": 0.5684, + "step": 11971 + }, + { + "epoch": 0.76, + "grad_norm": 0.8574047088623047, + "learning_rate": 1.4532389460117574e-06, + "loss": 0.5417, + "step": 11972 + }, + { + "epoch": 0.76, + "grad_norm": 0.9325186610221863, + "learning_rate": 1.4525158447335635e-06, + "loss": 0.5479, + "step": 11973 + }, + { + "epoch": 0.76, + "grad_norm": 0.8873754143714905, + "learning_rate": 1.4517928928275843e-06, + "loss": 0.5812, + "step": 11974 + }, + { + "epoch": 0.76, + "grad_norm": 0.8907727599143982, + "learning_rate": 1.4510700903242642e-06, + "loss": 0.5342, + "step": 11975 + }, + { + "epoch": 0.76, + "grad_norm": 0.8830631375312805, + "learning_rate": 1.4503474372540382e-06, + "loss": 0.6237, + "step": 11976 + }, + { + "epoch": 0.76, + "grad_norm": 0.9112756252288818, + "learning_rate": 1.4496249336473318e-06, + "loss": 0.5756, + "step": 11977 + }, + { + "epoch": 0.76, + "grad_norm": 0.9188751578330994, + "learning_rate": 1.4489025795345686e-06, + "loss": 0.5752, + "step": 11978 + }, + { + "epoch": 0.76, + "grad_norm": 0.915205717086792, + "learning_rate": 1.4481803749461643e-06, + "loss": 0.5848, + "step": 11979 + }, + { + "epoch": 0.76, + "grad_norm": 0.8783239722251892, + "learning_rate": 1.4474583199125285e-06, + "loss": 0.5691, + "step": 11980 + }, + { + "epoch": 0.76, + "grad_norm": 0.8469722270965576, + "learning_rate": 1.446736414464066e-06, + "loss": 0.5367, + "step": 11981 + }, + { + "epoch": 0.76, + "grad_norm": 0.855970025062561, + "learning_rate": 1.4460146586311713e-06, + "loss": 0.5862, + "step": 11982 + }, + { + "epoch": 0.76, + "grad_norm": 0.9022277593612671, + "learning_rate": 1.4452930524442338e-06, + "loss": 0.5992, + "step": 11983 + }, + { + "epoch": 0.76, + "grad_norm": 0.856157124042511, + "learning_rate": 1.4445715959336432e-06, + "loss": 0.5505, + "step": 11984 + }, + { + "epoch": 0.76, + "grad_norm": 0.8723897933959961, + "learning_rate": 1.4438502891297723e-06, + "loss": 0.5493, + "step": 11985 + }, + { + "epoch": 0.76, + "grad_norm": 0.8988958597183228, + "learning_rate": 1.4431291320629953e-06, + "loss": 0.5577, + "step": 11986 + }, + { + "epoch": 0.76, + "grad_norm": 0.9534813165664673, + "learning_rate": 1.4424081247636768e-06, + "loss": 0.575, + "step": 11987 + }, + { + "epoch": 0.76, + "grad_norm": 0.886587917804718, + "learning_rate": 1.4416872672621762e-06, + "loss": 0.6182, + "step": 11988 + }, + { + "epoch": 0.76, + "grad_norm": 0.9059985280036926, + "learning_rate": 1.440966559588846e-06, + "loss": 0.5644, + "step": 11989 + }, + { + "epoch": 0.76, + "grad_norm": 0.8605340719223022, + "learning_rate": 1.4402460017740355e-06, + "loss": 0.5692, + "step": 11990 + }, + { + "epoch": 0.76, + "grad_norm": 0.8864085674285889, + "learning_rate": 1.4395255938480785e-06, + "loss": 0.5363, + "step": 11991 + }, + { + "epoch": 0.76, + "grad_norm": 0.9561933875083923, + "learning_rate": 1.4388053358413162e-06, + "loss": 0.5968, + "step": 11992 + }, + { + "epoch": 0.76, + "grad_norm": 0.8334751129150391, + "learning_rate": 1.4380852277840712e-06, + "loss": 0.5657, + "step": 11993 + }, + { + "epoch": 0.76, + "grad_norm": 0.878257155418396, + "learning_rate": 1.437365269706666e-06, + "loss": 0.5811, + "step": 11994 + }, + { + "epoch": 0.76, + "grad_norm": 0.8865748047828674, + "learning_rate": 1.436645461639416e-06, + "loss": 0.5639, + "step": 11995 + }, + { + "epoch": 0.76, + "grad_norm": 0.9328632354736328, + "learning_rate": 1.4359258036126295e-06, + "loss": 0.5744, + "step": 11996 + }, + { + "epoch": 0.76, + "grad_norm": 0.8493953943252563, + "learning_rate": 1.4352062956566088e-06, + "loss": 0.5575, + "step": 11997 + }, + { + "epoch": 0.76, + "grad_norm": 0.8793647289276123, + "learning_rate": 1.4344869378016518e-06, + "loss": 0.5454, + "step": 11998 + }, + { + "epoch": 0.76, + "grad_norm": 0.8968666195869446, + "learning_rate": 1.4337677300780445e-06, + "loss": 0.6076, + "step": 11999 + }, + { + "epoch": 0.76, + "grad_norm": 0.8650113940238953, + "learning_rate": 1.433048672516072e-06, + "loss": 0.601, + "step": 12000 + }, + { + "epoch": 0.76, + "grad_norm": 0.8847838044166565, + "learning_rate": 1.4323297651460117e-06, + "loss": 0.5885, + "step": 12001 + }, + { + "epoch": 0.76, + "grad_norm": 0.9786666035652161, + "learning_rate": 1.4316110079981339e-06, + "loss": 0.5909, + "step": 12002 + }, + { + "epoch": 0.76, + "grad_norm": 0.8795079588890076, + "learning_rate": 1.4308924011027042e-06, + "loss": 0.593, + "step": 12003 + }, + { + "epoch": 0.76, + "grad_norm": 0.9203742146492004, + "learning_rate": 1.430173944489977e-06, + "loss": 0.576, + "step": 12004 + }, + { + "epoch": 0.76, + "grad_norm": 0.8110754489898682, + "learning_rate": 1.4294556381902074e-06, + "loss": 0.5006, + "step": 12005 + }, + { + "epoch": 0.76, + "grad_norm": 0.90155029296875, + "learning_rate": 1.428737482233642e-06, + "loss": 0.5475, + "step": 12006 + }, + { + "epoch": 0.76, + "grad_norm": 0.8908960819244385, + "learning_rate": 1.4280194766505156e-06, + "loss": 0.5745, + "step": 12007 + }, + { + "epoch": 0.76, + "grad_norm": 0.9284471869468689, + "learning_rate": 1.427301621471064e-06, + "loss": 0.6061, + "step": 12008 + }, + { + "epoch": 0.76, + "grad_norm": 0.9098774790763855, + "learning_rate": 1.4265839167255114e-06, + "loss": 0.6325, + "step": 12009 + }, + { + "epoch": 0.76, + "grad_norm": 0.8448832035064697, + "learning_rate": 1.42586636244408e-06, + "loss": 0.5629, + "step": 12010 + }, + { + "epoch": 0.76, + "grad_norm": 0.898383378982544, + "learning_rate": 1.4251489586569834e-06, + "loss": 0.6486, + "step": 12011 + }, + { + "epoch": 0.76, + "grad_norm": 0.915291428565979, + "learning_rate": 1.4244317053944268e-06, + "loss": 0.5692, + "step": 12012 + }, + { + "epoch": 0.76, + "grad_norm": 0.8258968591690063, + "learning_rate": 1.423714602686611e-06, + "loss": 0.5649, + "step": 12013 + }, + { + "epoch": 0.76, + "grad_norm": 0.8475250601768494, + "learning_rate": 1.4229976505637361e-06, + "loss": 0.5672, + "step": 12014 + }, + { + "epoch": 0.76, + "grad_norm": 0.8996663689613342, + "learning_rate": 1.4222808490559842e-06, + "loss": 0.6316, + "step": 12015 + }, + { + "epoch": 0.76, + "grad_norm": 0.8408166766166687, + "learning_rate": 1.4215641981935403e-06, + "loss": 0.4919, + "step": 12016 + }, + { + "epoch": 0.76, + "grad_norm": 0.9476755261421204, + "learning_rate": 1.4208476980065794e-06, + "loss": 0.5853, + "step": 12017 + }, + { + "epoch": 0.76, + "grad_norm": 0.8874031901359558, + "learning_rate": 1.420131348525271e-06, + "loss": 0.5749, + "step": 12018 + }, + { + "epoch": 0.76, + "grad_norm": 0.8719558119773865, + "learning_rate": 1.4194151497797793e-06, + "loss": 0.6041, + "step": 12019 + }, + { + "epoch": 0.76, + "grad_norm": 0.9502779841423035, + "learning_rate": 1.4186991018002582e-06, + "loss": 0.5715, + "step": 12020 + }, + { + "epoch": 0.76, + "grad_norm": 0.8630761504173279, + "learning_rate": 1.4179832046168584e-06, + "loss": 0.6021, + "step": 12021 + }, + { + "epoch": 0.76, + "grad_norm": 0.922612190246582, + "learning_rate": 1.417267458259728e-06, + "loss": 0.5775, + "step": 12022 + }, + { + "epoch": 0.76, + "grad_norm": 0.8538753390312195, + "learning_rate": 1.4165518627589991e-06, + "loss": 0.5034, + "step": 12023 + }, + { + "epoch": 0.76, + "grad_norm": 0.9272584915161133, + "learning_rate": 1.4158364181448065e-06, + "loss": 0.5819, + "step": 12024 + }, + { + "epoch": 0.76, + "grad_norm": 0.9062113761901855, + "learning_rate": 1.4151211244472734e-06, + "loss": 0.6303, + "step": 12025 + }, + { + "epoch": 0.76, + "grad_norm": 1.0064603090286255, + "learning_rate": 1.414405981696519e-06, + "loss": 0.5895, + "step": 12026 + }, + { + "epoch": 0.76, + "grad_norm": 0.8894251585006714, + "learning_rate": 1.4136909899226564e-06, + "loss": 0.6, + "step": 12027 + }, + { + "epoch": 0.76, + "grad_norm": 0.8731244206428528, + "learning_rate": 1.412976149155789e-06, + "loss": 0.5272, + "step": 12028 + }, + { + "epoch": 0.76, + "grad_norm": 0.9041507840156555, + "learning_rate": 1.412261459426018e-06, + "loss": 0.5635, + "step": 12029 + }, + { + "epoch": 0.76, + "grad_norm": 0.9179893136024475, + "learning_rate": 1.4115469207634358e-06, + "loss": 0.6162, + "step": 12030 + }, + { + "epoch": 0.76, + "grad_norm": 0.9017035365104675, + "learning_rate": 1.4108325331981298e-06, + "loss": 0.5674, + "step": 12031 + }, + { + "epoch": 0.76, + "grad_norm": 0.8612952828407288, + "learning_rate": 1.4101182967601796e-06, + "loss": 0.6167, + "step": 12032 + }, + { + "epoch": 0.76, + "grad_norm": 0.8670690059661865, + "learning_rate": 1.4094042114796613e-06, + "loss": 0.5648, + "step": 12033 + }, + { + "epoch": 0.76, + "grad_norm": 0.900267481803894, + "learning_rate": 1.4086902773866379e-06, + "loss": 0.5819, + "step": 12034 + }, + { + "epoch": 0.76, + "grad_norm": 0.8597514629364014, + "learning_rate": 1.4079764945111767e-06, + "loss": 0.5914, + "step": 12035 + }, + { + "epoch": 0.76, + "grad_norm": 0.8889679908752441, + "learning_rate": 1.407262862883328e-06, + "loss": 0.5411, + "step": 12036 + }, + { + "epoch": 0.76, + "grad_norm": 0.8739466071128845, + "learning_rate": 1.4065493825331416e-06, + "loss": 0.5116, + "step": 12037 + }, + { + "epoch": 0.76, + "grad_norm": 0.9049973487854004, + "learning_rate": 1.4058360534906607e-06, + "loss": 0.6136, + "step": 12038 + }, + { + "epoch": 0.76, + "grad_norm": 0.9659464359283447, + "learning_rate": 1.4051228757859197e-06, + "loss": 0.6078, + "step": 12039 + }, + { + "epoch": 0.76, + "grad_norm": 0.9126960635185242, + "learning_rate": 1.4044098494489494e-06, + "loss": 0.5604, + "step": 12040 + }, + { + "epoch": 0.76, + "grad_norm": 0.8697749972343445, + "learning_rate": 1.4036969745097735e-06, + "loss": 0.558, + "step": 12041 + }, + { + "epoch": 0.76, + "grad_norm": 0.8654941320419312, + "learning_rate": 1.4029842509984043e-06, + "loss": 0.5581, + "step": 12042 + }, + { + "epoch": 0.76, + "grad_norm": 0.8910323977470398, + "learning_rate": 1.4022716789448581e-06, + "loss": 0.5612, + "step": 12043 + }, + { + "epoch": 0.76, + "grad_norm": 0.8871368765830994, + "learning_rate": 1.4015592583791343e-06, + "loss": 0.5548, + "step": 12044 + }, + { + "epoch": 0.76, + "grad_norm": 0.8820889592170715, + "learning_rate": 1.4008469893312321e-06, + "loss": 0.5956, + "step": 12045 + }, + { + "epoch": 0.76, + "grad_norm": 0.8938196301460266, + "learning_rate": 1.4001348718311446e-06, + "loss": 0.5659, + "step": 12046 + }, + { + "epoch": 0.76, + "grad_norm": 0.864248514175415, + "learning_rate": 1.399422905908851e-06, + "loss": 0.5602, + "step": 12047 + }, + { + "epoch": 0.76, + "grad_norm": 0.9130455851554871, + "learning_rate": 1.3987110915943352e-06, + "loss": 0.5987, + "step": 12048 + }, + { + "epoch": 0.76, + "grad_norm": 0.8632736802101135, + "learning_rate": 1.397999428917569e-06, + "loss": 0.5656, + "step": 12049 + }, + { + "epoch": 0.76, + "grad_norm": 0.9414397478103638, + "learning_rate": 1.3972879179085147e-06, + "loss": 0.5893, + "step": 12050 + }, + { + "epoch": 0.76, + "grad_norm": 0.9404668211936951, + "learning_rate": 1.396576558597133e-06, + "loss": 0.608, + "step": 12051 + }, + { + "epoch": 0.76, + "grad_norm": 0.9048896431922913, + "learning_rate": 1.3958653510133774e-06, + "loss": 0.5404, + "step": 12052 + }, + { + "epoch": 0.76, + "grad_norm": 0.9009268879890442, + "learning_rate": 1.3951542951871938e-06, + "loss": 0.5708, + "step": 12053 + }, + { + "epoch": 0.76, + "grad_norm": 0.8709206581115723, + "learning_rate": 1.3944433911485229e-06, + "loss": 0.5458, + "step": 12054 + }, + { + "epoch": 0.76, + "grad_norm": 0.8912920951843262, + "learning_rate": 1.3937326389272977e-06, + "loss": 0.6084, + "step": 12055 + }, + { + "epoch": 0.76, + "grad_norm": 0.8818450570106506, + "learning_rate": 1.3930220385534453e-06, + "loss": 0.6093, + "step": 12056 + }, + { + "epoch": 0.76, + "grad_norm": 0.8648407459259033, + "learning_rate": 1.3923115900568896e-06, + "loss": 0.6086, + "step": 12057 + }, + { + "epoch": 0.76, + "grad_norm": 0.9007489681243896, + "learning_rate": 1.3916012934675405e-06, + "loss": 0.5638, + "step": 12058 + }, + { + "epoch": 0.76, + "grad_norm": 0.8429774641990662, + "learning_rate": 1.3908911488153081e-06, + "loss": 0.5916, + "step": 12059 + }, + { + "epoch": 0.76, + "grad_norm": 0.8665831685066223, + "learning_rate": 1.3901811561300944e-06, + "loss": 0.5793, + "step": 12060 + }, + { + "epoch": 0.76, + "grad_norm": 0.8880481719970703, + "learning_rate": 1.3894713154417944e-06, + "loss": 0.567, + "step": 12061 + }, + { + "epoch": 0.76, + "grad_norm": 0.8977078199386597, + "learning_rate": 1.3887616267802972e-06, + "loss": 0.5831, + "step": 12062 + }, + { + "epoch": 0.76, + "grad_norm": 0.8818337917327881, + "learning_rate": 1.3880520901754874e-06, + "loss": 0.584, + "step": 12063 + }, + { + "epoch": 0.76, + "grad_norm": 0.9489629864692688, + "learning_rate": 1.3873427056572354e-06, + "loss": 0.6143, + "step": 12064 + }, + { + "epoch": 0.76, + "grad_norm": 0.8898563981056213, + "learning_rate": 1.386633473255418e-06, + "loss": 0.5799, + "step": 12065 + }, + { + "epoch": 0.76, + "grad_norm": 0.9059821963310242, + "learning_rate": 1.3859243929998933e-06, + "loss": 0.6092, + "step": 12066 + }, + { + "epoch": 0.76, + "grad_norm": 0.8613783121109009, + "learning_rate": 1.3852154649205201e-06, + "loss": 0.5234, + "step": 12067 + }, + { + "epoch": 0.76, + "grad_norm": 0.9153664112091064, + "learning_rate": 1.3845066890471487e-06, + "loss": 0.5966, + "step": 12068 + }, + { + "epoch": 0.76, + "grad_norm": 0.8898327350616455, + "learning_rate": 1.3837980654096229e-06, + "loss": 0.6014, + "step": 12069 + }, + { + "epoch": 0.76, + "grad_norm": 0.9291636347770691, + "learning_rate": 1.383089594037781e-06, + "loss": 0.5429, + "step": 12070 + }, + { + "epoch": 0.76, + "grad_norm": 0.9276854991912842, + "learning_rate": 1.3823812749614556e-06, + "loss": 0.5632, + "step": 12071 + }, + { + "epoch": 0.76, + "grad_norm": 0.9263901114463806, + "learning_rate": 1.3816731082104668e-06, + "loss": 0.5816, + "step": 12072 + }, + { + "epoch": 0.76, + "grad_norm": 0.9651497602462769, + "learning_rate": 1.3809650938146391e-06, + "loss": 0.6368, + "step": 12073 + }, + { + "epoch": 0.76, + "grad_norm": 0.8885228037834167, + "learning_rate": 1.3802572318037804e-06, + "loss": 0.5443, + "step": 12074 + }, + { + "epoch": 0.77, + "grad_norm": 0.8886858820915222, + "learning_rate": 1.379549522207697e-06, + "loss": 0.5806, + "step": 12075 + }, + { + "epoch": 0.77, + "grad_norm": 0.9202280640602112, + "learning_rate": 1.3788419650561908e-06, + "loss": 0.6229, + "step": 12076 + }, + { + "epoch": 0.77, + "grad_norm": 0.875375509262085, + "learning_rate": 1.3781345603790485e-06, + "loss": 0.4984, + "step": 12077 + }, + { + "epoch": 0.77, + "grad_norm": 0.8515941500663757, + "learning_rate": 1.3774273082060625e-06, + "loss": 0.5577, + "step": 12078 + }, + { + "epoch": 0.77, + "grad_norm": 0.9056531190872192, + "learning_rate": 1.3767202085670118e-06, + "loss": 0.6189, + "step": 12079 + }, + { + "epoch": 0.77, + "grad_norm": 0.877628743648529, + "learning_rate": 1.3760132614916672e-06, + "loss": 0.5966, + "step": 12080 + }, + { + "epoch": 0.77, + "grad_norm": 0.9349701404571533, + "learning_rate": 1.375306467009797e-06, + "loss": 0.64, + "step": 12081 + }, + { + "epoch": 0.77, + "grad_norm": 0.9644002914428711, + "learning_rate": 1.3745998251511622e-06, + "loss": 0.5834, + "step": 12082 + }, + { + "epoch": 0.77, + "grad_norm": 0.8802942037582397, + "learning_rate": 1.373893335945517e-06, + "loss": 0.5498, + "step": 12083 + }, + { + "epoch": 0.77, + "grad_norm": 0.936839759349823, + "learning_rate": 1.373186999422611e-06, + "loss": 0.5698, + "step": 12084 + }, + { + "epoch": 0.77, + "grad_norm": 0.8854506015777588, + "learning_rate": 1.3724808156121799e-06, + "loss": 0.5429, + "step": 12085 + }, + { + "epoch": 0.77, + "grad_norm": 0.882511556148529, + "learning_rate": 1.3717747845439645e-06, + "loss": 0.531, + "step": 12086 + }, + { + "epoch": 0.77, + "grad_norm": 0.8254367709159851, + "learning_rate": 1.371068906247693e-06, + "loss": 0.5357, + "step": 12087 + }, + { + "epoch": 0.77, + "grad_norm": 0.9685107469558716, + "learning_rate": 1.3703631807530831e-06, + "loss": 0.6293, + "step": 12088 + }, + { + "epoch": 0.77, + "grad_norm": 0.8970738649368286, + "learning_rate": 1.3696576080898538e-06, + "loss": 0.6471, + "step": 12089 + }, + { + "epoch": 0.77, + "grad_norm": 0.8906121253967285, + "learning_rate": 1.3689521882877137e-06, + "loss": 0.5589, + "step": 12090 + }, + { + "epoch": 0.77, + "grad_norm": 0.8889774084091187, + "learning_rate": 1.3682469213763655e-06, + "loss": 0.5734, + "step": 12091 + }, + { + "epoch": 0.77, + "grad_norm": 0.8506032228469849, + "learning_rate": 1.367541807385507e-06, + "loss": 0.5613, + "step": 12092 + }, + { + "epoch": 0.77, + "grad_norm": 0.9361366629600525, + "learning_rate": 1.3668368463448246e-06, + "loss": 0.5551, + "step": 12093 + }, + { + "epoch": 0.77, + "grad_norm": 0.867592990398407, + "learning_rate": 1.3661320382840026e-06, + "loss": 0.4914, + "step": 12094 + }, + { + "epoch": 0.77, + "grad_norm": 0.9990113973617554, + "learning_rate": 1.3654273832327219e-06, + "loss": 0.5793, + "step": 12095 + }, + { + "epoch": 0.77, + "grad_norm": 0.9370816946029663, + "learning_rate": 1.3647228812206493e-06, + "loss": 0.5923, + "step": 12096 + }, + { + "epoch": 0.77, + "grad_norm": 0.8764215111732483, + "learning_rate": 1.3640185322774495e-06, + "loss": 0.5852, + "step": 12097 + }, + { + "epoch": 0.77, + "grad_norm": 0.8534221053123474, + "learning_rate": 1.3633143364327812e-06, + "loss": 0.5271, + "step": 12098 + }, + { + "epoch": 0.77, + "grad_norm": 0.8506825566291809, + "learning_rate": 1.3626102937162943e-06, + "loss": 0.5034, + "step": 12099 + }, + { + "epoch": 0.77, + "grad_norm": 0.9237973690032959, + "learning_rate": 1.3619064041576368e-06, + "loss": 0.5503, + "step": 12100 + }, + { + "epoch": 0.77, + "grad_norm": 0.9016441106796265, + "learning_rate": 1.3612026677864426e-06, + "loss": 0.6057, + "step": 12101 + }, + { + "epoch": 0.77, + "grad_norm": 0.9441617727279663, + "learning_rate": 1.360499084632344e-06, + "loss": 0.6116, + "step": 12102 + }, + { + "epoch": 0.77, + "grad_norm": 0.8589941263198853, + "learning_rate": 1.3597956547249713e-06, + "loss": 0.5755, + "step": 12103 + }, + { + "epoch": 0.77, + "grad_norm": 0.8942433595657349, + "learning_rate": 1.3590923780939386e-06, + "loss": 0.575, + "step": 12104 + }, + { + "epoch": 0.77, + "grad_norm": 0.8741679787635803, + "learning_rate": 1.3583892547688598e-06, + "loss": 0.5754, + "step": 12105 + }, + { + "epoch": 0.77, + "grad_norm": 0.8796617984771729, + "learning_rate": 1.357686284779343e-06, + "loss": 0.5743, + "step": 12106 + }, + { + "epoch": 0.77, + "grad_norm": 0.8270777463912964, + "learning_rate": 1.3569834681549832e-06, + "loss": 0.5867, + "step": 12107 + }, + { + "epoch": 0.77, + "grad_norm": 0.8669138550758362, + "learning_rate": 1.3562808049253795e-06, + "loss": 0.575, + "step": 12108 + }, + { + "epoch": 0.77, + "grad_norm": 0.8774738311767578, + "learning_rate": 1.3555782951201134e-06, + "loss": 0.6112, + "step": 12109 + }, + { + "epoch": 0.77, + "grad_norm": 0.8828439712524414, + "learning_rate": 1.3548759387687683e-06, + "loss": 0.5896, + "step": 12110 + }, + { + "epoch": 0.77, + "grad_norm": 0.9196562170982361, + "learning_rate": 1.3541737359009161e-06, + "loss": 0.5742, + "step": 12111 + }, + { + "epoch": 0.77, + "grad_norm": 0.8660917282104492, + "learning_rate": 1.3534716865461256e-06, + "loss": 0.5293, + "step": 12112 + }, + { + "epoch": 0.77, + "grad_norm": 0.8525222539901733, + "learning_rate": 1.3527697907339565e-06, + "loss": 0.5479, + "step": 12113 + }, + { + "epoch": 0.77, + "grad_norm": 0.855554461479187, + "learning_rate": 1.3520680484939651e-06, + "loss": 0.5706, + "step": 12114 + }, + { + "epoch": 0.77, + "grad_norm": 1.0038961172103882, + "learning_rate": 1.3513664598556952e-06, + "loss": 0.5432, + "step": 12115 + }, + { + "epoch": 0.77, + "grad_norm": 0.8975916504859924, + "learning_rate": 1.3506650248486946e-06, + "loss": 0.5811, + "step": 12116 + }, + { + "epoch": 0.77, + "grad_norm": 1.0002868175506592, + "learning_rate": 1.3499637435024926e-06, + "loss": 0.5884, + "step": 12117 + }, + { + "epoch": 0.77, + "grad_norm": 0.9339752197265625, + "learning_rate": 1.34926261584662e-06, + "loss": 0.5654, + "step": 12118 + }, + { + "epoch": 0.77, + "grad_norm": 0.8323291540145874, + "learning_rate": 1.3485616419105985e-06, + "loss": 0.6116, + "step": 12119 + }, + { + "epoch": 0.77, + "grad_norm": 0.9814819097518921, + "learning_rate": 1.3478608217239435e-06, + "loss": 0.5862, + "step": 12120 + }, + { + "epoch": 0.77, + "grad_norm": 0.8952215313911438, + "learning_rate": 1.347160155316165e-06, + "loss": 0.5829, + "step": 12121 + }, + { + "epoch": 0.77, + "grad_norm": 0.9084662795066833, + "learning_rate": 1.3464596427167663e-06, + "loss": 0.574, + "step": 12122 + }, + { + "epoch": 0.77, + "grad_norm": 0.8473517298698425, + "learning_rate": 1.3457592839552409e-06, + "loss": 0.5339, + "step": 12123 + }, + { + "epoch": 0.77, + "grad_norm": 0.8757284879684448, + "learning_rate": 1.3450590790610795e-06, + "loss": 0.5858, + "step": 12124 + }, + { + "epoch": 0.77, + "grad_norm": 0.9462736248970032, + "learning_rate": 1.3443590280637664e-06, + "loss": 0.5508, + "step": 12125 + }, + { + "epoch": 0.77, + "grad_norm": 0.9609660506248474, + "learning_rate": 1.3436591309927772e-06, + "loss": 0.58, + "step": 12126 + }, + { + "epoch": 0.77, + "grad_norm": 0.8770208358764648, + "learning_rate": 1.3429593878775825e-06, + "loss": 0.5592, + "step": 12127 + }, + { + "epoch": 0.77, + "grad_norm": 0.9094352722167969, + "learning_rate": 1.342259798747646e-06, + "loss": 0.5542, + "step": 12128 + }, + { + "epoch": 0.77, + "grad_norm": 0.8885565996170044, + "learning_rate": 1.3415603636324248e-06, + "loss": 0.5852, + "step": 12129 + }, + { + "epoch": 0.77, + "grad_norm": 0.839444637298584, + "learning_rate": 1.3408610825613722e-06, + "loss": 0.5314, + "step": 12130 + }, + { + "epoch": 0.77, + "grad_norm": 0.9045486450195312, + "learning_rate": 1.340161955563928e-06, + "loss": 0.5268, + "step": 12131 + }, + { + "epoch": 0.77, + "grad_norm": 0.886461079120636, + "learning_rate": 1.339462982669531e-06, + "loss": 0.5484, + "step": 12132 + }, + { + "epoch": 0.77, + "grad_norm": 0.8416271805763245, + "learning_rate": 1.3387641639076165e-06, + "loss": 0.5461, + "step": 12133 + }, + { + "epoch": 0.77, + "grad_norm": 0.8772505521774292, + "learning_rate": 1.3380654993076054e-06, + "loss": 0.5332, + "step": 12134 + }, + { + "epoch": 0.77, + "grad_norm": 0.824565589427948, + "learning_rate": 1.3373669888989167e-06, + "loss": 0.5231, + "step": 12135 + }, + { + "epoch": 0.77, + "grad_norm": 0.8874905109405518, + "learning_rate": 1.3366686327109645e-06, + "loss": 0.5964, + "step": 12136 + }, + { + "epoch": 0.77, + "grad_norm": 0.9241152405738831, + "learning_rate": 1.3359704307731491e-06, + "loss": 0.563, + "step": 12137 + }, + { + "epoch": 0.77, + "grad_norm": 0.9180853366851807, + "learning_rate": 1.3352723831148761e-06, + "loss": 0.5685, + "step": 12138 + }, + { + "epoch": 0.77, + "grad_norm": 0.9550989270210266, + "learning_rate": 1.3345744897655327e-06, + "loss": 0.5835, + "step": 12139 + }, + { + "epoch": 0.77, + "grad_norm": 0.8818813562393188, + "learning_rate": 1.3338767507545064e-06, + "loss": 0.5739, + "step": 12140 + }, + { + "epoch": 0.77, + "grad_norm": 0.8973625302314758, + "learning_rate": 1.3331791661111765e-06, + "loss": 0.5607, + "step": 12141 + }, + { + "epoch": 0.77, + "grad_norm": 0.9165273904800415, + "learning_rate": 1.3324817358649162e-06, + "loss": 0.5993, + "step": 12142 + }, + { + "epoch": 0.77, + "grad_norm": 0.8530603647232056, + "learning_rate": 1.3317844600450912e-06, + "loss": 0.5488, + "step": 12143 + }, + { + "epoch": 0.77, + "grad_norm": 0.9110085964202881, + "learning_rate": 1.3310873386810641e-06, + "loss": 0.6053, + "step": 12144 + }, + { + "epoch": 0.77, + "grad_norm": 0.8425561189651489, + "learning_rate": 1.330390371802182e-06, + "loss": 0.5784, + "step": 12145 + }, + { + "epoch": 0.77, + "grad_norm": 0.9364494681358337, + "learning_rate": 1.3296935594377996e-06, + "loss": 0.6112, + "step": 12146 + }, + { + "epoch": 0.77, + "grad_norm": 0.9249874949455261, + "learning_rate": 1.3289969016172515e-06, + "loss": 0.6136, + "step": 12147 + }, + { + "epoch": 0.77, + "grad_norm": 0.8937221169471741, + "learning_rate": 1.3283003983698733e-06, + "loss": 0.5601, + "step": 12148 + }, + { + "epoch": 0.77, + "grad_norm": 0.9098302721977234, + "learning_rate": 1.3276040497249926e-06, + "loss": 0.5666, + "step": 12149 + }, + { + "epoch": 0.77, + "grad_norm": 0.862511396408081, + "learning_rate": 1.3269078557119297e-06, + "loss": 0.5542, + "step": 12150 + }, + { + "epoch": 0.77, + "grad_norm": 0.9537906646728516, + "learning_rate": 1.3262118163599992e-06, + "loss": 0.5299, + "step": 12151 + }, + { + "epoch": 0.77, + "grad_norm": 0.9067496657371521, + "learning_rate": 1.3255159316985105e-06, + "loss": 0.5696, + "step": 12152 + }, + { + "epoch": 0.77, + "grad_norm": 0.8889273405075073, + "learning_rate": 1.3248202017567624e-06, + "loss": 0.5981, + "step": 12153 + }, + { + "epoch": 0.77, + "grad_norm": 0.8882769346237183, + "learning_rate": 1.32412462656405e-06, + "loss": 0.61, + "step": 12154 + }, + { + "epoch": 0.77, + "grad_norm": 0.9011073112487793, + "learning_rate": 1.3234292061496622e-06, + "loss": 0.5695, + "step": 12155 + }, + { + "epoch": 0.77, + "grad_norm": 0.8439561724662781, + "learning_rate": 1.3227339405428807e-06, + "loss": 0.5706, + "step": 12156 + }, + { + "epoch": 0.77, + "grad_norm": 0.9296759366989136, + "learning_rate": 1.3220388297729825e-06, + "loss": 0.5967, + "step": 12157 + }, + { + "epoch": 0.77, + "grad_norm": 0.8398501873016357, + "learning_rate": 1.3213438738692313e-06, + "loss": 0.5288, + "step": 12158 + }, + { + "epoch": 0.77, + "grad_norm": 0.908816397190094, + "learning_rate": 1.320649072860894e-06, + "loss": 0.5917, + "step": 12159 + }, + { + "epoch": 0.77, + "grad_norm": 0.90097975730896, + "learning_rate": 1.3199544267772257e-06, + "loss": 0.5321, + "step": 12160 + }, + { + "epoch": 0.77, + "grad_norm": 0.899578869342804, + "learning_rate": 1.3192599356474733e-06, + "loss": 0.6484, + "step": 12161 + }, + { + "epoch": 0.77, + "grad_norm": 0.8797482252120972, + "learning_rate": 1.318565599500881e-06, + "loss": 0.6243, + "step": 12162 + }, + { + "epoch": 0.77, + "grad_norm": 0.873276948928833, + "learning_rate": 1.3178714183666846e-06, + "loss": 0.5852, + "step": 12163 + }, + { + "epoch": 0.77, + "grad_norm": 0.9565367102622986, + "learning_rate": 1.3171773922741132e-06, + "loss": 0.573, + "step": 12164 + }, + { + "epoch": 0.77, + "grad_norm": 0.8669590950012207, + "learning_rate": 1.316483521252392e-06, + "loss": 0.6123, + "step": 12165 + }, + { + "epoch": 0.77, + "grad_norm": 0.7968014478683472, + "learning_rate": 1.3157898053307322e-06, + "loss": 0.5062, + "step": 12166 + }, + { + "epoch": 0.77, + "grad_norm": 0.9194180369377136, + "learning_rate": 1.3150962445383492e-06, + "loss": 0.5859, + "step": 12167 + }, + { + "epoch": 0.77, + "grad_norm": 0.8830471038818359, + "learning_rate": 1.314402838904446e-06, + "loss": 0.5655, + "step": 12168 + }, + { + "epoch": 0.77, + "grad_norm": 0.9183891415596008, + "learning_rate": 1.3137095884582163e-06, + "loss": 0.6396, + "step": 12169 + }, + { + "epoch": 0.77, + "grad_norm": 0.8405441045761108, + "learning_rate": 1.3130164932288524e-06, + "loss": 0.5072, + "step": 12170 + }, + { + "epoch": 0.77, + "grad_norm": 0.9240639209747314, + "learning_rate": 1.3123235532455376e-06, + "loss": 0.6048, + "step": 12171 + }, + { + "epoch": 0.77, + "grad_norm": 0.8928464651107788, + "learning_rate": 1.3116307685374497e-06, + "loss": 0.5657, + "step": 12172 + }, + { + "epoch": 0.77, + "grad_norm": 0.8829059600830078, + "learning_rate": 1.3109381391337605e-06, + "loss": 0.5598, + "step": 12173 + }, + { + "epoch": 0.77, + "grad_norm": 0.8686420321464539, + "learning_rate": 1.3102456650636314e-06, + "loss": 0.6097, + "step": 12174 + }, + { + "epoch": 0.77, + "grad_norm": 0.9173951745033264, + "learning_rate": 1.3095533463562204e-06, + "loss": 0.6205, + "step": 12175 + }, + { + "epoch": 0.77, + "grad_norm": 0.9148120880126953, + "learning_rate": 1.3088611830406828e-06, + "loss": 0.5677, + "step": 12176 + }, + { + "epoch": 0.77, + "grad_norm": 0.8983868360519409, + "learning_rate": 1.3081691751461588e-06, + "loss": 0.5204, + "step": 12177 + }, + { + "epoch": 0.77, + "grad_norm": 0.9133874177932739, + "learning_rate": 1.3074773227017878e-06, + "loss": 0.601, + "step": 12178 + }, + { + "epoch": 0.77, + "grad_norm": 0.9384349584579468, + "learning_rate": 1.3067856257367018e-06, + "loss": 0.5376, + "step": 12179 + }, + { + "epoch": 0.77, + "grad_norm": 0.9111471772193909, + "learning_rate": 1.3060940842800247e-06, + "loss": 0.5794, + "step": 12180 + }, + { + "epoch": 0.77, + "grad_norm": 0.8366988897323608, + "learning_rate": 1.3054026983608776e-06, + "loss": 0.5528, + "step": 12181 + }, + { + "epoch": 0.77, + "grad_norm": 0.887912392616272, + "learning_rate": 1.3047114680083683e-06, + "loss": 0.544, + "step": 12182 + }, + { + "epoch": 0.77, + "grad_norm": 0.8583880662918091, + "learning_rate": 1.3040203932516043e-06, + "loss": 0.5857, + "step": 12183 + }, + { + "epoch": 0.77, + "grad_norm": 0.8937926888465881, + "learning_rate": 1.303329474119684e-06, + "loss": 0.5397, + "step": 12184 + }, + { + "epoch": 0.77, + "grad_norm": 0.9099065661430359, + "learning_rate": 1.3026387106417e-06, + "loss": 0.5527, + "step": 12185 + }, + { + "epoch": 0.77, + "grad_norm": 0.8292108774185181, + "learning_rate": 1.301948102846738e-06, + "loss": 0.5656, + "step": 12186 + }, + { + "epoch": 0.77, + "grad_norm": 0.8275082111358643, + "learning_rate": 1.301257650763878e-06, + "loss": 0.5818, + "step": 12187 + }, + { + "epoch": 0.77, + "grad_norm": 0.973997950553894, + "learning_rate": 1.3005673544221882e-06, + "loss": 0.6322, + "step": 12188 + }, + { + "epoch": 0.77, + "grad_norm": 0.8978073596954346, + "learning_rate": 1.299877213850741e-06, + "loss": 0.5376, + "step": 12189 + }, + { + "epoch": 0.77, + "grad_norm": 0.8951266407966614, + "learning_rate": 1.2991872290785906e-06, + "loss": 0.5989, + "step": 12190 + }, + { + "epoch": 0.77, + "grad_norm": 1.023902416229248, + "learning_rate": 1.2984974001347922e-06, + "loss": 0.5635, + "step": 12191 + }, + { + "epoch": 0.77, + "grad_norm": 0.8716408014297485, + "learning_rate": 1.2978077270483913e-06, + "loss": 0.5602, + "step": 12192 + }, + { + "epoch": 0.77, + "grad_norm": 0.865201473236084, + "learning_rate": 1.2971182098484286e-06, + "loss": 0.5535, + "step": 12193 + }, + { + "epoch": 0.77, + "grad_norm": 0.9294458031654358, + "learning_rate": 1.2964288485639366e-06, + "loss": 0.5739, + "step": 12194 + }, + { + "epoch": 0.77, + "grad_norm": 0.9854139089584351, + "learning_rate": 1.2957396432239427e-06, + "loss": 0.5758, + "step": 12195 + }, + { + "epoch": 0.77, + "grad_norm": 0.9392171502113342, + "learning_rate": 1.2950505938574643e-06, + "loss": 0.5966, + "step": 12196 + }, + { + "epoch": 0.77, + "grad_norm": 0.9069497585296631, + "learning_rate": 1.2943617004935176e-06, + "loss": 0.5433, + "step": 12197 + }, + { + "epoch": 0.77, + "grad_norm": 0.9116702079772949, + "learning_rate": 1.2936729631611106e-06, + "loss": 0.596, + "step": 12198 + }, + { + "epoch": 0.77, + "grad_norm": 0.9324621558189392, + "learning_rate": 1.2929843818892401e-06, + "loss": 0.5372, + "step": 12199 + }, + { + "epoch": 0.77, + "grad_norm": 0.8757584691047668, + "learning_rate": 1.2922959567069016e-06, + "loss": 0.5684, + "step": 12200 + }, + { + "epoch": 0.77, + "grad_norm": 0.9068610668182373, + "learning_rate": 1.2916076876430821e-06, + "loss": 0.6232, + "step": 12201 + }, + { + "epoch": 0.77, + "grad_norm": 0.8894833326339722, + "learning_rate": 1.2909195747267622e-06, + "loss": 0.6127, + "step": 12202 + }, + { + "epoch": 0.77, + "grad_norm": 0.8847464323043823, + "learning_rate": 1.2902316179869179e-06, + "loss": 0.5741, + "step": 12203 + }, + { + "epoch": 0.77, + "grad_norm": 0.9438949823379517, + "learning_rate": 1.2895438174525127e-06, + "loss": 0.6046, + "step": 12204 + }, + { + "epoch": 0.77, + "grad_norm": 0.8600268363952637, + "learning_rate": 1.288856173152509e-06, + "loss": 0.574, + "step": 12205 + }, + { + "epoch": 0.77, + "grad_norm": 0.9138484001159668, + "learning_rate": 1.2881686851158642e-06, + "loss": 0.6331, + "step": 12206 + }, + { + "epoch": 0.77, + "grad_norm": 0.8854186534881592, + "learning_rate": 1.287481353371522e-06, + "loss": 0.5998, + "step": 12207 + }, + { + "epoch": 0.77, + "grad_norm": 0.8981321454048157, + "learning_rate": 1.286794177948425e-06, + "loss": 0.6245, + "step": 12208 + }, + { + "epoch": 0.77, + "grad_norm": 0.9491480588912964, + "learning_rate": 1.286107158875508e-06, + "loss": 0.6036, + "step": 12209 + }, + { + "epoch": 0.77, + "grad_norm": 0.8554275631904602, + "learning_rate": 1.285420296181699e-06, + "loss": 0.5711, + "step": 12210 + }, + { + "epoch": 0.77, + "grad_norm": 0.8955265283584595, + "learning_rate": 1.2847335898959207e-06, + "loss": 0.5782, + "step": 12211 + }, + { + "epoch": 0.77, + "grad_norm": 0.8879252076148987, + "learning_rate": 1.284047040047085e-06, + "loss": 0.6219, + "step": 12212 + }, + { + "epoch": 0.77, + "grad_norm": 0.922939121723175, + "learning_rate": 1.2833606466641001e-06, + "loss": 0.6014, + "step": 12213 + }, + { + "epoch": 0.77, + "grad_norm": 0.8796207904815674, + "learning_rate": 1.282674409775872e-06, + "loss": 0.5638, + "step": 12214 + }, + { + "epoch": 0.77, + "grad_norm": 0.8656979203224182, + "learning_rate": 1.2819883294112918e-06, + "loss": 0.5692, + "step": 12215 + }, + { + "epoch": 0.77, + "grad_norm": 0.8584392666816711, + "learning_rate": 1.2813024055992486e-06, + "loss": 0.5548, + "step": 12216 + }, + { + "epoch": 0.77, + "grad_norm": 0.8941633105278015, + "learning_rate": 1.2806166383686258e-06, + "loss": 0.5758, + "step": 12217 + }, + { + "epoch": 0.77, + "grad_norm": 0.8649379014968872, + "learning_rate": 1.2799310277482952e-06, + "loss": 0.5681, + "step": 12218 + }, + { + "epoch": 0.77, + "grad_norm": 0.9806539416313171, + "learning_rate": 1.2792455737671306e-06, + "loss": 0.6265, + "step": 12219 + }, + { + "epoch": 0.77, + "grad_norm": 0.884787380695343, + "learning_rate": 1.27856027645399e-06, + "loss": 0.5289, + "step": 12220 + }, + { + "epoch": 0.77, + "grad_norm": 0.8743571043014526, + "learning_rate": 1.27787513583773e-06, + "loss": 0.5108, + "step": 12221 + }, + { + "epoch": 0.77, + "grad_norm": 0.8686521649360657, + "learning_rate": 1.2771901519471997e-06, + "loss": 0.5756, + "step": 12222 + }, + { + "epoch": 0.77, + "grad_norm": 0.830317497253418, + "learning_rate": 1.2765053248112414e-06, + "loss": 0.5872, + "step": 12223 + }, + { + "epoch": 0.77, + "grad_norm": 0.9129568338394165, + "learning_rate": 1.2758206544586909e-06, + "loss": 0.602, + "step": 12224 + }, + { + "epoch": 0.77, + "grad_norm": 0.9156956076622009, + "learning_rate": 1.2751361409183788e-06, + "loss": 0.6216, + "step": 12225 + }, + { + "epoch": 0.77, + "grad_norm": 0.831794261932373, + "learning_rate": 1.2744517842191228e-06, + "loss": 0.5739, + "step": 12226 + }, + { + "epoch": 0.77, + "grad_norm": 0.9067423343658447, + "learning_rate": 1.2737675843897452e-06, + "loss": 0.5913, + "step": 12227 + }, + { + "epoch": 0.77, + "grad_norm": 0.9277194142341614, + "learning_rate": 1.2730835414590498e-06, + "loss": 0.6272, + "step": 12228 + }, + { + "epoch": 0.77, + "grad_norm": 0.8326361775398254, + "learning_rate": 1.272399655455842e-06, + "loss": 0.5507, + "step": 12229 + }, + { + "epoch": 0.77, + "grad_norm": 0.8325486183166504, + "learning_rate": 1.2717159264089185e-06, + "loss": 0.5397, + "step": 12230 + }, + { + "epoch": 0.77, + "grad_norm": 0.9493119716644287, + "learning_rate": 1.2710323543470648e-06, + "loss": 0.5241, + "step": 12231 + }, + { + "epoch": 0.77, + "grad_norm": 0.9216598868370056, + "learning_rate": 1.2703489392990682e-06, + "loss": 0.5895, + "step": 12232 + }, + { + "epoch": 0.78, + "grad_norm": 0.9237011671066284, + "learning_rate": 1.2696656812937047e-06, + "loss": 0.6019, + "step": 12233 + }, + { + "epoch": 0.78, + "grad_norm": 0.8747559189796448, + "learning_rate": 1.268982580359741e-06, + "loss": 0.5257, + "step": 12234 + }, + { + "epoch": 0.78, + "grad_norm": 0.877472460269928, + "learning_rate": 1.2682996365259415e-06, + "loss": 0.5535, + "step": 12235 + }, + { + "epoch": 0.78, + "grad_norm": 0.8679016828536987, + "learning_rate": 1.2676168498210623e-06, + "loss": 0.5842, + "step": 12236 + }, + { + "epoch": 0.78, + "grad_norm": 0.8848540782928467, + "learning_rate": 1.2669342202738537e-06, + "loss": 0.5703, + "step": 12237 + }, + { + "epoch": 0.78, + "grad_norm": 0.8749752640724182, + "learning_rate": 1.2662517479130605e-06, + "loss": 0.5588, + "step": 12238 + }, + { + "epoch": 0.78, + "grad_norm": 0.9668585062026978, + "learning_rate": 1.2655694327674145e-06, + "loss": 0.5812, + "step": 12239 + }, + { + "epoch": 0.78, + "grad_norm": 0.9180838465690613, + "learning_rate": 1.2648872748656498e-06, + "loss": 0.5711, + "step": 12240 + }, + { + "epoch": 0.78, + "grad_norm": 0.9085766077041626, + "learning_rate": 1.2642052742364903e-06, + "loss": 0.5718, + "step": 12241 + }, + { + "epoch": 0.78, + "grad_norm": 0.8867596983909607, + "learning_rate": 1.2635234309086486e-06, + "loss": 0.5779, + "step": 12242 + }, + { + "epoch": 0.78, + "grad_norm": 0.9551423788070679, + "learning_rate": 1.2628417449108376e-06, + "loss": 0.6215, + "step": 12243 + }, + { + "epoch": 0.78, + "grad_norm": 0.9479497671127319, + "learning_rate": 1.2621602162717594e-06, + "loss": 0.5509, + "step": 12244 + }, + { + "epoch": 0.78, + "grad_norm": 0.8776799440383911, + "learning_rate": 1.261478845020112e-06, + "loss": 0.5186, + "step": 12245 + }, + { + "epoch": 0.78, + "grad_norm": 0.8843742609024048, + "learning_rate": 1.2607976311845865e-06, + "loss": 0.51, + "step": 12246 + }, + { + "epoch": 0.78, + "grad_norm": 0.8797292709350586, + "learning_rate": 1.2601165747938638e-06, + "loss": 0.5066, + "step": 12247 + }, + { + "epoch": 0.78, + "grad_norm": 0.9293206930160522, + "learning_rate": 1.2594356758766201e-06, + "loss": 0.542, + "step": 12248 + }, + { + "epoch": 0.78, + "grad_norm": 0.8314594030380249, + "learning_rate": 1.2587549344615308e-06, + "loss": 0.5139, + "step": 12249 + }, + { + "epoch": 0.78, + "grad_norm": 0.893222451210022, + "learning_rate": 1.2580743505772553e-06, + "loss": 0.5823, + "step": 12250 + }, + { + "epoch": 0.78, + "grad_norm": 0.8597607016563416, + "learning_rate": 1.2573939242524508e-06, + "loss": 0.5671, + "step": 12251 + }, + { + "epoch": 0.78, + "grad_norm": 0.8614668846130371, + "learning_rate": 1.2567136555157694e-06, + "loss": 0.5764, + "step": 12252 + }, + { + "epoch": 0.78, + "grad_norm": 0.8970388174057007, + "learning_rate": 1.2560335443958533e-06, + "loss": 0.5439, + "step": 12253 + }, + { + "epoch": 0.78, + "grad_norm": 0.8344459533691406, + "learning_rate": 1.2553535909213422e-06, + "loss": 0.5791, + "step": 12254 + }, + { + "epoch": 0.78, + "grad_norm": 0.8963201642036438, + "learning_rate": 1.254673795120863e-06, + "loss": 0.578, + "step": 12255 + }, + { + "epoch": 0.78, + "grad_norm": 0.9341084957122803, + "learning_rate": 1.2539941570230402e-06, + "loss": 0.547, + "step": 12256 + }, + { + "epoch": 0.78, + "grad_norm": 0.8028890490531921, + "learning_rate": 1.2533146766564946e-06, + "loss": 0.5386, + "step": 12257 + }, + { + "epoch": 0.78, + "grad_norm": 0.959701657295227, + "learning_rate": 1.252635354049833e-06, + "loss": 0.5884, + "step": 12258 + }, + { + "epoch": 0.78, + "grad_norm": 0.8597055077552795, + "learning_rate": 1.2519561892316606e-06, + "loss": 0.5718, + "step": 12259 + }, + { + "epoch": 0.78, + "grad_norm": 0.8948055505752563, + "learning_rate": 1.2512771822305742e-06, + "loss": 0.5758, + "step": 12260 + }, + { + "epoch": 0.78, + "grad_norm": 0.9038350582122803, + "learning_rate": 1.2505983330751654e-06, + "loss": 0.5624, + "step": 12261 + }, + { + "epoch": 0.78, + "grad_norm": 0.8576204776763916, + "learning_rate": 1.2499196417940168e-06, + "loss": 0.6144, + "step": 12262 + }, + { + "epoch": 0.78, + "grad_norm": 0.8939236998558044, + "learning_rate": 1.2492411084157086e-06, + "loss": 0.5711, + "step": 12263 + }, + { + "epoch": 0.78, + "grad_norm": 0.859489381313324, + "learning_rate": 1.2485627329688076e-06, + "loss": 0.5811, + "step": 12264 + }, + { + "epoch": 0.78, + "grad_norm": 0.834520161151886, + "learning_rate": 1.2478845154818798e-06, + "loss": 0.5552, + "step": 12265 + }, + { + "epoch": 0.78, + "grad_norm": 0.9146906733512878, + "learning_rate": 1.2472064559834818e-06, + "loss": 0.6202, + "step": 12266 + }, + { + "epoch": 0.78, + "grad_norm": 0.8180127739906311, + "learning_rate": 1.2465285545021655e-06, + "loss": 0.5558, + "step": 12267 + }, + { + "epoch": 0.78, + "grad_norm": 0.8662042021751404, + "learning_rate": 1.2458508110664758e-06, + "loss": 0.5808, + "step": 12268 + }, + { + "epoch": 0.78, + "grad_norm": 0.8688540458679199, + "learning_rate": 1.2451732257049458e-06, + "loss": 0.6074, + "step": 12269 + }, + { + "epoch": 0.78, + "grad_norm": 0.8783148527145386, + "learning_rate": 1.2444957984461103e-06, + "loss": 0.5658, + "step": 12270 + }, + { + "epoch": 0.78, + "grad_norm": 0.8849241733551025, + "learning_rate": 1.243818529318494e-06, + "loss": 0.6209, + "step": 12271 + }, + { + "epoch": 0.78, + "grad_norm": 0.8112246990203857, + "learning_rate": 1.2431414183506114e-06, + "loss": 0.5052, + "step": 12272 + }, + { + "epoch": 0.78, + "grad_norm": 0.9490674734115601, + "learning_rate": 1.2424644655709744e-06, + "loss": 0.5935, + "step": 12273 + }, + { + "epoch": 0.78, + "grad_norm": 0.892665445804596, + "learning_rate": 1.2417876710080872e-06, + "loss": 0.5661, + "step": 12274 + }, + { + "epoch": 0.78, + "grad_norm": 0.8753570318222046, + "learning_rate": 1.2411110346904471e-06, + "loss": 0.5632, + "step": 12275 + }, + { + "epoch": 0.78, + "grad_norm": 0.8728823661804199, + "learning_rate": 1.2404345566465464e-06, + "loss": 0.5697, + "step": 12276 + }, + { + "epoch": 0.78, + "grad_norm": 0.8521443009376526, + "learning_rate": 1.2397582369048672e-06, + "loss": 0.5444, + "step": 12277 + }, + { + "epoch": 0.78, + "grad_norm": 0.8864396214485168, + "learning_rate": 1.2390820754938859e-06, + "loss": 0.6033, + "step": 12278 + }, + { + "epoch": 0.78, + "grad_norm": 0.9159855842590332, + "learning_rate": 1.2384060724420776e-06, + "loss": 0.6399, + "step": 12279 + }, + { + "epoch": 0.78, + "grad_norm": 0.8791429400444031, + "learning_rate": 1.2377302277779029e-06, + "loss": 0.5495, + "step": 12280 + }, + { + "epoch": 0.78, + "grad_norm": 0.8864124417304993, + "learning_rate": 1.2370545415298207e-06, + "loss": 0.555, + "step": 12281 + }, + { + "epoch": 0.78, + "grad_norm": 0.8805786967277527, + "learning_rate": 1.236379013726281e-06, + "loss": 0.576, + "step": 12282 + }, + { + "epoch": 0.78, + "grad_norm": 0.9043353199958801, + "learning_rate": 1.2357036443957283e-06, + "loss": 0.5943, + "step": 12283 + }, + { + "epoch": 0.78, + "grad_norm": 0.8610161542892456, + "learning_rate": 1.2350284335666019e-06, + "loss": 0.5871, + "step": 12284 + }, + { + "epoch": 0.78, + "grad_norm": 0.8844594359397888, + "learning_rate": 1.2343533812673286e-06, + "loss": 0.5531, + "step": 12285 + }, + { + "epoch": 0.78, + "grad_norm": 0.9110302925109863, + "learning_rate": 1.2336784875263341e-06, + "loss": 0.5183, + "step": 12286 + }, + { + "epoch": 0.78, + "grad_norm": 0.9357644319534302, + "learning_rate": 1.233003752372039e-06, + "loss": 0.5973, + "step": 12287 + }, + { + "epoch": 0.78, + "grad_norm": 0.865262508392334, + "learning_rate": 1.23232917583285e-06, + "loss": 0.5508, + "step": 12288 + }, + { + "epoch": 0.78, + "grad_norm": 0.8521873354911804, + "learning_rate": 1.2316547579371724e-06, + "loss": 0.5563, + "step": 12289 + }, + { + "epoch": 0.78, + "grad_norm": 0.8528589010238647, + "learning_rate": 1.230980498713404e-06, + "loss": 0.5659, + "step": 12290 + }, + { + "epoch": 0.78, + "grad_norm": 0.9339171648025513, + "learning_rate": 1.2303063981899355e-06, + "loss": 0.5992, + "step": 12291 + }, + { + "epoch": 0.78, + "grad_norm": 0.8944584131240845, + "learning_rate": 1.2296324563951517e-06, + "loss": 0.5916, + "step": 12292 + }, + { + "epoch": 0.78, + "grad_norm": 0.8510122299194336, + "learning_rate": 1.2289586733574283e-06, + "loss": 0.5496, + "step": 12293 + }, + { + "epoch": 0.78, + "grad_norm": 0.9237475991249084, + "learning_rate": 1.2282850491051363e-06, + "loss": 0.5481, + "step": 12294 + }, + { + "epoch": 0.78, + "grad_norm": 0.8931830525398254, + "learning_rate": 1.2276115836666396e-06, + "loss": 0.5749, + "step": 12295 + }, + { + "epoch": 0.78, + "grad_norm": 0.9052478671073914, + "learning_rate": 1.2269382770702964e-06, + "loss": 0.5891, + "step": 12296 + }, + { + "epoch": 0.78, + "grad_norm": 0.8357118964195251, + "learning_rate": 1.2262651293444572e-06, + "loss": 0.552, + "step": 12297 + }, + { + "epoch": 0.78, + "grad_norm": 0.8756887912750244, + "learning_rate": 1.2255921405174664e-06, + "loss": 0.5766, + "step": 12298 + }, + { + "epoch": 0.78, + "grad_norm": 0.8769047260284424, + "learning_rate": 1.2249193106176578e-06, + "loss": 0.5404, + "step": 12299 + }, + { + "epoch": 0.78, + "grad_norm": 0.7944373488426208, + "learning_rate": 1.224246639673367e-06, + "loss": 0.5492, + "step": 12300 + }, + { + "epoch": 0.78, + "grad_norm": 0.904240071773529, + "learning_rate": 1.2235741277129143e-06, + "loss": 0.5788, + "step": 12301 + }, + { + "epoch": 0.78, + "grad_norm": 0.8664789795875549, + "learning_rate": 1.2229017747646178e-06, + "loss": 0.5623, + "step": 12302 + }, + { + "epoch": 0.78, + "grad_norm": 0.9359251260757446, + "learning_rate": 1.2222295808567874e-06, + "loss": 0.5974, + "step": 12303 + }, + { + "epoch": 0.78, + "grad_norm": 0.9124470353126526, + "learning_rate": 1.2215575460177282e-06, + "loss": 0.5919, + "step": 12304 + }, + { + "epoch": 0.78, + "grad_norm": 0.8797950148582458, + "learning_rate": 1.220885670275736e-06, + "loss": 0.5813, + "step": 12305 + }, + { + "epoch": 0.78, + "grad_norm": 0.9038450717926025, + "learning_rate": 1.2202139536591035e-06, + "loss": 0.5688, + "step": 12306 + }, + { + "epoch": 0.78, + "grad_norm": 0.8724334239959717, + "learning_rate": 1.2195423961961089e-06, + "loss": 0.5828, + "step": 12307 + }, + { + "epoch": 0.78, + "grad_norm": 0.8944981098175049, + "learning_rate": 1.2188709979150366e-06, + "loss": 0.589, + "step": 12308 + }, + { + "epoch": 0.78, + "grad_norm": 0.9792714715003967, + "learning_rate": 1.2181997588441507e-06, + "loss": 0.591, + "step": 12309 + }, + { + "epoch": 0.78, + "grad_norm": 0.8511162400245667, + "learning_rate": 1.2175286790117174e-06, + "loss": 0.5557, + "step": 12310 + }, + { + "epoch": 0.78, + "grad_norm": 0.8353309035301208, + "learning_rate": 1.2168577584459944e-06, + "loss": 0.5897, + "step": 12311 + }, + { + "epoch": 0.78, + "grad_norm": 0.9010828733444214, + "learning_rate": 1.2161869971752283e-06, + "loss": 0.6036, + "step": 12312 + }, + { + "epoch": 0.78, + "grad_norm": 0.9711151719093323, + "learning_rate": 1.2155163952276654e-06, + "loss": 0.626, + "step": 12313 + }, + { + "epoch": 0.78, + "grad_norm": 0.8932445049285889, + "learning_rate": 1.2148459526315442e-06, + "loss": 0.6017, + "step": 12314 + }, + { + "epoch": 0.78, + "grad_norm": 0.8985554575920105, + "learning_rate": 1.2141756694150903e-06, + "loss": 0.5927, + "step": 12315 + }, + { + "epoch": 0.78, + "grad_norm": 0.8908417820930481, + "learning_rate": 1.2135055456065292e-06, + "loss": 0.5449, + "step": 12316 + }, + { + "epoch": 0.78, + "grad_norm": 0.9284818768501282, + "learning_rate": 1.2128355812340776e-06, + "loss": 0.583, + "step": 12317 + }, + { + "epoch": 0.78, + "grad_norm": 0.9066043496131897, + "learning_rate": 1.2121657763259448e-06, + "loss": 0.5134, + "step": 12318 + }, + { + "epoch": 0.78, + "grad_norm": 0.8915185332298279, + "learning_rate": 1.211496130910334e-06, + "loss": 0.5315, + "step": 12319 + }, + { + "epoch": 0.78, + "grad_norm": 0.8998283743858337, + "learning_rate": 1.2108266450154422e-06, + "loss": 0.5922, + "step": 12320 + }, + { + "epoch": 0.78, + "grad_norm": 0.9139837622642517, + "learning_rate": 1.2101573186694587e-06, + "loss": 0.5897, + "step": 12321 + }, + { + "epoch": 0.78, + "grad_norm": 0.8933039307594299, + "learning_rate": 1.209488151900568e-06, + "loss": 0.5763, + "step": 12322 + }, + { + "epoch": 0.78, + "grad_norm": 0.9665024876594543, + "learning_rate": 1.2088191447369436e-06, + "loss": 0.6145, + "step": 12323 + }, + { + "epoch": 0.78, + "grad_norm": 0.9376837015151978, + "learning_rate": 1.2081502972067567e-06, + "loss": 0.5251, + "step": 12324 + }, + { + "epoch": 0.78, + "grad_norm": 0.8636725544929504, + "learning_rate": 1.2074816093381696e-06, + "loss": 0.5737, + "step": 12325 + }, + { + "epoch": 0.78, + "grad_norm": 0.8847807049751282, + "learning_rate": 1.2068130811593387e-06, + "loss": 0.5633, + "step": 12326 + }, + { + "epoch": 0.78, + "grad_norm": 0.8365536332130432, + "learning_rate": 1.2061447126984138e-06, + "loss": 0.5499, + "step": 12327 + }, + { + "epoch": 0.78, + "grad_norm": 0.9122150540351868, + "learning_rate": 1.2054765039835382e-06, + "loss": 0.5775, + "step": 12328 + }, + { + "epoch": 0.78, + "grad_norm": 0.8604024052619934, + "learning_rate": 1.2048084550428442e-06, + "loss": 0.5087, + "step": 12329 + }, + { + "epoch": 0.78, + "grad_norm": 0.9017311334609985, + "learning_rate": 1.2041405659044664e-06, + "loss": 0.5894, + "step": 12330 + }, + { + "epoch": 0.78, + "grad_norm": 0.8747026324272156, + "learning_rate": 1.203472836596523e-06, + "loss": 0.5661, + "step": 12331 + }, + { + "epoch": 0.78, + "grad_norm": 0.8790405988693237, + "learning_rate": 1.2028052671471318e-06, + "loss": 0.6278, + "step": 12332 + }, + { + "epoch": 0.78, + "grad_norm": 0.8900148868560791, + "learning_rate": 1.2021378575844005e-06, + "loss": 0.5758, + "step": 12333 + }, + { + "epoch": 0.78, + "grad_norm": 0.85635906457901, + "learning_rate": 1.201470607936433e-06, + "loss": 0.5376, + "step": 12334 + }, + { + "epoch": 0.78, + "grad_norm": 0.9126538038253784, + "learning_rate": 1.2008035182313237e-06, + "loss": 0.5883, + "step": 12335 + }, + { + "epoch": 0.78, + "grad_norm": 0.8496696949005127, + "learning_rate": 1.2001365884971634e-06, + "loss": 0.5212, + "step": 12336 + }, + { + "epoch": 0.78, + "grad_norm": 0.9417339563369751, + "learning_rate": 1.1994698187620297e-06, + "loss": 0.6001, + "step": 12337 + }, + { + "epoch": 0.78, + "grad_norm": 0.9529904127120972, + "learning_rate": 1.1988032090540036e-06, + "loss": 0.6143, + "step": 12338 + }, + { + "epoch": 0.78, + "grad_norm": 0.8431176543235779, + "learning_rate": 1.1981367594011496e-06, + "loss": 0.5776, + "step": 12339 + }, + { + "epoch": 0.78, + "grad_norm": 0.8899187445640564, + "learning_rate": 1.1974704698315309e-06, + "loss": 0.5197, + "step": 12340 + }, + { + "epoch": 0.78, + "grad_norm": 0.8935467004776001, + "learning_rate": 1.1968043403732044e-06, + "loss": 0.5667, + "step": 12341 + }, + { + "epoch": 0.78, + "grad_norm": 0.9127129316329956, + "learning_rate": 1.1961383710542135e-06, + "loss": 0.5768, + "step": 12342 + }, + { + "epoch": 0.78, + "grad_norm": 0.8471307754516602, + "learning_rate": 1.1954725619026048e-06, + "loss": 0.5569, + "step": 12343 + }, + { + "epoch": 0.78, + "grad_norm": 0.7840201258659363, + "learning_rate": 1.1948069129464128e-06, + "loss": 0.572, + "step": 12344 + }, + { + "epoch": 0.78, + "grad_norm": 0.9100561738014221, + "learning_rate": 1.1941414242136635e-06, + "loss": 0.5413, + "step": 12345 + }, + { + "epoch": 0.78, + "grad_norm": 0.910991370677948, + "learning_rate": 1.1934760957323782e-06, + "loss": 0.569, + "step": 12346 + }, + { + "epoch": 0.78, + "grad_norm": 0.9393359422683716, + "learning_rate": 1.1928109275305734e-06, + "loss": 0.6035, + "step": 12347 + }, + { + "epoch": 0.78, + "grad_norm": 0.8058403134346008, + "learning_rate": 1.1921459196362562e-06, + "loss": 0.5626, + "step": 12348 + }, + { + "epoch": 0.78, + "grad_norm": 0.8618589639663696, + "learning_rate": 1.1914810720774289e-06, + "loss": 0.5869, + "step": 12349 + }, + { + "epoch": 0.78, + "grad_norm": 0.924644410610199, + "learning_rate": 1.190816384882082e-06, + "loss": 0.6129, + "step": 12350 + }, + { + "epoch": 0.78, + "grad_norm": 0.8994777202606201, + "learning_rate": 1.1901518580782073e-06, + "loss": 0.5791, + "step": 12351 + }, + { + "epoch": 0.78, + "grad_norm": 0.9266136288642883, + "learning_rate": 1.1894874916937855e-06, + "loss": 0.6058, + "step": 12352 + }, + { + "epoch": 0.78, + "grad_norm": 0.854226291179657, + "learning_rate": 1.1888232857567888e-06, + "loss": 0.5411, + "step": 12353 + }, + { + "epoch": 0.78, + "grad_norm": 0.9290048480033875, + "learning_rate": 1.1881592402951853e-06, + "loss": 0.5852, + "step": 12354 + }, + { + "epoch": 0.78, + "grad_norm": 0.9401707649230957, + "learning_rate": 1.1874953553369351e-06, + "loss": 0.5687, + "step": 12355 + }, + { + "epoch": 0.78, + "grad_norm": 0.8896312713623047, + "learning_rate": 1.1868316309099937e-06, + "loss": 0.5607, + "step": 12356 + }, + { + "epoch": 0.78, + "grad_norm": 0.9019138216972351, + "learning_rate": 1.186168067042308e-06, + "loss": 0.6016, + "step": 12357 + }, + { + "epoch": 0.78, + "grad_norm": 0.8993564248085022, + "learning_rate": 1.1855046637618168e-06, + "loss": 0.6566, + "step": 12358 + }, + { + "epoch": 0.78, + "grad_norm": 0.9466603994369507, + "learning_rate": 1.1848414210964526e-06, + "loss": 0.6339, + "step": 12359 + }, + { + "epoch": 0.78, + "grad_norm": 0.9109975099563599, + "learning_rate": 1.1841783390741473e-06, + "loss": 0.5418, + "step": 12360 + }, + { + "epoch": 0.78, + "grad_norm": 0.963789701461792, + "learning_rate": 1.1835154177228165e-06, + "loss": 0.5697, + "step": 12361 + }, + { + "epoch": 0.78, + "grad_norm": 0.9295591115951538, + "learning_rate": 1.1828526570703747e-06, + "loss": 0.6339, + "step": 12362 + }, + { + "epoch": 0.78, + "grad_norm": 0.9075334668159485, + "learning_rate": 1.1821900571447286e-06, + "loss": 0.5751, + "step": 12363 + }, + { + "epoch": 0.78, + "grad_norm": 0.8823397159576416, + "learning_rate": 1.1815276179737778e-06, + "loss": 0.5023, + "step": 12364 + }, + { + "epoch": 0.78, + "grad_norm": 0.8924038410186768, + "learning_rate": 1.1808653395854174e-06, + "loss": 0.5934, + "step": 12365 + }, + { + "epoch": 0.78, + "grad_norm": 0.8544875979423523, + "learning_rate": 1.1802032220075299e-06, + "loss": 0.5516, + "step": 12366 + }, + { + "epoch": 0.78, + "grad_norm": 0.9305124282836914, + "learning_rate": 1.1795412652679955e-06, + "loss": 0.5981, + "step": 12367 + }, + { + "epoch": 0.78, + "grad_norm": 0.8870397806167603, + "learning_rate": 1.178879469394691e-06, + "loss": 0.5729, + "step": 12368 + }, + { + "epoch": 0.78, + "grad_norm": 0.9008588194847107, + "learning_rate": 1.1782178344154776e-06, + "loss": 0.5062, + "step": 12369 + }, + { + "epoch": 0.78, + "grad_norm": 0.8877758383750916, + "learning_rate": 1.1775563603582162e-06, + "loss": 0.5799, + "step": 12370 + }, + { + "epoch": 0.78, + "grad_norm": 0.882616400718689, + "learning_rate": 1.1768950472507605e-06, + "loss": 0.5285, + "step": 12371 + }, + { + "epoch": 0.78, + "grad_norm": 0.9182950854301453, + "learning_rate": 1.1762338951209524e-06, + "loss": 0.6087, + "step": 12372 + }, + { + "epoch": 0.78, + "grad_norm": 0.9180057048797607, + "learning_rate": 1.1755729039966358e-06, + "loss": 0.5914, + "step": 12373 + }, + { + "epoch": 0.78, + "grad_norm": 0.8797052502632141, + "learning_rate": 1.174912073905638e-06, + "loss": 0.5412, + "step": 12374 + }, + { + "epoch": 0.78, + "grad_norm": 0.8645214438438416, + "learning_rate": 1.174251404875787e-06, + "loss": 0.564, + "step": 12375 + }, + { + "epoch": 0.78, + "grad_norm": 0.8359499573707581, + "learning_rate": 1.1735908969349002e-06, + "loss": 0.546, + "step": 12376 + }, + { + "epoch": 0.78, + "grad_norm": 0.8409955501556396, + "learning_rate": 1.1729305501107897e-06, + "loss": 0.5585, + "step": 12377 + }, + { + "epoch": 0.78, + "grad_norm": 0.8773937821388245, + "learning_rate": 1.1722703644312599e-06, + "loss": 0.5409, + "step": 12378 + }, + { + "epoch": 0.78, + "grad_norm": 0.8302714824676514, + "learning_rate": 1.1716103399241113e-06, + "loss": 0.5417, + "step": 12379 + }, + { + "epoch": 0.78, + "grad_norm": 0.8925114870071411, + "learning_rate": 1.1709504766171298e-06, + "loss": 0.5455, + "step": 12380 + }, + { + "epoch": 0.78, + "grad_norm": 0.8733065724372864, + "learning_rate": 1.170290774538107e-06, + "loss": 0.6242, + "step": 12381 + }, + { + "epoch": 0.78, + "grad_norm": 0.8538454174995422, + "learning_rate": 1.1696312337148152e-06, + "loss": 0.5389, + "step": 12382 + }, + { + "epoch": 0.78, + "grad_norm": 0.9496648907661438, + "learning_rate": 1.1689718541750278e-06, + "loss": 0.6046, + "step": 12383 + }, + { + "epoch": 0.78, + "grad_norm": 0.9294766187667847, + "learning_rate": 1.168312635946508e-06, + "loss": 0.6338, + "step": 12384 + }, + { + "epoch": 0.78, + "grad_norm": 0.8941949605941772, + "learning_rate": 1.1676535790570137e-06, + "loss": 0.5433, + "step": 12385 + }, + { + "epoch": 0.78, + "grad_norm": 0.9269332885742188, + "learning_rate": 1.1669946835342956e-06, + "loss": 0.5374, + "step": 12386 + }, + { + "epoch": 0.78, + "grad_norm": 0.9914242029190063, + "learning_rate": 1.1663359494060983e-06, + "loss": 0.6343, + "step": 12387 + }, + { + "epoch": 0.78, + "grad_norm": 0.8914863467216492, + "learning_rate": 1.1656773767001566e-06, + "loss": 0.6055, + "step": 12388 + }, + { + "epoch": 0.78, + "grad_norm": 0.9513469934463501, + "learning_rate": 1.1650189654442024e-06, + "loss": 0.5674, + "step": 12389 + }, + { + "epoch": 0.78, + "grad_norm": 0.891490638256073, + "learning_rate": 1.1643607156659582e-06, + "loss": 0.5888, + "step": 12390 + }, + { + "epoch": 0.79, + "grad_norm": 0.9232016205787659, + "learning_rate": 1.1637026273931413e-06, + "loss": 0.5609, + "step": 12391 + }, + { + "epoch": 0.79, + "grad_norm": 0.9406624436378479, + "learning_rate": 1.1630447006534606e-06, + "loss": 0.5978, + "step": 12392 + }, + { + "epoch": 0.79, + "grad_norm": 0.9257485866546631, + "learning_rate": 1.1623869354746203e-06, + "loss": 0.5876, + "step": 12393 + }, + { + "epoch": 0.79, + "grad_norm": 0.985722541809082, + "learning_rate": 1.1617293318843164e-06, + "loss": 0.6456, + "step": 12394 + }, + { + "epoch": 0.79, + "grad_norm": 0.8931677341461182, + "learning_rate": 1.1610718899102392e-06, + "loss": 0.5579, + "step": 12395 + }, + { + "epoch": 0.79, + "grad_norm": 0.9458318948745728, + "learning_rate": 1.1604146095800684e-06, + "loss": 0.5987, + "step": 12396 + }, + { + "epoch": 0.79, + "grad_norm": 0.8708502650260925, + "learning_rate": 1.1597574909214808e-06, + "loss": 0.6126, + "step": 12397 + }, + { + "epoch": 0.79, + "grad_norm": 0.937856912612915, + "learning_rate": 1.159100533962147e-06, + "loss": 0.5574, + "step": 12398 + }, + { + "epoch": 0.79, + "grad_norm": 0.9945967793464661, + "learning_rate": 1.1584437387297283e-06, + "loss": 0.6743, + "step": 12399 + }, + { + "epoch": 0.79, + "grad_norm": 0.8693322539329529, + "learning_rate": 1.157787105251879e-06, + "loss": 0.5495, + "step": 12400 + }, + { + "epoch": 0.79, + "grad_norm": 0.8383262157440186, + "learning_rate": 1.157130633556251e-06, + "loss": 0.4967, + "step": 12401 + }, + { + "epoch": 0.79, + "grad_norm": 0.9006306529045105, + "learning_rate": 1.1564743236704801e-06, + "loss": 0.5194, + "step": 12402 + }, + { + "epoch": 0.79, + "grad_norm": 0.909695565700531, + "learning_rate": 1.1558181756222081e-06, + "loss": 0.5596, + "step": 12403 + }, + { + "epoch": 0.79, + "grad_norm": 0.9043020009994507, + "learning_rate": 1.1551621894390586e-06, + "loss": 0.5476, + "step": 12404 + }, + { + "epoch": 0.79, + "grad_norm": 0.9523054957389832, + "learning_rate": 1.1545063651486533e-06, + "loss": 0.6002, + "step": 12405 + }, + { + "epoch": 0.79, + "grad_norm": 0.8855312466621399, + "learning_rate": 1.1538507027786077e-06, + "loss": 0.6329, + "step": 12406 + }, + { + "epoch": 0.79, + "grad_norm": 0.8605347275733948, + "learning_rate": 1.1531952023565295e-06, + "loss": 0.5569, + "step": 12407 + }, + { + "epoch": 0.79, + "grad_norm": 0.8833476901054382, + "learning_rate": 1.1525398639100194e-06, + "loss": 0.5713, + "step": 12408 + }, + { + "epoch": 0.79, + "grad_norm": 0.9746968746185303, + "learning_rate": 1.1518846874666723e-06, + "loss": 0.5846, + "step": 12409 + }, + { + "epoch": 0.79, + "grad_norm": 0.9625856280326843, + "learning_rate": 1.1512296730540717e-06, + "loss": 0.5859, + "step": 12410 + }, + { + "epoch": 0.79, + "grad_norm": 0.8508451581001282, + "learning_rate": 1.1505748206998036e-06, + "loss": 0.5348, + "step": 12411 + }, + { + "epoch": 0.79, + "grad_norm": 0.8078241944313049, + "learning_rate": 1.1499201304314372e-06, + "loss": 0.5757, + "step": 12412 + }, + { + "epoch": 0.79, + "grad_norm": 0.9019641280174255, + "learning_rate": 1.149265602276541e-06, + "loss": 0.6191, + "step": 12413 + }, + { + "epoch": 0.79, + "grad_norm": 0.9142687320709229, + "learning_rate": 1.1486112362626738e-06, + "loss": 0.6163, + "step": 12414 + }, + { + "epoch": 0.79, + "grad_norm": 0.8982365131378174, + "learning_rate": 1.14795703241739e-06, + "loss": 0.5845, + "step": 12415 + }, + { + "epoch": 0.79, + "grad_norm": 0.8687730431556702, + "learning_rate": 1.1473029907682348e-06, + "loss": 0.497, + "step": 12416 + }, + { + "epoch": 0.79, + "grad_norm": 0.9555364847183228, + "learning_rate": 1.1466491113427503e-06, + "loss": 0.5448, + "step": 12417 + }, + { + "epoch": 0.79, + "grad_norm": 0.9418555498123169, + "learning_rate": 1.1459953941684648e-06, + "loss": 0.5793, + "step": 12418 + }, + { + "epoch": 0.79, + "grad_norm": 0.8871247172355652, + "learning_rate": 1.1453418392729065e-06, + "loss": 0.5731, + "step": 12419 + }, + { + "epoch": 0.79, + "grad_norm": 0.9137433171272278, + "learning_rate": 1.1446884466835933e-06, + "loss": 0.6143, + "step": 12420 + }, + { + "epoch": 0.79, + "grad_norm": 0.9435691237449646, + "learning_rate": 1.1440352164280388e-06, + "loss": 0.5642, + "step": 12421 + }, + { + "epoch": 0.79, + "grad_norm": 0.8553792834281921, + "learning_rate": 1.1433821485337487e-06, + "loss": 0.55, + "step": 12422 + }, + { + "epoch": 0.79, + "grad_norm": 0.8792483806610107, + "learning_rate": 1.1427292430282165e-06, + "loss": 0.5809, + "step": 12423 + }, + { + "epoch": 0.79, + "grad_norm": 0.8634614944458008, + "learning_rate": 1.14207649993894e-06, + "loss": 0.5819, + "step": 12424 + }, + { + "epoch": 0.79, + "grad_norm": 0.904400110244751, + "learning_rate": 1.1414239192934019e-06, + "loss": 0.5542, + "step": 12425 + }, + { + "epoch": 0.79, + "grad_norm": 0.9054979085922241, + "learning_rate": 1.1407715011190784e-06, + "loss": 0.5524, + "step": 12426 + }, + { + "epoch": 0.79, + "grad_norm": 0.9589418768882751, + "learning_rate": 1.1401192454434418e-06, + "loss": 0.5837, + "step": 12427 + }, + { + "epoch": 0.79, + "grad_norm": 0.9193122386932373, + "learning_rate": 1.139467152293956e-06, + "loss": 0.5805, + "step": 12428 + }, + { + "epoch": 0.79, + "grad_norm": 0.9566795825958252, + "learning_rate": 1.138815221698079e-06, + "loss": 0.6101, + "step": 12429 + }, + { + "epoch": 0.79, + "grad_norm": 0.8651120662689209, + "learning_rate": 1.138163453683262e-06, + "loss": 0.5513, + "step": 12430 + }, + { + "epoch": 0.79, + "grad_norm": 0.8350364565849304, + "learning_rate": 1.1375118482769447e-06, + "loss": 0.5298, + "step": 12431 + }, + { + "epoch": 0.79, + "grad_norm": 0.8395058512687683, + "learning_rate": 1.136860405506569e-06, + "loss": 0.5459, + "step": 12432 + }, + { + "epoch": 0.79, + "grad_norm": 0.8627316951751709, + "learning_rate": 1.1362091253995632e-06, + "loss": 0.5212, + "step": 12433 + }, + { + "epoch": 0.79, + "grad_norm": 0.8814694285392761, + "learning_rate": 1.1355580079833496e-06, + "loss": 0.5991, + "step": 12434 + }, + { + "epoch": 0.79, + "grad_norm": 0.8934352397918701, + "learning_rate": 1.134907053285344e-06, + "loss": 0.5512, + "step": 12435 + }, + { + "epoch": 0.79, + "grad_norm": 0.9029948711395264, + "learning_rate": 1.1342562613329571e-06, + "loss": 0.5956, + "step": 12436 + }, + { + "epoch": 0.79, + "grad_norm": 0.8928791284561157, + "learning_rate": 1.133605632153591e-06, + "loss": 0.5683, + "step": 12437 + }, + { + "epoch": 0.79, + "grad_norm": 0.9214066863059998, + "learning_rate": 1.1329551657746429e-06, + "loss": 0.6537, + "step": 12438 + }, + { + "epoch": 0.79, + "grad_norm": 0.9235839247703552, + "learning_rate": 1.132304862223499e-06, + "loss": 0.587, + "step": 12439 + }, + { + "epoch": 0.79, + "grad_norm": 0.8503764271736145, + "learning_rate": 1.1316547215275409e-06, + "loss": 0.6163, + "step": 12440 + }, + { + "epoch": 0.79, + "grad_norm": 0.8700659275054932, + "learning_rate": 1.1310047437141485e-06, + "loss": 0.5875, + "step": 12441 + }, + { + "epoch": 0.79, + "grad_norm": 0.9010173678398132, + "learning_rate": 1.1303549288106857e-06, + "loss": 0.5572, + "step": 12442 + }, + { + "epoch": 0.79, + "grad_norm": 0.906274676322937, + "learning_rate": 1.1297052768445154e-06, + "loss": 0.5747, + "step": 12443 + }, + { + "epoch": 0.79, + "grad_norm": 0.9110028147697449, + "learning_rate": 1.129055787842992e-06, + "loss": 0.5646, + "step": 12444 + }, + { + "epoch": 0.79, + "grad_norm": 0.9146189093589783, + "learning_rate": 1.1284064618334634e-06, + "loss": 0.5341, + "step": 12445 + }, + { + "epoch": 0.79, + "grad_norm": 0.9614166021347046, + "learning_rate": 1.1277572988432716e-06, + "loss": 0.5856, + "step": 12446 + }, + { + "epoch": 0.79, + "grad_norm": 0.8432893753051758, + "learning_rate": 1.1271082988997485e-06, + "loss": 0.5756, + "step": 12447 + }, + { + "epoch": 0.79, + "grad_norm": 0.9383045434951782, + "learning_rate": 1.1264594620302216e-06, + "loss": 0.6037, + "step": 12448 + }, + { + "epoch": 0.79, + "grad_norm": 0.8760130405426025, + "learning_rate": 1.1258107882620117e-06, + "loss": 0.5568, + "step": 12449 + }, + { + "epoch": 0.79, + "grad_norm": 0.9355485439300537, + "learning_rate": 1.1251622776224325e-06, + "loss": 0.5927, + "step": 12450 + }, + { + "epoch": 0.79, + "grad_norm": 0.9326279759407043, + "learning_rate": 1.1245139301387903e-06, + "loss": 0.5338, + "step": 12451 + }, + { + "epoch": 0.79, + "grad_norm": 0.9106242060661316, + "learning_rate": 1.1238657458383857e-06, + "loss": 0.5986, + "step": 12452 + }, + { + "epoch": 0.79, + "grad_norm": 0.9031466841697693, + "learning_rate": 1.1232177247485076e-06, + "loss": 0.5867, + "step": 12453 + }, + { + "epoch": 0.79, + "grad_norm": 0.8840919137001038, + "learning_rate": 1.122569866896448e-06, + "loss": 0.5369, + "step": 12454 + }, + { + "epoch": 0.79, + "grad_norm": 0.875639021396637, + "learning_rate": 1.1219221723094815e-06, + "loss": 0.5746, + "step": 12455 + }, + { + "epoch": 0.79, + "grad_norm": 0.9193491339683533, + "learning_rate": 1.1212746410148807e-06, + "loss": 0.6223, + "step": 12456 + }, + { + "epoch": 0.79, + "grad_norm": 0.9045842885971069, + "learning_rate": 1.120627273039912e-06, + "loss": 0.6001, + "step": 12457 + }, + { + "epoch": 0.79, + "grad_norm": 0.9147197008132935, + "learning_rate": 1.119980068411834e-06, + "loss": 0.5921, + "step": 12458 + }, + { + "epoch": 0.79, + "grad_norm": 0.9671141505241394, + "learning_rate": 1.1193330271578968e-06, + "loss": 0.6136, + "step": 12459 + }, + { + "epoch": 0.79, + "grad_norm": 0.8843987584114075, + "learning_rate": 1.118686149305348e-06, + "loss": 0.5372, + "step": 12460 + }, + { + "epoch": 0.79, + "grad_norm": 0.9339917302131653, + "learning_rate": 1.1180394348814206e-06, + "loss": 0.5736, + "step": 12461 + }, + { + "epoch": 0.79, + "grad_norm": 0.8633296489715576, + "learning_rate": 1.117392883913349e-06, + "loss": 0.5683, + "step": 12462 + }, + { + "epoch": 0.79, + "grad_norm": 0.856465220451355, + "learning_rate": 1.1167464964283587e-06, + "loss": 0.5533, + "step": 12463 + }, + { + "epoch": 0.79, + "grad_norm": 0.851737916469574, + "learning_rate": 1.1161002724536623e-06, + "loss": 0.5454, + "step": 12464 + }, + { + "epoch": 0.79, + "grad_norm": 0.897743821144104, + "learning_rate": 1.115454212016473e-06, + "loss": 0.5616, + "step": 12465 + }, + { + "epoch": 0.79, + "grad_norm": 0.8601440191268921, + "learning_rate": 1.1148083151439932e-06, + "loss": 0.5598, + "step": 12466 + }, + { + "epoch": 0.79, + "grad_norm": 0.913490891456604, + "learning_rate": 1.1141625818634194e-06, + "loss": 0.5871, + "step": 12467 + }, + { + "epoch": 0.79, + "grad_norm": 0.9724521636962891, + "learning_rate": 1.1135170122019433e-06, + "loss": 0.6231, + "step": 12468 + }, + { + "epoch": 0.79, + "grad_norm": 0.9473910927772522, + "learning_rate": 1.112871606186744e-06, + "loss": 0.5913, + "step": 12469 + }, + { + "epoch": 0.79, + "grad_norm": 0.902228593826294, + "learning_rate": 1.112226363844998e-06, + "loss": 0.6149, + "step": 12470 + }, + { + "epoch": 0.79, + "grad_norm": 0.8123179078102112, + "learning_rate": 1.1115812852038777e-06, + "loss": 0.4786, + "step": 12471 + }, + { + "epoch": 0.79, + "grad_norm": 0.8619558811187744, + "learning_rate": 1.1109363702905419e-06, + "loss": 0.6023, + "step": 12472 + }, + { + "epoch": 0.79, + "grad_norm": 0.9255017042160034, + "learning_rate": 1.1102916191321456e-06, + "loss": 0.5984, + "step": 12473 + }, + { + "epoch": 0.79, + "grad_norm": 0.8794713616371155, + "learning_rate": 1.1096470317558384e-06, + "loss": 0.5654, + "step": 12474 + }, + { + "epoch": 0.79, + "grad_norm": 0.9066189527511597, + "learning_rate": 1.1090026081887611e-06, + "loss": 0.5686, + "step": 12475 + }, + { + "epoch": 0.79, + "grad_norm": 0.8906106352806091, + "learning_rate": 1.1083583484580495e-06, + "loss": 0.5434, + "step": 12476 + }, + { + "epoch": 0.79, + "grad_norm": 0.8132662177085876, + "learning_rate": 1.107714252590828e-06, + "loss": 0.5387, + "step": 12477 + }, + { + "epoch": 0.79, + "grad_norm": 0.8702597618103027, + "learning_rate": 1.1070703206142186e-06, + "loss": 0.5839, + "step": 12478 + }, + { + "epoch": 0.79, + "grad_norm": 0.9468421339988708, + "learning_rate": 1.1064265525553375e-06, + "loss": 0.6534, + "step": 12479 + }, + { + "epoch": 0.79, + "grad_norm": 0.8762499690055847, + "learning_rate": 1.1057829484412885e-06, + "loss": 0.5579, + "step": 12480 + }, + { + "epoch": 0.79, + "grad_norm": 0.9076083302497864, + "learning_rate": 1.1051395082991722e-06, + "loss": 0.5715, + "step": 12481 + }, + { + "epoch": 0.79, + "grad_norm": 0.8767365217208862, + "learning_rate": 1.1044962321560837e-06, + "loss": 0.5713, + "step": 12482 + }, + { + "epoch": 0.79, + "grad_norm": 0.864399254322052, + "learning_rate": 1.1038531200391045e-06, + "loss": 0.6256, + "step": 12483 + }, + { + "epoch": 0.79, + "grad_norm": 0.8748794794082642, + "learning_rate": 1.1032101719753197e-06, + "loss": 0.5357, + "step": 12484 + }, + { + "epoch": 0.79, + "grad_norm": 0.8488878607749939, + "learning_rate": 1.102567387991797e-06, + "loss": 0.5536, + "step": 12485 + }, + { + "epoch": 0.79, + "grad_norm": 0.8816309571266174, + "learning_rate": 1.101924768115603e-06, + "loss": 0.5564, + "step": 12486 + }, + { + "epoch": 0.79, + "grad_norm": 0.8800366520881653, + "learning_rate": 1.101282312373797e-06, + "loss": 0.5739, + "step": 12487 + }, + { + "epoch": 0.79, + "grad_norm": 0.9694901704788208, + "learning_rate": 1.1006400207934304e-06, + "loss": 0.6166, + "step": 12488 + }, + { + "epoch": 0.79, + "grad_norm": 0.9245271682739258, + "learning_rate": 1.0999978934015475e-06, + "loss": 0.5898, + "step": 12489 + }, + { + "epoch": 0.79, + "grad_norm": 0.9495474100112915, + "learning_rate": 1.0993559302251878e-06, + "loss": 0.6334, + "step": 12490 + }, + { + "epoch": 0.79, + "grad_norm": 0.9290176630020142, + "learning_rate": 1.0987141312913773e-06, + "loss": 0.583, + "step": 12491 + }, + { + "epoch": 0.79, + "grad_norm": 0.9221222400665283, + "learning_rate": 1.098072496627146e-06, + "loss": 0.6511, + "step": 12492 + }, + { + "epoch": 0.79, + "grad_norm": 0.9218513369560242, + "learning_rate": 1.0974310262595067e-06, + "loss": 0.6061, + "step": 12493 + }, + { + "epoch": 0.79, + "grad_norm": 0.9661274552345276, + "learning_rate": 1.096789720215471e-06, + "loss": 0.682, + "step": 12494 + }, + { + "epoch": 0.79, + "grad_norm": 0.9002541303634644, + "learning_rate": 1.0961485785220434e-06, + "loss": 0.5775, + "step": 12495 + }, + { + "epoch": 0.79, + "grad_norm": 0.8728871941566467, + "learning_rate": 1.0955076012062155e-06, + "loss": 0.6046, + "step": 12496 + }, + { + "epoch": 0.79, + "grad_norm": 0.9101660251617432, + "learning_rate": 1.094866788294981e-06, + "loss": 0.5323, + "step": 12497 + }, + { + "epoch": 0.79, + "grad_norm": 0.8614184856414795, + "learning_rate": 1.094226139815323e-06, + "loss": 0.5448, + "step": 12498 + }, + { + "epoch": 0.79, + "grad_norm": 0.934859037399292, + "learning_rate": 1.0935856557942132e-06, + "loss": 0.5444, + "step": 12499 + }, + { + "epoch": 0.79, + "grad_norm": 0.8698869943618774, + "learning_rate": 1.0929453362586223e-06, + "loss": 0.5316, + "step": 12500 + }, + { + "epoch": 0.79, + "grad_norm": 0.941087007522583, + "learning_rate": 1.0923051812355117e-06, + "loss": 0.6492, + "step": 12501 + }, + { + "epoch": 0.79, + "grad_norm": 0.9395822882652283, + "learning_rate": 1.091665190751836e-06, + "loss": 0.5986, + "step": 12502 + }, + { + "epoch": 0.79, + "grad_norm": 0.9707981944084167, + "learning_rate": 1.0910253648345442e-06, + "loss": 0.6301, + "step": 12503 + }, + { + "epoch": 0.79, + "grad_norm": 0.8878294825553894, + "learning_rate": 1.0903857035105736e-06, + "loss": 0.5955, + "step": 12504 + }, + { + "epoch": 0.79, + "grad_norm": 0.8388591408729553, + "learning_rate": 1.0897462068068616e-06, + "loss": 0.5295, + "step": 12505 + }, + { + "epoch": 0.79, + "grad_norm": 0.8977577686309814, + "learning_rate": 1.0891068747503353e-06, + "loss": 0.5855, + "step": 12506 + }, + { + "epoch": 0.79, + "grad_norm": 0.8421580195426941, + "learning_rate": 1.0884677073679123e-06, + "loss": 0.5343, + "step": 12507 + }, + { + "epoch": 0.79, + "grad_norm": 0.777437150478363, + "learning_rate": 1.0878287046865072e-06, + "loss": 0.5167, + "step": 12508 + }, + { + "epoch": 0.79, + "grad_norm": 0.8942268490791321, + "learning_rate": 1.0871898667330249e-06, + "loss": 0.5571, + "step": 12509 + }, + { + "epoch": 0.79, + "grad_norm": 0.8648400902748108, + "learning_rate": 1.0865511935343664e-06, + "loss": 0.5088, + "step": 12510 + }, + { + "epoch": 0.79, + "grad_norm": 0.8418681621551514, + "learning_rate": 1.0859126851174246e-06, + "loss": 0.5647, + "step": 12511 + }, + { + "epoch": 0.79, + "grad_norm": 0.9093899130821228, + "learning_rate": 1.0852743415090823e-06, + "loss": 0.5833, + "step": 12512 + }, + { + "epoch": 0.79, + "grad_norm": 0.8962028622627258, + "learning_rate": 1.0846361627362174e-06, + "loss": 0.5429, + "step": 12513 + }, + { + "epoch": 0.79, + "grad_norm": 0.8653958439826965, + "learning_rate": 1.0839981488257061e-06, + "loss": 0.5451, + "step": 12514 + }, + { + "epoch": 0.79, + "grad_norm": 0.8512267470359802, + "learning_rate": 1.0833602998044085e-06, + "loss": 0.5307, + "step": 12515 + }, + { + "epoch": 0.79, + "grad_norm": 0.8976225852966309, + "learning_rate": 1.0827226156991838e-06, + "loss": 0.605, + "step": 12516 + }, + { + "epoch": 0.79, + "grad_norm": 0.8847118020057678, + "learning_rate": 1.0820850965368822e-06, + "loss": 0.5519, + "step": 12517 + }, + { + "epoch": 0.79, + "grad_norm": 0.9687950611114502, + "learning_rate": 1.0814477423443482e-06, + "loss": 0.635, + "step": 12518 + }, + { + "epoch": 0.79, + "grad_norm": 0.8953339457511902, + "learning_rate": 1.0808105531484192e-06, + "loss": 0.5767, + "step": 12519 + }, + { + "epoch": 0.79, + "grad_norm": 0.9060798287391663, + "learning_rate": 1.0801735289759225e-06, + "loss": 0.5794, + "step": 12520 + }, + { + "epoch": 0.79, + "grad_norm": 0.9054228663444519, + "learning_rate": 1.0795366698536812e-06, + "loss": 0.5836, + "step": 12521 + }, + { + "epoch": 0.79, + "grad_norm": 0.8784095644950867, + "learning_rate": 1.078899975808515e-06, + "loss": 0.5729, + "step": 12522 + }, + { + "epoch": 0.79, + "grad_norm": 0.947877049446106, + "learning_rate": 1.0782634468672293e-06, + "loss": 0.5233, + "step": 12523 + }, + { + "epoch": 0.79, + "grad_norm": 0.866150438785553, + "learning_rate": 1.0776270830566266e-06, + "loss": 0.5557, + "step": 12524 + }, + { + "epoch": 0.79, + "grad_norm": 0.8818299174308777, + "learning_rate": 1.0769908844035032e-06, + "loss": 0.5335, + "step": 12525 + }, + { + "epoch": 0.79, + "grad_norm": 0.8382863998413086, + "learning_rate": 1.0763548509346461e-06, + "loss": 0.5066, + "step": 12526 + }, + { + "epoch": 0.79, + "grad_norm": 0.876054584980011, + "learning_rate": 1.0757189826768367e-06, + "loss": 0.5483, + "step": 12527 + }, + { + "epoch": 0.79, + "grad_norm": 0.9083865284919739, + "learning_rate": 1.075083279656851e-06, + "loss": 0.5776, + "step": 12528 + }, + { + "epoch": 0.79, + "grad_norm": 0.8849220275878906, + "learning_rate": 1.0744477419014532e-06, + "loss": 0.5793, + "step": 12529 + }, + { + "epoch": 0.79, + "grad_norm": 0.8889400959014893, + "learning_rate": 1.0738123694374047e-06, + "loss": 0.55, + "step": 12530 + }, + { + "epoch": 0.79, + "grad_norm": 0.8188003897666931, + "learning_rate": 1.0731771622914595e-06, + "loss": 0.5511, + "step": 12531 + }, + { + "epoch": 0.79, + "grad_norm": 0.8689089417457581, + "learning_rate": 1.072542120490363e-06, + "loss": 0.5708, + "step": 12532 + }, + { + "epoch": 0.79, + "grad_norm": 0.8809791803359985, + "learning_rate": 1.0719072440608575e-06, + "loss": 0.5782, + "step": 12533 + }, + { + "epoch": 0.79, + "grad_norm": 0.8765868544578552, + "learning_rate": 1.0712725330296697e-06, + "loss": 0.564, + "step": 12534 + }, + { + "epoch": 0.79, + "grad_norm": 0.8565429449081421, + "learning_rate": 1.07063798742353e-06, + "loss": 0.6002, + "step": 12535 + }, + { + "epoch": 0.79, + "grad_norm": 0.9748111367225647, + "learning_rate": 1.0700036072691566e-06, + "loss": 0.6289, + "step": 12536 + }, + { + "epoch": 0.79, + "grad_norm": 0.9657660722732544, + "learning_rate": 1.0693693925932585e-06, + "loss": 0.6292, + "step": 12537 + }, + { + "epoch": 0.79, + "grad_norm": 0.8865155577659607, + "learning_rate": 1.0687353434225418e-06, + "loss": 0.6005, + "step": 12538 + }, + { + "epoch": 0.79, + "grad_norm": 0.9077226519584656, + "learning_rate": 1.0681014597837042e-06, + "loss": 0.599, + "step": 12539 + }, + { + "epoch": 0.79, + "grad_norm": 0.8932228088378906, + "learning_rate": 1.0674677417034358e-06, + "loss": 0.5546, + "step": 12540 + }, + { + "epoch": 0.79, + "grad_norm": 0.8195880651473999, + "learning_rate": 1.0668341892084217e-06, + "loss": 0.5231, + "step": 12541 + }, + { + "epoch": 0.79, + "grad_norm": 0.9403937458992004, + "learning_rate": 1.0662008023253356e-06, + "loss": 0.5589, + "step": 12542 + }, + { + "epoch": 0.79, + "grad_norm": 0.9366670250892639, + "learning_rate": 1.0655675810808485e-06, + "loss": 0.6032, + "step": 12543 + }, + { + "epoch": 0.79, + "grad_norm": 0.8904660940170288, + "learning_rate": 1.0649345255016258e-06, + "loss": 0.6044, + "step": 12544 + }, + { + "epoch": 0.79, + "grad_norm": 0.9282307624816895, + "learning_rate": 1.0643016356143204e-06, + "loss": 0.624, + "step": 12545 + }, + { + "epoch": 0.79, + "grad_norm": 0.8701701164245605, + "learning_rate": 1.0636689114455811e-06, + "loss": 0.6007, + "step": 12546 + }, + { + "epoch": 0.79, + "grad_norm": 0.8869695067405701, + "learning_rate": 1.063036353022051e-06, + "loss": 0.6469, + "step": 12547 + }, + { + "epoch": 0.79, + "grad_norm": 0.8800140619277954, + "learning_rate": 1.0624039603703645e-06, + "loss": 0.509, + "step": 12548 + }, + { + "epoch": 0.8, + "grad_norm": 0.8784294724464417, + "learning_rate": 1.06177173351715e-06, + "loss": 0.5827, + "step": 12549 + }, + { + "epoch": 0.8, + "grad_norm": 0.9010189771652222, + "learning_rate": 1.061139672489027e-06, + "loss": 0.5904, + "step": 12550 + }, + { + "epoch": 0.8, + "grad_norm": 0.9229983687400818, + "learning_rate": 1.0605077773126083e-06, + "loss": 0.5881, + "step": 12551 + }, + { + "epoch": 0.8, + "grad_norm": 0.863856852054596, + "learning_rate": 1.059876048014506e-06, + "loss": 0.5963, + "step": 12552 + }, + { + "epoch": 0.8, + "grad_norm": 0.8975127935409546, + "learning_rate": 1.0592444846213145e-06, + "loss": 0.597, + "step": 12553 + }, + { + "epoch": 0.8, + "grad_norm": 0.8481269478797913, + "learning_rate": 1.058613087159629e-06, + "loss": 0.5336, + "step": 12554 + }, + { + "epoch": 0.8, + "grad_norm": 0.8915879130363464, + "learning_rate": 1.0579818556560357e-06, + "loss": 0.6215, + "step": 12555 + }, + { + "epoch": 0.8, + "grad_norm": 0.9215599298477173, + "learning_rate": 1.0573507901371126e-06, + "loss": 0.5748, + "step": 12556 + }, + { + "epoch": 0.8, + "grad_norm": 0.8889424800872803, + "learning_rate": 1.0567198906294341e-06, + "loss": 0.5658, + "step": 12557 + }, + { + "epoch": 0.8, + "grad_norm": 0.8827781081199646, + "learning_rate": 1.0560891571595616e-06, + "loss": 0.5804, + "step": 12558 + }, + { + "epoch": 0.8, + "grad_norm": 0.8699508905410767, + "learning_rate": 1.0554585897540553e-06, + "loss": 0.5575, + "step": 12559 + }, + { + "epoch": 0.8, + "grad_norm": 0.9525449872016907, + "learning_rate": 1.0548281884394657e-06, + "loss": 0.5723, + "step": 12560 + }, + { + "epoch": 0.8, + "grad_norm": 0.868190348148346, + "learning_rate": 1.0541979532423362e-06, + "loss": 0.5423, + "step": 12561 + }, + { + "epoch": 0.8, + "grad_norm": 0.8393450975418091, + "learning_rate": 1.053567884189205e-06, + "loss": 0.5703, + "step": 12562 + }, + { + "epoch": 0.8, + "grad_norm": 0.8783072829246521, + "learning_rate": 1.0529379813066026e-06, + "loss": 0.5426, + "step": 12563 + }, + { + "epoch": 0.8, + "grad_norm": 0.871197521686554, + "learning_rate": 1.0523082446210487e-06, + "loss": 0.5194, + "step": 12564 + }, + { + "epoch": 0.8, + "grad_norm": 0.9241814017295837, + "learning_rate": 1.051678674159064e-06, + "loss": 0.6151, + "step": 12565 + }, + { + "epoch": 0.8, + "grad_norm": 0.9252364039421082, + "learning_rate": 1.0510492699471536e-06, + "loss": 0.5901, + "step": 12566 + }, + { + "epoch": 0.8, + "grad_norm": 0.9119340777397156, + "learning_rate": 1.0504200320118214e-06, + "loss": 0.5864, + "step": 12567 + }, + { + "epoch": 0.8, + "grad_norm": 1.0206106901168823, + "learning_rate": 1.049790960379562e-06, + "loss": 0.6423, + "step": 12568 + }, + { + "epoch": 0.8, + "grad_norm": 0.9204949140548706, + "learning_rate": 1.0491620550768633e-06, + "loss": 0.5887, + "step": 12569 + }, + { + "epoch": 0.8, + "grad_norm": 0.9017525911331177, + "learning_rate": 1.048533316130207e-06, + "loss": 0.5684, + "step": 12570 + }, + { + "epoch": 0.8, + "grad_norm": 0.950289249420166, + "learning_rate": 1.0479047435660671e-06, + "loss": 0.5176, + "step": 12571 + }, + { + "epoch": 0.8, + "grad_norm": 0.9097518920898438, + "learning_rate": 1.047276337410908e-06, + "loss": 0.5952, + "step": 12572 + }, + { + "epoch": 0.8, + "grad_norm": 0.8936463594436646, + "learning_rate": 1.0466480976911947e-06, + "loss": 0.6109, + "step": 12573 + }, + { + "epoch": 0.8, + "grad_norm": 0.8422192931175232, + "learning_rate": 1.0460200244333758e-06, + "loss": 0.5667, + "step": 12574 + }, + { + "epoch": 0.8, + "grad_norm": 0.8249906301498413, + "learning_rate": 1.0453921176638981e-06, + "loss": 0.5465, + "step": 12575 + }, + { + "epoch": 0.8, + "grad_norm": 0.8235678672790527, + "learning_rate": 1.044764377409203e-06, + "loss": 0.5726, + "step": 12576 + }, + { + "epoch": 0.8, + "grad_norm": 1.0038381814956665, + "learning_rate": 1.0441368036957184e-06, + "loss": 0.5896, + "step": 12577 + }, + { + "epoch": 0.8, + "grad_norm": 0.8613317012786865, + "learning_rate": 1.0435093965498727e-06, + "loss": 0.5448, + "step": 12578 + }, + { + "epoch": 0.8, + "grad_norm": 0.8612802624702454, + "learning_rate": 1.0428821559980839e-06, + "loss": 0.5702, + "step": 12579 + }, + { + "epoch": 0.8, + "grad_norm": 0.8284898996353149, + "learning_rate": 1.0422550820667605e-06, + "loss": 0.5696, + "step": 12580 + }, + { + "epoch": 0.8, + "grad_norm": 0.8467788696289062, + "learning_rate": 1.0416281747823076e-06, + "loss": 0.5265, + "step": 12581 + }, + { + "epoch": 0.8, + "grad_norm": 0.9856624603271484, + "learning_rate": 1.0410014341711216e-06, + "loss": 0.6143, + "step": 12582 + }, + { + "epoch": 0.8, + "grad_norm": 0.8942157626152039, + "learning_rate": 1.0403748602595937e-06, + "loss": 0.6032, + "step": 12583 + }, + { + "epoch": 0.8, + "grad_norm": 0.9039360880851746, + "learning_rate": 1.0397484530741053e-06, + "loss": 0.6271, + "step": 12584 + }, + { + "epoch": 0.8, + "grad_norm": 0.9321849942207336, + "learning_rate": 1.0391222126410327e-06, + "loss": 0.6062, + "step": 12585 + }, + { + "epoch": 0.8, + "grad_norm": 0.8697063326835632, + "learning_rate": 1.0384961389867454e-06, + "loss": 0.546, + "step": 12586 + }, + { + "epoch": 0.8, + "grad_norm": 0.9680486917495728, + "learning_rate": 1.0378702321376054e-06, + "loss": 0.5974, + "step": 12587 + }, + { + "epoch": 0.8, + "grad_norm": 0.8630591034889221, + "learning_rate": 1.037244492119966e-06, + "loss": 0.5821, + "step": 12588 + }, + { + "epoch": 0.8, + "grad_norm": 0.8251073360443115, + "learning_rate": 1.036618918960175e-06, + "loss": 0.5908, + "step": 12589 + }, + { + "epoch": 0.8, + "grad_norm": 0.8866623044013977, + "learning_rate": 1.0359935126845738e-06, + "loss": 0.5549, + "step": 12590 + }, + { + "epoch": 0.8, + "grad_norm": 0.8582077622413635, + "learning_rate": 1.0353682733194965e-06, + "loss": 0.5637, + "step": 12591 + }, + { + "epoch": 0.8, + "grad_norm": 0.8953722715377808, + "learning_rate": 1.0347432008912688e-06, + "loss": 0.6297, + "step": 12592 + }, + { + "epoch": 0.8, + "grad_norm": 0.9254661798477173, + "learning_rate": 1.0341182954262125e-06, + "loss": 0.5432, + "step": 12593 + }, + { + "epoch": 0.8, + "grad_norm": 0.8666430711746216, + "learning_rate": 1.0334935569506355e-06, + "loss": 0.5653, + "step": 12594 + }, + { + "epoch": 0.8, + "grad_norm": 0.9944994449615479, + "learning_rate": 1.0328689854908492e-06, + "loss": 0.5792, + "step": 12595 + }, + { + "epoch": 0.8, + "grad_norm": 0.8367435336112976, + "learning_rate": 1.032244581073148e-06, + "loss": 0.5453, + "step": 12596 + }, + { + "epoch": 0.8, + "grad_norm": 0.9115063548088074, + "learning_rate": 1.0316203437238242e-06, + "loss": 0.6038, + "step": 12597 + }, + { + "epoch": 0.8, + "grad_norm": 0.8422768712043762, + "learning_rate": 1.0309962734691632e-06, + "loss": 0.5879, + "step": 12598 + }, + { + "epoch": 0.8, + "grad_norm": 0.9396683573722839, + "learning_rate": 1.0303723703354418e-06, + "loss": 0.6432, + "step": 12599 + }, + { + "epoch": 0.8, + "grad_norm": 0.8688830733299255, + "learning_rate": 1.0297486343489304e-06, + "loss": 0.578, + "step": 12600 + }, + { + "epoch": 0.8, + "grad_norm": 0.8575296401977539, + "learning_rate": 1.0291250655358942e-06, + "loss": 0.5906, + "step": 12601 + }, + { + "epoch": 0.8, + "grad_norm": 0.8545289635658264, + "learning_rate": 1.0285016639225849e-06, + "loss": 0.5982, + "step": 12602 + }, + { + "epoch": 0.8, + "grad_norm": 0.8576691150665283, + "learning_rate": 1.0278784295352572e-06, + "loss": 0.589, + "step": 12603 + }, + { + "epoch": 0.8, + "grad_norm": 0.8619968295097351, + "learning_rate": 1.0272553624001502e-06, + "loss": 0.5483, + "step": 12604 + }, + { + "epoch": 0.8, + "grad_norm": 0.8848919868469238, + "learning_rate": 1.0266324625434992e-06, + "loss": 0.5735, + "step": 12605 + }, + { + "epoch": 0.8, + "grad_norm": 0.8641508221626282, + "learning_rate": 1.0260097299915345e-06, + "loss": 0.5249, + "step": 12606 + }, + { + "epoch": 0.8, + "grad_norm": 0.8593783378601074, + "learning_rate": 1.0253871647704722e-06, + "loss": 0.5686, + "step": 12607 + }, + { + "epoch": 0.8, + "grad_norm": 0.860919713973999, + "learning_rate": 1.024764766906532e-06, + "loss": 0.5702, + "step": 12608 + }, + { + "epoch": 0.8, + "grad_norm": 0.9240328669548035, + "learning_rate": 1.0241425364259195e-06, + "loss": 0.6011, + "step": 12609 + }, + { + "epoch": 0.8, + "grad_norm": 0.8484996557235718, + "learning_rate": 1.0235204733548321e-06, + "loss": 0.5523, + "step": 12610 + }, + { + "epoch": 0.8, + "grad_norm": 0.8490926027297974, + "learning_rate": 1.022898577719465e-06, + "loss": 0.5931, + "step": 12611 + }, + { + "epoch": 0.8, + "grad_norm": 0.8484750986099243, + "learning_rate": 1.0222768495460029e-06, + "loss": 0.5313, + "step": 12612 + }, + { + "epoch": 0.8, + "grad_norm": 0.904883086681366, + "learning_rate": 1.0216552888606256e-06, + "loss": 0.5397, + "step": 12613 + }, + { + "epoch": 0.8, + "grad_norm": 0.8598143458366394, + "learning_rate": 1.0210338956895054e-06, + "loss": 0.5831, + "step": 12614 + }, + { + "epoch": 0.8, + "grad_norm": 0.8726117014884949, + "learning_rate": 1.020412670058804e-06, + "loss": 0.5869, + "step": 12615 + }, + { + "epoch": 0.8, + "grad_norm": 0.9262253642082214, + "learning_rate": 1.0197916119946821e-06, + "loss": 0.607, + "step": 12616 + }, + { + "epoch": 0.8, + "grad_norm": 0.9659039974212646, + "learning_rate": 1.0191707215232905e-06, + "loss": 0.6243, + "step": 12617 + }, + { + "epoch": 0.8, + "grad_norm": 0.9009899497032166, + "learning_rate": 1.0185499986707702e-06, + "loss": 0.6024, + "step": 12618 + }, + { + "epoch": 0.8, + "grad_norm": 0.8743886351585388, + "learning_rate": 1.0179294434632593e-06, + "loss": 0.5578, + "step": 12619 + }, + { + "epoch": 0.8, + "grad_norm": 0.8483142256736755, + "learning_rate": 1.0173090559268867e-06, + "loss": 0.5586, + "step": 12620 + }, + { + "epoch": 0.8, + "grad_norm": 0.9112587571144104, + "learning_rate": 1.0166888360877747e-06, + "loss": 0.5717, + "step": 12621 + }, + { + "epoch": 0.8, + "grad_norm": 0.9113679528236389, + "learning_rate": 1.0160687839720407e-06, + "loss": 0.5826, + "step": 12622 + }, + { + "epoch": 0.8, + "grad_norm": 0.8618003129959106, + "learning_rate": 1.0154488996057894e-06, + "loss": 0.6087, + "step": 12623 + }, + { + "epoch": 0.8, + "grad_norm": 0.9165884256362915, + "learning_rate": 1.0148291830151224e-06, + "loss": 0.6375, + "step": 12624 + }, + { + "epoch": 0.8, + "grad_norm": 0.9440232515335083, + "learning_rate": 1.014209634226138e-06, + "loss": 0.6116, + "step": 12625 + }, + { + "epoch": 0.8, + "grad_norm": 0.8831072449684143, + "learning_rate": 1.013590253264919e-06, + "loss": 0.6156, + "step": 12626 + }, + { + "epoch": 0.8, + "grad_norm": 0.8621459603309631, + "learning_rate": 1.0129710401575465e-06, + "loss": 0.587, + "step": 12627 + }, + { + "epoch": 0.8, + "grad_norm": 0.8391403555870056, + "learning_rate": 1.0123519949300942e-06, + "loss": 0.5753, + "step": 12628 + }, + { + "epoch": 0.8, + "grad_norm": 0.8815181851387024, + "learning_rate": 1.0117331176086264e-06, + "loss": 0.5571, + "step": 12629 + }, + { + "epoch": 0.8, + "grad_norm": 0.881256103515625, + "learning_rate": 1.0111144082192048e-06, + "loss": 0.5949, + "step": 12630 + }, + { + "epoch": 0.8, + "grad_norm": 0.9096524715423584, + "learning_rate": 1.0104958667878778e-06, + "loss": 0.5858, + "step": 12631 + }, + { + "epoch": 0.8, + "grad_norm": 0.9030601382255554, + "learning_rate": 1.0098774933406903e-06, + "loss": 0.5881, + "step": 12632 + }, + { + "epoch": 0.8, + "grad_norm": 0.9352519512176514, + "learning_rate": 1.0092592879036834e-06, + "loss": 0.5795, + "step": 12633 + }, + { + "epoch": 0.8, + "grad_norm": 0.9007598161697388, + "learning_rate": 1.0086412505028836e-06, + "loss": 0.5347, + "step": 12634 + }, + { + "epoch": 0.8, + "grad_norm": 0.9302735328674316, + "learning_rate": 1.0080233811643158e-06, + "loss": 0.5944, + "step": 12635 + }, + { + "epoch": 0.8, + "grad_norm": 0.8626806735992432, + "learning_rate": 1.0074056799139981e-06, + "loss": 0.6201, + "step": 12636 + }, + { + "epoch": 0.8, + "grad_norm": 0.9001949429512024, + "learning_rate": 1.006788146777935e-06, + "loss": 0.5805, + "step": 12637 + }, + { + "epoch": 0.8, + "grad_norm": 0.9080000519752502, + "learning_rate": 1.0061707817821343e-06, + "loss": 0.5803, + "step": 12638 + }, + { + "epoch": 0.8, + "grad_norm": 0.8849290013313293, + "learning_rate": 1.0055535849525872e-06, + "loss": 0.5354, + "step": 12639 + }, + { + "epoch": 0.8, + "grad_norm": 0.9328687787055969, + "learning_rate": 1.004936556315283e-06, + "loss": 0.6155, + "step": 12640 + }, + { + "epoch": 0.8, + "grad_norm": 0.9027367234230042, + "learning_rate": 1.004319695896202e-06, + "loss": 0.5903, + "step": 12641 + }, + { + "epoch": 0.8, + "grad_norm": 0.8899877667427063, + "learning_rate": 1.0037030037213197e-06, + "loss": 0.5407, + "step": 12642 + }, + { + "epoch": 0.8, + "grad_norm": 0.9732675552368164, + "learning_rate": 1.0030864798166013e-06, + "loss": 0.6381, + "step": 12643 + }, + { + "epoch": 0.8, + "grad_norm": 0.9128854870796204, + "learning_rate": 1.0024701242080082e-06, + "loss": 0.6036, + "step": 12644 + }, + { + "epoch": 0.8, + "grad_norm": 0.905947744846344, + "learning_rate": 1.0018539369214891e-06, + "loss": 0.5918, + "step": 12645 + }, + { + "epoch": 0.8, + "grad_norm": 0.8508647084236145, + "learning_rate": 1.0012379179829951e-06, + "loss": 0.5757, + "step": 12646 + }, + { + "epoch": 0.8, + "grad_norm": 0.9843933582305908, + "learning_rate": 1.0006220674184602e-06, + "loss": 0.6191, + "step": 12647 + }, + { + "epoch": 0.8, + "grad_norm": 0.9173324704170227, + "learning_rate": 1.0000063852538172e-06, + "loss": 0.5374, + "step": 12648 + }, + { + "epoch": 0.8, + "grad_norm": 0.9240821599960327, + "learning_rate": 9.993908715149902e-07, + "loss": 0.5682, + "step": 12649 + }, + { + "epoch": 0.8, + "grad_norm": 0.8344833850860596, + "learning_rate": 9.98775526227897e-07, + "loss": 0.5103, + "step": 12650 + }, + { + "epoch": 0.8, + "grad_norm": 0.8877370953559875, + "learning_rate": 9.981603494184473e-07, + "loss": 0.6135, + "step": 12651 + }, + { + "epoch": 0.8, + "grad_norm": 0.9172238111495972, + "learning_rate": 9.975453411125447e-07, + "loss": 0.5739, + "step": 12652 + }, + { + "epoch": 0.8, + "grad_norm": 0.9218218326568604, + "learning_rate": 9.969305013360825e-07, + "loss": 0.5615, + "step": 12653 + }, + { + "epoch": 0.8, + "grad_norm": 0.9229342341423035, + "learning_rate": 9.963158301149522e-07, + "loss": 0.6138, + "step": 12654 + }, + { + "epoch": 0.8, + "grad_norm": 0.9228758215904236, + "learning_rate": 9.957013274750338e-07, + "loss": 0.6017, + "step": 12655 + }, + { + "epoch": 0.8, + "grad_norm": 0.8756922483444214, + "learning_rate": 9.95086993442203e-07, + "loss": 0.577, + "step": 12656 + }, + { + "epoch": 0.8, + "grad_norm": 0.9407891035079956, + "learning_rate": 9.944728280423265e-07, + "loss": 0.6189, + "step": 12657 + }, + { + "epoch": 0.8, + "grad_norm": 0.8400014042854309, + "learning_rate": 9.938588313012655e-07, + "loss": 0.5349, + "step": 12658 + }, + { + "epoch": 0.8, + "grad_norm": 0.9128040671348572, + "learning_rate": 9.93245003244872e-07, + "loss": 0.5806, + "step": 12659 + }, + { + "epoch": 0.8, + "grad_norm": 0.9192377328872681, + "learning_rate": 9.92631343898995e-07, + "loss": 0.5908, + "step": 12660 + }, + { + "epoch": 0.8, + "grad_norm": 0.9443216919898987, + "learning_rate": 9.920178532894698e-07, + "loss": 0.5576, + "step": 12661 + }, + { + "epoch": 0.8, + "grad_norm": 0.8419626355171204, + "learning_rate": 9.9140453144213e-07, + "loss": 0.5135, + "step": 12662 + }, + { + "epoch": 0.8, + "grad_norm": 0.8705309629440308, + "learning_rate": 9.907913783828004e-07, + "loss": 0.5648, + "step": 12663 + }, + { + "epoch": 0.8, + "grad_norm": 0.9433914422988892, + "learning_rate": 9.901783941372988e-07, + "loss": 0.5512, + "step": 12664 + }, + { + "epoch": 0.8, + "grad_norm": 0.9095032811164856, + "learning_rate": 9.895655787314361e-07, + "loss": 0.6271, + "step": 12665 + }, + { + "epoch": 0.8, + "grad_norm": 0.8473713994026184, + "learning_rate": 9.889529321910169e-07, + "loss": 0.5568, + "step": 12666 + }, + { + "epoch": 0.8, + "grad_norm": 0.8689250349998474, + "learning_rate": 9.88340454541834e-07, + "loss": 0.5617, + "step": 12667 + }, + { + "epoch": 0.8, + "grad_norm": 0.94936603307724, + "learning_rate": 9.87728145809681e-07, + "loss": 0.6084, + "step": 12668 + }, + { + "epoch": 0.8, + "grad_norm": 0.8550971150398254, + "learning_rate": 9.871160060203371e-07, + "loss": 0.485, + "step": 12669 + }, + { + "epoch": 0.8, + "grad_norm": 0.8166051506996155, + "learning_rate": 9.865040351995787e-07, + "loss": 0.5548, + "step": 12670 + }, + { + "epoch": 0.8, + "grad_norm": 0.8855223655700684, + "learning_rate": 9.85892233373173e-07, + "loss": 0.5517, + "step": 12671 + }, + { + "epoch": 0.8, + "grad_norm": 0.8748183846473694, + "learning_rate": 9.852806005668813e-07, + "loss": 0.5437, + "step": 12672 + }, + { + "epoch": 0.8, + "grad_norm": 0.9316419959068298, + "learning_rate": 9.846691368064577e-07, + "loss": 0.5686, + "step": 12673 + }, + { + "epoch": 0.8, + "grad_norm": 0.8692405819892883, + "learning_rate": 9.840578421176495e-07, + "loss": 0.5458, + "step": 12674 + }, + { + "epoch": 0.8, + "grad_norm": 0.9151699542999268, + "learning_rate": 9.834467165261924e-07, + "loss": 0.5581, + "step": 12675 + }, + { + "epoch": 0.8, + "grad_norm": 0.8994660973548889, + "learning_rate": 9.828357600578242e-07, + "loss": 0.5499, + "step": 12676 + }, + { + "epoch": 0.8, + "grad_norm": 0.9051674008369446, + "learning_rate": 9.82224972738266e-07, + "loss": 0.6041, + "step": 12677 + }, + { + "epoch": 0.8, + "grad_norm": 0.845827043056488, + "learning_rate": 9.816143545932378e-07, + "loss": 0.5242, + "step": 12678 + }, + { + "epoch": 0.8, + "grad_norm": 0.8907935619354248, + "learning_rate": 9.8100390564845e-07, + "loss": 0.5491, + "step": 12679 + }, + { + "epoch": 0.8, + "grad_norm": 0.833772599697113, + "learning_rate": 9.803936259296066e-07, + "loss": 0.5004, + "step": 12680 + }, + { + "epoch": 0.8, + "grad_norm": 0.9332374930381775, + "learning_rate": 9.797835154624041e-07, + "loss": 0.6143, + "step": 12681 + }, + { + "epoch": 0.8, + "grad_norm": 0.906049370765686, + "learning_rate": 9.791735742725339e-07, + "loss": 0.641, + "step": 12682 + }, + { + "epoch": 0.8, + "grad_norm": 0.9151736497879028, + "learning_rate": 9.78563802385676e-07, + "loss": 0.533, + "step": 12683 + }, + { + "epoch": 0.8, + "grad_norm": 0.8841385841369629, + "learning_rate": 9.779541998275067e-07, + "loss": 0.5605, + "step": 12684 + }, + { + "epoch": 0.8, + "grad_norm": 0.8864476084709167, + "learning_rate": 9.773447666236946e-07, + "loss": 0.5398, + "step": 12685 + }, + { + "epoch": 0.8, + "grad_norm": 0.8535383343696594, + "learning_rate": 9.767355027999004e-07, + "loss": 0.5665, + "step": 12686 + }, + { + "epoch": 0.8, + "grad_norm": 0.8663592338562012, + "learning_rate": 9.761264083817795e-07, + "loss": 0.5445, + "step": 12687 + }, + { + "epoch": 0.8, + "grad_norm": 0.9647719860076904, + "learning_rate": 9.755174833949749e-07, + "loss": 0.528, + "step": 12688 + }, + { + "epoch": 0.8, + "grad_norm": 0.9407845139503479, + "learning_rate": 9.749087278651304e-07, + "loss": 0.6414, + "step": 12689 + }, + { + "epoch": 0.8, + "grad_norm": 0.869473934173584, + "learning_rate": 9.743001418178782e-07, + "loss": 0.5956, + "step": 12690 + }, + { + "epoch": 0.8, + "grad_norm": 0.9082080125808716, + "learning_rate": 9.736917252788414e-07, + "loss": 0.5468, + "step": 12691 + }, + { + "epoch": 0.8, + "grad_norm": 0.8994290232658386, + "learning_rate": 9.730834782736393e-07, + "loss": 0.5714, + "step": 12692 + }, + { + "epoch": 0.8, + "grad_norm": 0.8263580799102783, + "learning_rate": 9.724754008278836e-07, + "loss": 0.5674, + "step": 12693 + }, + { + "epoch": 0.8, + "grad_norm": 0.8548535704612732, + "learning_rate": 9.718674929671778e-07, + "loss": 0.5221, + "step": 12694 + }, + { + "epoch": 0.8, + "grad_norm": 0.88187575340271, + "learning_rate": 9.71259754717121e-07, + "loss": 0.5945, + "step": 12695 + }, + { + "epoch": 0.8, + "grad_norm": 0.9128777384757996, + "learning_rate": 9.706521861032974e-07, + "loss": 0.5188, + "step": 12696 + }, + { + "epoch": 0.8, + "grad_norm": 0.9275169968605042, + "learning_rate": 9.700447871512953e-07, + "loss": 0.5871, + "step": 12697 + }, + { + "epoch": 0.8, + "grad_norm": 0.9131552577018738, + "learning_rate": 9.694375578866889e-07, + "loss": 0.6075, + "step": 12698 + }, + { + "epoch": 0.8, + "grad_norm": 0.9026870727539062, + "learning_rate": 9.688304983350443e-07, + "loss": 0.5856, + "step": 12699 + }, + { + "epoch": 0.8, + "grad_norm": 0.8732842803001404, + "learning_rate": 9.682236085219243e-07, + "loss": 0.5447, + "step": 12700 + }, + { + "epoch": 0.8, + "grad_norm": 0.9150475859642029, + "learning_rate": 9.67616888472882e-07, + "loss": 0.5613, + "step": 12701 + }, + { + "epoch": 0.8, + "grad_norm": 0.8680015802383423, + "learning_rate": 9.670103382134655e-07, + "loss": 0.5447, + "step": 12702 + }, + { + "epoch": 0.8, + "grad_norm": 0.9147303700447083, + "learning_rate": 9.664039577692152e-07, + "loss": 0.5829, + "step": 12703 + }, + { + "epoch": 0.8, + "grad_norm": 0.9100850224494934, + "learning_rate": 9.65797747165661e-07, + "loss": 0.5835, + "step": 12704 + }, + { + "epoch": 0.8, + "grad_norm": 0.8714893460273743, + "learning_rate": 9.65191706428328e-07, + "loss": 0.5811, + "step": 12705 + }, + { + "epoch": 0.8, + "grad_norm": 0.8966681361198425, + "learning_rate": 9.645858355827392e-07, + "loss": 0.5887, + "step": 12706 + }, + { + "epoch": 0.81, + "grad_norm": 0.8519495725631714, + "learning_rate": 9.639801346544015e-07, + "loss": 0.5868, + "step": 12707 + }, + { + "epoch": 0.81, + "grad_norm": 0.9009888768196106, + "learning_rate": 9.633746036688196e-07, + "loss": 0.5386, + "step": 12708 + }, + { + "epoch": 0.81, + "grad_norm": 0.8464866280555725, + "learning_rate": 9.627692426514907e-07, + "loss": 0.5542, + "step": 12709 + }, + { + "epoch": 0.81, + "grad_norm": 0.9193606972694397, + "learning_rate": 9.621640516279047e-07, + "loss": 0.5917, + "step": 12710 + }, + { + "epoch": 0.81, + "grad_norm": 0.9212644100189209, + "learning_rate": 9.61559030623545e-07, + "loss": 0.6036, + "step": 12711 + }, + { + "epoch": 0.81, + "grad_norm": 0.9172996878623962, + "learning_rate": 9.609541796638848e-07, + "loss": 0.5267, + "step": 12712 + }, + { + "epoch": 0.81, + "grad_norm": 0.8999651074409485, + "learning_rate": 9.603494987743932e-07, + "loss": 0.5834, + "step": 12713 + }, + { + "epoch": 0.81, + "grad_norm": 0.9378758072853088, + "learning_rate": 9.597449879805314e-07, + "loss": 0.5732, + "step": 12714 + }, + { + "epoch": 0.81, + "grad_norm": 0.8777378797531128, + "learning_rate": 9.59140647307753e-07, + "loss": 0.5429, + "step": 12715 + }, + { + "epoch": 0.81, + "grad_norm": 0.9323769807815552, + "learning_rate": 9.585364767815048e-07, + "loss": 0.5651, + "step": 12716 + }, + { + "epoch": 0.81, + "grad_norm": 0.9203583598136902, + "learning_rate": 9.57932476427228e-07, + "loss": 0.6104, + "step": 12717 + }, + { + "epoch": 0.81, + "grad_norm": 0.8466483354568481, + "learning_rate": 9.573286462703501e-07, + "loss": 0.5486, + "step": 12718 + }, + { + "epoch": 0.81, + "grad_norm": 0.9139605164527893, + "learning_rate": 9.567249863363027e-07, + "loss": 0.5501, + "step": 12719 + }, + { + "epoch": 0.81, + "grad_norm": 0.9106989502906799, + "learning_rate": 9.56121496650499e-07, + "loss": 0.5969, + "step": 12720 + }, + { + "epoch": 0.81, + "grad_norm": 0.8454228043556213, + "learning_rate": 9.55518177238351e-07, + "loss": 0.5935, + "step": 12721 + }, + { + "epoch": 0.81, + "grad_norm": 0.8739069700241089, + "learning_rate": 9.549150281252633e-07, + "loss": 0.599, + "step": 12722 + }, + { + "epoch": 0.81, + "grad_norm": 0.8933126926422119, + "learning_rate": 9.54312049336632e-07, + "loss": 0.5864, + "step": 12723 + }, + { + "epoch": 0.81, + "grad_norm": 0.8254374861717224, + "learning_rate": 9.53709240897846e-07, + "loss": 0.5309, + "step": 12724 + }, + { + "epoch": 0.81, + "grad_norm": 0.8132497668266296, + "learning_rate": 9.531066028342895e-07, + "loss": 0.6107, + "step": 12725 + }, + { + "epoch": 0.81, + "grad_norm": 0.870490312576294, + "learning_rate": 9.525041351713332e-07, + "loss": 0.5699, + "step": 12726 + }, + { + "epoch": 0.81, + "grad_norm": 0.8581969141960144, + "learning_rate": 9.519018379343486e-07, + "loss": 0.5235, + "step": 12727 + }, + { + "epoch": 0.81, + "grad_norm": 0.8589658141136169, + "learning_rate": 9.512997111486965e-07, + "loss": 0.6124, + "step": 12728 + }, + { + "epoch": 0.81, + "grad_norm": 0.885682225227356, + "learning_rate": 9.506977548397284e-07, + "loss": 0.6406, + "step": 12729 + }, + { + "epoch": 0.81, + "grad_norm": 0.8724113702774048, + "learning_rate": 9.50095969032791e-07, + "loss": 0.6235, + "step": 12730 + }, + { + "epoch": 0.81, + "grad_norm": 0.9281406998634338, + "learning_rate": 9.494943537532242e-07, + "loss": 0.5744, + "step": 12731 + }, + { + "epoch": 0.81, + "grad_norm": 0.8291860222816467, + "learning_rate": 9.488929090263588e-07, + "loss": 0.5355, + "step": 12732 + }, + { + "epoch": 0.81, + "grad_norm": 0.8633788228034973, + "learning_rate": 9.482916348775217e-07, + "loss": 0.5948, + "step": 12733 + }, + { + "epoch": 0.81, + "grad_norm": 0.9064257740974426, + "learning_rate": 9.476905313320283e-07, + "loss": 0.627, + "step": 12734 + }, + { + "epoch": 0.81, + "grad_norm": 0.9185280203819275, + "learning_rate": 9.470895984151879e-07, + "loss": 0.5504, + "step": 12735 + }, + { + "epoch": 0.81, + "grad_norm": 0.9063805937767029, + "learning_rate": 9.464888361523078e-07, + "loss": 0.5554, + "step": 12736 + }, + { + "epoch": 0.81, + "grad_norm": 0.9305859804153442, + "learning_rate": 9.458882445686807e-07, + "loss": 0.6012, + "step": 12737 + }, + { + "epoch": 0.81, + "grad_norm": 0.9028577208518982, + "learning_rate": 9.452878236895963e-07, + "loss": 0.6199, + "step": 12738 + }, + { + "epoch": 0.81, + "grad_norm": 0.870011568069458, + "learning_rate": 9.446875735403366e-07, + "loss": 0.5725, + "step": 12739 + }, + { + "epoch": 0.81, + "grad_norm": 0.8897619247436523, + "learning_rate": 9.440874941461753e-07, + "loss": 0.5679, + "step": 12740 + }, + { + "epoch": 0.81, + "grad_norm": 0.8889445662498474, + "learning_rate": 9.434875855323816e-07, + "loss": 0.5471, + "step": 12741 + }, + { + "epoch": 0.81, + "grad_norm": 0.8454013466835022, + "learning_rate": 9.428878477242131e-07, + "loss": 0.5971, + "step": 12742 + }, + { + "epoch": 0.81, + "grad_norm": 0.911864161491394, + "learning_rate": 9.422882807469219e-07, + "loss": 0.536, + "step": 12743 + }, + { + "epoch": 0.81, + "grad_norm": 0.9062489867210388, + "learning_rate": 9.416888846257588e-07, + "loss": 0.5738, + "step": 12744 + }, + { + "epoch": 0.81, + "grad_norm": 0.8988074660301208, + "learning_rate": 9.41089659385957e-07, + "loss": 0.5907, + "step": 12745 + }, + { + "epoch": 0.81, + "grad_norm": 0.9005908370018005, + "learning_rate": 9.404906050527496e-07, + "loss": 0.5977, + "step": 12746 + }, + { + "epoch": 0.81, + "grad_norm": 0.8807809352874756, + "learning_rate": 9.398917216513625e-07, + "loss": 0.5498, + "step": 12747 + }, + { + "epoch": 0.81, + "grad_norm": 1.0026898384094238, + "learning_rate": 9.39293009207008e-07, + "loss": 0.6227, + "step": 12748 + }, + { + "epoch": 0.81, + "grad_norm": 0.9482130408287048, + "learning_rate": 9.386944677449017e-07, + "loss": 0.5968, + "step": 12749 + }, + { + "epoch": 0.81, + "grad_norm": 0.8382649421691895, + "learning_rate": 9.380960972902414e-07, + "loss": 0.5151, + "step": 12750 + }, + { + "epoch": 0.81, + "grad_norm": 0.8774755597114563, + "learning_rate": 9.374978978682248e-07, + "loss": 0.558, + "step": 12751 + }, + { + "epoch": 0.81, + "grad_norm": 1.0281962156295776, + "learning_rate": 9.368998695040387e-07, + "loss": 0.6242, + "step": 12752 + }, + { + "epoch": 0.81, + "grad_norm": 0.8988599181175232, + "learning_rate": 9.363020122228645e-07, + "loss": 0.5831, + "step": 12753 + }, + { + "epoch": 0.81, + "grad_norm": 0.9397704005241394, + "learning_rate": 9.357043260498766e-07, + "loss": 0.5837, + "step": 12754 + }, + { + "epoch": 0.81, + "grad_norm": 0.851276695728302, + "learning_rate": 9.351068110102418e-07, + "loss": 0.5161, + "step": 12755 + }, + { + "epoch": 0.81, + "grad_norm": 0.9624939560890198, + "learning_rate": 9.345094671291155e-07, + "loss": 0.5814, + "step": 12756 + }, + { + "epoch": 0.81, + "grad_norm": 0.9801254868507385, + "learning_rate": 9.339122944316559e-07, + "loss": 0.6233, + "step": 12757 + }, + { + "epoch": 0.81, + "grad_norm": 0.8699768781661987, + "learning_rate": 9.333152929430029e-07, + "loss": 0.5727, + "step": 12758 + }, + { + "epoch": 0.81, + "grad_norm": 0.9153022766113281, + "learning_rate": 9.327184626882963e-07, + "loss": 0.6218, + "step": 12759 + }, + { + "epoch": 0.81, + "grad_norm": 0.8925560712814331, + "learning_rate": 9.321218036926677e-07, + "loss": 0.5697, + "step": 12760 + }, + { + "epoch": 0.81, + "grad_norm": 0.8836098313331604, + "learning_rate": 9.315253159812359e-07, + "loss": 0.5824, + "step": 12761 + }, + { + "epoch": 0.81, + "grad_norm": 0.895380437374115, + "learning_rate": 9.30928999579121e-07, + "loss": 0.5481, + "step": 12762 + }, + { + "epoch": 0.81, + "grad_norm": 0.9610360264778137, + "learning_rate": 9.303328545114321e-07, + "loss": 0.532, + "step": 12763 + }, + { + "epoch": 0.81, + "grad_norm": 0.9137628078460693, + "learning_rate": 9.29736880803268e-07, + "loss": 0.53, + "step": 12764 + }, + { + "epoch": 0.81, + "grad_norm": 0.9747650623321533, + "learning_rate": 9.29141078479725e-07, + "loss": 0.6109, + "step": 12765 + }, + { + "epoch": 0.81, + "grad_norm": 0.9045560956001282, + "learning_rate": 9.285454475658889e-07, + "loss": 0.5683, + "step": 12766 + }, + { + "epoch": 0.81, + "grad_norm": 0.8270063400268555, + "learning_rate": 9.279499880868409e-07, + "loss": 0.5004, + "step": 12767 + }, + { + "epoch": 0.81, + "grad_norm": 0.8460723757743835, + "learning_rate": 9.273547000676547e-07, + "loss": 0.599, + "step": 12768 + }, + { + "epoch": 0.81, + "grad_norm": 0.9193210601806641, + "learning_rate": 9.267595835333915e-07, + "loss": 0.5865, + "step": 12769 + }, + { + "epoch": 0.81, + "grad_norm": 0.910054087638855, + "learning_rate": 9.261646385091139e-07, + "loss": 0.543, + "step": 12770 + }, + { + "epoch": 0.81, + "grad_norm": 0.8721626400947571, + "learning_rate": 9.25569865019873e-07, + "loss": 0.5724, + "step": 12771 + }, + { + "epoch": 0.81, + "grad_norm": 0.9765549302101135, + "learning_rate": 9.249752630907094e-07, + "loss": 0.5772, + "step": 12772 + }, + { + "epoch": 0.81, + "grad_norm": 0.8811758756637573, + "learning_rate": 9.243808327466619e-07, + "loss": 0.5403, + "step": 12773 + }, + { + "epoch": 0.81, + "grad_norm": 0.8623104095458984, + "learning_rate": 9.237865740127594e-07, + "loss": 0.5399, + "step": 12774 + }, + { + "epoch": 0.81, + "grad_norm": 0.8554300665855408, + "learning_rate": 9.231924869140241e-07, + "loss": 0.5435, + "step": 12775 + }, + { + "epoch": 0.81, + "grad_norm": 0.8593326210975647, + "learning_rate": 9.225985714754721e-07, + "loss": 0.5547, + "step": 12776 + }, + { + "epoch": 0.81, + "grad_norm": 0.9124411344528198, + "learning_rate": 9.220048277221089e-07, + "loss": 0.5711, + "step": 12777 + }, + { + "epoch": 0.81, + "grad_norm": 0.9051636457443237, + "learning_rate": 9.214112556789345e-07, + "loss": 0.5853, + "step": 12778 + }, + { + "epoch": 0.81, + "grad_norm": 0.877150297164917, + "learning_rate": 9.208178553709468e-07, + "loss": 0.5834, + "step": 12779 + }, + { + "epoch": 0.81, + "grad_norm": 0.9467854499816895, + "learning_rate": 9.202246268231274e-07, + "loss": 0.6122, + "step": 12780 + }, + { + "epoch": 0.81, + "grad_norm": 0.9263243079185486, + "learning_rate": 9.196315700604564e-07, + "loss": 0.635, + "step": 12781 + }, + { + "epoch": 0.81, + "grad_norm": 0.8572517037391663, + "learning_rate": 9.190386851079053e-07, + "loss": 0.5614, + "step": 12782 + }, + { + "epoch": 0.81, + "grad_norm": 0.8789429664611816, + "learning_rate": 9.184459719904388e-07, + "loss": 0.5525, + "step": 12783 + }, + { + "epoch": 0.81, + "grad_norm": 0.8996034264564514, + "learning_rate": 9.178534307330145e-07, + "loss": 0.5553, + "step": 12784 + }, + { + "epoch": 0.81, + "grad_norm": 0.8926593661308289, + "learning_rate": 9.17261061360581e-07, + "loss": 0.603, + "step": 12785 + }, + { + "epoch": 0.81, + "grad_norm": 0.9206883311271667, + "learning_rate": 9.166688638980791e-07, + "loss": 0.5725, + "step": 12786 + }, + { + "epoch": 0.81, + "grad_norm": 0.8996316194534302, + "learning_rate": 9.160768383704499e-07, + "loss": 0.5316, + "step": 12787 + }, + { + "epoch": 0.81, + "grad_norm": 0.8590518236160278, + "learning_rate": 9.154849848026165e-07, + "loss": 0.5715, + "step": 12788 + }, + { + "epoch": 0.81, + "grad_norm": 0.8883064389228821, + "learning_rate": 9.148933032195013e-07, + "loss": 0.5745, + "step": 12789 + }, + { + "epoch": 0.81, + "grad_norm": 0.9011886119842529, + "learning_rate": 9.14301793646018e-07, + "loss": 0.5894, + "step": 12790 + }, + { + "epoch": 0.81, + "grad_norm": 0.8296880722045898, + "learning_rate": 9.137104561070736e-07, + "loss": 0.5376, + "step": 12791 + }, + { + "epoch": 0.81, + "grad_norm": 0.8713788986206055, + "learning_rate": 9.13119290627566e-07, + "loss": 0.5824, + "step": 12792 + }, + { + "epoch": 0.81, + "grad_norm": 0.8919610977172852, + "learning_rate": 9.125282972323895e-07, + "loss": 0.5717, + "step": 12793 + }, + { + "epoch": 0.81, + "grad_norm": 0.9087851643562317, + "learning_rate": 9.119374759464261e-07, + "loss": 0.5855, + "step": 12794 + }, + { + "epoch": 0.81, + "grad_norm": 0.8336042165756226, + "learning_rate": 9.113468267945541e-07, + "loss": 0.5096, + "step": 12795 + }, + { + "epoch": 0.81, + "grad_norm": 0.8984754681587219, + "learning_rate": 9.107563498016436e-07, + "loss": 0.6249, + "step": 12796 + }, + { + "epoch": 0.81, + "grad_norm": 0.9276543855667114, + "learning_rate": 9.101660449925576e-07, + "loss": 0.6166, + "step": 12797 + }, + { + "epoch": 0.81, + "grad_norm": 0.9266611933708191, + "learning_rate": 9.095759123921538e-07, + "loss": 0.5569, + "step": 12798 + }, + { + "epoch": 0.81, + "grad_norm": 0.8445834517478943, + "learning_rate": 9.089859520252759e-07, + "loss": 0.536, + "step": 12799 + }, + { + "epoch": 0.81, + "grad_norm": 1.0016990900039673, + "learning_rate": 9.083961639167693e-07, + "loss": 0.624, + "step": 12800 + }, + { + "epoch": 0.81, + "grad_norm": 0.9784378409385681, + "learning_rate": 9.078065480914678e-07, + "loss": 0.6467, + "step": 12801 + }, + { + "epoch": 0.81, + "grad_norm": 0.9219988584518433, + "learning_rate": 9.072171045741957e-07, + "loss": 0.5891, + "step": 12802 + }, + { + "epoch": 0.81, + "grad_norm": 0.9053341150283813, + "learning_rate": 9.066278333897732e-07, + "loss": 0.6182, + "step": 12803 + }, + { + "epoch": 0.81, + "grad_norm": 0.9229487776756287, + "learning_rate": 9.060387345630134e-07, + "loss": 0.5547, + "step": 12804 + }, + { + "epoch": 0.81, + "grad_norm": 0.8746492266654968, + "learning_rate": 9.054498081187202e-07, + "loss": 0.5368, + "step": 12805 + }, + { + "epoch": 0.81, + "grad_norm": 0.9092094898223877, + "learning_rate": 9.048610540816932e-07, + "loss": 0.5661, + "step": 12806 + }, + { + "epoch": 0.81, + "grad_norm": 0.8250091671943665, + "learning_rate": 9.042724724767199e-07, + "loss": 0.5977, + "step": 12807 + }, + { + "epoch": 0.81, + "grad_norm": 0.856377363204956, + "learning_rate": 9.036840633285837e-07, + "loss": 0.5594, + "step": 12808 + }, + { + "epoch": 0.81, + "grad_norm": 0.9337197542190552, + "learning_rate": 9.030958266620637e-07, + "loss": 0.6058, + "step": 12809 + }, + { + "epoch": 0.81, + "grad_norm": 0.9406629204750061, + "learning_rate": 9.025077625019252e-07, + "loss": 0.5506, + "step": 12810 + }, + { + "epoch": 0.81, + "grad_norm": 0.8978514671325684, + "learning_rate": 9.01919870872931e-07, + "loss": 0.6085, + "step": 12811 + }, + { + "epoch": 0.81, + "grad_norm": 0.8674015998840332, + "learning_rate": 9.013321517998347e-07, + "loss": 0.5683, + "step": 12812 + }, + { + "epoch": 0.81, + "grad_norm": 0.94971764087677, + "learning_rate": 9.007446053073832e-07, + "loss": 0.6416, + "step": 12813 + }, + { + "epoch": 0.81, + "grad_norm": 0.836727499961853, + "learning_rate": 9.001572314203172e-07, + "loss": 0.5604, + "step": 12814 + }, + { + "epoch": 0.81, + "grad_norm": 0.9299215078353882, + "learning_rate": 8.99570030163367e-07, + "loss": 0.5895, + "step": 12815 + }, + { + "epoch": 0.81, + "grad_norm": 0.8771916031837463, + "learning_rate": 8.989830015612566e-07, + "loss": 0.5793, + "step": 12816 + }, + { + "epoch": 0.81, + "grad_norm": 0.8739469647407532, + "learning_rate": 8.983961456387086e-07, + "loss": 0.6095, + "step": 12817 + }, + { + "epoch": 0.81, + "grad_norm": 0.899440586566925, + "learning_rate": 8.978094624204292e-07, + "loss": 0.5347, + "step": 12818 + }, + { + "epoch": 0.81, + "grad_norm": 0.916681706905365, + "learning_rate": 8.972229519311227e-07, + "loss": 0.5969, + "step": 12819 + }, + { + "epoch": 0.81, + "grad_norm": 0.8973095417022705, + "learning_rate": 8.966366141954852e-07, + "loss": 0.6042, + "step": 12820 + }, + { + "epoch": 0.81, + "grad_norm": 0.9100470542907715, + "learning_rate": 8.960504492382055e-07, + "loss": 0.5789, + "step": 12821 + }, + { + "epoch": 0.81, + "grad_norm": 0.8430030345916748, + "learning_rate": 8.95464457083966e-07, + "loss": 0.5646, + "step": 12822 + }, + { + "epoch": 0.81, + "grad_norm": 0.869049072265625, + "learning_rate": 8.948786377574382e-07, + "loss": 0.5889, + "step": 12823 + }, + { + "epoch": 0.81, + "grad_norm": 0.8816308379173279, + "learning_rate": 8.942929912832904e-07, + "loss": 0.5535, + "step": 12824 + }, + { + "epoch": 0.81, + "grad_norm": 0.8329145908355713, + "learning_rate": 8.93707517686182e-07, + "loss": 0.5467, + "step": 12825 + }, + { + "epoch": 0.81, + "grad_norm": 0.8186325430870056, + "learning_rate": 8.93122216990765e-07, + "loss": 0.5437, + "step": 12826 + }, + { + "epoch": 0.81, + "grad_norm": 0.9899107813835144, + "learning_rate": 8.92537089221685e-07, + "loss": 0.6088, + "step": 12827 + }, + { + "epoch": 0.81, + "grad_norm": 0.8372784852981567, + "learning_rate": 8.919521344035808e-07, + "loss": 0.5536, + "step": 12828 + }, + { + "epoch": 0.81, + "grad_norm": 0.8933220505714417, + "learning_rate": 8.913673525610783e-07, + "loss": 0.5864, + "step": 12829 + }, + { + "epoch": 0.81, + "grad_norm": 0.8942568898200989, + "learning_rate": 8.907827437188065e-07, + "loss": 0.5824, + "step": 12830 + }, + { + "epoch": 0.81, + "grad_norm": 0.8576558232307434, + "learning_rate": 8.901983079013771e-07, + "loss": 0.5339, + "step": 12831 + }, + { + "epoch": 0.81, + "grad_norm": 0.892993152141571, + "learning_rate": 8.896140451334001e-07, + "loss": 0.5865, + "step": 12832 + }, + { + "epoch": 0.81, + "grad_norm": 0.854968786239624, + "learning_rate": 8.890299554394766e-07, + "loss": 0.5602, + "step": 12833 + }, + { + "epoch": 0.81, + "grad_norm": 0.8966131806373596, + "learning_rate": 8.884460388442006e-07, + "loss": 0.5696, + "step": 12834 + }, + { + "epoch": 0.81, + "grad_norm": 0.9169580936431885, + "learning_rate": 8.878622953721589e-07, + "loss": 0.5467, + "step": 12835 + }, + { + "epoch": 0.81, + "grad_norm": 0.867084801197052, + "learning_rate": 8.87278725047932e-07, + "loss": 0.5794, + "step": 12836 + }, + { + "epoch": 0.81, + "grad_norm": 0.9113507866859436, + "learning_rate": 8.866953278960888e-07, + "loss": 0.5922, + "step": 12837 + }, + { + "epoch": 0.81, + "grad_norm": 0.8957472443580627, + "learning_rate": 8.86112103941198e-07, + "loss": 0.5407, + "step": 12838 + }, + { + "epoch": 0.81, + "grad_norm": 0.8440329432487488, + "learning_rate": 8.855290532078148e-07, + "loss": 0.507, + "step": 12839 + }, + { + "epoch": 0.81, + "grad_norm": 0.8923792839050293, + "learning_rate": 8.849461757204897e-07, + "loss": 0.6195, + "step": 12840 + }, + { + "epoch": 0.81, + "grad_norm": 0.8741909861564636, + "learning_rate": 8.843634715037669e-07, + "loss": 0.5827, + "step": 12841 + }, + { + "epoch": 0.81, + "grad_norm": 0.8744585514068604, + "learning_rate": 8.83780940582179e-07, + "loss": 0.6142, + "step": 12842 + }, + { + "epoch": 0.81, + "grad_norm": 0.8400830030441284, + "learning_rate": 8.83198582980257e-07, + "loss": 0.5726, + "step": 12843 + }, + { + "epoch": 0.81, + "grad_norm": 0.9461512565612793, + "learning_rate": 8.826163987225233e-07, + "loss": 0.5863, + "step": 12844 + }, + { + "epoch": 0.81, + "grad_norm": 0.9131925106048584, + "learning_rate": 8.82034387833488e-07, + "loss": 0.5959, + "step": 12845 + }, + { + "epoch": 0.81, + "grad_norm": 0.9167430996894836, + "learning_rate": 8.814525503376597e-07, + "loss": 0.5696, + "step": 12846 + }, + { + "epoch": 0.81, + "grad_norm": 0.8956706523895264, + "learning_rate": 8.808708862595367e-07, + "loss": 0.561, + "step": 12847 + }, + { + "epoch": 0.81, + "grad_norm": 0.8876976370811462, + "learning_rate": 8.802893956236114e-07, + "loss": 0.5375, + "step": 12848 + }, + { + "epoch": 0.81, + "grad_norm": 0.9218643307685852, + "learning_rate": 8.797080784543699e-07, + "loss": 0.561, + "step": 12849 + }, + { + "epoch": 0.81, + "grad_norm": 0.9845806956291199, + "learning_rate": 8.791269347762849e-07, + "loss": 0.5885, + "step": 12850 + }, + { + "epoch": 0.81, + "grad_norm": 0.8306980729103088, + "learning_rate": 8.785459646138306e-07, + "loss": 0.5472, + "step": 12851 + }, + { + "epoch": 0.81, + "grad_norm": 0.867559015750885, + "learning_rate": 8.779651679914692e-07, + "loss": 0.5377, + "step": 12852 + }, + { + "epoch": 0.81, + "grad_norm": 0.8420113921165466, + "learning_rate": 8.773845449336537e-07, + "loss": 0.5777, + "step": 12853 + }, + { + "epoch": 0.81, + "grad_norm": 0.9076850414276123, + "learning_rate": 8.768040954648338e-07, + "loss": 0.6205, + "step": 12854 + }, + { + "epoch": 0.81, + "grad_norm": 0.8947234153747559, + "learning_rate": 8.762238196094502e-07, + "loss": 0.5319, + "step": 12855 + }, + { + "epoch": 0.81, + "grad_norm": 0.9484972357749939, + "learning_rate": 8.756437173919352e-07, + "loss": 0.5909, + "step": 12856 + }, + { + "epoch": 0.81, + "grad_norm": 0.8586333990097046, + "learning_rate": 8.750637888367164e-07, + "loss": 0.5382, + "step": 12857 + }, + { + "epoch": 0.81, + "grad_norm": 0.8808966875076294, + "learning_rate": 8.744840339682126e-07, + "loss": 0.5391, + "step": 12858 + }, + { + "epoch": 0.81, + "grad_norm": 0.8085102438926697, + "learning_rate": 8.73904452810832e-07, + "loss": 0.5483, + "step": 12859 + }, + { + "epoch": 0.81, + "grad_norm": 0.9202531576156616, + "learning_rate": 8.733250453889841e-07, + "loss": 0.5316, + "step": 12860 + }, + { + "epoch": 0.81, + "grad_norm": 0.907964289188385, + "learning_rate": 8.727458117270615e-07, + "loss": 0.5327, + "step": 12861 + }, + { + "epoch": 0.81, + "grad_norm": 0.9174656867980957, + "learning_rate": 8.721667518494553e-07, + "loss": 0.5938, + "step": 12862 + }, + { + "epoch": 0.81, + "grad_norm": 0.8944279551506042, + "learning_rate": 8.715878657805471e-07, + "loss": 0.5871, + "step": 12863 + }, + { + "epoch": 0.82, + "grad_norm": 0.8624773025512695, + "learning_rate": 8.710091535447123e-07, + "loss": 0.5784, + "step": 12864 + }, + { + "epoch": 0.82, + "grad_norm": 0.8640050888061523, + "learning_rate": 8.704306151663184e-07, + "loss": 0.536, + "step": 12865 + }, + { + "epoch": 0.82, + "grad_norm": 0.8937069177627563, + "learning_rate": 8.698522506697271e-07, + "loss": 0.5738, + "step": 12866 + }, + { + "epoch": 0.82, + "grad_norm": 0.8589310646057129, + "learning_rate": 8.692740600792871e-07, + "loss": 0.5897, + "step": 12867 + }, + { + "epoch": 0.82, + "grad_norm": 0.8561339378356934, + "learning_rate": 8.686960434193486e-07, + "loss": 0.5341, + "step": 12868 + }, + { + "epoch": 0.82, + "grad_norm": 0.9550122618675232, + "learning_rate": 8.681182007142475e-07, + "loss": 0.5872, + "step": 12869 + }, + { + "epoch": 0.82, + "grad_norm": 0.9209311008453369, + "learning_rate": 8.675405319883146e-07, + "loss": 0.5488, + "step": 12870 + }, + { + "epoch": 0.82, + "grad_norm": 0.9099619388580322, + "learning_rate": 8.66963037265876e-07, + "loss": 0.5861, + "step": 12871 + }, + { + "epoch": 0.82, + "grad_norm": 0.7935923337936401, + "learning_rate": 8.663857165712431e-07, + "loss": 0.504, + "step": 12872 + }, + { + "epoch": 0.82, + "grad_norm": 0.8865057229995728, + "learning_rate": 8.658085699287294e-07, + "loss": 0.5812, + "step": 12873 + }, + { + "epoch": 0.82, + "grad_norm": 0.8969137668609619, + "learning_rate": 8.652315973626362e-07, + "loss": 0.5974, + "step": 12874 + }, + { + "epoch": 0.82, + "grad_norm": 0.8373164534568787, + "learning_rate": 8.646547988972553e-07, + "loss": 0.5351, + "step": 12875 + }, + { + "epoch": 0.82, + "grad_norm": 0.8645594716072083, + "learning_rate": 8.64078174556875e-07, + "loss": 0.5746, + "step": 12876 + }, + { + "epoch": 0.82, + "grad_norm": 0.921709418296814, + "learning_rate": 8.635017243657751e-07, + "loss": 0.5987, + "step": 12877 + }, + { + "epoch": 0.82, + "grad_norm": 0.9052848815917969, + "learning_rate": 8.629254483482274e-07, + "loss": 0.5244, + "step": 12878 + }, + { + "epoch": 0.82, + "grad_norm": 0.8511383533477783, + "learning_rate": 8.623493465284987e-07, + "loss": 0.505, + "step": 12879 + }, + { + "epoch": 0.82, + "grad_norm": 0.893326997756958, + "learning_rate": 8.61773418930843e-07, + "loss": 0.5679, + "step": 12880 + }, + { + "epoch": 0.82, + "grad_norm": 0.9681572914123535, + "learning_rate": 8.611976655795135e-07, + "loss": 0.6534, + "step": 12881 + }, + { + "epoch": 0.82, + "grad_norm": 0.9120391607284546, + "learning_rate": 8.606220864987541e-07, + "loss": 0.5988, + "step": 12882 + }, + { + "epoch": 0.82, + "grad_norm": 0.8803929686546326, + "learning_rate": 8.600466817127972e-07, + "loss": 0.5486, + "step": 12883 + }, + { + "epoch": 0.82, + "grad_norm": 0.9291055798530579, + "learning_rate": 8.59471451245873e-07, + "loss": 0.5445, + "step": 12884 + }, + { + "epoch": 0.82, + "grad_norm": 0.8664212226867676, + "learning_rate": 8.588963951222024e-07, + "loss": 0.5972, + "step": 12885 + }, + { + "epoch": 0.82, + "grad_norm": 0.8658425211906433, + "learning_rate": 8.583215133659983e-07, + "loss": 0.6289, + "step": 12886 + }, + { + "epoch": 0.82, + "grad_norm": 0.8955614566802979, + "learning_rate": 8.577468060014688e-07, + "loss": 0.5883, + "step": 12887 + }, + { + "epoch": 0.82, + "grad_norm": 0.9566403031349182, + "learning_rate": 8.571722730528098e-07, + "loss": 0.6358, + "step": 12888 + }, + { + "epoch": 0.82, + "grad_norm": 0.8918949365615845, + "learning_rate": 8.565979145442138e-07, + "loss": 0.5786, + "step": 12889 + }, + { + "epoch": 0.82, + "grad_norm": 0.8985361456871033, + "learning_rate": 8.560237304998681e-07, + "loss": 0.5742, + "step": 12890 + }, + { + "epoch": 0.82, + "grad_norm": 0.9355623722076416, + "learning_rate": 8.554497209439461e-07, + "loss": 0.6009, + "step": 12891 + }, + { + "epoch": 0.82, + "grad_norm": 0.8992531299591064, + "learning_rate": 8.548758859006184e-07, + "loss": 0.5768, + "step": 12892 + }, + { + "epoch": 0.82, + "grad_norm": 0.9270733594894409, + "learning_rate": 8.543022253940475e-07, + "loss": 0.5583, + "step": 12893 + }, + { + "epoch": 0.82, + "grad_norm": 0.8681014180183411, + "learning_rate": 8.537287394483878e-07, + "loss": 0.5807, + "step": 12894 + }, + { + "epoch": 0.82, + "grad_norm": 0.9098723530769348, + "learning_rate": 8.531554280877885e-07, + "loss": 0.5598, + "step": 12895 + }, + { + "epoch": 0.82, + "grad_norm": 0.9069850444793701, + "learning_rate": 8.525822913363868e-07, + "loss": 0.6112, + "step": 12896 + }, + { + "epoch": 0.82, + "grad_norm": 0.8686051368713379, + "learning_rate": 8.520093292183163e-07, + "loss": 0.5605, + "step": 12897 + }, + { + "epoch": 0.82, + "grad_norm": 0.9454940557479858, + "learning_rate": 8.514365417577048e-07, + "loss": 0.5387, + "step": 12898 + }, + { + "epoch": 0.82, + "grad_norm": 0.8750715851783752, + "learning_rate": 8.50863928978668e-07, + "loss": 0.5455, + "step": 12899 + }, + { + "epoch": 0.82, + "grad_norm": 0.9609119892120361, + "learning_rate": 8.502914909053173e-07, + "loss": 0.5859, + "step": 12900 + }, + { + "epoch": 0.82, + "grad_norm": 0.8676950931549072, + "learning_rate": 8.497192275617577e-07, + "loss": 0.5496, + "step": 12901 + }, + { + "epoch": 0.82, + "grad_norm": 0.8623301386833191, + "learning_rate": 8.491471389720807e-07, + "loss": 0.6052, + "step": 12902 + }, + { + "epoch": 0.82, + "grad_norm": 0.9404549598693848, + "learning_rate": 8.485752251603807e-07, + "loss": 0.5788, + "step": 12903 + }, + { + "epoch": 0.82, + "grad_norm": 0.9115918278694153, + "learning_rate": 8.480034861507347e-07, + "loss": 0.5342, + "step": 12904 + }, + { + "epoch": 0.82, + "grad_norm": 0.8364629149436951, + "learning_rate": 8.474319219672183e-07, + "loss": 0.5695, + "step": 12905 + }, + { + "epoch": 0.82, + "grad_norm": 0.8777880072593689, + "learning_rate": 8.46860532633898e-07, + "loss": 0.5487, + "step": 12906 + }, + { + "epoch": 0.82, + "grad_norm": 0.9011834263801575, + "learning_rate": 8.462893181748327e-07, + "loss": 0.5618, + "step": 12907 + }, + { + "epoch": 0.82, + "grad_norm": 0.8608363270759583, + "learning_rate": 8.457182786140744e-07, + "loss": 0.5919, + "step": 12908 + }, + { + "epoch": 0.82, + "grad_norm": 0.9006455540657043, + "learning_rate": 8.451474139756693e-07, + "loss": 0.6024, + "step": 12909 + }, + { + "epoch": 0.82, + "grad_norm": 0.8328776955604553, + "learning_rate": 8.445767242836506e-07, + "loss": 0.5455, + "step": 12910 + }, + { + "epoch": 0.82, + "grad_norm": 0.859550416469574, + "learning_rate": 8.440062095620527e-07, + "loss": 0.5565, + "step": 12911 + }, + { + "epoch": 0.82, + "grad_norm": 0.8993778824806213, + "learning_rate": 8.434358698348944e-07, + "loss": 0.598, + "step": 12912 + }, + { + "epoch": 0.82, + "grad_norm": 0.8660597205162048, + "learning_rate": 8.428657051261918e-07, + "loss": 0.5732, + "step": 12913 + }, + { + "epoch": 0.82, + "grad_norm": 0.8744674324989319, + "learning_rate": 8.422957154599526e-07, + "loss": 0.5754, + "step": 12914 + }, + { + "epoch": 0.82, + "grad_norm": 0.9497204422950745, + "learning_rate": 8.417259008601775e-07, + "loss": 0.5412, + "step": 12915 + }, + { + "epoch": 0.82, + "grad_norm": 0.8864256739616394, + "learning_rate": 8.411562613508595e-07, + "loss": 0.5603, + "step": 12916 + }, + { + "epoch": 0.82, + "grad_norm": 0.959272563457489, + "learning_rate": 8.405867969559845e-07, + "loss": 0.5884, + "step": 12917 + }, + { + "epoch": 0.82, + "grad_norm": 0.8853299021720886, + "learning_rate": 8.400175076995287e-07, + "loss": 0.5456, + "step": 12918 + }, + { + "epoch": 0.82, + "grad_norm": 0.8390821218490601, + "learning_rate": 8.394483936054643e-07, + "loss": 0.5739, + "step": 12919 + }, + { + "epoch": 0.82, + "grad_norm": 0.8850178122520447, + "learning_rate": 8.388794546977546e-07, + "loss": 0.5718, + "step": 12920 + }, + { + "epoch": 0.82, + "grad_norm": 0.9476692080497742, + "learning_rate": 8.383106910003552e-07, + "loss": 0.5619, + "step": 12921 + }, + { + "epoch": 0.82, + "grad_norm": 0.9127770066261292, + "learning_rate": 8.377421025372157e-07, + "loss": 0.5741, + "step": 12922 + }, + { + "epoch": 0.82, + "grad_norm": 0.8317306041717529, + "learning_rate": 8.371736893322763e-07, + "loss": 0.4702, + "step": 12923 + }, + { + "epoch": 0.82, + "grad_norm": 0.87800532579422, + "learning_rate": 8.366054514094718e-07, + "loss": 0.5637, + "step": 12924 + }, + { + "epoch": 0.82, + "grad_norm": 0.8989687561988831, + "learning_rate": 8.360373887927298e-07, + "loss": 0.5926, + "step": 12925 + }, + { + "epoch": 0.82, + "grad_norm": 0.9448102712631226, + "learning_rate": 8.35469501505966e-07, + "loss": 0.5936, + "step": 12926 + }, + { + "epoch": 0.82, + "grad_norm": 0.8820131421089172, + "learning_rate": 8.349017895730948e-07, + "loss": 0.5735, + "step": 12927 + }, + { + "epoch": 0.82, + "grad_norm": 0.9099850654602051, + "learning_rate": 8.343342530180198e-07, + "loss": 0.5738, + "step": 12928 + }, + { + "epoch": 0.82, + "grad_norm": 0.9121573567390442, + "learning_rate": 8.33766891864638e-07, + "loss": 0.5523, + "step": 12929 + }, + { + "epoch": 0.82, + "grad_norm": 0.9185227155685425, + "learning_rate": 8.331997061368391e-07, + "loss": 0.5919, + "step": 12930 + }, + { + "epoch": 0.82, + "grad_norm": 0.8707922101020813, + "learning_rate": 8.326326958585062e-07, + "loss": 0.6125, + "step": 12931 + }, + { + "epoch": 0.82, + "grad_norm": 0.8843598365783691, + "learning_rate": 8.320658610535115e-07, + "loss": 0.5889, + "step": 12932 + }, + { + "epoch": 0.82, + "grad_norm": 0.903973400592804, + "learning_rate": 8.314992017457263e-07, + "loss": 0.5731, + "step": 12933 + }, + { + "epoch": 0.82, + "grad_norm": 0.8613129258155823, + "learning_rate": 8.30932717959007e-07, + "loss": 0.5461, + "step": 12934 + }, + { + "epoch": 0.82, + "grad_norm": 0.912260890007019, + "learning_rate": 8.303664097172087e-07, + "loss": 0.5855, + "step": 12935 + }, + { + "epoch": 0.82, + "grad_norm": 0.8741612434387207, + "learning_rate": 8.298002770441749e-07, + "loss": 0.5981, + "step": 12936 + }, + { + "epoch": 0.82, + "grad_norm": 0.8477001190185547, + "learning_rate": 8.292343199637448e-07, + "loss": 0.5833, + "step": 12937 + }, + { + "epoch": 0.82, + "grad_norm": 0.8845143914222717, + "learning_rate": 8.286685384997484e-07, + "loss": 0.5903, + "step": 12938 + }, + { + "epoch": 0.82, + "grad_norm": 0.933994472026825, + "learning_rate": 8.281029326760104e-07, + "loss": 0.5752, + "step": 12939 + }, + { + "epoch": 0.82, + "grad_norm": 0.8114098310470581, + "learning_rate": 8.275375025163418e-07, + "loss": 0.5763, + "step": 12940 + }, + { + "epoch": 0.82, + "grad_norm": 0.9245671033859253, + "learning_rate": 8.269722480445569e-07, + "loss": 0.6329, + "step": 12941 + }, + { + "epoch": 0.82, + "grad_norm": 0.9412350058555603, + "learning_rate": 8.264071692844527e-07, + "loss": 0.5522, + "step": 12942 + }, + { + "epoch": 0.82, + "grad_norm": 0.8871721625328064, + "learning_rate": 8.258422662598231e-07, + "loss": 0.5602, + "step": 12943 + }, + { + "epoch": 0.82, + "grad_norm": 0.9519109725952148, + "learning_rate": 8.252775389944556e-07, + "loss": 0.5704, + "step": 12944 + }, + { + "epoch": 0.82, + "grad_norm": 0.9257845282554626, + "learning_rate": 8.247129875121274e-07, + "loss": 0.6062, + "step": 12945 + }, + { + "epoch": 0.82, + "grad_norm": 0.9066646695137024, + "learning_rate": 8.24148611836611e-07, + "loss": 0.556, + "step": 12946 + }, + { + "epoch": 0.82, + "grad_norm": 0.8821330070495605, + "learning_rate": 8.235844119916708e-07, + "loss": 0.5509, + "step": 12947 + }, + { + "epoch": 0.82, + "grad_norm": 1.1398460865020752, + "learning_rate": 8.230203880010612e-07, + "loss": 0.588, + "step": 12948 + }, + { + "epoch": 0.82, + "grad_norm": 0.8532936573028564, + "learning_rate": 8.224565398885325e-07, + "loss": 0.5312, + "step": 12949 + }, + { + "epoch": 0.82, + "grad_norm": 0.8935076594352722, + "learning_rate": 8.218928676778264e-07, + "loss": 0.5871, + "step": 12950 + }, + { + "epoch": 0.82, + "grad_norm": 0.8617026209831238, + "learning_rate": 8.213293713926767e-07, + "loss": 0.5039, + "step": 12951 + }, + { + "epoch": 0.82, + "grad_norm": 0.9438952207565308, + "learning_rate": 8.207660510568122e-07, + "loss": 0.6125, + "step": 12952 + }, + { + "epoch": 0.82, + "grad_norm": 0.8180469274520874, + "learning_rate": 8.202029066939482e-07, + "loss": 0.5147, + "step": 12953 + }, + { + "epoch": 0.82, + "grad_norm": 0.8670182824134827, + "learning_rate": 8.196399383278004e-07, + "loss": 0.6175, + "step": 12954 + }, + { + "epoch": 0.82, + "grad_norm": 0.9703617691993713, + "learning_rate": 8.190771459820739e-07, + "loss": 0.6071, + "step": 12955 + }, + { + "epoch": 0.82, + "grad_norm": 0.9828335046768188, + "learning_rate": 8.18514529680463e-07, + "loss": 0.6214, + "step": 12956 + }, + { + "epoch": 0.82, + "grad_norm": 0.8318359851837158, + "learning_rate": 8.179520894466592e-07, + "loss": 0.5637, + "step": 12957 + }, + { + "epoch": 0.82, + "grad_norm": 0.8575620651245117, + "learning_rate": 8.173898253043444e-07, + "loss": 0.5122, + "step": 12958 + }, + { + "epoch": 0.82, + "grad_norm": 0.8485636115074158, + "learning_rate": 8.168277372771937e-07, + "loss": 0.5165, + "step": 12959 + }, + { + "epoch": 0.82, + "grad_norm": 0.8538296222686768, + "learning_rate": 8.162658253888761e-07, + "loss": 0.6073, + "step": 12960 + }, + { + "epoch": 0.82, + "grad_norm": 0.8725820779800415, + "learning_rate": 8.157040896630481e-07, + "loss": 0.5341, + "step": 12961 + }, + { + "epoch": 0.82, + "grad_norm": 0.855991780757904, + "learning_rate": 8.151425301233656e-07, + "loss": 0.5491, + "step": 12962 + }, + { + "epoch": 0.82, + "grad_norm": 0.9150635600090027, + "learning_rate": 8.14581146793475e-07, + "loss": 0.5929, + "step": 12963 + }, + { + "epoch": 0.82, + "grad_norm": 0.9065380692481995, + "learning_rate": 8.140199396970106e-07, + "loss": 0.5817, + "step": 12964 + }, + { + "epoch": 0.82, + "grad_norm": 0.8524861335754395, + "learning_rate": 8.13458908857605e-07, + "loss": 0.5101, + "step": 12965 + }, + { + "epoch": 0.82, + "grad_norm": 0.8974103331565857, + "learning_rate": 8.128980542988801e-07, + "loss": 0.5379, + "step": 12966 + }, + { + "epoch": 0.82, + "grad_norm": 0.8953040242195129, + "learning_rate": 8.12337376044453e-07, + "loss": 0.5447, + "step": 12967 + }, + { + "epoch": 0.82, + "grad_norm": 0.9523823261260986, + "learning_rate": 8.117768741179322e-07, + "loss": 0.6085, + "step": 12968 + }, + { + "epoch": 0.82, + "grad_norm": 0.8712965846061707, + "learning_rate": 8.112165485429163e-07, + "loss": 0.5753, + "step": 12969 + }, + { + "epoch": 0.82, + "grad_norm": 0.9363554120063782, + "learning_rate": 8.106563993429983e-07, + "loss": 0.5624, + "step": 12970 + }, + { + "epoch": 0.82, + "grad_norm": 0.8442745208740234, + "learning_rate": 8.100964265417682e-07, + "loss": 0.5491, + "step": 12971 + }, + { + "epoch": 0.82, + "grad_norm": 0.9169662594795227, + "learning_rate": 8.09536630162801e-07, + "loss": 0.5917, + "step": 12972 + }, + { + "epoch": 0.82, + "grad_norm": 0.948613166809082, + "learning_rate": 8.089770102296685e-07, + "loss": 0.5397, + "step": 12973 + }, + { + "epoch": 0.82, + "grad_norm": 0.8877300024032593, + "learning_rate": 8.084175667659345e-07, + "loss": 0.5818, + "step": 12974 + }, + { + "epoch": 0.82, + "grad_norm": 0.8682299852371216, + "learning_rate": 8.078582997951556e-07, + "loss": 0.5694, + "step": 12975 + }, + { + "epoch": 0.82, + "grad_norm": 0.8772991299629211, + "learning_rate": 8.072992093408816e-07, + "loss": 0.6045, + "step": 12976 + }, + { + "epoch": 0.82, + "grad_norm": 0.8861331343650818, + "learning_rate": 8.067402954266512e-07, + "loss": 0.6145, + "step": 12977 + }, + { + "epoch": 0.82, + "grad_norm": 0.8956562876701355, + "learning_rate": 8.061815580759996e-07, + "loss": 0.5567, + "step": 12978 + }, + { + "epoch": 0.82, + "grad_norm": 0.8872475624084473, + "learning_rate": 8.056229973124529e-07, + "loss": 0.5827, + "step": 12979 + }, + { + "epoch": 0.82, + "grad_norm": 0.8420911431312561, + "learning_rate": 8.050646131595313e-07, + "loss": 0.5742, + "step": 12980 + }, + { + "epoch": 0.82, + "grad_norm": 0.8587638735771179, + "learning_rate": 8.045064056407453e-07, + "loss": 0.5755, + "step": 12981 + }, + { + "epoch": 0.82, + "grad_norm": 0.8562715649604797, + "learning_rate": 8.039483747796012e-07, + "loss": 0.5786, + "step": 12982 + }, + { + "epoch": 0.82, + "grad_norm": 0.8843387365341187, + "learning_rate": 8.033905205995913e-07, + "loss": 0.5752, + "step": 12983 + }, + { + "epoch": 0.82, + "grad_norm": 0.8959712982177734, + "learning_rate": 8.0283284312421e-07, + "loss": 0.5532, + "step": 12984 + }, + { + "epoch": 0.82, + "grad_norm": 0.8698373436927795, + "learning_rate": 8.022753423769359e-07, + "loss": 0.587, + "step": 12985 + }, + { + "epoch": 0.82, + "grad_norm": 0.8483936190605164, + "learning_rate": 8.017180183812439e-07, + "loss": 0.5502, + "step": 12986 + }, + { + "epoch": 0.82, + "grad_norm": 0.866079568862915, + "learning_rate": 8.011608711606017e-07, + "loss": 0.5588, + "step": 12987 + }, + { + "epoch": 0.82, + "grad_norm": 0.8948245048522949, + "learning_rate": 8.006039007384681e-07, + "loss": 0.5838, + "step": 12988 + }, + { + "epoch": 0.82, + "grad_norm": 0.9978700876235962, + "learning_rate": 8.000471071382959e-07, + "loss": 0.6032, + "step": 12989 + }, + { + "epoch": 0.82, + "grad_norm": 0.8848072290420532, + "learning_rate": 7.99490490383531e-07, + "loss": 0.5889, + "step": 12990 + }, + { + "epoch": 0.82, + "grad_norm": 0.8934358954429626, + "learning_rate": 7.989340504976062e-07, + "loss": 0.5968, + "step": 12991 + }, + { + "epoch": 0.82, + "grad_norm": 0.8869682550430298, + "learning_rate": 7.983777875039567e-07, + "loss": 0.5398, + "step": 12992 + }, + { + "epoch": 0.82, + "grad_norm": 0.8653879165649414, + "learning_rate": 7.978217014260009e-07, + "loss": 0.5723, + "step": 12993 + }, + { + "epoch": 0.82, + "grad_norm": 0.9040364027023315, + "learning_rate": 7.972657922871546e-07, + "loss": 0.5593, + "step": 12994 + }, + { + "epoch": 0.82, + "grad_norm": 0.8811683058738708, + "learning_rate": 7.967100601108258e-07, + "loss": 0.577, + "step": 12995 + }, + { + "epoch": 0.82, + "grad_norm": 0.8992339968681335, + "learning_rate": 7.961545049204145e-07, + "loss": 0.634, + "step": 12996 + }, + { + "epoch": 0.82, + "grad_norm": 0.8207805156707764, + "learning_rate": 7.955991267393127e-07, + "loss": 0.5304, + "step": 12997 + }, + { + "epoch": 0.82, + "grad_norm": 0.9232082366943359, + "learning_rate": 7.950439255909065e-07, + "loss": 0.6293, + "step": 12998 + }, + { + "epoch": 0.82, + "grad_norm": 0.884673535823822, + "learning_rate": 7.944889014985718e-07, + "loss": 0.5888, + "step": 12999 + }, + { + "epoch": 0.82, + "grad_norm": 0.8796509504318237, + "learning_rate": 7.939340544856783e-07, + "loss": 0.5765, + "step": 13000 + }, + { + "epoch": 0.82, + "grad_norm": 0.8928359746932983, + "learning_rate": 7.933793845755922e-07, + "loss": 0.5899, + "step": 13001 + }, + { + "epoch": 0.82, + "grad_norm": 0.8858817219734192, + "learning_rate": 7.928248917916653e-07, + "loss": 0.573, + "step": 13002 + }, + { + "epoch": 0.82, + "grad_norm": 0.9019994735717773, + "learning_rate": 7.922705761572464e-07, + "loss": 0.5574, + "step": 13003 + }, + { + "epoch": 0.82, + "grad_norm": 0.8664145469665527, + "learning_rate": 7.91716437695676e-07, + "loss": 0.5113, + "step": 13004 + }, + { + "epoch": 0.82, + "grad_norm": 0.91963791847229, + "learning_rate": 7.911624764302872e-07, + "loss": 0.6429, + "step": 13005 + }, + { + "epoch": 0.82, + "grad_norm": 0.9109863042831421, + "learning_rate": 7.906086923844059e-07, + "loss": 0.5727, + "step": 13006 + }, + { + "epoch": 0.82, + "grad_norm": 0.863783597946167, + "learning_rate": 7.900550855813477e-07, + "loss": 0.5765, + "step": 13007 + }, + { + "epoch": 0.82, + "grad_norm": 0.9418416619300842, + "learning_rate": 7.895016560444241e-07, + "loss": 0.5862, + "step": 13008 + }, + { + "epoch": 0.82, + "grad_norm": 0.8956203460693359, + "learning_rate": 7.889484037969403e-07, + "loss": 0.6175, + "step": 13009 + }, + { + "epoch": 0.82, + "grad_norm": 0.8799732327461243, + "learning_rate": 7.883953288621887e-07, + "loss": 0.6195, + "step": 13010 + }, + { + "epoch": 0.82, + "grad_norm": 1.00110924243927, + "learning_rate": 7.878424312634592e-07, + "loss": 0.5845, + "step": 13011 + }, + { + "epoch": 0.82, + "grad_norm": 0.9354737401008606, + "learning_rate": 7.87289711024033e-07, + "loss": 0.592, + "step": 13012 + }, + { + "epoch": 0.82, + "grad_norm": 0.8658231496810913, + "learning_rate": 7.867371681671793e-07, + "loss": 0.6014, + "step": 13013 + }, + { + "epoch": 0.82, + "grad_norm": 0.9071126580238342, + "learning_rate": 7.861848027161694e-07, + "loss": 0.6201, + "step": 13014 + }, + { + "epoch": 0.82, + "grad_norm": 0.8745089769363403, + "learning_rate": 7.856326146942572e-07, + "loss": 0.5287, + "step": 13015 + }, + { + "epoch": 0.82, + "grad_norm": 0.890994131565094, + "learning_rate": 7.85080604124695e-07, + "loss": 0.634, + "step": 13016 + }, + { + "epoch": 0.82, + "grad_norm": 0.8491596579551697, + "learning_rate": 7.845287710307258e-07, + "loss": 0.5487, + "step": 13017 + }, + { + "epoch": 0.82, + "grad_norm": 0.942820131778717, + "learning_rate": 7.839771154355858e-07, + "loss": 0.5933, + "step": 13018 + }, + { + "epoch": 0.82, + "grad_norm": 0.940209686756134, + "learning_rate": 7.834256373625027e-07, + "loss": 0.5907, + "step": 13019 + }, + { + "epoch": 0.82, + "grad_norm": 0.8660345077514648, + "learning_rate": 7.828743368346991e-07, + "loss": 0.5164, + "step": 13020 + }, + { + "epoch": 0.82, + "grad_norm": 0.8865716457366943, + "learning_rate": 7.823232138753845e-07, + "loss": 0.5352, + "step": 13021 + }, + { + "epoch": 0.83, + "grad_norm": 0.9319779872894287, + "learning_rate": 7.817722685077689e-07, + "loss": 0.5374, + "step": 13022 + }, + { + "epoch": 0.83, + "grad_norm": 0.8646177649497986, + "learning_rate": 7.812215007550483e-07, + "loss": 0.5976, + "step": 13023 + }, + { + "epoch": 0.83, + "grad_norm": 0.9318941831588745, + "learning_rate": 7.806709106404142e-07, + "loss": 0.6182, + "step": 13024 + }, + { + "epoch": 0.83, + "grad_norm": 0.9168413281440735, + "learning_rate": 7.801204981870508e-07, + "loss": 0.5817, + "step": 13025 + }, + { + "epoch": 0.83, + "grad_norm": 0.8882789015769958, + "learning_rate": 7.795702634181318e-07, + "loss": 0.5534, + "step": 13026 + }, + { + "epoch": 0.83, + "grad_norm": 0.8667416572570801, + "learning_rate": 7.790202063568276e-07, + "loss": 0.5252, + "step": 13027 + }, + { + "epoch": 0.83, + "grad_norm": 0.8797557353973389, + "learning_rate": 7.784703270263006e-07, + "loss": 0.5719, + "step": 13028 + }, + { + "epoch": 0.83, + "grad_norm": 0.8629273176193237, + "learning_rate": 7.779206254497007e-07, + "loss": 0.5397, + "step": 13029 + }, + { + "epoch": 0.83, + "grad_norm": 0.9070542454719543, + "learning_rate": 7.773711016501762e-07, + "loss": 0.5972, + "step": 13030 + }, + { + "epoch": 0.83, + "grad_norm": 0.8951036930084229, + "learning_rate": 7.76821755650865e-07, + "loss": 0.6304, + "step": 13031 + }, + { + "epoch": 0.83, + "grad_norm": 0.9298555850982666, + "learning_rate": 7.762725874748983e-07, + "loss": 0.5728, + "step": 13032 + }, + { + "epoch": 0.83, + "grad_norm": 0.9324959516525269, + "learning_rate": 7.757235971454008e-07, + "loss": 0.5416, + "step": 13033 + }, + { + "epoch": 0.83, + "grad_norm": 0.8365843296051025, + "learning_rate": 7.751747846854851e-07, + "loss": 0.5546, + "step": 13034 + }, + { + "epoch": 0.83, + "grad_norm": 0.9446489810943604, + "learning_rate": 7.746261501182633e-07, + "loss": 0.5714, + "step": 13035 + }, + { + "epoch": 0.83, + "grad_norm": 0.8774089217185974, + "learning_rate": 7.740776934668365e-07, + "loss": 0.5605, + "step": 13036 + }, + { + "epoch": 0.83, + "grad_norm": 0.8851078152656555, + "learning_rate": 7.73529414754296e-07, + "loss": 0.5426, + "step": 13037 + }, + { + "epoch": 0.83, + "grad_norm": 0.9036283493041992, + "learning_rate": 7.72981314003729e-07, + "loss": 0.5378, + "step": 13038 + }, + { + "epoch": 0.83, + "grad_norm": 0.9143775701522827, + "learning_rate": 7.724333912382143e-07, + "loss": 0.5731, + "step": 13039 + }, + { + "epoch": 0.83, + "grad_norm": 0.8436862230300903, + "learning_rate": 7.718856464808222e-07, + "loss": 0.5392, + "step": 13040 + }, + { + "epoch": 0.83, + "grad_norm": 0.8102920055389404, + "learning_rate": 7.713380797546188e-07, + "loss": 0.5208, + "step": 13041 + }, + { + "epoch": 0.83, + "grad_norm": 0.922103762626648, + "learning_rate": 7.707906910826574e-07, + "loss": 0.5924, + "step": 13042 + }, + { + "epoch": 0.83, + "grad_norm": 0.8845114707946777, + "learning_rate": 7.702434804879861e-07, + "loss": 0.5718, + "step": 13043 + }, + { + "epoch": 0.83, + "grad_norm": 0.91489577293396, + "learning_rate": 7.696964479936497e-07, + "loss": 0.5519, + "step": 13044 + }, + { + "epoch": 0.83, + "grad_norm": 0.8923588991165161, + "learning_rate": 7.691495936226789e-07, + "loss": 0.5516, + "step": 13045 + }, + { + "epoch": 0.83, + "grad_norm": 1.006177544593811, + "learning_rate": 7.686029173981008e-07, + "loss": 0.6134, + "step": 13046 + }, + { + "epoch": 0.83, + "grad_norm": 0.9382014870643616, + "learning_rate": 7.680564193429336e-07, + "loss": 0.5919, + "step": 13047 + }, + { + "epoch": 0.83, + "grad_norm": 0.9472145438194275, + "learning_rate": 7.675100994801888e-07, + "loss": 0.5363, + "step": 13048 + }, + { + "epoch": 0.83, + "grad_norm": 0.8798018097877502, + "learning_rate": 7.669639578328713e-07, + "loss": 0.5514, + "step": 13049 + }, + { + "epoch": 0.83, + "grad_norm": 0.9217506647109985, + "learning_rate": 7.664179944239746e-07, + "loss": 0.5821, + "step": 13050 + }, + { + "epoch": 0.83, + "grad_norm": 0.8195998072624207, + "learning_rate": 7.658722092764876e-07, + "loss": 0.4924, + "step": 13051 + }, + { + "epoch": 0.83, + "grad_norm": 0.892219066619873, + "learning_rate": 7.653266024133943e-07, + "loss": 0.5429, + "step": 13052 + }, + { + "epoch": 0.83, + "grad_norm": 0.9027977585792542, + "learning_rate": 7.647811738576655e-07, + "loss": 0.5846, + "step": 13053 + }, + { + "epoch": 0.83, + "grad_norm": 0.8905366063117981, + "learning_rate": 7.642359236322683e-07, + "loss": 0.5554, + "step": 13054 + }, + { + "epoch": 0.83, + "grad_norm": 0.9175378680229187, + "learning_rate": 7.63690851760161e-07, + "loss": 0.5582, + "step": 13055 + }, + { + "epoch": 0.83, + "grad_norm": 0.945669412612915, + "learning_rate": 7.631459582642947e-07, + "loss": 0.556, + "step": 13056 + }, + { + "epoch": 0.83, + "grad_norm": 0.8952832818031311, + "learning_rate": 7.626012431676138e-07, + "loss": 0.6063, + "step": 13057 + }, + { + "epoch": 0.83, + "grad_norm": 0.8996466994285583, + "learning_rate": 7.620567064930545e-07, + "loss": 0.5752, + "step": 13058 + }, + { + "epoch": 0.83, + "grad_norm": 0.8489691615104675, + "learning_rate": 7.615123482635433e-07, + "loss": 0.5823, + "step": 13059 + }, + { + "epoch": 0.83, + "grad_norm": 0.9028809070587158, + "learning_rate": 7.609681685020026e-07, + "loss": 0.5796, + "step": 13060 + }, + { + "epoch": 0.83, + "grad_norm": 0.8422659039497375, + "learning_rate": 7.604241672313461e-07, + "loss": 0.5435, + "step": 13061 + }, + { + "epoch": 0.83, + "grad_norm": 0.837838888168335, + "learning_rate": 7.59880344474479e-07, + "loss": 0.5687, + "step": 13062 + }, + { + "epoch": 0.83, + "grad_norm": 0.8525023460388184, + "learning_rate": 7.593367002543018e-07, + "loss": 0.5606, + "step": 13063 + }, + { + "epoch": 0.83, + "grad_norm": 0.8720320463180542, + "learning_rate": 7.587932345937016e-07, + "loss": 0.5699, + "step": 13064 + }, + { + "epoch": 0.83, + "grad_norm": 0.8558526635169983, + "learning_rate": 7.582499475155653e-07, + "loss": 0.6107, + "step": 13065 + }, + { + "epoch": 0.83, + "grad_norm": 0.8588683009147644, + "learning_rate": 7.577068390427689e-07, + "loss": 0.5271, + "step": 13066 + }, + { + "epoch": 0.83, + "grad_norm": 0.806747317314148, + "learning_rate": 7.571639091981786e-07, + "loss": 0.5362, + "step": 13067 + }, + { + "epoch": 0.83, + "grad_norm": 0.9133474826812744, + "learning_rate": 7.566211580046562e-07, + "loss": 0.5963, + "step": 13068 + }, + { + "epoch": 0.83, + "grad_norm": 0.7889014482498169, + "learning_rate": 7.56078585485055e-07, + "loss": 0.5403, + "step": 13069 + }, + { + "epoch": 0.83, + "grad_norm": 0.86361163854599, + "learning_rate": 7.555361916622217e-07, + "loss": 0.5825, + "step": 13070 + }, + { + "epoch": 0.83, + "grad_norm": 0.8512160181999207, + "learning_rate": 7.549939765589942e-07, + "loss": 0.5044, + "step": 13071 + }, + { + "epoch": 0.83, + "grad_norm": 0.8855159282684326, + "learning_rate": 7.544519401982025e-07, + "loss": 0.5909, + "step": 13072 + }, + { + "epoch": 0.83, + "grad_norm": 0.9207944273948669, + "learning_rate": 7.539100826026691e-07, + "loss": 0.5993, + "step": 13073 + }, + { + "epoch": 0.83, + "grad_norm": 0.9316564798355103, + "learning_rate": 7.533684037952133e-07, + "loss": 0.5755, + "step": 13074 + }, + { + "epoch": 0.83, + "grad_norm": 0.8831668496131897, + "learning_rate": 7.528269037986402e-07, + "loss": 0.6368, + "step": 13075 + }, + { + "epoch": 0.83, + "grad_norm": 0.9168758988380432, + "learning_rate": 7.522855826357511e-07, + "loss": 0.5728, + "step": 13076 + }, + { + "epoch": 0.83, + "grad_norm": 0.8036249876022339, + "learning_rate": 7.517444403293394e-07, + "loss": 0.5295, + "step": 13077 + }, + { + "epoch": 0.83, + "grad_norm": 0.8695041537284851, + "learning_rate": 7.512034769021909e-07, + "loss": 0.592, + "step": 13078 + }, + { + "epoch": 0.83, + "grad_norm": 0.9398552775382996, + "learning_rate": 7.506626923770843e-07, + "loss": 0.5927, + "step": 13079 + }, + { + "epoch": 0.83, + "grad_norm": 0.8533617258071899, + "learning_rate": 7.501220867767883e-07, + "loss": 0.5991, + "step": 13080 + }, + { + "epoch": 0.83, + "grad_norm": 0.877224862575531, + "learning_rate": 7.495816601240664e-07, + "loss": 0.5839, + "step": 13081 + }, + { + "epoch": 0.83, + "grad_norm": 0.9243265390396118, + "learning_rate": 7.490414124416761e-07, + "loss": 0.6103, + "step": 13082 + }, + { + "epoch": 0.83, + "grad_norm": 1.0017024278640747, + "learning_rate": 7.485013437523636e-07, + "loss": 0.6144, + "step": 13083 + }, + { + "epoch": 0.83, + "grad_norm": 0.8907317519187927, + "learning_rate": 7.479614540788687e-07, + "loss": 0.6113, + "step": 13084 + }, + { + "epoch": 0.83, + "grad_norm": 0.9147844910621643, + "learning_rate": 7.474217434439263e-07, + "loss": 0.5684, + "step": 13085 + }, + { + "epoch": 0.83, + "grad_norm": 0.8742222785949707, + "learning_rate": 7.468822118702596e-07, + "loss": 0.5424, + "step": 13086 + }, + { + "epoch": 0.83, + "grad_norm": 0.9334181547164917, + "learning_rate": 7.463428593805894e-07, + "loss": 0.5554, + "step": 13087 + }, + { + "epoch": 0.83, + "grad_norm": 0.8707894682884216, + "learning_rate": 7.458036859976225e-07, + "loss": 0.6064, + "step": 13088 + }, + { + "epoch": 0.83, + "grad_norm": 0.9232116341590881, + "learning_rate": 7.452646917440631e-07, + "loss": 0.6251, + "step": 13089 + }, + { + "epoch": 0.83, + "grad_norm": 0.9443577527999878, + "learning_rate": 7.447258766426063e-07, + "loss": 0.5861, + "step": 13090 + }, + { + "epoch": 0.83, + "grad_norm": 0.87910395860672, + "learning_rate": 7.441872407159401e-07, + "loss": 0.5628, + "step": 13091 + }, + { + "epoch": 0.83, + "grad_norm": 0.8710011839866638, + "learning_rate": 7.43648783986744e-07, + "loss": 0.5954, + "step": 13092 + }, + { + "epoch": 0.83, + "grad_norm": 0.9100737571716309, + "learning_rate": 7.431105064776922e-07, + "loss": 0.5956, + "step": 13093 + }, + { + "epoch": 0.83, + "grad_norm": 0.8823485970497131, + "learning_rate": 7.425724082114455e-07, + "loss": 0.5534, + "step": 13094 + }, + { + "epoch": 0.83, + "grad_norm": 0.9108067750930786, + "learning_rate": 7.420344892106674e-07, + "loss": 0.5459, + "step": 13095 + }, + { + "epoch": 0.83, + "grad_norm": 0.9197466969490051, + "learning_rate": 7.414967494980024e-07, + "loss": 0.5779, + "step": 13096 + }, + { + "epoch": 0.83, + "grad_norm": 0.8721498847007751, + "learning_rate": 7.40959189096096e-07, + "loss": 0.5026, + "step": 13097 + }, + { + "epoch": 0.83, + "grad_norm": 0.9107875823974609, + "learning_rate": 7.404218080275816e-07, + "loss": 0.6035, + "step": 13098 + }, + { + "epoch": 0.83, + "grad_norm": 0.8590791821479797, + "learning_rate": 7.398846063150866e-07, + "loss": 0.5347, + "step": 13099 + }, + { + "epoch": 0.83, + "grad_norm": 0.874270498752594, + "learning_rate": 7.393475839812314e-07, + "loss": 0.5954, + "step": 13100 + }, + { + "epoch": 0.83, + "grad_norm": 0.9111903309822083, + "learning_rate": 7.388107410486289e-07, + "loss": 0.5691, + "step": 13101 + }, + { + "epoch": 0.83, + "grad_norm": 0.9081681370735168, + "learning_rate": 7.3827407753988e-07, + "loss": 0.5675, + "step": 13102 + }, + { + "epoch": 0.83, + "grad_norm": 0.8183289766311646, + "learning_rate": 7.377375934775865e-07, + "loss": 0.5498, + "step": 13103 + }, + { + "epoch": 0.83, + "grad_norm": 0.8380873203277588, + "learning_rate": 7.372012888843344e-07, + "loss": 0.5786, + "step": 13104 + }, + { + "epoch": 0.83, + "grad_norm": 0.8212375044822693, + "learning_rate": 7.366651637827065e-07, + "loss": 0.5647, + "step": 13105 + }, + { + "epoch": 0.83, + "grad_norm": 0.900518000125885, + "learning_rate": 7.361292181952795e-07, + "loss": 0.5743, + "step": 13106 + }, + { + "epoch": 0.83, + "grad_norm": 0.9295457601547241, + "learning_rate": 7.355934521446151e-07, + "loss": 0.5577, + "step": 13107 + }, + { + "epoch": 0.83, + "grad_norm": 0.8961006999015808, + "learning_rate": 7.350578656532776e-07, + "loss": 0.5885, + "step": 13108 + }, + { + "epoch": 0.83, + "grad_norm": 0.8948516249656677, + "learning_rate": 7.345224587438171e-07, + "loss": 0.5077, + "step": 13109 + }, + { + "epoch": 0.83, + "grad_norm": 0.9140964150428772, + "learning_rate": 7.339872314387763e-07, + "loss": 0.6131, + "step": 13110 + }, + { + "epoch": 0.83, + "grad_norm": 0.9755547046661377, + "learning_rate": 7.334521837606934e-07, + "loss": 0.6061, + "step": 13111 + }, + { + "epoch": 0.83, + "grad_norm": 0.8581327795982361, + "learning_rate": 7.329173157320962e-07, + "loss": 0.5332, + "step": 13112 + }, + { + "epoch": 0.83, + "grad_norm": 0.9618088603019714, + "learning_rate": 7.323826273755069e-07, + "loss": 0.5948, + "step": 13113 + }, + { + "epoch": 0.83, + "grad_norm": 0.8937922120094299, + "learning_rate": 7.318481187134408e-07, + "loss": 0.5915, + "step": 13114 + }, + { + "epoch": 0.83, + "grad_norm": 0.9323161244392395, + "learning_rate": 7.313137897683997e-07, + "loss": 0.6016, + "step": 13115 + }, + { + "epoch": 0.83, + "grad_norm": 0.8632552623748779, + "learning_rate": 7.30779640562887e-07, + "loss": 0.6145, + "step": 13116 + }, + { + "epoch": 0.83, + "grad_norm": 0.8710545897483826, + "learning_rate": 7.302456711193928e-07, + "loss": 0.5644, + "step": 13117 + }, + { + "epoch": 0.83, + "grad_norm": 0.874191164970398, + "learning_rate": 7.297118814603987e-07, + "loss": 0.5579, + "step": 13118 + }, + { + "epoch": 0.83, + "grad_norm": 0.8017786741256714, + "learning_rate": 7.291782716083823e-07, + "loss": 0.5414, + "step": 13119 + }, + { + "epoch": 0.83, + "grad_norm": 0.8626580834388733, + "learning_rate": 7.286448415858116e-07, + "loss": 0.5909, + "step": 13120 + }, + { + "epoch": 0.83, + "grad_norm": 0.8846031427383423, + "learning_rate": 7.281115914151477e-07, + "loss": 0.5291, + "step": 13121 + }, + { + "epoch": 0.83, + "grad_norm": 0.8800442814826965, + "learning_rate": 7.275785211188441e-07, + "loss": 0.5698, + "step": 13122 + }, + { + "epoch": 0.83, + "grad_norm": 0.8646133542060852, + "learning_rate": 7.270456307193474e-07, + "loss": 0.5776, + "step": 13123 + }, + { + "epoch": 0.83, + "grad_norm": 0.9423984289169312, + "learning_rate": 7.265129202390924e-07, + "loss": 0.5374, + "step": 13124 + }, + { + "epoch": 0.83, + "grad_norm": 0.8401879072189331, + "learning_rate": 7.259803897005141e-07, + "loss": 0.5583, + "step": 13125 + }, + { + "epoch": 0.83, + "grad_norm": 0.8532096147537231, + "learning_rate": 7.254480391260321e-07, + "loss": 0.5056, + "step": 13126 + }, + { + "epoch": 0.83, + "grad_norm": 0.8508062958717346, + "learning_rate": 7.249158685380631e-07, + "loss": 0.5793, + "step": 13127 + }, + { + "epoch": 0.83, + "grad_norm": 0.8456823825836182, + "learning_rate": 7.243838779590151e-07, + "loss": 0.5542, + "step": 13128 + }, + { + "epoch": 0.83, + "grad_norm": 0.9003103375434875, + "learning_rate": 7.238520674112881e-07, + "loss": 0.5354, + "step": 13129 + }, + { + "epoch": 0.83, + "grad_norm": 0.8607522249221802, + "learning_rate": 7.233204369172753e-07, + "loss": 0.544, + "step": 13130 + }, + { + "epoch": 0.83, + "grad_norm": 0.8859104514122009, + "learning_rate": 7.22788986499362e-07, + "loss": 0.5419, + "step": 13131 + }, + { + "epoch": 0.83, + "grad_norm": 0.9029030799865723, + "learning_rate": 7.222577161799232e-07, + "loss": 0.5825, + "step": 13132 + }, + { + "epoch": 0.83, + "grad_norm": 0.934764564037323, + "learning_rate": 7.217266259813332e-07, + "loss": 0.5783, + "step": 13133 + }, + { + "epoch": 0.83, + "grad_norm": 0.8300181031227112, + "learning_rate": 7.211957159259503e-07, + "loss": 0.5394, + "step": 13134 + }, + { + "epoch": 0.83, + "grad_norm": 0.8454645276069641, + "learning_rate": 7.206649860361314e-07, + "loss": 0.5528, + "step": 13135 + }, + { + "epoch": 0.83, + "grad_norm": 0.8897960782051086, + "learning_rate": 7.201344363342245e-07, + "loss": 0.5781, + "step": 13136 + }, + { + "epoch": 0.83, + "grad_norm": 0.8986917734146118, + "learning_rate": 7.196040668425653e-07, + "loss": 0.6028, + "step": 13137 + }, + { + "epoch": 0.83, + "grad_norm": 0.9327632784843445, + "learning_rate": 7.190738775834894e-07, + "loss": 0.5857, + "step": 13138 + }, + { + "epoch": 0.83, + "grad_norm": 0.8856915235519409, + "learning_rate": 7.185438685793217e-07, + "loss": 0.5882, + "step": 13139 + }, + { + "epoch": 0.83, + "grad_norm": 0.9135767221450806, + "learning_rate": 7.180140398523761e-07, + "loss": 0.59, + "step": 13140 + }, + { + "epoch": 0.83, + "grad_norm": 0.8973036408424377, + "learning_rate": 7.174843914249636e-07, + "loss": 0.5655, + "step": 13141 + }, + { + "epoch": 0.83, + "grad_norm": 0.8983938694000244, + "learning_rate": 7.169549233193857e-07, + "loss": 0.5778, + "step": 13142 + }, + { + "epoch": 0.83, + "grad_norm": 0.9242495894432068, + "learning_rate": 7.164256355579363e-07, + "loss": 0.5819, + "step": 13143 + }, + { + "epoch": 0.83, + "grad_norm": 0.9081816673278809, + "learning_rate": 7.158965281629027e-07, + "loss": 0.5798, + "step": 13144 + }, + { + "epoch": 0.83, + "grad_norm": 0.9231504201889038, + "learning_rate": 7.153676011565613e-07, + "loss": 0.6053, + "step": 13145 + }, + { + "epoch": 0.83, + "grad_norm": 0.866088330745697, + "learning_rate": 7.148388545611856e-07, + "loss": 0.5286, + "step": 13146 + }, + { + "epoch": 0.83, + "grad_norm": 0.8189731240272522, + "learning_rate": 7.143102883990405e-07, + "loss": 0.5759, + "step": 13147 + }, + { + "epoch": 0.83, + "grad_norm": 0.8492090702056885, + "learning_rate": 7.137819026923786e-07, + "loss": 0.5127, + "step": 13148 + }, + { + "epoch": 0.83, + "grad_norm": 0.8900519609451294, + "learning_rate": 7.132536974634508e-07, + "loss": 0.5905, + "step": 13149 + }, + { + "epoch": 0.83, + "grad_norm": 0.8588072657585144, + "learning_rate": 7.127256727344967e-07, + "loss": 0.5479, + "step": 13150 + }, + { + "epoch": 0.83, + "grad_norm": 0.9344004988670349, + "learning_rate": 7.121978285277503e-07, + "loss": 0.5901, + "step": 13151 + }, + { + "epoch": 0.83, + "grad_norm": 0.9044827222824097, + "learning_rate": 7.116701648654384e-07, + "loss": 0.5989, + "step": 13152 + }, + { + "epoch": 0.83, + "grad_norm": 0.8644382953643799, + "learning_rate": 7.11142681769777e-07, + "loss": 0.5589, + "step": 13153 + }, + { + "epoch": 0.83, + "grad_norm": 0.9335626363754272, + "learning_rate": 7.106153792629761e-07, + "loss": 0.5711, + "step": 13154 + }, + { + "epoch": 0.83, + "grad_norm": 0.8146085143089294, + "learning_rate": 7.100882573672419e-07, + "loss": 0.5407, + "step": 13155 + }, + { + "epoch": 0.83, + "grad_norm": 0.8309633731842041, + "learning_rate": 7.095613161047666e-07, + "loss": 0.5615, + "step": 13156 + }, + { + "epoch": 0.83, + "grad_norm": 0.8940461277961731, + "learning_rate": 7.09034555497739e-07, + "loss": 0.561, + "step": 13157 + }, + { + "epoch": 0.83, + "grad_norm": 0.9012131690979004, + "learning_rate": 7.085079755683389e-07, + "loss": 0.5582, + "step": 13158 + }, + { + "epoch": 0.83, + "grad_norm": 0.8765063881874084, + "learning_rate": 7.079815763387393e-07, + "loss": 0.5955, + "step": 13159 + }, + { + "epoch": 0.83, + "grad_norm": 0.8758644461631775, + "learning_rate": 7.074553578311055e-07, + "loss": 0.5402, + "step": 13160 + }, + { + "epoch": 0.83, + "grad_norm": 0.8788025975227356, + "learning_rate": 7.06929320067593e-07, + "loss": 0.6192, + "step": 13161 + }, + { + "epoch": 0.83, + "grad_norm": 0.9614549279212952, + "learning_rate": 7.064034630703515e-07, + "loss": 0.6092, + "step": 13162 + }, + { + "epoch": 0.83, + "grad_norm": 0.9305884838104248, + "learning_rate": 7.058777868615258e-07, + "loss": 0.602, + "step": 13163 + }, + { + "epoch": 0.83, + "grad_norm": 0.8970014452934265, + "learning_rate": 7.053522914632466e-07, + "loss": 0.5997, + "step": 13164 + }, + { + "epoch": 0.83, + "grad_norm": 0.8438460230827332, + "learning_rate": 7.048269768976429e-07, + "loss": 0.5581, + "step": 13165 + }, + { + "epoch": 0.83, + "grad_norm": 0.9222960472106934, + "learning_rate": 7.043018431868348e-07, + "loss": 0.5482, + "step": 13166 + }, + { + "epoch": 0.83, + "grad_norm": 0.897331714630127, + "learning_rate": 7.037768903529302e-07, + "loss": 0.6095, + "step": 13167 + }, + { + "epoch": 0.83, + "grad_norm": 0.8716689348220825, + "learning_rate": 7.032521184180369e-07, + "loss": 0.5955, + "step": 13168 + }, + { + "epoch": 0.83, + "grad_norm": 0.8186154961585999, + "learning_rate": 7.027275274042489e-07, + "loss": 0.5867, + "step": 13169 + }, + { + "epoch": 0.83, + "grad_norm": 0.9115201830863953, + "learning_rate": 7.022031173336557e-07, + "loss": 0.5619, + "step": 13170 + }, + { + "epoch": 0.83, + "grad_norm": 0.8985578417778015, + "learning_rate": 7.016788882283382e-07, + "loss": 0.6085, + "step": 13171 + }, + { + "epoch": 0.83, + "grad_norm": 0.9288114905357361, + "learning_rate": 7.011548401103696e-07, + "loss": 0.6011, + "step": 13172 + }, + { + "epoch": 0.83, + "grad_norm": 0.8916085958480835, + "learning_rate": 7.006309730018168e-07, + "loss": 0.5845, + "step": 13173 + }, + { + "epoch": 0.83, + "grad_norm": 0.8739166855812073, + "learning_rate": 7.001072869247378e-07, + "loss": 0.6088, + "step": 13174 + }, + { + "epoch": 0.83, + "grad_norm": 0.9117295145988464, + "learning_rate": 6.995837819011808e-07, + "loss": 0.5982, + "step": 13175 + }, + { + "epoch": 0.83, + "grad_norm": 0.8260350227355957, + "learning_rate": 6.990604579531929e-07, + "loss": 0.5691, + "step": 13176 + }, + { + "epoch": 0.83, + "grad_norm": 0.940250039100647, + "learning_rate": 6.985373151028058e-07, + "loss": 0.5862, + "step": 13177 + }, + { + "epoch": 0.83, + "grad_norm": 0.8562113046646118, + "learning_rate": 6.980143533720491e-07, + "loss": 0.5487, + "step": 13178 + }, + { + "epoch": 0.83, + "grad_norm": 0.8613032698631287, + "learning_rate": 6.974915727829423e-07, + "loss": 0.5633, + "step": 13179 + }, + { + "epoch": 0.84, + "grad_norm": 0.986914336681366, + "learning_rate": 6.96968973357498e-07, + "loss": 0.6344, + "step": 13180 + }, + { + "epoch": 0.84, + "grad_norm": 0.8467575311660767, + "learning_rate": 6.964465551177208e-07, + "loss": 0.4884, + "step": 13181 + }, + { + "epoch": 0.84, + "grad_norm": 0.8608553409576416, + "learning_rate": 6.959243180856096e-07, + "loss": 0.627, + "step": 13182 + }, + { + "epoch": 0.84, + "grad_norm": 0.8423926830291748, + "learning_rate": 6.954022622831514e-07, + "loss": 0.5243, + "step": 13183 + }, + { + "epoch": 0.84, + "grad_norm": 0.8840621113777161, + "learning_rate": 6.948803877323296e-07, + "loss": 0.5071, + "step": 13184 + }, + { + "epoch": 0.84, + "grad_norm": 0.8253465294837952, + "learning_rate": 6.943586944551178e-07, + "loss": 0.571, + "step": 13185 + }, + { + "epoch": 0.84, + "grad_norm": 0.8736525774002075, + "learning_rate": 6.938371824734835e-07, + "loss": 0.6001, + "step": 13186 + }, + { + "epoch": 0.84, + "grad_norm": 0.9959997534751892, + "learning_rate": 6.933158518093852e-07, + "loss": 0.6158, + "step": 13187 + }, + { + "epoch": 0.84, + "grad_norm": 0.9295116066932678, + "learning_rate": 6.927947024847748e-07, + "loss": 0.581, + "step": 13188 + }, + { + "epoch": 0.84, + "grad_norm": 0.9184585809707642, + "learning_rate": 6.922737345215952e-07, + "loss": 0.544, + "step": 13189 + }, + { + "epoch": 0.84, + "grad_norm": 0.8298773169517517, + "learning_rate": 6.91752947941785e-07, + "loss": 0.561, + "step": 13190 + }, + { + "epoch": 0.84, + "grad_norm": 0.8674336075782776, + "learning_rate": 6.912323427672691e-07, + "loss": 0.5629, + "step": 13191 + }, + { + "epoch": 0.84, + "grad_norm": 0.9086819887161255, + "learning_rate": 6.907119190199706e-07, + "loss": 0.5735, + "step": 13192 + }, + { + "epoch": 0.84, + "grad_norm": 0.8917360305786133, + "learning_rate": 6.901916767218019e-07, + "loss": 0.5546, + "step": 13193 + }, + { + "epoch": 0.84, + "grad_norm": 0.8581564426422119, + "learning_rate": 6.896716158946692e-07, + "loss": 0.5619, + "step": 13194 + }, + { + "epoch": 0.84, + "grad_norm": 0.8573694229125977, + "learning_rate": 6.891517365604705e-07, + "loss": 0.5962, + "step": 13195 + }, + { + "epoch": 0.84, + "grad_norm": 0.8820661306381226, + "learning_rate": 6.886320387410967e-07, + "loss": 0.6283, + "step": 13196 + }, + { + "epoch": 0.84, + "grad_norm": 0.8105853796005249, + "learning_rate": 6.881125224584273e-07, + "loss": 0.528, + "step": 13197 + }, + { + "epoch": 0.84, + "grad_norm": 0.9009973406791687, + "learning_rate": 6.875931877343417e-07, + "loss": 0.5681, + "step": 13198 + }, + { + "epoch": 0.84, + "grad_norm": 0.8252160549163818, + "learning_rate": 6.870740345907046e-07, + "loss": 0.5771, + "step": 13199 + }, + { + "epoch": 0.84, + "grad_norm": 0.9308204054832458, + "learning_rate": 6.865550630493756e-07, + "loss": 0.5311, + "step": 13200 + }, + { + "epoch": 0.84, + "grad_norm": 0.9394121766090393, + "learning_rate": 6.860362731322079e-07, + "loss": 0.618, + "step": 13201 + }, + { + "epoch": 0.84, + "grad_norm": 0.8921918869018555, + "learning_rate": 6.855176648610457e-07, + "loss": 0.5777, + "step": 13202 + }, + { + "epoch": 0.84, + "grad_norm": 0.8987441062927246, + "learning_rate": 6.849992382577253e-07, + "loss": 0.5614, + "step": 13203 + }, + { + "epoch": 0.84, + "grad_norm": 0.8814181089401245, + "learning_rate": 6.844809933440776e-07, + "loss": 0.5644, + "step": 13204 + }, + { + "epoch": 0.84, + "grad_norm": 0.9095494151115417, + "learning_rate": 6.839629301419204e-07, + "loss": 0.5416, + "step": 13205 + }, + { + "epoch": 0.84, + "grad_norm": 0.864000678062439, + "learning_rate": 6.83445048673072e-07, + "loss": 0.5482, + "step": 13206 + }, + { + "epoch": 0.84, + "grad_norm": 0.8674211502075195, + "learning_rate": 6.829273489593352e-07, + "loss": 0.5395, + "step": 13207 + }, + { + "epoch": 0.84, + "grad_norm": 0.8799319863319397, + "learning_rate": 6.824098310225097e-07, + "loss": 0.5647, + "step": 13208 + }, + { + "epoch": 0.84, + "grad_norm": 0.9398074150085449, + "learning_rate": 6.818924948843863e-07, + "loss": 0.5973, + "step": 13209 + }, + { + "epoch": 0.84, + "grad_norm": 0.8925483226776123, + "learning_rate": 6.81375340566749e-07, + "loss": 0.5715, + "step": 13210 + }, + { + "epoch": 0.84, + "grad_norm": 0.8708029389381409, + "learning_rate": 6.808583680913722e-07, + "loss": 0.5579, + "step": 13211 + }, + { + "epoch": 0.84, + "grad_norm": 0.8572626113891602, + "learning_rate": 6.803415774800253e-07, + "loss": 0.5613, + "step": 13212 + }, + { + "epoch": 0.84, + "grad_norm": 0.8568171858787537, + "learning_rate": 6.798249687544667e-07, + "loss": 0.5321, + "step": 13213 + }, + { + "epoch": 0.84, + "grad_norm": 0.8693404197692871, + "learning_rate": 6.793085419364498e-07, + "loss": 0.555, + "step": 13214 + }, + { + "epoch": 0.84, + "grad_norm": 0.8741576075553894, + "learning_rate": 6.787922970477196e-07, + "loss": 0.5667, + "step": 13215 + }, + { + "epoch": 0.84, + "grad_norm": 0.9199385046958923, + "learning_rate": 6.782762341100135e-07, + "loss": 0.6141, + "step": 13216 + }, + { + "epoch": 0.84, + "grad_norm": 0.8483101725578308, + "learning_rate": 6.777603531450617e-07, + "loss": 0.5482, + "step": 13217 + }, + { + "epoch": 0.84, + "grad_norm": 0.8694477081298828, + "learning_rate": 6.772446541745836e-07, + "loss": 0.5839, + "step": 13218 + }, + { + "epoch": 0.84, + "grad_norm": 0.9048340320587158, + "learning_rate": 6.767291372202967e-07, + "loss": 0.5781, + "step": 13219 + }, + { + "epoch": 0.84, + "grad_norm": 0.9429792761802673, + "learning_rate": 6.762138023039072e-07, + "loss": 0.5974, + "step": 13220 + }, + { + "epoch": 0.84, + "grad_norm": 0.913020670413971, + "learning_rate": 6.756986494471119e-07, + "loss": 0.6104, + "step": 13221 + }, + { + "epoch": 0.84, + "grad_norm": 0.8851649761199951, + "learning_rate": 6.751836786716032e-07, + "loss": 0.5967, + "step": 13222 + }, + { + "epoch": 0.84, + "grad_norm": 0.9294677972793579, + "learning_rate": 6.74668889999065e-07, + "loss": 0.5813, + "step": 13223 + }, + { + "epoch": 0.84, + "grad_norm": 0.8423077464103699, + "learning_rate": 6.741542834511727e-07, + "loss": 0.5543, + "step": 13224 + }, + { + "epoch": 0.84, + "grad_norm": 0.8565467000007629, + "learning_rate": 6.736398590495968e-07, + "loss": 0.5139, + "step": 13225 + }, + { + "epoch": 0.84, + "grad_norm": 0.8920080661773682, + "learning_rate": 6.731256168159939e-07, + "loss": 0.5972, + "step": 13226 + }, + { + "epoch": 0.84, + "grad_norm": 0.8450667858123779, + "learning_rate": 6.726115567720198e-07, + "loss": 0.5539, + "step": 13227 + }, + { + "epoch": 0.84, + "grad_norm": 0.901174783706665, + "learning_rate": 6.720976789393202e-07, + "loss": 0.611, + "step": 13228 + }, + { + "epoch": 0.84, + "grad_norm": 0.8898508548736572, + "learning_rate": 6.71583983339531e-07, + "loss": 0.5798, + "step": 13229 + }, + { + "epoch": 0.84, + "grad_norm": 0.881693422794342, + "learning_rate": 6.710704699942827e-07, + "loss": 0.5774, + "step": 13230 + }, + { + "epoch": 0.84, + "grad_norm": 0.8955451846122742, + "learning_rate": 6.705571389251975e-07, + "loss": 0.5217, + "step": 13231 + }, + { + "epoch": 0.84, + "grad_norm": 0.9116746187210083, + "learning_rate": 6.700439901538902e-07, + "loss": 0.5331, + "step": 13232 + }, + { + "epoch": 0.84, + "grad_norm": 0.9191250801086426, + "learning_rate": 6.695310237019692e-07, + "loss": 0.5772, + "step": 13233 + }, + { + "epoch": 0.84, + "grad_norm": 0.9273549914360046, + "learning_rate": 6.690182395910305e-07, + "loss": 0.6408, + "step": 13234 + }, + { + "epoch": 0.84, + "grad_norm": 0.8469404578208923, + "learning_rate": 6.685056378426663e-07, + "loss": 0.5477, + "step": 13235 + }, + { + "epoch": 0.84, + "grad_norm": 0.9355968236923218, + "learning_rate": 6.679932184784638e-07, + "loss": 0.5865, + "step": 13236 + }, + { + "epoch": 0.84, + "grad_norm": 0.865906834602356, + "learning_rate": 6.674809815199962e-07, + "loss": 0.5217, + "step": 13237 + }, + { + "epoch": 0.84, + "grad_norm": 0.9029650688171387, + "learning_rate": 6.669689269888325e-07, + "loss": 0.5829, + "step": 13238 + }, + { + "epoch": 0.84, + "grad_norm": 0.8489553332328796, + "learning_rate": 6.664570549065336e-07, + "loss": 0.519, + "step": 13239 + }, + { + "epoch": 0.84, + "grad_norm": 0.8921743631362915, + "learning_rate": 6.659453652946529e-07, + "loss": 0.5553, + "step": 13240 + }, + { + "epoch": 0.84, + "grad_norm": 0.8686976432800293, + "learning_rate": 6.654338581747366e-07, + "loss": 0.5974, + "step": 13241 + }, + { + "epoch": 0.84, + "grad_norm": 0.9544159173965454, + "learning_rate": 6.649225335683213e-07, + "loss": 0.5986, + "step": 13242 + }, + { + "epoch": 0.84, + "grad_norm": 0.9924260973930359, + "learning_rate": 6.644113914969369e-07, + "loss": 0.6438, + "step": 13243 + }, + { + "epoch": 0.84, + "grad_norm": 0.8223074674606323, + "learning_rate": 6.639004319821063e-07, + "loss": 0.5702, + "step": 13244 + }, + { + "epoch": 0.84, + "grad_norm": 0.889176070690155, + "learning_rate": 6.63389655045345e-07, + "loss": 0.5799, + "step": 13245 + }, + { + "epoch": 0.84, + "grad_norm": 0.9296001195907593, + "learning_rate": 6.628790607081586e-07, + "loss": 0.583, + "step": 13246 + }, + { + "epoch": 0.84, + "grad_norm": 0.8625611662864685, + "learning_rate": 6.623686489920489e-07, + "loss": 0.5476, + "step": 13247 + }, + { + "epoch": 0.84, + "grad_norm": 0.9394053220748901, + "learning_rate": 6.61858419918503e-07, + "loss": 0.5582, + "step": 13248 + }, + { + "epoch": 0.84, + "grad_norm": 0.9077306389808655, + "learning_rate": 6.613483735090104e-07, + "loss": 0.5972, + "step": 13249 + }, + { + "epoch": 0.84, + "grad_norm": 0.8808714151382446, + "learning_rate": 6.608385097850439e-07, + "loss": 0.5684, + "step": 13250 + }, + { + "epoch": 0.84, + "grad_norm": 0.9206782579421997, + "learning_rate": 6.603288287680726e-07, + "loss": 0.5882, + "step": 13251 + }, + { + "epoch": 0.84, + "grad_norm": 0.8908818960189819, + "learning_rate": 6.598193304795575e-07, + "loss": 0.5315, + "step": 13252 + }, + { + "epoch": 0.84, + "grad_norm": 0.8861278891563416, + "learning_rate": 6.593100149409521e-07, + "loss": 0.6004, + "step": 13253 + }, + { + "epoch": 0.84, + "grad_norm": 0.9618304967880249, + "learning_rate": 6.588008821737019e-07, + "loss": 0.5732, + "step": 13254 + }, + { + "epoch": 0.84, + "grad_norm": 0.9097421169281006, + "learning_rate": 6.582919321992459e-07, + "loss": 0.5964, + "step": 13255 + }, + { + "epoch": 0.84, + "grad_norm": 0.9288156032562256, + "learning_rate": 6.577831650390104e-07, + "loss": 0.5434, + "step": 13256 + }, + { + "epoch": 0.84, + "grad_norm": 0.9109866619110107, + "learning_rate": 6.572745807144226e-07, + "loss": 0.5443, + "step": 13257 + }, + { + "epoch": 0.84, + "grad_norm": 0.8743159770965576, + "learning_rate": 6.567661792468944e-07, + "loss": 0.548, + "step": 13258 + }, + { + "epoch": 0.84, + "grad_norm": 0.8892823457717896, + "learning_rate": 6.562579606578328e-07, + "loss": 0.5657, + "step": 13259 + }, + { + "epoch": 0.84, + "grad_norm": 0.8872804045677185, + "learning_rate": 6.557499249686377e-07, + "loss": 0.5829, + "step": 13260 + }, + { + "epoch": 0.84, + "grad_norm": 0.9161667823791504, + "learning_rate": 6.552420722007008e-07, + "loss": 0.5586, + "step": 13261 + }, + { + "epoch": 0.84, + "grad_norm": 0.9431544542312622, + "learning_rate": 6.547344023754065e-07, + "loss": 0.6225, + "step": 13262 + }, + { + "epoch": 0.84, + "grad_norm": 0.9163276553153992, + "learning_rate": 6.542269155141306e-07, + "loss": 0.5697, + "step": 13263 + }, + { + "epoch": 0.84, + "grad_norm": 0.8587558269500732, + "learning_rate": 6.537196116382411e-07, + "loss": 0.5308, + "step": 13264 + }, + { + "epoch": 0.84, + "grad_norm": 0.9046618938446045, + "learning_rate": 6.532124907690979e-07, + "loss": 0.5743, + "step": 13265 + }, + { + "epoch": 0.84, + "grad_norm": 0.825258731842041, + "learning_rate": 6.527055529280574e-07, + "loss": 0.5398, + "step": 13266 + }, + { + "epoch": 0.84, + "grad_norm": 0.9225800037384033, + "learning_rate": 6.521987981364614e-07, + "loss": 0.5864, + "step": 13267 + }, + { + "epoch": 0.84, + "grad_norm": 0.8566347360610962, + "learning_rate": 6.516922264156495e-07, + "loss": 0.5508, + "step": 13268 + }, + { + "epoch": 0.84, + "grad_norm": 0.8173342943191528, + "learning_rate": 6.511858377869517e-07, + "loss": 0.5485, + "step": 13269 + }, + { + "epoch": 0.84, + "grad_norm": 0.8830443620681763, + "learning_rate": 6.506796322716891e-07, + "loss": 0.5276, + "step": 13270 + }, + { + "epoch": 0.84, + "grad_norm": 0.9083720445632935, + "learning_rate": 6.501736098911787e-07, + "loss": 0.5389, + "step": 13271 + }, + { + "epoch": 0.84, + "grad_norm": 0.8697338104248047, + "learning_rate": 6.496677706667243e-07, + "loss": 0.5826, + "step": 13272 + }, + { + "epoch": 0.84, + "grad_norm": 0.8422214984893799, + "learning_rate": 6.491621146196253e-07, + "loss": 0.5484, + "step": 13273 + }, + { + "epoch": 0.84, + "grad_norm": 0.8826960921287537, + "learning_rate": 6.486566417711765e-07, + "loss": 0.557, + "step": 13274 + }, + { + "epoch": 0.84, + "grad_norm": 0.8876155614852905, + "learning_rate": 6.481513521426581e-07, + "loss": 0.5637, + "step": 13275 + }, + { + "epoch": 0.84, + "grad_norm": 0.8074238896369934, + "learning_rate": 6.476462457553473e-07, + "loss": 0.5298, + "step": 13276 + }, + { + "epoch": 0.84, + "grad_norm": 0.9412943124771118, + "learning_rate": 6.471413226305134e-07, + "loss": 0.5909, + "step": 13277 + }, + { + "epoch": 0.84, + "grad_norm": 0.9044212102890015, + "learning_rate": 6.466365827894133e-07, + "loss": 0.603, + "step": 13278 + }, + { + "epoch": 0.84, + "grad_norm": 0.8331887722015381, + "learning_rate": 6.461320262533055e-07, + "loss": 0.5901, + "step": 13279 + }, + { + "epoch": 0.84, + "grad_norm": 0.8750473856925964, + "learning_rate": 6.456276530434302e-07, + "loss": 0.5648, + "step": 13280 + }, + { + "epoch": 0.84, + "grad_norm": 0.91391921043396, + "learning_rate": 6.451234631810271e-07, + "loss": 0.5972, + "step": 13281 + }, + { + "epoch": 0.84, + "grad_norm": 0.9085570573806763, + "learning_rate": 6.446194566873254e-07, + "loss": 0.6251, + "step": 13282 + }, + { + "epoch": 0.84, + "grad_norm": 0.8852720260620117, + "learning_rate": 6.441156335835474e-07, + "loss": 0.5953, + "step": 13283 + }, + { + "epoch": 0.84, + "grad_norm": 0.82054603099823, + "learning_rate": 6.436119938909069e-07, + "loss": 0.534, + "step": 13284 + }, + { + "epoch": 0.84, + "grad_norm": 0.886782169342041, + "learning_rate": 6.431085376306112e-07, + "loss": 0.5974, + "step": 13285 + }, + { + "epoch": 0.84, + "grad_norm": 0.8668603897094727, + "learning_rate": 6.426052648238568e-07, + "loss": 0.5656, + "step": 13286 + }, + { + "epoch": 0.84, + "grad_norm": 0.8825658559799194, + "learning_rate": 6.421021754918383e-07, + "loss": 0.5434, + "step": 13287 + }, + { + "epoch": 0.84, + "grad_norm": 0.899597704410553, + "learning_rate": 6.415992696557361e-07, + "loss": 0.5724, + "step": 13288 + }, + { + "epoch": 0.84, + "grad_norm": 0.8868544101715088, + "learning_rate": 6.41096547336727e-07, + "loss": 0.569, + "step": 13289 + }, + { + "epoch": 0.84, + "grad_norm": 0.8770740628242493, + "learning_rate": 6.405940085559797e-07, + "loss": 0.5501, + "step": 13290 + }, + { + "epoch": 0.84, + "grad_norm": 0.8351693153381348, + "learning_rate": 6.400916533346518e-07, + "loss": 0.4812, + "step": 13291 + }, + { + "epoch": 0.84, + "grad_norm": 0.9115918278694153, + "learning_rate": 6.39589481693898e-07, + "loss": 0.5842, + "step": 13292 + }, + { + "epoch": 0.84, + "grad_norm": 0.8377058506011963, + "learning_rate": 6.390874936548635e-07, + "loss": 0.5911, + "step": 13293 + }, + { + "epoch": 0.84, + "grad_norm": 0.8732972145080566, + "learning_rate": 6.385856892386826e-07, + "loss": 0.5937, + "step": 13294 + }, + { + "epoch": 0.84, + "grad_norm": 0.9321759939193726, + "learning_rate": 6.380840684664869e-07, + "loss": 0.5681, + "step": 13295 + }, + { + "epoch": 0.84, + "grad_norm": 0.9192104339599609, + "learning_rate": 6.375826313593963e-07, + "loss": 0.6206, + "step": 13296 + }, + { + "epoch": 0.84, + "grad_norm": 0.9148771166801453, + "learning_rate": 6.37081377938526e-07, + "loss": 0.6008, + "step": 13297 + }, + { + "epoch": 0.84, + "grad_norm": 0.9211153984069824, + "learning_rate": 6.365803082249822e-07, + "loss": 0.5656, + "step": 13298 + }, + { + "epoch": 0.84, + "grad_norm": 0.8448777794837952, + "learning_rate": 6.360794222398603e-07, + "loss": 0.5574, + "step": 13299 + }, + { + "epoch": 0.84, + "grad_norm": 0.8851933479309082, + "learning_rate": 6.35578720004254e-07, + "loss": 0.601, + "step": 13300 + }, + { + "epoch": 0.84, + "grad_norm": 0.9539099335670471, + "learning_rate": 6.350782015392459e-07, + "loss": 0.6214, + "step": 13301 + }, + { + "epoch": 0.84, + "grad_norm": 0.9478552341461182, + "learning_rate": 6.345778668659097e-07, + "loss": 0.6167, + "step": 13302 + }, + { + "epoch": 0.84, + "grad_norm": 0.9490789175033569, + "learning_rate": 6.34077716005313e-07, + "loss": 0.5713, + "step": 13303 + }, + { + "epoch": 0.84, + "grad_norm": 0.9128775000572205, + "learning_rate": 6.335777489785161e-07, + "loss": 0.5915, + "step": 13304 + }, + { + "epoch": 0.84, + "grad_norm": 0.8613923788070679, + "learning_rate": 6.3307796580657e-07, + "loss": 0.6341, + "step": 13305 + }, + { + "epoch": 0.84, + "grad_norm": 0.9108010530471802, + "learning_rate": 6.325783665105206e-07, + "loss": 0.5904, + "step": 13306 + }, + { + "epoch": 0.84, + "grad_norm": 0.8792107701301575, + "learning_rate": 6.320789511114022e-07, + "loss": 0.5576, + "step": 13307 + }, + { + "epoch": 0.84, + "grad_norm": 0.8667570352554321, + "learning_rate": 6.315797196302432e-07, + "loss": 0.5332, + "step": 13308 + }, + { + "epoch": 0.84, + "grad_norm": 0.8885064721107483, + "learning_rate": 6.310806720880675e-07, + "loss": 0.5781, + "step": 13309 + }, + { + "epoch": 0.84, + "grad_norm": 0.8899162411689758, + "learning_rate": 6.305818085058852e-07, + "loss": 0.5392, + "step": 13310 + }, + { + "epoch": 0.84, + "grad_norm": 0.9016544222831726, + "learning_rate": 6.300831289047027e-07, + "loss": 0.5896, + "step": 13311 + }, + { + "epoch": 0.84, + "grad_norm": 0.8984227776527405, + "learning_rate": 6.295846333055184e-07, + "loss": 0.561, + "step": 13312 + }, + { + "epoch": 0.84, + "grad_norm": 0.898589015007019, + "learning_rate": 6.290863217293214e-07, + "loss": 0.5771, + "step": 13313 + }, + { + "epoch": 0.84, + "grad_norm": 0.9066430330276489, + "learning_rate": 6.285881941970951e-07, + "loss": 0.5539, + "step": 13314 + }, + { + "epoch": 0.84, + "grad_norm": 0.8809421062469482, + "learning_rate": 6.280902507298115e-07, + "loss": 0.5963, + "step": 13315 + }, + { + "epoch": 0.84, + "grad_norm": 0.9171636700630188, + "learning_rate": 6.275924913484377e-07, + "loss": 0.575, + "step": 13316 + }, + { + "epoch": 0.84, + "grad_norm": 0.8545477390289307, + "learning_rate": 6.270949160739359e-07, + "loss": 0.5312, + "step": 13317 + }, + { + "epoch": 0.84, + "grad_norm": 0.8714274764060974, + "learning_rate": 6.265975249272544e-07, + "loss": 0.5371, + "step": 13318 + }, + { + "epoch": 0.84, + "grad_norm": 0.9174915552139282, + "learning_rate": 6.261003179293368e-07, + "loss": 0.5563, + "step": 13319 + }, + { + "epoch": 0.84, + "grad_norm": 0.9050828218460083, + "learning_rate": 6.256032951011188e-07, + "loss": 0.5948, + "step": 13320 + }, + { + "epoch": 0.84, + "grad_norm": 0.9102218747138977, + "learning_rate": 6.25106456463529e-07, + "loss": 0.6076, + "step": 13321 + }, + { + "epoch": 0.84, + "grad_norm": 0.8744686245918274, + "learning_rate": 6.246098020374869e-07, + "loss": 0.6083, + "step": 13322 + }, + { + "epoch": 0.84, + "grad_norm": 0.8967841267585754, + "learning_rate": 6.241133318439063e-07, + "loss": 0.6014, + "step": 13323 + }, + { + "epoch": 0.84, + "grad_norm": 0.9219756722450256, + "learning_rate": 6.236170459036894e-07, + "loss": 0.5299, + "step": 13324 + }, + { + "epoch": 0.84, + "grad_norm": 0.958886981010437, + "learning_rate": 6.23120944237735e-07, + "loss": 0.6052, + "step": 13325 + }, + { + "epoch": 0.84, + "grad_norm": 0.8341507315635681, + "learning_rate": 6.226250268669309e-07, + "loss": 0.5881, + "step": 13326 + }, + { + "epoch": 0.84, + "grad_norm": 0.885211169719696, + "learning_rate": 6.221292938121598e-07, + "loss": 0.6086, + "step": 13327 + }, + { + "epoch": 0.84, + "grad_norm": 0.8747490644454956, + "learning_rate": 6.216337450942955e-07, + "loss": 0.5505, + "step": 13328 + }, + { + "epoch": 0.84, + "grad_norm": 0.8842592835426331, + "learning_rate": 6.211383807342008e-07, + "loss": 0.5602, + "step": 13329 + }, + { + "epoch": 0.84, + "grad_norm": 0.9496366381645203, + "learning_rate": 6.206432007527368e-07, + "loss": 0.5424, + "step": 13330 + }, + { + "epoch": 0.84, + "grad_norm": 0.9109143018722534, + "learning_rate": 6.201482051707542e-07, + "loss": 0.5971, + "step": 13331 + }, + { + "epoch": 0.84, + "grad_norm": 0.8485182523727417, + "learning_rate": 6.196533940090932e-07, + "loss": 0.5543, + "step": 13332 + }, + { + "epoch": 0.84, + "grad_norm": 1.0051995515823364, + "learning_rate": 6.191587672885896e-07, + "loss": 0.5688, + "step": 13333 + }, + { + "epoch": 0.84, + "grad_norm": 0.8793126940727234, + "learning_rate": 6.186643250300706e-07, + "loss": 0.5992, + "step": 13334 + }, + { + "epoch": 0.84, + "grad_norm": 0.9340550303459167, + "learning_rate": 6.18170067254355e-07, + "loss": 0.5385, + "step": 13335 + }, + { + "epoch": 0.84, + "grad_norm": 0.9096164107322693, + "learning_rate": 6.176759939822557e-07, + "loss": 0.551, + "step": 13336 + }, + { + "epoch": 0.84, + "grad_norm": 0.9081304669380188, + "learning_rate": 6.171821052345744e-07, + "loss": 0.5687, + "step": 13337 + }, + { + "epoch": 0.85, + "grad_norm": 0.943519651889801, + "learning_rate": 6.166884010321072e-07, + "loss": 0.6095, + "step": 13338 + }, + { + "epoch": 0.85, + "grad_norm": 0.8517118096351624, + "learning_rate": 6.161948813956447e-07, + "loss": 0.6042, + "step": 13339 + }, + { + "epoch": 0.85, + "grad_norm": 0.8399627804756165, + "learning_rate": 6.157015463459648e-07, + "loss": 0.5601, + "step": 13340 + }, + { + "epoch": 0.85, + "grad_norm": 0.8427531123161316, + "learning_rate": 6.152083959038407e-07, + "loss": 0.5183, + "step": 13341 + }, + { + "epoch": 0.85, + "grad_norm": 0.9020541310310364, + "learning_rate": 6.147154300900377e-07, + "loss": 0.5783, + "step": 13342 + }, + { + "epoch": 0.85, + "grad_norm": 0.9388177990913391, + "learning_rate": 6.142226489253122e-07, + "loss": 0.58, + "step": 13343 + }, + { + "epoch": 0.85, + "grad_norm": 0.8787881135940552, + "learning_rate": 6.137300524304151e-07, + "loss": 0.5774, + "step": 13344 + }, + { + "epoch": 0.85, + "grad_norm": 0.8807479739189148, + "learning_rate": 6.132376406260865e-07, + "loss": 0.5687, + "step": 13345 + }, + { + "epoch": 0.85, + "grad_norm": 0.8714962601661682, + "learning_rate": 6.127454135330585e-07, + "loss": 0.6012, + "step": 13346 + }, + { + "epoch": 0.85, + "grad_norm": 0.8867830038070679, + "learning_rate": 6.122533711720613e-07, + "loss": 0.5601, + "step": 13347 + }, + { + "epoch": 0.85, + "grad_norm": 0.9229559898376465, + "learning_rate": 6.1176151356381e-07, + "loss": 0.6503, + "step": 13348 + }, + { + "epoch": 0.85, + "grad_norm": 0.8665587902069092, + "learning_rate": 6.112698407290158e-07, + "loss": 0.6234, + "step": 13349 + }, + { + "epoch": 0.85, + "grad_norm": 0.8939769864082336, + "learning_rate": 6.107783526883809e-07, + "loss": 0.601, + "step": 13350 + }, + { + "epoch": 0.85, + "grad_norm": 0.8392643332481384, + "learning_rate": 6.102870494626006e-07, + "loss": 0.548, + "step": 13351 + }, + { + "epoch": 0.85, + "grad_norm": 0.9490659236907959, + "learning_rate": 6.097959310723633e-07, + "loss": 0.6169, + "step": 13352 + }, + { + "epoch": 0.85, + "grad_norm": 0.8439939618110657, + "learning_rate": 6.093049975383458e-07, + "loss": 0.5877, + "step": 13353 + }, + { + "epoch": 0.85, + "grad_norm": 0.9144013524055481, + "learning_rate": 6.08814248881221e-07, + "loss": 0.5566, + "step": 13354 + }, + { + "epoch": 0.85, + "grad_norm": 0.9243726134300232, + "learning_rate": 6.083236851216517e-07, + "loss": 0.5481, + "step": 13355 + }, + { + "epoch": 0.85, + "grad_norm": 0.9642614722251892, + "learning_rate": 6.078333062802949e-07, + "loss": 0.6463, + "step": 13356 + }, + { + "epoch": 0.85, + "grad_norm": 0.959270715713501, + "learning_rate": 6.073431123777984e-07, + "loss": 0.6111, + "step": 13357 + }, + { + "epoch": 0.85, + "grad_norm": 0.894008219242096, + "learning_rate": 6.068531034348035e-07, + "loss": 0.5951, + "step": 13358 + }, + { + "epoch": 0.85, + "grad_norm": 0.8639335632324219, + "learning_rate": 6.063632794719399e-07, + "loss": 0.5667, + "step": 13359 + }, + { + "epoch": 0.85, + "grad_norm": 0.9120550751686096, + "learning_rate": 6.058736405098359e-07, + "loss": 0.6197, + "step": 13360 + }, + { + "epoch": 0.85, + "grad_norm": 0.9336058497428894, + "learning_rate": 6.053841865691063e-07, + "loss": 0.6587, + "step": 13361 + }, + { + "epoch": 0.85, + "grad_norm": 0.8768007159233093, + "learning_rate": 6.048949176703606e-07, + "loss": 0.5653, + "step": 13362 + }, + { + "epoch": 0.85, + "grad_norm": 0.8573430180549622, + "learning_rate": 6.044058338342002e-07, + "loss": 0.5534, + "step": 13363 + }, + { + "epoch": 0.85, + "grad_norm": 0.8514514565467834, + "learning_rate": 6.039169350812191e-07, + "loss": 0.587, + "step": 13364 + }, + { + "epoch": 0.85, + "grad_norm": 0.9030587673187256, + "learning_rate": 6.034282214320031e-07, + "loss": 0.5804, + "step": 13365 + }, + { + "epoch": 0.85, + "grad_norm": 0.9698714017868042, + "learning_rate": 6.029396929071313e-07, + "loss": 0.6393, + "step": 13366 + }, + { + "epoch": 0.85, + "grad_norm": 0.9271089434623718, + "learning_rate": 6.024513495271705e-07, + "loss": 0.5814, + "step": 13367 + }, + { + "epoch": 0.85, + "grad_norm": 0.8566939234733582, + "learning_rate": 6.019631913126877e-07, + "loss": 0.4997, + "step": 13368 + }, + { + "epoch": 0.85, + "grad_norm": 0.9297276139259338, + "learning_rate": 6.014752182842343e-07, + "loss": 0.5583, + "step": 13369 + }, + { + "epoch": 0.85, + "grad_norm": 0.8690567016601562, + "learning_rate": 6.009874304623576e-07, + "loss": 0.5628, + "step": 13370 + }, + { + "epoch": 0.85, + "grad_norm": 0.8401360511779785, + "learning_rate": 6.004998278675988e-07, + "loss": 0.5212, + "step": 13371 + }, + { + "epoch": 0.85, + "grad_norm": 0.9174624681472778, + "learning_rate": 6.000124105204847e-07, + "loss": 0.5717, + "step": 13372 + }, + { + "epoch": 0.85, + "grad_norm": 0.9405276775360107, + "learning_rate": 5.995251784415435e-07, + "loss": 0.5852, + "step": 13373 + }, + { + "epoch": 0.85, + "grad_norm": 0.8843702673912048, + "learning_rate": 5.990381316512894e-07, + "loss": 0.5789, + "step": 13374 + }, + { + "epoch": 0.85, + "grad_norm": 0.9235939383506775, + "learning_rate": 5.985512701702284e-07, + "loss": 0.5835, + "step": 13375 + }, + { + "epoch": 0.85, + "grad_norm": 0.8629280924797058, + "learning_rate": 5.980645940188623e-07, + "loss": 0.5666, + "step": 13376 + }, + { + "epoch": 0.85, + "grad_norm": 0.9397252202033997, + "learning_rate": 5.975781032176831e-07, + "loss": 0.5569, + "step": 13377 + }, + { + "epoch": 0.85, + "grad_norm": 0.937901496887207, + "learning_rate": 5.970917977871749e-07, + "loss": 0.5956, + "step": 13378 + }, + { + "epoch": 0.85, + "grad_norm": 0.87679123878479, + "learning_rate": 5.966056777478152e-07, + "loss": 0.5403, + "step": 13379 + }, + { + "epoch": 0.85, + "grad_norm": 0.9953281283378601, + "learning_rate": 5.961197431200705e-07, + "loss": 0.5968, + "step": 13380 + }, + { + "epoch": 0.85, + "grad_norm": 0.9243939518928528, + "learning_rate": 5.956339939244044e-07, + "loss": 0.5376, + "step": 13381 + }, + { + "epoch": 0.85, + "grad_norm": 0.8928592205047607, + "learning_rate": 5.951484301812699e-07, + "loss": 0.5919, + "step": 13382 + }, + { + "epoch": 0.85, + "grad_norm": 0.9274805188179016, + "learning_rate": 5.946630519111107e-07, + "loss": 0.5901, + "step": 13383 + }, + { + "epoch": 0.85, + "grad_norm": 0.9007667303085327, + "learning_rate": 5.941778591343656e-07, + "loss": 0.5883, + "step": 13384 + }, + { + "epoch": 0.85, + "grad_norm": 0.8749024868011475, + "learning_rate": 5.936928518714641e-07, + "loss": 0.5715, + "step": 13385 + }, + { + "epoch": 0.85, + "grad_norm": 0.8843820691108704, + "learning_rate": 5.932080301428278e-07, + "loss": 0.5757, + "step": 13386 + }, + { + "epoch": 0.85, + "grad_norm": 0.88556969165802, + "learning_rate": 5.927233939688714e-07, + "loss": 0.6128, + "step": 13387 + }, + { + "epoch": 0.85, + "grad_norm": 0.8423168659210205, + "learning_rate": 5.922389433700021e-07, + "loss": 0.5531, + "step": 13388 + }, + { + "epoch": 0.85, + "grad_norm": 0.8334605693817139, + "learning_rate": 5.917546783666156e-07, + "loss": 0.5426, + "step": 13389 + }, + { + "epoch": 0.85, + "grad_norm": 0.9283615946769714, + "learning_rate": 5.912705989791062e-07, + "loss": 0.5903, + "step": 13390 + }, + { + "epoch": 0.85, + "grad_norm": 0.8497453927993774, + "learning_rate": 5.907867052278543e-07, + "loss": 0.5435, + "step": 13391 + }, + { + "epoch": 0.85, + "grad_norm": 0.9120060205459595, + "learning_rate": 5.903029971332353e-07, + "loss": 0.6199, + "step": 13392 + }, + { + "epoch": 0.85, + "grad_norm": 0.912470817565918, + "learning_rate": 5.898194747156171e-07, + "loss": 0.6023, + "step": 13393 + }, + { + "epoch": 0.85, + "grad_norm": 0.9185166358947754, + "learning_rate": 5.893361379953588e-07, + "loss": 0.5856, + "step": 13394 + }, + { + "epoch": 0.85, + "grad_norm": 0.9453598260879517, + "learning_rate": 5.888529869928122e-07, + "loss": 0.6039, + "step": 13395 + }, + { + "epoch": 0.85, + "grad_norm": 0.9415664672851562, + "learning_rate": 5.883700217283223e-07, + "loss": 0.5434, + "step": 13396 + }, + { + "epoch": 0.85, + "grad_norm": 0.9080526232719421, + "learning_rate": 5.878872422222215e-07, + "loss": 0.5224, + "step": 13397 + }, + { + "epoch": 0.85, + "grad_norm": 0.9032747745513916, + "learning_rate": 5.874046484948426e-07, + "loss": 0.6058, + "step": 13398 + }, + { + "epoch": 0.85, + "grad_norm": 0.9231809377670288, + "learning_rate": 5.869222405665026e-07, + "loss": 0.5567, + "step": 13399 + }, + { + "epoch": 0.85, + "grad_norm": 0.9746513366699219, + "learning_rate": 5.864400184575153e-07, + "loss": 0.5836, + "step": 13400 + }, + { + "epoch": 0.85, + "grad_norm": 0.927158534526825, + "learning_rate": 5.859579821881855e-07, + "loss": 0.6028, + "step": 13401 + }, + { + "epoch": 0.85, + "grad_norm": 0.8797077536582947, + "learning_rate": 5.854761317788082e-07, + "loss": 0.5583, + "step": 13402 + }, + { + "epoch": 0.85, + "grad_norm": 0.8659250736236572, + "learning_rate": 5.849944672496749e-07, + "loss": 0.6296, + "step": 13403 + }, + { + "epoch": 0.85, + "grad_norm": 0.9689622521400452, + "learning_rate": 5.845129886210671e-07, + "loss": 0.6281, + "step": 13404 + }, + { + "epoch": 0.85, + "grad_norm": 0.8522788286209106, + "learning_rate": 5.840316959132558e-07, + "loss": 0.5162, + "step": 13405 + }, + { + "epoch": 0.85, + "grad_norm": 0.904559850692749, + "learning_rate": 5.835505891465076e-07, + "loss": 0.5924, + "step": 13406 + }, + { + "epoch": 0.85, + "grad_norm": 0.8650006651878357, + "learning_rate": 5.830696683410802e-07, + "loss": 0.5552, + "step": 13407 + }, + { + "epoch": 0.85, + "grad_norm": 0.8110765218734741, + "learning_rate": 5.825889335172241e-07, + "loss": 0.5428, + "step": 13408 + }, + { + "epoch": 0.85, + "grad_norm": 0.8690059185028076, + "learning_rate": 5.821083846951819e-07, + "loss": 0.6166, + "step": 13409 + }, + { + "epoch": 0.85, + "grad_norm": 0.8721504807472229, + "learning_rate": 5.816280218951847e-07, + "loss": 0.5206, + "step": 13410 + }, + { + "epoch": 0.85, + "grad_norm": 0.8614574074745178, + "learning_rate": 5.811478451374625e-07, + "loss": 0.563, + "step": 13411 + }, + { + "epoch": 0.85, + "grad_norm": 0.9383098483085632, + "learning_rate": 5.806678544422334e-07, + "loss": 0.6108, + "step": 13412 + }, + { + "epoch": 0.85, + "grad_norm": 0.8840879201889038, + "learning_rate": 5.801880498297057e-07, + "loss": 0.57, + "step": 13413 + }, + { + "epoch": 0.85, + "grad_norm": 0.8915720582008362, + "learning_rate": 5.797084313200846e-07, + "loss": 0.5684, + "step": 13414 + }, + { + "epoch": 0.85, + "grad_norm": 0.8662636876106262, + "learning_rate": 5.792289989335637e-07, + "loss": 0.6, + "step": 13415 + }, + { + "epoch": 0.85, + "grad_norm": 0.9727985858917236, + "learning_rate": 5.787497526903313e-07, + "loss": 0.6059, + "step": 13416 + }, + { + "epoch": 0.85, + "grad_norm": 0.8884052634239197, + "learning_rate": 5.782706926105674e-07, + "loss": 0.6134, + "step": 13417 + }, + { + "epoch": 0.85, + "grad_norm": 0.8743575215339661, + "learning_rate": 5.777918187144416e-07, + "loss": 0.5764, + "step": 13418 + }, + { + "epoch": 0.85, + "grad_norm": 0.9156510233879089, + "learning_rate": 5.773131310221169e-07, + "loss": 0.6167, + "step": 13419 + }, + { + "epoch": 0.85, + "grad_norm": 0.898995578289032, + "learning_rate": 5.768346295537536e-07, + "loss": 0.6059, + "step": 13420 + }, + { + "epoch": 0.85, + "grad_norm": 0.8932662606239319, + "learning_rate": 5.76356314329496e-07, + "loss": 0.6145, + "step": 13421 + }, + { + "epoch": 0.85, + "grad_norm": 0.9043698310852051, + "learning_rate": 5.758781853694845e-07, + "loss": 0.6164, + "step": 13422 + }, + { + "epoch": 0.85, + "grad_norm": 0.8941948413848877, + "learning_rate": 5.754002426938532e-07, + "loss": 0.5835, + "step": 13423 + }, + { + "epoch": 0.85, + "grad_norm": 0.871859073638916, + "learning_rate": 5.749224863227249e-07, + "loss": 0.5474, + "step": 13424 + }, + { + "epoch": 0.85, + "grad_norm": 0.9060640335083008, + "learning_rate": 5.744449162762183e-07, + "loss": 0.5546, + "step": 13425 + }, + { + "epoch": 0.85, + "grad_norm": 0.9025922417640686, + "learning_rate": 5.739675325744398e-07, + "loss": 0.5765, + "step": 13426 + }, + { + "epoch": 0.85, + "grad_norm": 0.9106086492538452, + "learning_rate": 5.734903352374904e-07, + "loss": 0.5614, + "step": 13427 + }, + { + "epoch": 0.85, + "grad_norm": 0.8653062582015991, + "learning_rate": 5.730133242854663e-07, + "loss": 0.5451, + "step": 13428 + }, + { + "epoch": 0.85, + "grad_norm": 0.8670951724052429, + "learning_rate": 5.725364997384498e-07, + "loss": 0.5791, + "step": 13429 + }, + { + "epoch": 0.85, + "grad_norm": 0.8815758228302002, + "learning_rate": 5.720598616165196e-07, + "loss": 0.5999, + "step": 13430 + }, + { + "epoch": 0.85, + "grad_norm": 0.8936425447463989, + "learning_rate": 5.715834099397455e-07, + "loss": 0.5746, + "step": 13431 + }, + { + "epoch": 0.85, + "grad_norm": 0.8447661995887756, + "learning_rate": 5.711071447281868e-07, + "loss": 0.5322, + "step": 13432 + }, + { + "epoch": 0.85, + "grad_norm": 0.8403939008712769, + "learning_rate": 5.70631066001901e-07, + "loss": 0.5512, + "step": 13433 + }, + { + "epoch": 0.85, + "grad_norm": 0.8799472451210022, + "learning_rate": 5.701551737809319e-07, + "loss": 0.5443, + "step": 13434 + }, + { + "epoch": 0.85, + "grad_norm": 0.9245263934135437, + "learning_rate": 5.696794680853179e-07, + "loss": 0.558, + "step": 13435 + }, + { + "epoch": 0.85, + "grad_norm": 0.8561593890190125, + "learning_rate": 5.692039489350892e-07, + "loss": 0.5743, + "step": 13436 + }, + { + "epoch": 0.85, + "grad_norm": 0.8703195452690125, + "learning_rate": 5.687286163502687e-07, + "loss": 0.5518, + "step": 13437 + }, + { + "epoch": 0.85, + "grad_norm": 0.9124912619590759, + "learning_rate": 5.682534703508713e-07, + "loss": 0.5345, + "step": 13438 + }, + { + "epoch": 0.85, + "grad_norm": 0.9110020995140076, + "learning_rate": 5.67778510956904e-07, + "loss": 0.5506, + "step": 13439 + }, + { + "epoch": 0.85, + "grad_norm": 0.8271638751029968, + "learning_rate": 5.673037381883634e-07, + "loss": 0.4917, + "step": 13440 + }, + { + "epoch": 0.85, + "grad_norm": 0.8652800917625427, + "learning_rate": 5.668291520652436e-07, + "loss": 0.5618, + "step": 13441 + }, + { + "epoch": 0.85, + "grad_norm": 0.8255113959312439, + "learning_rate": 5.663547526075258e-07, + "loss": 0.5208, + "step": 13442 + }, + { + "epoch": 0.85, + "grad_norm": 0.8780609965324402, + "learning_rate": 5.658805398351858e-07, + "loss": 0.6078, + "step": 13443 + }, + { + "epoch": 0.85, + "grad_norm": 0.8495383858680725, + "learning_rate": 5.654065137681907e-07, + "loss": 0.5683, + "step": 13444 + }, + { + "epoch": 0.85, + "grad_norm": 0.9232254028320312, + "learning_rate": 5.64932674426501e-07, + "loss": 0.6365, + "step": 13445 + }, + { + "epoch": 0.85, + "grad_norm": 0.8610829710960388, + "learning_rate": 5.644590218300672e-07, + "loss": 0.5327, + "step": 13446 + }, + { + "epoch": 0.85, + "grad_norm": 0.8897087574005127, + "learning_rate": 5.639855559988356e-07, + "loss": 0.5343, + "step": 13447 + }, + { + "epoch": 0.85, + "grad_norm": 0.867492139339447, + "learning_rate": 5.63512276952739e-07, + "loss": 0.6033, + "step": 13448 + }, + { + "epoch": 0.85, + "grad_norm": 0.9001726508140564, + "learning_rate": 5.630391847117073e-07, + "loss": 0.6116, + "step": 13449 + }, + { + "epoch": 0.85, + "grad_norm": 0.9186358451843262, + "learning_rate": 5.625662792956604e-07, + "loss": 0.5888, + "step": 13450 + }, + { + "epoch": 0.85, + "grad_norm": 0.8732519745826721, + "learning_rate": 5.620935607245109e-07, + "loss": 0.5883, + "step": 13451 + }, + { + "epoch": 0.85, + "grad_norm": 0.8616448044776917, + "learning_rate": 5.616210290181628e-07, + "loss": 0.5351, + "step": 13452 + }, + { + "epoch": 0.85, + "grad_norm": 0.9044156074523926, + "learning_rate": 5.611486841965136e-07, + "loss": 0.5497, + "step": 13453 + }, + { + "epoch": 0.85, + "grad_norm": 0.8665462732315063, + "learning_rate": 5.606765262794512e-07, + "loss": 0.5753, + "step": 13454 + }, + { + "epoch": 0.85, + "grad_norm": 0.9290836453437805, + "learning_rate": 5.602045552868585e-07, + "loss": 0.5808, + "step": 13455 + }, + { + "epoch": 0.85, + "grad_norm": 0.8570681810379028, + "learning_rate": 5.597327712386058e-07, + "loss": 0.6147, + "step": 13456 + }, + { + "epoch": 0.85, + "grad_norm": 0.8335081338882446, + "learning_rate": 5.592611741545594e-07, + "loss": 0.5146, + "step": 13457 + }, + { + "epoch": 0.85, + "grad_norm": 0.8507091999053955, + "learning_rate": 5.58789764054577e-07, + "loss": 0.5839, + "step": 13458 + }, + { + "epoch": 0.85, + "grad_norm": 0.8704282641410828, + "learning_rate": 5.583185409585079e-07, + "loss": 0.5506, + "step": 13459 + }, + { + "epoch": 0.85, + "grad_norm": 0.9266949892044067, + "learning_rate": 5.578475048861931e-07, + "loss": 0.5791, + "step": 13460 + }, + { + "epoch": 0.85, + "grad_norm": 0.8802145719528198, + "learning_rate": 5.573766558574684e-07, + "loss": 0.5466, + "step": 13461 + }, + { + "epoch": 0.85, + "grad_norm": 0.7950432300567627, + "learning_rate": 5.569059938921551e-07, + "loss": 0.5532, + "step": 13462 + }, + { + "epoch": 0.85, + "grad_norm": 0.850308895111084, + "learning_rate": 5.564355190100768e-07, + "loss": 0.5333, + "step": 13463 + }, + { + "epoch": 0.85, + "grad_norm": 0.8470205664634705, + "learning_rate": 5.559652312310393e-07, + "loss": 0.5449, + "step": 13464 + }, + { + "epoch": 0.85, + "grad_norm": 0.8567230701446533, + "learning_rate": 5.554951305748462e-07, + "loss": 0.5504, + "step": 13465 + }, + { + "epoch": 0.85, + "grad_norm": 0.8885741829872131, + "learning_rate": 5.550252170612924e-07, + "loss": 0.5441, + "step": 13466 + }, + { + "epoch": 0.85, + "grad_norm": 0.9018322229385376, + "learning_rate": 5.545554907101636e-07, + "loss": 0.5781, + "step": 13467 + }, + { + "epoch": 0.85, + "grad_norm": 0.8181560039520264, + "learning_rate": 5.540859515412378e-07, + "loss": 0.5483, + "step": 13468 + }, + { + "epoch": 0.85, + "grad_norm": 0.8753595352172852, + "learning_rate": 5.536165995742882e-07, + "loss": 0.5315, + "step": 13469 + }, + { + "epoch": 0.85, + "grad_norm": 0.8592386841773987, + "learning_rate": 5.531474348290733e-07, + "loss": 0.5426, + "step": 13470 + }, + { + "epoch": 0.85, + "grad_norm": 0.8794154524803162, + "learning_rate": 5.526784573253525e-07, + "loss": 0.5856, + "step": 13471 + }, + { + "epoch": 0.85, + "grad_norm": 0.9070557951927185, + "learning_rate": 5.522096670828703e-07, + "loss": 0.5833, + "step": 13472 + }, + { + "epoch": 0.85, + "grad_norm": 0.8681169152259827, + "learning_rate": 5.517410641213656e-07, + "loss": 0.5704, + "step": 13473 + }, + { + "epoch": 0.85, + "grad_norm": 0.8716253042221069, + "learning_rate": 5.512726484605707e-07, + "loss": 0.557, + "step": 13474 + }, + { + "epoch": 0.85, + "grad_norm": 0.8904623985290527, + "learning_rate": 5.508044201202084e-07, + "loss": 0.5565, + "step": 13475 + }, + { + "epoch": 0.85, + "grad_norm": 0.842241644859314, + "learning_rate": 5.503363791199945e-07, + "loss": 0.526, + "step": 13476 + }, + { + "epoch": 0.85, + "grad_norm": 0.8667955994606018, + "learning_rate": 5.49868525479637e-07, + "loss": 0.6341, + "step": 13477 + }, + { + "epoch": 0.85, + "grad_norm": 0.8526463508605957, + "learning_rate": 5.494008592188344e-07, + "loss": 0.527, + "step": 13478 + }, + { + "epoch": 0.85, + "grad_norm": 0.8465002775192261, + "learning_rate": 5.489333803572788e-07, + "loss": 0.5513, + "step": 13479 + }, + { + "epoch": 0.85, + "grad_norm": 0.802689254283905, + "learning_rate": 5.484660889146548e-07, + "loss": 0.5247, + "step": 13480 + }, + { + "epoch": 0.85, + "grad_norm": 0.9201193451881409, + "learning_rate": 5.479989849106381e-07, + "loss": 0.5893, + "step": 13481 + }, + { + "epoch": 0.85, + "grad_norm": 0.8439991474151611, + "learning_rate": 5.475320683648977e-07, + "loss": 0.5606, + "step": 13482 + }, + { + "epoch": 0.85, + "grad_norm": 0.8193072080612183, + "learning_rate": 5.470653392970904e-07, + "loss": 0.5555, + "step": 13483 + }, + { + "epoch": 0.85, + "grad_norm": 0.876397967338562, + "learning_rate": 5.465987977268727e-07, + "loss": 0.5745, + "step": 13484 + }, + { + "epoch": 0.85, + "grad_norm": 0.9382455348968506, + "learning_rate": 5.46132443673888e-07, + "loss": 0.6023, + "step": 13485 + }, + { + "epoch": 0.85, + "grad_norm": 0.9134024977684021, + "learning_rate": 5.456662771577714e-07, + "loss": 0.56, + "step": 13486 + }, + { + "epoch": 0.85, + "grad_norm": 0.9391716718673706, + "learning_rate": 5.452002981981519e-07, + "loss": 0.6151, + "step": 13487 + }, + { + "epoch": 0.85, + "grad_norm": 0.9424962401390076, + "learning_rate": 5.447345068146515e-07, + "loss": 0.6357, + "step": 13488 + }, + { + "epoch": 0.85, + "grad_norm": 0.8668440580368042, + "learning_rate": 5.442689030268816e-07, + "loss": 0.5539, + "step": 13489 + }, + { + "epoch": 0.85, + "grad_norm": 0.8978198170661926, + "learning_rate": 5.438034868544495e-07, + "loss": 0.6061, + "step": 13490 + }, + { + "epoch": 0.85, + "grad_norm": 0.8872178196907043, + "learning_rate": 5.433382583169478e-07, + "loss": 0.5772, + "step": 13491 + }, + { + "epoch": 0.85, + "grad_norm": 0.9086841344833374, + "learning_rate": 5.428732174339702e-07, + "loss": 0.584, + "step": 13492 + }, + { + "epoch": 0.85, + "grad_norm": 0.8926877975463867, + "learning_rate": 5.424083642250966e-07, + "loss": 0.5608, + "step": 13493 + }, + { + "epoch": 0.85, + "grad_norm": 0.9636724591255188, + "learning_rate": 5.419436987098991e-07, + "loss": 0.6392, + "step": 13494 + }, + { + "epoch": 0.85, + "grad_norm": 0.8971894383430481, + "learning_rate": 5.414792209079445e-07, + "loss": 0.5755, + "step": 13495 + }, + { + "epoch": 0.86, + "grad_norm": 0.8618263006210327, + "learning_rate": 5.410149308387891e-07, + "loss": 0.6396, + "step": 13496 + }, + { + "epoch": 0.86, + "grad_norm": 0.8926728963851929, + "learning_rate": 5.405508285219835e-07, + "loss": 0.5824, + "step": 13497 + }, + { + "epoch": 0.86, + "grad_norm": 0.8346815705299377, + "learning_rate": 5.400869139770704e-07, + "loss": 0.5066, + "step": 13498 + }, + { + "epoch": 0.86, + "grad_norm": 0.9091081023216248, + "learning_rate": 5.396231872235819e-07, + "loss": 0.5603, + "step": 13499 + }, + { + "epoch": 0.86, + "grad_norm": 0.9015220403671265, + "learning_rate": 5.391596482810424e-07, + "loss": 0.5675, + "step": 13500 + }, + { + "epoch": 0.86, + "grad_norm": 0.9150410294532776, + "learning_rate": 5.386962971689746e-07, + "loss": 0.5732, + "step": 13501 + }, + { + "epoch": 0.86, + "grad_norm": 0.8379479050636292, + "learning_rate": 5.382331339068853e-07, + "loss": 0.5747, + "step": 13502 + }, + { + "epoch": 0.86, + "grad_norm": 0.8525556325912476, + "learning_rate": 5.377701585142769e-07, + "loss": 0.5536, + "step": 13503 + }, + { + "epoch": 0.86, + "grad_norm": 0.9342008233070374, + "learning_rate": 5.373073710106441e-07, + "loss": 0.6113, + "step": 13504 + }, + { + "epoch": 0.86, + "grad_norm": 0.9186147451400757, + "learning_rate": 5.368447714154734e-07, + "loss": 0.5781, + "step": 13505 + }, + { + "epoch": 0.86, + "grad_norm": 0.8697748780250549, + "learning_rate": 5.363823597482443e-07, + "loss": 0.5869, + "step": 13506 + }, + { + "epoch": 0.86, + "grad_norm": 0.8578813076019287, + "learning_rate": 5.359201360284255e-07, + "loss": 0.5598, + "step": 13507 + }, + { + "epoch": 0.86, + "grad_norm": 0.9658546447753906, + "learning_rate": 5.354581002754799e-07, + "loss": 0.5963, + "step": 13508 + }, + { + "epoch": 0.86, + "grad_norm": 0.8002378344535828, + "learning_rate": 5.349962525088631e-07, + "loss": 0.5307, + "step": 13509 + }, + { + "epoch": 0.86, + "grad_norm": 0.8562396764755249, + "learning_rate": 5.345345927480211e-07, + "loss": 0.5668, + "step": 13510 + }, + { + "epoch": 0.86, + "grad_norm": 0.8851287961006165, + "learning_rate": 5.340731210123934e-07, + "loss": 0.5659, + "step": 13511 + }, + { + "epoch": 0.86, + "grad_norm": 0.9325246214866638, + "learning_rate": 5.336118373214116e-07, + "loss": 0.6068, + "step": 13512 + }, + { + "epoch": 0.86, + "grad_norm": 0.9005350470542908, + "learning_rate": 5.331507416944965e-07, + "loss": 0.5734, + "step": 13513 + }, + { + "epoch": 0.86, + "grad_norm": 0.915073573589325, + "learning_rate": 5.326898341510655e-07, + "loss": 0.6254, + "step": 13514 + }, + { + "epoch": 0.86, + "grad_norm": 0.8757150173187256, + "learning_rate": 5.322291147105246e-07, + "loss": 0.5644, + "step": 13515 + }, + { + "epoch": 0.86, + "grad_norm": 0.8947983384132385, + "learning_rate": 5.317685833922737e-07, + "loss": 0.6423, + "step": 13516 + }, + { + "epoch": 0.86, + "grad_norm": 0.941947877407074, + "learning_rate": 5.313082402157039e-07, + "loss": 0.5757, + "step": 13517 + }, + { + "epoch": 0.86, + "grad_norm": 0.8160790801048279, + "learning_rate": 5.308480852001979e-07, + "loss": 0.5549, + "step": 13518 + }, + { + "epoch": 0.86, + "grad_norm": 0.8949527740478516, + "learning_rate": 5.303881183651327e-07, + "loss": 0.5, + "step": 13519 + }, + { + "epoch": 0.86, + "grad_norm": 0.9550206065177917, + "learning_rate": 5.29928339729876e-07, + "loss": 0.6233, + "step": 13520 + }, + { + "epoch": 0.86, + "grad_norm": 0.8570389747619629, + "learning_rate": 5.294687493137845e-07, + "loss": 0.5913, + "step": 13521 + }, + { + "epoch": 0.86, + "grad_norm": 0.8469735980033875, + "learning_rate": 5.290093471362145e-07, + "loss": 0.5931, + "step": 13522 + }, + { + "epoch": 0.86, + "grad_norm": 0.8493378162384033, + "learning_rate": 5.28550133216506e-07, + "loss": 0.608, + "step": 13523 + }, + { + "epoch": 0.86, + "grad_norm": 0.8677387237548828, + "learning_rate": 5.28091107573997e-07, + "loss": 0.5272, + "step": 13524 + }, + { + "epoch": 0.86, + "grad_norm": 0.8398542404174805, + "learning_rate": 5.27632270228014e-07, + "loss": 0.5557, + "step": 13525 + }, + { + "epoch": 0.86, + "grad_norm": 0.9466037154197693, + "learning_rate": 5.271736211978784e-07, + "loss": 0.5509, + "step": 13526 + }, + { + "epoch": 0.86, + "grad_norm": 0.9450697302818298, + "learning_rate": 5.267151605029014e-07, + "loss": 0.5604, + "step": 13527 + }, + { + "epoch": 0.86, + "grad_norm": 0.8523156046867371, + "learning_rate": 5.262568881623892e-07, + "loss": 0.5634, + "step": 13528 + }, + { + "epoch": 0.86, + "grad_norm": 0.8883264660835266, + "learning_rate": 5.257988041956347e-07, + "loss": 0.543, + "step": 13529 + }, + { + "epoch": 0.86, + "grad_norm": 0.9195562601089478, + "learning_rate": 5.253409086219274e-07, + "loss": 0.6409, + "step": 13530 + }, + { + "epoch": 0.86, + "grad_norm": 0.8769651055335999, + "learning_rate": 5.248832014605503e-07, + "loss": 0.542, + "step": 13531 + }, + { + "epoch": 0.86, + "grad_norm": 0.9406867623329163, + "learning_rate": 5.244256827307726e-07, + "loss": 0.6345, + "step": 13532 + }, + { + "epoch": 0.86, + "grad_norm": 0.811181366443634, + "learning_rate": 5.239683524518596e-07, + "loss": 0.5724, + "step": 13533 + }, + { + "epoch": 0.86, + "grad_norm": 0.9315853714942932, + "learning_rate": 5.23511210643069e-07, + "loss": 0.5907, + "step": 13534 + }, + { + "epoch": 0.86, + "grad_norm": 0.852668821811676, + "learning_rate": 5.230542573236485e-07, + "loss": 0.5694, + "step": 13535 + }, + { + "epoch": 0.86, + "grad_norm": 0.8345797657966614, + "learning_rate": 5.225974925128402e-07, + "loss": 0.5513, + "step": 13536 + }, + { + "epoch": 0.86, + "grad_norm": 0.9633619785308838, + "learning_rate": 5.221409162298741e-07, + "loss": 0.607, + "step": 13537 + }, + { + "epoch": 0.86, + "grad_norm": 0.8628314733505249, + "learning_rate": 5.216845284939764e-07, + "loss": 0.5985, + "step": 13538 + }, + { + "epoch": 0.86, + "grad_norm": 0.8650707602500916, + "learning_rate": 5.212283293243658e-07, + "loss": 0.5438, + "step": 13539 + }, + { + "epoch": 0.86, + "grad_norm": 0.8653766512870789, + "learning_rate": 5.207723187402491e-07, + "loss": 0.521, + "step": 13540 + }, + { + "epoch": 0.86, + "grad_norm": 0.9244462847709656, + "learning_rate": 5.203164967608282e-07, + "loss": 0.6332, + "step": 13541 + }, + { + "epoch": 0.86, + "grad_norm": 0.9744123816490173, + "learning_rate": 5.198608634052965e-07, + "loss": 0.6545, + "step": 13542 + }, + { + "epoch": 0.86, + "grad_norm": 0.8849944472312927, + "learning_rate": 5.194054186928365e-07, + "loss": 0.5575, + "step": 13543 + }, + { + "epoch": 0.86, + "grad_norm": 0.8665662407875061, + "learning_rate": 5.189501626426297e-07, + "loss": 0.5634, + "step": 13544 + }, + { + "epoch": 0.86, + "grad_norm": 0.9209324717521667, + "learning_rate": 5.184950952738421e-07, + "loss": 0.6306, + "step": 13545 + }, + { + "epoch": 0.86, + "grad_norm": 0.86234050989151, + "learning_rate": 5.180402166056359e-07, + "loss": 0.5072, + "step": 13546 + }, + { + "epoch": 0.86, + "grad_norm": 0.9226478338241577, + "learning_rate": 5.175855266571644e-07, + "loss": 0.6141, + "step": 13547 + }, + { + "epoch": 0.86, + "grad_norm": 0.9241039752960205, + "learning_rate": 5.171310254475737e-07, + "loss": 0.5793, + "step": 13548 + }, + { + "epoch": 0.86, + "grad_norm": 0.8989474177360535, + "learning_rate": 5.166767129960004e-07, + "loss": 0.5708, + "step": 13549 + }, + { + "epoch": 0.86, + "grad_norm": 0.8441492915153503, + "learning_rate": 5.162225893215755e-07, + "loss": 0.5564, + "step": 13550 + }, + { + "epoch": 0.86, + "grad_norm": 0.8294525742530823, + "learning_rate": 5.157686544434176e-07, + "loss": 0.553, + "step": 13551 + }, + { + "epoch": 0.86, + "grad_norm": 0.8823322057723999, + "learning_rate": 5.153149083806436e-07, + "loss": 0.5434, + "step": 13552 + }, + { + "epoch": 0.86, + "grad_norm": 0.9079649448394775, + "learning_rate": 5.14861351152357e-07, + "loss": 0.6156, + "step": 13553 + }, + { + "epoch": 0.86, + "grad_norm": 0.8697636723518372, + "learning_rate": 5.144079827776566e-07, + "loss": 0.5319, + "step": 13554 + }, + { + "epoch": 0.86, + "grad_norm": 0.8235500454902649, + "learning_rate": 5.139548032756325e-07, + "loss": 0.5539, + "step": 13555 + }, + { + "epoch": 0.86, + "grad_norm": 0.8555493354797363, + "learning_rate": 5.13501812665364e-07, + "loss": 0.5011, + "step": 13556 + }, + { + "epoch": 0.86, + "grad_norm": 0.8816463351249695, + "learning_rate": 5.130490109659275e-07, + "loss": 0.5324, + "step": 13557 + }, + { + "epoch": 0.86, + "grad_norm": 0.8975476622581482, + "learning_rate": 5.125963981963894e-07, + "loss": 0.5839, + "step": 13558 + }, + { + "epoch": 0.86, + "grad_norm": 0.8355741500854492, + "learning_rate": 5.12143974375805e-07, + "loss": 0.5476, + "step": 13559 + }, + { + "epoch": 0.86, + "grad_norm": 0.8712900876998901, + "learning_rate": 5.116917395232262e-07, + "loss": 0.6212, + "step": 13560 + }, + { + "epoch": 0.86, + "grad_norm": 0.8691787123680115, + "learning_rate": 5.112396936576947e-07, + "loss": 0.5257, + "step": 13561 + }, + { + "epoch": 0.86, + "grad_norm": 0.860202968120575, + "learning_rate": 5.107878367982438e-07, + "loss": 0.5328, + "step": 13562 + }, + { + "epoch": 0.86, + "grad_norm": 0.9462293386459351, + "learning_rate": 5.103361689639019e-07, + "loss": 0.6081, + "step": 13563 + }, + { + "epoch": 0.86, + "grad_norm": 0.9243309497833252, + "learning_rate": 5.098846901736832e-07, + "loss": 0.5952, + "step": 13564 + }, + { + "epoch": 0.86, + "grad_norm": 0.8597437739372253, + "learning_rate": 5.094334004466012e-07, + "loss": 0.6039, + "step": 13565 + }, + { + "epoch": 0.86, + "grad_norm": 0.9059598445892334, + "learning_rate": 5.089822998016586e-07, + "loss": 0.5546, + "step": 13566 + }, + { + "epoch": 0.86, + "grad_norm": 0.9531145691871643, + "learning_rate": 5.085313882578469e-07, + "loss": 0.5446, + "step": 13567 + }, + { + "epoch": 0.86, + "grad_norm": 0.923179566860199, + "learning_rate": 5.080806658341536e-07, + "loss": 0.5803, + "step": 13568 + }, + { + "epoch": 0.86, + "grad_norm": 0.9919398427009583, + "learning_rate": 5.076301325495575e-07, + "loss": 0.6093, + "step": 13569 + }, + { + "epoch": 0.86, + "grad_norm": 0.8127473592758179, + "learning_rate": 5.071797884230284e-07, + "loss": 0.528, + "step": 13570 + }, + { + "epoch": 0.86, + "grad_norm": 0.9124990701675415, + "learning_rate": 5.067296334735306e-07, + "loss": 0.6227, + "step": 13571 + }, + { + "epoch": 0.86, + "grad_norm": 0.9629392623901367, + "learning_rate": 5.062796677200154e-07, + "loss": 0.613, + "step": 13572 + }, + { + "epoch": 0.86, + "grad_norm": 0.921553373336792, + "learning_rate": 5.058298911814302e-07, + "loss": 0.5961, + "step": 13573 + }, + { + "epoch": 0.86, + "grad_norm": 0.9422236680984497, + "learning_rate": 5.053803038767158e-07, + "loss": 0.5932, + "step": 13574 + }, + { + "epoch": 0.86, + "grad_norm": 0.8603041172027588, + "learning_rate": 5.049309058248004e-07, + "loss": 0.528, + "step": 13575 + }, + { + "epoch": 0.86, + "grad_norm": 0.8307815790176392, + "learning_rate": 5.044816970446076e-07, + "loss": 0.5176, + "step": 13576 + }, + { + "epoch": 0.86, + "grad_norm": 0.8835110068321228, + "learning_rate": 5.040326775550514e-07, + "loss": 0.5863, + "step": 13577 + }, + { + "epoch": 0.86, + "grad_norm": 0.9652464985847473, + "learning_rate": 5.035838473750393e-07, + "loss": 0.5984, + "step": 13578 + }, + { + "epoch": 0.86, + "grad_norm": 0.8423542380332947, + "learning_rate": 5.031352065234702e-07, + "loss": 0.5387, + "step": 13579 + }, + { + "epoch": 0.86, + "grad_norm": 0.8693512678146362, + "learning_rate": 5.026867550192327e-07, + "loss": 0.5339, + "step": 13580 + }, + { + "epoch": 0.86, + "grad_norm": 0.9612827301025391, + "learning_rate": 5.022384928812107e-07, + "loss": 0.5946, + "step": 13581 + }, + { + "epoch": 0.86, + "grad_norm": 0.8645419478416443, + "learning_rate": 5.017904201282808e-07, + "loss": 0.5286, + "step": 13582 + }, + { + "epoch": 0.86, + "grad_norm": 0.875821053981781, + "learning_rate": 5.013425367793074e-07, + "loss": 0.5546, + "step": 13583 + }, + { + "epoch": 0.86, + "grad_norm": 0.8827986121177673, + "learning_rate": 5.008948428531496e-07, + "loss": 0.5512, + "step": 13584 + }, + { + "epoch": 0.86, + "grad_norm": 0.8592386245727539, + "learning_rate": 5.004473383686592e-07, + "loss": 0.5975, + "step": 13585 + }, + { + "epoch": 0.86, + "grad_norm": 0.9252444505691528, + "learning_rate": 5.000000233446783e-07, + "loss": 0.5423, + "step": 13586 + }, + { + "epoch": 0.86, + "grad_norm": 0.893185019493103, + "learning_rate": 4.99552897800043e-07, + "loss": 0.5593, + "step": 13587 + }, + { + "epoch": 0.86, + "grad_norm": 0.8774006366729736, + "learning_rate": 4.991059617535781e-07, + "loss": 0.5636, + "step": 13588 + }, + { + "epoch": 0.86, + "grad_norm": 0.8796536922454834, + "learning_rate": 4.986592152241043e-07, + "loss": 0.6007, + "step": 13589 + }, + { + "epoch": 0.86, + "grad_norm": 0.8507401943206787, + "learning_rate": 4.982126582304314e-07, + "loss": 0.5618, + "step": 13590 + }, + { + "epoch": 0.86, + "grad_norm": 0.9354941248893738, + "learning_rate": 4.977662907913633e-07, + "loss": 0.6212, + "step": 13591 + }, + { + "epoch": 0.86, + "grad_norm": 0.8648061156272888, + "learning_rate": 4.973201129256943e-07, + "loss": 0.5562, + "step": 13592 + }, + { + "epoch": 0.86, + "grad_norm": 0.9033337831497192, + "learning_rate": 4.968741246522129e-07, + "loss": 0.6071, + "step": 13593 + }, + { + "epoch": 0.86, + "grad_norm": 0.9331035017967224, + "learning_rate": 4.964283259896945e-07, + "loss": 0.568, + "step": 13594 + }, + { + "epoch": 0.86, + "grad_norm": 0.903471052646637, + "learning_rate": 4.959827169569136e-07, + "loss": 0.542, + "step": 13595 + }, + { + "epoch": 0.86, + "grad_norm": 0.8744809627532959, + "learning_rate": 4.955372975726336e-07, + "loss": 0.5319, + "step": 13596 + }, + { + "epoch": 0.86, + "grad_norm": 0.9031259417533875, + "learning_rate": 4.950920678556065e-07, + "loss": 0.5862, + "step": 13597 + }, + { + "epoch": 0.86, + "grad_norm": 0.836344838142395, + "learning_rate": 4.946470278245813e-07, + "loss": 0.5504, + "step": 13598 + }, + { + "epoch": 0.86, + "grad_norm": 0.8633370995521545, + "learning_rate": 4.942021774982969e-07, + "loss": 0.5772, + "step": 13599 + }, + { + "epoch": 0.86, + "grad_norm": 0.8724879622459412, + "learning_rate": 4.937575168954845e-07, + "loss": 0.5347, + "step": 13600 + }, + { + "epoch": 0.86, + "grad_norm": 0.9412771463394165, + "learning_rate": 4.933130460348673e-07, + "loss": 0.5512, + "step": 13601 + }, + { + "epoch": 0.86, + "grad_norm": 0.9978772401809692, + "learning_rate": 4.928687649351594e-07, + "loss": 0.5511, + "step": 13602 + }, + { + "epoch": 0.86, + "grad_norm": 0.8989056348800659, + "learning_rate": 4.924246736150679e-07, + "loss": 0.5492, + "step": 13603 + }, + { + "epoch": 0.86, + "grad_norm": 0.9470418095588684, + "learning_rate": 4.919807720932946e-07, + "loss": 0.5756, + "step": 13604 + }, + { + "epoch": 0.86, + "grad_norm": 0.8301222324371338, + "learning_rate": 4.915370603885272e-07, + "loss": 0.5398, + "step": 13605 + }, + { + "epoch": 0.86, + "grad_norm": 0.8426318764686584, + "learning_rate": 4.91093538519451e-07, + "loss": 0.5703, + "step": 13606 + }, + { + "epoch": 0.86, + "grad_norm": 0.8601441383361816, + "learning_rate": 4.906502065047403e-07, + "loss": 0.5795, + "step": 13607 + }, + { + "epoch": 0.86, + "grad_norm": 0.8154615163803101, + "learning_rate": 4.902070643630624e-07, + "loss": 0.5552, + "step": 13608 + }, + { + "epoch": 0.86, + "grad_norm": 0.8969496488571167, + "learning_rate": 4.89764112113078e-07, + "loss": 0.5814, + "step": 13609 + }, + { + "epoch": 0.86, + "grad_norm": 0.9093883633613586, + "learning_rate": 4.893213497734356e-07, + "loss": 0.5667, + "step": 13610 + }, + { + "epoch": 0.86, + "grad_norm": 0.8988984227180481, + "learning_rate": 4.888787773627785e-07, + "loss": 0.5766, + "step": 13611 + }, + { + "epoch": 0.86, + "grad_norm": 0.8481857180595398, + "learning_rate": 4.884363948997455e-07, + "loss": 0.5216, + "step": 13612 + }, + { + "epoch": 0.86, + "grad_norm": 0.8880239725112915, + "learning_rate": 4.879942024029599e-07, + "loss": 0.5504, + "step": 13613 + }, + { + "epoch": 0.86, + "grad_norm": 0.8837846517562866, + "learning_rate": 4.875521998910426e-07, + "loss": 0.5126, + "step": 13614 + }, + { + "epoch": 0.86, + "grad_norm": 0.905758798122406, + "learning_rate": 4.871103873826044e-07, + "loss": 0.5803, + "step": 13615 + }, + { + "epoch": 0.86, + "grad_norm": 0.8283089995384216, + "learning_rate": 4.866687648962487e-07, + "loss": 0.6043, + "step": 13616 + }, + { + "epoch": 0.86, + "grad_norm": 0.9035173654556274, + "learning_rate": 4.862273324505712e-07, + "loss": 0.5171, + "step": 13617 + }, + { + "epoch": 0.86, + "grad_norm": 0.8479889631271362, + "learning_rate": 4.857860900641576e-07, + "loss": 0.5226, + "step": 13618 + }, + { + "epoch": 0.86, + "grad_norm": 0.8784950375556946, + "learning_rate": 4.853450377555879e-07, + "loss": 0.5695, + "step": 13619 + }, + { + "epoch": 0.86, + "grad_norm": 0.8352934122085571, + "learning_rate": 4.849041755434336e-07, + "loss": 0.548, + "step": 13620 + }, + { + "epoch": 0.86, + "grad_norm": 0.8737031817436218, + "learning_rate": 4.844635034462574e-07, + "loss": 0.503, + "step": 13621 + }, + { + "epoch": 0.86, + "grad_norm": 0.8494743704795837, + "learning_rate": 4.840230214826147e-07, + "loss": 0.6044, + "step": 13622 + }, + { + "epoch": 0.86, + "grad_norm": 0.866535484790802, + "learning_rate": 4.835827296710537e-07, + "loss": 0.625, + "step": 13623 + }, + { + "epoch": 0.86, + "grad_norm": 0.9683859944343567, + "learning_rate": 4.831426280301105e-07, + "loss": 0.6063, + "step": 13624 + }, + { + "epoch": 0.86, + "grad_norm": 0.8437833189964294, + "learning_rate": 4.8270271657832e-07, + "loss": 0.4983, + "step": 13625 + }, + { + "epoch": 0.86, + "grad_norm": 0.9570308327674866, + "learning_rate": 4.822629953342028e-07, + "loss": 0.5752, + "step": 13626 + }, + { + "epoch": 0.86, + "grad_norm": 0.8903212547302246, + "learning_rate": 4.81823464316275e-07, + "loss": 0.558, + "step": 13627 + }, + { + "epoch": 0.86, + "grad_norm": 0.916301429271698, + "learning_rate": 4.813841235430433e-07, + "loss": 0.5274, + "step": 13628 + }, + { + "epoch": 0.86, + "grad_norm": 0.863028883934021, + "learning_rate": 4.809449730330068e-07, + "loss": 0.544, + "step": 13629 + }, + { + "epoch": 0.86, + "grad_norm": 0.9283245205879211, + "learning_rate": 4.805060128046574e-07, + "loss": 0.5725, + "step": 13630 + }, + { + "epoch": 0.86, + "grad_norm": 0.8969873189926147, + "learning_rate": 4.80067242876478e-07, + "loss": 0.5858, + "step": 13631 + }, + { + "epoch": 0.86, + "grad_norm": 0.9229633808135986, + "learning_rate": 4.796286632669417e-07, + "loss": 0.5792, + "step": 13632 + }, + { + "epoch": 0.86, + "grad_norm": 0.9000493288040161, + "learning_rate": 4.791902739945187e-07, + "loss": 0.5484, + "step": 13633 + }, + { + "epoch": 0.86, + "grad_norm": 0.8788484930992126, + "learning_rate": 4.787520750776658e-07, + "loss": 0.5745, + "step": 13634 + }, + { + "epoch": 0.86, + "grad_norm": 0.8873356580734253, + "learning_rate": 4.783140665348352e-07, + "loss": 0.509, + "step": 13635 + }, + { + "epoch": 0.86, + "grad_norm": 0.8811357021331787, + "learning_rate": 4.778762483844701e-07, + "loss": 0.5573, + "step": 13636 + }, + { + "epoch": 0.86, + "grad_norm": 0.8424716591835022, + "learning_rate": 4.774386206450027e-07, + "loss": 0.5391, + "step": 13637 + }, + { + "epoch": 0.86, + "grad_norm": 0.8560691475868225, + "learning_rate": 4.770011833348631e-07, + "loss": 0.5524, + "step": 13638 + }, + { + "epoch": 0.86, + "grad_norm": 0.8537570238113403, + "learning_rate": 4.7656393647247054e-07, + "loss": 0.5459, + "step": 13639 + }, + { + "epoch": 0.86, + "grad_norm": 0.8615885972976685, + "learning_rate": 4.7612688007623363e-07, + "loss": 0.5397, + "step": 13640 + }, + { + "epoch": 0.86, + "grad_norm": 0.8361106514930725, + "learning_rate": 4.756900141645565e-07, + "loss": 0.6052, + "step": 13641 + }, + { + "epoch": 0.86, + "grad_norm": 0.8686729669570923, + "learning_rate": 4.752533387558339e-07, + "loss": 0.5867, + "step": 13642 + }, + { + "epoch": 0.86, + "grad_norm": 0.8469032645225525, + "learning_rate": 4.748168538684528e-07, + "loss": 0.5686, + "step": 13643 + }, + { + "epoch": 0.86, + "grad_norm": 0.9301448464393616, + "learning_rate": 4.7438055952079287e-07, + "loss": 0.6143, + "step": 13644 + }, + { + "epoch": 0.86, + "grad_norm": 0.9038071036338806, + "learning_rate": 4.739444557312223e-07, + "loss": 0.5559, + "step": 13645 + }, + { + "epoch": 0.86, + "grad_norm": 0.8837379813194275, + "learning_rate": 4.735085425181063e-07, + "loss": 0.5633, + "step": 13646 + }, + { + "epoch": 0.86, + "grad_norm": 0.884790301322937, + "learning_rate": 4.730728198998008e-07, + "loss": 0.6254, + "step": 13647 + }, + { + "epoch": 0.86, + "grad_norm": 0.918059766292572, + "learning_rate": 4.726372878946489e-07, + "loss": 0.5338, + "step": 13648 + }, + { + "epoch": 0.86, + "grad_norm": 0.8778460621833801, + "learning_rate": 4.7220194652099204e-07, + "loss": 0.5891, + "step": 13649 + }, + { + "epoch": 0.86, + "grad_norm": 0.9263706207275391, + "learning_rate": 4.7176679579716e-07, + "loss": 0.5847, + "step": 13650 + }, + { + "epoch": 0.86, + "grad_norm": 0.9742307066917419, + "learning_rate": 4.7133183574147534e-07, + "loss": 0.6366, + "step": 13651 + }, + { + "epoch": 0.86, + "grad_norm": 0.9247993230819702, + "learning_rate": 4.7089706637225283e-07, + "loss": 0.6062, + "step": 13652 + }, + { + "epoch": 0.86, + "grad_norm": 0.8701785802841187, + "learning_rate": 4.7046248770780065e-07, + "loss": 0.5936, + "step": 13653 + }, + { + "epoch": 0.87, + "grad_norm": 0.8334656953811646, + "learning_rate": 4.7002809976641417e-07, + "loss": 0.5348, + "step": 13654 + }, + { + "epoch": 0.87, + "grad_norm": 0.9703954458236694, + "learning_rate": 4.6959390256638703e-07, + "loss": 0.5806, + "step": 13655 + }, + { + "epoch": 0.87, + "grad_norm": 0.9238660335540771, + "learning_rate": 4.691598961260002e-07, + "loss": 0.5771, + "step": 13656 + }, + { + "epoch": 0.87, + "grad_norm": 0.9725003838539124, + "learning_rate": 4.68726080463528e-07, + "loss": 0.6504, + "step": 13657 + }, + { + "epoch": 0.87, + "grad_norm": 0.8711181879043579, + "learning_rate": 4.682924555972379e-07, + "loss": 0.5642, + "step": 13658 + }, + { + "epoch": 0.87, + "grad_norm": 0.8927187919616699, + "learning_rate": 4.6785902154538763e-07, + "loss": 0.6176, + "step": 13659 + }, + { + "epoch": 0.87, + "grad_norm": 0.9735706448554993, + "learning_rate": 4.674257783262276e-07, + "loss": 0.6128, + "step": 13660 + }, + { + "epoch": 0.87, + "grad_norm": 0.9042197465896606, + "learning_rate": 4.669927259580015e-07, + "loss": 0.5782, + "step": 13661 + }, + { + "epoch": 0.87, + "grad_norm": 0.8241575956344604, + "learning_rate": 4.665598644589409e-07, + "loss": 0.4985, + "step": 13662 + }, + { + "epoch": 0.87, + "grad_norm": 0.8879325985908508, + "learning_rate": 4.6612719384727556e-07, + "loss": 0.5778, + "step": 13663 + }, + { + "epoch": 0.87, + "grad_norm": 0.9175477027893066, + "learning_rate": 4.656947141412205e-07, + "loss": 0.5892, + "step": 13664 + }, + { + "epoch": 0.87, + "grad_norm": 0.8603050112724304, + "learning_rate": 4.652624253589877e-07, + "loss": 0.6211, + "step": 13665 + }, + { + "epoch": 0.87, + "grad_norm": 0.9173632860183716, + "learning_rate": 4.6483032751877987e-07, + "loss": 0.6, + "step": 13666 + }, + { + "epoch": 0.87, + "grad_norm": 0.9489515423774719, + "learning_rate": 4.6439842063878803e-07, + "loss": 0.5417, + "step": 13667 + }, + { + "epoch": 0.87, + "grad_norm": 0.8993018865585327, + "learning_rate": 4.639667047372015e-07, + "loss": 0.5831, + "step": 13668 + }, + { + "epoch": 0.87, + "grad_norm": 0.8332312107086182, + "learning_rate": 4.6353517983219856e-07, + "loss": 0.5133, + "step": 13669 + }, + { + "epoch": 0.87, + "grad_norm": 0.8579809069633484, + "learning_rate": 4.631038459419468e-07, + "loss": 0.5473, + "step": 13670 + }, + { + "epoch": 0.87, + "grad_norm": 0.9015935659408569, + "learning_rate": 4.6267270308460955e-07, + "loss": 0.5857, + "step": 13671 + }, + { + "epoch": 0.87, + "grad_norm": 0.8048023581504822, + "learning_rate": 4.6224175127834057e-07, + "loss": 0.5637, + "step": 13672 + }, + { + "epoch": 0.87, + "grad_norm": 0.8290963172912598, + "learning_rate": 4.61810990541286e-07, + "loss": 0.5104, + "step": 13673 + }, + { + "epoch": 0.87, + "grad_norm": 0.8655577301979065, + "learning_rate": 4.61380420891584e-07, + "loss": 0.5858, + "step": 13674 + }, + { + "epoch": 0.87, + "grad_norm": 0.9205370545387268, + "learning_rate": 4.6095004234736175e-07, + "loss": 0.6064, + "step": 13675 + }, + { + "epoch": 0.87, + "grad_norm": 0.8846642374992371, + "learning_rate": 4.6051985492674425e-07, + "loss": 0.5652, + "step": 13676 + }, + { + "epoch": 0.87, + "grad_norm": 0.8984456062316895, + "learning_rate": 4.6008985864784473e-07, + "loss": 0.578, + "step": 13677 + }, + { + "epoch": 0.87, + "grad_norm": 0.9386430382728577, + "learning_rate": 4.596600535287671e-07, + "loss": 0.6143, + "step": 13678 + }, + { + "epoch": 0.87, + "grad_norm": 0.9357401132583618, + "learning_rate": 4.592304395876102e-07, + "loss": 0.5837, + "step": 13679 + }, + { + "epoch": 0.87, + "grad_norm": 0.9388497471809387, + "learning_rate": 4.588010168424628e-07, + "loss": 0.5809, + "step": 13680 + }, + { + "epoch": 0.87, + "grad_norm": 0.9037414193153381, + "learning_rate": 4.5837178531140723e-07, + "loss": 0.5671, + "step": 13681 + }, + { + "epoch": 0.87, + "grad_norm": 0.9199149012565613, + "learning_rate": 4.579427450125179e-07, + "loss": 0.5612, + "step": 13682 + }, + { + "epoch": 0.87, + "grad_norm": 0.8453497886657715, + "learning_rate": 4.5751389596385755e-07, + "loss": 0.5223, + "step": 13683 + }, + { + "epoch": 0.87, + "grad_norm": 0.9701248407363892, + "learning_rate": 4.570852381834839e-07, + "loss": 0.556, + "step": 13684 + }, + { + "epoch": 0.87, + "grad_norm": 0.8935304284095764, + "learning_rate": 4.5665677168944935e-07, + "loss": 0.6301, + "step": 13685 + }, + { + "epoch": 0.87, + "grad_norm": 0.8989062905311584, + "learning_rate": 4.562284964997915e-07, + "loss": 0.5626, + "step": 13686 + }, + { + "epoch": 0.87, + "grad_norm": 0.9354601502418518, + "learning_rate": 4.5580041263254547e-07, + "loss": 0.6159, + "step": 13687 + }, + { + "epoch": 0.87, + "grad_norm": 0.8538670539855957, + "learning_rate": 4.553725201057363e-07, + "loss": 0.5656, + "step": 13688 + }, + { + "epoch": 0.87, + "grad_norm": 0.9553387761116028, + "learning_rate": 4.5494481893738005e-07, + "loss": 0.5389, + "step": 13689 + }, + { + "epoch": 0.87, + "grad_norm": 1.0032283067703247, + "learning_rate": 4.5451730914548744e-07, + "loss": 0.6298, + "step": 13690 + }, + { + "epoch": 0.87, + "grad_norm": 0.8711049556732178, + "learning_rate": 4.540899907480578e-07, + "loss": 0.5762, + "step": 13691 + }, + { + "epoch": 0.87, + "grad_norm": 0.8655171990394592, + "learning_rate": 4.536628637630836e-07, + "loss": 0.552, + "step": 13692 + }, + { + "epoch": 0.87, + "grad_norm": 0.8877602815628052, + "learning_rate": 4.532359282085519e-07, + "loss": 0.6132, + "step": 13693 + }, + { + "epoch": 0.87, + "grad_norm": 0.8526985049247742, + "learning_rate": 4.528091841024379e-07, + "loss": 0.5487, + "step": 13694 + }, + { + "epoch": 0.87, + "grad_norm": 0.8731285333633423, + "learning_rate": 4.5238263146271053e-07, + "loss": 0.5844, + "step": 13695 + }, + { + "epoch": 0.87, + "grad_norm": 0.9351499676704407, + "learning_rate": 4.5195627030733156e-07, + "loss": 0.5862, + "step": 13696 + }, + { + "epoch": 0.87, + "grad_norm": 0.8490439653396606, + "learning_rate": 4.5153010065425054e-07, + "loss": 0.5402, + "step": 13697 + }, + { + "epoch": 0.87, + "grad_norm": 0.8659386038780212, + "learning_rate": 4.511041225214158e-07, + "loss": 0.561, + "step": 13698 + }, + { + "epoch": 0.87, + "grad_norm": 0.8737561106681824, + "learning_rate": 4.5067833592676136e-07, + "loss": 0.5404, + "step": 13699 + }, + { + "epoch": 0.87, + "grad_norm": 0.8905614614486694, + "learning_rate": 4.502527408882157e-07, + "loss": 0.5343, + "step": 13700 + }, + { + "epoch": 0.87, + "grad_norm": 0.8490473031997681, + "learning_rate": 4.498273374237e-07, + "loss": 0.5809, + "step": 13701 + }, + { + "epoch": 0.87, + "grad_norm": 0.9129199981689453, + "learning_rate": 4.494021255511266e-07, + "loss": 0.5969, + "step": 13702 + }, + { + "epoch": 0.87, + "grad_norm": 0.9153651595115662, + "learning_rate": 4.48977105288399e-07, + "loss": 0.571, + "step": 13703 + }, + { + "epoch": 0.87, + "grad_norm": 0.957604706287384, + "learning_rate": 4.485522766534145e-07, + "loss": 0.6299, + "step": 13704 + }, + { + "epoch": 0.87, + "grad_norm": 0.8316980600357056, + "learning_rate": 4.4812763966405825e-07, + "loss": 0.5236, + "step": 13705 + }, + { + "epoch": 0.87, + "grad_norm": 0.8910514712333679, + "learning_rate": 4.4770319433821487e-07, + "loss": 0.5742, + "step": 13706 + }, + { + "epoch": 0.87, + "grad_norm": 0.904670000076294, + "learning_rate": 4.472789406937522e-07, + "loss": 0.5931, + "step": 13707 + }, + { + "epoch": 0.87, + "grad_norm": 0.9204214215278625, + "learning_rate": 4.468548787485355e-07, + "loss": 0.5939, + "step": 13708 + }, + { + "epoch": 0.87, + "grad_norm": 0.9537574648857117, + "learning_rate": 4.4643100852042097e-07, + "loss": 0.5974, + "step": 13709 + }, + { + "epoch": 0.87, + "grad_norm": 0.9143358469009399, + "learning_rate": 4.4600733002725547e-07, + "loss": 0.5724, + "step": 13710 + }, + { + "epoch": 0.87, + "grad_norm": 0.8609566688537598, + "learning_rate": 4.4558384328687975e-07, + "loss": 0.5714, + "step": 13711 + }, + { + "epoch": 0.87, + "grad_norm": 0.8667165637016296, + "learning_rate": 4.451605483171251e-07, + "loss": 0.571, + "step": 13712 + }, + { + "epoch": 0.87, + "grad_norm": 0.8885953426361084, + "learning_rate": 4.4473744513581384e-07, + "loss": 0.5167, + "step": 13713 + }, + { + "epoch": 0.87, + "grad_norm": 0.8588200807571411, + "learning_rate": 4.443145337607624e-07, + "loss": 0.5433, + "step": 13714 + }, + { + "epoch": 0.87, + "grad_norm": 0.8709940910339355, + "learning_rate": 4.4389181420977814e-07, + "loss": 0.6293, + "step": 13715 + }, + { + "epoch": 0.87, + "grad_norm": 0.8453631401062012, + "learning_rate": 4.4346928650065957e-07, + "loss": 0.5408, + "step": 13716 + }, + { + "epoch": 0.87, + "grad_norm": 0.8846293091773987, + "learning_rate": 4.4304695065119807e-07, + "loss": 0.5588, + "step": 13717 + }, + { + "epoch": 0.87, + "grad_norm": 0.8700962066650391, + "learning_rate": 4.4262480667917774e-07, + "loss": 0.5479, + "step": 13718 + }, + { + "epoch": 0.87, + "grad_norm": 0.844928503036499, + "learning_rate": 4.422028546023721e-07, + "loss": 0.5638, + "step": 13719 + }, + { + "epoch": 0.87, + "grad_norm": 0.8891464471817017, + "learning_rate": 4.4178109443855033e-07, + "loss": 0.5845, + "step": 13720 + }, + { + "epoch": 0.87, + "grad_norm": 0.8719486594200134, + "learning_rate": 4.4135952620546876e-07, + "loss": 0.5724, + "step": 13721 + }, + { + "epoch": 0.87, + "grad_norm": 0.8617244958877563, + "learning_rate": 4.409381499208787e-07, + "loss": 0.5087, + "step": 13722 + }, + { + "epoch": 0.87, + "grad_norm": 0.8584579229354858, + "learning_rate": 4.405169656025238e-07, + "loss": 0.5701, + "step": 13723 + }, + { + "epoch": 0.87, + "grad_norm": 0.8903681635856628, + "learning_rate": 4.400959732681381e-07, + "loss": 0.5974, + "step": 13724 + }, + { + "epoch": 0.87, + "grad_norm": 0.878350019454956, + "learning_rate": 4.3967517293544814e-07, + "loss": 0.5478, + "step": 13725 + }, + { + "epoch": 0.87, + "grad_norm": 0.9635295271873474, + "learning_rate": 4.3925456462217244e-07, + "loss": 0.5976, + "step": 13726 + }, + { + "epoch": 0.87, + "grad_norm": 0.897746741771698, + "learning_rate": 4.3883414834602125e-07, + "loss": 0.5702, + "step": 13727 + }, + { + "epoch": 0.87, + "grad_norm": 0.8466120958328247, + "learning_rate": 4.384139241246982e-07, + "loss": 0.6266, + "step": 13728 + }, + { + "epoch": 0.87, + "grad_norm": 0.9046663045883179, + "learning_rate": 4.3799389197589525e-07, + "loss": 0.5742, + "step": 13729 + }, + { + "epoch": 0.87, + "grad_norm": 0.855974018573761, + "learning_rate": 4.375740519172994e-07, + "loss": 0.6135, + "step": 13730 + }, + { + "epoch": 0.87, + "grad_norm": 0.8562418818473816, + "learning_rate": 4.3715440396658816e-07, + "loss": 0.5726, + "step": 13731 + }, + { + "epoch": 0.87, + "grad_norm": 1.0470370054244995, + "learning_rate": 4.3673494814143234e-07, + "loss": 0.5896, + "step": 13732 + }, + { + "epoch": 0.87, + "grad_norm": 0.9556792378425598, + "learning_rate": 4.3631568445949403e-07, + "loss": 0.5409, + "step": 13733 + }, + { + "epoch": 0.87, + "grad_norm": 0.8872630000114441, + "learning_rate": 4.3589661293842624e-07, + "loss": 0.5565, + "step": 13734 + }, + { + "epoch": 0.87, + "grad_norm": 0.9071952104568481, + "learning_rate": 4.3547773359587377e-07, + "loss": 0.6007, + "step": 13735 + }, + { + "epoch": 0.87, + "grad_norm": 0.8532198071479797, + "learning_rate": 4.350590464494764e-07, + "loss": 0.521, + "step": 13736 + }, + { + "epoch": 0.87, + "grad_norm": 0.8936211466789246, + "learning_rate": 4.346405515168617e-07, + "loss": 0.5663, + "step": 13737 + }, + { + "epoch": 0.87, + "grad_norm": 0.9199041128158569, + "learning_rate": 4.342222488156511e-07, + "loss": 0.5873, + "step": 13738 + }, + { + "epoch": 0.87, + "grad_norm": 0.8879461884498596, + "learning_rate": 4.3380413836345893e-07, + "loss": 0.5838, + "step": 13739 + }, + { + "epoch": 0.87, + "grad_norm": 0.8619484305381775, + "learning_rate": 4.333862201778899e-07, + "loss": 0.5107, + "step": 13740 + }, + { + "epoch": 0.87, + "grad_norm": 0.9578720927238464, + "learning_rate": 4.329684942765411e-07, + "loss": 0.5738, + "step": 13741 + }, + { + "epoch": 0.87, + "grad_norm": 0.8812727928161621, + "learning_rate": 4.3255096067700176e-07, + "loss": 0.5691, + "step": 13742 + }, + { + "epoch": 0.87, + "grad_norm": 0.847726047039032, + "learning_rate": 4.321336193968523e-07, + "loss": 0.5662, + "step": 13743 + }, + { + "epoch": 0.87, + "grad_norm": 0.8409244418144226, + "learning_rate": 4.3171647045366525e-07, + "loss": 0.543, + "step": 13744 + }, + { + "epoch": 0.87, + "grad_norm": 0.966153621673584, + "learning_rate": 4.312995138650056e-07, + "loss": 0.5944, + "step": 13745 + }, + { + "epoch": 0.87, + "grad_norm": 0.8785676956176758, + "learning_rate": 4.3088274964843027e-07, + "loss": 0.5476, + "step": 13746 + }, + { + "epoch": 0.87, + "grad_norm": 0.9022130370140076, + "learning_rate": 4.3046617782148857e-07, + "loss": 0.5837, + "step": 13747 + }, + { + "epoch": 0.87, + "grad_norm": 0.9164488911628723, + "learning_rate": 4.300497984017182e-07, + "loss": 0.5348, + "step": 13748 + }, + { + "epoch": 0.87, + "grad_norm": 0.8544109463691711, + "learning_rate": 4.2963361140665405e-07, + "loss": 0.5099, + "step": 13749 + }, + { + "epoch": 0.87, + "grad_norm": 0.8812281489372253, + "learning_rate": 4.292176168538198e-07, + "loss": 0.5676, + "step": 13750 + }, + { + "epoch": 0.87, + "grad_norm": 0.9989331364631653, + "learning_rate": 4.2880181476073034e-07, + "loss": 0.5962, + "step": 13751 + }, + { + "epoch": 0.87, + "grad_norm": 0.8740145564079285, + "learning_rate": 4.283862051448945e-07, + "loss": 0.5772, + "step": 13752 + }, + { + "epoch": 0.87, + "grad_norm": 0.9067648649215698, + "learning_rate": 4.279707880238121e-07, + "loss": 0.57, + "step": 13753 + }, + { + "epoch": 0.87, + "grad_norm": 0.9112171530723572, + "learning_rate": 4.275555634149753e-07, + "loss": 0.605, + "step": 13754 + }, + { + "epoch": 0.87, + "grad_norm": 0.8743265867233276, + "learning_rate": 4.2714053133586785e-07, + "loss": 0.5284, + "step": 13755 + }, + { + "epoch": 0.87, + "grad_norm": 0.9052802324295044, + "learning_rate": 4.267256918039625e-07, + "loss": 0.5345, + "step": 13756 + }, + { + "epoch": 0.87, + "grad_norm": 0.840216875076294, + "learning_rate": 4.263110448367308e-07, + "loss": 0.5121, + "step": 13757 + }, + { + "epoch": 0.87, + "grad_norm": 0.8973818421363831, + "learning_rate": 4.2589659045163044e-07, + "loss": 0.6348, + "step": 13758 + }, + { + "epoch": 0.87, + "grad_norm": 0.8948500752449036, + "learning_rate": 4.254823286661125e-07, + "loss": 0.5814, + "step": 13759 + }, + { + "epoch": 0.87, + "grad_norm": 0.920590341091156, + "learning_rate": 4.250682594976191e-07, + "loss": 0.5692, + "step": 13760 + }, + { + "epoch": 0.87, + "grad_norm": 0.885006844997406, + "learning_rate": 4.2465438296358685e-07, + "loss": 0.5802, + "step": 13761 + }, + { + "epoch": 0.87, + "grad_norm": 0.847855269908905, + "learning_rate": 4.2424069908144236e-07, + "loss": 0.5368, + "step": 13762 + }, + { + "epoch": 0.87, + "grad_norm": 0.9912233948707581, + "learning_rate": 4.2382720786860453e-07, + "loss": 0.5901, + "step": 13763 + }, + { + "epoch": 0.87, + "grad_norm": 0.9090965986251831, + "learning_rate": 4.2341390934248273e-07, + "loss": 0.5469, + "step": 13764 + }, + { + "epoch": 0.87, + "grad_norm": 0.8936121463775635, + "learning_rate": 4.230008035204797e-07, + "loss": 0.5723, + "step": 13765 + }, + { + "epoch": 0.87, + "grad_norm": 0.8051524758338928, + "learning_rate": 4.225878904199926e-07, + "loss": 0.4853, + "step": 13766 + }, + { + "epoch": 0.87, + "grad_norm": 0.8978790640830994, + "learning_rate": 4.2217517005840423e-07, + "loss": 0.5568, + "step": 13767 + }, + { + "epoch": 0.87, + "grad_norm": 0.8630240559577942, + "learning_rate": 4.2176264245309517e-07, + "loss": 0.5686, + "step": 13768 + }, + { + "epoch": 0.87, + "grad_norm": 0.8735791444778442, + "learning_rate": 4.2135030762143424e-07, + "loss": 0.5625, + "step": 13769 + }, + { + "epoch": 0.87, + "grad_norm": 0.9239519238471985, + "learning_rate": 4.2093816558078373e-07, + "loss": 0.656, + "step": 13770 + }, + { + "epoch": 0.87, + "grad_norm": 0.8584021925926208, + "learning_rate": 4.205262163484991e-07, + "loss": 0.5384, + "step": 13771 + }, + { + "epoch": 0.87, + "grad_norm": 1.000178575515747, + "learning_rate": 4.2011445994192324e-07, + "loss": 0.5553, + "step": 13772 + }, + { + "epoch": 0.87, + "grad_norm": 1.005164384841919, + "learning_rate": 4.1970289637839556e-07, + "loss": 0.5724, + "step": 13773 + }, + { + "epoch": 0.87, + "grad_norm": 0.8568700551986694, + "learning_rate": 4.19291525675245e-07, + "loss": 0.567, + "step": 13774 + }, + { + "epoch": 0.87, + "grad_norm": 0.9740828275680542, + "learning_rate": 4.1888034784979326e-07, + "loss": 0.6062, + "step": 13775 + }, + { + "epoch": 0.87, + "grad_norm": 0.8327596187591553, + "learning_rate": 4.184693629193537e-07, + "loss": 0.627, + "step": 13776 + }, + { + "epoch": 0.87, + "grad_norm": 0.8938369154930115, + "learning_rate": 4.180585709012319e-07, + "loss": 0.577, + "step": 13777 + }, + { + "epoch": 0.87, + "grad_norm": 0.8749104738235474, + "learning_rate": 4.1764797181272296e-07, + "loss": 0.5819, + "step": 13778 + }, + { + "epoch": 0.87, + "grad_norm": 0.8669180870056152, + "learning_rate": 4.172375656711181e-07, + "loss": 0.5144, + "step": 13779 + }, + { + "epoch": 0.87, + "grad_norm": 0.9040730595588684, + "learning_rate": 4.1682735249369663e-07, + "loss": 0.6053, + "step": 13780 + }, + { + "epoch": 0.87, + "grad_norm": 0.9332876801490784, + "learning_rate": 4.1641733229773163e-07, + "loss": 0.5463, + "step": 13781 + }, + { + "epoch": 0.87, + "grad_norm": 0.8687313795089722, + "learning_rate": 4.1600750510048805e-07, + "loss": 0.5951, + "step": 13782 + }, + { + "epoch": 0.87, + "grad_norm": 0.9103281497955322, + "learning_rate": 4.1559787091922153e-07, + "loss": 0.5824, + "step": 13783 + }, + { + "epoch": 0.87, + "grad_norm": 0.840823233127594, + "learning_rate": 4.151884297711806e-07, + "loss": 0.5203, + "step": 13784 + }, + { + "epoch": 0.87, + "grad_norm": 0.890895664691925, + "learning_rate": 4.147791816736063e-07, + "loss": 0.5681, + "step": 13785 + }, + { + "epoch": 0.87, + "grad_norm": 0.8432772755622864, + "learning_rate": 4.143701266437283e-07, + "loss": 0.5618, + "step": 13786 + }, + { + "epoch": 0.87, + "grad_norm": 0.8869197368621826, + "learning_rate": 4.139612646987734e-07, + "loss": 0.5829, + "step": 13787 + }, + { + "epoch": 0.87, + "grad_norm": 0.9564074873924255, + "learning_rate": 4.135525958559555e-07, + "loss": 0.6305, + "step": 13788 + }, + { + "epoch": 0.87, + "grad_norm": 0.8258056044578552, + "learning_rate": 4.131441201324826e-07, + "loss": 0.5646, + "step": 13789 + }, + { + "epoch": 0.87, + "grad_norm": 0.9355778694152832, + "learning_rate": 4.1273583754555424e-07, + "loss": 0.5543, + "step": 13790 + }, + { + "epoch": 0.87, + "grad_norm": 0.895876407623291, + "learning_rate": 4.123277481123622e-07, + "loss": 0.5631, + "step": 13791 + }, + { + "epoch": 0.87, + "grad_norm": 0.8127149343490601, + "learning_rate": 4.1191985185008887e-07, + "loss": 0.5681, + "step": 13792 + }, + { + "epoch": 0.87, + "grad_norm": 0.8988841772079468, + "learning_rate": 4.1151214877591105e-07, + "loss": 0.5723, + "step": 13793 + }, + { + "epoch": 0.87, + "grad_norm": 0.9248142242431641, + "learning_rate": 4.1110463890699336e-07, + "loss": 0.5945, + "step": 13794 + }, + { + "epoch": 0.87, + "grad_norm": 0.9253085851669312, + "learning_rate": 4.1069732226049484e-07, + "loss": 0.6259, + "step": 13795 + }, + { + "epoch": 0.87, + "grad_norm": 0.8506118059158325, + "learning_rate": 4.102901988535685e-07, + "loss": 0.5496, + "step": 13796 + }, + { + "epoch": 0.87, + "grad_norm": 0.8817588686943054, + "learning_rate": 4.0988326870335494e-07, + "loss": 0.5534, + "step": 13797 + }, + { + "epoch": 0.87, + "grad_norm": 0.9294220805168152, + "learning_rate": 4.0947653182698887e-07, + "loss": 0.6071, + "step": 13798 + }, + { + "epoch": 0.87, + "grad_norm": 0.9263404011726379, + "learning_rate": 4.0906998824159715e-07, + "loss": 0.6115, + "step": 13799 + }, + { + "epoch": 0.87, + "grad_norm": 0.9097710251808167, + "learning_rate": 4.086636379642972e-07, + "loss": 0.5834, + "step": 13800 + }, + { + "epoch": 0.87, + "grad_norm": 0.9048157930374146, + "learning_rate": 4.0825748101220087e-07, + "loss": 0.5976, + "step": 13801 + }, + { + "epoch": 0.87, + "grad_norm": 0.8855105042457581, + "learning_rate": 4.078515174024067e-07, + "loss": 0.571, + "step": 13802 + }, + { + "epoch": 0.87, + "grad_norm": 0.8673086762428284, + "learning_rate": 4.074457471520099e-07, + "loss": 0.5968, + "step": 13803 + }, + { + "epoch": 0.87, + "grad_norm": 0.8577106595039368, + "learning_rate": 4.0704017027809797e-07, + "loss": 0.5826, + "step": 13804 + }, + { + "epoch": 0.87, + "grad_norm": 0.8799236416816711, + "learning_rate": 4.0663478679774604e-07, + "loss": 0.5497, + "step": 13805 + }, + { + "epoch": 0.87, + "grad_norm": 0.8574314117431641, + "learning_rate": 4.062295967280239e-07, + "loss": 0.5452, + "step": 13806 + }, + { + "epoch": 0.87, + "grad_norm": 0.8260728120803833, + "learning_rate": 4.058246000859939e-07, + "loss": 0.6122, + "step": 13807 + }, + { + "epoch": 0.87, + "grad_norm": 0.8879086375236511, + "learning_rate": 4.054197968887064e-07, + "loss": 0.5183, + "step": 13808 + }, + { + "epoch": 0.87, + "grad_norm": 0.8634669184684753, + "learning_rate": 4.0501518715320933e-07, + "loss": 0.5658, + "step": 13809 + }, + { + "epoch": 0.87, + "grad_norm": 0.8982515931129456, + "learning_rate": 4.046107708965369e-07, + "loss": 0.5977, + "step": 13810 + }, + { + "epoch": 0.88, + "grad_norm": 0.9243874549865723, + "learning_rate": 4.042065481357188e-07, + "loss": 0.5895, + "step": 13811 + }, + { + "epoch": 0.88, + "grad_norm": 0.9220935702323914, + "learning_rate": 4.038025188877753e-07, + "loss": 0.5842, + "step": 13812 + }, + { + "epoch": 0.88, + "grad_norm": 0.9362528920173645, + "learning_rate": 4.03398683169719e-07, + "loss": 0.5917, + "step": 13813 + }, + { + "epoch": 0.88, + "grad_norm": 0.9390722513198853, + "learning_rate": 4.029950409985539e-07, + "loss": 0.5915, + "step": 13814 + }, + { + "epoch": 0.88, + "grad_norm": 0.9555262327194214, + "learning_rate": 4.0259159239127656e-07, + "loss": 0.5891, + "step": 13815 + }, + { + "epoch": 0.88, + "grad_norm": 0.8341988325119019, + "learning_rate": 4.021883373648722e-07, + "loss": 0.5347, + "step": 13816 + }, + { + "epoch": 0.88, + "grad_norm": 0.8047258853912354, + "learning_rate": 4.017852759363239e-07, + "loss": 0.4463, + "step": 13817 + }, + { + "epoch": 0.88, + "grad_norm": 0.8843516707420349, + "learning_rate": 4.013824081226009e-07, + "loss": 0.5822, + "step": 13818 + }, + { + "epoch": 0.88, + "grad_norm": 0.889491856098175, + "learning_rate": 4.009797339406674e-07, + "loss": 0.5696, + "step": 13819 + }, + { + "epoch": 0.88, + "grad_norm": 0.866584062576294, + "learning_rate": 4.005772534074792e-07, + "loss": 0.5666, + "step": 13820 + }, + { + "epoch": 0.88, + "grad_norm": 0.9304389953613281, + "learning_rate": 4.001749665399807e-07, + "loss": 0.5963, + "step": 13821 + }, + { + "epoch": 0.88, + "grad_norm": 0.9130128026008606, + "learning_rate": 3.997728733551137e-07, + "loss": 0.5348, + "step": 13822 + }, + { + "epoch": 0.88, + "grad_norm": 0.8755511045455933, + "learning_rate": 3.993709738698093e-07, + "loss": 0.5714, + "step": 13823 + }, + { + "epoch": 0.88, + "grad_norm": 0.8576176762580872, + "learning_rate": 3.989692681009877e-07, + "loss": 0.5845, + "step": 13824 + }, + { + "epoch": 0.88, + "grad_norm": 0.9526224136352539, + "learning_rate": 3.985677560655643e-07, + "loss": 0.5841, + "step": 13825 + }, + { + "epoch": 0.88, + "grad_norm": 0.915798544883728, + "learning_rate": 3.9816643778044506e-07, + "loss": 0.6271, + "step": 13826 + }, + { + "epoch": 0.88, + "grad_norm": 0.8685756921768188, + "learning_rate": 3.977653132625292e-07, + "loss": 0.5363, + "step": 13827 + }, + { + "epoch": 0.88, + "grad_norm": 0.8983719348907471, + "learning_rate": 3.9736438252870655e-07, + "loss": 0.589, + "step": 13828 + }, + { + "epoch": 0.88, + "grad_norm": 0.9057663679122925, + "learning_rate": 3.969636455958564e-07, + "loss": 0.587, + "step": 13829 + }, + { + "epoch": 0.88, + "grad_norm": 0.886985719203949, + "learning_rate": 3.965631024808553e-07, + "loss": 0.6049, + "step": 13830 + }, + { + "epoch": 0.88, + "grad_norm": 0.8384401798248291, + "learning_rate": 3.961627532005691e-07, + "loss": 0.519, + "step": 13831 + }, + { + "epoch": 0.88, + "grad_norm": 0.851017951965332, + "learning_rate": 3.957625977718527e-07, + "loss": 0.5821, + "step": 13832 + }, + { + "epoch": 0.88, + "grad_norm": 0.8468850255012512, + "learning_rate": 3.953626362115559e-07, + "loss": 0.5651, + "step": 13833 + }, + { + "epoch": 0.88, + "grad_norm": 0.9317176938056946, + "learning_rate": 3.949628685365203e-07, + "loss": 0.5769, + "step": 13834 + }, + { + "epoch": 0.88, + "grad_norm": 0.888606607913971, + "learning_rate": 3.945632947635791e-07, + "loss": 0.5461, + "step": 13835 + }, + { + "epoch": 0.88, + "grad_norm": 0.8703384399414062, + "learning_rate": 3.941639149095566e-07, + "loss": 0.6008, + "step": 13836 + }, + { + "epoch": 0.88, + "grad_norm": 0.8924920558929443, + "learning_rate": 3.9376472899126884e-07, + "loss": 0.5616, + "step": 13837 + }, + { + "epoch": 0.88, + "grad_norm": 0.8976526856422424, + "learning_rate": 3.933657370255228e-07, + "loss": 0.603, + "step": 13838 + }, + { + "epoch": 0.88, + "grad_norm": 0.9381121397018433, + "learning_rate": 3.9296693902912244e-07, + "loss": 0.5989, + "step": 13839 + }, + { + "epoch": 0.88, + "grad_norm": 0.8531518578529358, + "learning_rate": 3.9256833501885693e-07, + "loss": 0.613, + "step": 13840 + }, + { + "epoch": 0.88, + "grad_norm": 0.8720222115516663, + "learning_rate": 3.9216992501151074e-07, + "loss": 0.5606, + "step": 13841 + }, + { + "epoch": 0.88, + "grad_norm": 0.8882898688316345, + "learning_rate": 3.917717090238593e-07, + "loss": 0.6199, + "step": 13842 + }, + { + "epoch": 0.88, + "grad_norm": 0.898091197013855, + "learning_rate": 3.913736870726703e-07, + "loss": 0.5583, + "step": 13843 + }, + { + "epoch": 0.88, + "grad_norm": 0.8894566893577576, + "learning_rate": 3.909758591747037e-07, + "loss": 0.6388, + "step": 13844 + }, + { + "epoch": 0.88, + "grad_norm": 0.8718437552452087, + "learning_rate": 3.905782253467094e-07, + "loss": 0.5293, + "step": 13845 + }, + { + "epoch": 0.88, + "grad_norm": 0.8743876218795776, + "learning_rate": 3.9018078560543015e-07, + "loss": 0.5883, + "step": 13846 + }, + { + "epoch": 0.88, + "grad_norm": 0.9666427373886108, + "learning_rate": 3.8978353996760365e-07, + "loss": 0.5466, + "step": 13847 + }, + { + "epoch": 0.88, + "grad_norm": 0.8270087838172913, + "learning_rate": 3.8938648844995374e-07, + "loss": 0.5458, + "step": 13848 + }, + { + "epoch": 0.88, + "grad_norm": 0.8940174579620361, + "learning_rate": 3.889896310691993e-07, + "loss": 0.5766, + "step": 13849 + }, + { + "epoch": 0.88, + "grad_norm": 0.9705901741981506, + "learning_rate": 3.885929678420508e-07, + "loss": 0.5894, + "step": 13850 + }, + { + "epoch": 0.88, + "grad_norm": 0.8427651524543762, + "learning_rate": 3.881964987852105e-07, + "loss": 0.5606, + "step": 13851 + }, + { + "epoch": 0.88, + "grad_norm": 0.9404253363609314, + "learning_rate": 3.878002239153739e-07, + "loss": 0.635, + "step": 13852 + }, + { + "epoch": 0.88, + "grad_norm": 0.8982987403869629, + "learning_rate": 3.874041432492237e-07, + "loss": 0.6041, + "step": 13853 + }, + { + "epoch": 0.88, + "grad_norm": 0.9181349873542786, + "learning_rate": 3.870082568034389e-07, + "loss": 0.5793, + "step": 13854 + }, + { + "epoch": 0.88, + "grad_norm": 0.9045166969299316, + "learning_rate": 3.866125645946894e-07, + "loss": 0.533, + "step": 13855 + }, + { + "epoch": 0.88, + "grad_norm": 0.8742471933364868, + "learning_rate": 3.862170666396359e-07, + "loss": 0.5753, + "step": 13856 + }, + { + "epoch": 0.88, + "grad_norm": 0.8175247311592102, + "learning_rate": 3.858217629549316e-07, + "loss": 0.5068, + "step": 13857 + }, + { + "epoch": 0.88, + "grad_norm": 0.9291229248046875, + "learning_rate": 3.8542665355722154e-07, + "loss": 0.5553, + "step": 13858 + }, + { + "epoch": 0.88, + "grad_norm": 0.8577315807342529, + "learning_rate": 3.8503173846314137e-07, + "loss": 0.489, + "step": 13859 + }, + { + "epoch": 0.88, + "grad_norm": 0.9240617752075195, + "learning_rate": 3.846370176893205e-07, + "loss": 0.5803, + "step": 13860 + }, + { + "epoch": 0.88, + "grad_norm": 0.9180030226707458, + "learning_rate": 3.8424249125238065e-07, + "loss": 0.5931, + "step": 13861 + }, + { + "epoch": 0.88, + "grad_norm": 0.8835217356681824, + "learning_rate": 3.838481591689308e-07, + "loss": 0.5581, + "step": 13862 + }, + { + "epoch": 0.88, + "grad_norm": 0.9661378860473633, + "learning_rate": 3.834540214555771e-07, + "loss": 0.628, + "step": 13863 + }, + { + "epoch": 0.88, + "grad_norm": 0.8079046607017517, + "learning_rate": 3.830600781289151e-07, + "loss": 0.5342, + "step": 13864 + }, + { + "epoch": 0.88, + "grad_norm": 0.8845729231834412, + "learning_rate": 3.826663292055316e-07, + "loss": 0.5631, + "step": 13865 + }, + { + "epoch": 0.88, + "grad_norm": 0.9163376092910767, + "learning_rate": 3.822727747020072e-07, + "loss": 0.6073, + "step": 13866 + }, + { + "epoch": 0.88, + "grad_norm": 0.9018417596817017, + "learning_rate": 3.818794146349114e-07, + "loss": 0.5893, + "step": 13867 + }, + { + "epoch": 0.88, + "grad_norm": 0.9381417036056519, + "learning_rate": 3.8148624902080764e-07, + "loss": 0.5507, + "step": 13868 + }, + { + "epoch": 0.88, + "grad_norm": 0.8565617203712463, + "learning_rate": 3.8109327787625273e-07, + "loss": 0.581, + "step": 13869 + }, + { + "epoch": 0.88, + "grad_norm": 0.8831982016563416, + "learning_rate": 3.807005012177911e-07, + "loss": 0.5437, + "step": 13870 + }, + { + "epoch": 0.88, + "grad_norm": 0.9772710204124451, + "learning_rate": 3.803079190619624e-07, + "loss": 0.6289, + "step": 13871 + }, + { + "epoch": 0.88, + "grad_norm": 0.892636239528656, + "learning_rate": 3.7991553142529616e-07, + "loss": 0.5559, + "step": 13872 + }, + { + "epoch": 0.88, + "grad_norm": 0.8699362277984619, + "learning_rate": 3.7952333832431466e-07, + "loss": 0.5419, + "step": 13873 + }, + { + "epoch": 0.88, + "grad_norm": 0.8843465447425842, + "learning_rate": 3.7913133977553306e-07, + "loss": 0.5928, + "step": 13874 + }, + { + "epoch": 0.88, + "grad_norm": 0.9290990233421326, + "learning_rate": 3.7873953579545486e-07, + "loss": 0.5642, + "step": 13875 + }, + { + "epoch": 0.88, + "grad_norm": 0.9399954080581665, + "learning_rate": 3.783479264005779e-07, + "loss": 0.5733, + "step": 13876 + }, + { + "epoch": 0.88, + "grad_norm": 0.9060240387916565, + "learning_rate": 3.779565116073941e-07, + "loss": 0.5444, + "step": 13877 + }, + { + "epoch": 0.88, + "grad_norm": 0.8936532735824585, + "learning_rate": 3.775652914323813e-07, + "loss": 0.6461, + "step": 13878 + }, + { + "epoch": 0.88, + "grad_norm": 0.9363529086112976, + "learning_rate": 3.771742658920141e-07, + "loss": 0.586, + "step": 13879 + }, + { + "epoch": 0.88, + "grad_norm": 0.8696059584617615, + "learning_rate": 3.767834350027572e-07, + "loss": 0.5465, + "step": 13880 + }, + { + "epoch": 0.88, + "grad_norm": 0.8789991736412048, + "learning_rate": 3.7639279878106616e-07, + "loss": 0.5846, + "step": 13881 + }, + { + "epoch": 0.88, + "grad_norm": 0.9171331524848938, + "learning_rate": 3.7600235724339127e-07, + "loss": 0.5901, + "step": 13882 + }, + { + "epoch": 0.88, + "grad_norm": 0.8733245134353638, + "learning_rate": 3.756121104061705e-07, + "loss": 0.54, + "step": 13883 + }, + { + "epoch": 0.88, + "grad_norm": 0.8445425033569336, + "learning_rate": 3.752220582858368e-07, + "loss": 0.5577, + "step": 13884 + }, + { + "epoch": 0.88, + "grad_norm": 0.8852535486221313, + "learning_rate": 3.748322008988137e-07, + "loss": 0.6001, + "step": 13885 + }, + { + "epoch": 0.88, + "grad_norm": 0.9022545218467712, + "learning_rate": 3.744425382615169e-07, + "loss": 0.6012, + "step": 13886 + }, + { + "epoch": 0.88, + "grad_norm": 0.8933830261230469, + "learning_rate": 3.7405307039035387e-07, + "loss": 0.6361, + "step": 13887 + }, + { + "epoch": 0.88, + "grad_norm": 0.8915839195251465, + "learning_rate": 3.7366379730172376e-07, + "loss": 0.6196, + "step": 13888 + }, + { + "epoch": 0.88, + "grad_norm": 0.8615158200263977, + "learning_rate": 3.732747190120162e-07, + "loss": 0.6029, + "step": 13889 + }, + { + "epoch": 0.88, + "grad_norm": 0.8902248740196228, + "learning_rate": 3.728858355376164e-07, + "loss": 0.6026, + "step": 13890 + }, + { + "epoch": 0.88, + "grad_norm": 0.8872123956680298, + "learning_rate": 3.724971468948968e-07, + "loss": 0.5928, + "step": 13891 + }, + { + "epoch": 0.88, + "grad_norm": 0.8912094831466675, + "learning_rate": 3.721086531002244e-07, + "loss": 0.588, + "step": 13892 + }, + { + "epoch": 0.88, + "grad_norm": 0.9006378054618835, + "learning_rate": 3.7172035416995765e-07, + "loss": 0.5815, + "step": 13893 + }, + { + "epoch": 0.88, + "grad_norm": 0.877053439617157, + "learning_rate": 3.7133225012044585e-07, + "loss": 0.5095, + "step": 13894 + }, + { + "epoch": 0.88, + "grad_norm": 0.9015281796455383, + "learning_rate": 3.709443409680308e-07, + "loss": 0.5498, + "step": 13895 + }, + { + "epoch": 0.88, + "grad_norm": 0.8962835073471069, + "learning_rate": 3.7055662672904723e-07, + "loss": 0.569, + "step": 13896 + }, + { + "epoch": 0.88, + "grad_norm": 0.8946380615234375, + "learning_rate": 3.7016910741981825e-07, + "loss": 0.5726, + "step": 13897 + }, + { + "epoch": 0.88, + "grad_norm": 0.8510831594467163, + "learning_rate": 3.6978178305666357e-07, + "loss": 0.496, + "step": 13898 + }, + { + "epoch": 0.88, + "grad_norm": 0.9237155914306641, + "learning_rate": 3.693946536558896e-07, + "loss": 0.5727, + "step": 13899 + }, + { + "epoch": 0.88, + "grad_norm": 0.8890257477760315, + "learning_rate": 3.6900771923379817e-07, + "loss": 0.5797, + "step": 13900 + }, + { + "epoch": 0.88, + "grad_norm": 0.931348979473114, + "learning_rate": 3.6862097980668255e-07, + "loss": 0.6333, + "step": 13901 + }, + { + "epoch": 0.88, + "grad_norm": 0.8788846135139465, + "learning_rate": 3.68234435390824e-07, + "loss": 0.6, + "step": 13902 + }, + { + "epoch": 0.88, + "grad_norm": 0.9328796863555908, + "learning_rate": 3.6784808600250186e-07, + "loss": 0.5973, + "step": 13903 + }, + { + "epoch": 0.88, + "grad_norm": 0.9625527262687683, + "learning_rate": 3.674619316579836e-07, + "loss": 0.5795, + "step": 13904 + }, + { + "epoch": 0.88, + "grad_norm": 0.9201703071594238, + "learning_rate": 3.670759723735273e-07, + "loss": 0.5592, + "step": 13905 + }, + { + "epoch": 0.88, + "grad_norm": 0.8659148812294006, + "learning_rate": 3.666902081653845e-07, + "loss": 0.5503, + "step": 13906 + }, + { + "epoch": 0.88, + "grad_norm": 0.9211107492446899, + "learning_rate": 3.663046390497993e-07, + "loss": 0.5797, + "step": 13907 + }, + { + "epoch": 0.88, + "grad_norm": 0.9264574646949768, + "learning_rate": 3.659192650430066e-07, + "loss": 0.5992, + "step": 13908 + }, + { + "epoch": 0.88, + "grad_norm": 0.8879082202911377, + "learning_rate": 3.655340861612333e-07, + "loss": 0.5596, + "step": 13909 + }, + { + "epoch": 0.88, + "grad_norm": 0.8419327139854431, + "learning_rate": 3.6514910242069547e-07, + "loss": 0.5711, + "step": 13910 + }, + { + "epoch": 0.88, + "grad_norm": 0.8223779797554016, + "learning_rate": 3.647643138376067e-07, + "loss": 0.5581, + "step": 13911 + }, + { + "epoch": 0.88, + "grad_norm": 0.9604656100273132, + "learning_rate": 3.6437972042816904e-07, + "loss": 0.6241, + "step": 13912 + }, + { + "epoch": 0.88, + "grad_norm": 0.8464024662971497, + "learning_rate": 3.6399532220857403e-07, + "loss": 0.5518, + "step": 13913 + }, + { + "epoch": 0.88, + "grad_norm": 0.8726653456687927, + "learning_rate": 3.6361111919500815e-07, + "loss": 0.5917, + "step": 13914 + }, + { + "epoch": 0.88, + "grad_norm": 0.888209879398346, + "learning_rate": 3.6322711140364953e-07, + "loss": 0.6297, + "step": 13915 + }, + { + "epoch": 0.88, + "grad_norm": 0.8308035731315613, + "learning_rate": 3.628432988506675e-07, + "loss": 0.5388, + "step": 13916 + }, + { + "epoch": 0.88, + "grad_norm": 0.9365667700767517, + "learning_rate": 3.6245968155222243e-07, + "loss": 0.5515, + "step": 13917 + }, + { + "epoch": 0.88, + "grad_norm": 0.8524565100669861, + "learning_rate": 3.6207625952446756e-07, + "loss": 0.5938, + "step": 13918 + }, + { + "epoch": 0.88, + "grad_norm": 0.8615753650665283, + "learning_rate": 3.616930327835466e-07, + "loss": 0.5269, + "step": 13919 + }, + { + "epoch": 0.88, + "grad_norm": 0.894736111164093, + "learning_rate": 3.613100013455972e-07, + "loss": 0.6134, + "step": 13920 + }, + { + "epoch": 0.88, + "grad_norm": 0.9682538509368896, + "learning_rate": 3.609271652267465e-07, + "loss": 0.5901, + "step": 13921 + }, + { + "epoch": 0.88, + "grad_norm": 0.9055116772651672, + "learning_rate": 3.6054452444311493e-07, + "loss": 0.6037, + "step": 13922 + }, + { + "epoch": 0.88, + "grad_norm": 0.8035820126533508, + "learning_rate": 3.601620790108135e-07, + "loss": 0.5568, + "step": 13923 + }, + { + "epoch": 0.88, + "grad_norm": 0.930554211139679, + "learning_rate": 3.597798289459464e-07, + "loss": 0.6585, + "step": 13924 + }, + { + "epoch": 0.88, + "grad_norm": 0.9319306015968323, + "learning_rate": 3.593977742646088e-07, + "loss": 0.5412, + "step": 13925 + }, + { + "epoch": 0.88, + "grad_norm": 0.8614120483398438, + "learning_rate": 3.5901591498288755e-07, + "loss": 0.5399, + "step": 13926 + }, + { + "epoch": 0.88, + "grad_norm": 0.897907555103302, + "learning_rate": 3.5863425111686e-07, + "loss": 0.5419, + "step": 13927 + }, + { + "epoch": 0.88, + "grad_norm": 0.9247115254402161, + "learning_rate": 3.5825278268259987e-07, + "loss": 0.6008, + "step": 13928 + }, + { + "epoch": 0.88, + "grad_norm": 0.886035680770874, + "learning_rate": 3.5787150969616657e-07, + "loss": 0.559, + "step": 13929 + }, + { + "epoch": 0.88, + "grad_norm": 0.9609770774841309, + "learning_rate": 3.57490432173615e-07, + "loss": 0.5855, + "step": 13930 + }, + { + "epoch": 0.88, + "grad_norm": 0.8835691809654236, + "learning_rate": 3.5710955013099233e-07, + "loss": 0.5454, + "step": 13931 + }, + { + "epoch": 0.88, + "grad_norm": 0.9028952717781067, + "learning_rate": 3.5672886358433356e-07, + "loss": 0.57, + "step": 13932 + }, + { + "epoch": 0.88, + "grad_norm": 0.8876438140869141, + "learning_rate": 3.5634837254967023e-07, + "loss": 0.5503, + "step": 13933 + }, + { + "epoch": 0.88, + "grad_norm": 0.8692540526390076, + "learning_rate": 3.559680770430235e-07, + "loss": 0.5504, + "step": 13934 + }, + { + "epoch": 0.88, + "grad_norm": 0.8966943025588989, + "learning_rate": 3.555879770804049e-07, + "loss": 0.6109, + "step": 13935 + }, + { + "epoch": 0.88, + "grad_norm": 0.8818347454071045, + "learning_rate": 3.5520807267782007e-07, + "loss": 0.5318, + "step": 13936 + }, + { + "epoch": 0.88, + "grad_norm": 0.9468558430671692, + "learning_rate": 3.548283638512651e-07, + "loss": 0.6075, + "step": 13937 + }, + { + "epoch": 0.88, + "grad_norm": 0.8646183013916016, + "learning_rate": 3.544488506167282e-07, + "loss": 0.5779, + "step": 13938 + }, + { + "epoch": 0.88, + "grad_norm": 0.8838092088699341, + "learning_rate": 3.5406953299019056e-07, + "loss": 0.5319, + "step": 13939 + }, + { + "epoch": 0.88, + "grad_norm": 0.9578419327735901, + "learning_rate": 3.5369041098762103e-07, + "loss": 0.5811, + "step": 13940 + }, + { + "epoch": 0.88, + "grad_norm": 0.9535823464393616, + "learning_rate": 3.5331148462498635e-07, + "loss": 0.5944, + "step": 13941 + }, + { + "epoch": 0.88, + "grad_norm": 0.8988412618637085, + "learning_rate": 3.529327539182403e-07, + "loss": 0.5615, + "step": 13942 + }, + { + "epoch": 0.88, + "grad_norm": 0.8499272465705872, + "learning_rate": 3.5255421888332976e-07, + "loss": 0.5468, + "step": 13943 + }, + { + "epoch": 0.88, + "grad_norm": 0.8369562029838562, + "learning_rate": 3.5217587953619404e-07, + "loss": 0.5457, + "step": 13944 + }, + { + "epoch": 0.88, + "grad_norm": 0.8756216168403625, + "learning_rate": 3.517977358927632e-07, + "loss": 0.5575, + "step": 13945 + }, + { + "epoch": 0.88, + "grad_norm": 0.8889182209968567, + "learning_rate": 3.514197879689596e-07, + "loss": 0.5777, + "step": 13946 + }, + { + "epoch": 0.88, + "grad_norm": 0.9014686942100525, + "learning_rate": 3.5104203578069817e-07, + "loss": 0.5799, + "step": 13947 + }, + { + "epoch": 0.88, + "grad_norm": 0.8833813071250916, + "learning_rate": 3.506644793438835e-07, + "loss": 0.5552, + "step": 13948 + }, + { + "epoch": 0.88, + "grad_norm": 0.9554223418235779, + "learning_rate": 3.502871186744128e-07, + "loss": 0.5921, + "step": 13949 + }, + { + "epoch": 0.88, + "grad_norm": 0.879019021987915, + "learning_rate": 3.499099537881784e-07, + "loss": 0.6137, + "step": 13950 + }, + { + "epoch": 0.88, + "grad_norm": 0.9680423140525818, + "learning_rate": 3.495329847010581e-07, + "loss": 0.5773, + "step": 13951 + }, + { + "epoch": 0.88, + "grad_norm": 0.8740622997283936, + "learning_rate": 3.4915621142892595e-07, + "loss": 0.5879, + "step": 13952 + }, + { + "epoch": 0.88, + "grad_norm": 0.9262283444404602, + "learning_rate": 3.48779633987647e-07, + "loss": 0.5917, + "step": 13953 + }, + { + "epoch": 0.88, + "grad_norm": 0.8888399004936218, + "learning_rate": 3.4840325239307693e-07, + "loss": 0.6129, + "step": 13954 + }, + { + "epoch": 0.88, + "grad_norm": 0.855827808380127, + "learning_rate": 3.4802706666106525e-07, + "loss": 0.5782, + "step": 13955 + }, + { + "epoch": 0.88, + "grad_norm": 0.9697046279907227, + "learning_rate": 3.476510768074498e-07, + "loss": 0.5647, + "step": 13956 + }, + { + "epoch": 0.88, + "grad_norm": 0.8779304623603821, + "learning_rate": 3.4727528284806247e-07, + "loss": 0.5947, + "step": 13957 + }, + { + "epoch": 0.88, + "grad_norm": 0.8670563697814941, + "learning_rate": 3.468996847987288e-07, + "loss": 0.5335, + "step": 13958 + }, + { + "epoch": 0.88, + "grad_norm": 0.9170581102371216, + "learning_rate": 3.4652428267526184e-07, + "loss": 0.6615, + "step": 13959 + }, + { + "epoch": 0.88, + "grad_norm": 0.8803734183311462, + "learning_rate": 3.4614907649346884e-07, + "loss": 0.5578, + "step": 13960 + }, + { + "epoch": 0.88, + "grad_norm": 1.0183887481689453, + "learning_rate": 3.4577406626914947e-07, + "loss": 0.6096, + "step": 13961 + }, + { + "epoch": 0.88, + "grad_norm": 0.911680281162262, + "learning_rate": 3.453992520180921e-07, + "loss": 0.5951, + "step": 13962 + }, + { + "epoch": 0.88, + "grad_norm": 0.8374574780464172, + "learning_rate": 3.4502463375608143e-07, + "loss": 0.5592, + "step": 13963 + }, + { + "epoch": 0.88, + "grad_norm": 0.862108588218689, + "learning_rate": 3.446502114988892e-07, + "loss": 0.5704, + "step": 13964 + }, + { + "epoch": 0.88, + "grad_norm": 0.9024475812911987, + "learning_rate": 3.442759852622812e-07, + "loss": 0.6107, + "step": 13965 + }, + { + "epoch": 0.88, + "grad_norm": 0.8448460102081299, + "learning_rate": 3.4390195506201594e-07, + "loss": 0.5212, + "step": 13966 + }, + { + "epoch": 0.88, + "grad_norm": 0.8650651574134827, + "learning_rate": 3.435281209138419e-07, + "loss": 0.5699, + "step": 13967 + }, + { + "epoch": 0.88, + "grad_norm": 0.8967364430427551, + "learning_rate": 3.4315448283349985e-07, + "loss": 0.58, + "step": 13968 + }, + { + "epoch": 0.89, + "grad_norm": 0.8695257902145386, + "learning_rate": 3.4278104083672383e-07, + "loss": 0.5894, + "step": 13969 + }, + { + "epoch": 0.89, + "grad_norm": 0.85512775182724, + "learning_rate": 3.424077949392346e-07, + "loss": 0.5647, + "step": 13970 + }, + { + "epoch": 0.89, + "grad_norm": 0.908794641494751, + "learning_rate": 3.4203474515675293e-07, + "loss": 0.5832, + "step": 13971 + }, + { + "epoch": 0.89, + "grad_norm": 0.859935462474823, + "learning_rate": 3.4166189150498297e-07, + "loss": 0.5769, + "step": 13972 + }, + { + "epoch": 0.89, + "grad_norm": 0.892041802406311, + "learning_rate": 3.4128923399962543e-07, + "loss": 0.5362, + "step": 13973 + }, + { + "epoch": 0.89, + "grad_norm": 0.9205070734024048, + "learning_rate": 3.4091677265637224e-07, + "loss": 0.6354, + "step": 13974 + }, + { + "epoch": 0.89, + "grad_norm": 0.8668642640113831, + "learning_rate": 3.405445074909053e-07, + "loss": 0.5666, + "step": 13975 + }, + { + "epoch": 0.89, + "grad_norm": 0.8559014797210693, + "learning_rate": 3.401724385189009e-07, + "loss": 0.528, + "step": 13976 + }, + { + "epoch": 0.89, + "grad_norm": 0.8495075702667236, + "learning_rate": 3.398005657560249e-07, + "loss": 0.568, + "step": 13977 + }, + { + "epoch": 0.89, + "grad_norm": 0.8912281394004822, + "learning_rate": 3.394288892179348e-07, + "loss": 0.5637, + "step": 13978 + }, + { + "epoch": 0.89, + "grad_norm": 0.8422101140022278, + "learning_rate": 3.390574089202814e-07, + "loss": 0.5824, + "step": 13979 + }, + { + "epoch": 0.89, + "grad_norm": 0.8872683048248291, + "learning_rate": 3.3868612487870657e-07, + "loss": 0.6023, + "step": 13980 + }, + { + "epoch": 0.89, + "grad_norm": 0.871668815612793, + "learning_rate": 3.3831503710884286e-07, + "loss": 0.5728, + "step": 13981 + }, + { + "epoch": 0.89, + "grad_norm": 0.9398965835571289, + "learning_rate": 3.379441456263166e-07, + "loss": 0.5988, + "step": 13982 + }, + { + "epoch": 0.89, + "grad_norm": 0.804894208908081, + "learning_rate": 3.375734504467437e-07, + "loss": 0.545, + "step": 13983 + }, + { + "epoch": 0.89, + "grad_norm": 0.8272179961204529, + "learning_rate": 3.372029515857339e-07, + "loss": 0.5751, + "step": 13984 + }, + { + "epoch": 0.89, + "grad_norm": 0.8695221543312073, + "learning_rate": 3.368326490588875e-07, + "loss": 0.5484, + "step": 13985 + }, + { + "epoch": 0.89, + "grad_norm": 0.865994930267334, + "learning_rate": 3.364625428817958e-07, + "loss": 0.5282, + "step": 13986 + }, + { + "epoch": 0.89, + "grad_norm": 0.9086118340492249, + "learning_rate": 3.360926330700431e-07, + "loss": 0.573, + "step": 13987 + }, + { + "epoch": 0.89, + "grad_norm": 0.8299586772918701, + "learning_rate": 3.3572291963920536e-07, + "loss": 0.5297, + "step": 13988 + }, + { + "epoch": 0.89, + "grad_norm": 0.8901399970054626, + "learning_rate": 3.353534026048494e-07, + "loss": 0.6119, + "step": 13989 + }, + { + "epoch": 0.89, + "grad_norm": 0.8746377229690552, + "learning_rate": 3.3498408198253453e-07, + "loss": 0.5493, + "step": 13990 + }, + { + "epoch": 0.89, + "grad_norm": 0.8297358751296997, + "learning_rate": 3.3461495778781104e-07, + "loss": 0.5338, + "step": 13991 + }, + { + "epoch": 0.89, + "grad_norm": 0.826378345489502, + "learning_rate": 3.342460300362227e-07, + "loss": 0.5438, + "step": 13992 + }, + { + "epoch": 0.89, + "grad_norm": 0.8547459244728088, + "learning_rate": 3.3387729874330367e-07, + "loss": 0.5513, + "step": 13993 + }, + { + "epoch": 0.89, + "grad_norm": 0.8909960985183716, + "learning_rate": 3.335087639245782e-07, + "loss": 0.6038, + "step": 13994 + }, + { + "epoch": 0.89, + "grad_norm": 0.992056131362915, + "learning_rate": 3.331404255955656e-07, + "loss": 0.5645, + "step": 13995 + }, + { + "epoch": 0.89, + "grad_norm": 0.8902263641357422, + "learning_rate": 3.327722837717745e-07, + "loss": 0.5573, + "step": 13996 + }, + { + "epoch": 0.89, + "grad_norm": 0.896858274936676, + "learning_rate": 3.32404338468707e-07, + "loss": 0.5627, + "step": 13997 + }, + { + "epoch": 0.89, + "grad_norm": 0.9204726219177246, + "learning_rate": 3.320365897018546e-07, + "loss": 0.5782, + "step": 13998 + }, + { + "epoch": 0.89, + "grad_norm": 0.9295701384544373, + "learning_rate": 3.316690374867043e-07, + "loss": 0.5769, + "step": 13999 + }, + { + "epoch": 0.89, + "grad_norm": 0.8957133293151855, + "learning_rate": 3.313016818387288e-07, + "loss": 0.5495, + "step": 14000 + }, + { + "epoch": 0.89, + "grad_norm": 0.850740909576416, + "learning_rate": 3.309345227734001e-07, + "loss": 0.5891, + "step": 14001 + }, + { + "epoch": 0.89, + "grad_norm": 0.8553286790847778, + "learning_rate": 3.305675603061753e-07, + "loss": 0.5852, + "step": 14002 + }, + { + "epoch": 0.89, + "grad_norm": 0.8360522389411926, + "learning_rate": 3.3020079445250655e-07, + "loss": 0.6143, + "step": 14003 + }, + { + "epoch": 0.89, + "grad_norm": 0.8745806813240051, + "learning_rate": 3.2983422522783747e-07, + "loss": 0.5673, + "step": 14004 + }, + { + "epoch": 0.89, + "grad_norm": 0.8642773032188416, + "learning_rate": 3.2946785264760305e-07, + "loss": 0.5315, + "step": 14005 + }, + { + "epoch": 0.89, + "grad_norm": 0.9193217158317566, + "learning_rate": 3.291016767272298e-07, + "loss": 0.6075, + "step": 14006 + }, + { + "epoch": 0.89, + "grad_norm": 0.8983094692230225, + "learning_rate": 3.287356974821365e-07, + "loss": 0.5878, + "step": 14007 + }, + { + "epoch": 0.89, + "grad_norm": 0.8727191090583801, + "learning_rate": 3.28369914927732e-07, + "loss": 0.5405, + "step": 14008 + }, + { + "epoch": 0.89, + "grad_norm": 0.8366736769676208, + "learning_rate": 3.2800432907941935e-07, + "loss": 0.5571, + "step": 14009 + }, + { + "epoch": 0.89, + "grad_norm": 0.9027994275093079, + "learning_rate": 3.276389399525914e-07, + "loss": 0.5512, + "step": 14010 + }, + { + "epoch": 0.89, + "grad_norm": 0.9058107137680054, + "learning_rate": 3.272737475626342e-07, + "loss": 0.5707, + "step": 14011 + }, + { + "epoch": 0.89, + "grad_norm": 0.9187793731689453, + "learning_rate": 3.269087519249242e-07, + "loss": 0.5556, + "step": 14012 + }, + { + "epoch": 0.89, + "grad_norm": 0.8657212257385254, + "learning_rate": 3.2654395305482924e-07, + "loss": 0.5968, + "step": 14013 + }, + { + "epoch": 0.89, + "grad_norm": 0.8940473198890686, + "learning_rate": 3.2617935096771137e-07, + "loss": 0.5712, + "step": 14014 + }, + { + "epoch": 0.89, + "grad_norm": 0.876758873462677, + "learning_rate": 3.258149456789228e-07, + "loss": 0.5664, + "step": 14015 + }, + { + "epoch": 0.89, + "grad_norm": 0.9137895703315735, + "learning_rate": 3.2545073720380573e-07, + "loss": 0.5223, + "step": 14016 + }, + { + "epoch": 0.89, + "grad_norm": 0.9135635495185852, + "learning_rate": 3.2508672555769617e-07, + "loss": 0.5516, + "step": 14017 + }, + { + "epoch": 0.89, + "grad_norm": 1.005510926246643, + "learning_rate": 3.2472291075592246e-07, + "loss": 0.6037, + "step": 14018 + }, + { + "epoch": 0.89, + "grad_norm": 0.8511359691619873, + "learning_rate": 3.243592928138023e-07, + "loss": 0.542, + "step": 14019 + }, + { + "epoch": 0.89, + "grad_norm": 0.8994077444076538, + "learning_rate": 3.2399587174664794e-07, + "loss": 0.5644, + "step": 14020 + }, + { + "epoch": 0.89, + "grad_norm": 0.8860614895820618, + "learning_rate": 3.236326475697593e-07, + "loss": 0.5672, + "step": 14021 + }, + { + "epoch": 0.89, + "grad_norm": 0.9307529926300049, + "learning_rate": 3.232696202984326e-07, + "loss": 0.5568, + "step": 14022 + }, + { + "epoch": 0.89, + "grad_norm": 0.7981402277946472, + "learning_rate": 3.2290678994795377e-07, + "loss": 0.537, + "step": 14023 + }, + { + "epoch": 0.89, + "grad_norm": 0.8489423990249634, + "learning_rate": 3.2254415653359906e-07, + "loss": 0.5791, + "step": 14024 + }, + { + "epoch": 0.89, + "grad_norm": 0.9137423634529114, + "learning_rate": 3.2218172007063787e-07, + "loss": 0.6229, + "step": 14025 + }, + { + "epoch": 0.89, + "grad_norm": 0.8917653560638428, + "learning_rate": 3.218194805743319e-07, + "loss": 0.5276, + "step": 14026 + }, + { + "epoch": 0.89, + "grad_norm": 0.8989799618721008, + "learning_rate": 3.2145743805993334e-07, + "loss": 0.5448, + "step": 14027 + }, + { + "epoch": 0.89, + "grad_norm": 0.8666809797286987, + "learning_rate": 3.210955925426873e-07, + "loss": 0.5514, + "step": 14028 + }, + { + "epoch": 0.89, + "grad_norm": 0.9025545120239258, + "learning_rate": 3.2073394403782823e-07, + "loss": 0.6176, + "step": 14029 + }, + { + "epoch": 0.89, + "grad_norm": 0.8828505873680115, + "learning_rate": 3.2037249256058445e-07, + "loss": 0.6386, + "step": 14030 + }, + { + "epoch": 0.89, + "grad_norm": 0.9091066122055054, + "learning_rate": 3.2001123812617663e-07, + "loss": 0.5939, + "step": 14031 + }, + { + "epoch": 0.89, + "grad_norm": 0.9628225564956665, + "learning_rate": 3.196501807498148e-07, + "loss": 0.5717, + "step": 14032 + }, + { + "epoch": 0.89, + "grad_norm": 0.8795494437217712, + "learning_rate": 3.192893204467018e-07, + "loss": 0.5421, + "step": 14033 + }, + { + "epoch": 0.89, + "grad_norm": 0.9152184128761292, + "learning_rate": 3.189286572320327e-07, + "loss": 0.596, + "step": 14034 + }, + { + "epoch": 0.89, + "grad_norm": 0.9376192092895508, + "learning_rate": 3.185681911209937e-07, + "loss": 0.5486, + "step": 14035 + }, + { + "epoch": 0.89, + "grad_norm": 0.9213606119155884, + "learning_rate": 3.1820792212876316e-07, + "loss": 0.5384, + "step": 14036 + }, + { + "epoch": 0.89, + "grad_norm": 0.9375748038291931, + "learning_rate": 3.178478502705101e-07, + "loss": 0.5772, + "step": 14037 + }, + { + "epoch": 0.89, + "grad_norm": 0.900903582572937, + "learning_rate": 3.174879755613952e-07, + "loss": 0.5485, + "step": 14038 + }, + { + "epoch": 0.89, + "grad_norm": 0.9398730993270874, + "learning_rate": 3.1712829801657294e-07, + "loss": 0.5505, + "step": 14039 + }, + { + "epoch": 0.89, + "grad_norm": 0.9179508686065674, + "learning_rate": 3.167688176511874e-07, + "loss": 0.6111, + "step": 14040 + }, + { + "epoch": 0.89, + "grad_norm": 0.916954755783081, + "learning_rate": 3.1640953448037527e-07, + "loss": 0.605, + "step": 14041 + }, + { + "epoch": 0.89, + "grad_norm": 0.8982189893722534, + "learning_rate": 3.1605044851926504e-07, + "loss": 0.5572, + "step": 14042 + }, + { + "epoch": 0.89, + "grad_norm": 0.8893555998802185, + "learning_rate": 3.1569155978297463e-07, + "loss": 0.5713, + "step": 14043 + }, + { + "epoch": 0.89, + "grad_norm": 0.9181665182113647, + "learning_rate": 3.1533286828661915e-07, + "loss": 0.5584, + "step": 14044 + }, + { + "epoch": 0.89, + "grad_norm": 0.8733421564102173, + "learning_rate": 3.1497437404529875e-07, + "loss": 0.6073, + "step": 14045 + }, + { + "epoch": 0.89, + "grad_norm": 0.8900958895683289, + "learning_rate": 3.1461607707410914e-07, + "loss": 0.5909, + "step": 14046 + }, + { + "epoch": 0.89, + "grad_norm": 0.9942273497581482, + "learning_rate": 3.142579773881377e-07, + "loss": 0.5821, + "step": 14047 + }, + { + "epoch": 0.89, + "grad_norm": 0.87491375207901, + "learning_rate": 3.1390007500246236e-07, + "loss": 0.576, + "step": 14048 + }, + { + "epoch": 0.89, + "grad_norm": 0.9551854729652405, + "learning_rate": 3.135423699321527e-07, + "loss": 0.632, + "step": 14049 + }, + { + "epoch": 0.89, + "grad_norm": 0.8860867023468018, + "learning_rate": 3.131848621922717e-07, + "loss": 0.5733, + "step": 14050 + }, + { + "epoch": 0.89, + "grad_norm": 0.8768588900566101, + "learning_rate": 3.128275517978707e-07, + "loss": 0.5568, + "step": 14051 + }, + { + "epoch": 0.89, + "grad_norm": 0.9353142976760864, + "learning_rate": 3.124704387639976e-07, + "loss": 0.6497, + "step": 14052 + }, + { + "epoch": 0.89, + "grad_norm": 0.9081913828849792, + "learning_rate": 3.1211352310568655e-07, + "loss": 0.6053, + "step": 14053 + }, + { + "epoch": 0.89, + "grad_norm": 0.9168758988380432, + "learning_rate": 3.1175680483796713e-07, + "loss": 0.5753, + "step": 14054 + }, + { + "epoch": 0.89, + "grad_norm": 0.8890372514724731, + "learning_rate": 3.1140028397585953e-07, + "loss": 0.5491, + "step": 14055 + }, + { + "epoch": 0.89, + "grad_norm": 0.8347086906433105, + "learning_rate": 3.110439605343751e-07, + "loss": 0.5384, + "step": 14056 + }, + { + "epoch": 0.89, + "grad_norm": 0.9348717331886292, + "learning_rate": 3.1068783452851856e-07, + "loss": 0.6164, + "step": 14057 + }, + { + "epoch": 0.89, + "grad_norm": 0.8585134744644165, + "learning_rate": 3.1033190597328456e-07, + "loss": 0.6011, + "step": 14058 + }, + { + "epoch": 0.89, + "grad_norm": 0.843673586845398, + "learning_rate": 3.099761748836594e-07, + "loss": 0.5566, + "step": 14059 + }, + { + "epoch": 0.89, + "grad_norm": 0.8475186824798584, + "learning_rate": 3.0962064127462167e-07, + "loss": 0.5611, + "step": 14060 + }, + { + "epoch": 0.89, + "grad_norm": 0.8992692232131958, + "learning_rate": 3.092653051611427e-07, + "loss": 0.5382, + "step": 14061 + }, + { + "epoch": 0.89, + "grad_norm": 0.8602596521377563, + "learning_rate": 3.089101665581834e-07, + "loss": 0.6269, + "step": 14062 + }, + { + "epoch": 0.89, + "grad_norm": 0.8721755146980286, + "learning_rate": 3.085552254806978e-07, + "loss": 0.5552, + "step": 14063 + }, + { + "epoch": 0.89, + "grad_norm": 0.9432767629623413, + "learning_rate": 3.0820048194363183e-07, + "loss": 0.6001, + "step": 14064 + }, + { + "epoch": 0.89, + "grad_norm": 0.883188009262085, + "learning_rate": 3.0784593596192123e-07, + "loss": 0.5695, + "step": 14065 + }, + { + "epoch": 0.89, + "grad_norm": 0.8545171022415161, + "learning_rate": 3.074915875504969e-07, + "loss": 0.5165, + "step": 14066 + }, + { + "epoch": 0.89, + "grad_norm": 0.782908022403717, + "learning_rate": 3.0713743672427686e-07, + "loss": 0.4764, + "step": 14067 + }, + { + "epoch": 0.89, + "grad_norm": 0.8021277785301208, + "learning_rate": 3.067834834981731e-07, + "loss": 0.521, + "step": 14068 + }, + { + "epoch": 0.89, + "grad_norm": 0.8493959903717041, + "learning_rate": 3.0642972788709203e-07, + "loss": 0.5231, + "step": 14069 + }, + { + "epoch": 0.89, + "grad_norm": 0.8718252778053284, + "learning_rate": 3.060761699059267e-07, + "loss": 0.562, + "step": 14070 + }, + { + "epoch": 0.89, + "grad_norm": 0.9048642516136169, + "learning_rate": 3.057228095695647e-07, + "loss": 0.6242, + "step": 14071 + }, + { + "epoch": 0.89, + "grad_norm": 0.8449458479881287, + "learning_rate": 3.053696468928857e-07, + "loss": 0.5006, + "step": 14072 + }, + { + "epoch": 0.89, + "grad_norm": 0.901394248008728, + "learning_rate": 3.0501668189075794e-07, + "loss": 0.5834, + "step": 14073 + }, + { + "epoch": 0.89, + "grad_norm": 0.9242204427719116, + "learning_rate": 3.0466391457804666e-07, + "loss": 0.5403, + "step": 14074 + }, + { + "epoch": 0.89, + "grad_norm": 0.8967319130897522, + "learning_rate": 3.0431134496960333e-07, + "loss": 0.6035, + "step": 14075 + }, + { + "epoch": 0.89, + "grad_norm": 0.8391687870025635, + "learning_rate": 3.0395897308027443e-07, + "loss": 0.5579, + "step": 14076 + }, + { + "epoch": 0.89, + "grad_norm": 0.9151217937469482, + "learning_rate": 3.0360679892489643e-07, + "loss": 0.5881, + "step": 14077 + }, + { + "epoch": 0.89, + "grad_norm": 0.8429851531982422, + "learning_rate": 3.032548225182985e-07, + "loss": 0.5864, + "step": 14078 + }, + { + "epoch": 0.89, + "grad_norm": 0.8832830786705017, + "learning_rate": 3.029030438753017e-07, + "loss": 0.5989, + "step": 14079 + }, + { + "epoch": 0.89, + "grad_norm": 0.859230101108551, + "learning_rate": 3.025514630107179e-07, + "loss": 0.5483, + "step": 14080 + }, + { + "epoch": 0.89, + "grad_norm": 0.8875642418861389, + "learning_rate": 3.0220007993934987e-07, + "loss": 0.6254, + "step": 14081 + }, + { + "epoch": 0.89, + "grad_norm": 0.8424782156944275, + "learning_rate": 3.018488946759951e-07, + "loss": 0.5502, + "step": 14082 + }, + { + "epoch": 0.89, + "grad_norm": 0.8858059644699097, + "learning_rate": 3.01497907235439e-07, + "loss": 0.5659, + "step": 14083 + }, + { + "epoch": 0.89, + "grad_norm": 0.8075915575027466, + "learning_rate": 3.0114711763246096e-07, + "loss": 0.53, + "step": 14084 + }, + { + "epoch": 0.89, + "grad_norm": 0.8604494333267212, + "learning_rate": 3.007965258818324e-07, + "loss": 0.6075, + "step": 14085 + }, + { + "epoch": 0.89, + "grad_norm": 0.9078390002250671, + "learning_rate": 3.0044613199831373e-07, + "loss": 0.5984, + "step": 14086 + }, + { + "epoch": 0.89, + "grad_norm": 0.9238450527191162, + "learning_rate": 3.0009593599666044e-07, + "loss": 0.6284, + "step": 14087 + }, + { + "epoch": 0.89, + "grad_norm": 0.9430950284004211, + "learning_rate": 2.9974593789161843e-07, + "loss": 0.5938, + "step": 14088 + }, + { + "epoch": 0.89, + "grad_norm": 0.9374951720237732, + "learning_rate": 2.9939613769792265e-07, + "loss": 0.5881, + "step": 14089 + }, + { + "epoch": 0.89, + "grad_norm": 0.8708373308181763, + "learning_rate": 2.9904653543030406e-07, + "loss": 0.6102, + "step": 14090 + }, + { + "epoch": 0.89, + "grad_norm": 0.926876425743103, + "learning_rate": 2.98697131103482e-07, + "loss": 0.5941, + "step": 14091 + }, + { + "epoch": 0.89, + "grad_norm": 0.8974210619926453, + "learning_rate": 2.983479247321691e-07, + "loss": 0.6017, + "step": 14092 + }, + { + "epoch": 0.89, + "grad_norm": 0.8967876434326172, + "learning_rate": 2.979989163310704e-07, + "loss": 0.5812, + "step": 14093 + }, + { + "epoch": 0.89, + "grad_norm": 0.914035975933075, + "learning_rate": 2.976501059148779e-07, + "loss": 0.5896, + "step": 14094 + }, + { + "epoch": 0.89, + "grad_norm": 0.8700651526451111, + "learning_rate": 2.9730149349828265e-07, + "loss": 0.4908, + "step": 14095 + }, + { + "epoch": 0.89, + "grad_norm": 0.9540930390357971, + "learning_rate": 2.969530790959624e-07, + "loss": 0.5677, + "step": 14096 + }, + { + "epoch": 0.89, + "grad_norm": 0.9152606725692749, + "learning_rate": 2.9660486272258703e-07, + "loss": 0.5638, + "step": 14097 + }, + { + "epoch": 0.89, + "grad_norm": 0.9777679443359375, + "learning_rate": 2.9625684439281875e-07, + "loss": 0.5569, + "step": 14098 + }, + { + "epoch": 0.89, + "grad_norm": 0.9192463159561157, + "learning_rate": 2.959090241213114e-07, + "loss": 0.608, + "step": 14099 + }, + { + "epoch": 0.89, + "grad_norm": 0.9115022420883179, + "learning_rate": 2.9556140192271045e-07, + "loss": 0.6048, + "step": 14100 + }, + { + "epoch": 0.89, + "grad_norm": 0.8501712679862976, + "learning_rate": 2.9521397781165475e-07, + "loss": 0.5831, + "step": 14101 + }, + { + "epoch": 0.89, + "grad_norm": 0.8686020374298096, + "learning_rate": 2.9486675180277035e-07, + "loss": 0.5704, + "step": 14102 + }, + { + "epoch": 0.89, + "grad_norm": 0.8605404496192932, + "learning_rate": 2.9451972391067897e-07, + "loss": 0.5463, + "step": 14103 + }, + { + "epoch": 0.89, + "grad_norm": 0.873845636844635, + "learning_rate": 2.941728941499938e-07, + "loss": 0.531, + "step": 14104 + }, + { + "epoch": 0.89, + "grad_norm": 0.934984564781189, + "learning_rate": 2.938262625353172e-07, + "loss": 0.5583, + "step": 14105 + }, + { + "epoch": 0.89, + "grad_norm": 0.8914570808410645, + "learning_rate": 2.934798290812446e-07, + "loss": 0.6008, + "step": 14106 + }, + { + "epoch": 0.89, + "grad_norm": 0.8697715401649475, + "learning_rate": 2.931335938023644e-07, + "loss": 0.5582, + "step": 14107 + }, + { + "epoch": 0.89, + "grad_norm": 0.9065754413604736, + "learning_rate": 2.9278755671325377e-07, + "loss": 0.5708, + "step": 14108 + }, + { + "epoch": 0.89, + "grad_norm": 0.8564165830612183, + "learning_rate": 2.924417178284855e-07, + "loss": 0.5854, + "step": 14109 + }, + { + "epoch": 0.89, + "grad_norm": 0.8233086466789246, + "learning_rate": 2.9209607716261856e-07, + "loss": 0.5431, + "step": 14110 + }, + { + "epoch": 0.89, + "grad_norm": 0.8898478746414185, + "learning_rate": 2.917506347302079e-07, + "loss": 0.5987, + "step": 14111 + }, + { + "epoch": 0.89, + "grad_norm": 0.9087411761283875, + "learning_rate": 2.9140539054580087e-07, + "loss": 0.586, + "step": 14112 + }, + { + "epoch": 0.89, + "grad_norm": 0.8777049779891968, + "learning_rate": 2.9106034462393187e-07, + "loss": 0.534, + "step": 14113 + }, + { + "epoch": 0.89, + "grad_norm": 0.8712142109870911, + "learning_rate": 2.9071549697913035e-07, + "loss": 0.564, + "step": 14114 + }, + { + "epoch": 0.89, + "grad_norm": 0.8860015869140625, + "learning_rate": 2.9037084762591704e-07, + "loss": 0.492, + "step": 14115 + }, + { + "epoch": 0.89, + "grad_norm": 0.9180863499641418, + "learning_rate": 2.900263965788036e-07, + "loss": 0.6046, + "step": 14116 + }, + { + "epoch": 0.89, + "grad_norm": 0.8687685132026672, + "learning_rate": 2.8968214385229453e-07, + "loss": 0.5626, + "step": 14117 + }, + { + "epoch": 0.89, + "grad_norm": 0.9041091203689575, + "learning_rate": 2.8933808946088383e-07, + "loss": 0.5691, + "step": 14118 + }, + { + "epoch": 0.89, + "grad_norm": 0.9091986417770386, + "learning_rate": 2.889942334190593e-07, + "loss": 0.5794, + "step": 14119 + }, + { + "epoch": 0.89, + "grad_norm": 0.8923031687736511, + "learning_rate": 2.8865057574129883e-07, + "loss": 0.5731, + "step": 14120 + }, + { + "epoch": 0.89, + "grad_norm": 0.8908477425575256, + "learning_rate": 2.8830711644207257e-07, + "loss": 0.5569, + "step": 14121 + }, + { + "epoch": 0.89, + "grad_norm": 1.0278847217559814, + "learning_rate": 2.8796385553584326e-07, + "loss": 0.59, + "step": 14122 + }, + { + "epoch": 0.89, + "grad_norm": 0.8459283709526062, + "learning_rate": 2.8762079303706505e-07, + "loss": 0.5899, + "step": 14123 + }, + { + "epoch": 0.89, + "grad_norm": 0.8664804100990295, + "learning_rate": 2.8727792896018015e-07, + "loss": 0.56, + "step": 14124 + }, + { + "epoch": 0.89, + "grad_norm": 0.8533109426498413, + "learning_rate": 2.8693526331962875e-07, + "loss": 0.5472, + "step": 14125 + }, + { + "epoch": 0.89, + "grad_norm": 0.8533556461334229, + "learning_rate": 2.865927961298376e-07, + "loss": 0.5552, + "step": 14126 + }, + { + "epoch": 0.9, + "grad_norm": 0.8829198479652405, + "learning_rate": 2.8625052740522683e-07, + "loss": 0.5775, + "step": 14127 + }, + { + "epoch": 0.9, + "grad_norm": 0.92991703748703, + "learning_rate": 2.859084571602083e-07, + "loss": 0.5723, + "step": 14128 + }, + { + "epoch": 0.9, + "grad_norm": 0.9061645269393921, + "learning_rate": 2.8556658540918603e-07, + "loss": 0.5473, + "step": 14129 + }, + { + "epoch": 0.9, + "grad_norm": 0.8385295867919922, + "learning_rate": 2.8522491216655403e-07, + "loss": 0.5513, + "step": 14130 + }, + { + "epoch": 0.9, + "grad_norm": 0.8606228828430176, + "learning_rate": 2.848834374467002e-07, + "loss": 0.5805, + "step": 14131 + }, + { + "epoch": 0.9, + "grad_norm": 0.9587467908859253, + "learning_rate": 2.8454216126400146e-07, + "loss": 0.6116, + "step": 14132 + }, + { + "epoch": 0.9, + "grad_norm": 0.9845794439315796, + "learning_rate": 2.842010836328274e-07, + "loss": 0.6159, + "step": 14133 + }, + { + "epoch": 0.9, + "grad_norm": 0.8591241240501404, + "learning_rate": 2.838602045675426e-07, + "loss": 0.5634, + "step": 14134 + }, + { + "epoch": 0.9, + "grad_norm": 0.916466236114502, + "learning_rate": 2.8351952408249726e-07, + "loss": 0.5489, + "step": 14135 + }, + { + "epoch": 0.9, + "grad_norm": 0.851662278175354, + "learning_rate": 2.831790421920377e-07, + "loss": 0.5919, + "step": 14136 + }, + { + "epoch": 0.9, + "grad_norm": 0.8946172595024109, + "learning_rate": 2.828387589104997e-07, + "loss": 0.6006, + "step": 14137 + }, + { + "epoch": 0.9, + "grad_norm": 0.8727723360061646, + "learning_rate": 2.824986742522118e-07, + "loss": 0.5747, + "step": 14138 + }, + { + "epoch": 0.9, + "grad_norm": 0.9003124237060547, + "learning_rate": 2.8215878823149466e-07, + "loss": 0.6208, + "step": 14139 + }, + { + "epoch": 0.9, + "grad_norm": 0.9446995258331299, + "learning_rate": 2.818191008626581e-07, + "loss": 0.5331, + "step": 14140 + }, + { + "epoch": 0.9, + "grad_norm": 0.8727278709411621, + "learning_rate": 2.8147961216000497e-07, + "loss": 0.5418, + "step": 14141 + }, + { + "epoch": 0.9, + "grad_norm": 0.8375770449638367, + "learning_rate": 2.8114032213783226e-07, + "loss": 0.5109, + "step": 14142 + }, + { + "epoch": 0.9, + "grad_norm": 0.8434416055679321, + "learning_rate": 2.808012308104241e-07, + "loss": 0.6113, + "step": 14143 + }, + { + "epoch": 0.9, + "grad_norm": 0.8837141394615173, + "learning_rate": 2.80462338192059e-07, + "loss": 0.5899, + "step": 14144 + }, + { + "epoch": 0.9, + "grad_norm": 0.8532682657241821, + "learning_rate": 2.801236442970073e-07, + "loss": 0.5329, + "step": 14145 + }, + { + "epoch": 0.9, + "grad_norm": 0.8666834831237793, + "learning_rate": 2.797851491395293e-07, + "loss": 0.5825, + "step": 14146 + }, + { + "epoch": 0.9, + "grad_norm": 0.9193355441093445, + "learning_rate": 2.79446852733879e-07, + "loss": 0.6144, + "step": 14147 + }, + { + "epoch": 0.9, + "grad_norm": 0.862277090549469, + "learning_rate": 2.791087550942995e-07, + "loss": 0.5617, + "step": 14148 + }, + { + "epoch": 0.9, + "grad_norm": 0.8777857422828674, + "learning_rate": 2.7877085623502775e-07, + "loss": 0.5741, + "step": 14149 + }, + { + "epoch": 0.9, + "grad_norm": 0.9369240999221802, + "learning_rate": 2.784331561702908e-07, + "loss": 0.5701, + "step": 14150 + }, + { + "epoch": 0.9, + "grad_norm": 0.8633100986480713, + "learning_rate": 2.780956549143088e-07, + "loss": 0.5204, + "step": 14151 + }, + { + "epoch": 0.9, + "grad_norm": 0.8361502289772034, + "learning_rate": 2.7775835248129267e-07, + "loss": 0.5389, + "step": 14152 + }, + { + "epoch": 0.9, + "grad_norm": 0.9618591070175171, + "learning_rate": 2.7742124888544497e-07, + "loss": 0.5818, + "step": 14153 + }, + { + "epoch": 0.9, + "grad_norm": 0.8982853889465332, + "learning_rate": 2.7708434414095875e-07, + "loss": 0.5917, + "step": 14154 + }, + { + "epoch": 0.9, + "grad_norm": 0.9148767590522766, + "learning_rate": 2.7674763826202265e-07, + "loss": 0.5946, + "step": 14155 + }, + { + "epoch": 0.9, + "grad_norm": 0.8875370621681213, + "learning_rate": 2.764111312628115e-07, + "loss": 0.6017, + "step": 14156 + }, + { + "epoch": 0.9, + "grad_norm": 0.9090349078178406, + "learning_rate": 2.7607482315749554e-07, + "loss": 0.5424, + "step": 14157 + }, + { + "epoch": 0.9, + "grad_norm": 0.9224393963813782, + "learning_rate": 2.757387139602352e-07, + "loss": 0.5548, + "step": 14158 + }, + { + "epoch": 0.9, + "grad_norm": 0.8845816850662231, + "learning_rate": 2.754028036851836e-07, + "loss": 0.5169, + "step": 14159 + }, + { + "epoch": 0.9, + "grad_norm": 0.880143404006958, + "learning_rate": 2.750670923464838e-07, + "loss": 0.5661, + "step": 14160 + }, + { + "epoch": 0.9, + "grad_norm": 0.9638619422912598, + "learning_rate": 2.747315799582728e-07, + "loss": 0.5205, + "step": 14161 + }, + { + "epoch": 0.9, + "grad_norm": 0.87690269947052, + "learning_rate": 2.7439626653467555e-07, + "loss": 0.6158, + "step": 14162 + }, + { + "epoch": 0.9, + "grad_norm": 0.9064611196517944, + "learning_rate": 2.7406115208981345e-07, + "loss": 0.5675, + "step": 14163 + }, + { + "epoch": 0.9, + "grad_norm": 0.9225680232048035, + "learning_rate": 2.7372623663779575e-07, + "loss": 0.5312, + "step": 14164 + }, + { + "epoch": 0.9, + "grad_norm": 0.8610286116600037, + "learning_rate": 2.733915201927245e-07, + "loss": 0.563, + "step": 14165 + }, + { + "epoch": 0.9, + "grad_norm": 0.8890798091888428, + "learning_rate": 2.7305700276869406e-07, + "loss": 0.5899, + "step": 14166 + }, + { + "epoch": 0.9, + "grad_norm": 0.8515585660934448, + "learning_rate": 2.727226843797881e-07, + "loss": 0.5643, + "step": 14167 + }, + { + "epoch": 0.9, + "grad_norm": 0.8853866457939148, + "learning_rate": 2.7238856504008594e-07, + "loss": 0.6033, + "step": 14168 + }, + { + "epoch": 0.9, + "grad_norm": 0.8813034296035767, + "learning_rate": 2.7205464476365575e-07, + "loss": 0.5382, + "step": 14169 + }, + { + "epoch": 0.9, + "grad_norm": 0.9416490197181702, + "learning_rate": 2.7172092356455626e-07, + "loss": 0.6018, + "step": 14170 + }, + { + "epoch": 0.9, + "grad_norm": 0.9497674703598022, + "learning_rate": 2.7138740145684017e-07, + "loss": 0.6127, + "step": 14171 + }, + { + "epoch": 0.9, + "grad_norm": 0.9365571737289429, + "learning_rate": 2.7105407845455124e-07, + "loss": 0.631, + "step": 14172 + }, + { + "epoch": 0.9, + "grad_norm": 0.8589094877243042, + "learning_rate": 2.707209545717238e-07, + "loss": 0.5762, + "step": 14173 + }, + { + "epoch": 0.9, + "grad_norm": 0.8943716287612915, + "learning_rate": 2.70388029822386e-07, + "loss": 0.5679, + "step": 14174 + }, + { + "epoch": 0.9, + "grad_norm": 0.9539296627044678, + "learning_rate": 2.700553042205539e-07, + "loss": 0.5642, + "step": 14175 + }, + { + "epoch": 0.9, + "grad_norm": 0.8620119690895081, + "learning_rate": 2.6972277778023913e-07, + "loss": 0.5144, + "step": 14176 + }, + { + "epoch": 0.9, + "grad_norm": 0.8798508644104004, + "learning_rate": 2.693904505154432e-07, + "loss": 0.5847, + "step": 14177 + }, + { + "epoch": 0.9, + "grad_norm": 0.9159492254257202, + "learning_rate": 2.690583224401588e-07, + "loss": 0.5889, + "step": 14178 + }, + { + "epoch": 0.9, + "grad_norm": 0.8451624512672424, + "learning_rate": 2.687263935683704e-07, + "loss": 0.5517, + "step": 14179 + }, + { + "epoch": 0.9, + "grad_norm": 0.9503071308135986, + "learning_rate": 2.6839466391405444e-07, + "loss": 0.6165, + "step": 14180 + }, + { + "epoch": 0.9, + "grad_norm": 0.8462880849838257, + "learning_rate": 2.680631334911793e-07, + "loss": 0.5567, + "step": 14181 + }, + { + "epoch": 0.9, + "grad_norm": 0.94367516040802, + "learning_rate": 2.677318023137049e-07, + "loss": 0.6164, + "step": 14182 + }, + { + "epoch": 0.9, + "grad_norm": 0.9024264216423035, + "learning_rate": 2.674006703955817e-07, + "loss": 0.5862, + "step": 14183 + }, + { + "epoch": 0.9, + "grad_norm": 0.9006355404853821, + "learning_rate": 2.670697377507514e-07, + "loss": 0.5654, + "step": 14184 + }, + { + "epoch": 0.9, + "grad_norm": 0.8944267630577087, + "learning_rate": 2.667390043931517e-07, + "loss": 0.5717, + "step": 14185 + }, + { + "epoch": 0.9, + "grad_norm": 0.8411933779716492, + "learning_rate": 2.664084703367059e-07, + "loss": 0.5411, + "step": 14186 + }, + { + "epoch": 0.9, + "grad_norm": 0.9336392283439636, + "learning_rate": 2.6607813559533236e-07, + "loss": 0.5433, + "step": 14187 + }, + { + "epoch": 0.9, + "grad_norm": 0.8673104643821716, + "learning_rate": 2.6574800018294043e-07, + "loss": 0.5634, + "step": 14188 + }, + { + "epoch": 0.9, + "grad_norm": 0.9110936522483826, + "learning_rate": 2.654180641134313e-07, + "loss": 0.5422, + "step": 14189 + }, + { + "epoch": 0.9, + "grad_norm": 0.8549519777297974, + "learning_rate": 2.650883274006966e-07, + "loss": 0.5758, + "step": 14190 + }, + { + "epoch": 0.9, + "grad_norm": 0.9573348164558411, + "learning_rate": 2.6475879005862183e-07, + "loss": 0.6086, + "step": 14191 + }, + { + "epoch": 0.9, + "grad_norm": 0.8656295537948608, + "learning_rate": 2.644294521010804e-07, + "loss": 0.5346, + "step": 14192 + }, + { + "epoch": 0.9, + "grad_norm": 0.9295397996902466, + "learning_rate": 2.6410031354194175e-07, + "loss": 0.6098, + "step": 14193 + }, + { + "epoch": 0.9, + "grad_norm": 0.8475077748298645, + "learning_rate": 2.6377137439506373e-07, + "loss": 0.5531, + "step": 14194 + }, + { + "epoch": 0.9, + "grad_norm": 0.9059809446334839, + "learning_rate": 2.634426346742969e-07, + "loss": 0.5737, + "step": 14195 + }, + { + "epoch": 0.9, + "grad_norm": 0.9273040890693665, + "learning_rate": 2.6311409439348403e-07, + "loss": 0.5759, + "step": 14196 + }, + { + "epoch": 0.9, + "grad_norm": 0.8897231221199036, + "learning_rate": 2.6278575356645687e-07, + "loss": 0.5843, + "step": 14197 + }, + { + "epoch": 0.9, + "grad_norm": 0.9147869348526001, + "learning_rate": 2.624576122070427e-07, + "loss": 0.6142, + "step": 14198 + }, + { + "epoch": 0.9, + "grad_norm": 0.8592173457145691, + "learning_rate": 2.621296703290588e-07, + "loss": 0.5259, + "step": 14199 + }, + { + "epoch": 0.9, + "grad_norm": 0.8588521480560303, + "learning_rate": 2.6180192794631133e-07, + "loss": 0.5644, + "step": 14200 + }, + { + "epoch": 0.9, + "grad_norm": 0.913691520690918, + "learning_rate": 2.6147438507260205e-07, + "loss": 0.5955, + "step": 14201 + }, + { + "epoch": 0.9, + "grad_norm": 0.9359204769134521, + "learning_rate": 2.611470417217227e-07, + "loss": 0.563, + "step": 14202 + }, + { + "epoch": 0.9, + "grad_norm": 0.8501827120780945, + "learning_rate": 2.6081989790745554e-07, + "loss": 0.553, + "step": 14203 + }, + { + "epoch": 0.9, + "grad_norm": 0.8793197274208069, + "learning_rate": 2.6049295364357684e-07, + "loss": 0.6034, + "step": 14204 + }, + { + "epoch": 0.9, + "grad_norm": 0.8312693238258362, + "learning_rate": 2.6016620894385113e-07, + "loss": 0.5353, + "step": 14205 + }, + { + "epoch": 0.9, + "grad_norm": 0.8711232542991638, + "learning_rate": 2.59839663822038e-07, + "loss": 0.5567, + "step": 14206 + }, + { + "epoch": 0.9, + "grad_norm": 0.9392272233963013, + "learning_rate": 2.5951331829188797e-07, + "loss": 0.5764, + "step": 14207 + }, + { + "epoch": 0.9, + "grad_norm": 0.8943392634391785, + "learning_rate": 2.591871723671402e-07, + "loss": 0.5447, + "step": 14208 + }, + { + "epoch": 0.9, + "grad_norm": 0.8029001355171204, + "learning_rate": 2.5886122606152866e-07, + "loss": 0.5799, + "step": 14209 + }, + { + "epoch": 0.9, + "grad_norm": 0.8463373780250549, + "learning_rate": 2.585354793887779e-07, + "loss": 0.5161, + "step": 14210 + }, + { + "epoch": 0.9, + "grad_norm": 0.9136335849761963, + "learning_rate": 2.5820993236260305e-07, + "loss": 0.5433, + "step": 14211 + }, + { + "epoch": 0.9, + "grad_norm": 0.8141673803329468, + "learning_rate": 2.5788458499671376e-07, + "loss": 0.5733, + "step": 14212 + }, + { + "epoch": 0.9, + "grad_norm": 0.9717278480529785, + "learning_rate": 2.5755943730480735e-07, + "loss": 0.5817, + "step": 14213 + }, + { + "epoch": 0.9, + "grad_norm": 0.91008460521698, + "learning_rate": 2.5723448930057405e-07, + "loss": 0.6328, + "step": 14214 + }, + { + "epoch": 0.9, + "grad_norm": 0.9435662031173706, + "learning_rate": 2.569097409976995e-07, + "loss": 0.5827, + "step": 14215 + }, + { + "epoch": 0.9, + "grad_norm": 0.8856955766677856, + "learning_rate": 2.5658519240985444e-07, + "loss": 0.5993, + "step": 14216 + }, + { + "epoch": 0.9, + "grad_norm": 0.8379449248313904, + "learning_rate": 2.5626084355070634e-07, + "loss": 0.596, + "step": 14217 + }, + { + "epoch": 0.9, + "grad_norm": 0.8931264281272888, + "learning_rate": 2.5593669443391145e-07, + "loss": 0.6241, + "step": 14218 + }, + { + "epoch": 0.9, + "grad_norm": 1.0004993677139282, + "learning_rate": 2.556127450731194e-07, + "loss": 0.6224, + "step": 14219 + }, + { + "epoch": 0.9, + "grad_norm": 0.9444043040275574, + "learning_rate": 2.552889954819704e-07, + "loss": 0.56, + "step": 14220 + }, + { + "epoch": 0.9, + "grad_norm": 0.9467916488647461, + "learning_rate": 2.5496544567409577e-07, + "loss": 0.593, + "step": 14221 + }, + { + "epoch": 0.9, + "grad_norm": 0.9097285866737366, + "learning_rate": 2.5464209566311847e-07, + "loss": 0.5677, + "step": 14222 + }, + { + "epoch": 0.9, + "grad_norm": 1.0071593523025513, + "learning_rate": 2.5431894546265654e-07, + "loss": 0.6015, + "step": 14223 + }, + { + "epoch": 0.9, + "grad_norm": 0.8538757562637329, + "learning_rate": 2.5399599508631356e-07, + "loss": 0.5435, + "step": 14224 + }, + { + "epoch": 0.9, + "grad_norm": 0.8897154331207275, + "learning_rate": 2.5367324454768916e-07, + "loss": 0.6174, + "step": 14225 + }, + { + "epoch": 0.9, + "grad_norm": 0.8830700516700745, + "learning_rate": 2.5335069386037414e-07, + "loss": 0.604, + "step": 14226 + }, + { + "epoch": 0.9, + "grad_norm": 0.8626001477241516, + "learning_rate": 2.530283430379471e-07, + "loss": 0.5986, + "step": 14227 + }, + { + "epoch": 0.9, + "grad_norm": 0.8587076663970947, + "learning_rate": 2.5270619209398497e-07, + "loss": 0.5063, + "step": 14228 + }, + { + "epoch": 0.9, + "grad_norm": 0.9192159175872803, + "learning_rate": 2.523842410420496e-07, + "loss": 0.6178, + "step": 14229 + }, + { + "epoch": 0.9, + "grad_norm": 0.9127901196479797, + "learning_rate": 2.5206248989569803e-07, + "loss": 0.5617, + "step": 14230 + }, + { + "epoch": 0.9, + "grad_norm": 0.9163671135902405, + "learning_rate": 2.5174093866847826e-07, + "loss": 0.5319, + "step": 14231 + }, + { + "epoch": 0.9, + "grad_norm": 0.8983326554298401, + "learning_rate": 2.5141958737392947e-07, + "loss": 0.5889, + "step": 14232 + }, + { + "epoch": 0.9, + "grad_norm": 0.9140615463256836, + "learning_rate": 2.5109843602558247e-07, + "loss": 0.5825, + "step": 14233 + }, + { + "epoch": 0.9, + "grad_norm": 0.837908148765564, + "learning_rate": 2.507774846369615e-07, + "loss": 0.5451, + "step": 14234 + }, + { + "epoch": 0.9, + "grad_norm": 0.9031140804290771, + "learning_rate": 2.5045673322157735e-07, + "loss": 0.5876, + "step": 14235 + }, + { + "epoch": 0.9, + "grad_norm": 0.8454420566558838, + "learning_rate": 2.501361817929393e-07, + "loss": 0.5165, + "step": 14236 + }, + { + "epoch": 0.9, + "grad_norm": 0.8986586332321167, + "learning_rate": 2.4981583036454203e-07, + "loss": 0.554, + "step": 14237 + }, + { + "epoch": 0.9, + "grad_norm": 0.8993757367134094, + "learning_rate": 2.494956789498759e-07, + "loss": 0.5715, + "step": 14238 + }, + { + "epoch": 0.9, + "grad_norm": 0.9246693253517151, + "learning_rate": 2.491757275624207e-07, + "loss": 0.5542, + "step": 14239 + }, + { + "epoch": 0.9, + "grad_norm": 0.961254894733429, + "learning_rate": 2.4885597621564896e-07, + "loss": 0.6091, + "step": 14240 + }, + { + "epoch": 0.9, + "grad_norm": 0.9174337387084961, + "learning_rate": 2.485364249230238e-07, + "loss": 0.5664, + "step": 14241 + }, + { + "epoch": 0.9, + "grad_norm": 0.824385941028595, + "learning_rate": 2.4821707369800163e-07, + "loss": 0.5478, + "step": 14242 + }, + { + "epoch": 0.9, + "grad_norm": 0.868877649307251, + "learning_rate": 2.478979225540268e-07, + "loss": 0.5719, + "step": 14243 + }, + { + "epoch": 0.9, + "grad_norm": 0.9093589782714844, + "learning_rate": 2.475789715045401e-07, + "loss": 0.5828, + "step": 14244 + }, + { + "epoch": 0.9, + "grad_norm": 0.881280243396759, + "learning_rate": 2.472602205629698e-07, + "loss": 0.5403, + "step": 14245 + }, + { + "epoch": 0.9, + "grad_norm": 0.9545583128929138, + "learning_rate": 2.469416697427379e-07, + "loss": 0.5959, + "step": 14246 + }, + { + "epoch": 0.9, + "grad_norm": 0.9496628642082214, + "learning_rate": 2.466233190572581e-07, + "loss": 0.5762, + "step": 14247 + }, + { + "epoch": 0.9, + "grad_norm": 0.8831350803375244, + "learning_rate": 2.463051685199341e-07, + "loss": 0.6048, + "step": 14248 + }, + { + "epoch": 0.9, + "grad_norm": 0.9093460440635681, + "learning_rate": 2.4598721814416306e-07, + "loss": 0.5595, + "step": 14249 + }, + { + "epoch": 0.9, + "grad_norm": 0.8506335616111755, + "learning_rate": 2.4566946794333247e-07, + "loss": 0.579, + "step": 14250 + }, + { + "epoch": 0.9, + "grad_norm": 0.9103783369064331, + "learning_rate": 2.4535191793082116e-07, + "loss": 0.5803, + "step": 14251 + }, + { + "epoch": 0.9, + "grad_norm": 0.8295513391494751, + "learning_rate": 2.4503456812e-07, + "loss": 0.5156, + "step": 14252 + }, + { + "epoch": 0.9, + "grad_norm": 0.956263542175293, + "learning_rate": 2.447174185242324e-07, + "loss": 0.5824, + "step": 14253 + }, + { + "epoch": 0.9, + "grad_norm": 0.8482615947723389, + "learning_rate": 2.4440046915687135e-07, + "loss": 0.531, + "step": 14254 + }, + { + "epoch": 0.9, + "grad_norm": 0.8663813471794128, + "learning_rate": 2.4408372003126345e-07, + "loss": 0.6005, + "step": 14255 + }, + { + "epoch": 0.9, + "grad_norm": 0.9090369939804077, + "learning_rate": 2.4376717116074533e-07, + "loss": 0.5589, + "step": 14256 + }, + { + "epoch": 0.9, + "grad_norm": 0.803523600101471, + "learning_rate": 2.434508225586457e-07, + "loss": 0.5677, + "step": 14257 + }, + { + "epoch": 0.9, + "grad_norm": 0.8888107538223267, + "learning_rate": 2.431346742382856e-07, + "loss": 0.5486, + "step": 14258 + }, + { + "epoch": 0.9, + "grad_norm": 0.874443769454956, + "learning_rate": 2.428187262129761e-07, + "loss": 0.5408, + "step": 14259 + }, + { + "epoch": 0.9, + "grad_norm": 0.8557073473930359, + "learning_rate": 2.4250297849602145e-07, + "loss": 0.6157, + "step": 14260 + }, + { + "epoch": 0.9, + "grad_norm": 0.890663743019104, + "learning_rate": 2.421874311007155e-07, + "loss": 0.5452, + "step": 14261 + }, + { + "epoch": 0.9, + "grad_norm": 0.9649395942687988, + "learning_rate": 2.41872084040346e-07, + "loss": 0.5703, + "step": 14262 + }, + { + "epoch": 0.9, + "grad_norm": 0.8820181488990784, + "learning_rate": 2.4155693732819065e-07, + "loss": 0.5555, + "step": 14263 + }, + { + "epoch": 0.9, + "grad_norm": 0.9557092189788818, + "learning_rate": 2.412419909775199e-07, + "loss": 0.5643, + "step": 14264 + }, + { + "epoch": 0.9, + "grad_norm": 0.8582893013954163, + "learning_rate": 2.4092724500159315e-07, + "loss": 0.5908, + "step": 14265 + }, + { + "epoch": 0.9, + "grad_norm": 0.8829105496406555, + "learning_rate": 2.406126994136654e-07, + "loss": 0.5481, + "step": 14266 + }, + { + "epoch": 0.9, + "grad_norm": 0.8586880564689636, + "learning_rate": 2.402983542269799e-07, + "loss": 0.5469, + "step": 14267 + }, + { + "epoch": 0.9, + "grad_norm": 0.9591865539550781, + "learning_rate": 2.3998420945477276e-07, + "loss": 0.5816, + "step": 14268 + }, + { + "epoch": 0.9, + "grad_norm": 0.8931147456169128, + "learning_rate": 2.3967026511027224e-07, + "loss": 0.6198, + "step": 14269 + }, + { + "epoch": 0.9, + "grad_norm": 0.8495928645133972, + "learning_rate": 2.393565212066962e-07, + "loss": 0.5475, + "step": 14270 + }, + { + "epoch": 0.9, + "grad_norm": 0.8455556035041809, + "learning_rate": 2.3904297775725614e-07, + "loss": 0.5551, + "step": 14271 + }, + { + "epoch": 0.9, + "grad_norm": 0.8738865256309509, + "learning_rate": 2.3872963477515497e-07, + "loss": 0.5653, + "step": 14272 + }, + { + "epoch": 0.9, + "grad_norm": 0.89951092004776, + "learning_rate": 2.3841649227358489e-07, + "loss": 0.556, + "step": 14273 + }, + { + "epoch": 0.9, + "grad_norm": 0.8624039888381958, + "learning_rate": 2.3810355026573195e-07, + "loss": 0.5726, + "step": 14274 + }, + { + "epoch": 0.9, + "grad_norm": 0.9102184176445007, + "learning_rate": 2.377908087647729e-07, + "loss": 0.5875, + "step": 14275 + }, + { + "epoch": 0.9, + "grad_norm": 0.8978198766708374, + "learning_rate": 2.374782677838766e-07, + "loss": 0.5379, + "step": 14276 + }, + { + "epoch": 0.9, + "grad_norm": 0.8829779028892517, + "learning_rate": 2.3716592733620315e-07, + "loss": 0.6237, + "step": 14277 + }, + { + "epoch": 0.9, + "grad_norm": 0.9458640217781067, + "learning_rate": 2.3685378743490306e-07, + "loss": 0.5471, + "step": 14278 + }, + { + "epoch": 0.9, + "grad_norm": 0.9152674078941345, + "learning_rate": 2.3654184809312032e-07, + "loss": 0.563, + "step": 14279 + }, + { + "epoch": 0.9, + "grad_norm": 0.7956060171127319, + "learning_rate": 2.362301093239905e-07, + "loss": 0.514, + "step": 14280 + }, + { + "epoch": 0.9, + "grad_norm": 0.8902352452278137, + "learning_rate": 2.359185711406381e-07, + "loss": 0.5497, + "step": 14281 + }, + { + "epoch": 0.9, + "grad_norm": 0.8263982534408569, + "learning_rate": 2.3560723355618152e-07, + "loss": 0.5835, + "step": 14282 + }, + { + "epoch": 0.9, + "grad_norm": 0.8512941002845764, + "learning_rate": 2.3529609658373032e-07, + "loss": 0.5797, + "step": 14283 + }, + { + "epoch": 0.9, + "grad_norm": 0.9477369785308838, + "learning_rate": 2.3498516023638562e-07, + "loss": 0.5849, + "step": 14284 + }, + { + "epoch": 0.91, + "grad_norm": 0.9041728377342224, + "learning_rate": 2.3467442452723976e-07, + "loss": 0.5284, + "step": 14285 + }, + { + "epoch": 0.91, + "grad_norm": 0.9231773614883423, + "learning_rate": 2.3436388946937504e-07, + "loss": 0.533, + "step": 14286 + }, + { + "epoch": 0.91, + "grad_norm": 0.8928778171539307, + "learning_rate": 2.3405355507586992e-07, + "loss": 0.5975, + "step": 14287 + }, + { + "epoch": 0.91, + "grad_norm": 0.8574181795120239, + "learning_rate": 2.3374342135979e-07, + "loss": 0.5832, + "step": 14288 + }, + { + "epoch": 0.91, + "grad_norm": 0.900062620639801, + "learning_rate": 2.3343348833419377e-07, + "loss": 0.6058, + "step": 14289 + }, + { + "epoch": 0.91, + "grad_norm": 0.9051018953323364, + "learning_rate": 2.3312375601213134e-07, + "loss": 0.5408, + "step": 14290 + }, + { + "epoch": 0.91, + "grad_norm": 0.9527705311775208, + "learning_rate": 2.3281422440664503e-07, + "loss": 0.5875, + "step": 14291 + }, + { + "epoch": 0.91, + "grad_norm": 0.9164147973060608, + "learning_rate": 2.3250489353076777e-07, + "loss": 0.5846, + "step": 14292 + }, + { + "epoch": 0.91, + "grad_norm": 0.9179802536964417, + "learning_rate": 2.3219576339752525e-07, + "loss": 0.58, + "step": 14293 + }, + { + "epoch": 0.91, + "grad_norm": 0.9129411578178406, + "learning_rate": 2.3188683401993261e-07, + "loss": 0.5996, + "step": 14294 + }, + { + "epoch": 0.91, + "grad_norm": 0.8692997097969055, + "learning_rate": 2.3157810541099724e-07, + "loss": 0.5227, + "step": 14295 + }, + { + "epoch": 0.91, + "grad_norm": 0.8740088939666748, + "learning_rate": 2.3126957758372149e-07, + "loss": 0.5473, + "step": 14296 + }, + { + "epoch": 0.91, + "grad_norm": 0.908848762512207, + "learning_rate": 2.3096125055109386e-07, + "loss": 0.543, + "step": 14297 + }, + { + "epoch": 0.91, + "grad_norm": 0.8550407290458679, + "learning_rate": 2.3065312432609788e-07, + "loss": 0.557, + "step": 14298 + }, + { + "epoch": 0.91, + "grad_norm": 0.852614164352417, + "learning_rate": 2.3034519892170705e-07, + "loss": 0.5691, + "step": 14299 + }, + { + "epoch": 0.91, + "grad_norm": 0.8640037775039673, + "learning_rate": 2.3003747435088764e-07, + "loss": 0.5388, + "step": 14300 + }, + { + "epoch": 0.91, + "grad_norm": 0.8585585355758667, + "learning_rate": 2.2972995062659764e-07, + "loss": 0.5929, + "step": 14301 + }, + { + "epoch": 0.91, + "grad_norm": 0.9112229943275452, + "learning_rate": 2.2942262776178392e-07, + "loss": 0.5954, + "step": 14302 + }, + { + "epoch": 0.91, + "grad_norm": 0.8441083431243896, + "learning_rate": 2.291155057693878e-07, + "loss": 0.571, + "step": 14303 + }, + { + "epoch": 0.91, + "grad_norm": 0.922584593296051, + "learning_rate": 2.2880858466234114e-07, + "loss": 0.5756, + "step": 14304 + }, + { + "epoch": 0.91, + "grad_norm": 0.860567569732666, + "learning_rate": 2.2850186445356693e-07, + "loss": 0.5948, + "step": 14305 + }, + { + "epoch": 0.91, + "grad_norm": 0.8519131541252136, + "learning_rate": 2.281953451559804e-07, + "loss": 0.5374, + "step": 14306 + }, + { + "epoch": 0.91, + "grad_norm": 0.9286757111549377, + "learning_rate": 2.2788902678248904e-07, + "loss": 0.6002, + "step": 14307 + }, + { + "epoch": 0.91, + "grad_norm": 0.8797339797019958, + "learning_rate": 2.2758290934598805e-07, + "loss": 0.5811, + "step": 14308 + }, + { + "epoch": 0.91, + "grad_norm": 0.871191680431366, + "learning_rate": 2.2727699285937043e-07, + "loss": 0.5332, + "step": 14309 + }, + { + "epoch": 0.91, + "grad_norm": 1.0105799436569214, + "learning_rate": 2.2697127733551483e-07, + "loss": 0.6011, + "step": 14310 + }, + { + "epoch": 0.91, + "grad_norm": 0.8978657722473145, + "learning_rate": 2.2666576278729424e-07, + "loss": 0.6085, + "step": 14311 + }, + { + "epoch": 0.91, + "grad_norm": 0.9255068302154541, + "learning_rate": 2.2636044922757339e-07, + "loss": 0.5758, + "step": 14312 + }, + { + "epoch": 0.91, + "grad_norm": 0.8957815170288086, + "learning_rate": 2.2605533666920753e-07, + "loss": 0.5948, + "step": 14313 + }, + { + "epoch": 0.91, + "grad_norm": 0.9146010279655457, + "learning_rate": 2.257504251250442e-07, + "loss": 0.5328, + "step": 14314 + }, + { + "epoch": 0.91, + "grad_norm": 0.8640668392181396, + "learning_rate": 2.2544571460792308e-07, + "loss": 0.5836, + "step": 14315 + }, + { + "epoch": 0.91, + "grad_norm": 0.8612959980964661, + "learning_rate": 2.251412051306717e-07, + "loss": 0.5623, + "step": 14316 + }, + { + "epoch": 0.91, + "grad_norm": 0.8819116950035095, + "learning_rate": 2.2483689670611542e-07, + "loss": 0.6256, + "step": 14317 + }, + { + "epoch": 0.91, + "grad_norm": 0.9524977207183838, + "learning_rate": 2.2453278934706446e-07, + "loss": 0.6195, + "step": 14318 + }, + { + "epoch": 0.91, + "grad_norm": 0.912262499332428, + "learning_rate": 2.2422888306632584e-07, + "loss": 0.5568, + "step": 14319 + }, + { + "epoch": 0.91, + "grad_norm": 0.8547895550727844, + "learning_rate": 2.2392517787669487e-07, + "loss": 0.5127, + "step": 14320 + }, + { + "epoch": 0.91, + "grad_norm": 0.9063989520072937, + "learning_rate": 2.2362167379096023e-07, + "loss": 0.5975, + "step": 14321 + }, + { + "epoch": 0.91, + "grad_norm": 0.8552689552307129, + "learning_rate": 2.2331837082190056e-07, + "loss": 0.5232, + "step": 14322 + }, + { + "epoch": 0.91, + "grad_norm": 0.8747928142547607, + "learning_rate": 2.2301526898228842e-07, + "loss": 0.5471, + "step": 14323 + }, + { + "epoch": 0.91, + "grad_norm": 0.879938006401062, + "learning_rate": 2.2271236828488474e-07, + "loss": 0.5942, + "step": 14324 + }, + { + "epoch": 0.91, + "grad_norm": 0.9146292209625244, + "learning_rate": 2.224096687424443e-07, + "loss": 0.57, + "step": 14325 + }, + { + "epoch": 0.91, + "grad_norm": 0.8963407874107361, + "learning_rate": 2.2210717036771246e-07, + "loss": 0.549, + "step": 14326 + }, + { + "epoch": 0.91, + "grad_norm": 0.9316021800041199, + "learning_rate": 2.218048731734268e-07, + "loss": 0.6144, + "step": 14327 + }, + { + "epoch": 0.91, + "grad_norm": 0.8399151563644409, + "learning_rate": 2.215027771723155e-07, + "loss": 0.5571, + "step": 14328 + }, + { + "epoch": 0.91, + "grad_norm": 0.8416658043861389, + "learning_rate": 2.2120088237709946e-07, + "loss": 0.5699, + "step": 14329 + }, + { + "epoch": 0.91, + "grad_norm": 0.9296332597732544, + "learning_rate": 2.2089918880049023e-07, + "loss": 0.5445, + "step": 14330 + }, + { + "epoch": 0.91, + "grad_norm": 0.8599060773849487, + "learning_rate": 2.205976964551909e-07, + "loss": 0.5574, + "step": 14331 + }, + { + "epoch": 0.91, + "grad_norm": 0.9314765334129333, + "learning_rate": 2.2029640535389586e-07, + "loss": 0.5455, + "step": 14332 + }, + { + "epoch": 0.91, + "grad_norm": 0.8735009431838989, + "learning_rate": 2.1999531550929098e-07, + "loss": 0.5763, + "step": 14333 + }, + { + "epoch": 0.91, + "grad_norm": 0.8349171280860901, + "learning_rate": 2.1969442693405673e-07, + "loss": 0.5455, + "step": 14334 + }, + { + "epoch": 0.91, + "grad_norm": 0.8516371846199036, + "learning_rate": 2.1939373964085964e-07, + "loss": 0.6168, + "step": 14335 + }, + { + "epoch": 0.91, + "grad_norm": 0.8744125366210938, + "learning_rate": 2.190932536423618e-07, + "loss": 0.5543, + "step": 14336 + }, + { + "epoch": 0.91, + "grad_norm": 0.9350723028182983, + "learning_rate": 2.1879296895121637e-07, + "loss": 0.6037, + "step": 14337 + }, + { + "epoch": 0.91, + "grad_norm": 0.8729871511459351, + "learning_rate": 2.1849288558006442e-07, + "loss": 0.5759, + "step": 14338 + }, + { + "epoch": 0.91, + "grad_norm": 0.9231459498405457, + "learning_rate": 2.1819300354154526e-07, + "loss": 0.6121, + "step": 14339 + }, + { + "epoch": 0.91, + "grad_norm": 0.941109299659729, + "learning_rate": 2.1789332284828323e-07, + "loss": 0.5852, + "step": 14340 + }, + { + "epoch": 0.91, + "grad_norm": 0.7934569716453552, + "learning_rate": 2.175938435128977e-07, + "loss": 0.501, + "step": 14341 + }, + { + "epoch": 0.91, + "grad_norm": 0.8977835774421692, + "learning_rate": 2.1729456554799855e-07, + "loss": 0.5864, + "step": 14342 + }, + { + "epoch": 0.91, + "grad_norm": 0.9556468725204468, + "learning_rate": 2.1699548896618795e-07, + "loss": 0.6184, + "step": 14343 + }, + { + "epoch": 0.91, + "grad_norm": 0.8871400952339172, + "learning_rate": 2.1669661378005802e-07, + "loss": 0.55, + "step": 14344 + }, + { + "epoch": 0.91, + "grad_norm": 0.8647100329399109, + "learning_rate": 2.1639794000219426e-07, + "loss": 0.5605, + "step": 14345 + }, + { + "epoch": 0.91, + "grad_norm": 0.9198769330978394, + "learning_rate": 2.1609946764517108e-07, + "loss": 0.5757, + "step": 14346 + }, + { + "epoch": 0.91, + "grad_norm": 0.9258266687393188, + "learning_rate": 2.1580119672155898e-07, + "loss": 0.6241, + "step": 14347 + }, + { + "epoch": 0.91, + "grad_norm": 0.8942254781723022, + "learning_rate": 2.1550312724391452e-07, + "loss": 0.5967, + "step": 14348 + }, + { + "epoch": 0.91, + "grad_norm": 0.8659271597862244, + "learning_rate": 2.152052592247894e-07, + "loss": 0.5591, + "step": 14349 + }, + { + "epoch": 0.91, + "grad_norm": 0.86356520652771, + "learning_rate": 2.1490759267672634e-07, + "loss": 0.5706, + "step": 14350 + }, + { + "epoch": 0.91, + "grad_norm": 0.9083942770957947, + "learning_rate": 2.1461012761225696e-07, + "loss": 0.5906, + "step": 14351 + }, + { + "epoch": 0.91, + "grad_norm": 0.8626084327697754, + "learning_rate": 2.143128640439085e-07, + "loss": 0.5377, + "step": 14352 + }, + { + "epoch": 0.91, + "grad_norm": 0.8646213412284851, + "learning_rate": 2.1401580198419812e-07, + "loss": 0.5453, + "step": 14353 + }, + { + "epoch": 0.91, + "grad_norm": 0.8685572147369385, + "learning_rate": 2.1371894144563254e-07, + "loss": 0.5698, + "step": 14354 + }, + { + "epoch": 0.91, + "grad_norm": 0.8844971060752869, + "learning_rate": 2.1342228244071173e-07, + "loss": 0.5964, + "step": 14355 + }, + { + "epoch": 0.91, + "grad_norm": 0.8692290186882019, + "learning_rate": 2.1312582498192792e-07, + "loss": 0.5788, + "step": 14356 + }, + { + "epoch": 0.91, + "grad_norm": 0.9128808975219727, + "learning_rate": 2.1282956908176277e-07, + "loss": 0.5831, + "step": 14357 + }, + { + "epoch": 0.91, + "grad_norm": 0.8476049304008484, + "learning_rate": 2.125335147526919e-07, + "loss": 0.5757, + "step": 14358 + }, + { + "epoch": 0.91, + "grad_norm": 0.9030495882034302, + "learning_rate": 2.122376620071792e-07, + "loss": 0.6114, + "step": 14359 + }, + { + "epoch": 0.91, + "grad_norm": 0.9279770851135254, + "learning_rate": 2.1194201085768363e-07, + "loss": 0.5573, + "step": 14360 + }, + { + "epoch": 0.91, + "grad_norm": 0.915275514125824, + "learning_rate": 2.1164656131665407e-07, + "loss": 0.5517, + "step": 14361 + }, + { + "epoch": 0.91, + "grad_norm": 0.8605398535728455, + "learning_rate": 2.1135131339652947e-07, + "loss": 0.5765, + "step": 14362 + }, + { + "epoch": 0.91, + "grad_norm": 0.8572301864624023, + "learning_rate": 2.1105626710974325e-07, + "loss": 0.5418, + "step": 14363 + }, + { + "epoch": 0.91, + "grad_norm": 1.0193010568618774, + "learning_rate": 2.1076142246871766e-07, + "loss": 0.523, + "step": 14364 + }, + { + "epoch": 0.91, + "grad_norm": 0.8625369668006897, + "learning_rate": 2.1046677948586836e-07, + "loss": 0.5679, + "step": 14365 + }, + { + "epoch": 0.91, + "grad_norm": 0.878371000289917, + "learning_rate": 2.1017233817360149e-07, + "loss": 0.5779, + "step": 14366 + }, + { + "epoch": 0.91, + "grad_norm": 0.9470487833023071, + "learning_rate": 2.098780985443144e-07, + "loss": 0.6326, + "step": 14367 + }, + { + "epoch": 0.91, + "grad_norm": 0.8446786999702454, + "learning_rate": 2.095840606103966e-07, + "loss": 0.5459, + "step": 14368 + }, + { + "epoch": 0.91, + "grad_norm": 0.862938642501831, + "learning_rate": 2.092902243842304e-07, + "loss": 0.5463, + "step": 14369 + }, + { + "epoch": 0.91, + "grad_norm": 0.9745242595672607, + "learning_rate": 2.0899658987818705e-07, + "loss": 0.5714, + "step": 14370 + }, + { + "epoch": 0.91, + "grad_norm": 0.9046638607978821, + "learning_rate": 2.0870315710462996e-07, + "loss": 0.5915, + "step": 14371 + }, + { + "epoch": 0.91, + "grad_norm": 0.9039925932884216, + "learning_rate": 2.0840992607591593e-07, + "loss": 0.5574, + "step": 14372 + }, + { + "epoch": 0.91, + "grad_norm": 0.9303500056266785, + "learning_rate": 2.081168968043906e-07, + "loss": 0.5953, + "step": 14373 + }, + { + "epoch": 0.91, + "grad_norm": 0.9302768707275391, + "learning_rate": 2.0782406930239363e-07, + "loss": 0.6172, + "step": 14374 + }, + { + "epoch": 0.91, + "grad_norm": 0.8675611019134521, + "learning_rate": 2.0753144358225397e-07, + "loss": 0.5759, + "step": 14375 + }, + { + "epoch": 0.91, + "grad_norm": 0.9458869695663452, + "learning_rate": 2.072390196562929e-07, + "loss": 0.5794, + "step": 14376 + }, + { + "epoch": 0.91, + "grad_norm": 0.934730052947998, + "learning_rate": 2.0694679753682445e-07, + "loss": 0.5584, + "step": 14377 + }, + { + "epoch": 0.91, + "grad_norm": 0.8245408535003662, + "learning_rate": 2.0665477723615268e-07, + "loss": 0.5508, + "step": 14378 + }, + { + "epoch": 0.91, + "grad_norm": 0.8302925229072571, + "learning_rate": 2.063629587665733e-07, + "loss": 0.5543, + "step": 14379 + }, + { + "epoch": 0.91, + "grad_norm": 0.8917962908744812, + "learning_rate": 2.0607134214037373e-07, + "loss": 0.5144, + "step": 14380 + }, + { + "epoch": 0.91, + "grad_norm": 0.830355703830719, + "learning_rate": 2.05779927369833e-07, + "loss": 0.5366, + "step": 14381 + }, + { + "epoch": 0.91, + "grad_norm": 0.8706912994384766, + "learning_rate": 2.054887144672224e-07, + "loss": 0.5151, + "step": 14382 + }, + { + "epoch": 0.91, + "grad_norm": 0.8185093402862549, + "learning_rate": 2.0519770344480272e-07, + "loss": 0.5727, + "step": 14383 + }, + { + "epoch": 0.91, + "grad_norm": 0.876105785369873, + "learning_rate": 2.0490689431482746e-07, + "loss": 0.541, + "step": 14384 + }, + { + "epoch": 0.91, + "grad_norm": 0.899113118648529, + "learning_rate": 2.0461628708954183e-07, + "loss": 0.6337, + "step": 14385 + }, + { + "epoch": 0.91, + "grad_norm": 0.8771045207977295, + "learning_rate": 2.0432588178118274e-07, + "loss": 0.5753, + "step": 14386 + }, + { + "epoch": 0.91, + "grad_norm": 0.8254141211509705, + "learning_rate": 2.0403567840197813e-07, + "loss": 0.5234, + "step": 14387 + }, + { + "epoch": 0.91, + "grad_norm": 0.9506052732467651, + "learning_rate": 2.0374567696414716e-07, + "loss": 0.5637, + "step": 14388 + }, + { + "epoch": 0.91, + "grad_norm": 0.9225839972496033, + "learning_rate": 2.0345587747990004e-07, + "loss": 0.5918, + "step": 14389 + }, + { + "epoch": 0.91, + "grad_norm": 0.9083296656608582, + "learning_rate": 2.0316627996144035e-07, + "loss": 0.5388, + "step": 14390 + }, + { + "epoch": 0.91, + "grad_norm": 0.9312944412231445, + "learning_rate": 2.028768844209622e-07, + "loss": 0.5816, + "step": 14391 + }, + { + "epoch": 0.91, + "grad_norm": 0.9078527092933655, + "learning_rate": 2.0258769087065034e-07, + "loss": 0.62, + "step": 14392 + }, + { + "epoch": 0.91, + "grad_norm": 0.8647677898406982, + "learning_rate": 2.022986993226811e-07, + "loss": 0.5776, + "step": 14393 + }, + { + "epoch": 0.91, + "grad_norm": 0.8233307600021362, + "learning_rate": 2.020099097892242e-07, + "loss": 0.5451, + "step": 14394 + }, + { + "epoch": 0.91, + "grad_norm": 0.8663762211799622, + "learning_rate": 2.0172132228243878e-07, + "loss": 0.6064, + "step": 14395 + }, + { + "epoch": 0.91, + "grad_norm": 0.8814685940742493, + "learning_rate": 2.014329368144774e-07, + "loss": 0.5361, + "step": 14396 + }, + { + "epoch": 0.91, + "grad_norm": 0.9814503788948059, + "learning_rate": 2.0114475339748085e-07, + "loss": 0.6066, + "step": 14397 + }, + { + "epoch": 0.91, + "grad_norm": 0.8616315126419067, + "learning_rate": 2.0085677204358445e-07, + "loss": 0.5469, + "step": 14398 + }, + { + "epoch": 0.91, + "grad_norm": 0.8662075400352478, + "learning_rate": 2.005689927649157e-07, + "loss": 0.601, + "step": 14399 + }, + { + "epoch": 0.91, + "grad_norm": 0.8765701055526733, + "learning_rate": 2.0028141557358992e-07, + "loss": 0.5845, + "step": 14400 + }, + { + "epoch": 0.91, + "grad_norm": 0.9240060448646545, + "learning_rate": 1.999940404817169e-07, + "loss": 0.5803, + "step": 14401 + }, + { + "epoch": 0.91, + "grad_norm": 0.8918695449829102, + "learning_rate": 1.9970686750139633e-07, + "loss": 0.5592, + "step": 14402 + }, + { + "epoch": 0.91, + "grad_norm": 0.8836629986763, + "learning_rate": 1.994198966447214e-07, + "loss": 0.6042, + "step": 14403 + }, + { + "epoch": 0.91, + "grad_norm": 0.8865856528282166, + "learning_rate": 1.991331279237746e-07, + "loss": 0.6085, + "step": 14404 + }, + { + "epoch": 0.91, + "grad_norm": 0.8742964863777161, + "learning_rate": 1.988465613506302e-07, + "loss": 0.5309, + "step": 14405 + }, + { + "epoch": 0.91, + "grad_norm": 0.9155290126800537, + "learning_rate": 1.9856019693735463e-07, + "loss": 0.6035, + "step": 14406 + }, + { + "epoch": 0.91, + "grad_norm": 0.8845750093460083, + "learning_rate": 1.982740346960077e-07, + "loss": 0.5975, + "step": 14407 + }, + { + "epoch": 0.91, + "grad_norm": 1.014074683189392, + "learning_rate": 1.9798807463863589e-07, + "loss": 0.615, + "step": 14408 + }, + { + "epoch": 0.91, + "grad_norm": 0.8332728147506714, + "learning_rate": 1.977023167772818e-07, + "loss": 0.551, + "step": 14409 + }, + { + "epoch": 0.91, + "grad_norm": 0.8898774981498718, + "learning_rate": 1.9741676112397688e-07, + "loss": 0.5352, + "step": 14410 + }, + { + "epoch": 0.91, + "grad_norm": 0.8795101046562195, + "learning_rate": 1.9713140769074546e-07, + "loss": 0.5739, + "step": 14411 + }, + { + "epoch": 0.91, + "grad_norm": 0.9184449315071106, + "learning_rate": 1.9684625648960287e-07, + "loss": 0.5652, + "step": 14412 + }, + { + "epoch": 0.91, + "grad_norm": 0.837814450263977, + "learning_rate": 1.965613075325551e-07, + "loss": 0.5506, + "step": 14413 + }, + { + "epoch": 0.91, + "grad_norm": 0.9577629566192627, + "learning_rate": 1.9627656083160085e-07, + "loss": 0.6132, + "step": 14414 + }, + { + "epoch": 0.91, + "grad_norm": 0.8790633678436279, + "learning_rate": 1.9599201639872943e-07, + "loss": 0.5509, + "step": 14415 + }, + { + "epoch": 0.91, + "grad_norm": 0.8492127060890198, + "learning_rate": 1.9570767424592186e-07, + "loss": 0.5252, + "step": 14416 + }, + { + "epoch": 0.91, + "grad_norm": 0.9555572271347046, + "learning_rate": 1.9542353438515183e-07, + "loss": 0.5623, + "step": 14417 + }, + { + "epoch": 0.91, + "grad_norm": 0.8835919499397278, + "learning_rate": 1.9513959682838314e-07, + "loss": 0.5849, + "step": 14418 + }, + { + "epoch": 0.91, + "grad_norm": 0.882376492023468, + "learning_rate": 1.9485586158757009e-07, + "loss": 0.5733, + "step": 14419 + }, + { + "epoch": 0.91, + "grad_norm": 0.944187343120575, + "learning_rate": 1.9457232867466204e-07, + "loss": 0.6102, + "step": 14420 + }, + { + "epoch": 0.91, + "grad_norm": 0.9396758079528809, + "learning_rate": 1.9428899810159606e-07, + "loss": 0.5872, + "step": 14421 + }, + { + "epoch": 0.91, + "grad_norm": 0.98106449842453, + "learning_rate": 1.9400586988030212e-07, + "loss": 0.5745, + "step": 14422 + }, + { + "epoch": 0.91, + "grad_norm": 0.8788214921951294, + "learning_rate": 1.937229440227023e-07, + "loss": 0.5746, + "step": 14423 + }, + { + "epoch": 0.91, + "grad_norm": 0.8514944911003113, + "learning_rate": 1.9344022054070933e-07, + "loss": 0.5369, + "step": 14424 + }, + { + "epoch": 0.91, + "grad_norm": 0.8359341025352478, + "learning_rate": 1.9315769944622808e-07, + "loss": 0.5719, + "step": 14425 + }, + { + "epoch": 0.91, + "grad_norm": 0.8570429682731628, + "learning_rate": 1.9287538075115463e-07, + "loss": 0.6033, + "step": 14426 + }, + { + "epoch": 0.91, + "grad_norm": 0.8559786081314087, + "learning_rate": 1.9259326446737503e-07, + "loss": 0.5631, + "step": 14427 + }, + { + "epoch": 0.91, + "grad_norm": 0.8701675534248352, + "learning_rate": 1.9231135060677087e-07, + "loss": 0.581, + "step": 14428 + }, + { + "epoch": 0.91, + "grad_norm": 0.9724037051200867, + "learning_rate": 1.9202963918120988e-07, + "loss": 0.6107, + "step": 14429 + }, + { + "epoch": 0.91, + "grad_norm": 0.8299076557159424, + "learning_rate": 1.9174813020255533e-07, + "loss": 0.5938, + "step": 14430 + }, + { + "epoch": 0.91, + "grad_norm": 0.84014493227005, + "learning_rate": 1.9146682368266112e-07, + "loss": 0.5601, + "step": 14431 + }, + { + "epoch": 0.91, + "grad_norm": 0.9042718410491943, + "learning_rate": 1.9118571963336996e-07, + "loss": 0.5843, + "step": 14432 + }, + { + "epoch": 0.91, + "grad_norm": 0.8914376497268677, + "learning_rate": 1.9090481806652017e-07, + "loss": 0.5966, + "step": 14433 + }, + { + "epoch": 0.91, + "grad_norm": 0.832227885723114, + "learning_rate": 1.9062411899393896e-07, + "loss": 0.5538, + "step": 14434 + }, + { + "epoch": 0.91, + "grad_norm": 0.8363732695579529, + "learning_rate": 1.9034362242744576e-07, + "loss": 0.5605, + "step": 14435 + }, + { + "epoch": 0.91, + "grad_norm": 0.9074519872665405, + "learning_rate": 1.9006332837885054e-07, + "loss": 0.5895, + "step": 14436 + }, + { + "epoch": 0.91, + "grad_norm": 0.9413220882415771, + "learning_rate": 1.8978323685995558e-07, + "loss": 0.6647, + "step": 14437 + }, + { + "epoch": 0.91, + "grad_norm": 0.9103296995162964, + "learning_rate": 1.8950334788255586e-07, + "loss": 0.5697, + "step": 14438 + }, + { + "epoch": 0.91, + "grad_norm": 0.8581710457801819, + "learning_rate": 1.8922366145843585e-07, + "loss": 0.5329, + "step": 14439 + }, + { + "epoch": 0.91, + "grad_norm": 0.8271042704582214, + "learning_rate": 1.8894417759937055e-07, + "loss": 0.5779, + "step": 14440 + }, + { + "epoch": 0.91, + "grad_norm": 0.8599358797073364, + "learning_rate": 1.886648963171306e-07, + "loss": 0.5483, + "step": 14441 + }, + { + "epoch": 0.91, + "grad_norm": 0.8909770250320435, + "learning_rate": 1.8838581762347485e-07, + "loss": 0.5755, + "step": 14442 + }, + { + "epoch": 0.92, + "grad_norm": 0.9004727005958557, + "learning_rate": 1.881069415301534e-07, + "loss": 0.5401, + "step": 14443 + }, + { + "epoch": 0.92, + "grad_norm": 0.926632821559906, + "learning_rate": 1.8782826804890908e-07, + "loss": 0.6182, + "step": 14444 + }, + { + "epoch": 0.92, + "grad_norm": 0.9216212630271912, + "learning_rate": 1.875497971914758e-07, + "loss": 0.6631, + "step": 14445 + }, + { + "epoch": 0.92, + "grad_norm": 0.9242258071899414, + "learning_rate": 1.872715289695798e-07, + "loss": 0.5645, + "step": 14446 + }, + { + "epoch": 0.92, + "grad_norm": 0.8471218943595886, + "learning_rate": 1.8699346339493774e-07, + "loss": 0.5925, + "step": 14447 + }, + { + "epoch": 0.92, + "grad_norm": 0.8889009952545166, + "learning_rate": 1.867156004792575e-07, + "loss": 0.5835, + "step": 14448 + }, + { + "epoch": 0.92, + "grad_norm": 0.9420746564865112, + "learning_rate": 1.864379402342381e-07, + "loss": 0.5516, + "step": 14449 + }, + { + "epoch": 0.92, + "grad_norm": 0.8928573131561279, + "learning_rate": 1.8616048267157348e-07, + "loss": 0.5829, + "step": 14450 + }, + { + "epoch": 0.92, + "grad_norm": 0.9594048261642456, + "learning_rate": 1.8588322780294377e-07, + "loss": 0.5724, + "step": 14451 + }, + { + "epoch": 0.92, + "grad_norm": 0.8984374403953552, + "learning_rate": 1.8560617564002458e-07, + "loss": 0.5723, + "step": 14452 + }, + { + "epoch": 0.92, + "grad_norm": 0.9626867175102234, + "learning_rate": 1.8532932619448106e-07, + "loss": 0.5672, + "step": 14453 + }, + { + "epoch": 0.92, + "grad_norm": 0.8767659068107605, + "learning_rate": 1.8505267947797056e-07, + "loss": 0.5572, + "step": 14454 + }, + { + "epoch": 0.92, + "grad_norm": 0.8869348168373108, + "learning_rate": 1.847762355021421e-07, + "loss": 0.5568, + "step": 14455 + }, + { + "epoch": 0.92, + "grad_norm": 0.9030115604400635, + "learning_rate": 1.8449999427863575e-07, + "loss": 0.5726, + "step": 14456 + }, + { + "epoch": 0.92, + "grad_norm": 0.8998832106590271, + "learning_rate": 1.842239558190817e-07, + "loss": 0.5776, + "step": 14457 + }, + { + "epoch": 0.92, + "grad_norm": 0.9000546932220459, + "learning_rate": 1.839481201351051e-07, + "loss": 0.5754, + "step": 14458 + }, + { + "epoch": 0.92, + "grad_norm": 0.9226672053337097, + "learning_rate": 1.8367248723831889e-07, + "loss": 0.6349, + "step": 14459 + }, + { + "epoch": 0.92, + "grad_norm": 0.8481553792953491, + "learning_rate": 1.833970571403293e-07, + "loss": 0.5471, + "step": 14460 + }, + { + "epoch": 0.92, + "grad_norm": 0.8419100046157837, + "learning_rate": 1.831218298527343e-07, + "loss": 0.5638, + "step": 14461 + }, + { + "epoch": 0.92, + "grad_norm": 0.9119687080383301, + "learning_rate": 1.828468053871213e-07, + "loss": 0.6084, + "step": 14462 + }, + { + "epoch": 0.92, + "grad_norm": 0.8742016553878784, + "learning_rate": 1.825719837550727e-07, + "loss": 0.5708, + "step": 14463 + }, + { + "epoch": 0.92, + "grad_norm": 0.8338335752487183, + "learning_rate": 1.822973649681592e-07, + "loss": 0.5689, + "step": 14464 + }, + { + "epoch": 0.92, + "grad_norm": 0.8597403168678284, + "learning_rate": 1.820229490379438e-07, + "loss": 0.4852, + "step": 14465 + }, + { + "epoch": 0.92, + "grad_norm": 0.8943460583686829, + "learning_rate": 1.8174873597598176e-07, + "loss": 0.5449, + "step": 14466 + }, + { + "epoch": 0.92, + "grad_norm": 0.9471714496612549, + "learning_rate": 1.814747257938182e-07, + "loss": 0.6154, + "step": 14467 + }, + { + "epoch": 0.92, + "grad_norm": 0.8958661556243896, + "learning_rate": 1.8120091850299225e-07, + "loss": 0.5881, + "step": 14468 + }, + { + "epoch": 0.92, + "grad_norm": 0.8442410826683044, + "learning_rate": 1.809273141150325e-07, + "loss": 0.5285, + "step": 14469 + }, + { + "epoch": 0.92, + "grad_norm": 0.8980113863945007, + "learning_rate": 1.8065391264145805e-07, + "loss": 0.5612, + "step": 14470 + }, + { + "epoch": 0.92, + "grad_norm": 0.8895835876464844, + "learning_rate": 1.8038071409378299e-07, + "loss": 0.6386, + "step": 14471 + }, + { + "epoch": 0.92, + "grad_norm": 0.872721791267395, + "learning_rate": 1.8010771848350983e-07, + "loss": 0.6137, + "step": 14472 + }, + { + "epoch": 0.92, + "grad_norm": 0.8433144688606262, + "learning_rate": 1.7983492582213324e-07, + "loss": 0.5736, + "step": 14473 + }, + { + "epoch": 0.92, + "grad_norm": 0.9304208159446716, + "learning_rate": 1.7956233612114017e-07, + "loss": 0.5614, + "step": 14474 + }, + { + "epoch": 0.92, + "grad_norm": 0.8683229088783264, + "learning_rate": 1.792899493920075e-07, + "loss": 0.5202, + "step": 14475 + }, + { + "epoch": 0.92, + "grad_norm": 0.9167693257331848, + "learning_rate": 1.79017765646205e-07, + "loss": 0.6225, + "step": 14476 + }, + { + "epoch": 0.92, + "grad_norm": 0.8281375169754028, + "learning_rate": 1.78745784895194e-07, + "loss": 0.5476, + "step": 14477 + }, + { + "epoch": 0.92, + "grad_norm": 0.8747629523277283, + "learning_rate": 1.7847400715042594e-07, + "loss": 0.5737, + "step": 14478 + }, + { + "epoch": 0.92, + "grad_norm": 0.916631817817688, + "learning_rate": 1.7820243242334334e-07, + "loss": 0.6067, + "step": 14479 + }, + { + "epoch": 0.92, + "grad_norm": 0.9027150869369507, + "learning_rate": 1.7793106072538423e-07, + "loss": 0.5696, + "step": 14480 + }, + { + "epoch": 0.92, + "grad_norm": 0.917669415473938, + "learning_rate": 1.7765989206797285e-07, + "loss": 0.6068, + "step": 14481 + }, + { + "epoch": 0.92, + "grad_norm": 1.0007344484329224, + "learning_rate": 1.7738892646252726e-07, + "loss": 0.5725, + "step": 14482 + }, + { + "epoch": 0.92, + "grad_norm": 0.9161766767501831, + "learning_rate": 1.7711816392045778e-07, + "loss": 0.5761, + "step": 14483 + }, + { + "epoch": 0.92, + "grad_norm": 0.892318606376648, + "learning_rate": 1.7684760445316418e-07, + "loss": 0.5412, + "step": 14484 + }, + { + "epoch": 0.92, + "grad_norm": 0.9114658236503601, + "learning_rate": 1.765772480720407e-07, + "loss": 0.5503, + "step": 14485 + }, + { + "epoch": 0.92, + "grad_norm": 0.9310790300369263, + "learning_rate": 1.763070947884693e-07, + "loss": 0.5859, + "step": 14486 + }, + { + "epoch": 0.92, + "grad_norm": 0.898358941078186, + "learning_rate": 1.7603714461382481e-07, + "loss": 0.6073, + "step": 14487 + }, + { + "epoch": 0.92, + "grad_norm": 0.846410870552063, + "learning_rate": 1.7576739755947593e-07, + "loss": 0.5229, + "step": 14488 + }, + { + "epoch": 0.92, + "grad_norm": 0.8562573194503784, + "learning_rate": 1.7549785363677906e-07, + "loss": 0.5742, + "step": 14489 + }, + { + "epoch": 0.92, + "grad_norm": 0.8590916395187378, + "learning_rate": 1.7522851285708465e-07, + "loss": 0.5965, + "step": 14490 + }, + { + "epoch": 0.92, + "grad_norm": 0.8809557557106018, + "learning_rate": 1.7495937523173356e-07, + "loss": 0.5514, + "step": 14491 + }, + { + "epoch": 0.92, + "grad_norm": 0.9356900453567505, + "learning_rate": 1.7469044077205732e-07, + "loss": 0.5705, + "step": 14492 + }, + { + "epoch": 0.92, + "grad_norm": 0.8484225869178772, + "learning_rate": 1.744217094893813e-07, + "loss": 0.5891, + "step": 14493 + }, + { + "epoch": 0.92, + "grad_norm": 0.8933218121528625, + "learning_rate": 1.7415318139502036e-07, + "loss": 0.55, + "step": 14494 + }, + { + "epoch": 0.92, + "grad_norm": 0.9217604398727417, + "learning_rate": 1.7388485650028043e-07, + "loss": 0.5794, + "step": 14495 + }, + { + "epoch": 0.92, + "grad_norm": 0.9160068035125732, + "learning_rate": 1.7361673481646025e-07, + "loss": 0.5694, + "step": 14496 + }, + { + "epoch": 0.92, + "grad_norm": 1.0053678750991821, + "learning_rate": 1.7334881635485023e-07, + "loss": 0.5796, + "step": 14497 + }, + { + "epoch": 0.92, + "grad_norm": 0.934262216091156, + "learning_rate": 1.7308110112673027e-07, + "loss": 0.5943, + "step": 14498 + }, + { + "epoch": 0.92, + "grad_norm": 0.8574293255805969, + "learning_rate": 1.7281358914337408e-07, + "loss": 0.5807, + "step": 14499 + }, + { + "epoch": 0.92, + "grad_norm": 0.9166094064712524, + "learning_rate": 1.7254628041604437e-07, + "loss": 0.5807, + "step": 14500 + }, + { + "epoch": 0.92, + "grad_norm": 0.8386301398277283, + "learning_rate": 1.7227917495599823e-07, + "loss": 0.5858, + "step": 14501 + }, + { + "epoch": 0.92, + "grad_norm": 1.001879096031189, + "learning_rate": 1.7201227277448108e-07, + "loss": 0.6831, + "step": 14502 + }, + { + "epoch": 0.92, + "grad_norm": 0.887852668762207, + "learning_rate": 1.7174557388273173e-07, + "loss": 0.5888, + "step": 14503 + }, + { + "epoch": 0.92, + "grad_norm": 0.9143977165222168, + "learning_rate": 1.7147907829198008e-07, + "loss": 0.5656, + "step": 14504 + }, + { + "epoch": 0.92, + "grad_norm": 0.9383800029754639, + "learning_rate": 1.7121278601344715e-07, + "loss": 0.6177, + "step": 14505 + }, + { + "epoch": 0.92, + "grad_norm": 0.918759822845459, + "learning_rate": 1.7094669705834566e-07, + "loss": 0.58, + "step": 14506 + }, + { + "epoch": 0.92, + "grad_norm": 0.8827510476112366, + "learning_rate": 1.706808114378805e-07, + "loss": 0.5384, + "step": 14507 + }, + { + "epoch": 0.92, + "grad_norm": 0.8679559230804443, + "learning_rate": 1.7041512916324554e-07, + "loss": 0.4867, + "step": 14508 + }, + { + "epoch": 0.92, + "grad_norm": 0.9122892618179321, + "learning_rate": 1.7014965024562846e-07, + "loss": 0.5651, + "step": 14509 + }, + { + "epoch": 0.92, + "grad_norm": 0.8981230854988098, + "learning_rate": 1.698843746962081e-07, + "loss": 0.5893, + "step": 14510 + }, + { + "epoch": 0.92, + "grad_norm": 0.9092260599136353, + "learning_rate": 1.6961930252615388e-07, + "loss": 0.5357, + "step": 14511 + }, + { + "epoch": 0.92, + "grad_norm": 0.8531707525253296, + "learning_rate": 1.6935443374662741e-07, + "loss": 0.5466, + "step": 14512 + }, + { + "epoch": 0.92, + "grad_norm": 0.890770673751831, + "learning_rate": 1.6908976836878088e-07, + "loss": 0.5673, + "step": 14513 + }, + { + "epoch": 0.92, + "grad_norm": 0.9549497961997986, + "learning_rate": 1.6882530640375872e-07, + "loss": 0.5602, + "step": 14514 + }, + { + "epoch": 0.92, + "grad_norm": 0.89844810962677, + "learning_rate": 1.68561047862697e-07, + "loss": 0.5751, + "step": 14515 + }, + { + "epoch": 0.92, + "grad_norm": 0.9014208912849426, + "learning_rate": 1.6829699275672186e-07, + "loss": 0.5715, + "step": 14516 + }, + { + "epoch": 0.92, + "grad_norm": 0.9853465557098389, + "learning_rate": 1.6803314109695157e-07, + "loss": 0.5917, + "step": 14517 + }, + { + "epoch": 0.92, + "grad_norm": 0.8697071075439453, + "learning_rate": 1.677694928944973e-07, + "loss": 0.5795, + "step": 14518 + }, + { + "epoch": 0.92, + "grad_norm": 0.9067200422286987, + "learning_rate": 1.6750604816045902e-07, + "loss": 0.5587, + "step": 14519 + }, + { + "epoch": 0.92, + "grad_norm": 0.899541437625885, + "learning_rate": 1.6724280690593008e-07, + "loss": 0.5799, + "step": 14520 + }, + { + "epoch": 0.92, + "grad_norm": 0.8571711778640747, + "learning_rate": 1.6697976914199497e-07, + "loss": 0.5425, + "step": 14521 + }, + { + "epoch": 0.92, + "grad_norm": 0.8938726782798767, + "learning_rate": 1.6671693487972818e-07, + "loss": 0.5316, + "step": 14522 + }, + { + "epoch": 0.92, + "grad_norm": 0.9331005215644836, + "learning_rate": 1.6645430413019858e-07, + "loss": 0.6139, + "step": 14523 + }, + { + "epoch": 0.92, + "grad_norm": 0.89864182472229, + "learning_rate": 1.6619187690446293e-07, + "loss": 0.5949, + "step": 14524 + }, + { + "epoch": 0.92, + "grad_norm": 0.8560614585876465, + "learning_rate": 1.659296532135718e-07, + "loss": 0.5573, + "step": 14525 + }, + { + "epoch": 0.92, + "grad_norm": 0.9034044742584229, + "learning_rate": 1.6566763306856638e-07, + "loss": 0.5997, + "step": 14526 + }, + { + "epoch": 0.92, + "grad_norm": 0.8897153735160828, + "learning_rate": 1.6540581648048003e-07, + "loss": 0.5859, + "step": 14527 + }, + { + "epoch": 0.92, + "grad_norm": 0.8635241985321045, + "learning_rate": 1.6514420346033565e-07, + "loss": 0.5781, + "step": 14528 + }, + { + "epoch": 0.92, + "grad_norm": 0.9002516865730286, + "learning_rate": 1.6488279401915052e-07, + "loss": 0.6092, + "step": 14529 + }, + { + "epoch": 0.92, + "grad_norm": 0.8698979020118713, + "learning_rate": 1.6462158816792973e-07, + "loss": 0.5981, + "step": 14530 + }, + { + "epoch": 0.92, + "grad_norm": 0.9019778966903687, + "learning_rate": 1.643605859176739e-07, + "loss": 0.5418, + "step": 14531 + }, + { + "epoch": 0.92, + "grad_norm": 0.8013015389442444, + "learning_rate": 1.6409978727937094e-07, + "loss": 0.5363, + "step": 14532 + }, + { + "epoch": 0.92, + "grad_norm": 0.9132005572319031, + "learning_rate": 1.6383919226400368e-07, + "loss": 0.5711, + "step": 14533 + }, + { + "epoch": 0.92, + "grad_norm": 0.8765537738800049, + "learning_rate": 1.6357880088254396e-07, + "loss": 0.5648, + "step": 14534 + }, + { + "epoch": 0.92, + "grad_norm": 0.8809942007064819, + "learning_rate": 1.633186131459563e-07, + "loss": 0.529, + "step": 14535 + }, + { + "epoch": 0.92, + "grad_norm": 0.9183542728424072, + "learning_rate": 1.6305862906519587e-07, + "loss": 0.5666, + "step": 14536 + }, + { + "epoch": 0.92, + "grad_norm": 0.9470837712287903, + "learning_rate": 1.6279884865121108e-07, + "loss": 0.5908, + "step": 14537 + }, + { + "epoch": 0.92, + "grad_norm": 0.9300570487976074, + "learning_rate": 1.6253927191493879e-07, + "loss": 0.6089, + "step": 14538 + }, + { + "epoch": 0.92, + "grad_norm": 0.8753145337104797, + "learning_rate": 1.622798988673091e-07, + "loss": 0.5747, + "step": 14539 + }, + { + "epoch": 0.92, + "grad_norm": 0.8983120918273926, + "learning_rate": 1.6202072951924386e-07, + "loss": 0.5561, + "step": 14540 + }, + { + "epoch": 0.92, + "grad_norm": 0.8710740208625793, + "learning_rate": 1.6176176388165598e-07, + "loss": 0.5833, + "step": 14541 + }, + { + "epoch": 0.92, + "grad_norm": 0.8573417067527771, + "learning_rate": 1.6150300196544955e-07, + "loss": 0.5251, + "step": 14542 + }, + { + "epoch": 0.92, + "grad_norm": 0.8613002300262451, + "learning_rate": 1.612444437815186e-07, + "loss": 0.5635, + "step": 14543 + }, + { + "epoch": 0.92, + "grad_norm": 0.7859262824058533, + "learning_rate": 1.6098608934075166e-07, + "loss": 0.5038, + "step": 14544 + }, + { + "epoch": 0.92, + "grad_norm": 0.9234058260917664, + "learning_rate": 1.607279386540278e-07, + "loss": 0.6108, + "step": 14545 + }, + { + "epoch": 0.92, + "grad_norm": 0.9104276895523071, + "learning_rate": 1.60469991732215e-07, + "loss": 0.5749, + "step": 14546 + }, + { + "epoch": 0.92, + "grad_norm": 0.9492734670639038, + "learning_rate": 1.6021224858617513e-07, + "loss": 0.5761, + "step": 14547 + }, + { + "epoch": 0.92, + "grad_norm": 0.8354452848434448, + "learning_rate": 1.5995470922676116e-07, + "loss": 0.636, + "step": 14548 + }, + { + "epoch": 0.92, + "grad_norm": 0.8883960247039795, + "learning_rate": 1.5969737366481774e-07, + "loss": 0.5439, + "step": 14549 + }, + { + "epoch": 0.92, + "grad_norm": 0.9534339308738708, + "learning_rate": 1.5944024191117958e-07, + "loss": 0.6199, + "step": 14550 + }, + { + "epoch": 0.92, + "grad_norm": 0.9829249382019043, + "learning_rate": 1.5918331397667298e-07, + "loss": 0.5705, + "step": 14551 + }, + { + "epoch": 0.92, + "grad_norm": 0.8952674865722656, + "learning_rate": 1.589265898721176e-07, + "loss": 0.5662, + "step": 14552 + }, + { + "epoch": 0.92, + "grad_norm": 0.8737239241600037, + "learning_rate": 1.586700696083232e-07, + "loss": 0.5791, + "step": 14553 + }, + { + "epoch": 0.92, + "grad_norm": 0.9135823249816895, + "learning_rate": 1.5841375319608943e-07, + "loss": 0.5703, + "step": 14554 + }, + { + "epoch": 0.92, + "grad_norm": 0.8945186734199524, + "learning_rate": 1.5815764064621043e-07, + "loss": 0.5432, + "step": 14555 + }, + { + "epoch": 0.92, + "grad_norm": 0.9179873466491699, + "learning_rate": 1.5790173196946924e-07, + "loss": 0.5429, + "step": 14556 + }, + { + "epoch": 0.92, + "grad_norm": 0.9364351630210876, + "learning_rate": 1.5764602717664224e-07, + "loss": 0.5607, + "step": 14557 + }, + { + "epoch": 0.92, + "grad_norm": 0.8686890602111816, + "learning_rate": 1.5739052627849581e-07, + "loss": 0.5407, + "step": 14558 + }, + { + "epoch": 0.92, + "grad_norm": 0.9229563474655151, + "learning_rate": 1.571352292857875e-07, + "loss": 0.523, + "step": 14559 + }, + { + "epoch": 0.92, + "grad_norm": 0.9978078603744507, + "learning_rate": 1.5688013620926757e-07, + "loss": 0.5824, + "step": 14560 + }, + { + "epoch": 0.92, + "grad_norm": 0.8744204640388489, + "learning_rate": 1.566252470596774e-07, + "loss": 0.5485, + "step": 14561 + }, + { + "epoch": 0.92, + "grad_norm": 0.8485409617424011, + "learning_rate": 1.5637056184774958e-07, + "loss": 0.588, + "step": 14562 + }, + { + "epoch": 0.92, + "grad_norm": 0.8641082048416138, + "learning_rate": 1.5611608058420714e-07, + "loss": 0.5467, + "step": 14563 + }, + { + "epoch": 0.92, + "grad_norm": 0.9121243357658386, + "learning_rate": 1.5586180327976598e-07, + "loss": 0.5439, + "step": 14564 + }, + { + "epoch": 0.92, + "grad_norm": 0.9285200834274292, + "learning_rate": 1.5560772994513251e-07, + "loss": 0.5821, + "step": 14565 + }, + { + "epoch": 0.92, + "grad_norm": 0.868303120136261, + "learning_rate": 1.55353860591006e-07, + "loss": 0.5052, + "step": 14566 + }, + { + "epoch": 0.92, + "grad_norm": 0.8897990584373474, + "learning_rate": 1.5510019522807397e-07, + "loss": 0.545, + "step": 14567 + }, + { + "epoch": 0.92, + "grad_norm": 0.8986243009567261, + "learning_rate": 1.5484673386701953e-07, + "loss": 0.6177, + "step": 14568 + }, + { + "epoch": 0.92, + "grad_norm": 0.8620361685752869, + "learning_rate": 1.545934765185131e-07, + "loss": 0.5743, + "step": 14569 + }, + { + "epoch": 0.92, + "grad_norm": 0.8731402158737183, + "learning_rate": 1.5434042319321996e-07, + "loss": 0.5466, + "step": 14570 + }, + { + "epoch": 0.92, + "grad_norm": 0.8847206830978394, + "learning_rate": 1.5408757390179496e-07, + "loss": 0.548, + "step": 14571 + }, + { + "epoch": 0.92, + "grad_norm": 0.8469605445861816, + "learning_rate": 1.5383492865488459e-07, + "loss": 0.5754, + "step": 14572 + }, + { + "epoch": 0.92, + "grad_norm": 0.8870298266410828, + "learning_rate": 1.5358248746312588e-07, + "loss": 0.5896, + "step": 14573 + }, + { + "epoch": 0.92, + "grad_norm": 0.9190220236778259, + "learning_rate": 1.533302503371503e-07, + "loss": 0.5394, + "step": 14574 + }, + { + "epoch": 0.92, + "grad_norm": 0.9334941506385803, + "learning_rate": 1.5307821728757722e-07, + "loss": 0.5883, + "step": 14575 + }, + { + "epoch": 0.92, + "grad_norm": 0.9348692297935486, + "learning_rate": 1.5282638832501917e-07, + "loss": 0.6218, + "step": 14576 + }, + { + "epoch": 0.92, + "grad_norm": 0.8661801218986511, + "learning_rate": 1.5257476346007938e-07, + "loss": 0.5299, + "step": 14577 + }, + { + "epoch": 0.92, + "grad_norm": 0.9223611354827881, + "learning_rate": 1.523233427033538e-07, + "loss": 0.5646, + "step": 14578 + }, + { + "epoch": 0.92, + "grad_norm": 0.8899549245834351, + "learning_rate": 1.5207212606542786e-07, + "loss": 0.6004, + "step": 14579 + }, + { + "epoch": 0.92, + "grad_norm": 0.8400233387947083, + "learning_rate": 1.518211135568809e-07, + "loss": 0.5215, + "step": 14580 + }, + { + "epoch": 0.92, + "grad_norm": 0.8840736746788025, + "learning_rate": 1.5157030518828054e-07, + "loss": 0.6081, + "step": 14581 + }, + { + "epoch": 0.92, + "grad_norm": 0.8895472884178162, + "learning_rate": 1.513197009701889e-07, + "loss": 0.5254, + "step": 14582 + }, + { + "epoch": 0.92, + "grad_norm": 0.8849679827690125, + "learning_rate": 1.510693009131564e-07, + "loss": 0.5798, + "step": 14583 + }, + { + "epoch": 0.92, + "grad_norm": 0.8696837425231934, + "learning_rate": 1.50819105027728e-07, + "loss": 0.4908, + "step": 14584 + }, + { + "epoch": 0.92, + "grad_norm": 0.8720855116844177, + "learning_rate": 1.5056911332443801e-07, + "loss": 0.6337, + "step": 14585 + }, + { + "epoch": 0.92, + "grad_norm": 0.9008244276046753, + "learning_rate": 1.5031932581381247e-07, + "loss": 0.6017, + "step": 14586 + }, + { + "epoch": 0.92, + "grad_norm": 0.8799957036972046, + "learning_rate": 1.5006974250636906e-07, + "loss": 0.5997, + "step": 14587 + }, + { + "epoch": 0.92, + "grad_norm": 0.8658244013786316, + "learning_rate": 1.498203634126183e-07, + "loss": 0.5255, + "step": 14588 + }, + { + "epoch": 0.92, + "grad_norm": 0.876167893409729, + "learning_rate": 1.4957118854305842e-07, + "loss": 0.5822, + "step": 14589 + }, + { + "epoch": 0.92, + "grad_norm": 0.926031768321991, + "learning_rate": 1.4932221790818268e-07, + "loss": 0.5881, + "step": 14590 + }, + { + "epoch": 0.92, + "grad_norm": 1.0563932657241821, + "learning_rate": 1.4907345151847387e-07, + "loss": 0.5859, + "step": 14591 + }, + { + "epoch": 0.92, + "grad_norm": 0.832943320274353, + "learning_rate": 1.4882488938440688e-07, + "loss": 0.5449, + "step": 14592 + }, + { + "epoch": 0.92, + "grad_norm": 0.97652667760849, + "learning_rate": 1.485765315164478e-07, + "loss": 0.5737, + "step": 14593 + }, + { + "epoch": 0.92, + "grad_norm": 0.8609157800674438, + "learning_rate": 1.483283779250544e-07, + "loss": 0.5534, + "step": 14594 + }, + { + "epoch": 0.92, + "grad_norm": 1.0561258792877197, + "learning_rate": 1.4808042862067496e-07, + "loss": 0.5937, + "step": 14595 + }, + { + "epoch": 0.92, + "grad_norm": 0.9125807881355286, + "learning_rate": 1.4783268361375058e-07, + "loss": 0.6365, + "step": 14596 + }, + { + "epoch": 0.92, + "grad_norm": 0.8677931427955627, + "learning_rate": 1.4758514291471238e-07, + "loss": 0.5369, + "step": 14597 + }, + { + "epoch": 0.92, + "grad_norm": 1.0120726823806763, + "learning_rate": 1.4733780653398254e-07, + "loss": 0.5515, + "step": 14598 + }, + { + "epoch": 0.92, + "grad_norm": 0.9230685234069824, + "learning_rate": 1.4709067448197722e-07, + "loss": 0.6248, + "step": 14599 + }, + { + "epoch": 0.92, + "grad_norm": 0.8900234699249268, + "learning_rate": 1.4684374676910197e-07, + "loss": 0.5869, + "step": 14600 + }, + { + "epoch": 0.93, + "grad_norm": 0.8961969614028931, + "learning_rate": 1.4659702340575287e-07, + "loss": 0.5668, + "step": 14601 + }, + { + "epoch": 0.93, + "grad_norm": 0.843304455280304, + "learning_rate": 1.4635050440232002e-07, + "loss": 0.5334, + "step": 14602 + }, + { + "epoch": 0.93, + "grad_norm": 0.8992311358451843, + "learning_rate": 1.4610418976918172e-07, + "loss": 0.5863, + "step": 14603 + }, + { + "epoch": 0.93, + "grad_norm": 0.9412532448768616, + "learning_rate": 1.4585807951671194e-07, + "loss": 0.5809, + "step": 14604 + }, + { + "epoch": 0.93, + "grad_norm": 0.8952974677085876, + "learning_rate": 1.4561217365527124e-07, + "loss": 0.6129, + "step": 14605 + }, + { + "epoch": 0.93, + "grad_norm": 0.9405317306518555, + "learning_rate": 1.453664721952147e-07, + "loss": 0.5574, + "step": 14606 + }, + { + "epoch": 0.93, + "grad_norm": 0.8412303924560547, + "learning_rate": 1.451209751468885e-07, + "loss": 0.5746, + "step": 14607 + }, + { + "epoch": 0.93, + "grad_norm": 0.896740734577179, + "learning_rate": 1.448756825206288e-07, + "loss": 0.5703, + "step": 14608 + }, + { + "epoch": 0.93, + "grad_norm": 0.9362704753875732, + "learning_rate": 1.4463059432676395e-07, + "loss": 0.618, + "step": 14609 + }, + { + "epoch": 0.93, + "grad_norm": 0.89310622215271, + "learning_rate": 1.4438571057561523e-07, + "loss": 0.5166, + "step": 14610 + }, + { + "epoch": 0.93, + "grad_norm": 0.8676783442497253, + "learning_rate": 1.4414103127749157e-07, + "loss": 0.5502, + "step": 14611 + }, + { + "epoch": 0.93, + "grad_norm": 0.9016738533973694, + "learning_rate": 1.4389655644269752e-07, + "loss": 0.6399, + "step": 14612 + }, + { + "epoch": 0.93, + "grad_norm": 0.9519631266593933, + "learning_rate": 1.4365228608152647e-07, + "loss": 0.6178, + "step": 14613 + }, + { + "epoch": 0.93, + "grad_norm": 0.9060442447662354, + "learning_rate": 1.4340822020426304e-07, + "loss": 0.6017, + "step": 14614 + }, + { + "epoch": 0.93, + "grad_norm": 0.922366201877594, + "learning_rate": 1.4316435882118563e-07, + "loss": 0.6266, + "step": 14615 + }, + { + "epoch": 0.93, + "grad_norm": 0.9243087768554688, + "learning_rate": 1.429207019425599e-07, + "loss": 0.5854, + "step": 14616 + }, + { + "epoch": 0.93, + "grad_norm": 0.9128119945526123, + "learning_rate": 1.426772495786477e-07, + "loss": 0.5616, + "step": 14617 + }, + { + "epoch": 0.93, + "grad_norm": 0.8875778913497925, + "learning_rate": 1.4243400173969968e-07, + "loss": 0.5322, + "step": 14618 + }, + { + "epoch": 0.93, + "grad_norm": 0.875525712966919, + "learning_rate": 1.4219095843595654e-07, + "loss": 0.5701, + "step": 14619 + }, + { + "epoch": 0.93, + "grad_norm": 0.8632004857063293, + "learning_rate": 1.4194811967765344e-07, + "loss": 0.555, + "step": 14620 + }, + { + "epoch": 0.93, + "grad_norm": 0.9275280237197876, + "learning_rate": 1.417054854750155e-07, + "loss": 0.5548, + "step": 14621 + }, + { + "epoch": 0.93, + "grad_norm": 0.893173098564148, + "learning_rate": 1.414630558382579e-07, + "loss": 0.5044, + "step": 14622 + }, + { + "epoch": 0.93, + "grad_norm": 0.8877639174461365, + "learning_rate": 1.4122083077759087e-07, + "loss": 0.556, + "step": 14623 + }, + { + "epoch": 0.93, + "grad_norm": 0.8409522771835327, + "learning_rate": 1.409788103032106e-07, + "loss": 0.5588, + "step": 14624 + }, + { + "epoch": 0.93, + "grad_norm": 0.8505089282989502, + "learning_rate": 1.4073699442531007e-07, + "loss": 0.5673, + "step": 14625 + }, + { + "epoch": 0.93, + "grad_norm": 0.865674614906311, + "learning_rate": 1.4049538315407064e-07, + "loss": 0.5732, + "step": 14626 + }, + { + "epoch": 0.93, + "grad_norm": 0.8754248023033142, + "learning_rate": 1.4025397649966577e-07, + "loss": 0.5596, + "step": 14627 + }, + { + "epoch": 0.93, + "grad_norm": 0.8468300104141235, + "learning_rate": 1.400127744722596e-07, + "loss": 0.5757, + "step": 14628 + }, + { + "epoch": 0.93, + "grad_norm": 0.9094893336296082, + "learning_rate": 1.3977177708200896e-07, + "loss": 0.5904, + "step": 14629 + }, + { + "epoch": 0.93, + "grad_norm": 0.9047295451164246, + "learning_rate": 1.395309843390613e-07, + "loss": 0.584, + "step": 14630 + }, + { + "epoch": 0.93, + "grad_norm": 0.8569273948669434, + "learning_rate": 1.3929039625355633e-07, + "loss": 0.5859, + "step": 14631 + }, + { + "epoch": 0.93, + "grad_norm": 0.9276108145713806, + "learning_rate": 1.3905001283562257e-07, + "loss": 0.5994, + "step": 14632 + }, + { + "epoch": 0.93, + "grad_norm": 0.8770352602005005, + "learning_rate": 1.3880983409538252e-07, + "loss": 0.562, + "step": 14633 + }, + { + "epoch": 0.93, + "grad_norm": 0.9754252433776855, + "learning_rate": 1.3856986004295082e-07, + "loss": 0.5636, + "step": 14634 + }, + { + "epoch": 0.93, + "grad_norm": 0.8698052763938904, + "learning_rate": 1.3833009068842995e-07, + "loss": 0.567, + "step": 14635 + }, + { + "epoch": 0.93, + "grad_norm": 0.9316123723983765, + "learning_rate": 1.3809052604191632e-07, + "loss": 0.5865, + "step": 14636 + }, + { + "epoch": 0.93, + "grad_norm": 0.9362663626670837, + "learning_rate": 1.3785116611349736e-07, + "loss": 0.5783, + "step": 14637 + }, + { + "epoch": 0.93, + "grad_norm": 0.91670161485672, + "learning_rate": 1.3761201091325172e-07, + "loss": 0.5896, + "step": 14638 + }, + { + "epoch": 0.93, + "grad_norm": 0.856816828250885, + "learning_rate": 1.3737306045124966e-07, + "loss": 0.6125, + "step": 14639 + }, + { + "epoch": 0.93, + "grad_norm": 0.9262798428535461, + "learning_rate": 1.3713431473755147e-07, + "loss": 0.5683, + "step": 14640 + }, + { + "epoch": 0.93, + "grad_norm": 0.9130752086639404, + "learning_rate": 1.3689577378221019e-07, + "loss": 0.6457, + "step": 14641 + }, + { + "epoch": 0.93, + "grad_norm": 0.8534045815467834, + "learning_rate": 1.3665743759527173e-07, + "loss": 0.5155, + "step": 14642 + }, + { + "epoch": 0.93, + "grad_norm": 0.9429634213447571, + "learning_rate": 1.3641930618676912e-07, + "loss": 0.5988, + "step": 14643 + }, + { + "epoch": 0.93, + "grad_norm": 0.9114389419555664, + "learning_rate": 1.3618137956673105e-07, + "loss": 0.5677, + "step": 14644 + }, + { + "epoch": 0.93, + "grad_norm": 0.9275795221328735, + "learning_rate": 1.3594365774517447e-07, + "loss": 0.5971, + "step": 14645 + }, + { + "epoch": 0.93, + "grad_norm": 0.9180171489715576, + "learning_rate": 1.357061407321103e-07, + "loss": 0.545, + "step": 14646 + }, + { + "epoch": 0.93, + "grad_norm": 0.8974061608314514, + "learning_rate": 1.3546882853753885e-07, + "loss": 0.5988, + "step": 14647 + }, + { + "epoch": 0.93, + "grad_norm": 0.8184780478477478, + "learning_rate": 1.3523172117145212e-07, + "loss": 0.606, + "step": 14648 + }, + { + "epoch": 0.93, + "grad_norm": 0.8791584968566895, + "learning_rate": 1.349948186438349e-07, + "loss": 0.5674, + "step": 14649 + }, + { + "epoch": 0.93, + "grad_norm": 0.900364100933075, + "learning_rate": 1.347581209646609e-07, + "loss": 0.5965, + "step": 14650 + }, + { + "epoch": 0.93, + "grad_norm": 0.9039656519889832, + "learning_rate": 1.3452162814389824e-07, + "loss": 0.5437, + "step": 14651 + }, + { + "epoch": 0.93, + "grad_norm": 0.8871658444404602, + "learning_rate": 1.342853401915034e-07, + "loss": 0.6313, + "step": 14652 + }, + { + "epoch": 0.93, + "grad_norm": 0.8789704442024231, + "learning_rate": 1.3404925711742734e-07, + "loss": 0.6161, + "step": 14653 + }, + { + "epoch": 0.93, + "grad_norm": 0.8579655885696411, + "learning_rate": 1.3381337893160818e-07, + "loss": 0.5503, + "step": 14654 + }, + { + "epoch": 0.93, + "grad_norm": 0.9106957316398621, + "learning_rate": 1.3357770564398075e-07, + "loss": 0.5624, + "step": 14655 + }, + { + "epoch": 0.93, + "grad_norm": 0.8811336159706116, + "learning_rate": 1.333422372644666e-07, + "loss": 0.5981, + "step": 14656 + }, + { + "epoch": 0.93, + "grad_norm": 0.8811172842979431, + "learning_rate": 1.331069738029811e-07, + "loss": 0.5696, + "step": 14657 + }, + { + "epoch": 0.93, + "grad_norm": 0.8447614908218384, + "learning_rate": 1.3287191526942968e-07, + "loss": 0.6139, + "step": 14658 + }, + { + "epoch": 0.93, + "grad_norm": 0.8404377102851868, + "learning_rate": 1.3263706167371104e-07, + "loss": 0.5405, + "step": 14659 + }, + { + "epoch": 0.93, + "grad_norm": 0.9460970163345337, + "learning_rate": 1.324024130257129e-07, + "loss": 0.6109, + "step": 14660 + }, + { + "epoch": 0.93, + "grad_norm": 0.862629234790802, + "learning_rate": 1.3216796933531672e-07, + "loss": 0.5738, + "step": 14661 + }, + { + "epoch": 0.93, + "grad_norm": 0.8235554695129395, + "learning_rate": 1.31933730612393e-07, + "loss": 0.598, + "step": 14662 + }, + { + "epoch": 0.93, + "grad_norm": 0.8776896595954895, + "learning_rate": 1.316996968668044e-07, + "loss": 0.5908, + "step": 14663 + }, + { + "epoch": 0.93, + "grad_norm": 0.8844308257102966, + "learning_rate": 1.3146586810840745e-07, + "loss": 0.5464, + "step": 14664 + }, + { + "epoch": 0.93, + "grad_norm": 0.9336026310920715, + "learning_rate": 1.312322443470454e-07, + "loss": 0.5776, + "step": 14665 + }, + { + "epoch": 0.93, + "grad_norm": 0.9121221303939819, + "learning_rate": 1.309988255925565e-07, + "loss": 0.5844, + "step": 14666 + }, + { + "epoch": 0.93, + "grad_norm": 0.9228976964950562, + "learning_rate": 1.30765611854769e-07, + "loss": 0.6109, + "step": 14667 + }, + { + "epoch": 0.93, + "grad_norm": 0.9101218581199646, + "learning_rate": 1.305326031435028e-07, + "loss": 0.5677, + "step": 14668 + }, + { + "epoch": 0.93, + "grad_norm": 0.9400882720947266, + "learning_rate": 1.3029979946856953e-07, + "loss": 0.6526, + "step": 14669 + }, + { + "epoch": 0.93, + "grad_norm": 0.8727139830589294, + "learning_rate": 1.3006720083977076e-07, + "loss": 0.5734, + "step": 14670 + }, + { + "epoch": 0.93, + "grad_norm": 0.8939454555511475, + "learning_rate": 1.2983480726690033e-07, + "loss": 0.5467, + "step": 14671 + }, + { + "epoch": 0.93, + "grad_norm": 0.962372899055481, + "learning_rate": 1.296026187597449e-07, + "loss": 0.5609, + "step": 14672 + }, + { + "epoch": 0.93, + "grad_norm": 0.8936379551887512, + "learning_rate": 1.2937063532807992e-07, + "loss": 0.5464, + "step": 14673 + }, + { + "epoch": 0.93, + "grad_norm": 0.8870679140090942, + "learning_rate": 1.2913885698167427e-07, + "loss": 0.5659, + "step": 14674 + }, + { + "epoch": 0.93, + "grad_norm": 0.901860237121582, + "learning_rate": 1.2890728373028626e-07, + "loss": 0.5578, + "step": 14675 + }, + { + "epoch": 0.93, + "grad_norm": 0.8647616505622864, + "learning_rate": 1.2867591558366755e-07, + "loss": 0.5257, + "step": 14676 + }, + { + "epoch": 0.93, + "grad_norm": 0.8566569089889526, + "learning_rate": 1.2844475255156087e-07, + "loss": 0.5479, + "step": 14677 + }, + { + "epoch": 0.93, + "grad_norm": 0.8648780584335327, + "learning_rate": 1.2821379464369732e-07, + "loss": 0.5547, + "step": 14678 + }, + { + "epoch": 0.93, + "grad_norm": 0.9167495369911194, + "learning_rate": 1.2798304186980358e-07, + "loss": 0.5968, + "step": 14679 + }, + { + "epoch": 0.93, + "grad_norm": 0.877565860748291, + "learning_rate": 1.277524942395958e-07, + "loss": 0.5279, + "step": 14680 + }, + { + "epoch": 0.93, + "grad_norm": 0.9053774476051331, + "learning_rate": 1.275221517627806e-07, + "loss": 0.5837, + "step": 14681 + }, + { + "epoch": 0.93, + "grad_norm": 0.8372228145599365, + "learning_rate": 1.2729201444905803e-07, + "loss": 0.5663, + "step": 14682 + }, + { + "epoch": 0.93, + "grad_norm": 0.8383122086524963, + "learning_rate": 1.2706208230811812e-07, + "loss": 0.5338, + "step": 14683 + }, + { + "epoch": 0.93, + "grad_norm": 0.8852533102035522, + "learning_rate": 1.2683235534964088e-07, + "loss": 0.6176, + "step": 14684 + }, + { + "epoch": 0.93, + "grad_norm": 0.9276344180107117, + "learning_rate": 1.2660283358330195e-07, + "loss": 0.5446, + "step": 14685 + }, + { + "epoch": 0.93, + "grad_norm": 0.9432269334793091, + "learning_rate": 1.263735170187641e-07, + "loss": 0.5843, + "step": 14686 + }, + { + "epoch": 0.93, + "grad_norm": 0.8667259812355042, + "learning_rate": 1.26144405665683e-07, + "loss": 0.5467, + "step": 14687 + }, + { + "epoch": 0.93, + "grad_norm": 0.8887820243835449, + "learning_rate": 1.2591549953370586e-07, + "loss": 0.5782, + "step": 14688 + }, + { + "epoch": 0.93, + "grad_norm": 0.908986508846283, + "learning_rate": 1.2568679863247168e-07, + "loss": 0.5866, + "step": 14689 + }, + { + "epoch": 0.93, + "grad_norm": 0.8384619951248169, + "learning_rate": 1.2545830297161e-07, + "loss": 0.5293, + "step": 14690 + }, + { + "epoch": 0.93, + "grad_norm": 0.8378182053565979, + "learning_rate": 1.2523001256074196e-07, + "loss": 0.5163, + "step": 14691 + }, + { + "epoch": 0.93, + "grad_norm": 0.9510812759399414, + "learning_rate": 1.2500192740947936e-07, + "loss": 0.5682, + "step": 14692 + }, + { + "epoch": 0.93, + "grad_norm": 0.8728382587432861, + "learning_rate": 1.2477404752742784e-07, + "loss": 0.5734, + "step": 14693 + }, + { + "epoch": 0.93, + "grad_norm": 0.9147626757621765, + "learning_rate": 1.2454637292418082e-07, + "loss": 0.573, + "step": 14694 + }, + { + "epoch": 0.93, + "grad_norm": 0.9553124308586121, + "learning_rate": 1.2431890360932507e-07, + "loss": 0.5857, + "step": 14695 + }, + { + "epoch": 0.93, + "grad_norm": 0.9232133626937866, + "learning_rate": 1.2409163959244019e-07, + "loss": 0.5843, + "step": 14696 + }, + { + "epoch": 0.93, + "grad_norm": 0.8695604801177979, + "learning_rate": 1.2386458088309296e-07, + "loss": 0.5436, + "step": 14697 + }, + { + "epoch": 0.93, + "grad_norm": 0.9066473245620728, + "learning_rate": 1.2363772749084625e-07, + "loss": 0.5383, + "step": 14698 + }, + { + "epoch": 0.93, + "grad_norm": 0.9268561005592346, + "learning_rate": 1.2341107942525132e-07, + "loss": 0.5507, + "step": 14699 + }, + { + "epoch": 0.93, + "grad_norm": 0.8040413856506348, + "learning_rate": 1.2318463669585112e-07, + "loss": 0.5478, + "step": 14700 + }, + { + "epoch": 0.93, + "grad_norm": 0.8859974145889282, + "learning_rate": 1.229583993121808e-07, + "loss": 0.5666, + "step": 14701 + }, + { + "epoch": 0.93, + "grad_norm": 0.9229069948196411, + "learning_rate": 1.2273236728376604e-07, + "loss": 0.5676, + "step": 14702 + }, + { + "epoch": 0.93, + "grad_norm": 0.853150486946106, + "learning_rate": 1.2250654062012478e-07, + "loss": 0.5378, + "step": 14703 + }, + { + "epoch": 0.93, + "grad_norm": 0.8615385890007019, + "learning_rate": 1.2228091933076613e-07, + "loss": 0.5285, + "step": 14704 + }, + { + "epoch": 0.93, + "grad_norm": 0.8250787854194641, + "learning_rate": 1.2205550342518803e-07, + "loss": 0.5093, + "step": 14705 + }, + { + "epoch": 0.93, + "grad_norm": 0.8962552547454834, + "learning_rate": 1.2183029291288452e-07, + "loss": 0.5964, + "step": 14706 + }, + { + "epoch": 0.93, + "grad_norm": 0.8384519815444946, + "learning_rate": 1.2160528780333803e-07, + "loss": 0.5196, + "step": 14707 + }, + { + "epoch": 0.93, + "grad_norm": 0.8676429986953735, + "learning_rate": 1.2138048810602154e-07, + "loss": 0.5945, + "step": 14708 + }, + { + "epoch": 0.93, + "grad_norm": 0.8354253172874451, + "learning_rate": 1.2115589383040083e-07, + "loss": 0.604, + "step": 14709 + }, + { + "epoch": 0.93, + "grad_norm": 0.9310884475708008, + "learning_rate": 1.2093150498593387e-07, + "loss": 0.5702, + "step": 14710 + }, + { + "epoch": 0.93, + "grad_norm": 0.878158450126648, + "learning_rate": 1.2070732158206754e-07, + "loss": 0.6138, + "step": 14711 + }, + { + "epoch": 0.93, + "grad_norm": 0.9077056050300598, + "learning_rate": 1.2048334362824265e-07, + "loss": 0.6086, + "step": 14712 + }, + { + "epoch": 0.93, + "grad_norm": 0.8881044983863831, + "learning_rate": 1.202595711338894e-07, + "loss": 0.5935, + "step": 14713 + }, + { + "epoch": 0.93, + "grad_norm": 0.8508563041687012, + "learning_rate": 1.2003600410842974e-07, + "loss": 0.5741, + "step": 14714 + }, + { + "epoch": 0.93, + "grad_norm": 0.9420803785324097, + "learning_rate": 1.1981264256127832e-07, + "loss": 0.5942, + "step": 14715 + }, + { + "epoch": 0.93, + "grad_norm": 0.8780478239059448, + "learning_rate": 1.1958948650183988e-07, + "loss": 0.5893, + "step": 14716 + }, + { + "epoch": 0.93, + "grad_norm": 0.8164428472518921, + "learning_rate": 1.1936653593950964e-07, + "loss": 0.5472, + "step": 14717 + }, + { + "epoch": 0.93, + "grad_norm": 0.901099681854248, + "learning_rate": 1.1914379088367677e-07, + "loss": 0.5937, + "step": 14718 + }, + { + "epoch": 0.93, + "grad_norm": 0.8668814897537231, + "learning_rate": 1.1892125134371935e-07, + "loss": 0.5709, + "step": 14719 + }, + { + "epoch": 0.93, + "grad_norm": 0.9474896788597107, + "learning_rate": 1.1869891732900762e-07, + "loss": 0.607, + "step": 14720 + }, + { + "epoch": 0.93, + "grad_norm": 0.9030824303627014, + "learning_rate": 1.1847678884890467e-07, + "loss": 0.5238, + "step": 14721 + }, + { + "epoch": 0.93, + "grad_norm": 0.81502366065979, + "learning_rate": 1.1825486591276136e-07, + "loss": 0.5206, + "step": 14722 + }, + { + "epoch": 0.93, + "grad_norm": 0.9036549925804138, + "learning_rate": 1.1803314852992409e-07, + "loss": 0.6143, + "step": 14723 + }, + { + "epoch": 0.93, + "grad_norm": 0.8753872513771057, + "learning_rate": 1.1781163670972762e-07, + "loss": 0.571, + "step": 14724 + }, + { + "epoch": 0.93, + "grad_norm": 0.9172664284706116, + "learning_rate": 1.1759033046149948e-07, + "loss": 0.5988, + "step": 14725 + }, + { + "epoch": 0.93, + "grad_norm": 0.8600670695304871, + "learning_rate": 1.1736922979455778e-07, + "loss": 0.6114, + "step": 14726 + }, + { + "epoch": 0.93, + "grad_norm": 0.9102545380592346, + "learning_rate": 1.1714833471821175e-07, + "loss": 0.612, + "step": 14727 + }, + { + "epoch": 0.93, + "grad_norm": 0.8528123497962952, + "learning_rate": 1.1692764524176337e-07, + "loss": 0.5978, + "step": 14728 + }, + { + "epoch": 0.93, + "grad_norm": 0.8206274509429932, + "learning_rate": 1.1670716137450577e-07, + "loss": 0.5306, + "step": 14729 + }, + { + "epoch": 0.93, + "grad_norm": 0.8492806553840637, + "learning_rate": 1.1648688312572099e-07, + "loss": 0.5431, + "step": 14730 + }, + { + "epoch": 0.93, + "grad_norm": 0.9542714357376099, + "learning_rate": 1.1626681050468492e-07, + "loss": 0.5932, + "step": 14731 + }, + { + "epoch": 0.93, + "grad_norm": 0.9290028810501099, + "learning_rate": 1.1604694352066459e-07, + "loss": 0.6256, + "step": 14732 + }, + { + "epoch": 0.93, + "grad_norm": 0.8055222034454346, + "learning_rate": 1.1582728218291761e-07, + "loss": 0.5609, + "step": 14733 + }, + { + "epoch": 0.93, + "grad_norm": 0.874623715877533, + "learning_rate": 1.1560782650069269e-07, + "loss": 0.5352, + "step": 14734 + }, + { + "epoch": 0.93, + "grad_norm": 0.9114512205123901, + "learning_rate": 1.153885764832302e-07, + "loss": 0.5984, + "step": 14735 + }, + { + "epoch": 0.93, + "grad_norm": 0.9033612608909607, + "learning_rate": 1.1516953213976278e-07, + "loss": 0.602, + "step": 14736 + }, + { + "epoch": 0.93, + "grad_norm": 0.905761182308197, + "learning_rate": 1.1495069347951416e-07, + "loss": 0.573, + "step": 14737 + }, + { + "epoch": 0.93, + "grad_norm": 0.9094721078872681, + "learning_rate": 1.1473206051169694e-07, + "loss": 0.6089, + "step": 14738 + }, + { + "epoch": 0.93, + "grad_norm": 0.875142514705658, + "learning_rate": 1.1451363324551822e-07, + "loss": 0.5662, + "step": 14739 + }, + { + "epoch": 0.93, + "grad_norm": 0.9052537679672241, + "learning_rate": 1.1429541169017511e-07, + "loss": 0.5498, + "step": 14740 + }, + { + "epoch": 0.93, + "grad_norm": 0.9127451777458191, + "learning_rate": 1.1407739585485633e-07, + "loss": 0.5891, + "step": 14741 + }, + { + "epoch": 0.93, + "grad_norm": 0.9252974390983582, + "learning_rate": 1.1385958574874178e-07, + "loss": 0.5736, + "step": 14742 + }, + { + "epoch": 0.93, + "grad_norm": 0.9759803414344788, + "learning_rate": 1.1364198138100191e-07, + "loss": 0.64, + "step": 14743 + }, + { + "epoch": 0.93, + "grad_norm": 1.012252926826477, + "learning_rate": 1.1342458276079937e-07, + "loss": 0.6271, + "step": 14744 + }, + { + "epoch": 0.93, + "grad_norm": 0.8845311403274536, + "learning_rate": 1.1320738989728963e-07, + "loss": 0.5, + "step": 14745 + }, + { + "epoch": 0.93, + "grad_norm": 0.7984757423400879, + "learning_rate": 1.1299040279961593e-07, + "loss": 0.516, + "step": 14746 + }, + { + "epoch": 0.93, + "grad_norm": 0.8785191774368286, + "learning_rate": 1.1277362147691595e-07, + "loss": 0.5672, + "step": 14747 + }, + { + "epoch": 0.93, + "grad_norm": 0.8724797368049622, + "learning_rate": 1.125570459383174e-07, + "loss": 0.5881, + "step": 14748 + }, + { + "epoch": 0.93, + "grad_norm": 0.8874450325965881, + "learning_rate": 1.1234067619293909e-07, + "loss": 0.5522, + "step": 14749 + }, + { + "epoch": 0.93, + "grad_norm": 0.9328641891479492, + "learning_rate": 1.1212451224989262e-07, + "loss": 0.5661, + "step": 14750 + }, + { + "epoch": 0.93, + "grad_norm": 0.9419904351234436, + "learning_rate": 1.1190855411827906e-07, + "loss": 0.5788, + "step": 14751 + }, + { + "epoch": 0.93, + "grad_norm": 0.945692241191864, + "learning_rate": 1.1169280180719111e-07, + "loss": 0.5635, + "step": 14752 + }, + { + "epoch": 0.93, + "grad_norm": 0.8278078436851501, + "learning_rate": 1.114772553257154e-07, + "loss": 0.5664, + "step": 14753 + }, + { + "epoch": 0.93, + "grad_norm": 0.8511500358581543, + "learning_rate": 1.1126191468292579e-07, + "loss": 0.5296, + "step": 14754 + }, + { + "epoch": 0.93, + "grad_norm": 0.921492338180542, + "learning_rate": 1.1104677988789004e-07, + "loss": 0.6163, + "step": 14755 + }, + { + "epoch": 0.93, + "grad_norm": 0.8180157542228699, + "learning_rate": 1.1083185094966753e-07, + "loss": 0.519, + "step": 14756 + }, + { + "epoch": 0.93, + "grad_norm": 0.9290024638175964, + "learning_rate": 1.1061712787730716e-07, + "loss": 0.5699, + "step": 14757 + }, + { + "epoch": 0.93, + "grad_norm": 0.8530245423316956, + "learning_rate": 1.1040261067985114e-07, + "loss": 0.5732, + "step": 14758 + }, + { + "epoch": 0.94, + "grad_norm": 0.8908236026763916, + "learning_rate": 1.1018829936633113e-07, + "loss": 0.6248, + "step": 14759 + }, + { + "epoch": 0.94, + "grad_norm": 0.9305959939956665, + "learning_rate": 1.0997419394577158e-07, + "loss": 0.6109, + "step": 14760 + }, + { + "epoch": 0.94, + "grad_norm": 0.8903090357780457, + "learning_rate": 1.0976029442718694e-07, + "loss": 0.5836, + "step": 14761 + }, + { + "epoch": 0.94, + "grad_norm": 0.9302195310592651, + "learning_rate": 1.0954660081958502e-07, + "loss": 0.5357, + "step": 14762 + }, + { + "epoch": 0.94, + "grad_norm": 0.9230768084526062, + "learning_rate": 1.0933311313196304e-07, + "loss": 0.6302, + "step": 14763 + }, + { + "epoch": 0.94, + "grad_norm": 0.876150906085968, + "learning_rate": 1.091198313733105e-07, + "loss": 0.5398, + "step": 14764 + }, + { + "epoch": 0.94, + "grad_norm": 0.8719222545623779, + "learning_rate": 1.0890675555260688e-07, + "loss": 0.5634, + "step": 14765 + }, + { + "epoch": 0.94, + "grad_norm": 0.8793936967849731, + "learning_rate": 1.086938856788261e-07, + "loss": 0.558, + "step": 14766 + }, + { + "epoch": 0.94, + "grad_norm": 0.9724277257919312, + "learning_rate": 1.0848122176092935e-07, + "loss": 0.5401, + "step": 14767 + }, + { + "epoch": 0.94, + "grad_norm": 0.8842143416404724, + "learning_rate": 1.0826876380787221e-07, + "loss": 0.5905, + "step": 14768 + }, + { + "epoch": 0.94, + "grad_norm": 0.8512188196182251, + "learning_rate": 1.0805651182860033e-07, + "loss": 0.5681, + "step": 14769 + }, + { + "epoch": 0.94, + "grad_norm": 0.8788979649543762, + "learning_rate": 1.0784446583205099e-07, + "loss": 0.5593, + "step": 14770 + }, + { + "epoch": 0.94, + "grad_norm": 0.9469314217567444, + "learning_rate": 1.0763262582715206e-07, + "loss": 0.6131, + "step": 14771 + }, + { + "epoch": 0.94, + "grad_norm": 0.8478160500526428, + "learning_rate": 1.0742099182282529e-07, + "loss": 0.5218, + "step": 14772 + }, + { + "epoch": 0.94, + "grad_norm": 0.8808424472808838, + "learning_rate": 1.0720956382797965e-07, + "loss": 0.5471, + "step": 14773 + }, + { + "epoch": 0.94, + "grad_norm": 0.9337725639343262, + "learning_rate": 1.0699834185151802e-07, + "loss": 0.5696, + "step": 14774 + }, + { + "epoch": 0.94, + "grad_norm": 0.9188077449798584, + "learning_rate": 1.0678732590233553e-07, + "loss": 0.5836, + "step": 14775 + }, + { + "epoch": 0.94, + "grad_norm": 0.8440690040588379, + "learning_rate": 1.0657651598931563e-07, + "loss": 0.5362, + "step": 14776 + }, + { + "epoch": 0.94, + "grad_norm": 0.886298418045044, + "learning_rate": 1.0636591212133673e-07, + "loss": 0.5703, + "step": 14777 + }, + { + "epoch": 0.94, + "grad_norm": 0.8677876591682434, + "learning_rate": 1.0615551430726456e-07, + "loss": 0.581, + "step": 14778 + }, + { + "epoch": 0.94, + "grad_norm": 0.8505701422691345, + "learning_rate": 1.0594532255595979e-07, + "loss": 0.554, + "step": 14779 + }, + { + "epoch": 0.94, + "grad_norm": 0.8900310397148132, + "learning_rate": 1.0573533687627258e-07, + "loss": 0.5679, + "step": 14780 + }, + { + "epoch": 0.94, + "grad_norm": 0.7801492214202881, + "learning_rate": 1.0552555727704417e-07, + "loss": 0.5483, + "step": 14781 + }, + { + "epoch": 0.94, + "grad_norm": 0.9094178676605225, + "learning_rate": 1.053159837671075e-07, + "loss": 0.5976, + "step": 14782 + }, + { + "epoch": 0.94, + "grad_norm": 0.8431292772293091, + "learning_rate": 1.0510661635528774e-07, + "loss": 0.5384, + "step": 14783 + }, + { + "epoch": 0.94, + "grad_norm": 0.9035899639129639, + "learning_rate": 1.0489745505040006e-07, + "loss": 0.5172, + "step": 14784 + }, + { + "epoch": 0.94, + "grad_norm": 0.849152147769928, + "learning_rate": 1.0468849986125185e-07, + "loss": 0.5678, + "step": 14785 + }, + { + "epoch": 0.94, + "grad_norm": 0.8912017941474915, + "learning_rate": 1.0447975079664163e-07, + "loss": 0.556, + "step": 14786 + }, + { + "epoch": 0.94, + "grad_norm": 0.87160325050354, + "learning_rate": 1.042712078653585e-07, + "loss": 0.5773, + "step": 14787 + }, + { + "epoch": 0.94, + "grad_norm": 0.8713656067848206, + "learning_rate": 1.0406287107618429e-07, + "loss": 0.5917, + "step": 14788 + }, + { + "epoch": 0.94, + "grad_norm": 0.8525165319442749, + "learning_rate": 1.0385474043789034e-07, + "loss": 0.5386, + "step": 14789 + }, + { + "epoch": 0.94, + "grad_norm": 0.8784705996513367, + "learning_rate": 1.0364681595924131e-07, + "loss": 0.5234, + "step": 14790 + }, + { + "epoch": 0.94, + "grad_norm": 0.8679460287094116, + "learning_rate": 1.034390976489913e-07, + "loss": 0.5367, + "step": 14791 + }, + { + "epoch": 0.94, + "grad_norm": 0.8518469929695129, + "learning_rate": 1.0323158551588663e-07, + "loss": 0.5427, + "step": 14792 + }, + { + "epoch": 0.94, + "grad_norm": 0.8853098750114441, + "learning_rate": 1.030242795686659e-07, + "loss": 0.5349, + "step": 14793 + }, + { + "epoch": 0.94, + "grad_norm": 0.9167693257331848, + "learning_rate": 1.0281717981605765e-07, + "loss": 0.5876, + "step": 14794 + }, + { + "epoch": 0.94, + "grad_norm": 0.8666211366653442, + "learning_rate": 1.0261028626678104e-07, + "loss": 0.491, + "step": 14795 + }, + { + "epoch": 0.94, + "grad_norm": 0.8543499708175659, + "learning_rate": 1.024035989295491e-07, + "loss": 0.5786, + "step": 14796 + }, + { + "epoch": 0.94, + "grad_norm": 0.9377774000167847, + "learning_rate": 1.0219711781306374e-07, + "loss": 0.6073, + "step": 14797 + }, + { + "epoch": 0.94, + "grad_norm": 0.9474400877952576, + "learning_rate": 1.0199084292602024e-07, + "loss": 0.5722, + "step": 14798 + }, + { + "epoch": 0.94, + "grad_norm": 0.9373930096626282, + "learning_rate": 1.0178477427710276e-07, + "loss": 0.6303, + "step": 14799 + }, + { + "epoch": 0.94, + "grad_norm": 0.9386447668075562, + "learning_rate": 1.015789118749888e-07, + "loss": 0.5895, + "step": 14800 + }, + { + "epoch": 0.94, + "grad_norm": 0.8682166337966919, + "learning_rate": 1.0137325572834644e-07, + "loss": 0.5264, + "step": 14801 + }, + { + "epoch": 0.94, + "grad_norm": 0.9619151949882507, + "learning_rate": 1.0116780584583596e-07, + "loss": 0.5927, + "step": 14802 + }, + { + "epoch": 0.94, + "grad_norm": 0.8697635531425476, + "learning_rate": 1.0096256223610657e-07, + "loss": 0.5474, + "step": 14803 + }, + { + "epoch": 0.94, + "grad_norm": 0.9294276833534241, + "learning_rate": 1.0075752490780133e-07, + "loss": 0.6086, + "step": 14804 + }, + { + "epoch": 0.94, + "grad_norm": 0.8718865513801575, + "learning_rate": 1.0055269386955391e-07, + "loss": 0.5709, + "step": 14805 + }, + { + "epoch": 0.94, + "grad_norm": 0.8805193305015564, + "learning_rate": 1.0034806912998796e-07, + "loss": 0.5771, + "step": 14806 + }, + { + "epoch": 0.94, + "grad_norm": 0.8892708420753479, + "learning_rate": 1.0014365069772102e-07, + "loss": 0.5927, + "step": 14807 + }, + { + "epoch": 0.94, + "grad_norm": 0.878617525100708, + "learning_rate": 9.993943858135846e-08, + "loss": 0.632, + "step": 14808 + }, + { + "epoch": 0.94, + "grad_norm": 0.9322656393051147, + "learning_rate": 9.973543278950115e-08, + "loss": 0.5679, + "step": 14809 + }, + { + "epoch": 0.94, + "grad_norm": 1.0550402402877808, + "learning_rate": 9.953163333073779e-08, + "loss": 0.6446, + "step": 14810 + }, + { + "epoch": 0.94, + "grad_norm": 0.8860265016555786, + "learning_rate": 9.932804021364928e-08, + "loss": 0.5912, + "step": 14811 + }, + { + "epoch": 0.94, + "grad_norm": 0.9326887726783752, + "learning_rate": 9.912465344680933e-08, + "loss": 0.5793, + "step": 14812 + }, + { + "epoch": 0.94, + "grad_norm": 0.7893259525299072, + "learning_rate": 9.892147303878108e-08, + "loss": 0.4661, + "step": 14813 + }, + { + "epoch": 0.94, + "grad_norm": 0.8492047786712646, + "learning_rate": 9.871849899811991e-08, + "loss": 0.5652, + "step": 14814 + }, + { + "epoch": 0.94, + "grad_norm": 0.9137168526649475, + "learning_rate": 9.851573133337288e-08, + "loss": 0.6163, + "step": 14815 + }, + { + "epoch": 0.94, + "grad_norm": 0.8527875542640686, + "learning_rate": 9.83131700530765e-08, + "loss": 0.5876, + "step": 14816 + }, + { + "epoch": 0.94, + "grad_norm": 0.9483136534690857, + "learning_rate": 9.81108151657617e-08, + "loss": 0.5822, + "step": 14817 + }, + { + "epoch": 0.94, + "grad_norm": 0.9227954149246216, + "learning_rate": 9.790866667994781e-08, + "loss": 0.589, + "step": 14818 + }, + { + "epoch": 0.94, + "grad_norm": 0.9560403823852539, + "learning_rate": 9.770672460414688e-08, + "loss": 0.6438, + "step": 14819 + }, + { + "epoch": 0.94, + "grad_norm": 0.9232782125473022, + "learning_rate": 9.750498894686156e-08, + "loss": 0.5775, + "step": 14820 + }, + { + "epoch": 0.94, + "grad_norm": 0.9010851383209229, + "learning_rate": 9.730345971658728e-08, + "loss": 0.5361, + "step": 14821 + }, + { + "epoch": 0.94, + "grad_norm": 0.8730600476264954, + "learning_rate": 9.710213692180836e-08, + "loss": 0.5925, + "step": 14822 + }, + { + "epoch": 0.94, + "grad_norm": 0.9010719060897827, + "learning_rate": 9.690102057100304e-08, + "loss": 0.5806, + "step": 14823 + }, + { + "epoch": 0.94, + "grad_norm": 0.8896587491035461, + "learning_rate": 9.670011067263896e-08, + "loss": 0.538, + "step": 14824 + }, + { + "epoch": 0.94, + "grad_norm": 0.8645528554916382, + "learning_rate": 9.649940723517549e-08, + "loss": 0.5407, + "step": 14825 + }, + { + "epoch": 0.94, + "grad_norm": 0.9243733286857605, + "learning_rate": 9.629891026706472e-08, + "loss": 0.5227, + "step": 14826 + }, + { + "epoch": 0.94, + "grad_norm": 0.8660601377487183, + "learning_rate": 9.609861977674773e-08, + "loss": 0.5238, + "step": 14827 + }, + { + "epoch": 0.94, + "grad_norm": 0.8025797605514526, + "learning_rate": 9.589853577265829e-08, + "loss": 0.5363, + "step": 14828 + }, + { + "epoch": 0.94, + "grad_norm": 0.8957266807556152, + "learning_rate": 9.569865826322133e-08, + "loss": 0.5925, + "step": 14829 + }, + { + "epoch": 0.94, + "grad_norm": 0.8849166035652161, + "learning_rate": 9.549898725685291e-08, + "loss": 0.5775, + "step": 14830 + }, + { + "epoch": 0.94, + "grad_norm": 0.8966931700706482, + "learning_rate": 9.52995227619613e-08, + "loss": 0.5489, + "step": 14831 + }, + { + "epoch": 0.94, + "grad_norm": 0.8650779724121094, + "learning_rate": 9.510026478694423e-08, + "loss": 0.5734, + "step": 14832 + }, + { + "epoch": 0.94, + "grad_norm": 0.799005925655365, + "learning_rate": 9.49012133401922e-08, + "loss": 0.5848, + "step": 14833 + }, + { + "epoch": 0.94, + "grad_norm": 0.8896704912185669, + "learning_rate": 9.47023684300863e-08, + "loss": 0.6051, + "step": 14834 + }, + { + "epoch": 0.94, + "grad_norm": 0.9791715145111084, + "learning_rate": 9.450373006499924e-08, + "loss": 0.5622, + "step": 14835 + }, + { + "epoch": 0.94, + "grad_norm": 0.9830961227416992, + "learning_rate": 9.430529825329492e-08, + "loss": 0.5459, + "step": 14836 + }, + { + "epoch": 0.94, + "grad_norm": 0.8603661060333252, + "learning_rate": 9.410707300333e-08, + "loss": 0.6293, + "step": 14837 + }, + { + "epoch": 0.94, + "grad_norm": 0.8368241190910339, + "learning_rate": 9.390905432344833e-08, + "loss": 0.5623, + "step": 14838 + }, + { + "epoch": 0.94, + "grad_norm": 0.9664581418037415, + "learning_rate": 9.371124222199046e-08, + "loss": 0.5745, + "step": 14839 + }, + { + "epoch": 0.94, + "grad_norm": 0.9014714360237122, + "learning_rate": 9.35136367072842e-08, + "loss": 0.5557, + "step": 14840 + }, + { + "epoch": 0.94, + "grad_norm": 0.9360548257827759, + "learning_rate": 9.331623778765009e-08, + "loss": 0.5281, + "step": 14841 + }, + { + "epoch": 0.94, + "grad_norm": 0.938177764415741, + "learning_rate": 9.311904547139982e-08, + "loss": 0.5604, + "step": 14842 + }, + { + "epoch": 0.94, + "grad_norm": 0.9227628707885742, + "learning_rate": 9.292205976683733e-08, + "loss": 0.5932, + "step": 14843 + }, + { + "epoch": 0.94, + "grad_norm": 0.8790847063064575, + "learning_rate": 9.272528068225595e-08, + "loss": 0.5774, + "step": 14844 + }, + { + "epoch": 0.94, + "grad_norm": 0.8632011413574219, + "learning_rate": 9.252870822594239e-08, + "loss": 0.574, + "step": 14845 + }, + { + "epoch": 0.94, + "grad_norm": 0.8468140959739685, + "learning_rate": 9.233234240617228e-08, + "loss": 0.5549, + "step": 14846 + }, + { + "epoch": 0.94, + "grad_norm": 0.878075897693634, + "learning_rate": 9.213618323121564e-08, + "loss": 0.589, + "step": 14847 + }, + { + "epoch": 0.94, + "grad_norm": 0.8827310800552368, + "learning_rate": 9.19402307093309e-08, + "loss": 0.5678, + "step": 14848 + }, + { + "epoch": 0.94, + "grad_norm": 0.9109396934509277, + "learning_rate": 9.174448484876864e-08, + "loss": 0.5426, + "step": 14849 + }, + { + "epoch": 0.94, + "grad_norm": 0.8124753832817078, + "learning_rate": 9.154894565777173e-08, + "loss": 0.486, + "step": 14850 + }, + { + "epoch": 0.94, + "grad_norm": 0.9251282215118408, + "learning_rate": 9.135361314457358e-08, + "loss": 0.5975, + "step": 14851 + }, + { + "epoch": 0.94, + "grad_norm": 0.8516286015510559, + "learning_rate": 9.115848731739874e-08, + "loss": 0.5802, + "step": 14852 + }, + { + "epoch": 0.94, + "grad_norm": 0.9649109244346619, + "learning_rate": 9.096356818446395e-08, + "loss": 0.6226, + "step": 14853 + }, + { + "epoch": 0.94, + "grad_norm": 0.8949142694473267, + "learning_rate": 9.076885575397543e-08, + "loss": 0.5577, + "step": 14854 + }, + { + "epoch": 0.94, + "grad_norm": 0.8870638012886047, + "learning_rate": 9.057435003413273e-08, + "loss": 0.619, + "step": 14855 + }, + { + "epoch": 0.94, + "grad_norm": 0.911157488822937, + "learning_rate": 9.038005103312486e-08, + "loss": 0.5763, + "step": 14856 + }, + { + "epoch": 0.94, + "grad_norm": 0.9567490816116333, + "learning_rate": 9.018595875913416e-08, + "loss": 0.5394, + "step": 14857 + }, + { + "epoch": 0.94, + "grad_norm": 0.9272708296775818, + "learning_rate": 8.999207322033299e-08, + "loss": 0.5666, + "step": 14858 + }, + { + "epoch": 0.94, + "grad_norm": 0.8706281781196594, + "learning_rate": 8.979839442488425e-08, + "loss": 0.5775, + "step": 14859 + }, + { + "epoch": 0.94, + "grad_norm": 0.8722688555717468, + "learning_rate": 8.960492238094421e-08, + "loss": 0.511, + "step": 14860 + }, + { + "epoch": 0.94, + "grad_norm": 0.8723371624946594, + "learning_rate": 8.941165709665966e-08, + "loss": 0.5932, + "step": 14861 + }, + { + "epoch": 0.94, + "grad_norm": 0.8604128360748291, + "learning_rate": 8.921859858016635e-08, + "loss": 0.5499, + "step": 14862 + }, + { + "epoch": 0.94, + "grad_norm": 0.9421664476394653, + "learning_rate": 8.902574683959442e-08, + "loss": 0.5585, + "step": 14863 + }, + { + "epoch": 0.94, + "grad_norm": 0.8716034889221191, + "learning_rate": 8.883310188306515e-08, + "loss": 0.5443, + "step": 14864 + }, + { + "epoch": 0.94, + "grad_norm": 0.9441227316856384, + "learning_rate": 8.864066371868873e-08, + "loss": 0.6073, + "step": 14865 + }, + { + "epoch": 0.94, + "grad_norm": 0.925212562084198, + "learning_rate": 8.844843235456868e-08, + "loss": 0.5865, + "step": 14866 + }, + { + "epoch": 0.94, + "grad_norm": 0.8931495547294617, + "learning_rate": 8.825640779879962e-08, + "loss": 0.516, + "step": 14867 + }, + { + "epoch": 0.94, + "grad_norm": 0.8108246326446533, + "learning_rate": 8.806459005946565e-08, + "loss": 0.5725, + "step": 14868 + }, + { + "epoch": 0.94, + "grad_norm": 0.92397141456604, + "learning_rate": 8.787297914464533e-08, + "loss": 0.5473, + "step": 14869 + }, + { + "epoch": 0.94, + "grad_norm": 0.9079901576042175, + "learning_rate": 8.768157506240494e-08, + "loss": 0.616, + "step": 14870 + }, + { + "epoch": 0.94, + "grad_norm": 0.9255422353744507, + "learning_rate": 8.749037782080528e-08, + "loss": 0.6009, + "step": 14871 + }, + { + "epoch": 0.94, + "grad_norm": 0.8876083493232727, + "learning_rate": 8.729938742789601e-08, + "loss": 0.5668, + "step": 14872 + }, + { + "epoch": 0.94, + "grad_norm": 0.8934264183044434, + "learning_rate": 8.71086038917196e-08, + "loss": 0.6096, + "step": 14873 + }, + { + "epoch": 0.94, + "grad_norm": 0.9331870079040527, + "learning_rate": 8.691802722030906e-08, + "loss": 0.5824, + "step": 14874 + }, + { + "epoch": 0.94, + "grad_norm": 0.8458074927330017, + "learning_rate": 8.672765742168964e-08, + "loss": 0.5393, + "step": 14875 + }, + { + "epoch": 0.94, + "grad_norm": 0.9242926836013794, + "learning_rate": 8.65374945038755e-08, + "loss": 0.5651, + "step": 14876 + }, + { + "epoch": 0.94, + "grad_norm": 0.9075822830200195, + "learning_rate": 8.634753847487575e-08, + "loss": 0.6445, + "step": 14877 + }, + { + "epoch": 0.94, + "grad_norm": 0.8882941603660583, + "learning_rate": 8.615778934268793e-08, + "loss": 0.6056, + "step": 14878 + }, + { + "epoch": 0.94, + "grad_norm": 0.8262979984283447, + "learning_rate": 8.59682471153006e-08, + "loss": 0.5354, + "step": 14879 + }, + { + "epoch": 0.94, + "grad_norm": 0.8280760645866394, + "learning_rate": 8.577891180069687e-08, + "loss": 0.5311, + "step": 14880 + }, + { + "epoch": 0.94, + "grad_norm": 0.9035456776618958, + "learning_rate": 8.558978340684642e-08, + "loss": 0.572, + "step": 14881 + }, + { + "epoch": 0.94, + "grad_norm": 0.8470786213874817, + "learning_rate": 8.540086194171515e-08, + "loss": 0.5889, + "step": 14882 + }, + { + "epoch": 0.94, + "grad_norm": 0.8594496250152588, + "learning_rate": 8.521214741325722e-08, + "loss": 0.5709, + "step": 14883 + }, + { + "epoch": 0.94, + "grad_norm": 0.8812367916107178, + "learning_rate": 8.502363982941797e-08, + "loss": 0.5249, + "step": 14884 + }, + { + "epoch": 0.94, + "grad_norm": 0.8610761165618896, + "learning_rate": 8.483533919813546e-08, + "loss": 0.5788, + "step": 14885 + }, + { + "epoch": 0.94, + "grad_norm": 0.882064700126648, + "learning_rate": 8.464724552733782e-08, + "loss": 0.6047, + "step": 14886 + }, + { + "epoch": 0.94, + "grad_norm": 0.9064013361930847, + "learning_rate": 8.445935882494593e-08, + "loss": 0.5604, + "step": 14887 + }, + { + "epoch": 0.94, + "grad_norm": 0.9299684166908264, + "learning_rate": 8.427167909887069e-08, + "loss": 0.5411, + "step": 14888 + }, + { + "epoch": 0.94, + "grad_norm": 0.9375229477882385, + "learning_rate": 8.408420635701353e-08, + "loss": 0.5626, + "step": 14889 + }, + { + "epoch": 0.94, + "grad_norm": 0.9137569665908813, + "learning_rate": 8.389694060726927e-08, + "loss": 0.6039, + "step": 14890 + }, + { + "epoch": 0.94, + "grad_norm": 0.9540867209434509, + "learning_rate": 8.370988185752383e-08, + "loss": 0.631, + "step": 14891 + }, + { + "epoch": 0.94, + "grad_norm": 0.8793188333511353, + "learning_rate": 8.352303011565254e-08, + "loss": 0.5763, + "step": 14892 + }, + { + "epoch": 0.94, + "grad_norm": 0.880684494972229, + "learning_rate": 8.333638538952305e-08, + "loss": 0.5755, + "step": 14893 + }, + { + "epoch": 0.94, + "grad_norm": 0.8908638954162598, + "learning_rate": 8.314994768699458e-08, + "loss": 0.5873, + "step": 14894 + }, + { + "epoch": 0.94, + "grad_norm": 0.9388841986656189, + "learning_rate": 8.296371701591699e-08, + "loss": 0.5329, + "step": 14895 + }, + { + "epoch": 0.94, + "grad_norm": 0.8881575465202332, + "learning_rate": 8.277769338413288e-08, + "loss": 0.6017, + "step": 14896 + }, + { + "epoch": 0.94, + "grad_norm": 0.8779671788215637, + "learning_rate": 8.259187679947434e-08, + "loss": 0.5545, + "step": 14897 + }, + { + "epoch": 0.94, + "grad_norm": 0.874380350112915, + "learning_rate": 8.240626726976453e-08, + "loss": 0.5587, + "step": 14898 + }, + { + "epoch": 0.94, + "grad_norm": 0.9045870900154114, + "learning_rate": 8.222086480282054e-08, + "loss": 0.5667, + "step": 14899 + }, + { + "epoch": 0.94, + "grad_norm": 0.9012387990951538, + "learning_rate": 8.20356694064478e-08, + "loss": 0.6325, + "step": 14900 + }, + { + "epoch": 0.94, + "grad_norm": 0.8285881280899048, + "learning_rate": 8.185068108844507e-08, + "loss": 0.5491, + "step": 14901 + }, + { + "epoch": 0.94, + "grad_norm": 0.8597615361213684, + "learning_rate": 8.166589985660056e-08, + "loss": 0.5429, + "step": 14902 + }, + { + "epoch": 0.94, + "grad_norm": 0.9608265161514282, + "learning_rate": 8.148132571869582e-08, + "loss": 0.6108, + "step": 14903 + }, + { + "epoch": 0.94, + "grad_norm": 0.8628665208816528, + "learning_rate": 8.129695868250242e-08, + "loss": 0.5564, + "step": 14904 + }, + { + "epoch": 0.94, + "grad_norm": 0.8341482877731323, + "learning_rate": 8.111279875578304e-08, + "loss": 0.5658, + "step": 14905 + }, + { + "epoch": 0.94, + "grad_norm": 0.8764296770095825, + "learning_rate": 8.092884594629147e-08, + "loss": 0.582, + "step": 14906 + }, + { + "epoch": 0.94, + "grad_norm": 0.8712512254714966, + "learning_rate": 8.074510026177485e-08, + "loss": 0.5598, + "step": 14907 + }, + { + "epoch": 0.94, + "grad_norm": 1.110312581062317, + "learning_rate": 8.056156170996866e-08, + "loss": 0.6074, + "step": 14908 + }, + { + "epoch": 0.94, + "grad_norm": 0.8867812156677246, + "learning_rate": 8.03782302986017e-08, + "loss": 0.5814, + "step": 14909 + }, + { + "epoch": 0.94, + "grad_norm": 0.9579918384552002, + "learning_rate": 8.019510603539338e-08, + "loss": 0.5672, + "step": 14910 + }, + { + "epoch": 0.94, + "grad_norm": 0.8660980463027954, + "learning_rate": 8.001218892805474e-08, + "loss": 0.569, + "step": 14911 + }, + { + "epoch": 0.94, + "grad_norm": 0.9106853604316711, + "learning_rate": 7.982947898428739e-08, + "loss": 0.577, + "step": 14912 + }, + { + "epoch": 0.94, + "grad_norm": 0.8973606824874878, + "learning_rate": 7.964697621178463e-08, + "loss": 0.623, + "step": 14913 + }, + { + "epoch": 0.94, + "grad_norm": 0.8993417024612427, + "learning_rate": 7.946468061823031e-08, + "loss": 0.5553, + "step": 14914 + }, + { + "epoch": 0.94, + "grad_norm": 0.9079226851463318, + "learning_rate": 7.928259221130163e-08, + "loss": 0.5785, + "step": 14915 + }, + { + "epoch": 0.95, + "grad_norm": 0.8791465759277344, + "learning_rate": 7.910071099866523e-08, + "loss": 0.6139, + "step": 14916 + }, + { + "epoch": 0.95, + "grad_norm": 0.8370904326438904, + "learning_rate": 7.891903698797886e-08, + "loss": 0.5087, + "step": 14917 + }, + { + "epoch": 0.95, + "grad_norm": 0.8958890438079834, + "learning_rate": 7.87375701868931e-08, + "loss": 0.5871, + "step": 14918 + }, + { + "epoch": 0.95, + "grad_norm": 0.863865315914154, + "learning_rate": 7.855631060304792e-08, + "loss": 0.5092, + "step": 14919 + }, + { + "epoch": 0.95, + "grad_norm": 0.8901463747024536, + "learning_rate": 7.837525824407665e-08, + "loss": 0.5633, + "step": 14920 + }, + { + "epoch": 0.95, + "grad_norm": 0.8937858939170837, + "learning_rate": 7.819441311760156e-08, + "loss": 0.5461, + "step": 14921 + }, + { + "epoch": 0.95, + "grad_norm": 0.9540120363235474, + "learning_rate": 7.801377523123877e-08, + "loss": 0.6248, + "step": 14922 + }, + { + "epoch": 0.95, + "grad_norm": 0.8416619300842285, + "learning_rate": 7.783334459259273e-08, + "loss": 0.5367, + "step": 14923 + }, + { + "epoch": 0.95, + "grad_norm": 0.9118484854698181, + "learning_rate": 7.765312120926182e-08, + "loss": 0.6225, + "step": 14924 + }, + { + "epoch": 0.95, + "grad_norm": 0.8478346467018127, + "learning_rate": 7.747310508883444e-08, + "loss": 0.5574, + "step": 14925 + }, + { + "epoch": 0.95, + "grad_norm": 0.8656757473945618, + "learning_rate": 7.729329623889114e-08, + "loss": 0.5149, + "step": 14926 + }, + { + "epoch": 0.95, + "grad_norm": 0.9110966920852661, + "learning_rate": 7.711369466700147e-08, + "loss": 0.6383, + "step": 14927 + }, + { + "epoch": 0.95, + "grad_norm": 0.8306471109390259, + "learning_rate": 7.693430038072824e-08, + "loss": 0.5397, + "step": 14928 + }, + { + "epoch": 0.95, + "grad_norm": 0.8142772316932678, + "learning_rate": 7.675511338762654e-08, + "loss": 0.481, + "step": 14929 + }, + { + "epoch": 0.95, + "grad_norm": 0.8677499890327454, + "learning_rate": 7.657613369523975e-08, + "loss": 0.5712, + "step": 14930 + }, + { + "epoch": 0.95, + "grad_norm": 0.8763403296470642, + "learning_rate": 7.639736131110465e-08, + "loss": 0.581, + "step": 14931 + }, + { + "epoch": 0.95, + "grad_norm": 1.208530068397522, + "learning_rate": 7.621879624274853e-08, + "loss": 0.5727, + "step": 14932 + }, + { + "epoch": 0.95, + "grad_norm": 0.8692548274993896, + "learning_rate": 7.604043849769094e-08, + "loss": 0.5459, + "step": 14933 + }, + { + "epoch": 0.95, + "grad_norm": 0.9086669087409973, + "learning_rate": 7.586228808344087e-08, + "loss": 0.6114, + "step": 14934 + }, + { + "epoch": 0.95, + "grad_norm": 0.9117394089698792, + "learning_rate": 7.56843450075001e-08, + "loss": 0.5875, + "step": 14935 + }, + { + "epoch": 0.95, + "grad_norm": 0.873921811580658, + "learning_rate": 7.550660927736042e-08, + "loss": 0.529, + "step": 14936 + }, + { + "epoch": 0.95, + "grad_norm": 0.918420135974884, + "learning_rate": 7.53290809005075e-08, + "loss": 0.5835, + "step": 14937 + }, + { + "epoch": 0.95, + "grad_norm": 0.8901522755622864, + "learning_rate": 7.515175988441481e-08, + "loss": 0.5254, + "step": 14938 + }, + { + "epoch": 0.95, + "grad_norm": 0.9384918212890625, + "learning_rate": 7.497464623654915e-08, + "loss": 0.5693, + "step": 14939 + }, + { + "epoch": 0.95, + "grad_norm": 0.9153959155082703, + "learning_rate": 7.479773996436845e-08, + "loss": 0.533, + "step": 14940 + }, + { + "epoch": 0.95, + "grad_norm": 0.8577287793159485, + "learning_rate": 7.46210410753212e-08, + "loss": 0.5409, + "step": 14941 + }, + { + "epoch": 0.95, + "grad_norm": 0.9197996854782104, + "learning_rate": 7.44445495768481e-08, + "loss": 0.5721, + "step": 14942 + }, + { + "epoch": 0.95, + "grad_norm": 0.9248746633529663, + "learning_rate": 7.426826547637989e-08, + "loss": 0.6288, + "step": 14943 + }, + { + "epoch": 0.95, + "grad_norm": 0.8270097374916077, + "learning_rate": 7.40921887813395e-08, + "loss": 0.565, + "step": 14944 + }, + { + "epoch": 0.95, + "grad_norm": 0.8829072713851929, + "learning_rate": 7.391631949914102e-08, + "loss": 0.5745, + "step": 14945 + }, + { + "epoch": 0.95, + "grad_norm": 0.9314706325531006, + "learning_rate": 7.374065763719018e-08, + "loss": 0.5642, + "step": 14946 + }, + { + "epoch": 0.95, + "grad_norm": 0.9067994356155396, + "learning_rate": 7.356520320288274e-08, + "loss": 0.6136, + "step": 14947 + }, + { + "epoch": 0.95, + "grad_norm": 0.8551090359687805, + "learning_rate": 7.338995620360722e-08, + "loss": 0.5157, + "step": 14948 + }, + { + "epoch": 0.95, + "grad_norm": 0.8279046416282654, + "learning_rate": 7.321491664674163e-08, + "loss": 0.5155, + "step": 14949 + }, + { + "epoch": 0.95, + "grad_norm": 0.9258044362068176, + "learning_rate": 7.304008453965727e-08, + "loss": 0.6114, + "step": 14950 + }, + { + "epoch": 0.95, + "grad_norm": 0.9103056192398071, + "learning_rate": 7.286545988971495e-08, + "loss": 0.5794, + "step": 14951 + }, + { + "epoch": 0.95, + "grad_norm": 0.885990560054779, + "learning_rate": 7.269104270426818e-08, + "loss": 0.5718, + "step": 14952 + }, + { + "epoch": 0.95, + "grad_norm": 0.965684175491333, + "learning_rate": 7.251683299066059e-08, + "loss": 0.5777, + "step": 14953 + }, + { + "epoch": 0.95, + "grad_norm": 0.9360918998718262, + "learning_rate": 7.23428307562274e-08, + "loss": 0.5961, + "step": 14954 + }, + { + "epoch": 0.95, + "grad_norm": 0.8759440183639526, + "learning_rate": 7.216903600829605e-08, + "loss": 0.5533, + "step": 14955 + }, + { + "epoch": 0.95, + "grad_norm": 0.92622309923172, + "learning_rate": 7.199544875418407e-08, + "loss": 0.6231, + "step": 14956 + }, + { + "epoch": 0.95, + "grad_norm": 0.9205344319343567, + "learning_rate": 7.182206900119948e-08, + "loss": 0.5824, + "step": 14957 + }, + { + "epoch": 0.95, + "grad_norm": 0.8723695874214172, + "learning_rate": 7.164889675664477e-08, + "loss": 0.6116, + "step": 14958 + }, + { + "epoch": 0.95, + "grad_norm": 0.9015873074531555, + "learning_rate": 7.147593202781022e-08, + "loss": 0.5673, + "step": 14959 + }, + { + "epoch": 0.95, + "grad_norm": 0.9231569170951843, + "learning_rate": 7.13031748219789e-08, + "loss": 0.5602, + "step": 14960 + }, + { + "epoch": 0.95, + "grad_norm": 0.8926693797111511, + "learning_rate": 7.113062514642555e-08, + "loss": 0.569, + "step": 14961 + }, + { + "epoch": 0.95, + "grad_norm": 0.9044926762580872, + "learning_rate": 7.095828300841435e-08, + "loss": 0.5192, + "step": 14962 + }, + { + "epoch": 0.95, + "grad_norm": 0.9111180901527405, + "learning_rate": 7.078614841520392e-08, + "loss": 0.6293, + "step": 14963 + }, + { + "epoch": 0.95, + "grad_norm": 0.8154220581054688, + "learning_rate": 7.061422137404129e-08, + "loss": 0.5807, + "step": 14964 + }, + { + "epoch": 0.95, + "grad_norm": 0.8674167394638062, + "learning_rate": 7.044250189216561e-08, + "loss": 0.5303, + "step": 14965 + }, + { + "epoch": 0.95, + "grad_norm": 0.9023363590240479, + "learning_rate": 7.027098997680726e-08, + "loss": 0.6174, + "step": 14966 + }, + { + "epoch": 0.95, + "grad_norm": 0.9068924784660339, + "learning_rate": 7.00996856351882e-08, + "loss": 0.5724, + "step": 14967 + }, + { + "epoch": 0.95, + "grad_norm": 0.9573983550071716, + "learning_rate": 6.992858887452158e-08, + "loss": 0.5502, + "step": 14968 + }, + { + "epoch": 0.95, + "grad_norm": 0.9019178748130798, + "learning_rate": 6.975769970201163e-08, + "loss": 0.5813, + "step": 14969 + }, + { + "epoch": 0.95, + "grad_norm": 0.8365936279296875, + "learning_rate": 6.958701812485369e-08, + "loss": 0.5851, + "step": 14970 + }, + { + "epoch": 0.95, + "grad_norm": 0.9230535626411438, + "learning_rate": 6.94165441502348e-08, + "loss": 0.5764, + "step": 14971 + }, + { + "epoch": 0.95, + "grad_norm": 0.9693920612335205, + "learning_rate": 6.924627778533366e-08, + "loss": 0.5714, + "step": 14972 + }, + { + "epoch": 0.95, + "grad_norm": 0.8915910720825195, + "learning_rate": 6.907621903731842e-08, + "loss": 0.6017, + "step": 14973 + }, + { + "epoch": 0.95, + "grad_norm": 0.8678449392318726, + "learning_rate": 6.890636791335003e-08, + "loss": 0.5304, + "step": 14974 + }, + { + "epoch": 0.95, + "grad_norm": 0.8314898014068604, + "learning_rate": 6.873672442058054e-08, + "loss": 0.5401, + "step": 14975 + }, + { + "epoch": 0.95, + "grad_norm": 0.893195629119873, + "learning_rate": 6.856728856615314e-08, + "loss": 0.5449, + "step": 14976 + }, + { + "epoch": 0.95, + "grad_norm": 0.8856935501098633, + "learning_rate": 6.839806035720209e-08, + "loss": 0.5813, + "step": 14977 + }, + { + "epoch": 0.95, + "grad_norm": 0.844600260257721, + "learning_rate": 6.822903980085282e-08, + "loss": 0.5913, + "step": 14978 + }, + { + "epoch": 0.95, + "grad_norm": 0.8763694763183594, + "learning_rate": 6.806022690422187e-08, + "loss": 0.5783, + "step": 14979 + }, + { + "epoch": 0.95, + "grad_norm": 0.8728997111320496, + "learning_rate": 6.789162167441798e-08, + "loss": 0.5942, + "step": 14980 + }, + { + "epoch": 0.95, + "grad_norm": 0.9138465523719788, + "learning_rate": 6.772322411854048e-08, + "loss": 0.6402, + "step": 14981 + }, + { + "epoch": 0.95, + "grad_norm": 0.9363642930984497, + "learning_rate": 6.755503424368037e-08, + "loss": 0.5904, + "step": 14982 + }, + { + "epoch": 0.95, + "grad_norm": 0.9248054027557373, + "learning_rate": 6.73870520569181e-08, + "loss": 0.6464, + "step": 14983 + }, + { + "epoch": 0.95, + "grad_norm": 0.9054921269416809, + "learning_rate": 6.721927756532853e-08, + "loss": 0.5367, + "step": 14984 + }, + { + "epoch": 0.95, + "grad_norm": 0.8897875547409058, + "learning_rate": 6.705171077597495e-08, + "loss": 0.5691, + "step": 14985 + }, + { + "epoch": 0.95, + "grad_norm": 0.9076294302940369, + "learning_rate": 6.68843516959139e-08, + "loss": 0.6368, + "step": 14986 + }, + { + "epoch": 0.95, + "grad_norm": 1.060864806175232, + "learning_rate": 6.67172003321903e-08, + "loss": 0.6116, + "step": 14987 + }, + { + "epoch": 0.95, + "grad_norm": 0.9097266793251038, + "learning_rate": 6.655025669184522e-08, + "loss": 0.6004, + "step": 14988 + }, + { + "epoch": 0.95, + "grad_norm": 0.8445072174072266, + "learning_rate": 6.638352078190636e-08, + "loss": 0.5223, + "step": 14989 + }, + { + "epoch": 0.95, + "grad_norm": 0.8733325600624084, + "learning_rate": 6.621699260939418e-08, + "loss": 0.5643, + "step": 14990 + }, + { + "epoch": 0.95, + "grad_norm": 0.8759425282478333, + "learning_rate": 6.605067218132145e-08, + "loss": 0.6103, + "step": 14991 + }, + { + "epoch": 0.95, + "grad_norm": 0.9013230800628662, + "learning_rate": 6.58845595046903e-08, + "loss": 0.643, + "step": 14992 + }, + { + "epoch": 0.95, + "grad_norm": 0.9331822991371155, + "learning_rate": 6.571865458649629e-08, + "loss": 0.6525, + "step": 14993 + }, + { + "epoch": 0.95, + "grad_norm": 0.8814842700958252, + "learning_rate": 6.555295743372492e-08, + "loss": 0.5572, + "step": 14994 + }, + { + "epoch": 0.95, + "grad_norm": 0.8553088903427124, + "learning_rate": 6.538746805335284e-08, + "loss": 0.5265, + "step": 14995 + }, + { + "epoch": 0.95, + "grad_norm": 0.9224663376808167, + "learning_rate": 6.52221864523478e-08, + "loss": 0.6197, + "step": 14996 + }, + { + "epoch": 0.95, + "grad_norm": 0.8610914945602417, + "learning_rate": 6.505711263766978e-08, + "loss": 0.5687, + "step": 14997 + }, + { + "epoch": 0.95, + "grad_norm": 0.8670297265052795, + "learning_rate": 6.48922466162688e-08, + "loss": 0.5878, + "step": 14998 + }, + { + "epoch": 0.95, + "grad_norm": 0.8055164217948914, + "learning_rate": 6.472758839508819e-08, + "loss": 0.553, + "step": 14999 + }, + { + "epoch": 0.95, + "grad_norm": 0.8382790088653564, + "learning_rate": 6.456313798105962e-08, + "loss": 0.5657, + "step": 15000 + }, + { + "epoch": 0.95, + "grad_norm": 0.8944666385650635, + "learning_rate": 6.439889538110867e-08, + "loss": 0.6654, + "step": 15001 + }, + { + "epoch": 0.95, + "grad_norm": 0.9049035310745239, + "learning_rate": 6.423486060215034e-08, + "loss": 0.5784, + "step": 15002 + }, + { + "epoch": 0.95, + "grad_norm": 0.8876950144767761, + "learning_rate": 6.40710336510919e-08, + "loss": 0.5315, + "step": 15003 + }, + { + "epoch": 0.95, + "grad_norm": 0.873111367225647, + "learning_rate": 6.390741453483119e-08, + "loss": 0.5567, + "step": 15004 + }, + { + "epoch": 0.95, + "grad_norm": 0.8574492931365967, + "learning_rate": 6.374400326025765e-08, + "loss": 0.5751, + "step": 15005 + }, + { + "epoch": 0.95, + "grad_norm": 0.9291654825210571, + "learning_rate": 6.358079983425247e-08, + "loss": 0.5736, + "step": 15006 + }, + { + "epoch": 0.95, + "grad_norm": 0.880135715007782, + "learning_rate": 6.341780426368737e-08, + "loss": 0.5925, + "step": 15007 + }, + { + "epoch": 0.95, + "grad_norm": 0.8919762372970581, + "learning_rate": 6.32550165554252e-08, + "loss": 0.5895, + "step": 15008 + }, + { + "epoch": 0.95, + "grad_norm": 0.9122921824455261, + "learning_rate": 6.309243671632048e-08, + "loss": 0.6224, + "step": 15009 + }, + { + "epoch": 0.95, + "grad_norm": 0.9773128628730774, + "learning_rate": 6.293006475321939e-08, + "loss": 0.5849, + "step": 15010 + }, + { + "epoch": 0.95, + "grad_norm": 0.9223852157592773, + "learning_rate": 6.276790067295813e-08, + "loss": 0.593, + "step": 15011 + }, + { + "epoch": 0.95, + "grad_norm": 0.8956741094589233, + "learning_rate": 6.260594448236513e-08, + "loss": 0.554, + "step": 15012 + }, + { + "epoch": 0.95, + "grad_norm": 0.8495054244995117, + "learning_rate": 6.244419618825992e-08, + "loss": 0.5502, + "step": 15013 + }, + { + "epoch": 0.95, + "grad_norm": 0.8928588628768921, + "learning_rate": 6.228265579745318e-08, + "loss": 0.5813, + "step": 15014 + }, + { + "epoch": 0.95, + "grad_norm": 0.8524266481399536, + "learning_rate": 6.212132331674725e-08, + "loss": 0.5767, + "step": 15015 + }, + { + "epoch": 0.95, + "grad_norm": 0.9605539441108704, + "learning_rate": 6.196019875293391e-08, + "loss": 0.635, + "step": 15016 + }, + { + "epoch": 0.95, + "grad_norm": 0.8842973709106445, + "learning_rate": 6.179928211279884e-08, + "loss": 0.5695, + "step": 15017 + }, + { + "epoch": 0.95, + "grad_norm": 0.9240403175354004, + "learning_rate": 6.163857340311718e-08, + "loss": 0.5168, + "step": 15018 + }, + { + "epoch": 0.95, + "grad_norm": 0.8997433185577393, + "learning_rate": 6.147807263065575e-08, + "loss": 0.5261, + "step": 15019 + }, + { + "epoch": 0.95, + "grad_norm": 0.9333794713020325, + "learning_rate": 6.131777980217302e-08, + "loss": 0.6007, + "step": 15020 + }, + { + "epoch": 0.95, + "grad_norm": 0.8286879062652588, + "learning_rate": 6.115769492441859e-08, + "loss": 0.5741, + "step": 15021 + }, + { + "epoch": 0.95, + "grad_norm": 0.9274932146072388, + "learning_rate": 6.099781800413151e-08, + "loss": 0.605, + "step": 15022 + }, + { + "epoch": 0.95, + "grad_norm": 0.9669122099876404, + "learning_rate": 6.083814904804586e-08, + "loss": 0.6425, + "step": 15023 + }, + { + "epoch": 0.95, + "grad_norm": 0.869624674320221, + "learning_rate": 6.067868806288346e-08, + "loss": 0.5806, + "step": 15024 + }, + { + "epoch": 0.95, + "grad_norm": 0.8654236197471619, + "learning_rate": 6.05194350553584e-08, + "loss": 0.561, + "step": 15025 + }, + { + "epoch": 0.95, + "grad_norm": 0.9452944397926331, + "learning_rate": 6.036039003217697e-08, + "loss": 0.5796, + "step": 15026 + }, + { + "epoch": 0.95, + "grad_norm": 0.9010240435600281, + "learning_rate": 6.02015530000355e-08, + "loss": 0.6307, + "step": 15027 + }, + { + "epoch": 0.95, + "grad_norm": 0.8794552683830261, + "learning_rate": 6.00429239656225e-08, + "loss": 0.5545, + "step": 15028 + }, + { + "epoch": 0.95, + "grad_norm": 0.8755041360855103, + "learning_rate": 5.988450293561765e-08, + "loss": 0.5406, + "step": 15029 + }, + { + "epoch": 0.95, + "grad_norm": 0.8471874594688416, + "learning_rate": 5.972628991669006e-08, + "loss": 0.5309, + "step": 15030 + }, + { + "epoch": 0.95, + "grad_norm": 0.9170916080474854, + "learning_rate": 5.956828491550326e-08, + "loss": 0.6034, + "step": 15031 + }, + { + "epoch": 0.95, + "grad_norm": 0.8616017699241638, + "learning_rate": 5.941048793870918e-08, + "loss": 0.5492, + "step": 15032 + }, + { + "epoch": 0.95, + "grad_norm": 0.8741750717163086, + "learning_rate": 5.92528989929525e-08, + "loss": 0.5936, + "step": 15033 + }, + { + "epoch": 0.95, + "grad_norm": 0.8802669048309326, + "learning_rate": 5.9095518084868467e-08, + "loss": 0.5961, + "step": 15034 + }, + { + "epoch": 0.95, + "grad_norm": 0.8247042298316956, + "learning_rate": 5.893834522108399e-08, + "loss": 0.5909, + "step": 15035 + }, + { + "epoch": 0.95, + "grad_norm": 0.9004446864128113, + "learning_rate": 5.8781380408217124e-08, + "loss": 0.5701, + "step": 15036 + }, + { + "epoch": 0.95, + "grad_norm": 0.8959584832191467, + "learning_rate": 5.862462365287702e-08, + "loss": 0.5698, + "step": 15037 + }, + { + "epoch": 0.95, + "grad_norm": 0.9272680878639221, + "learning_rate": 5.846807496166451e-08, + "loss": 0.5963, + "step": 15038 + }, + { + "epoch": 0.95, + "grad_norm": 0.9091727137565613, + "learning_rate": 5.831173434117043e-08, + "loss": 0.5561, + "step": 15039 + }, + { + "epoch": 0.95, + "grad_norm": 0.8189939260482788, + "learning_rate": 5.815560179797897e-08, + "loss": 0.5501, + "step": 15040 + }, + { + "epoch": 0.95, + "grad_norm": 0.8391079306602478, + "learning_rate": 5.7999677338663184e-08, + "loss": 0.5442, + "step": 15041 + }, + { + "epoch": 0.95, + "grad_norm": 0.8865288496017456, + "learning_rate": 5.7843960969790056e-08, + "loss": 0.5783, + "step": 15042 + }, + { + "epoch": 0.95, + "grad_norm": 0.8449594378471375, + "learning_rate": 5.768845269791379e-08, + "loss": 0.5728, + "step": 15043 + }, + { + "epoch": 0.95, + "grad_norm": 0.8410879373550415, + "learning_rate": 5.7533152529584135e-08, + "loss": 0.4878, + "step": 15044 + }, + { + "epoch": 0.95, + "grad_norm": 0.9583869576454163, + "learning_rate": 5.7378060471340866e-08, + "loss": 0.5291, + "step": 15045 + }, + { + "epoch": 0.95, + "grad_norm": 0.9091804623603821, + "learning_rate": 5.7223176529712097e-08, + "loss": 0.6207, + "step": 15046 + }, + { + "epoch": 0.95, + "grad_norm": 0.8123113512992859, + "learning_rate": 5.70685007112215e-08, + "loss": 0.5242, + "step": 15047 + }, + { + "epoch": 0.95, + "grad_norm": 0.8403564095497131, + "learning_rate": 5.691403302238052e-08, + "loss": 0.5305, + "step": 15048 + }, + { + "epoch": 0.95, + "grad_norm": 0.8629001379013062, + "learning_rate": 5.6759773469694523e-08, + "loss": 0.5932, + "step": 15049 + }, + { + "epoch": 0.95, + "grad_norm": 0.8440834283828735, + "learning_rate": 5.660572205965775e-08, + "loss": 0.5434, + "step": 15050 + }, + { + "epoch": 0.95, + "grad_norm": 0.9365416169166565, + "learning_rate": 5.645187879875724e-08, + "loss": 0.5669, + "step": 15051 + }, + { + "epoch": 0.95, + "grad_norm": 0.8103097677230835, + "learning_rate": 5.6298243693470586e-08, + "loss": 0.5747, + "step": 15052 + }, + { + "epoch": 0.95, + "grad_norm": 0.9661427736282349, + "learning_rate": 5.614481675026762e-08, + "loss": 0.5651, + "step": 15053 + }, + { + "epoch": 0.95, + "grad_norm": 0.8838450908660889, + "learning_rate": 5.59915979756076e-08, + "loss": 0.5523, + "step": 15054 + }, + { + "epoch": 0.95, + "grad_norm": 0.8871442675590515, + "learning_rate": 5.583858737594205e-08, + "loss": 0.5693, + "step": 15055 + }, + { + "epoch": 0.95, + "grad_norm": 0.9019421339035034, + "learning_rate": 5.5685784957714707e-08, + "loss": 0.5649, + "step": 15056 + }, + { + "epoch": 0.95, + "grad_norm": 0.8939514756202698, + "learning_rate": 5.5533190727358745e-08, + "loss": 0.595, + "step": 15057 + }, + { + "epoch": 0.95, + "grad_norm": 0.8858687281608582, + "learning_rate": 5.538080469129958e-08, + "loss": 0.5937, + "step": 15058 + }, + { + "epoch": 0.95, + "grad_norm": 0.8240897059440613, + "learning_rate": 5.522862685595376e-08, + "loss": 0.5633, + "step": 15059 + }, + { + "epoch": 0.95, + "grad_norm": 0.846260666847229, + "learning_rate": 5.507665722772837e-08, + "loss": 0.5337, + "step": 15060 + }, + { + "epoch": 0.95, + "grad_norm": 0.8530579209327698, + "learning_rate": 5.492489581302329e-08, + "loss": 0.5521, + "step": 15061 + }, + { + "epoch": 0.95, + "grad_norm": 0.9388294816017151, + "learning_rate": 5.477334261822842e-08, + "loss": 0.5776, + "step": 15062 + }, + { + "epoch": 0.95, + "grad_norm": 0.9231818914413452, + "learning_rate": 5.46219976497242e-08, + "loss": 0.555, + "step": 15063 + }, + { + "epoch": 0.95, + "grad_norm": 0.8340309262275696, + "learning_rate": 5.447086091388443e-08, + "loss": 0.5376, + "step": 15064 + }, + { + "epoch": 0.95, + "grad_norm": 0.8696257472038269, + "learning_rate": 5.4319932417072344e-08, + "loss": 0.5702, + "step": 15065 + }, + { + "epoch": 0.95, + "grad_norm": 0.929787814617157, + "learning_rate": 5.416921216564286e-08, + "loss": 0.6018, + "step": 15066 + }, + { + "epoch": 0.95, + "grad_norm": 0.8924655914306641, + "learning_rate": 5.401870016594313e-08, + "loss": 0.5825, + "step": 15067 + }, + { + "epoch": 0.95, + "grad_norm": 0.9113631248474121, + "learning_rate": 5.38683964243103e-08, + "loss": 0.5701, + "step": 15068 + }, + { + "epoch": 0.95, + "grad_norm": 0.8545387387275696, + "learning_rate": 5.3718300947072086e-08, + "loss": 0.5573, + "step": 15069 + }, + { + "epoch": 0.95, + "grad_norm": 0.9120925068855286, + "learning_rate": 5.356841374055011e-08, + "loss": 0.6162, + "step": 15070 + }, + { + "epoch": 0.95, + "grad_norm": 0.843596875667572, + "learning_rate": 5.341873481105431e-08, + "loss": 0.6176, + "step": 15071 + }, + { + "epoch": 0.95, + "grad_norm": 0.9411029815673828, + "learning_rate": 5.3269264164887977e-08, + "loss": 0.6389, + "step": 15072 + }, + { + "epoch": 0.95, + "grad_norm": 0.9116719961166382, + "learning_rate": 5.3120001808344425e-08, + "loss": 0.5561, + "step": 15073 + }, + { + "epoch": 0.96, + "grad_norm": 0.8383541703224182, + "learning_rate": 5.297094774770861e-08, + "loss": 0.5032, + "step": 15074 + }, + { + "epoch": 0.96, + "grad_norm": 0.8465346097946167, + "learning_rate": 5.282210198925664e-08, + "loss": 0.598, + "step": 15075 + }, + { + "epoch": 0.96, + "grad_norm": 0.9090423583984375, + "learning_rate": 5.267346453925626e-08, + "loss": 0.5912, + "step": 15076 + }, + { + "epoch": 0.96, + "grad_norm": 0.9705894589424133, + "learning_rate": 5.2525035403965805e-08, + "loss": 0.5754, + "step": 15077 + }, + { + "epoch": 0.96, + "grad_norm": 0.9257639646530151, + "learning_rate": 5.237681458963473e-08, + "loss": 0.6093, + "step": 15078 + }, + { + "epoch": 0.96, + "grad_norm": 0.8317881226539612, + "learning_rate": 5.222880210250469e-08, + "loss": 0.5539, + "step": 15079 + }, + { + "epoch": 0.96, + "grad_norm": 0.8732230067253113, + "learning_rate": 5.2080997948807944e-08, + "loss": 0.5984, + "step": 15080 + }, + { + "epoch": 0.96, + "grad_norm": 0.9015724658966064, + "learning_rate": 5.193340213476727e-08, + "loss": 0.5556, + "step": 15081 + }, + { + "epoch": 0.96, + "grad_norm": 0.912030041217804, + "learning_rate": 5.178601466659827e-08, + "loss": 0.5774, + "step": 15082 + }, + { + "epoch": 0.96, + "grad_norm": 0.9283223748207092, + "learning_rate": 5.163883555050708e-08, + "loss": 0.5966, + "step": 15083 + }, + { + "epoch": 0.96, + "grad_norm": 0.8436444997787476, + "learning_rate": 5.149186479268986e-08, + "loss": 0.5682, + "step": 15084 + }, + { + "epoch": 0.96, + "grad_norm": 0.9397615194320679, + "learning_rate": 5.134510239933554e-08, + "loss": 0.6182, + "step": 15085 + }, + { + "epoch": 0.96, + "grad_norm": 0.8505292534828186, + "learning_rate": 5.119854837662419e-08, + "loss": 0.5501, + "step": 15086 + }, + { + "epoch": 0.96, + "grad_norm": 0.9102478623390198, + "learning_rate": 5.1052202730725865e-08, + "loss": 0.5655, + "step": 15087 + }, + { + "epoch": 0.96, + "grad_norm": 0.8728495240211487, + "learning_rate": 5.0906065467803965e-08, + "loss": 0.5521, + "step": 15088 + }, + { + "epoch": 0.96, + "grad_norm": 0.9245344400405884, + "learning_rate": 5.0760136594010246e-08, + "loss": 0.5207, + "step": 15089 + }, + { + "epoch": 0.96, + "grad_norm": 0.8598072528839111, + "learning_rate": 5.061441611549034e-08, + "loss": 0.5593, + "step": 15090 + }, + { + "epoch": 0.96, + "grad_norm": 0.8677455186843872, + "learning_rate": 5.046890403837989e-08, + "loss": 0.5772, + "step": 15091 + }, + { + "epoch": 0.96, + "grad_norm": 0.8624927401542664, + "learning_rate": 5.032360036880568e-08, + "loss": 0.5621, + "step": 15092 + }, + { + "epoch": 0.96, + "grad_norm": 0.8490076661109924, + "learning_rate": 5.0178505112885576e-08, + "loss": 0.5548, + "step": 15093 + }, + { + "epoch": 0.96, + "grad_norm": 0.9380584359169006, + "learning_rate": 5.00336182767297e-08, + "loss": 0.5661, + "step": 15094 + }, + { + "epoch": 0.96, + "grad_norm": 0.9134517908096313, + "learning_rate": 4.988893986643817e-08, + "loss": 0.5705, + "step": 15095 + }, + { + "epoch": 0.96, + "grad_norm": 0.8528224229812622, + "learning_rate": 4.9744469888103887e-08, + "loss": 0.6008, + "step": 15096 + }, + { + "epoch": 0.96, + "grad_norm": 0.8432444930076599, + "learning_rate": 4.9600208347809206e-08, + "loss": 0.5205, + "step": 15097 + }, + { + "epoch": 0.96, + "grad_norm": 0.9236396551132202, + "learning_rate": 4.945615525162761e-08, + "loss": 0.594, + "step": 15098 + }, + { + "epoch": 0.96, + "grad_norm": 0.8537143468856812, + "learning_rate": 4.931231060562702e-08, + "loss": 0.5326, + "step": 15099 + }, + { + "epoch": 0.96, + "grad_norm": 0.929633378982544, + "learning_rate": 4.916867441586204e-08, + "loss": 0.637, + "step": 15100 + }, + { + "epoch": 0.96, + "grad_norm": 0.8501139283180237, + "learning_rate": 4.902524668838116e-08, + "loss": 0.5492, + "step": 15101 + }, + { + "epoch": 0.96, + "grad_norm": 0.8739571571350098, + "learning_rate": 4.88820274292251e-08, + "loss": 0.5558, + "step": 15102 + }, + { + "epoch": 0.96, + "grad_norm": 0.8676999807357788, + "learning_rate": 4.873901664442182e-08, + "loss": 0.5524, + "step": 15103 + }, + { + "epoch": 0.96, + "grad_norm": 0.864040195941925, + "learning_rate": 4.8596214339995395e-08, + "loss": 0.5512, + "step": 15104 + }, + { + "epoch": 0.96, + "grad_norm": 0.9180824160575867, + "learning_rate": 4.8453620521957124e-08, + "loss": 0.571, + "step": 15105 + }, + { + "epoch": 0.96, + "grad_norm": 0.9425962567329407, + "learning_rate": 4.83112351963122e-08, + "loss": 0.5272, + "step": 15106 + }, + { + "epoch": 0.96, + "grad_norm": 0.9564583897590637, + "learning_rate": 4.816905836905528e-08, + "loss": 0.5995, + "step": 15107 + }, + { + "epoch": 0.96, + "grad_norm": 0.8326453566551208, + "learning_rate": 4.802709004617267e-08, + "loss": 0.5695, + "step": 15108 + }, + { + "epoch": 0.96, + "grad_norm": 0.8696445822715759, + "learning_rate": 4.788533023364295e-08, + "loss": 0.5204, + "step": 15109 + }, + { + "epoch": 0.96, + "grad_norm": 0.9089856743812561, + "learning_rate": 4.77437789374352e-08, + "loss": 0.6211, + "step": 15110 + }, + { + "epoch": 0.96, + "grad_norm": 0.9463760256767273, + "learning_rate": 4.760243616350913e-08, + "loss": 0.5978, + "step": 15111 + }, + { + "epoch": 0.96, + "grad_norm": 0.8912383913993835, + "learning_rate": 4.746130191781606e-08, + "loss": 0.5724, + "step": 15112 + }, + { + "epoch": 0.96, + "grad_norm": 0.8581725358963013, + "learning_rate": 4.7320376206299034e-08, + "loss": 0.5992, + "step": 15113 + }, + { + "epoch": 0.96, + "grad_norm": 0.878282368183136, + "learning_rate": 4.717965903489219e-08, + "loss": 0.5798, + "step": 15114 + }, + { + "epoch": 0.96, + "grad_norm": 0.866300642490387, + "learning_rate": 4.7039150409519674e-08, + "loss": 0.5509, + "step": 15115 + }, + { + "epoch": 0.96, + "grad_norm": 0.8967102766036987, + "learning_rate": 4.6898850336098975e-08, + "loss": 0.5595, + "step": 15116 + }, + { + "epoch": 0.96, + "grad_norm": 0.8599669337272644, + "learning_rate": 4.675875882053704e-08, + "loss": 0.5854, + "step": 15117 + }, + { + "epoch": 0.96, + "grad_norm": 0.8142878413200378, + "learning_rate": 4.6618875868733037e-08, + "loss": 0.5502, + "step": 15118 + }, + { + "epoch": 0.96, + "grad_norm": 0.8852924704551697, + "learning_rate": 4.6479201486575585e-08, + "loss": 0.54, + "step": 15119 + }, + { + "epoch": 0.96, + "grad_norm": 0.8815605044364929, + "learning_rate": 4.633973567994776e-08, + "loss": 0.6029, + "step": 15120 + }, + { + "epoch": 0.96, + "grad_norm": 0.8725368976593018, + "learning_rate": 4.620047845472098e-08, + "loss": 0.576, + "step": 15121 + }, + { + "epoch": 0.96, + "grad_norm": 0.8406761288642883, + "learning_rate": 4.606142981675887e-08, + "loss": 0.5832, + "step": 15122 + }, + { + "epoch": 0.96, + "grad_norm": 0.9023706316947937, + "learning_rate": 4.592258977191622e-08, + "loss": 0.5905, + "step": 15123 + }, + { + "epoch": 0.96, + "grad_norm": 0.8814811706542969, + "learning_rate": 4.578395832603999e-08, + "loss": 0.567, + "step": 15124 + }, + { + "epoch": 0.96, + "grad_norm": 0.9129397869110107, + "learning_rate": 4.5645535484966085e-08, + "loss": 0.5647, + "step": 15125 + }, + { + "epoch": 0.96, + "grad_norm": 0.9053840637207031, + "learning_rate": 4.5507321254524287e-08, + "loss": 0.561, + "step": 15126 + }, + { + "epoch": 0.96, + "grad_norm": 0.9199474453926086, + "learning_rate": 4.536931564053382e-08, + "loss": 0.5823, + "step": 15127 + }, + { + "epoch": 0.96, + "grad_norm": 0.9460538625717163, + "learning_rate": 4.523151864880504e-08, + "loss": 0.5639, + "step": 15128 + }, + { + "epoch": 0.96, + "grad_norm": 0.9503843188285828, + "learning_rate": 4.5093930285141086e-08, + "loss": 0.5857, + "step": 15129 + }, + { + "epoch": 0.96, + "grad_norm": 0.8704271912574768, + "learning_rate": 4.4956550555334546e-08, + "loss": 0.5987, + "step": 15130 + }, + { + "epoch": 0.96, + "grad_norm": 0.8602560758590698, + "learning_rate": 4.4819379465170785e-08, + "loss": 0.548, + "step": 15131 + }, + { + "epoch": 0.96, + "grad_norm": 0.9668799638748169, + "learning_rate": 4.4682417020425194e-08, + "loss": 0.5793, + "step": 15132 + }, + { + "epoch": 0.96, + "grad_norm": 0.9470771551132202, + "learning_rate": 4.454566322686371e-08, + "loss": 0.6333, + "step": 15133 + }, + { + "epoch": 0.96, + "grad_norm": 0.8778170943260193, + "learning_rate": 4.440911809024673e-08, + "loss": 0.5797, + "step": 15134 + }, + { + "epoch": 0.96, + "grad_norm": 0.9537930488586426, + "learning_rate": 4.427278161632187e-08, + "loss": 0.5665, + "step": 15135 + }, + { + "epoch": 0.96, + "grad_norm": 0.860418975353241, + "learning_rate": 4.413665381083065e-08, + "loss": 0.5036, + "step": 15136 + }, + { + "epoch": 0.96, + "grad_norm": 0.8717173933982849, + "learning_rate": 4.4000734679504606e-08, + "loss": 0.54, + "step": 15137 + }, + { + "epoch": 0.96, + "grad_norm": 0.8939148187637329, + "learning_rate": 4.386502422806749e-08, + "loss": 0.5823, + "step": 15138 + }, + { + "epoch": 0.96, + "grad_norm": 0.8451435565948486, + "learning_rate": 4.37295224622325e-08, + "loss": 0.5591, + "step": 15139 + }, + { + "epoch": 0.96, + "grad_norm": 0.8331038951873779, + "learning_rate": 4.359422938770619e-08, + "loss": 0.5757, + "step": 15140 + }, + { + "epoch": 0.96, + "grad_norm": 0.9195752739906311, + "learning_rate": 4.3459145010184e-08, + "loss": 0.5171, + "step": 15141 + }, + { + "epoch": 0.96, + "grad_norm": 0.9011934995651245, + "learning_rate": 4.3324269335355274e-08, + "loss": 0.6086, + "step": 15142 + }, + { + "epoch": 0.96, + "grad_norm": 0.929836094379425, + "learning_rate": 4.31896023688988e-08, + "loss": 0.5732, + "step": 15143 + }, + { + "epoch": 0.96, + "grad_norm": 0.9297063946723938, + "learning_rate": 4.305514411648393e-08, + "loss": 0.5652, + "step": 15144 + }, + { + "epoch": 0.96, + "grad_norm": 0.8522423505783081, + "learning_rate": 4.2920894583773906e-08, + "loss": 0.5228, + "step": 15145 + }, + { + "epoch": 0.96, + "grad_norm": 0.8246431350708008, + "learning_rate": 4.278685377641978e-08, + "loss": 0.5246, + "step": 15146 + }, + { + "epoch": 0.96, + "grad_norm": 0.918134868144989, + "learning_rate": 4.2653021700066466e-08, + "loss": 0.5447, + "step": 15147 + }, + { + "epoch": 0.96, + "grad_norm": 0.9000471830368042, + "learning_rate": 4.251939836034946e-08, + "loss": 0.5654, + "step": 15148 + }, + { + "epoch": 0.96, + "grad_norm": 0.8861368298530579, + "learning_rate": 4.238598376289482e-08, + "loss": 0.5441, + "step": 15149 + }, + { + "epoch": 0.96, + "grad_norm": 0.8738230466842651, + "learning_rate": 4.225277791331972e-08, + "loss": 0.5582, + "step": 15150 + }, + { + "epoch": 0.96, + "grad_norm": 0.8639594316482544, + "learning_rate": 4.211978081723356e-08, + "loss": 0.5699, + "step": 15151 + }, + { + "epoch": 0.96, + "grad_norm": 0.855905294418335, + "learning_rate": 4.19869924802363e-08, + "loss": 0.5731, + "step": 15152 + }, + { + "epoch": 0.96, + "grad_norm": 0.8283065557479858, + "learning_rate": 4.185441290791903e-08, + "loss": 0.5216, + "step": 15153 + }, + { + "epoch": 0.96, + "grad_norm": 0.9064654111862183, + "learning_rate": 4.1722042105863946e-08, + "loss": 0.593, + "step": 15154 + }, + { + "epoch": 0.96, + "grad_norm": 0.9007862210273743, + "learning_rate": 4.158988007964548e-08, + "loss": 0.5809, + "step": 15155 + }, + { + "epoch": 0.96, + "grad_norm": 0.8880447149276733, + "learning_rate": 4.145792683482808e-08, + "loss": 0.5822, + "step": 15156 + }, + { + "epoch": 0.96, + "grad_norm": 0.8769849538803101, + "learning_rate": 4.132618237696784e-08, + "loss": 0.6075, + "step": 15157 + }, + { + "epoch": 0.96, + "grad_norm": 0.9147706627845764, + "learning_rate": 4.1194646711612555e-08, + "loss": 0.6183, + "step": 15158 + }, + { + "epoch": 0.96, + "grad_norm": 0.9345024824142456, + "learning_rate": 4.1063319844299454e-08, + "loss": 0.5518, + "step": 15159 + }, + { + "epoch": 0.96, + "grad_norm": 0.8593862652778625, + "learning_rate": 4.0932201780559674e-08, + "loss": 0.5756, + "step": 15160 + }, + { + "epoch": 0.96, + "grad_norm": 0.8958163857460022, + "learning_rate": 4.080129252591325e-08, + "loss": 0.5601, + "step": 15161 + }, + { + "epoch": 0.96, + "grad_norm": 0.8987419009208679, + "learning_rate": 4.0670592085872984e-08, + "loss": 0.5828, + "step": 15162 + }, + { + "epoch": 0.96, + "grad_norm": 0.8477271795272827, + "learning_rate": 4.054010046594115e-08, + "loss": 0.5592, + "step": 15163 + }, + { + "epoch": 0.96, + "grad_norm": 0.8878704309463501, + "learning_rate": 4.040981767161334e-08, + "loss": 0.5865, + "step": 15164 + }, + { + "epoch": 0.96, + "grad_norm": 0.8833525776863098, + "learning_rate": 4.027974370837518e-08, + "loss": 0.5872, + "step": 15165 + }, + { + "epoch": 0.96, + "grad_norm": 0.8798415064811707, + "learning_rate": 4.014987858170283e-08, + "loss": 0.5793, + "step": 15166 + }, + { + "epoch": 0.96, + "grad_norm": 0.8760266900062561, + "learning_rate": 4.0020222297065256e-08, + "loss": 0.5665, + "step": 15167 + }, + { + "epoch": 0.96, + "grad_norm": 0.9930770993232727, + "learning_rate": 3.9890774859921987e-08, + "loss": 0.591, + "step": 15168 + }, + { + "epoch": 0.96, + "grad_norm": 0.9291293621063232, + "learning_rate": 3.97615362757231e-08, + "loss": 0.6082, + "step": 15169 + }, + { + "epoch": 0.96, + "grad_norm": 0.9287815093994141, + "learning_rate": 3.9632506549910356e-08, + "loss": 0.5971, + "step": 15170 + }, + { + "epoch": 0.96, + "grad_norm": 0.9018691778182983, + "learning_rate": 3.9503685687916627e-08, + "loss": 0.5316, + "step": 15171 + }, + { + "epoch": 0.96, + "grad_norm": 0.8846923112869263, + "learning_rate": 3.937507369516702e-08, + "loss": 0.5959, + "step": 15172 + }, + { + "epoch": 0.96, + "grad_norm": 0.9421688914299011, + "learning_rate": 3.92466705770761e-08, + "loss": 0.5618, + "step": 15173 + }, + { + "epoch": 0.96, + "grad_norm": 0.8841253519058228, + "learning_rate": 3.911847633905008e-08, + "loss": 0.5895, + "step": 15174 + }, + { + "epoch": 0.96, + "grad_norm": 0.9070528149604797, + "learning_rate": 3.899049098648799e-08, + "loss": 0.5664, + "step": 15175 + }, + { + "epoch": 0.96, + "grad_norm": 0.8911782503128052, + "learning_rate": 3.88627145247783e-08, + "loss": 0.5772, + "step": 15176 + }, + { + "epoch": 0.96, + "grad_norm": 0.9055638909339905, + "learning_rate": 3.873514695930114e-08, + "loss": 0.6169, + "step": 15177 + }, + { + "epoch": 0.96, + "grad_norm": 0.915357232093811, + "learning_rate": 3.860778829542777e-08, + "loss": 0.6026, + "step": 15178 + }, + { + "epoch": 0.96, + "grad_norm": 0.9006307721138, + "learning_rate": 3.848063853852113e-08, + "loss": 0.5744, + "step": 15179 + }, + { + "epoch": 0.96, + "grad_norm": 0.845581591129303, + "learning_rate": 3.835369769393471e-08, + "loss": 0.5451, + "step": 15180 + }, + { + "epoch": 0.96, + "grad_norm": 0.8819062113761902, + "learning_rate": 3.822696576701368e-08, + "loss": 0.5855, + "step": 15181 + }, + { + "epoch": 0.96, + "grad_norm": 0.8674046993255615, + "learning_rate": 3.8100442763094324e-08, + "loss": 0.5511, + "step": 15182 + }, + { + "epoch": 0.96, + "grad_norm": 0.8748277425765991, + "learning_rate": 3.797412868750461e-08, + "loss": 0.5703, + "step": 15183 + }, + { + "epoch": 0.96, + "grad_norm": 0.9121760129928589, + "learning_rate": 3.784802354556249e-08, + "loss": 0.5536, + "step": 15184 + }, + { + "epoch": 0.96, + "grad_norm": 0.9617106318473816, + "learning_rate": 3.7722127342578183e-08, + "loss": 0.5778, + "step": 15185 + }, + { + "epoch": 0.96, + "grad_norm": 0.9024949073791504, + "learning_rate": 3.759644008385244e-08, + "loss": 0.5615, + "step": 15186 + }, + { + "epoch": 0.96, + "grad_norm": 0.9101724028587341, + "learning_rate": 3.747096177467768e-08, + "loss": 0.5541, + "step": 15187 + }, + { + "epoch": 0.96, + "grad_norm": 0.8800556659698486, + "learning_rate": 3.7345692420337476e-08, + "loss": 0.535, + "step": 15188 + }, + { + "epoch": 0.96, + "grad_norm": 0.9029918909072876, + "learning_rate": 3.722063202610593e-08, + "loss": 0.591, + "step": 15189 + }, + { + "epoch": 0.96, + "grad_norm": 0.9296280145645142, + "learning_rate": 3.709578059724939e-08, + "loss": 0.5728, + "step": 15190 + }, + { + "epoch": 0.96, + "grad_norm": 0.865096390247345, + "learning_rate": 3.697113813902531e-08, + "loss": 0.564, + "step": 15191 + }, + { + "epoch": 0.96, + "grad_norm": 0.9005051851272583, + "learning_rate": 3.684670465668116e-08, + "loss": 0.6006, + "step": 15192 + }, + { + "epoch": 0.96, + "grad_norm": 0.8931210041046143, + "learning_rate": 3.6722480155456655e-08, + "loss": 0.5469, + "step": 15193 + }, + { + "epoch": 0.96, + "grad_norm": 0.8682152032852173, + "learning_rate": 3.6598464640582586e-08, + "loss": 0.5758, + "step": 15194 + }, + { + "epoch": 0.96, + "grad_norm": 0.9252963662147522, + "learning_rate": 3.64746581172809e-08, + "loss": 0.6333, + "step": 15195 + }, + { + "epoch": 0.96, + "grad_norm": 0.9037860035896301, + "learning_rate": 3.6351060590764656e-08, + "loss": 0.567, + "step": 15196 + }, + { + "epoch": 0.96, + "grad_norm": 0.827499270439148, + "learning_rate": 3.6227672066237454e-08, + "loss": 0.5443, + "step": 15197 + }, + { + "epoch": 0.96, + "grad_norm": 0.8574694395065308, + "learning_rate": 3.6104492548895695e-08, + "loss": 0.6125, + "step": 15198 + }, + { + "epoch": 0.96, + "grad_norm": 0.8817412853240967, + "learning_rate": 3.5981522043925796e-08, + "loss": 0.5583, + "step": 15199 + }, + { + "epoch": 0.96, + "grad_norm": 0.8701195120811462, + "learning_rate": 3.585876055650528e-08, + "loss": 0.5599, + "step": 15200 + }, + { + "epoch": 0.96, + "grad_norm": 0.8706973195075989, + "learning_rate": 3.5736208091802784e-08, + "loss": 0.5572, + "step": 15201 + }, + { + "epoch": 0.96, + "grad_norm": 0.9477795958518982, + "learning_rate": 3.5613864654979734e-08, + "loss": 0.6371, + "step": 15202 + }, + { + "epoch": 0.96, + "grad_norm": 0.9767326712608337, + "learning_rate": 3.5491730251187016e-08, + "loss": 0.5926, + "step": 15203 + }, + { + "epoch": 0.96, + "grad_norm": 0.8724082708358765, + "learning_rate": 3.5369804885567185e-08, + "loss": 0.5583, + "step": 15204 + }, + { + "epoch": 0.96, + "grad_norm": 0.868426501750946, + "learning_rate": 3.52480885632539e-08, + "loss": 0.5689, + "step": 15205 + }, + { + "epoch": 0.96, + "grad_norm": 0.9158200621604919, + "learning_rate": 3.512658128937252e-08, + "loss": 0.5782, + "step": 15206 + }, + { + "epoch": 0.96, + "grad_norm": 0.889900267124176, + "learning_rate": 3.500528306904005e-08, + "loss": 0.5747, + "step": 15207 + }, + { + "epoch": 0.96, + "grad_norm": 0.9287896752357483, + "learning_rate": 3.488419390736242e-08, + "loss": 0.5598, + "step": 15208 + }, + { + "epoch": 0.96, + "grad_norm": 0.9180722236633301, + "learning_rate": 3.476331380943887e-08, + "loss": 0.6264, + "step": 15209 + }, + { + "epoch": 0.96, + "grad_norm": 0.9249047040939331, + "learning_rate": 3.464264278035978e-08, + "loss": 0.5909, + "step": 15210 + }, + { + "epoch": 0.96, + "grad_norm": 0.8158274292945862, + "learning_rate": 3.452218082520553e-08, + "loss": 0.5317, + "step": 15211 + }, + { + "epoch": 0.96, + "grad_norm": 0.8706426620483398, + "learning_rate": 3.440192794904873e-08, + "loss": 0.547, + "step": 15212 + }, + { + "epoch": 0.96, + "grad_norm": 0.8472093343734741, + "learning_rate": 3.4281884156953106e-08, + "loss": 0.5676, + "step": 15213 + }, + { + "epoch": 0.96, + "grad_norm": 0.8854379057884216, + "learning_rate": 3.416204945397239e-08, + "loss": 0.5735, + "step": 15214 + }, + { + "epoch": 0.96, + "grad_norm": 0.8368361592292786, + "learning_rate": 3.4042423845153104e-08, + "loss": 0.5672, + "step": 15215 + }, + { + "epoch": 0.96, + "grad_norm": 0.8709746599197388, + "learning_rate": 3.392300733553178e-08, + "loss": 0.573, + "step": 15216 + }, + { + "epoch": 0.96, + "grad_norm": 0.9157810807228088, + "learning_rate": 3.380379993013716e-08, + "loss": 0.6188, + "step": 15217 + }, + { + "epoch": 0.96, + "grad_norm": 0.9439373016357422, + "learning_rate": 3.368480163398802e-08, + "loss": 0.5568, + "step": 15218 + }, + { + "epoch": 0.96, + "grad_norm": 0.8551452159881592, + "learning_rate": 3.356601245209534e-08, + "loss": 0.5708, + "step": 15219 + }, + { + "epoch": 0.96, + "grad_norm": 0.9316923022270203, + "learning_rate": 3.344743238946124e-08, + "loss": 0.5438, + "step": 15220 + }, + { + "epoch": 0.96, + "grad_norm": 0.912805438041687, + "learning_rate": 3.332906145107839e-08, + "loss": 0.5485, + "step": 15221 + }, + { + "epoch": 0.96, + "grad_norm": 0.8899156451225281, + "learning_rate": 3.3210899641930586e-08, + "loss": 0.5284, + "step": 15222 + }, + { + "epoch": 0.96, + "grad_norm": 0.9470401406288147, + "learning_rate": 3.3092946966994385e-08, + "loss": 0.6202, + "step": 15223 + }, + { + "epoch": 0.96, + "grad_norm": 0.8981837630271912, + "learning_rate": 3.297520343123473e-08, + "loss": 0.5932, + "step": 15224 + }, + { + "epoch": 0.96, + "grad_norm": 0.8313995003700256, + "learning_rate": 3.285766903961096e-08, + "loss": 0.5378, + "step": 15225 + }, + { + "epoch": 0.96, + "grad_norm": 0.9461470246315002, + "learning_rate": 3.274034379707081e-08, + "loss": 0.6186, + "step": 15226 + }, + { + "epoch": 0.96, + "grad_norm": 0.8666161298751831, + "learning_rate": 3.262322770855475e-08, + "loss": 0.5918, + "step": 15227 + }, + { + "epoch": 0.96, + "grad_norm": 0.9340410828590393, + "learning_rate": 3.250632077899496e-08, + "loss": 0.6198, + "step": 15228 + }, + { + "epoch": 0.96, + "grad_norm": 0.9431737661361694, + "learning_rate": 3.238962301331305e-08, + "loss": 0.6251, + "step": 15229 + }, + { + "epoch": 0.96, + "grad_norm": 0.9066559076309204, + "learning_rate": 3.227313441642288e-08, + "loss": 0.6061, + "step": 15230 + }, + { + "epoch": 0.96, + "grad_norm": 0.9017807245254517, + "learning_rate": 3.2156854993229955e-08, + "loss": 0.5255, + "step": 15231 + }, + { + "epoch": 0.97, + "grad_norm": 0.8806298971176147, + "learning_rate": 3.2040784748629814e-08, + "loss": 0.5691, + "step": 15232 + }, + { + "epoch": 0.97, + "grad_norm": 0.9558776617050171, + "learning_rate": 3.192492368750966e-08, + "loss": 0.5747, + "step": 15233 + }, + { + "epoch": 0.97, + "grad_norm": 0.9031001329421997, + "learning_rate": 3.180927181474891e-08, + "loss": 0.6138, + "step": 15234 + }, + { + "epoch": 0.97, + "grad_norm": 0.9222975373268127, + "learning_rate": 3.16938291352159e-08, + "loss": 0.5767, + "step": 15235 + }, + { + "epoch": 0.97, + "grad_norm": 0.8207947611808777, + "learning_rate": 3.157859565377286e-08, + "loss": 0.5334, + "step": 15236 + }, + { + "epoch": 0.97, + "grad_norm": 0.8401099443435669, + "learning_rate": 3.146357137527145e-08, + "loss": 0.5246, + "step": 15237 + }, + { + "epoch": 0.97, + "grad_norm": 0.8946247100830078, + "learning_rate": 3.1348756304554475e-08, + "loss": 0.5931, + "step": 15238 + }, + { + "epoch": 0.97, + "grad_norm": 0.877619743347168, + "learning_rate": 3.123415044645639e-08, + "loss": 0.5706, + "step": 15239 + }, + { + "epoch": 0.97, + "grad_norm": 0.865767240524292, + "learning_rate": 3.111975380580334e-08, + "loss": 0.5536, + "step": 15240 + }, + { + "epoch": 0.97, + "grad_norm": 0.8900549411773682, + "learning_rate": 3.100556638741203e-08, + "loss": 0.6122, + "step": 15241 + }, + { + "epoch": 0.97, + "grad_norm": 0.8810964822769165, + "learning_rate": 3.089158819609084e-08, + "loss": 0.5664, + "step": 15242 + }, + { + "epoch": 0.97, + "grad_norm": 0.8617457747459412, + "learning_rate": 3.077781923663814e-08, + "loss": 0.604, + "step": 15243 + }, + { + "epoch": 0.97, + "grad_norm": 0.8632597923278809, + "learning_rate": 3.066425951384455e-08, + "loss": 0.4994, + "step": 15244 + }, + { + "epoch": 0.97, + "grad_norm": 0.8872633576393127, + "learning_rate": 3.055090903249236e-08, + "loss": 0.6185, + "step": 15245 + }, + { + "epoch": 0.97, + "grad_norm": 0.8761091828346252, + "learning_rate": 3.0437767797353856e-08, + "loss": 0.5448, + "step": 15246 + }, + { + "epoch": 0.97, + "grad_norm": 0.8925213813781738, + "learning_rate": 3.032483581319301e-08, + "loss": 0.5856, + "step": 15247 + }, + { + "epoch": 0.97, + "grad_norm": 0.9047413468360901, + "learning_rate": 3.021211308476546e-08, + "loss": 0.5834, + "step": 15248 + }, + { + "epoch": 0.97, + "grad_norm": 0.8721828460693359, + "learning_rate": 3.0099599616816856e-08, + "loss": 0.571, + "step": 15249 + }, + { + "epoch": 0.97, + "grad_norm": 0.9577370882034302, + "learning_rate": 2.998729541408507e-08, + "loss": 0.5948, + "step": 15250 + }, + { + "epoch": 0.97, + "grad_norm": 0.9254369139671326, + "learning_rate": 2.987520048129911e-08, + "loss": 0.5841, + "step": 15251 + }, + { + "epoch": 0.97, + "grad_norm": 0.8802624940872192, + "learning_rate": 2.976331482317796e-08, + "loss": 0.5672, + "step": 15252 + }, + { + "epoch": 0.97, + "grad_norm": 0.8794838786125183, + "learning_rate": 2.9651638444434528e-08, + "loss": 0.6076, + "step": 15253 + }, + { + "epoch": 0.97, + "grad_norm": 0.8821896910667419, + "learning_rate": 2.9540171349769497e-08, + "loss": 0.5937, + "step": 15254 + }, + { + "epoch": 0.97, + "grad_norm": 0.8732861876487732, + "learning_rate": 2.942891354387689e-08, + "loss": 0.5617, + "step": 15255 + }, + { + "epoch": 0.97, + "grad_norm": 0.9027414321899414, + "learning_rate": 2.9317865031441295e-08, + "loss": 0.5962, + "step": 15256 + }, + { + "epoch": 0.97, + "grad_norm": 0.9479333162307739, + "learning_rate": 2.920702581713841e-08, + "loss": 0.5659, + "step": 15257 + }, + { + "epoch": 0.97, + "grad_norm": 0.9096110463142395, + "learning_rate": 2.909639590563562e-08, + "loss": 0.6119, + "step": 15258 + }, + { + "epoch": 0.97, + "grad_norm": 0.8687134981155396, + "learning_rate": 2.8985975301591975e-08, + "loss": 0.5714, + "step": 15259 + }, + { + "epoch": 0.97, + "grad_norm": 0.8929232358932495, + "learning_rate": 2.887576400965486e-08, + "loss": 0.5406, + "step": 15260 + }, + { + "epoch": 0.97, + "grad_norm": 0.8353814482688904, + "learning_rate": 2.8765762034466682e-08, + "loss": 0.5534, + "step": 15261 + }, + { + "epoch": 0.97, + "grad_norm": 0.8765125274658203, + "learning_rate": 2.8655969380658177e-08, + "loss": 0.5562, + "step": 15262 + }, + { + "epoch": 0.97, + "grad_norm": 0.9010085463523865, + "learning_rate": 2.8546386052853427e-08, + "loss": 0.5473, + "step": 15263 + }, + { + "epoch": 0.97, + "grad_norm": 0.9564641118049622, + "learning_rate": 2.8437012055665403e-08, + "loss": 0.6325, + "step": 15264 + }, + { + "epoch": 0.97, + "grad_norm": 0.8980580568313599, + "learning_rate": 2.832784739369987e-08, + "loss": 0.5999, + "step": 15265 + }, + { + "epoch": 0.97, + "grad_norm": 0.8257995843887329, + "learning_rate": 2.8218892071553705e-08, + "loss": 0.5208, + "step": 15266 + }, + { + "epoch": 0.97, + "grad_norm": 0.9016112685203552, + "learning_rate": 2.8110146093814906e-08, + "loss": 0.555, + "step": 15267 + }, + { + "epoch": 0.97, + "grad_norm": 0.8790071606636047, + "learning_rate": 2.8001609465061474e-08, + "loss": 0.5744, + "step": 15268 + }, + { + "epoch": 0.97, + "grad_norm": 0.8602889180183411, + "learning_rate": 2.7893282189863647e-08, + "loss": 0.5734, + "step": 15269 + }, + { + "epoch": 0.97, + "grad_norm": 0.8754189014434814, + "learning_rate": 2.7785164272783327e-08, + "loss": 0.5627, + "step": 15270 + }, + { + "epoch": 0.97, + "grad_norm": 0.9688771367073059, + "learning_rate": 2.7677255718372986e-08, + "loss": 0.5618, + "step": 15271 + }, + { + "epoch": 0.97, + "grad_norm": 0.8828610181808472, + "learning_rate": 2.7569556531175657e-08, + "loss": 0.5808, + "step": 15272 + }, + { + "epoch": 0.97, + "grad_norm": 0.9202200174331665, + "learning_rate": 2.7462066715726045e-08, + "loss": 0.6168, + "step": 15273 + }, + { + "epoch": 0.97, + "grad_norm": 0.9173966646194458, + "learning_rate": 2.7354786276551083e-08, + "loss": 0.6054, + "step": 15274 + }, + { + "epoch": 0.97, + "grad_norm": 0.9728456139564514, + "learning_rate": 2.7247715218167714e-08, + "loss": 0.6074, + "step": 15275 + }, + { + "epoch": 0.97, + "grad_norm": 0.905205249786377, + "learning_rate": 2.7140853545083447e-08, + "loss": 0.5679, + "step": 15276 + }, + { + "epoch": 0.97, + "grad_norm": 0.9280872344970703, + "learning_rate": 2.703420126179912e-08, + "loss": 0.5779, + "step": 15277 + }, + { + "epoch": 0.97, + "grad_norm": 0.9241009950637817, + "learning_rate": 2.692775837280448e-08, + "loss": 0.6027, + "step": 15278 + }, + { + "epoch": 0.97, + "grad_norm": 0.8840800523757935, + "learning_rate": 2.682152488258205e-08, + "loss": 0.5911, + "step": 15279 + }, + { + "epoch": 0.97, + "grad_norm": 0.8568680286407471, + "learning_rate": 2.671550079560492e-08, + "loss": 0.5292, + "step": 15280 + }, + { + "epoch": 0.97, + "grad_norm": 0.8979047536849976, + "learning_rate": 2.6609686116337296e-08, + "loss": 0.5474, + "step": 15281 + }, + { + "epoch": 0.97, + "grad_norm": 0.9321437478065491, + "learning_rate": 2.6504080849234504e-08, + "loss": 0.6161, + "step": 15282 + }, + { + "epoch": 0.97, + "grad_norm": 0.8534190654754639, + "learning_rate": 2.6398684998742986e-08, + "loss": 0.5428, + "step": 15283 + }, + { + "epoch": 0.97, + "grad_norm": 0.855660080909729, + "learning_rate": 2.629349856930141e-08, + "loss": 0.5282, + "step": 15284 + }, + { + "epoch": 0.97, + "grad_norm": 0.9074161052703857, + "learning_rate": 2.6188521565338466e-08, + "loss": 0.5735, + "step": 15285 + }, + { + "epoch": 0.97, + "grad_norm": 0.8709417581558228, + "learning_rate": 2.608375399127394e-08, + "loss": 0.558, + "step": 15286 + }, + { + "epoch": 0.97, + "grad_norm": 0.8391317129135132, + "learning_rate": 2.5979195851519313e-08, + "loss": 0.5302, + "step": 15287 + }, + { + "epoch": 0.97, + "grad_norm": 0.9150146842002869, + "learning_rate": 2.5874847150477722e-08, + "loss": 0.5882, + "step": 15288 + }, + { + "epoch": 0.97, + "grad_norm": 0.8675903081893921, + "learning_rate": 2.5770707892542878e-08, + "loss": 0.5531, + "step": 15289 + }, + { + "epoch": 0.97, + "grad_norm": 0.9216609597206116, + "learning_rate": 2.5666778082099052e-08, + "loss": 0.5489, + "step": 15290 + }, + { + "epoch": 0.97, + "grad_norm": 0.9866342544555664, + "learning_rate": 2.5563057723522742e-08, + "loss": 0.5831, + "step": 15291 + }, + { + "epoch": 0.97, + "grad_norm": 0.9153217673301697, + "learning_rate": 2.5459546821181014e-08, + "loss": 0.5968, + "step": 15292 + }, + { + "epoch": 0.97, + "grad_norm": 0.8692642450332642, + "learning_rate": 2.5356245379433154e-08, + "loss": 0.5611, + "step": 15293 + }, + { + "epoch": 0.97, + "grad_norm": 0.8365652561187744, + "learning_rate": 2.525315340262846e-08, + "loss": 0.5344, + "step": 15294 + }, + { + "epoch": 0.97, + "grad_norm": 0.8992009162902832, + "learning_rate": 2.51502708951068e-08, + "loss": 0.6268, + "step": 15295 + }, + { + "epoch": 0.97, + "grad_norm": 0.9119232892990112, + "learning_rate": 2.5047597861201368e-08, + "loss": 0.5838, + "step": 15296 + }, + { + "epoch": 0.97, + "grad_norm": 0.9377126097679138, + "learning_rate": 2.494513430523482e-08, + "loss": 0.5693, + "step": 15297 + }, + { + "epoch": 0.97, + "grad_norm": 0.9767472147941589, + "learning_rate": 2.4842880231522038e-08, + "loss": 0.5736, + "step": 15298 + }, + { + "epoch": 0.97, + "grad_norm": 0.8412918448448181, + "learning_rate": 2.474083564436791e-08, + "loss": 0.5326, + "step": 15299 + }, + { + "epoch": 0.97, + "grad_norm": 0.8873780965805054, + "learning_rate": 2.4639000548070114e-08, + "loss": 0.6094, + "step": 15300 + }, + { + "epoch": 0.97, + "grad_norm": 0.8979631066322327, + "learning_rate": 2.4537374946915215e-08, + "loss": 0.5258, + "step": 15301 + }, + { + "epoch": 0.97, + "grad_norm": 0.8989670872688293, + "learning_rate": 2.4435958845183684e-08, + "loss": 0.5554, + "step": 15302 + }, + { + "epoch": 0.97, + "grad_norm": 0.8940199017524719, + "learning_rate": 2.4334752247145433e-08, + "loss": 0.5619, + "step": 15303 + }, + { + "epoch": 0.97, + "grad_norm": 0.8079856038093567, + "learning_rate": 2.4233755157060944e-08, + "loss": 0.5733, + "step": 15304 + }, + { + "epoch": 0.97, + "grad_norm": 0.9142255187034607, + "learning_rate": 2.413296757918404e-08, + "loss": 0.5756, + "step": 15305 + }, + { + "epoch": 0.97, + "grad_norm": 0.8849722146987915, + "learning_rate": 2.4032389517757993e-08, + "loss": 0.5724, + "step": 15306 + }, + { + "epoch": 0.97, + "grad_norm": 0.8250989317893982, + "learning_rate": 2.3932020977017745e-08, + "loss": 0.5244, + "step": 15307 + }, + { + "epoch": 0.97, + "grad_norm": 0.8586926460266113, + "learning_rate": 2.3831861961189917e-08, + "loss": 0.5474, + "step": 15308 + }, + { + "epoch": 0.97, + "grad_norm": 0.8741357326507568, + "learning_rate": 2.3731912474491137e-08, + "loss": 0.5359, + "step": 15309 + }, + { + "epoch": 0.97, + "grad_norm": 0.8332794308662415, + "learning_rate": 2.3632172521130815e-08, + "loss": 0.51, + "step": 15310 + }, + { + "epoch": 0.97, + "grad_norm": 0.8357459902763367, + "learning_rate": 2.3532642105307813e-08, + "loss": 0.5387, + "step": 15311 + }, + { + "epoch": 0.97, + "grad_norm": 0.900471568107605, + "learning_rate": 2.3433321231213778e-08, + "loss": 0.5677, + "step": 15312 + }, + { + "epoch": 0.97, + "grad_norm": 0.9303227066993713, + "learning_rate": 2.3334209903029815e-08, + "loss": 0.5955, + "step": 15313 + }, + { + "epoch": 0.97, + "grad_norm": 0.8947436809539795, + "learning_rate": 2.3235308124930357e-08, + "loss": 0.5752, + "step": 15314 + }, + { + "epoch": 0.97, + "grad_norm": 0.9085913896560669, + "learning_rate": 2.3136615901078742e-08, + "loss": 0.5911, + "step": 15315 + }, + { + "epoch": 0.97, + "grad_norm": 0.8889240026473999, + "learning_rate": 2.303813323563109e-08, + "loss": 0.5651, + "step": 15316 + }, + { + "epoch": 0.97, + "grad_norm": 0.9324416518211365, + "learning_rate": 2.2939860132734084e-08, + "loss": 0.5499, + "step": 15317 + }, + { + "epoch": 0.97, + "grad_norm": 0.8463728427886963, + "learning_rate": 2.2841796596525522e-08, + "loss": 0.5775, + "step": 15318 + }, + { + "epoch": 0.97, + "grad_norm": 0.9454851150512695, + "learning_rate": 2.2743942631134886e-08, + "loss": 0.5532, + "step": 15319 + }, + { + "epoch": 0.97, + "grad_norm": 0.9939208030700684, + "learning_rate": 2.264629824068165e-08, + "loss": 0.661, + "step": 15320 + }, + { + "epoch": 0.97, + "grad_norm": 0.952218234539032, + "learning_rate": 2.2548863429278645e-08, + "loss": 0.6229, + "step": 15321 + }, + { + "epoch": 0.97, + "grad_norm": 0.8898659348487854, + "learning_rate": 2.2451638201027026e-08, + "loss": 0.5843, + "step": 15322 + }, + { + "epoch": 0.97, + "grad_norm": 0.9616308212280273, + "learning_rate": 2.2354622560021854e-08, + "loss": 0.5664, + "step": 15323 + }, + { + "epoch": 0.97, + "grad_norm": 0.8662042021751404, + "learning_rate": 2.2257816510347086e-08, + "loss": 0.55, + "step": 15324 + }, + { + "epoch": 0.97, + "grad_norm": 0.9076823592185974, + "learning_rate": 2.2161220056079457e-08, + "loss": 0.5385, + "step": 15325 + }, + { + "epoch": 0.97, + "grad_norm": 0.9138240218162537, + "learning_rate": 2.2064833201286827e-08, + "loss": 0.5659, + "step": 15326 + }, + { + "epoch": 0.97, + "grad_norm": 0.8747329115867615, + "learning_rate": 2.1968655950026508e-08, + "loss": 0.6079, + "step": 15327 + }, + { + "epoch": 0.97, + "grad_norm": 0.8825821876525879, + "learning_rate": 2.187268830634859e-08, + "loss": 0.5695, + "step": 15328 + }, + { + "epoch": 0.97, + "grad_norm": 0.8999965190887451, + "learning_rate": 2.1776930274294283e-08, + "loss": 0.5606, + "step": 15329 + }, + { + "epoch": 0.97, + "grad_norm": 0.8908900022506714, + "learning_rate": 2.1681381857895923e-08, + "loss": 0.5693, + "step": 15330 + }, + { + "epoch": 0.97, + "grad_norm": 0.87883061170578, + "learning_rate": 2.1586043061175842e-08, + "loss": 0.5597, + "step": 15331 + }, + { + "epoch": 0.97, + "grad_norm": 0.8488723635673523, + "learning_rate": 2.1490913888149166e-08, + "loss": 0.6041, + "step": 15332 + }, + { + "epoch": 0.97, + "grad_norm": 0.9183140993118286, + "learning_rate": 2.139599434282047e-08, + "loss": 0.5741, + "step": 15333 + }, + { + "epoch": 0.97, + "grad_norm": 0.9319660663604736, + "learning_rate": 2.130128442918766e-08, + "loss": 0.641, + "step": 15334 + }, + { + "epoch": 0.97, + "grad_norm": 0.9717698097229004, + "learning_rate": 2.1206784151238113e-08, + "loss": 0.5749, + "step": 15335 + }, + { + "epoch": 0.97, + "grad_norm": 0.8720336556434631, + "learning_rate": 2.111249351295086e-08, + "loss": 0.6203, + "step": 15336 + }, + { + "epoch": 0.97, + "grad_norm": 0.8387833833694458, + "learning_rate": 2.1018412518296617e-08, + "loss": 0.5874, + "step": 15337 + }, + { + "epoch": 0.97, + "grad_norm": 0.8112475872039795, + "learning_rate": 2.0924541171235545e-08, + "loss": 0.5653, + "step": 15338 + }, + { + "epoch": 0.97, + "grad_norm": 0.9344534277915955, + "learning_rate": 2.083087947572171e-08, + "loss": 0.567, + "step": 15339 + }, + { + "epoch": 0.97, + "grad_norm": 0.9421919584274292, + "learning_rate": 2.073742743569862e-08, + "loss": 0.5728, + "step": 15340 + }, + { + "epoch": 0.97, + "grad_norm": 0.9264227151870728, + "learning_rate": 2.0644185055100352e-08, + "loss": 0.6343, + "step": 15341 + }, + { + "epoch": 0.97, + "grad_norm": 0.8339887857437134, + "learning_rate": 2.0551152337853208e-08, + "loss": 0.5588, + "step": 15342 + }, + { + "epoch": 0.97, + "grad_norm": 0.9266855120658875, + "learning_rate": 2.0458329287875168e-08, + "loss": 0.5816, + "step": 15343 + }, + { + "epoch": 0.97, + "grad_norm": 0.9328429698944092, + "learning_rate": 2.0365715909074213e-08, + "loss": 0.5793, + "step": 15344 + }, + { + "epoch": 0.97, + "grad_norm": 0.8851380944252014, + "learning_rate": 2.027331220535056e-08, + "loss": 0.6006, + "step": 15345 + }, + { + "epoch": 0.97, + "grad_norm": 0.9010560512542725, + "learning_rate": 2.018111818059387e-08, + "loss": 0.4876, + "step": 15346 + }, + { + "epoch": 0.97, + "grad_norm": 0.9649263620376587, + "learning_rate": 2.008913383868716e-08, + "loss": 0.6252, + "step": 15347 + }, + { + "epoch": 0.97, + "grad_norm": 0.9451420903205872, + "learning_rate": 1.999735918350343e-08, + "loss": 0.5907, + "step": 15348 + }, + { + "epoch": 0.97, + "grad_norm": 0.9090909361839294, + "learning_rate": 1.990579421890626e-08, + "loss": 0.5909, + "step": 15349 + }, + { + "epoch": 0.97, + "grad_norm": 0.8864248991012573, + "learning_rate": 1.9814438948751458e-08, + "loss": 0.5775, + "step": 15350 + }, + { + "epoch": 0.97, + "grad_norm": 0.8502189517021179, + "learning_rate": 1.9723293376886497e-08, + "loss": 0.5513, + "step": 15351 + }, + { + "epoch": 0.97, + "grad_norm": 0.8676384687423706, + "learning_rate": 1.963235750714776e-08, + "loss": 0.6178, + "step": 15352 + }, + { + "epoch": 0.97, + "grad_norm": 0.9123603701591492, + "learning_rate": 1.9541631343365507e-08, + "loss": 0.5994, + "step": 15353 + }, + { + "epoch": 0.97, + "grad_norm": 1.0008543729782104, + "learning_rate": 1.9451114889359468e-08, + "loss": 0.6402, + "step": 15354 + }, + { + "epoch": 0.97, + "grad_norm": 0.925025999546051, + "learning_rate": 1.936080814894048e-08, + "loss": 0.5693, + "step": 15355 + }, + { + "epoch": 0.97, + "grad_norm": 0.9011825323104858, + "learning_rate": 1.9270711125912167e-08, + "loss": 0.5558, + "step": 15356 + }, + { + "epoch": 0.97, + "grad_norm": 0.8734168410301208, + "learning_rate": 1.9180823824067053e-08, + "loss": 0.5738, + "step": 15357 + }, + { + "epoch": 0.97, + "grad_norm": 0.858608067035675, + "learning_rate": 1.909114624719044e-08, + "loss": 0.578, + "step": 15358 + }, + { + "epoch": 0.97, + "grad_norm": 0.8652524352073669, + "learning_rate": 1.90016783990582e-08, + "loss": 0.5556, + "step": 15359 + }, + { + "epoch": 0.97, + "grad_norm": 0.9070523977279663, + "learning_rate": 1.891242028343787e-08, + "loss": 0.6098, + "step": 15360 + }, + { + "epoch": 0.97, + "grad_norm": 0.8430723547935486, + "learning_rate": 1.8823371904087563e-08, + "loss": 0.5671, + "step": 15361 + }, + { + "epoch": 0.97, + "grad_norm": 0.8682308197021484, + "learning_rate": 1.8734533264757047e-08, + "loss": 0.5375, + "step": 15362 + }, + { + "epoch": 0.97, + "grad_norm": 0.9168040156364441, + "learning_rate": 1.864590436918612e-08, + "loss": 0.611, + "step": 15363 + }, + { + "epoch": 0.97, + "grad_norm": 0.9281341433525085, + "learning_rate": 1.8557485221107897e-08, + "loss": 0.5621, + "step": 15364 + }, + { + "epoch": 0.97, + "grad_norm": 0.9351321458816528, + "learning_rate": 1.8469275824244958e-08, + "loss": 0.5595, + "step": 15365 + }, + { + "epoch": 0.97, + "grad_norm": 0.9267570376396179, + "learning_rate": 1.8381276182311004e-08, + "loss": 0.5362, + "step": 15366 + }, + { + "epoch": 0.97, + "grad_norm": 0.9369710087776184, + "learning_rate": 1.8293486299011398e-08, + "loss": 0.5719, + "step": 15367 + }, + { + "epoch": 0.97, + "grad_norm": 0.9404371380805969, + "learning_rate": 1.8205906178043186e-08, + "loss": 0.5835, + "step": 15368 + }, + { + "epoch": 0.97, + "grad_norm": 0.8816442489624023, + "learning_rate": 1.811853582309453e-08, + "loss": 0.6092, + "step": 15369 + }, + { + "epoch": 0.97, + "grad_norm": 0.8771417737007141, + "learning_rate": 1.803137523784304e-08, + "loss": 0.5125, + "step": 15370 + }, + { + "epoch": 0.97, + "grad_norm": 0.8539003133773804, + "learning_rate": 1.7944424425959116e-08, + "loss": 0.5784, + "step": 15371 + }, + { + "epoch": 0.97, + "grad_norm": 0.876171886920929, + "learning_rate": 1.7857683391104273e-08, + "loss": 0.5423, + "step": 15372 + }, + { + "epoch": 0.97, + "grad_norm": 0.8787450790405273, + "learning_rate": 1.7771152136931147e-08, + "loss": 0.5901, + "step": 15373 + }, + { + "epoch": 0.97, + "grad_norm": 0.9184008836746216, + "learning_rate": 1.7684830667082377e-08, + "loss": 0.5802, + "step": 15374 + }, + { + "epoch": 0.97, + "grad_norm": 0.887069046497345, + "learning_rate": 1.759871898519394e-08, + "loss": 0.5638, + "step": 15375 + }, + { + "epoch": 0.97, + "grad_norm": 0.8621271848678589, + "learning_rate": 1.7512817094890167e-08, + "loss": 0.5408, + "step": 15376 + }, + { + "epoch": 0.97, + "grad_norm": 0.8775637745857239, + "learning_rate": 1.742712499978927e-08, + "loss": 0.5448, + "step": 15377 + }, + { + "epoch": 0.97, + "grad_norm": 0.9085080027580261, + "learning_rate": 1.734164270349892e-08, + "loss": 0.5373, + "step": 15378 + }, + { + "epoch": 0.97, + "grad_norm": 0.8680553436279297, + "learning_rate": 1.7256370209618458e-08, + "loss": 0.6011, + "step": 15379 + }, + { + "epoch": 0.97, + "grad_norm": 0.9011398553848267, + "learning_rate": 1.71713075217389e-08, + "loss": 0.5927, + "step": 15380 + }, + { + "epoch": 0.97, + "grad_norm": 0.9100791215896606, + "learning_rate": 1.7086454643441273e-08, + "loss": 0.592, + "step": 15381 + }, + { + "epoch": 0.97, + "grad_norm": 0.8486478924751282, + "learning_rate": 1.7001811578298832e-08, + "loss": 0.5193, + "step": 15382 + }, + { + "epoch": 0.97, + "grad_norm": 0.8972152471542358, + "learning_rate": 1.6917378329875946e-08, + "loss": 0.5594, + "step": 15383 + }, + { + "epoch": 0.97, + "grad_norm": 0.8258572220802307, + "learning_rate": 1.6833154901726988e-08, + "loss": 0.5933, + "step": 15384 + }, + { + "epoch": 0.97, + "grad_norm": 0.9576346278190613, + "learning_rate": 1.6749141297398574e-08, + "loss": 0.5544, + "step": 15385 + }, + { + "epoch": 0.97, + "grad_norm": 0.8871638178825378, + "learning_rate": 1.6665337520428427e-08, + "loss": 0.5285, + "step": 15386 + }, + { + "epoch": 0.97, + "grad_norm": 0.8549116253852844, + "learning_rate": 1.658174357434483e-08, + "loss": 0.5918, + "step": 15387 + }, + { + "epoch": 0.97, + "grad_norm": 0.9582047462463379, + "learning_rate": 1.649835946266831e-08, + "loss": 0.6273, + "step": 15388 + }, + { + "epoch": 0.97, + "grad_norm": 0.8410069346427917, + "learning_rate": 1.6415185188909944e-08, + "loss": 0.5405, + "step": 15389 + }, + { + "epoch": 0.98, + "grad_norm": 0.8835951685905457, + "learning_rate": 1.6332220756570815e-08, + "loss": 0.5562, + "step": 15390 + }, + { + "epoch": 0.98, + "grad_norm": 0.8853712677955627, + "learning_rate": 1.6249466169145354e-08, + "loss": 0.6191, + "step": 15391 + }, + { + "epoch": 0.98, + "grad_norm": 0.887049674987793, + "learning_rate": 1.6166921430118e-08, + "loss": 0.5563, + "step": 15392 + }, + { + "epoch": 0.98, + "grad_norm": 0.9243970513343811, + "learning_rate": 1.608458654296319e-08, + "loss": 0.5632, + "step": 15393 + }, + { + "epoch": 0.98, + "grad_norm": 0.9056985378265381, + "learning_rate": 1.600246151114926e-08, + "loss": 0.5555, + "step": 15394 + }, + { + "epoch": 0.98, + "grad_norm": 0.8855133056640625, + "learning_rate": 1.5920546338133447e-08, + "loss": 0.5699, + "step": 15395 + }, + { + "epoch": 0.98, + "grad_norm": 0.8974730968475342, + "learning_rate": 1.5838841027365215e-08, + "loss": 0.5759, + "step": 15396 + }, + { + "epoch": 0.98, + "grad_norm": 0.876063883304596, + "learning_rate": 1.5757345582285144e-08, + "loss": 0.5996, + "step": 15397 + }, + { + "epoch": 0.98, + "grad_norm": 0.8951111435890198, + "learning_rate": 1.5676060006323267e-08, + "loss": 0.6397, + "step": 15398 + }, + { + "epoch": 0.98, + "grad_norm": 0.9184896349906921, + "learning_rate": 1.559498430290407e-08, + "loss": 0.5522, + "step": 15399 + }, + { + "epoch": 0.98, + "grad_norm": 0.9113056063652039, + "learning_rate": 1.5514118475440378e-08, + "loss": 0.5542, + "step": 15400 + }, + { + "epoch": 0.98, + "grad_norm": 0.8028354048728943, + "learning_rate": 1.5433462527337793e-08, + "loss": 0.5046, + "step": 15401 + }, + { + "epoch": 0.98, + "grad_norm": 0.8966811299324036, + "learning_rate": 1.5353016461991387e-08, + "loss": 0.6253, + "step": 15402 + }, + { + "epoch": 0.98, + "grad_norm": 0.8664458394050598, + "learning_rate": 1.5272780282789556e-08, + "loss": 0.6081, + "step": 15403 + }, + { + "epoch": 0.98, + "grad_norm": 0.8263579607009888, + "learning_rate": 1.5192753993110155e-08, + "loss": 0.535, + "step": 15404 + }, + { + "epoch": 0.98, + "grad_norm": 0.908085286617279, + "learning_rate": 1.5112937596323263e-08, + "loss": 0.6077, + "step": 15405 + }, + { + "epoch": 0.98, + "grad_norm": 0.895283579826355, + "learning_rate": 1.5033331095788973e-08, + "loss": 0.5622, + "step": 15406 + }, + { + "epoch": 0.98, + "grad_norm": 0.84937584400177, + "learning_rate": 1.4953934494860155e-08, + "loss": 0.4815, + "step": 15407 + }, + { + "epoch": 0.98, + "grad_norm": 0.8702456951141357, + "learning_rate": 1.4874747796879142e-08, + "loss": 0.5375, + "step": 15408 + }, + { + "epoch": 0.98, + "grad_norm": 0.9346665740013123, + "learning_rate": 1.4795771005181036e-08, + "loss": 0.633, + "step": 15409 + }, + { + "epoch": 0.98, + "grad_norm": 0.8707761764526367, + "learning_rate": 1.4717004123090406e-08, + "loss": 0.5525, + "step": 15410 + }, + { + "epoch": 0.98, + "grad_norm": 0.9167184829711914, + "learning_rate": 1.463844715392404e-08, + "loss": 0.6256, + "step": 15411 + }, + { + "epoch": 0.98, + "grad_norm": 0.8158385157585144, + "learning_rate": 1.4560100100989849e-08, + "loss": 0.514, + "step": 15412 + }, + { + "epoch": 0.98, + "grad_norm": 0.8651106953620911, + "learning_rate": 1.448196296758686e-08, + "loss": 0.5446, + "step": 15413 + }, + { + "epoch": 0.98, + "grad_norm": 0.8702985644340515, + "learning_rate": 1.4404035757005219e-08, + "loss": 0.5671, + "step": 15414 + }, + { + "epoch": 0.98, + "grad_norm": 0.9393275380134583, + "learning_rate": 1.4326318472525635e-08, + "loss": 0.567, + "step": 15415 + }, + { + "epoch": 0.98, + "grad_norm": 0.9015846252441406, + "learning_rate": 1.4248811117421046e-08, + "loss": 0.5923, + "step": 15416 + }, + { + "epoch": 0.98, + "grad_norm": 0.9426272511482239, + "learning_rate": 1.4171513694954953e-08, + "loss": 0.5628, + "step": 15417 + }, + { + "epoch": 0.98, + "grad_norm": 0.9212367534637451, + "learning_rate": 1.4094426208381972e-08, + "loss": 0.6307, + "step": 15418 + }, + { + "epoch": 0.98, + "grad_norm": 0.9438113570213318, + "learning_rate": 1.4017548660947844e-08, + "loss": 0.5934, + "step": 15419 + }, + { + "epoch": 0.98, + "grad_norm": 0.8640725016593933, + "learning_rate": 1.3940881055889976e-08, + "loss": 0.594, + "step": 15420 + }, + { + "epoch": 0.98, + "grad_norm": 0.9187299609184265, + "learning_rate": 1.3864423396436344e-08, + "loss": 0.55, + "step": 15421 + }, + { + "epoch": 0.98, + "grad_norm": 0.8524268865585327, + "learning_rate": 1.3788175685806594e-08, + "loss": 0.5912, + "step": 15422 + }, + { + "epoch": 0.98, + "grad_norm": 0.9235708713531494, + "learning_rate": 1.3712137927210377e-08, + "loss": 0.5946, + "step": 15423 + }, + { + "epoch": 0.98, + "grad_norm": 0.911343514919281, + "learning_rate": 1.3636310123850694e-08, + "loss": 0.6099, + "step": 15424 + }, + { + "epoch": 0.98, + "grad_norm": 0.8266077637672424, + "learning_rate": 1.3560692278919429e-08, + "loss": 0.554, + "step": 15425 + }, + { + "epoch": 0.98, + "grad_norm": 0.8780014514923096, + "learning_rate": 1.3485284395600707e-08, + "loss": 0.5294, + "step": 15426 + }, + { + "epoch": 0.98, + "grad_norm": 0.9280437231063843, + "learning_rate": 1.3410086477069761e-08, + "loss": 0.6171, + "step": 15427 + }, + { + "epoch": 0.98, + "grad_norm": 0.8806030750274658, + "learning_rate": 1.333509852649295e-08, + "loss": 0.551, + "step": 15428 + }, + { + "epoch": 0.98, + "grad_norm": 0.8715260028839111, + "learning_rate": 1.3260320547028305e-08, + "loss": 0.5821, + "step": 15429 + }, + { + "epoch": 0.98, + "grad_norm": 0.8022364377975464, + "learning_rate": 1.3185752541823304e-08, + "loss": 0.5608, + "step": 15430 + }, + { + "epoch": 0.98, + "grad_norm": 0.9580459594726562, + "learning_rate": 1.3111394514018772e-08, + "loss": 0.5416, + "step": 15431 + }, + { + "epoch": 0.98, + "grad_norm": 0.8772706985473633, + "learning_rate": 1.3037246466745535e-08, + "loss": 0.5591, + "step": 15432 + }, + { + "epoch": 0.98, + "grad_norm": 0.8983436822891235, + "learning_rate": 1.2963308403124985e-08, + "loss": 0.5797, + "step": 15433 + }, + { + "epoch": 0.98, + "grad_norm": 0.9700096249580383, + "learning_rate": 1.2889580326271301e-08, + "loss": 0.5816, + "step": 15434 + }, + { + "epoch": 0.98, + "grad_norm": 0.8904829621315002, + "learning_rate": 1.2816062239288107e-08, + "loss": 0.5378, + "step": 15435 + }, + { + "epoch": 0.98, + "grad_norm": 0.9526095390319824, + "learning_rate": 1.2742754145271264e-08, + "loss": 0.5637, + "step": 15436 + }, + { + "epoch": 0.98, + "grad_norm": 0.9104148745536804, + "learning_rate": 1.2669656047308299e-08, + "loss": 0.5773, + "step": 15437 + }, + { + "epoch": 0.98, + "grad_norm": 0.9310768246650696, + "learning_rate": 1.2596767948476196e-08, + "loss": 0.603, + "step": 15438 + }, + { + "epoch": 0.98, + "grad_norm": 0.9144603610038757, + "learning_rate": 1.2524089851844168e-08, + "loss": 0.5422, + "step": 15439 + }, + { + "epoch": 0.98, + "grad_norm": 0.856395959854126, + "learning_rate": 1.2451621760472544e-08, + "loss": 0.5526, + "step": 15440 + }, + { + "epoch": 0.98, + "grad_norm": 0.9461926817893982, + "learning_rate": 1.237936367741277e-08, + "loss": 0.5741, + "step": 15441 + }, + { + "epoch": 0.98, + "grad_norm": 0.9388793706893921, + "learning_rate": 1.2307315605707416e-08, + "loss": 0.5818, + "step": 15442 + }, + { + "epoch": 0.98, + "grad_norm": 0.9289106726646423, + "learning_rate": 1.2235477548390162e-08, + "loss": 0.6053, + "step": 15443 + }, + { + "epoch": 0.98, + "grad_norm": 0.821549654006958, + "learning_rate": 1.2163849508485259e-08, + "loss": 0.5101, + "step": 15444 + }, + { + "epoch": 0.98, + "grad_norm": 0.8986890316009521, + "learning_rate": 1.2092431489009738e-08, + "loss": 0.5866, + "step": 15445 + }, + { + "epoch": 0.98, + "grad_norm": 0.8315547704696655, + "learning_rate": 1.202122349297008e-08, + "loss": 0.5505, + "step": 15446 + }, + { + "epoch": 0.98, + "grad_norm": 0.9459112882614136, + "learning_rate": 1.1950225523365e-08, + "loss": 0.6082, + "step": 15447 + }, + { + "epoch": 0.98, + "grad_norm": 0.8490333557128906, + "learning_rate": 1.1879437583183217e-08, + "loss": 0.5561, + "step": 15448 + }, + { + "epoch": 0.98, + "grad_norm": 0.8695975542068481, + "learning_rate": 1.1808859675406236e-08, + "loss": 0.5987, + "step": 15449 + }, + { + "epoch": 0.98, + "grad_norm": 0.9123320579528809, + "learning_rate": 1.173849180300557e-08, + "loss": 0.5599, + "step": 15450 + }, + { + "epoch": 0.98, + "grad_norm": 0.9430971145629883, + "learning_rate": 1.16683339689444e-08, + "loss": 0.6193, + "step": 15451 + }, + { + "epoch": 0.98, + "grad_norm": 0.871895968914032, + "learning_rate": 1.1598386176175924e-08, + "loss": 0.567, + "step": 15452 + }, + { + "epoch": 0.98, + "grad_norm": 0.852607786655426, + "learning_rate": 1.1528648427646671e-08, + "loss": 0.5368, + "step": 15453 + }, + { + "epoch": 0.98, + "grad_norm": 0.887416422367096, + "learning_rate": 1.1459120726292072e-08, + "loss": 0.5469, + "step": 15454 + }, + { + "epoch": 0.98, + "grad_norm": 0.9012024998664856, + "learning_rate": 1.1389803075039785e-08, + "loss": 0.5667, + "step": 15455 + }, + { + "epoch": 0.98, + "grad_norm": 0.8867619633674622, + "learning_rate": 1.1320695476809141e-08, + "loss": 0.6016, + "step": 15456 + }, + { + "epoch": 0.98, + "grad_norm": 0.896775484085083, + "learning_rate": 1.1251797934509478e-08, + "loss": 0.5845, + "step": 15457 + }, + { + "epoch": 0.98, + "grad_norm": 0.9201370477676392, + "learning_rate": 1.1183110451042368e-08, + "loss": 0.5236, + "step": 15458 + }, + { + "epoch": 0.98, + "grad_norm": 0.877086877822876, + "learning_rate": 1.1114633029299382e-08, + "loss": 0.5334, + "step": 15459 + }, + { + "epoch": 0.98, + "grad_norm": 0.8296651840209961, + "learning_rate": 1.1046365672163772e-08, + "loss": 0.5453, + "step": 15460 + }, + { + "epoch": 0.98, + "grad_norm": 0.8853237628936768, + "learning_rate": 1.0978308382511016e-08, + "loss": 0.5937, + "step": 15461 + }, + { + "epoch": 0.98, + "grad_norm": 0.9216740131378174, + "learning_rate": 1.0910461163206043e-08, + "loss": 0.5611, + "step": 15462 + }, + { + "epoch": 0.98, + "grad_norm": 0.9715553522109985, + "learning_rate": 1.0842824017105458e-08, + "loss": 0.5791, + "step": 15463 + }, + { + "epoch": 0.98, + "grad_norm": 0.8290508985519409, + "learning_rate": 1.0775396947057537e-08, + "loss": 0.5391, + "step": 15464 + }, + { + "epoch": 0.98, + "grad_norm": 0.8305854201316833, + "learning_rate": 1.0708179955901677e-08, + "loss": 0.5389, + "step": 15465 + }, + { + "epoch": 0.98, + "grad_norm": 0.9410961866378784, + "learning_rate": 1.0641173046467833e-08, + "loss": 0.5963, + "step": 15466 + }, + { + "epoch": 0.98, + "grad_norm": 0.9235133528709412, + "learning_rate": 1.0574376221577642e-08, + "loss": 0.5421, + "step": 15467 + }, + { + "epoch": 0.98, + "grad_norm": 0.8532764315605164, + "learning_rate": 1.0507789484043295e-08, + "loss": 0.4974, + "step": 15468 + }, + { + "epoch": 0.98, + "grad_norm": 0.874849796295166, + "learning_rate": 1.0441412836668663e-08, + "loss": 0.5337, + "step": 15469 + }, + { + "epoch": 0.98, + "grad_norm": 0.8603571057319641, + "learning_rate": 1.037524628224873e-08, + "loss": 0.5523, + "step": 15470 + }, + { + "epoch": 0.98, + "grad_norm": 0.871017575263977, + "learning_rate": 1.0309289823569601e-08, + "loss": 0.5627, + "step": 15471 + }, + { + "epoch": 0.98, + "grad_norm": 0.9409274458885193, + "learning_rate": 1.02435434634085e-08, + "loss": 0.5904, + "step": 15472 + }, + { + "epoch": 0.98, + "grad_norm": 1.0156452655792236, + "learning_rate": 1.0178007204533768e-08, + "loss": 0.5892, + "step": 15473 + }, + { + "epoch": 0.98, + "grad_norm": 0.8976706266403198, + "learning_rate": 1.0112681049704865e-08, + "loss": 0.5797, + "step": 15474 + }, + { + "epoch": 0.98, + "grad_norm": 0.8561591506004333, + "learning_rate": 1.004756500167181e-08, + "loss": 0.5468, + "step": 15475 + }, + { + "epoch": 0.98, + "grad_norm": 0.8820354342460632, + "learning_rate": 9.982659063177413e-09, + "loss": 0.57, + "step": 15476 + }, + { + "epoch": 0.98, + "grad_norm": 0.8407560586929321, + "learning_rate": 9.917963236954487e-09, + "loss": 0.5503, + "step": 15477 + }, + { + "epoch": 0.98, + "grad_norm": 0.9282391667366028, + "learning_rate": 9.853477525726962e-09, + "loss": 0.5349, + "step": 15478 + }, + { + "epoch": 0.98, + "grad_norm": 0.8840251564979553, + "learning_rate": 9.789201932209335e-09, + "loss": 0.6081, + "step": 15479 + }, + { + "epoch": 0.98, + "grad_norm": 0.9751169681549072, + "learning_rate": 9.725136459109441e-09, + "loss": 0.6371, + "step": 15480 + }, + { + "epoch": 0.98, + "grad_norm": 0.8782668709754944, + "learning_rate": 9.66128110912401e-09, + "loss": 0.5463, + "step": 15481 + }, + { + "epoch": 0.98, + "grad_norm": 0.7854354381561279, + "learning_rate": 9.597635884941447e-09, + "loss": 0.5376, + "step": 15482 + }, + { + "epoch": 0.98, + "grad_norm": 0.847726047039032, + "learning_rate": 9.534200789242388e-09, + "loss": 0.5804, + "step": 15483 + }, + { + "epoch": 0.98, + "grad_norm": 0.8897963166236877, + "learning_rate": 9.470975824698025e-09, + "loss": 0.6062, + "step": 15484 + }, + { + "epoch": 0.98, + "grad_norm": 0.930458128452301, + "learning_rate": 9.407960993969567e-09, + "loss": 0.6237, + "step": 15485 + }, + { + "epoch": 0.98, + "grad_norm": 0.8038657903671265, + "learning_rate": 9.345156299711e-09, + "loss": 0.5404, + "step": 15486 + }, + { + "epoch": 0.98, + "grad_norm": 0.8747665286064148, + "learning_rate": 9.282561744566321e-09, + "loss": 0.551, + "step": 15487 + }, + { + "epoch": 0.98, + "grad_norm": 0.8687538504600525, + "learning_rate": 9.220177331172309e-09, + "loss": 0.5515, + "step": 15488 + }, + { + "epoch": 0.98, + "grad_norm": 0.8491008877754211, + "learning_rate": 9.158003062154642e-09, + "loss": 0.5227, + "step": 15489 + }, + { + "epoch": 0.98, + "grad_norm": 0.9173932671546936, + "learning_rate": 9.096038940131225e-09, + "loss": 0.6004, + "step": 15490 + }, + { + "epoch": 0.98, + "grad_norm": 0.817330539226532, + "learning_rate": 9.034284967711637e-09, + "loss": 0.5088, + "step": 15491 + }, + { + "epoch": 0.98, + "grad_norm": 0.8443012833595276, + "learning_rate": 8.972741147496023e-09, + "loss": 0.5755, + "step": 15492 + }, + { + "epoch": 0.98, + "grad_norm": 0.804356575012207, + "learning_rate": 8.911407482076196e-09, + "loss": 0.5244, + "step": 15493 + }, + { + "epoch": 0.98, + "grad_norm": 0.865323007106781, + "learning_rate": 8.85028397403398e-09, + "loss": 0.5702, + "step": 15494 + }, + { + "epoch": 0.98, + "grad_norm": 0.9057186245918274, + "learning_rate": 8.789370625943427e-09, + "loss": 0.5933, + "step": 15495 + }, + { + "epoch": 0.98, + "grad_norm": 0.9095432162284851, + "learning_rate": 8.728667440369153e-09, + "loss": 0.5645, + "step": 15496 + }, + { + "epoch": 0.98, + "grad_norm": 0.8884914517402649, + "learning_rate": 8.668174419867449e-09, + "loss": 0.6086, + "step": 15497 + }, + { + "epoch": 0.98, + "grad_norm": 0.9319071173667908, + "learning_rate": 8.60789156698516e-09, + "loss": 0.5349, + "step": 15498 + }, + { + "epoch": 0.98, + "grad_norm": 0.9238869547843933, + "learning_rate": 8.547818884260816e-09, + "loss": 0.5856, + "step": 15499 + }, + { + "epoch": 0.98, + "grad_norm": 0.8431046009063721, + "learning_rate": 8.48795637422406e-09, + "loss": 0.5275, + "step": 15500 + }, + { + "epoch": 0.98, + "grad_norm": 0.884147584438324, + "learning_rate": 8.428304039395096e-09, + "loss": 0.5912, + "step": 15501 + }, + { + "epoch": 0.98, + "grad_norm": 0.8942022919654846, + "learning_rate": 8.368861882285806e-09, + "loss": 0.5818, + "step": 15502 + }, + { + "epoch": 0.98, + "grad_norm": 0.8837722539901733, + "learning_rate": 8.309629905399186e-09, + "loss": 0.5234, + "step": 15503 + }, + { + "epoch": 0.98, + "grad_norm": 0.8658926486968994, + "learning_rate": 8.250608111229352e-09, + "loss": 0.599, + "step": 15504 + }, + { + "epoch": 0.98, + "grad_norm": 0.9732296466827393, + "learning_rate": 8.191796502260985e-09, + "loss": 0.6188, + "step": 15505 + }, + { + "epoch": 0.98, + "grad_norm": 0.9690650701522827, + "learning_rate": 8.13319508097099e-09, + "loss": 0.6361, + "step": 15506 + }, + { + "epoch": 0.98, + "grad_norm": 0.9391032457351685, + "learning_rate": 8.074803849827395e-09, + "loss": 0.5799, + "step": 15507 + }, + { + "epoch": 0.98, + "grad_norm": 0.8532130122184753, + "learning_rate": 8.016622811287123e-09, + "loss": 0.5294, + "step": 15508 + }, + { + "epoch": 0.98, + "grad_norm": 0.8348953127861023, + "learning_rate": 7.958651967801545e-09, + "loss": 0.5303, + "step": 15509 + }, + { + "epoch": 0.98, + "grad_norm": 0.865597128868103, + "learning_rate": 7.900891321810932e-09, + "loss": 0.5601, + "step": 15510 + }, + { + "epoch": 0.98, + "grad_norm": 0.9278403520584106, + "learning_rate": 7.843340875747785e-09, + "loss": 0.6282, + "step": 15511 + }, + { + "epoch": 0.98, + "grad_norm": 0.8793516159057617, + "learning_rate": 7.786000632035163e-09, + "loss": 0.5598, + "step": 15512 + }, + { + "epoch": 0.98, + "grad_norm": 0.8610735535621643, + "learning_rate": 7.728870593087246e-09, + "loss": 0.5739, + "step": 15513 + }, + { + "epoch": 0.98, + "grad_norm": 0.9375894665718079, + "learning_rate": 7.671950761309333e-09, + "loss": 0.5678, + "step": 15514 + }, + { + "epoch": 0.98, + "grad_norm": 0.8614829182624817, + "learning_rate": 7.61524113909895e-09, + "loss": 0.5117, + "step": 15515 + }, + { + "epoch": 0.98, + "grad_norm": 0.8722830414772034, + "learning_rate": 7.558741728843633e-09, + "loss": 0.5368, + "step": 15516 + }, + { + "epoch": 0.98, + "grad_norm": 0.9089422821998596, + "learning_rate": 7.502452532922033e-09, + "loss": 0.5445, + "step": 15517 + }, + { + "epoch": 0.98, + "grad_norm": 0.9493569135665894, + "learning_rate": 7.446373553705033e-09, + "loss": 0.601, + "step": 15518 + }, + { + "epoch": 0.98, + "grad_norm": 0.8956559896469116, + "learning_rate": 7.390504793552966e-09, + "loss": 0.6217, + "step": 15519 + }, + { + "epoch": 0.98, + "grad_norm": 0.8692125082015991, + "learning_rate": 7.3348462548183955e-09, + "loss": 0.587, + "step": 15520 + }, + { + "epoch": 0.98, + "grad_norm": 0.9651332497596741, + "learning_rate": 7.279397939845556e-09, + "loss": 0.5755, + "step": 15521 + }, + { + "epoch": 0.98, + "grad_norm": 0.9648064374923706, + "learning_rate": 7.2241598509686926e-09, + "loss": 0.5921, + "step": 15522 + }, + { + "epoch": 0.98, + "grad_norm": 0.8698114156723022, + "learning_rate": 7.169131990514278e-09, + "loss": 0.6096, + "step": 15523 + }, + { + "epoch": 0.98, + "grad_norm": 0.8747323751449585, + "learning_rate": 7.114314360798791e-09, + "loss": 0.6015, + "step": 15524 + }, + { + "epoch": 0.98, + "grad_norm": 0.8900646567344666, + "learning_rate": 7.0597069641303865e-09, + "loss": 0.5879, + "step": 15525 + }, + { + "epoch": 0.98, + "grad_norm": 0.9312129020690918, + "learning_rate": 7.00530980280889e-09, + "loss": 0.5577, + "step": 15526 + }, + { + "epoch": 0.98, + "grad_norm": 0.8599275946617126, + "learning_rate": 6.951122879124139e-09, + "loss": 0.541, + "step": 15527 + }, + { + "epoch": 0.98, + "grad_norm": 0.8398301601409912, + "learning_rate": 6.89714619535764e-09, + "loss": 0.5422, + "step": 15528 + }, + { + "epoch": 0.98, + "grad_norm": 0.8573556542396545, + "learning_rate": 6.84337975378313e-09, + "loss": 0.5242, + "step": 15529 + }, + { + "epoch": 0.98, + "grad_norm": 0.8811535239219666, + "learning_rate": 6.789823556663799e-09, + "loss": 0.571, + "step": 15530 + }, + { + "epoch": 0.98, + "grad_norm": 0.8871817588806152, + "learning_rate": 6.736477606255065e-09, + "loss": 0.5309, + "step": 15531 + }, + { + "epoch": 0.98, + "grad_norm": 0.8854000568389893, + "learning_rate": 6.683341904802909e-09, + "loss": 0.5643, + "step": 15532 + }, + { + "epoch": 0.98, + "grad_norm": 0.9325771927833557, + "learning_rate": 6.63041645454443e-09, + "loss": 0.5453, + "step": 15533 + }, + { + "epoch": 0.98, + "grad_norm": 0.9529849290847778, + "learning_rate": 6.577701257708957e-09, + "loss": 0.6482, + "step": 15534 + }, + { + "epoch": 0.98, + "grad_norm": 0.8886032700538635, + "learning_rate": 6.52519631651527e-09, + "loss": 0.5402, + "step": 15535 + }, + { + "epoch": 0.98, + "grad_norm": 0.933830201625824, + "learning_rate": 6.4729016331749325e-09, + "loss": 0.5783, + "step": 15536 + }, + { + "epoch": 0.98, + "grad_norm": 0.8807794451713562, + "learning_rate": 6.420817209888963e-09, + "loss": 0.5386, + "step": 15537 + }, + { + "epoch": 0.98, + "grad_norm": 0.8794984221458435, + "learning_rate": 6.368943048851162e-09, + "loss": 0.5568, + "step": 15538 + }, + { + "epoch": 0.98, + "grad_norm": 0.9522714018821716, + "learning_rate": 6.317279152245892e-09, + "loss": 0.555, + "step": 15539 + }, + { + "epoch": 0.98, + "grad_norm": 0.9179518222808838, + "learning_rate": 6.265825522248082e-09, + "loss": 0.5825, + "step": 15540 + }, + { + "epoch": 0.98, + "grad_norm": 0.8840945959091187, + "learning_rate": 6.2145821610243296e-09, + "loss": 0.5798, + "step": 15541 + }, + { + "epoch": 0.98, + "grad_norm": 0.9103140830993652, + "learning_rate": 6.163549070732356e-09, + "loss": 0.6138, + "step": 15542 + }, + { + "epoch": 0.98, + "grad_norm": 0.9227690696716309, + "learning_rate": 6.1127262535209955e-09, + "loss": 0.6251, + "step": 15543 + }, + { + "epoch": 0.98, + "grad_norm": 0.9462999105453491, + "learning_rate": 6.062113711530204e-09, + "loss": 0.5747, + "step": 15544 + }, + { + "epoch": 0.98, + "grad_norm": 0.9513913989067078, + "learning_rate": 6.01171144689161e-09, + "loss": 0.5616, + "step": 15545 + }, + { + "epoch": 0.98, + "grad_norm": 0.9170199036598206, + "learning_rate": 5.96151946172685e-09, + "loss": 0.6263, + "step": 15546 + }, + { + "epoch": 0.98, + "grad_norm": 0.8427810668945312, + "learning_rate": 5.911537758149233e-09, + "loss": 0.538, + "step": 15547 + }, + { + "epoch": 0.99, + "grad_norm": 0.8685246109962463, + "learning_rate": 5.861766338263741e-09, + "loss": 0.5725, + "step": 15548 + }, + { + "epoch": 0.99, + "grad_norm": 0.9405071139335632, + "learning_rate": 5.812205204165922e-09, + "loss": 0.5458, + "step": 15549 + }, + { + "epoch": 0.99, + "grad_norm": 0.933382511138916, + "learning_rate": 5.762854357942993e-09, + "loss": 0.6187, + "step": 15550 + }, + { + "epoch": 0.99, + "grad_norm": 0.8507725596427917, + "learning_rate": 5.7137138016721825e-09, + "loss": 0.529, + "step": 15551 + }, + { + "epoch": 0.99, + "grad_norm": 0.827217698097229, + "learning_rate": 5.6647835374229465e-09, + "loss": 0.5867, + "step": 15552 + }, + { + "epoch": 0.99, + "grad_norm": 0.8920066356658936, + "learning_rate": 5.616063567255859e-09, + "loss": 0.6224, + "step": 15553 + }, + { + "epoch": 0.99, + "grad_norm": 0.9211912751197815, + "learning_rate": 5.5675538932220555e-09, + "loss": 0.6406, + "step": 15554 + }, + { + "epoch": 0.99, + "grad_norm": 0.8937119245529175, + "learning_rate": 5.519254517364347e-09, + "loss": 0.526, + "step": 15555 + }, + { + "epoch": 0.99, + "grad_norm": 0.9318338632583618, + "learning_rate": 5.471165441716108e-09, + "loss": 0.5555, + "step": 15556 + }, + { + "epoch": 0.99, + "grad_norm": 0.8785502910614014, + "learning_rate": 5.4232866683023856e-09, + "loss": 0.5905, + "step": 15557 + }, + { + "epoch": 0.99, + "grad_norm": 0.870749831199646, + "learning_rate": 5.375618199139343e-09, + "loss": 0.539, + "step": 15558 + }, + { + "epoch": 0.99, + "grad_norm": 0.923859179019928, + "learning_rate": 5.328160036234264e-09, + "loss": 0.6001, + "step": 15559 + }, + { + "epoch": 0.99, + "grad_norm": 0.9950880408287048, + "learning_rate": 5.280912181584441e-09, + "loss": 0.6232, + "step": 15560 + }, + { + "epoch": 0.99, + "grad_norm": 0.887122392654419, + "learning_rate": 5.233874637180503e-09, + "loss": 0.6034, + "step": 15561 + }, + { + "epoch": 0.99, + "grad_norm": 0.8819499015808105, + "learning_rate": 5.1870474050025325e-09, + "loss": 0.5857, + "step": 15562 + }, + { + "epoch": 0.99, + "grad_norm": 0.859145998954773, + "learning_rate": 5.1404304870222856e-09, + "loss": 0.5484, + "step": 15563 + }, + { + "epoch": 0.99, + "grad_norm": 0.8710299134254456, + "learning_rate": 5.094023885203192e-09, + "loss": 0.5355, + "step": 15564 + }, + { + "epoch": 0.99, + "grad_norm": 0.8982166647911072, + "learning_rate": 5.0478276014981345e-09, + "loss": 0.6073, + "step": 15565 + }, + { + "epoch": 0.99, + "grad_norm": 0.981425404548645, + "learning_rate": 5.001841637852778e-09, + "loss": 0.5632, + "step": 15566 + }, + { + "epoch": 0.99, + "grad_norm": 0.865263044834137, + "learning_rate": 4.956065996203907e-09, + "loss": 0.5613, + "step": 15567 + }, + { + "epoch": 0.99, + "grad_norm": 0.9212016463279724, + "learning_rate": 4.910500678478314e-09, + "loss": 0.5628, + "step": 15568 + }, + { + "epoch": 0.99, + "grad_norm": 0.9121674299240112, + "learning_rate": 4.865145686595019e-09, + "loss": 0.5908, + "step": 15569 + }, + { + "epoch": 0.99, + "grad_norm": 0.8445576429367065, + "learning_rate": 4.820001022463605e-09, + "loss": 0.5616, + "step": 15570 + }, + { + "epoch": 0.99, + "grad_norm": 0.9339314699172974, + "learning_rate": 4.77506668798533e-09, + "loss": 0.5892, + "step": 15571 + }, + { + "epoch": 0.99, + "grad_norm": 0.8746134638786316, + "learning_rate": 4.730342685051459e-09, + "loss": 0.574, + "step": 15572 + }, + { + "epoch": 0.99, + "grad_norm": 0.9173433780670166, + "learning_rate": 4.685829015545485e-09, + "loss": 0.5736, + "step": 15573 + }, + { + "epoch": 0.99, + "grad_norm": 0.9090994596481323, + "learning_rate": 4.641525681342019e-09, + "loss": 0.5846, + "step": 15574 + }, + { + "epoch": 0.99, + "grad_norm": 0.9413917064666748, + "learning_rate": 4.597432684306236e-09, + "loss": 0.6241, + "step": 15575 + }, + { + "epoch": 0.99, + "grad_norm": 0.9874243140220642, + "learning_rate": 4.553550026294984e-09, + "loss": 0.6424, + "step": 15576 + }, + { + "epoch": 0.99, + "grad_norm": 0.8794564604759216, + "learning_rate": 4.5098777091556745e-09, + "loss": 0.5624, + "step": 15577 + }, + { + "epoch": 0.99, + "grad_norm": 0.8835657238960266, + "learning_rate": 4.4664157347273916e-09, + "loss": 0.5964, + "step": 15578 + }, + { + "epoch": 0.99, + "grad_norm": 0.8630079627037048, + "learning_rate": 4.423164104840339e-09, + "loss": 0.5759, + "step": 15579 + }, + { + "epoch": 0.99, + "grad_norm": 0.8491309285163879, + "learning_rate": 4.38012282131528e-09, + "loss": 0.539, + "step": 15580 + }, + { + "epoch": 0.99, + "grad_norm": 0.8870819807052612, + "learning_rate": 4.3372918859652115e-09, + "loss": 0.5754, + "step": 15581 + }, + { + "epoch": 0.99, + "grad_norm": 0.9265652894973755, + "learning_rate": 4.294671300592579e-09, + "loss": 0.5735, + "step": 15582 + }, + { + "epoch": 0.99, + "grad_norm": 0.8699434995651245, + "learning_rate": 4.252261066993169e-09, + "loss": 0.5775, + "step": 15583 + }, + { + "epoch": 0.99, + "grad_norm": 0.9221080541610718, + "learning_rate": 4.210061186951664e-09, + "loss": 0.6277, + "step": 15584 + }, + { + "epoch": 0.99, + "grad_norm": 0.9161962866783142, + "learning_rate": 4.168071662245532e-09, + "loss": 0.5884, + "step": 15585 + }, + { + "epoch": 0.99, + "grad_norm": 0.9073721170425415, + "learning_rate": 4.1262924946422476e-09, + "loss": 0.5669, + "step": 15586 + }, + { + "epoch": 0.99, + "grad_norm": 0.8920649290084839, + "learning_rate": 4.084723685901515e-09, + "loss": 0.608, + "step": 15587 + }, + { + "epoch": 0.99, + "grad_norm": 0.8832874298095703, + "learning_rate": 4.043365237774155e-09, + "loss": 0.5981, + "step": 15588 + }, + { + "epoch": 0.99, + "grad_norm": 0.9273045063018799, + "learning_rate": 4.002217152000443e-09, + "loss": 0.574, + "step": 15589 + }, + { + "epoch": 0.99, + "grad_norm": 0.8001242280006409, + "learning_rate": 3.961279430313991e-09, + "loss": 0.465, + "step": 15590 + }, + { + "epoch": 0.99, + "grad_norm": 0.900482714176178, + "learning_rate": 3.920552074437867e-09, + "loss": 0.5724, + "step": 15591 + }, + { + "epoch": 0.99, + "grad_norm": 0.9371671080589294, + "learning_rate": 3.880035086086808e-09, + "loss": 0.5378, + "step": 15592 + }, + { + "epoch": 0.99, + "grad_norm": 0.8328535556793213, + "learning_rate": 3.839728466967785e-09, + "loss": 0.5461, + "step": 15593 + }, + { + "epoch": 0.99, + "grad_norm": 0.9578930139541626, + "learning_rate": 3.799632218777216e-09, + "loss": 0.5931, + "step": 15594 + }, + { + "epoch": 0.99, + "grad_norm": 0.9057374596595764, + "learning_rate": 3.759746343203751e-09, + "loss": 0.5542, + "step": 15595 + }, + { + "epoch": 0.99, + "grad_norm": 0.9067438840866089, + "learning_rate": 3.720070841926604e-09, + "loss": 0.5555, + "step": 15596 + }, + { + "epoch": 0.99, + "grad_norm": 0.8708641529083252, + "learning_rate": 3.6806057166166585e-09, + "loss": 0.5649, + "step": 15597 + }, + { + "epoch": 0.99, + "grad_norm": 0.857673168182373, + "learning_rate": 3.6413509689353644e-09, + "loss": 0.5904, + "step": 15598 + }, + { + "epoch": 0.99, + "grad_norm": 0.9187641739845276, + "learning_rate": 3.602306600535843e-09, + "loss": 0.573, + "step": 15599 + }, + { + "epoch": 0.99, + "grad_norm": 0.9030482172966003, + "learning_rate": 3.5634726130617802e-09, + "loss": 0.6042, + "step": 15600 + }, + { + "epoch": 0.99, + "grad_norm": 0.814935028553009, + "learning_rate": 3.5248490081485343e-09, + "loss": 0.5411, + "step": 15601 + }, + { + "epoch": 0.99, + "grad_norm": 0.8016582131385803, + "learning_rate": 3.486435787422582e-09, + "loss": 0.5426, + "step": 15602 + }, + { + "epoch": 0.99, + "grad_norm": 0.8617429733276367, + "learning_rate": 3.4482329525009627e-09, + "loss": 0.5523, + "step": 15603 + }, + { + "epoch": 0.99, + "grad_norm": 0.8524816632270813, + "learning_rate": 3.4102405049929455e-09, + "loss": 0.6124, + "step": 15604 + }, + { + "epoch": 0.99, + "grad_norm": 0.8613601922988892, + "learning_rate": 3.372458446497251e-09, + "loss": 0.5662, + "step": 15605 + }, + { + "epoch": 0.99, + "grad_norm": 0.9535294771194458, + "learning_rate": 3.3348867786059393e-09, + "loss": 0.5969, + "step": 15606 + }, + { + "epoch": 0.99, + "grad_norm": 0.8776538372039795, + "learning_rate": 3.2975255028999675e-09, + "loss": 0.5582, + "step": 15607 + }, + { + "epoch": 0.99, + "grad_norm": 0.9301447868347168, + "learning_rate": 3.2603746209530774e-09, + "loss": 0.5439, + "step": 15608 + }, + { + "epoch": 0.99, + "grad_norm": 0.8789377212524414, + "learning_rate": 3.223434134329573e-09, + "loss": 0.5597, + "step": 15609 + }, + { + "epoch": 0.99, + "grad_norm": 0.9617857336997986, + "learning_rate": 3.1867040445848764e-09, + "loss": 0.5971, + "step": 15610 + }, + { + "epoch": 0.99, + "grad_norm": 0.9199445843696594, + "learning_rate": 3.1501843532649734e-09, + "loss": 0.51, + "step": 15611 + }, + { + "epoch": 0.99, + "grad_norm": 0.9115186929702759, + "learning_rate": 3.113875061908078e-09, + "loss": 0.5553, + "step": 15612 + }, + { + "epoch": 0.99, + "grad_norm": 0.9086104035377502, + "learning_rate": 3.077776172043523e-09, + "loss": 0.5724, + "step": 15613 + }, + { + "epoch": 0.99, + "grad_norm": 0.9141691327095032, + "learning_rate": 3.0418876851900924e-09, + "loss": 0.5002, + "step": 15614 + }, + { + "epoch": 0.99, + "grad_norm": 0.8640308380126953, + "learning_rate": 3.0062096028599108e-09, + "loss": 0.511, + "step": 15615 + }, + { + "epoch": 0.99, + "grad_norm": 0.8536925911903381, + "learning_rate": 2.9707419265551097e-09, + "loss": 0.5577, + "step": 15616 + }, + { + "epoch": 0.99, + "grad_norm": 0.9029596447944641, + "learning_rate": 2.9354846577689387e-09, + "loss": 0.6127, + "step": 15617 + }, + { + "epoch": 0.99, + "grad_norm": 0.877974271774292, + "learning_rate": 2.900437797986322e-09, + "loss": 0.5218, + "step": 15618 + }, + { + "epoch": 0.99, + "grad_norm": 0.8660022616386414, + "learning_rate": 2.8656013486821897e-09, + "loss": 0.58, + "step": 15619 + }, + { + "epoch": 0.99, + "grad_norm": 0.8560828566551208, + "learning_rate": 2.8309753113237025e-09, + "loss": 0.5314, + "step": 15620 + }, + { + "epoch": 0.99, + "grad_norm": 0.8698206543922424, + "learning_rate": 2.796559687369138e-09, + "loss": 0.5735, + "step": 15621 + }, + { + "epoch": 0.99, + "grad_norm": 0.8277180790901184, + "learning_rate": 2.7623544782673372e-09, + "loss": 0.5116, + "step": 15622 + }, + { + "epoch": 0.99, + "grad_norm": 0.8713210225105286, + "learning_rate": 2.7283596854588148e-09, + "loss": 0.583, + "step": 15623 + }, + { + "epoch": 0.99, + "grad_norm": 0.931766927242279, + "learning_rate": 2.6945753103746475e-09, + "loss": 0.5452, + "step": 15624 + }, + { + "epoch": 0.99, + "grad_norm": 0.9144072532653809, + "learning_rate": 2.661001354437587e-09, + "loss": 0.5662, + "step": 15625 + }, + { + "epoch": 0.99, + "grad_norm": 0.9093576073646545, + "learning_rate": 2.6276378190615016e-09, + "loss": 0.56, + "step": 15626 + }, + { + "epoch": 0.99, + "grad_norm": 0.9563875198364258, + "learning_rate": 2.5944847056508237e-09, + "loss": 0.5346, + "step": 15627 + }, + { + "epoch": 0.99, + "grad_norm": 0.9016739726066589, + "learning_rate": 2.561542015601659e-09, + "loss": 0.6267, + "step": 15628 + }, + { + "epoch": 0.99, + "grad_norm": 0.9251307249069214, + "learning_rate": 2.528809750301231e-09, + "loss": 0.5576, + "step": 15629 + }, + { + "epoch": 0.99, + "grad_norm": 0.9503340721130371, + "learning_rate": 2.4962879111278813e-09, + "loss": 0.5327, + "step": 15630 + }, + { + "epoch": 0.99, + "grad_norm": 0.8992822766304016, + "learning_rate": 2.4639764994505156e-09, + "loss": 0.5629, + "step": 15631 + }, + { + "epoch": 0.99, + "grad_norm": 0.8664157390594482, + "learning_rate": 2.4318755166302668e-09, + "loss": 0.5529, + "step": 15632 + }, + { + "epoch": 0.99, + "grad_norm": 0.837928831577301, + "learning_rate": 2.399984964018276e-09, + "loss": 0.5679, + "step": 15633 + }, + { + "epoch": 0.99, + "grad_norm": 0.8134336471557617, + "learning_rate": 2.3683048429573587e-09, + "loss": 0.5256, + "step": 15634 + }, + { + "epoch": 0.99, + "grad_norm": 0.9103096127510071, + "learning_rate": 2.3368351547820023e-09, + "loss": 0.58, + "step": 15635 + }, + { + "epoch": 0.99, + "grad_norm": 0.8657124638557434, + "learning_rate": 2.3055759008167033e-09, + "loss": 0.5196, + "step": 15636 + }, + { + "epoch": 0.99, + "grad_norm": 0.8632974028587341, + "learning_rate": 2.2745270823776312e-09, + "loss": 0.5559, + "step": 15637 + }, + { + "epoch": 0.99, + "grad_norm": 0.9439393877983093, + "learning_rate": 2.243688700772628e-09, + "loss": 0.6166, + "step": 15638 + }, + { + "epoch": 0.99, + "grad_norm": 0.8731262683868408, + "learning_rate": 2.2130607573001006e-09, + "loss": 0.6254, + "step": 15639 + }, + { + "epoch": 0.99, + "grad_norm": 0.867850124835968, + "learning_rate": 2.1826432532495724e-09, + "loss": 0.5505, + "step": 15640 + }, + { + "epoch": 0.99, + "grad_norm": 0.9074274897575378, + "learning_rate": 2.1524361899016853e-09, + "loss": 0.581, + "step": 15641 + }, + { + "epoch": 0.99, + "grad_norm": 0.8568212389945984, + "learning_rate": 2.1224395685282008e-09, + "loss": 0.6037, + "step": 15642 + }, + { + "epoch": 0.99, + "grad_norm": 0.8557693362236023, + "learning_rate": 2.0926533903925516e-09, + "loss": 0.5362, + "step": 15643 + }, + { + "epoch": 0.99, + "grad_norm": 0.8828796744346619, + "learning_rate": 2.0630776567492904e-09, + "loss": 0.573, + "step": 15644 + }, + { + "epoch": 0.99, + "grad_norm": 0.8889597058296204, + "learning_rate": 2.033712368842977e-09, + "loss": 0.5788, + "step": 15645 + }, + { + "epoch": 0.99, + "grad_norm": 0.8992346525192261, + "learning_rate": 2.004557527909845e-09, + "loss": 0.5368, + "step": 15646 + }, + { + "epoch": 0.99, + "grad_norm": 0.8839378356933594, + "learning_rate": 1.975613135178911e-09, + "loss": 0.5774, + "step": 15647 + }, + { + "epoch": 0.99, + "grad_norm": 0.9571453928947449, + "learning_rate": 1.9468791918675345e-09, + "loss": 0.5711, + "step": 15648 + }, + { + "epoch": 0.99, + "grad_norm": 0.8286476731300354, + "learning_rate": 1.918355699186414e-09, + "loss": 0.5746, + "step": 15649 + }, + { + "epoch": 0.99, + "grad_norm": 0.8622028827667236, + "learning_rate": 1.8900426583357003e-09, + "loss": 0.4927, + "step": 15650 + }, + { + "epoch": 0.99, + "grad_norm": 0.818505048751831, + "learning_rate": 1.861940070508883e-09, + "loss": 0.5347, + "step": 15651 + }, + { + "epoch": 0.99, + "grad_norm": 0.9037706851959229, + "learning_rate": 1.8340479368883502e-09, + "loss": 0.5652, + "step": 15652 + }, + { + "epoch": 0.99, + "grad_norm": 0.8619953989982605, + "learning_rate": 1.8063662586481622e-09, + "loss": 0.5376, + "step": 15653 + }, + { + "epoch": 0.99, + "grad_norm": 0.8764271140098572, + "learning_rate": 1.7788950369551638e-09, + "loss": 0.544, + "step": 15654 + }, + { + "epoch": 0.99, + "grad_norm": 0.9021615386009216, + "learning_rate": 1.751634272964542e-09, + "loss": 0.5352, + "step": 15655 + }, + { + "epoch": 0.99, + "grad_norm": 0.9306265115737915, + "learning_rate": 1.7245839678259323e-09, + "loss": 0.549, + "step": 15656 + }, + { + "epoch": 0.99, + "grad_norm": 0.9229673743247986, + "learning_rate": 1.6977441226767589e-09, + "loss": 0.6132, + "step": 15657 + }, + { + "epoch": 0.99, + "grad_norm": 0.8322945237159729, + "learning_rate": 1.6711147386477833e-09, + "loss": 0.4952, + "step": 15658 + }, + { + "epoch": 0.99, + "grad_norm": 0.9024680852890015, + "learning_rate": 1.644695816860331e-09, + "loss": 0.5401, + "step": 15659 + }, + { + "epoch": 0.99, + "grad_norm": 0.86787348985672, + "learning_rate": 1.6184873584268457e-09, + "loss": 0.6113, + "step": 15660 + }, + { + "epoch": 0.99, + "grad_norm": 0.9406611323356628, + "learning_rate": 1.5924893644503336e-09, + "loss": 0.6352, + "step": 15661 + }, + { + "epoch": 0.99, + "grad_norm": 0.9413818120956421, + "learning_rate": 1.566701836026585e-09, + "loss": 0.5955, + "step": 15662 + }, + { + "epoch": 0.99, + "grad_norm": 0.8670917749404907, + "learning_rate": 1.541124774240288e-09, + "loss": 0.5796, + "step": 15663 + }, + { + "epoch": 0.99, + "grad_norm": 0.850266695022583, + "learning_rate": 1.5157581801689137e-09, + "loss": 0.5393, + "step": 15664 + }, + { + "epoch": 0.99, + "grad_norm": 0.9164510369300842, + "learning_rate": 1.4906020548804968e-09, + "loss": 0.5778, + "step": 15665 + }, + { + "epoch": 0.99, + "grad_norm": 0.8657138347625732, + "learning_rate": 1.4656563994341898e-09, + "loss": 0.5405, + "step": 15666 + }, + { + "epoch": 0.99, + "grad_norm": 0.925816535949707, + "learning_rate": 1.4409212148802643e-09, + "loss": 0.6178, + "step": 15667 + }, + { + "epoch": 0.99, + "grad_norm": 0.8860337138175964, + "learning_rate": 1.416396502260664e-09, + "loss": 0.5587, + "step": 15668 + }, + { + "epoch": 0.99, + "grad_norm": 0.9135521054267883, + "learning_rate": 1.3920822626078967e-09, + "loss": 0.5695, + "step": 15669 + }, + { + "epoch": 0.99, + "grad_norm": 0.8713152408599854, + "learning_rate": 1.3679784969461429e-09, + "loss": 0.601, + "step": 15670 + }, + { + "epoch": 0.99, + "grad_norm": 0.8884409666061401, + "learning_rate": 1.3440852062890364e-09, + "loss": 0.5773, + "step": 15671 + }, + { + "epoch": 0.99, + "grad_norm": 0.8426517248153687, + "learning_rate": 1.3204023916435494e-09, + "loss": 0.5539, + "step": 15672 + }, + { + "epoch": 0.99, + "grad_norm": 0.9166735410690308, + "learning_rate": 1.2969300540072171e-09, + "loss": 0.5526, + "step": 15673 + }, + { + "epoch": 0.99, + "grad_norm": 0.8987283706665039, + "learning_rate": 1.2736681943675833e-09, + "loss": 0.562, + "step": 15674 + }, + { + "epoch": 0.99, + "grad_norm": 0.8762261867523193, + "learning_rate": 1.2506168137049747e-09, + "loss": 0.5946, + "step": 15675 + }, + { + "epoch": 0.99, + "grad_norm": 0.8147190809249878, + "learning_rate": 1.2277759129886158e-09, + "loss": 0.5149, + "step": 15676 + }, + { + "epoch": 0.99, + "grad_norm": 0.8669753670692444, + "learning_rate": 1.2051454931816254e-09, + "loss": 0.544, + "step": 15677 + }, + { + "epoch": 0.99, + "grad_norm": 0.9210073351860046, + "learning_rate": 1.1827255552365745e-09, + "loss": 0.5321, + "step": 15678 + }, + { + "epoch": 0.99, + "grad_norm": 0.9330329895019531, + "learning_rate": 1.1605161000971532e-09, + "loss": 0.5983, + "step": 15679 + }, + { + "epoch": 0.99, + "grad_norm": 0.8832536935806274, + "learning_rate": 1.1385171286992791e-09, + "loss": 0.5729, + "step": 15680 + }, + { + "epoch": 0.99, + "grad_norm": 0.8871389627456665, + "learning_rate": 1.116728641967768e-09, + "loss": 0.6509, + "step": 15681 + }, + { + "epoch": 0.99, + "grad_norm": 0.9270039200782776, + "learning_rate": 1.0951506408213298e-09, + "loss": 0.588, + "step": 15682 + }, + { + "epoch": 0.99, + "grad_norm": 0.8575807213783264, + "learning_rate": 1.0737831261686815e-09, + "loss": 0.6011, + "step": 15683 + }, + { + "epoch": 0.99, + "grad_norm": 0.8653765320777893, + "learning_rate": 1.052626098907994e-09, + "loss": 0.546, + "step": 15684 + }, + { + "epoch": 0.99, + "grad_norm": 0.9024002552032471, + "learning_rate": 1.0316795599318862e-09, + "loss": 0.5587, + "step": 15685 + }, + { + "epoch": 0.99, + "grad_norm": 0.9174841046333313, + "learning_rate": 1.0109435101218757e-09, + "loss": 0.5712, + "step": 15686 + }, + { + "epoch": 0.99, + "grad_norm": 0.9049075841903687, + "learning_rate": 9.90417950350042e-10, + "loss": 0.5595, + "step": 15687 + }, + { + "epoch": 0.99, + "grad_norm": 0.873509407043457, + "learning_rate": 9.701028814818047e-10, + "loss": 0.5713, + "step": 15688 + }, + { + "epoch": 0.99, + "grad_norm": 0.9006879329681396, + "learning_rate": 9.499983043720351e-10, + "loss": 0.5857, + "step": 15689 + }, + { + "epoch": 0.99, + "grad_norm": 0.9213382601737976, + "learning_rate": 9.301042198678334e-10, + "loss": 0.5795, + "step": 15690 + }, + { + "epoch": 0.99, + "grad_norm": 0.9829197525978088, + "learning_rate": 9.104206288057527e-10, + "loss": 0.5673, + "step": 15691 + }, + { + "epoch": 0.99, + "grad_norm": 0.8557707071304321, + "learning_rate": 8.909475320156846e-10, + "loss": 0.5127, + "step": 15692 + }, + { + "epoch": 0.99, + "grad_norm": 0.9053747057914734, + "learning_rate": 8.71684930317529e-10, + "loss": 0.5696, + "step": 15693 + }, + { + "epoch": 0.99, + "grad_norm": 0.8902594447135925, + "learning_rate": 8.526328245217485e-10, + "loss": 0.5673, + "step": 15694 + }, + { + "epoch": 0.99, + "grad_norm": 0.86614990234375, + "learning_rate": 8.337912154304795e-10, + "loss": 0.5927, + "step": 15695 + }, + { + "epoch": 0.99, + "grad_norm": 0.8675297498703003, + "learning_rate": 8.151601038375312e-10, + "loss": 0.5788, + "step": 15696 + }, + { + "epoch": 0.99, + "grad_norm": 0.8987656831741333, + "learning_rate": 7.967394905278314e-10, + "loss": 0.5593, + "step": 15697 + }, + { + "epoch": 0.99, + "grad_norm": 0.8395030498504639, + "learning_rate": 7.785293762757607e-10, + "loss": 0.5587, + "step": 15698 + }, + { + "epoch": 0.99, + "grad_norm": 0.8946781754493713, + "learning_rate": 7.605297618495932e-10, + "loss": 0.6318, + "step": 15699 + }, + { + "epoch": 0.99, + "grad_norm": 0.917945384979248, + "learning_rate": 7.427406480059463e-10, + "loss": 0.6166, + "step": 15700 + }, + { + "epoch": 0.99, + "grad_norm": 0.8553743362426758, + "learning_rate": 7.251620354942201e-10, + "loss": 0.5548, + "step": 15701 + }, + { + "epoch": 0.99, + "grad_norm": 0.9134872555732727, + "learning_rate": 7.077939250549337e-10, + "loss": 0.5561, + "step": 15702 + }, + { + "epoch": 0.99, + "grad_norm": 0.8560099601745605, + "learning_rate": 6.906363174191688e-10, + "loss": 0.5588, + "step": 15703 + }, + { + "epoch": 0.99, + "grad_norm": 0.8506429195404053, + "learning_rate": 6.736892133091255e-10, + "loss": 0.496, + "step": 15704 + }, + { + "epoch": 0.99, + "grad_norm": 0.8329866528511047, + "learning_rate": 6.569526134392324e-10, + "loss": 0.4726, + "step": 15705 + }, + { + "epoch": 1.0, + "grad_norm": 0.8823480606079102, + "learning_rate": 6.404265185128155e-10, + "loss": 0.6387, + "step": 15706 + }, + { + "epoch": 1.0, + "grad_norm": 0.8894028067588806, + "learning_rate": 6.241109292270953e-10, + "loss": 0.5589, + "step": 15707 + }, + { + "epoch": 1.0, + "grad_norm": 0.9294013977050781, + "learning_rate": 6.080058462687444e-10, + "loss": 0.5855, + "step": 15708 + }, + { + "epoch": 1.0, + "grad_norm": 0.8650762438774109, + "learning_rate": 5.92111270314999e-10, + "loss": 0.5452, + "step": 15709 + }, + { + "epoch": 1.0, + "grad_norm": 0.8934659361839294, + "learning_rate": 5.764272020358785e-10, + "loss": 0.5798, + "step": 15710 + }, + { + "epoch": 1.0, + "grad_norm": 1.0057034492492676, + "learning_rate": 5.609536420919659e-10, + "loss": 0.5901, + "step": 15711 + }, + { + "epoch": 1.0, + "grad_norm": 0.9346972703933716, + "learning_rate": 5.456905911344068e-10, + "loss": 0.5695, + "step": 15712 + }, + { + "epoch": 1.0, + "grad_norm": 0.9616386294364929, + "learning_rate": 5.306380498060204e-10, + "loss": 0.5871, + "step": 15713 + }, + { + "epoch": 1.0, + "grad_norm": 0.8900029063224792, + "learning_rate": 5.15796018740744e-10, + "loss": 0.5773, + "step": 15714 + }, + { + "epoch": 1.0, + "grad_norm": 0.9126656651496887, + "learning_rate": 5.011644985630781e-10, + "loss": 0.5591, + "step": 15715 + }, + { + "epoch": 1.0, + "grad_norm": 0.9090896248817444, + "learning_rate": 4.867434898891965e-10, + "loss": 0.5804, + "step": 15716 + }, + { + "epoch": 1.0, + "grad_norm": 0.8257348537445068, + "learning_rate": 4.725329933269463e-10, + "loss": 0.5875, + "step": 15717 + }, + { + "epoch": 1.0, + "grad_norm": 0.9159669876098633, + "learning_rate": 4.5853300947418247e-10, + "loss": 0.5699, + "step": 15718 + }, + { + "epoch": 1.0, + "grad_norm": 0.8885412812232971, + "learning_rate": 4.4474353892043356e-10, + "loss": 0.615, + "step": 15719 + }, + { + "epoch": 1.0, + "grad_norm": 0.8214197158813477, + "learning_rate": 4.311645822463462e-10, + "loss": 0.5229, + "step": 15720 + }, + { + "epoch": 1.0, + "grad_norm": 0.9046826362609863, + "learning_rate": 4.177961400236852e-10, + "loss": 0.5779, + "step": 15721 + }, + { + "epoch": 1.0, + "grad_norm": 0.8636698722839355, + "learning_rate": 4.046382128147786e-10, + "loss": 0.5556, + "step": 15722 + }, + { + "epoch": 1.0, + "grad_norm": 1.039339542388916, + "learning_rate": 3.916908011747378e-10, + "loss": 0.6415, + "step": 15723 + }, + { + "epoch": 1.0, + "grad_norm": 0.8799046874046326, + "learning_rate": 3.7895390564868237e-10, + "loss": 0.5939, + "step": 15724 + }, + { + "epoch": 1.0, + "grad_norm": 0.855849027633667, + "learning_rate": 3.664275267717399e-10, + "loss": 0.5676, + "step": 15725 + }, + { + "epoch": 1.0, + "grad_norm": 0.8732782602310181, + "learning_rate": 3.541116650723764e-10, + "loss": 0.5287, + "step": 15726 + }, + { + "epoch": 1.0, + "grad_norm": 0.8243375420570374, + "learning_rate": 3.4200632106906605e-10, + "loss": 0.5535, + "step": 15727 + }, + { + "epoch": 1.0, + "grad_norm": 0.8324832320213318, + "learning_rate": 3.301114952708462e-10, + "loss": 0.5476, + "step": 15728 + }, + { + "epoch": 1.0, + "grad_norm": 0.9179962873458862, + "learning_rate": 3.1842718817953755e-10, + "loss": 0.6169, + "step": 15729 + }, + { + "epoch": 1.0, + "grad_norm": 0.9185804724693298, + "learning_rate": 3.0695340028641383e-10, + "loss": 0.6479, + "step": 15730 + }, + { + "epoch": 1.0, + "grad_norm": 0.9088355302810669, + "learning_rate": 2.956901320744221e-10, + "loss": 0.5929, + "step": 15731 + }, + { + "epoch": 1.0, + "grad_norm": 0.8863728642463684, + "learning_rate": 2.8463738401873776e-10, + "loss": 0.5875, + "step": 15732 + }, + { + "epoch": 1.0, + "grad_norm": 0.9010648727416992, + "learning_rate": 2.7379515658398915e-10, + "loss": 0.5564, + "step": 15733 + }, + { + "epoch": 1.0, + "grad_norm": 0.9122373461723328, + "learning_rate": 2.6316345022703307e-10, + "loss": 0.5589, + "step": 15734 + }, + { + "epoch": 1.0, + "grad_norm": 0.9271931052207947, + "learning_rate": 2.5274226539584445e-10, + "loss": 0.5492, + "step": 15735 + }, + { + "epoch": 1.0, + "grad_norm": 0.8589327931404114, + "learning_rate": 2.4253160252840636e-10, + "loss": 0.4886, + "step": 15736 + }, + { + "epoch": 1.0, + "grad_norm": 0.9020355343818665, + "learning_rate": 2.3253146205493014e-10, + "loss": 0.5461, + "step": 15737 + }, + { + "epoch": 1.0, + "grad_norm": 0.8690382838249207, + "learning_rate": 2.227418443967455e-10, + "loss": 0.5784, + "step": 15738 + }, + { + "epoch": 1.0, + "grad_norm": 0.9112160801887512, + "learning_rate": 2.1316274996630026e-10, + "loss": 0.6373, + "step": 15739 + }, + { + "epoch": 1.0, + "grad_norm": 0.8418201208114624, + "learning_rate": 2.0379417916605027e-10, + "loss": 0.5148, + "step": 15740 + }, + { + "epoch": 1.0, + "grad_norm": 0.9191485047340393, + "learning_rate": 1.946361323912349e-10, + "loss": 0.5662, + "step": 15741 + }, + { + "epoch": 1.0, + "grad_norm": 0.9604305028915405, + "learning_rate": 1.856886100276567e-10, + "loss": 0.5844, + "step": 15742 + }, + { + "epoch": 1.0, + "grad_norm": 0.8791788816452026, + "learning_rate": 1.7695161245112613e-10, + "loss": 0.5593, + "step": 15743 + }, + { + "epoch": 1.0, + "grad_norm": 0.9725036025047302, + "learning_rate": 1.6842514003023724e-10, + "loss": 0.5887, + "step": 15744 + }, + { + "epoch": 1.0, + "grad_norm": 0.8569048047065735, + "learning_rate": 1.6010919312359208e-10, + "loss": 0.5369, + "step": 15745 + }, + { + "epoch": 1.0, + "grad_norm": 0.8856031894683838, + "learning_rate": 1.520037720820211e-10, + "loss": 0.5598, + "step": 15746 + }, + { + "epoch": 1.0, + "grad_norm": 0.8477560877799988, + "learning_rate": 1.4410887724580768e-10, + "loss": 0.5036, + "step": 15747 + }, + { + "epoch": 1.0, + "grad_norm": 0.8959560394287109, + "learning_rate": 1.3642450894801872e-10, + "loss": 0.5094, + "step": 15748 + }, + { + "epoch": 1.0, + "grad_norm": 0.896802544593811, + "learning_rate": 1.2895066751283935e-10, + "loss": 0.5458, + "step": 15749 + }, + { + "epoch": 1.0, + "grad_norm": 0.8456780910491943, + "learning_rate": 1.2168735325335246e-10, + "loss": 0.5843, + "step": 15750 + }, + { + "epoch": 1.0, + "grad_norm": 0.8784763813018799, + "learning_rate": 1.1463456647708982e-10, + "loss": 0.5836, + "step": 15751 + }, + { + "epoch": 1.0, + "grad_norm": 0.900086522102356, + "learning_rate": 1.0779230747992587e-10, + "loss": 0.583, + "step": 15752 + }, + { + "epoch": 1.0, + "grad_norm": 0.9559805393218994, + "learning_rate": 1.0116057654996348e-10, + "loss": 0.6462, + "step": 15753 + }, + { + "epoch": 1.0, + "grad_norm": 0.9030122756958008, + "learning_rate": 9.473937396697885e-11, + "loss": 0.5079, + "step": 15754 + }, + { + "epoch": 1.0, + "grad_norm": 0.8667322993278503, + "learning_rate": 8.85287000013113e-11, + "loss": 0.5711, + "step": 15755 + }, + { + "epoch": 1.0, + "grad_norm": 0.9083278179168701, + "learning_rate": 8.252855491386325e-11, + "loss": 0.5124, + "step": 15756 + }, + { + "epoch": 1.0, + "grad_norm": 0.8988203406333923, + "learning_rate": 7.673893895776551e-11, + "loss": 0.5766, + "step": 15757 + }, + { + "epoch": 1.0, + "grad_norm": 0.9265889525413513, + "learning_rate": 7.115985237726719e-11, + "loss": 0.605, + "step": 15758 + }, + { + "epoch": 1.0, + "grad_norm": 0.8614688515663147, + "learning_rate": 6.57912954060702e-11, + "loss": 0.5763, + "step": 15759 + }, + { + "epoch": 1.0, + "grad_norm": 0.8665549755096436, + "learning_rate": 6.063326827121518e-11, + "loss": 0.5447, + "step": 15760 + }, + { + "epoch": 1.0, + "grad_norm": 0.9184945225715637, + "learning_rate": 5.5685771189750714e-11, + "loss": 0.5477, + "step": 15761 + }, + { + "epoch": 1.0, + "grad_norm": 0.9115839004516602, + "learning_rate": 5.0948804369843616e-11, + "loss": 0.5807, + "step": 15762 + }, + { + "epoch": 1.0, + "grad_norm": 0.865247905254364, + "learning_rate": 4.642236801022382e-11, + "loss": 0.59, + "step": 15763 + }, + { + "epoch": 1.0, + "grad_norm": 0.9395208358764648, + "learning_rate": 4.210646230295989e-11, + "loss": 0.553, + "step": 15764 + }, + { + "epoch": 1.0, + "grad_norm": 0.8607648611068726, + "learning_rate": 3.800108742846309e-11, + "loss": 0.5839, + "step": 15765 + }, + { + "epoch": 1.0, + "grad_norm": 0.9533546566963196, + "learning_rate": 3.410624356048331e-11, + "loss": 0.6237, + "step": 15766 + }, + { + "epoch": 1.0, + "grad_norm": 0.8995264768600464, + "learning_rate": 3.0421930862778446e-11, + "loss": 0.5904, + "step": 15767 + }, + { + "epoch": 1.0, + "grad_norm": 0.8825034499168396, + "learning_rate": 2.6948149490224618e-11, + "loss": 0.5683, + "step": 15768 + }, + { + "epoch": 1.0, + "grad_norm": 0.8862566351890564, + "learning_rate": 2.3684899589371256e-11, + "loss": 0.5178, + "step": 15769 + }, + { + "epoch": 1.0, + "grad_norm": 0.9058730006217957, + "learning_rate": 2.063218129733091e-11, + "loss": 0.5956, + "step": 15770 + }, + { + "epoch": 1.0, + "grad_norm": 0.9475806355476379, + "learning_rate": 1.7789994742889448e-11, + "loss": 0.6292, + "step": 15771 + }, + { + "epoch": 1.0, + "grad_norm": 0.8897664546966553, + "learning_rate": 1.5158340045395847e-11, + "loss": 0.5825, + "step": 15772 + }, + { + "epoch": 1.0, + "grad_norm": 0.8877468705177307, + "learning_rate": 1.2737217315872407e-11, + "loss": 0.572, + "step": 15773 + }, + { + "epoch": 1.0, + "grad_norm": 0.9111242294311523, + "learning_rate": 1.0526626656459648e-11, + "loss": 0.557, + "step": 15774 + }, + { + "epoch": 1.0, + "grad_norm": 0.8383316397666931, + "learning_rate": 8.526568160416304e-12, + "loss": 0.5977, + "step": 15775 + }, + { + "epoch": 1.0, + "grad_norm": 0.9318212270736694, + "learning_rate": 6.737041911564213e-12, + "loss": 0.608, + "step": 15776 + }, + { + "epoch": 1.0, + "grad_norm": 0.901352047920227, + "learning_rate": 5.158047984843428e-12, + "loss": 0.5461, + "step": 15777 + }, + { + "epoch": 1.0, + "grad_norm": 0.8835100531578064, + "learning_rate": 3.789586447422444e-12, + "loss": 0.5323, + "step": 15778 + }, + { + "epoch": 1.0, + "grad_norm": 0.8877159953117371, + "learning_rate": 2.631657356477746e-12, + "loss": 0.5907, + "step": 15779 + }, + { + "epoch": 1.0, + "grad_norm": 0.8495984077453613, + "learning_rate": 1.6842607614142582e-12, + "loss": 0.5492, + "step": 15780 + }, + { + "epoch": 1.0, + "grad_norm": 0.9034891724586487, + "learning_rate": 9.473967016448982e-13, + "loss": 0.6355, + "step": 15781 + }, + { + "epoch": 1.0, + "grad_norm": 0.9179747104644775, + "learning_rate": 4.210652082559108e-13, + "loss": 0.5649, + "step": 15782 + }, + { + "epoch": 1.0, + "grad_norm": 0.877388060092926, + "learning_rate": 1.0526630289664496e-13, + "loss": 0.5585, + "step": 15783 + }, + { + "epoch": 1.0, + "grad_norm": 0.8421609997749329, + "learning_rate": 0.0, + "loss": 0.5535, + "step": 15784 + }, + { + "epoch": 1.0, + "step": 15784, + "total_flos": 8.263869225865576e+18, + "train_loss": 0.6036004031589656, + "train_runtime": 161757.4173, + "train_samples_per_second": 24.981, + "train_steps_per_second": 0.098 + } + ], + "logging_steps": 1.0, + "max_steps": 15784, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 8000, + "total_flos": 8.263869225865576e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}