{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999841614267386, "eval_steps": 500, "global_step": 15784, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 6.4256110191345215, "learning_rate": 2.109704641350211e-08, "loss": 1.1779, "step": 1 }, { "epoch": 0.0, "grad_norm": 6.354576587677002, "learning_rate": 4.219409282700422e-08, "loss": 1.214, "step": 2 }, { "epoch": 0.0, "grad_norm": 6.824097633361816, "learning_rate": 6.329113924050633e-08, "loss": 1.1802, "step": 3 }, { "epoch": 0.0, "grad_norm": 5.9395294189453125, "learning_rate": 8.438818565400844e-08, "loss": 1.1571, "step": 4 }, { "epoch": 0.0, "grad_norm": 5.668644905090332, "learning_rate": 1.0548523206751055e-07, "loss": 1.1222, "step": 5 }, { "epoch": 0.0, "grad_norm": 6.907066822052002, "learning_rate": 1.2658227848101266e-07, "loss": 1.2215, "step": 6 }, { "epoch": 0.0, "grad_norm": 6.276373863220215, "learning_rate": 1.4767932489451477e-07, "loss": 1.2274, "step": 7 }, { "epoch": 0.0, "grad_norm": 6.175144195556641, "learning_rate": 1.6877637130801689e-07, "loss": 1.1467, "step": 8 }, { "epoch": 0.0, "grad_norm": 6.861614227294922, "learning_rate": 1.89873417721519e-07, "loss": 1.168, "step": 9 }, { "epoch": 0.0, "grad_norm": 6.300581932067871, "learning_rate": 2.109704641350211e-07, "loss": 1.1902, "step": 10 }, { "epoch": 0.0, "grad_norm": 7.374689102172852, "learning_rate": 2.3206751054852324e-07, "loss": 1.1582, "step": 11 }, { "epoch": 0.0, "grad_norm": 6.39311408996582, "learning_rate": 2.5316455696202533e-07, "loss": 1.1747, "step": 12 }, { "epoch": 0.0, "grad_norm": 6.60832405090332, "learning_rate": 2.7426160337552746e-07, "loss": 1.1727, "step": 13 }, { "epoch": 0.0, "grad_norm": 6.393230438232422, "learning_rate": 2.9535864978902955e-07, "loss": 1.1668, "step": 14 }, { "epoch": 0.0, "grad_norm": 6.010373592376709, "learning_rate": 3.164556962025317e-07, "loss": 1.1542, "step": 15 }, { "epoch": 0.0, "grad_norm": 6.905627727508545, "learning_rate": 3.3755274261603377e-07, "loss": 1.2184, "step": 16 }, { "epoch": 0.0, "grad_norm": 5.977834224700928, "learning_rate": 3.586497890295359e-07, "loss": 1.2015, "step": 17 }, { "epoch": 0.0, "grad_norm": 6.738651275634766, "learning_rate": 3.79746835443038e-07, "loss": 1.2082, "step": 18 }, { "epoch": 0.0, "grad_norm": 5.820187091827393, "learning_rate": 4.0084388185654013e-07, "loss": 1.1105, "step": 19 }, { "epoch": 0.0, "grad_norm": 6.201944828033447, "learning_rate": 4.219409282700422e-07, "loss": 1.1601, "step": 20 }, { "epoch": 0.0, "grad_norm": 6.043580532073975, "learning_rate": 4.4303797468354435e-07, "loss": 1.1457, "step": 21 }, { "epoch": 0.0, "grad_norm": 5.428764820098877, "learning_rate": 4.641350210970465e-07, "loss": 1.1577, "step": 22 }, { "epoch": 0.0, "grad_norm": 4.656769275665283, "learning_rate": 4.852320675105486e-07, "loss": 1.0191, "step": 23 }, { "epoch": 0.0, "grad_norm": 6.203961372375488, "learning_rate": 5.063291139240507e-07, "loss": 1.1022, "step": 24 }, { "epoch": 0.0, "grad_norm": 5.214546203613281, "learning_rate": 5.274261603375528e-07, "loss": 1.1161, "step": 25 }, { "epoch": 0.0, "grad_norm": 5.631003379821777, "learning_rate": 5.485232067510549e-07, "loss": 1.1692, "step": 26 }, { "epoch": 0.0, "grad_norm": 5.013113975524902, "learning_rate": 5.69620253164557e-07, "loss": 1.123, "step": 27 }, { "epoch": 0.0, "grad_norm": 4.746349334716797, "learning_rate": 5.907172995780591e-07, "loss": 1.059, "step": 28 }, { "epoch": 0.0, "grad_norm": 5.102616310119629, "learning_rate": 6.118143459915613e-07, "loss": 1.0429, "step": 29 }, { "epoch": 0.0, "grad_norm": 4.59443998336792, "learning_rate": 6.329113924050634e-07, "loss": 1.0285, "step": 30 }, { "epoch": 0.0, "grad_norm": 4.017740726470947, "learning_rate": 6.540084388185656e-07, "loss": 0.9746, "step": 31 }, { "epoch": 0.0, "grad_norm": 3.804844856262207, "learning_rate": 6.751054852320675e-07, "loss": 1.0295, "step": 32 }, { "epoch": 0.0, "grad_norm": 4.023899078369141, "learning_rate": 6.962025316455696e-07, "loss": 0.9803, "step": 33 }, { "epoch": 0.0, "grad_norm": 3.57771635055542, "learning_rate": 7.172995780590718e-07, "loss": 1.0228, "step": 34 }, { "epoch": 0.0, "grad_norm": 3.537170886993408, "learning_rate": 7.383966244725739e-07, "loss": 0.9385, "step": 35 }, { "epoch": 0.0, "grad_norm": 3.388979196548462, "learning_rate": 7.59493670886076e-07, "loss": 0.9712, "step": 36 }, { "epoch": 0.0, "grad_norm": 3.173189878463745, "learning_rate": 7.805907172995782e-07, "loss": 0.9959, "step": 37 }, { "epoch": 0.0, "grad_norm": 3.2532761096954346, "learning_rate": 8.016877637130803e-07, "loss": 0.8836, "step": 38 }, { "epoch": 0.0, "grad_norm": 2.966832160949707, "learning_rate": 8.227848101265823e-07, "loss": 0.9153, "step": 39 }, { "epoch": 0.0, "grad_norm": 2.814666748046875, "learning_rate": 8.438818565400844e-07, "loss": 0.8867, "step": 40 }, { "epoch": 0.0, "grad_norm": 2.621326446533203, "learning_rate": 8.649789029535865e-07, "loss": 0.9165, "step": 41 }, { "epoch": 0.0, "grad_norm": 2.1372127532958984, "learning_rate": 8.860759493670887e-07, "loss": 0.8869, "step": 42 }, { "epoch": 0.0, "grad_norm": 2.641986608505249, "learning_rate": 9.071729957805908e-07, "loss": 0.831, "step": 43 }, { "epoch": 0.0, "grad_norm": 2.5991573333740234, "learning_rate": 9.28270042194093e-07, "loss": 0.837, "step": 44 }, { "epoch": 0.0, "grad_norm": 2.680292844772339, "learning_rate": 9.493670886075951e-07, "loss": 0.7899, "step": 45 }, { "epoch": 0.0, "grad_norm": 2.4318466186523438, "learning_rate": 9.704641350210971e-07, "loss": 0.8298, "step": 46 }, { "epoch": 0.0, "grad_norm": 2.0200953483581543, "learning_rate": 9.915611814345991e-07, "loss": 0.7765, "step": 47 }, { "epoch": 0.0, "grad_norm": 1.9560511112213135, "learning_rate": 1.0126582278481013e-06, "loss": 0.8225, "step": 48 }, { "epoch": 0.0, "grad_norm": 2.1863861083984375, "learning_rate": 1.0337552742616035e-06, "loss": 0.8678, "step": 49 }, { "epoch": 0.0, "grad_norm": 1.9763541221618652, "learning_rate": 1.0548523206751057e-06, "loss": 0.8258, "step": 50 }, { "epoch": 0.0, "grad_norm": 1.8083155155181885, "learning_rate": 1.0759493670886077e-06, "loss": 0.7827, "step": 51 }, { "epoch": 0.0, "grad_norm": 1.7802972793579102, "learning_rate": 1.0970464135021099e-06, "loss": 0.8143, "step": 52 }, { "epoch": 0.0, "grad_norm": 1.8700745105743408, "learning_rate": 1.1181434599156118e-06, "loss": 0.85, "step": 53 }, { "epoch": 0.0, "grad_norm": 1.676476240158081, "learning_rate": 1.139240506329114e-06, "loss": 0.805, "step": 54 }, { "epoch": 0.0, "grad_norm": 1.8485150337219238, "learning_rate": 1.1603375527426162e-06, "loss": 0.7847, "step": 55 }, { "epoch": 0.0, "grad_norm": 1.823886513710022, "learning_rate": 1.1814345991561182e-06, "loss": 0.7407, "step": 56 }, { "epoch": 0.0, "grad_norm": 1.807379961013794, "learning_rate": 1.2025316455696204e-06, "loss": 0.7688, "step": 57 }, { "epoch": 0.0, "grad_norm": 1.6759995222091675, "learning_rate": 1.2236286919831226e-06, "loss": 0.8022, "step": 58 }, { "epoch": 0.0, "grad_norm": 1.8627663850784302, "learning_rate": 1.2447257383966246e-06, "loss": 0.8492, "step": 59 }, { "epoch": 0.0, "grad_norm": 1.8799936771392822, "learning_rate": 1.2658227848101267e-06, "loss": 0.7646, "step": 60 }, { "epoch": 0.0, "grad_norm": 1.7080084085464478, "learning_rate": 1.286919831223629e-06, "loss": 0.7187, "step": 61 }, { "epoch": 0.0, "grad_norm": 1.6503185033798218, "learning_rate": 1.3080168776371311e-06, "loss": 0.7496, "step": 62 }, { "epoch": 0.0, "grad_norm": 1.507122278213501, "learning_rate": 1.3291139240506329e-06, "loss": 0.74, "step": 63 }, { "epoch": 0.0, "grad_norm": 1.5223617553710938, "learning_rate": 1.350210970464135e-06, "loss": 0.7642, "step": 64 }, { "epoch": 0.0, "grad_norm": 1.4301729202270508, "learning_rate": 1.371308016877637e-06, "loss": 0.7188, "step": 65 }, { "epoch": 0.0, "grad_norm": 1.5367064476013184, "learning_rate": 1.3924050632911392e-06, "loss": 0.7353, "step": 66 }, { "epoch": 0.0, "grad_norm": 1.5191432237625122, "learning_rate": 1.4135021097046414e-06, "loss": 0.7382, "step": 67 }, { "epoch": 0.0, "grad_norm": 1.6111494302749634, "learning_rate": 1.4345991561181436e-06, "loss": 0.7471, "step": 68 }, { "epoch": 0.0, "grad_norm": 1.3847888708114624, "learning_rate": 1.4556962025316456e-06, "loss": 0.7138, "step": 69 }, { "epoch": 0.0, "grad_norm": 1.604929804801941, "learning_rate": 1.4767932489451478e-06, "loss": 0.8025, "step": 70 }, { "epoch": 0.0, "grad_norm": 1.4901641607284546, "learning_rate": 1.49789029535865e-06, "loss": 0.7477, "step": 71 }, { "epoch": 0.0, "grad_norm": 1.4785903692245483, "learning_rate": 1.518987341772152e-06, "loss": 0.7408, "step": 72 }, { "epoch": 0.0, "grad_norm": 1.3478227853775024, "learning_rate": 1.5400843881856542e-06, "loss": 0.6841, "step": 73 }, { "epoch": 0.0, "grad_norm": 1.3157073259353638, "learning_rate": 1.5611814345991563e-06, "loss": 0.6798, "step": 74 }, { "epoch": 0.0, "grad_norm": 1.49278724193573, "learning_rate": 1.5822784810126585e-06, "loss": 0.7075, "step": 75 }, { "epoch": 0.0, "grad_norm": 1.2032966613769531, "learning_rate": 1.6033755274261605e-06, "loss": 0.7001, "step": 76 }, { "epoch": 0.0, "grad_norm": 1.3113423585891724, "learning_rate": 1.6244725738396625e-06, "loss": 0.684, "step": 77 }, { "epoch": 0.0, "grad_norm": 1.49677574634552, "learning_rate": 1.6455696202531647e-06, "loss": 0.7296, "step": 78 }, { "epoch": 0.01, "grad_norm": 1.48220694065094, "learning_rate": 1.6666666666666667e-06, "loss": 0.7797, "step": 79 }, { "epoch": 0.01, "grad_norm": 1.3708736896514893, "learning_rate": 1.6877637130801689e-06, "loss": 0.7261, "step": 80 }, { "epoch": 0.01, "grad_norm": 1.4457319974899292, "learning_rate": 1.708860759493671e-06, "loss": 0.7705, "step": 81 }, { "epoch": 0.01, "grad_norm": 1.2591625452041626, "learning_rate": 1.729957805907173e-06, "loss": 0.6925, "step": 82 }, { "epoch": 0.01, "grad_norm": 1.3564741611480713, "learning_rate": 1.7510548523206752e-06, "loss": 0.7613, "step": 83 }, { "epoch": 0.01, "grad_norm": 1.3493813276290894, "learning_rate": 1.7721518987341774e-06, "loss": 0.6696, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.2987971305847168, "learning_rate": 1.7932489451476796e-06, "loss": 0.6874, "step": 85 }, { "epoch": 0.01, "grad_norm": 1.4315829277038574, "learning_rate": 1.8143459915611816e-06, "loss": 0.7451, "step": 86 }, { "epoch": 0.01, "grad_norm": 1.3597711324691772, "learning_rate": 1.8354430379746838e-06, "loss": 0.6905, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.313363790512085, "learning_rate": 1.856540084388186e-06, "loss": 0.6986, "step": 88 }, { "epoch": 0.01, "grad_norm": 1.448975920677185, "learning_rate": 1.877637130801688e-06, "loss": 0.7411, "step": 89 }, { "epoch": 0.01, "grad_norm": 1.4621657133102417, "learning_rate": 1.8987341772151901e-06, "loss": 0.7285, "step": 90 }, { "epoch": 0.01, "grad_norm": 1.3639302253723145, "learning_rate": 1.919831223628692e-06, "loss": 0.6993, "step": 91 }, { "epoch": 0.01, "grad_norm": 1.2497588396072388, "learning_rate": 1.9409282700421943e-06, "loss": 0.7117, "step": 92 }, { "epoch": 0.01, "grad_norm": 1.3762421607971191, "learning_rate": 1.9620253164556965e-06, "loss": 0.7167, "step": 93 }, { "epoch": 0.01, "grad_norm": 1.2534737586975098, "learning_rate": 1.9831223628691982e-06, "loss": 0.6592, "step": 94 }, { "epoch": 0.01, "grad_norm": 1.3847312927246094, "learning_rate": 2.0042194092827004e-06, "loss": 0.7365, "step": 95 }, { "epoch": 0.01, "grad_norm": 1.3049088716506958, "learning_rate": 2.0253164556962026e-06, "loss": 0.6969, "step": 96 }, { "epoch": 0.01, "grad_norm": 1.3645730018615723, "learning_rate": 2.046413502109705e-06, "loss": 0.5954, "step": 97 }, { "epoch": 0.01, "grad_norm": 1.2779875993728638, "learning_rate": 2.067510548523207e-06, "loss": 0.6862, "step": 98 }, { "epoch": 0.01, "grad_norm": 1.3105217218399048, "learning_rate": 2.088607594936709e-06, "loss": 0.6493, "step": 99 }, { "epoch": 0.01, "grad_norm": 1.3567004203796387, "learning_rate": 2.1097046413502114e-06, "loss": 0.7487, "step": 100 }, { "epoch": 0.01, "grad_norm": 1.3794139623641968, "learning_rate": 2.130801687763713e-06, "loss": 0.6479, "step": 101 }, { "epoch": 0.01, "grad_norm": 1.2701658010482788, "learning_rate": 2.1518987341772153e-06, "loss": 0.7033, "step": 102 }, { "epoch": 0.01, "grad_norm": 1.2012029886245728, "learning_rate": 2.1729957805907175e-06, "loss": 0.6878, "step": 103 }, { "epoch": 0.01, "grad_norm": 1.284591555595398, "learning_rate": 2.1940928270042197e-06, "loss": 0.6745, "step": 104 }, { "epoch": 0.01, "grad_norm": 1.3748619556427002, "learning_rate": 2.2151898734177215e-06, "loss": 0.6969, "step": 105 }, { "epoch": 0.01, "grad_norm": 1.363826870918274, "learning_rate": 2.2362869198312237e-06, "loss": 0.6566, "step": 106 }, { "epoch": 0.01, "grad_norm": 1.235390305519104, "learning_rate": 2.257383966244726e-06, "loss": 0.6597, "step": 107 }, { "epoch": 0.01, "grad_norm": 1.4955501556396484, "learning_rate": 2.278481012658228e-06, "loss": 0.7158, "step": 108 }, { "epoch": 0.01, "grad_norm": 1.3154562711715698, "learning_rate": 2.2995780590717302e-06, "loss": 0.6884, "step": 109 }, { "epoch": 0.01, "grad_norm": 1.2907527685165405, "learning_rate": 2.3206751054852324e-06, "loss": 0.6706, "step": 110 }, { "epoch": 0.01, "grad_norm": 1.4128575325012207, "learning_rate": 2.341772151898734e-06, "loss": 0.6852, "step": 111 }, { "epoch": 0.01, "grad_norm": 1.2075326442718506, "learning_rate": 2.3628691983122364e-06, "loss": 0.6213, "step": 112 }, { "epoch": 0.01, "grad_norm": 1.3743549585342407, "learning_rate": 2.3839662447257386e-06, "loss": 0.6632, "step": 113 }, { "epoch": 0.01, "grad_norm": 1.2332968711853027, "learning_rate": 2.4050632911392408e-06, "loss": 0.6708, "step": 114 }, { "epoch": 0.01, "grad_norm": 1.2831799983978271, "learning_rate": 2.426160337552743e-06, "loss": 0.6561, "step": 115 }, { "epoch": 0.01, "grad_norm": 1.2498950958251953, "learning_rate": 2.447257383966245e-06, "loss": 0.6214, "step": 116 }, { "epoch": 0.01, "grad_norm": 1.3748935461044312, "learning_rate": 2.4683544303797473e-06, "loss": 0.7216, "step": 117 }, { "epoch": 0.01, "grad_norm": 1.389044165611267, "learning_rate": 2.489451476793249e-06, "loss": 0.6933, "step": 118 }, { "epoch": 0.01, "grad_norm": 1.2250936031341553, "learning_rate": 2.5105485232067513e-06, "loss": 0.6555, "step": 119 }, { "epoch": 0.01, "grad_norm": 1.3902949094772339, "learning_rate": 2.5316455696202535e-06, "loss": 0.6963, "step": 120 }, { "epoch": 0.01, "grad_norm": 1.1964465379714966, "learning_rate": 2.5527426160337553e-06, "loss": 0.6963, "step": 121 }, { "epoch": 0.01, "grad_norm": 1.2163293361663818, "learning_rate": 2.573839662447258e-06, "loss": 0.6337, "step": 122 }, { "epoch": 0.01, "grad_norm": 1.2851331233978271, "learning_rate": 2.5949367088607596e-06, "loss": 0.6371, "step": 123 }, { "epoch": 0.01, "grad_norm": 1.2649273872375488, "learning_rate": 2.6160337552742622e-06, "loss": 0.6451, "step": 124 }, { "epoch": 0.01, "grad_norm": 1.3221467733383179, "learning_rate": 2.637130801687764e-06, "loss": 0.7405, "step": 125 }, { "epoch": 0.01, "grad_norm": 1.2666411399841309, "learning_rate": 2.6582278481012658e-06, "loss": 0.6165, "step": 126 }, { "epoch": 0.01, "grad_norm": 1.3712652921676636, "learning_rate": 2.679324894514768e-06, "loss": 0.6694, "step": 127 }, { "epoch": 0.01, "grad_norm": 1.2705154418945312, "learning_rate": 2.70042194092827e-06, "loss": 0.6234, "step": 128 }, { "epoch": 0.01, "grad_norm": 1.2879364490509033, "learning_rate": 2.7215189873417724e-06, "loss": 0.6715, "step": 129 }, { "epoch": 0.01, "grad_norm": 1.1963900327682495, "learning_rate": 2.742616033755274e-06, "loss": 0.6046, "step": 130 }, { "epoch": 0.01, "grad_norm": 1.3172811269760132, "learning_rate": 2.7637130801687767e-06, "loss": 0.5856, "step": 131 }, { "epoch": 0.01, "grad_norm": 1.3311567306518555, "learning_rate": 2.7848101265822785e-06, "loss": 0.7124, "step": 132 }, { "epoch": 0.01, "grad_norm": 1.2794066667556763, "learning_rate": 2.805907172995781e-06, "loss": 0.6716, "step": 133 }, { "epoch": 0.01, "grad_norm": 1.3711148500442505, "learning_rate": 2.827004219409283e-06, "loss": 0.6892, "step": 134 }, { "epoch": 0.01, "grad_norm": 1.2461341619491577, "learning_rate": 2.848101265822785e-06, "loss": 0.616, "step": 135 }, { "epoch": 0.01, "grad_norm": 1.2201720476150513, "learning_rate": 2.8691983122362873e-06, "loss": 0.6604, "step": 136 }, { "epoch": 0.01, "grad_norm": 1.288766860961914, "learning_rate": 2.8902953586497895e-06, "loss": 0.7103, "step": 137 }, { "epoch": 0.01, "grad_norm": 1.404861330986023, "learning_rate": 2.9113924050632912e-06, "loss": 0.646, "step": 138 }, { "epoch": 0.01, "grad_norm": 1.3452450037002563, "learning_rate": 2.932489451476794e-06, "loss": 0.65, "step": 139 }, { "epoch": 0.01, "grad_norm": 1.358460545539856, "learning_rate": 2.9535864978902956e-06, "loss": 0.663, "step": 140 }, { "epoch": 0.01, "grad_norm": 1.4406980276107788, "learning_rate": 2.9746835443037974e-06, "loss": 0.646, "step": 141 }, { "epoch": 0.01, "grad_norm": 1.2622942924499512, "learning_rate": 2.9957805907173e-06, "loss": 0.6549, "step": 142 }, { "epoch": 0.01, "grad_norm": 1.2825264930725098, "learning_rate": 3.0168776371308017e-06, "loss": 0.6701, "step": 143 }, { "epoch": 0.01, "grad_norm": 1.2501925230026245, "learning_rate": 3.037974683544304e-06, "loss": 0.6562, "step": 144 }, { "epoch": 0.01, "grad_norm": 1.2910652160644531, "learning_rate": 3.059071729957806e-06, "loss": 0.702, "step": 145 }, { "epoch": 0.01, "grad_norm": 1.4148486852645874, "learning_rate": 3.0801687763713083e-06, "loss": 0.6481, "step": 146 }, { "epoch": 0.01, "grad_norm": 1.3893300294876099, "learning_rate": 3.10126582278481e-06, "loss": 0.6627, "step": 147 }, { "epoch": 0.01, "grad_norm": 1.348795771598816, "learning_rate": 3.1223628691983127e-06, "loss": 0.6378, "step": 148 }, { "epoch": 0.01, "grad_norm": 1.1906160116195679, "learning_rate": 3.1434599156118145e-06, "loss": 0.6098, "step": 149 }, { "epoch": 0.01, "grad_norm": 1.3667880296707153, "learning_rate": 3.164556962025317e-06, "loss": 0.6956, "step": 150 }, { "epoch": 0.01, "grad_norm": 1.290480136871338, "learning_rate": 3.185654008438819e-06, "loss": 0.6724, "step": 151 }, { "epoch": 0.01, "grad_norm": 1.2710357904434204, "learning_rate": 3.206751054852321e-06, "loss": 0.653, "step": 152 }, { "epoch": 0.01, "grad_norm": 1.3685747385025024, "learning_rate": 3.2278481012658232e-06, "loss": 0.6718, "step": 153 }, { "epoch": 0.01, "grad_norm": 1.4111366271972656, "learning_rate": 3.248945147679325e-06, "loss": 0.6845, "step": 154 }, { "epoch": 0.01, "grad_norm": 1.438011646270752, "learning_rate": 3.270042194092827e-06, "loss": 0.6813, "step": 155 }, { "epoch": 0.01, "grad_norm": 1.1767878532409668, "learning_rate": 3.2911392405063294e-06, "loss": 0.6103, "step": 156 }, { "epoch": 0.01, "grad_norm": 1.2970857620239258, "learning_rate": 3.3122362869198316e-06, "loss": 0.6954, "step": 157 }, { "epoch": 0.01, "grad_norm": 1.3606257438659668, "learning_rate": 3.3333333333333333e-06, "loss": 0.6547, "step": 158 }, { "epoch": 0.01, "grad_norm": 1.2998062372207642, "learning_rate": 3.354430379746836e-06, "loss": 0.6103, "step": 159 }, { "epoch": 0.01, "grad_norm": 1.3717045783996582, "learning_rate": 3.3755274261603377e-06, "loss": 0.5959, "step": 160 }, { "epoch": 0.01, "grad_norm": 1.2737528085708618, "learning_rate": 3.39662447257384e-06, "loss": 0.7052, "step": 161 }, { "epoch": 0.01, "grad_norm": 1.1850390434265137, "learning_rate": 3.417721518987342e-06, "loss": 0.6558, "step": 162 }, { "epoch": 0.01, "grad_norm": 1.4002028703689575, "learning_rate": 3.4388185654008443e-06, "loss": 0.6718, "step": 163 }, { "epoch": 0.01, "grad_norm": 1.2701243162155151, "learning_rate": 3.459915611814346e-06, "loss": 0.6182, "step": 164 }, { "epoch": 0.01, "grad_norm": 1.2225536108016968, "learning_rate": 3.4810126582278487e-06, "loss": 0.6627, "step": 165 }, { "epoch": 0.01, "grad_norm": 1.2968800067901611, "learning_rate": 3.5021097046413504e-06, "loss": 0.6921, "step": 166 }, { "epoch": 0.01, "grad_norm": 1.2677010297775269, "learning_rate": 3.523206751054853e-06, "loss": 0.6065, "step": 167 }, { "epoch": 0.01, "grad_norm": 1.2947274446487427, "learning_rate": 3.544303797468355e-06, "loss": 0.654, "step": 168 }, { "epoch": 0.01, "grad_norm": 1.283827543258667, "learning_rate": 3.5654008438818566e-06, "loss": 0.619, "step": 169 }, { "epoch": 0.01, "grad_norm": 1.2735494375228882, "learning_rate": 3.586497890295359e-06, "loss": 0.6638, "step": 170 }, { "epoch": 0.01, "grad_norm": 1.306062936782837, "learning_rate": 3.607594936708861e-06, "loss": 0.6402, "step": 171 }, { "epoch": 0.01, "grad_norm": 1.271420955657959, "learning_rate": 3.628691983122363e-06, "loss": 0.7022, "step": 172 }, { "epoch": 0.01, "grad_norm": 1.3412617444992065, "learning_rate": 3.649789029535865e-06, "loss": 0.6584, "step": 173 }, { "epoch": 0.01, "grad_norm": 1.2950142621994019, "learning_rate": 3.6708860759493675e-06, "loss": 0.6655, "step": 174 }, { "epoch": 0.01, "grad_norm": 1.3346741199493408, "learning_rate": 3.6919831223628693e-06, "loss": 0.6208, "step": 175 }, { "epoch": 0.01, "grad_norm": 1.2691746950149536, "learning_rate": 3.713080168776372e-06, "loss": 0.6727, "step": 176 }, { "epoch": 0.01, "grad_norm": 1.215303659439087, "learning_rate": 3.7341772151898737e-06, "loss": 0.636, "step": 177 }, { "epoch": 0.01, "grad_norm": 1.4336482286453247, "learning_rate": 3.755274261603376e-06, "loss": 0.6203, "step": 178 }, { "epoch": 0.01, "grad_norm": 1.29073166847229, "learning_rate": 3.776371308016878e-06, "loss": 0.6743, "step": 179 }, { "epoch": 0.01, "grad_norm": 1.3532391786575317, "learning_rate": 3.7974683544303802e-06, "loss": 0.6038, "step": 180 }, { "epoch": 0.01, "grad_norm": 1.4867761135101318, "learning_rate": 3.818565400843882e-06, "loss": 0.6562, "step": 181 }, { "epoch": 0.01, "grad_norm": 1.2906378507614136, "learning_rate": 3.839662447257384e-06, "loss": 0.6336, "step": 182 }, { "epoch": 0.01, "grad_norm": 1.2839657068252563, "learning_rate": 3.860759493670886e-06, "loss": 0.6719, "step": 183 }, { "epoch": 0.01, "grad_norm": 1.3496785163879395, "learning_rate": 3.8818565400843886e-06, "loss": 0.6481, "step": 184 }, { "epoch": 0.01, "grad_norm": 1.404876708984375, "learning_rate": 3.902953586497891e-06, "loss": 0.5807, "step": 185 }, { "epoch": 0.01, "grad_norm": 1.4152852296829224, "learning_rate": 3.924050632911393e-06, "loss": 0.6433, "step": 186 }, { "epoch": 0.01, "grad_norm": 1.3215677738189697, "learning_rate": 3.945147679324895e-06, "loss": 0.6719, "step": 187 }, { "epoch": 0.01, "grad_norm": 1.31669282913208, "learning_rate": 3.9662447257383965e-06, "loss": 0.6771, "step": 188 }, { "epoch": 0.01, "grad_norm": 1.2074658870697021, "learning_rate": 3.9873417721518995e-06, "loss": 0.657, "step": 189 }, { "epoch": 0.01, "grad_norm": 1.282547116279602, "learning_rate": 4.008438818565401e-06, "loss": 0.6981, "step": 190 }, { "epoch": 0.01, "grad_norm": 1.3060194253921509, "learning_rate": 4.029535864978903e-06, "loss": 0.6262, "step": 191 }, { "epoch": 0.01, "grad_norm": 1.2825993299484253, "learning_rate": 4.050632911392405e-06, "loss": 0.6416, "step": 192 }, { "epoch": 0.01, "grad_norm": 1.2881138324737549, "learning_rate": 4.0717299578059074e-06, "loss": 0.5945, "step": 193 }, { "epoch": 0.01, "grad_norm": 1.2855603694915771, "learning_rate": 4.09282700421941e-06, "loss": 0.615, "step": 194 }, { "epoch": 0.01, "grad_norm": 1.3043166399002075, "learning_rate": 4.113924050632912e-06, "loss": 0.7045, "step": 195 }, { "epoch": 0.01, "grad_norm": 1.2501403093338013, "learning_rate": 4.135021097046414e-06, "loss": 0.6447, "step": 196 }, { "epoch": 0.01, "grad_norm": 1.2611316442489624, "learning_rate": 4.156118143459915e-06, "loss": 0.6681, "step": 197 }, { "epoch": 0.01, "grad_norm": 1.3795578479766846, "learning_rate": 4.177215189873418e-06, "loss": 0.6826, "step": 198 }, { "epoch": 0.01, "grad_norm": 1.3322073221206665, "learning_rate": 4.19831223628692e-06, "loss": 0.6414, "step": 199 }, { "epoch": 0.01, "grad_norm": 1.2988042831420898, "learning_rate": 4.219409282700423e-06, "loss": 0.6194, "step": 200 }, { "epoch": 0.01, "grad_norm": 1.2681517601013184, "learning_rate": 4.240506329113924e-06, "loss": 0.6497, "step": 201 }, { "epoch": 0.01, "grad_norm": 1.4114967584609985, "learning_rate": 4.261603375527426e-06, "loss": 0.7306, "step": 202 }, { "epoch": 0.01, "grad_norm": 1.2016258239746094, "learning_rate": 4.2827004219409285e-06, "loss": 0.6195, "step": 203 }, { "epoch": 0.01, "grad_norm": 1.4103429317474365, "learning_rate": 4.303797468354431e-06, "loss": 0.6501, "step": 204 }, { "epoch": 0.01, "grad_norm": 1.246969223022461, "learning_rate": 4.324894514767933e-06, "loss": 0.6464, "step": 205 }, { "epoch": 0.01, "grad_norm": 1.2877082824707031, "learning_rate": 4.345991561181435e-06, "loss": 0.6357, "step": 206 }, { "epoch": 0.01, "grad_norm": 1.2532278299331665, "learning_rate": 4.367088607594937e-06, "loss": 0.6386, "step": 207 }, { "epoch": 0.01, "grad_norm": 1.39167058467865, "learning_rate": 4.3881856540084394e-06, "loss": 0.6713, "step": 208 }, { "epoch": 0.01, "grad_norm": 1.2851932048797607, "learning_rate": 4.409282700421942e-06, "loss": 0.5946, "step": 209 }, { "epoch": 0.01, "grad_norm": 1.2754656076431274, "learning_rate": 4.430379746835443e-06, "loss": 0.66, "step": 210 }, { "epoch": 0.01, "grad_norm": 1.3959016799926758, "learning_rate": 4.451476793248945e-06, "loss": 0.6753, "step": 211 }, { "epoch": 0.01, "grad_norm": 1.2273962497711182, "learning_rate": 4.472573839662447e-06, "loss": 0.5892, "step": 212 }, { "epoch": 0.01, "grad_norm": 1.2749860286712646, "learning_rate": 4.4936708860759495e-06, "loss": 0.7182, "step": 213 }, { "epoch": 0.01, "grad_norm": 1.4585115909576416, "learning_rate": 4.514767932489452e-06, "loss": 0.6285, "step": 214 }, { "epoch": 0.01, "grad_norm": 1.291067361831665, "learning_rate": 4.535864978902954e-06, "loss": 0.6543, "step": 215 }, { "epoch": 0.01, "grad_norm": 1.2669910192489624, "learning_rate": 4.556962025316456e-06, "loss": 0.6242, "step": 216 }, { "epoch": 0.01, "grad_norm": 1.303594708442688, "learning_rate": 4.578059071729958e-06, "loss": 0.6184, "step": 217 }, { "epoch": 0.01, "grad_norm": 1.3243204355239868, "learning_rate": 4.5991561181434605e-06, "loss": 0.678, "step": 218 }, { "epoch": 0.01, "grad_norm": 1.2260215282440186, "learning_rate": 4.620253164556963e-06, "loss": 0.6327, "step": 219 }, { "epoch": 0.01, "grad_norm": 1.3388763666152954, "learning_rate": 4.641350210970465e-06, "loss": 0.5952, "step": 220 }, { "epoch": 0.01, "grad_norm": 1.4450039863586426, "learning_rate": 4.662447257383967e-06, "loss": 0.6568, "step": 221 }, { "epoch": 0.01, "grad_norm": 1.388580322265625, "learning_rate": 4.683544303797468e-06, "loss": 0.617, "step": 222 }, { "epoch": 0.01, "grad_norm": 1.2090303897857666, "learning_rate": 4.7046413502109714e-06, "loss": 0.6157, "step": 223 }, { "epoch": 0.01, "grad_norm": 1.400683879852295, "learning_rate": 4.725738396624473e-06, "loss": 0.686, "step": 224 }, { "epoch": 0.01, "grad_norm": 1.3882019519805908, "learning_rate": 4.746835443037975e-06, "loss": 0.6476, "step": 225 }, { "epoch": 0.01, "grad_norm": 1.3863544464111328, "learning_rate": 4.767932489451477e-06, "loss": 0.6183, "step": 226 }, { "epoch": 0.01, "grad_norm": 1.2990771532058716, "learning_rate": 4.789029535864979e-06, "loss": 0.5885, "step": 227 }, { "epoch": 0.01, "grad_norm": 1.2280645370483398, "learning_rate": 4.8101265822784815e-06, "loss": 0.5813, "step": 228 }, { "epoch": 0.01, "grad_norm": 1.3123680353164673, "learning_rate": 4.831223628691984e-06, "loss": 0.6771, "step": 229 }, { "epoch": 0.01, "grad_norm": 1.398052453994751, "learning_rate": 4.852320675105486e-06, "loss": 0.6691, "step": 230 }, { "epoch": 0.01, "grad_norm": 1.208593726158142, "learning_rate": 4.873417721518987e-06, "loss": 0.5812, "step": 231 }, { "epoch": 0.01, "grad_norm": 1.3669441938400269, "learning_rate": 4.89451476793249e-06, "loss": 0.6813, "step": 232 }, { "epoch": 0.01, "grad_norm": 1.3295658826828003, "learning_rate": 4.915611814345992e-06, "loss": 0.6214, "step": 233 }, { "epoch": 0.01, "grad_norm": 1.269345760345459, "learning_rate": 4.936708860759495e-06, "loss": 0.5648, "step": 234 }, { "epoch": 0.01, "grad_norm": 1.429956078529358, "learning_rate": 4.957805907172996e-06, "loss": 0.6428, "step": 235 }, { "epoch": 0.01, "grad_norm": 1.2624046802520752, "learning_rate": 4.978902953586498e-06, "loss": 0.6186, "step": 236 }, { "epoch": 0.02, "grad_norm": 1.4069180488586426, "learning_rate": 5e-06, "loss": 0.6369, "step": 237 }, { "epoch": 0.02, "grad_norm": 1.2838106155395508, "learning_rate": 5.021097046413503e-06, "loss": 0.6694, "step": 238 }, { "epoch": 0.02, "grad_norm": 1.2982817888259888, "learning_rate": 5.042194092827004e-06, "loss": 0.6414, "step": 239 }, { "epoch": 0.02, "grad_norm": 1.2129889726638794, "learning_rate": 5.063291139240507e-06, "loss": 0.6026, "step": 240 }, { "epoch": 0.02, "grad_norm": 1.3048906326293945, "learning_rate": 5.084388185654009e-06, "loss": 0.6512, "step": 241 }, { "epoch": 0.02, "grad_norm": 1.2372359037399292, "learning_rate": 5.1054852320675105e-06, "loss": 0.5698, "step": 242 }, { "epoch": 0.02, "grad_norm": 1.251413345336914, "learning_rate": 5.126582278481013e-06, "loss": 0.6061, "step": 243 }, { "epoch": 0.02, "grad_norm": 1.2215440273284912, "learning_rate": 5.147679324894516e-06, "loss": 0.5464, "step": 244 }, { "epoch": 0.02, "grad_norm": 1.246283769607544, "learning_rate": 5.168776371308017e-06, "loss": 0.618, "step": 245 }, { "epoch": 0.02, "grad_norm": 1.2899631261825562, "learning_rate": 5.189873417721519e-06, "loss": 0.6387, "step": 246 }, { "epoch": 0.02, "grad_norm": 1.3374756574630737, "learning_rate": 5.2109704641350215e-06, "loss": 0.634, "step": 247 }, { "epoch": 0.02, "grad_norm": 1.3475295305252075, "learning_rate": 5.2320675105485245e-06, "loss": 0.6372, "step": 248 }, { "epoch": 0.02, "grad_norm": 1.252197265625, "learning_rate": 5.253164556962026e-06, "loss": 0.6681, "step": 249 }, { "epoch": 0.02, "grad_norm": 1.2391002178192139, "learning_rate": 5.274261603375528e-06, "loss": 0.6076, "step": 250 }, { "epoch": 0.02, "grad_norm": 1.3592095375061035, "learning_rate": 5.295358649789029e-06, "loss": 0.6513, "step": 251 }, { "epoch": 0.02, "grad_norm": 1.2067376375198364, "learning_rate": 5.3164556962025316e-06, "loss": 0.6251, "step": 252 }, { "epoch": 0.02, "grad_norm": 1.1935391426086426, "learning_rate": 5.337552742616035e-06, "loss": 0.6499, "step": 253 }, { "epoch": 0.02, "grad_norm": 1.2630081176757812, "learning_rate": 5.358649789029536e-06, "loss": 0.6361, "step": 254 }, { "epoch": 0.02, "grad_norm": 1.2552322149276733, "learning_rate": 5.379746835443038e-06, "loss": 0.6477, "step": 255 }, { "epoch": 0.02, "grad_norm": 1.2347304821014404, "learning_rate": 5.40084388185654e-06, "loss": 0.618, "step": 256 }, { "epoch": 0.02, "grad_norm": 1.4220587015151978, "learning_rate": 5.421940928270043e-06, "loss": 0.6588, "step": 257 }, { "epoch": 0.02, "grad_norm": 1.246965765953064, "learning_rate": 5.443037974683545e-06, "loss": 0.6084, "step": 258 }, { "epoch": 0.02, "grad_norm": 1.2736470699310303, "learning_rate": 5.464135021097047e-06, "loss": 0.6702, "step": 259 }, { "epoch": 0.02, "grad_norm": 1.2836354970932007, "learning_rate": 5.485232067510548e-06, "loss": 0.6246, "step": 260 }, { "epoch": 0.02, "grad_norm": 1.2576249837875366, "learning_rate": 5.506329113924051e-06, "loss": 0.6357, "step": 261 }, { "epoch": 0.02, "grad_norm": 1.3883161544799805, "learning_rate": 5.5274261603375535e-06, "loss": 0.6021, "step": 262 }, { "epoch": 0.02, "grad_norm": 1.266746163368225, "learning_rate": 5.548523206751056e-06, "loss": 0.5971, "step": 263 }, { "epoch": 0.02, "grad_norm": 1.1999036073684692, "learning_rate": 5.569620253164557e-06, "loss": 0.6817, "step": 264 }, { "epoch": 0.02, "grad_norm": 1.2391798496246338, "learning_rate": 5.590717299578059e-06, "loss": 0.6384, "step": 265 }, { "epoch": 0.02, "grad_norm": 1.2688382863998413, "learning_rate": 5.611814345991562e-06, "loss": 0.6419, "step": 266 }, { "epoch": 0.02, "grad_norm": 1.2850573062896729, "learning_rate": 5.6329113924050636e-06, "loss": 0.6192, "step": 267 }, { "epoch": 0.02, "grad_norm": 1.2011271715164185, "learning_rate": 5.654008438818566e-06, "loss": 0.5714, "step": 268 }, { "epoch": 0.02, "grad_norm": 1.2120412588119507, "learning_rate": 5.675105485232067e-06, "loss": 0.6382, "step": 269 }, { "epoch": 0.02, "grad_norm": 1.2102055549621582, "learning_rate": 5.69620253164557e-06, "loss": 0.6238, "step": 270 }, { "epoch": 0.02, "grad_norm": 1.3209874629974365, "learning_rate": 5.717299578059072e-06, "loss": 0.6885, "step": 271 }, { "epoch": 0.02, "grad_norm": 1.2263984680175781, "learning_rate": 5.7383966244725745e-06, "loss": 0.6304, "step": 272 }, { "epoch": 0.02, "grad_norm": 1.271883487701416, "learning_rate": 5.759493670886076e-06, "loss": 0.6758, "step": 273 }, { "epoch": 0.02, "grad_norm": 1.3089218139648438, "learning_rate": 5.780590717299579e-06, "loss": 0.6316, "step": 274 }, { "epoch": 0.02, "grad_norm": 1.329040288925171, "learning_rate": 5.801687763713081e-06, "loss": 0.6756, "step": 275 }, { "epoch": 0.02, "grad_norm": 1.2899181842803955, "learning_rate": 5.8227848101265824e-06, "loss": 0.6439, "step": 276 }, { "epoch": 0.02, "grad_norm": 1.1611582040786743, "learning_rate": 5.843881856540085e-06, "loss": 0.6241, "step": 277 }, { "epoch": 0.02, "grad_norm": 1.1877211332321167, "learning_rate": 5.864978902953588e-06, "loss": 0.5726, "step": 278 }, { "epoch": 0.02, "grad_norm": 1.23735511302948, "learning_rate": 5.886075949367089e-06, "loss": 0.6491, "step": 279 }, { "epoch": 0.02, "grad_norm": 1.3182356357574463, "learning_rate": 5.907172995780591e-06, "loss": 0.6364, "step": 280 }, { "epoch": 0.02, "grad_norm": 1.4192547798156738, "learning_rate": 5.928270042194093e-06, "loss": 0.6907, "step": 281 }, { "epoch": 0.02, "grad_norm": 1.256334662437439, "learning_rate": 5.949367088607595e-06, "loss": 0.6639, "step": 282 }, { "epoch": 0.02, "grad_norm": 1.2886290550231934, "learning_rate": 5.970464135021098e-06, "loss": 0.6576, "step": 283 }, { "epoch": 0.02, "grad_norm": 1.1646943092346191, "learning_rate": 5.9915611814346e-06, "loss": 0.6297, "step": 284 }, { "epoch": 0.02, "grad_norm": 1.215549349784851, "learning_rate": 6.012658227848101e-06, "loss": 0.6089, "step": 285 }, { "epoch": 0.02, "grad_norm": 1.2174471616744995, "learning_rate": 6.0337552742616035e-06, "loss": 0.5496, "step": 286 }, { "epoch": 0.02, "grad_norm": 1.255016565322876, "learning_rate": 6.0548523206751065e-06, "loss": 0.6058, "step": 287 }, { "epoch": 0.02, "grad_norm": 1.2536252737045288, "learning_rate": 6.075949367088608e-06, "loss": 0.5952, "step": 288 }, { "epoch": 0.02, "grad_norm": 1.2640748023986816, "learning_rate": 6.09704641350211e-06, "loss": 0.618, "step": 289 }, { "epoch": 0.02, "grad_norm": 1.2663012742996216, "learning_rate": 6.118143459915612e-06, "loss": 0.6525, "step": 290 }, { "epoch": 0.02, "grad_norm": 1.2259374856948853, "learning_rate": 6.139240506329115e-06, "loss": 0.6079, "step": 291 }, { "epoch": 0.02, "grad_norm": 1.3531912565231323, "learning_rate": 6.160337552742617e-06, "loss": 0.6583, "step": 292 }, { "epoch": 0.02, "grad_norm": 1.3680455684661865, "learning_rate": 6.181434599156119e-06, "loss": 0.6359, "step": 293 }, { "epoch": 0.02, "grad_norm": 1.4130650758743286, "learning_rate": 6.20253164556962e-06, "loss": 0.64, "step": 294 }, { "epoch": 0.02, "grad_norm": 1.283833622932434, "learning_rate": 6.223628691983122e-06, "loss": 0.6207, "step": 295 }, { "epoch": 0.02, "grad_norm": 1.3075100183486938, "learning_rate": 6.244725738396625e-06, "loss": 0.6618, "step": 296 }, { "epoch": 0.02, "grad_norm": 1.2828034162521362, "learning_rate": 6.265822784810128e-06, "loss": 0.5909, "step": 297 }, { "epoch": 0.02, "grad_norm": 1.2394911050796509, "learning_rate": 6.286919831223629e-06, "loss": 0.6256, "step": 298 }, { "epoch": 0.02, "grad_norm": 1.3512240648269653, "learning_rate": 6.308016877637131e-06, "loss": 0.6058, "step": 299 }, { "epoch": 0.02, "grad_norm": 1.2114907503128052, "learning_rate": 6.329113924050634e-06, "loss": 0.6713, "step": 300 }, { "epoch": 0.02, "grad_norm": 1.235547423362732, "learning_rate": 6.3502109704641355e-06, "loss": 0.5995, "step": 301 }, { "epoch": 0.02, "grad_norm": 1.3060510158538818, "learning_rate": 6.371308016877638e-06, "loss": 0.624, "step": 302 }, { "epoch": 0.02, "grad_norm": 1.2200214862823486, "learning_rate": 6.392405063291139e-06, "loss": 0.6165, "step": 303 }, { "epoch": 0.02, "grad_norm": 1.299473524093628, "learning_rate": 6.413502109704642e-06, "loss": 0.6672, "step": 304 }, { "epoch": 0.02, "grad_norm": 1.1858652830123901, "learning_rate": 6.434599156118144e-06, "loss": 0.6199, "step": 305 }, { "epoch": 0.02, "grad_norm": 1.2412612438201904, "learning_rate": 6.4556962025316464e-06, "loss": 0.6092, "step": 306 }, { "epoch": 0.02, "grad_norm": 1.1821410655975342, "learning_rate": 6.476793248945148e-06, "loss": 0.6029, "step": 307 }, { "epoch": 0.02, "grad_norm": 1.3090671300888062, "learning_rate": 6.49789029535865e-06, "loss": 0.6865, "step": 308 }, { "epoch": 0.02, "grad_norm": 1.2623692750930786, "learning_rate": 6.518987341772153e-06, "loss": 0.6331, "step": 309 }, { "epoch": 0.02, "grad_norm": 1.2148505449295044, "learning_rate": 6.540084388185654e-06, "loss": 0.5844, "step": 310 }, { "epoch": 0.02, "grad_norm": 1.206979751586914, "learning_rate": 6.5611814345991565e-06, "loss": 0.6161, "step": 311 }, { "epoch": 0.02, "grad_norm": 1.2950704097747803, "learning_rate": 6.582278481012659e-06, "loss": 0.6101, "step": 312 }, { "epoch": 0.02, "grad_norm": 1.2553280591964722, "learning_rate": 6.603375527426161e-06, "loss": 0.6826, "step": 313 }, { "epoch": 0.02, "grad_norm": 1.237945795059204, "learning_rate": 6.624472573839663e-06, "loss": 0.581, "step": 314 }, { "epoch": 0.02, "grad_norm": 1.280417799949646, "learning_rate": 6.645569620253165e-06, "loss": 0.6078, "step": 315 }, { "epoch": 0.02, "grad_norm": 1.371483325958252, "learning_rate": 6.666666666666667e-06, "loss": 0.6104, "step": 316 }, { "epoch": 0.02, "grad_norm": 1.2257425785064697, "learning_rate": 6.68776371308017e-06, "loss": 0.5975, "step": 317 }, { "epoch": 0.02, "grad_norm": 1.2869188785552979, "learning_rate": 6.708860759493672e-06, "loss": 0.6517, "step": 318 }, { "epoch": 0.02, "grad_norm": 1.474818229675293, "learning_rate": 6.729957805907173e-06, "loss": 0.6719, "step": 319 }, { "epoch": 0.02, "grad_norm": 1.31566321849823, "learning_rate": 6.751054852320675e-06, "loss": 0.6426, "step": 320 }, { "epoch": 0.02, "grad_norm": 1.3478425741195679, "learning_rate": 6.772151898734178e-06, "loss": 0.6462, "step": 321 }, { "epoch": 0.02, "grad_norm": 1.1856145858764648, "learning_rate": 6.79324894514768e-06, "loss": 0.6296, "step": 322 }, { "epoch": 0.02, "grad_norm": 1.2015944719314575, "learning_rate": 6.814345991561182e-06, "loss": 0.6179, "step": 323 }, { "epoch": 0.02, "grad_norm": 1.2188371419906616, "learning_rate": 6.835443037974684e-06, "loss": 0.6049, "step": 324 }, { "epoch": 0.02, "grad_norm": 1.313015103340149, "learning_rate": 6.8565400843881855e-06, "loss": 0.5847, "step": 325 }, { "epoch": 0.02, "grad_norm": 1.424932599067688, "learning_rate": 6.8776371308016885e-06, "loss": 0.6946, "step": 326 }, { "epoch": 0.02, "grad_norm": 1.3193198442459106, "learning_rate": 6.898734177215191e-06, "loss": 0.6464, "step": 327 }, { "epoch": 0.02, "grad_norm": 1.247219443321228, "learning_rate": 6.919831223628692e-06, "loss": 0.6904, "step": 328 }, { "epoch": 0.02, "grad_norm": 1.3283206224441528, "learning_rate": 6.940928270042194e-06, "loss": 0.6483, "step": 329 }, { "epoch": 0.02, "grad_norm": 1.2536122798919678, "learning_rate": 6.962025316455697e-06, "loss": 0.6327, "step": 330 }, { "epoch": 0.02, "grad_norm": 1.2731391191482544, "learning_rate": 6.9831223628691995e-06, "loss": 0.6554, "step": 331 }, { "epoch": 0.02, "grad_norm": 1.2560125589370728, "learning_rate": 7.004219409282701e-06, "loss": 0.6123, "step": 332 }, { "epoch": 0.02, "grad_norm": 1.2068290710449219, "learning_rate": 7.025316455696203e-06, "loss": 0.6428, "step": 333 }, { "epoch": 0.02, "grad_norm": 1.1613904237747192, "learning_rate": 7.046413502109706e-06, "loss": 0.5978, "step": 334 }, { "epoch": 0.02, "grad_norm": 1.3051648139953613, "learning_rate": 7.067510548523207e-06, "loss": 0.686, "step": 335 }, { "epoch": 0.02, "grad_norm": 1.2591476440429688, "learning_rate": 7.08860759493671e-06, "loss": 0.5891, "step": 336 }, { "epoch": 0.02, "grad_norm": 1.2479435205459595, "learning_rate": 7.109704641350211e-06, "loss": 0.6221, "step": 337 }, { "epoch": 0.02, "grad_norm": 1.326296091079712, "learning_rate": 7.130801687763713e-06, "loss": 0.6327, "step": 338 }, { "epoch": 0.02, "grad_norm": 1.2220957279205322, "learning_rate": 7.151898734177216e-06, "loss": 0.6364, "step": 339 }, { "epoch": 0.02, "grad_norm": 1.1175003051757812, "learning_rate": 7.172995780590718e-06, "loss": 0.5575, "step": 340 }, { "epoch": 0.02, "grad_norm": 1.2847028970718384, "learning_rate": 7.19409282700422e-06, "loss": 0.5732, "step": 341 }, { "epoch": 0.02, "grad_norm": 1.261705756187439, "learning_rate": 7.215189873417722e-06, "loss": 0.6702, "step": 342 }, { "epoch": 0.02, "grad_norm": 1.3592685461044312, "learning_rate": 7.236286919831225e-06, "loss": 0.6816, "step": 343 }, { "epoch": 0.02, "grad_norm": 1.1502933502197266, "learning_rate": 7.257383966244726e-06, "loss": 0.6092, "step": 344 }, { "epoch": 0.02, "grad_norm": 1.1761858463287354, "learning_rate": 7.2784810126582285e-06, "loss": 0.6094, "step": 345 }, { "epoch": 0.02, "grad_norm": 1.249475121498108, "learning_rate": 7.29957805907173e-06, "loss": 0.6694, "step": 346 }, { "epoch": 0.02, "grad_norm": 1.2194502353668213, "learning_rate": 7.320675105485233e-06, "loss": 0.5863, "step": 347 }, { "epoch": 0.02, "grad_norm": 1.2675786018371582, "learning_rate": 7.341772151898735e-06, "loss": 0.6129, "step": 348 }, { "epoch": 0.02, "grad_norm": 1.1856513023376465, "learning_rate": 7.362869198312237e-06, "loss": 0.5751, "step": 349 }, { "epoch": 0.02, "grad_norm": 1.2041898965835571, "learning_rate": 7.3839662447257386e-06, "loss": 0.6197, "step": 350 }, { "epoch": 0.02, "grad_norm": 1.4077736139297485, "learning_rate": 7.405063291139241e-06, "loss": 0.6282, "step": 351 }, { "epoch": 0.02, "grad_norm": 1.231197714805603, "learning_rate": 7.426160337552744e-06, "loss": 0.6035, "step": 352 }, { "epoch": 0.02, "grad_norm": 1.1258540153503418, "learning_rate": 7.447257383966245e-06, "loss": 0.613, "step": 353 }, { "epoch": 0.02, "grad_norm": 1.286897897720337, "learning_rate": 7.468354430379747e-06, "loss": 0.5775, "step": 354 }, { "epoch": 0.02, "grad_norm": 1.216144323348999, "learning_rate": 7.4894514767932495e-06, "loss": 0.6519, "step": 355 }, { "epoch": 0.02, "grad_norm": 1.3355437517166138, "learning_rate": 7.510548523206752e-06, "loss": 0.6512, "step": 356 }, { "epoch": 0.02, "grad_norm": 1.551064372062683, "learning_rate": 7.531645569620254e-06, "loss": 0.5391, "step": 357 }, { "epoch": 0.02, "grad_norm": 1.2588145732879639, "learning_rate": 7.552742616033756e-06, "loss": 0.6467, "step": 358 }, { "epoch": 0.02, "grad_norm": 1.2731475830078125, "learning_rate": 7.5738396624472574e-06, "loss": 0.6195, "step": 359 }, { "epoch": 0.02, "grad_norm": 1.2335354089736938, "learning_rate": 7.5949367088607605e-06, "loss": 0.5462, "step": 360 }, { "epoch": 0.02, "grad_norm": 1.2900036573410034, "learning_rate": 7.616033755274263e-06, "loss": 0.6777, "step": 361 }, { "epoch": 0.02, "grad_norm": 1.3332929611206055, "learning_rate": 7.637130801687764e-06, "loss": 0.5707, "step": 362 }, { "epoch": 0.02, "grad_norm": 1.2227133512496948, "learning_rate": 7.658227848101265e-06, "loss": 0.6009, "step": 363 }, { "epoch": 0.02, "grad_norm": 1.2745919227600098, "learning_rate": 7.679324894514768e-06, "loss": 0.6533, "step": 364 }, { "epoch": 0.02, "grad_norm": 1.2688459157943726, "learning_rate": 7.700421940928271e-06, "loss": 0.6577, "step": 365 }, { "epoch": 0.02, "grad_norm": 1.0965393781661987, "learning_rate": 7.721518987341773e-06, "loss": 0.5903, "step": 366 }, { "epoch": 0.02, "grad_norm": 1.2288157939910889, "learning_rate": 7.742616033755274e-06, "loss": 0.5953, "step": 367 }, { "epoch": 0.02, "grad_norm": 1.3503426313400269, "learning_rate": 7.763713080168777e-06, "loss": 0.6585, "step": 368 }, { "epoch": 0.02, "grad_norm": 1.1747641563415527, "learning_rate": 7.78481012658228e-06, "loss": 0.6289, "step": 369 }, { "epoch": 0.02, "grad_norm": 1.2812758684158325, "learning_rate": 7.805907172995782e-06, "loss": 0.638, "step": 370 }, { "epoch": 0.02, "grad_norm": 1.3146843910217285, "learning_rate": 7.827004219409283e-06, "loss": 0.6224, "step": 371 }, { "epoch": 0.02, "grad_norm": 1.1805860996246338, "learning_rate": 7.848101265822786e-06, "loss": 0.5742, "step": 372 }, { "epoch": 0.02, "grad_norm": 1.3260769844055176, "learning_rate": 7.869198312236287e-06, "loss": 0.5949, "step": 373 }, { "epoch": 0.02, "grad_norm": 1.2296415567398071, "learning_rate": 7.89029535864979e-06, "loss": 0.5776, "step": 374 }, { "epoch": 0.02, "grad_norm": 1.2580851316452026, "learning_rate": 7.911392405063292e-06, "loss": 0.6392, "step": 375 }, { "epoch": 0.02, "grad_norm": 1.1601670980453491, "learning_rate": 7.932489451476793e-06, "loss": 0.5467, "step": 376 }, { "epoch": 0.02, "grad_norm": 1.3382383584976196, "learning_rate": 7.953586497890296e-06, "loss": 0.6324, "step": 377 }, { "epoch": 0.02, "grad_norm": 1.2548341751098633, "learning_rate": 7.974683544303799e-06, "loss": 0.6006, "step": 378 }, { "epoch": 0.02, "grad_norm": 1.270756721496582, "learning_rate": 7.9957805907173e-06, "loss": 0.6084, "step": 379 }, { "epoch": 0.02, "grad_norm": 1.4024615287780762, "learning_rate": 8.016877637130802e-06, "loss": 0.648, "step": 380 }, { "epoch": 0.02, "grad_norm": 1.0922244787216187, "learning_rate": 8.037974683544305e-06, "loss": 0.5659, "step": 381 }, { "epoch": 0.02, "grad_norm": 1.2462538480758667, "learning_rate": 8.059071729957806e-06, "loss": 0.6239, "step": 382 }, { "epoch": 0.02, "grad_norm": 1.3170617818832397, "learning_rate": 8.080168776371309e-06, "loss": 0.6455, "step": 383 }, { "epoch": 0.02, "grad_norm": 1.2223594188690186, "learning_rate": 8.10126582278481e-06, "loss": 0.6454, "step": 384 }, { "epoch": 0.02, "grad_norm": 1.2347493171691895, "learning_rate": 8.122362869198312e-06, "loss": 0.5671, "step": 385 }, { "epoch": 0.02, "grad_norm": 1.1949635744094849, "learning_rate": 8.143459915611815e-06, "loss": 0.635, "step": 386 }, { "epoch": 0.02, "grad_norm": 1.260286569595337, "learning_rate": 8.164556962025318e-06, "loss": 0.6135, "step": 387 }, { "epoch": 0.02, "grad_norm": 1.3054084777832031, "learning_rate": 8.18565400843882e-06, "loss": 0.6299, "step": 388 }, { "epoch": 0.02, "grad_norm": 1.4015529155731201, "learning_rate": 8.20675105485232e-06, "loss": 0.6197, "step": 389 }, { "epoch": 0.02, "grad_norm": 1.281866431236267, "learning_rate": 8.227848101265824e-06, "loss": 0.6863, "step": 390 }, { "epoch": 0.02, "grad_norm": 1.171478509902954, "learning_rate": 8.248945147679327e-06, "loss": 0.5865, "step": 391 }, { "epoch": 0.02, "grad_norm": 1.2968735694885254, "learning_rate": 8.270042194092828e-06, "loss": 0.6147, "step": 392 }, { "epoch": 0.02, "grad_norm": 1.1304808855056763, "learning_rate": 8.29113924050633e-06, "loss": 0.5535, "step": 393 }, { "epoch": 0.02, "grad_norm": 1.1816270351409912, "learning_rate": 8.31223628691983e-06, "loss": 0.6069, "step": 394 }, { "epoch": 0.03, "grad_norm": 1.217431902885437, "learning_rate": 8.333333333333334e-06, "loss": 0.5988, "step": 395 }, { "epoch": 0.03, "grad_norm": 1.1394872665405273, "learning_rate": 8.354430379746837e-06, "loss": 0.5513, "step": 396 }, { "epoch": 0.03, "grad_norm": 1.251174807548523, "learning_rate": 8.375527426160338e-06, "loss": 0.6487, "step": 397 }, { "epoch": 0.03, "grad_norm": 1.2142462730407715, "learning_rate": 8.39662447257384e-06, "loss": 0.6135, "step": 398 }, { "epoch": 0.03, "grad_norm": 1.1851967573165894, "learning_rate": 8.417721518987342e-06, "loss": 0.5548, "step": 399 }, { "epoch": 0.03, "grad_norm": 1.1517951488494873, "learning_rate": 8.438818565400846e-06, "loss": 0.5771, "step": 400 }, { "epoch": 0.03, "grad_norm": 1.4722862243652344, "learning_rate": 8.459915611814347e-06, "loss": 0.6222, "step": 401 }, { "epoch": 0.03, "grad_norm": 1.2671726942062378, "learning_rate": 8.481012658227848e-06, "loss": 0.592, "step": 402 }, { "epoch": 0.03, "grad_norm": 1.3089793920516968, "learning_rate": 8.502109704641351e-06, "loss": 0.6425, "step": 403 }, { "epoch": 0.03, "grad_norm": 1.251240849494934, "learning_rate": 8.523206751054853e-06, "loss": 0.585, "step": 404 }, { "epoch": 0.03, "grad_norm": 1.3644077777862549, "learning_rate": 8.544303797468356e-06, "loss": 0.6152, "step": 405 }, { "epoch": 0.03, "grad_norm": 1.3811988830566406, "learning_rate": 8.565400843881857e-06, "loss": 0.647, "step": 406 }, { "epoch": 0.03, "grad_norm": 1.2296816110610962, "learning_rate": 8.586497890295358e-06, "loss": 0.6464, "step": 407 }, { "epoch": 0.03, "grad_norm": 1.3101154565811157, "learning_rate": 8.607594936708861e-06, "loss": 0.5907, "step": 408 }, { "epoch": 0.03, "grad_norm": 1.3449515104293823, "learning_rate": 8.628691983122364e-06, "loss": 0.6398, "step": 409 }, { "epoch": 0.03, "grad_norm": 1.2597445249557495, "learning_rate": 8.649789029535866e-06, "loss": 0.6462, "step": 410 }, { "epoch": 0.03, "grad_norm": 1.109322428703308, "learning_rate": 8.670886075949367e-06, "loss": 0.6258, "step": 411 }, { "epoch": 0.03, "grad_norm": 1.2519370317459106, "learning_rate": 8.69198312236287e-06, "loss": 0.7079, "step": 412 }, { "epoch": 0.03, "grad_norm": 1.2595500946044922, "learning_rate": 8.713080168776371e-06, "loss": 0.6475, "step": 413 }, { "epoch": 0.03, "grad_norm": 1.129270076751709, "learning_rate": 8.734177215189874e-06, "loss": 0.5764, "step": 414 }, { "epoch": 0.03, "grad_norm": 1.285945177078247, "learning_rate": 8.755274261603376e-06, "loss": 0.6278, "step": 415 }, { "epoch": 0.03, "grad_norm": 1.2584099769592285, "learning_rate": 8.776371308016879e-06, "loss": 0.6143, "step": 416 }, { "epoch": 0.03, "grad_norm": 1.2111893892288208, "learning_rate": 8.79746835443038e-06, "loss": 0.6285, "step": 417 }, { "epoch": 0.03, "grad_norm": 1.1318415403366089, "learning_rate": 8.818565400843883e-06, "loss": 0.6495, "step": 418 }, { "epoch": 0.03, "grad_norm": 1.1128448247909546, "learning_rate": 8.839662447257385e-06, "loss": 0.5995, "step": 419 }, { "epoch": 0.03, "grad_norm": 1.1388238668441772, "learning_rate": 8.860759493670886e-06, "loss": 0.6209, "step": 420 }, { "epoch": 0.03, "grad_norm": 1.27338445186615, "learning_rate": 8.881856540084389e-06, "loss": 0.6759, "step": 421 }, { "epoch": 0.03, "grad_norm": 1.169218897819519, "learning_rate": 8.90295358649789e-06, "loss": 0.5423, "step": 422 }, { "epoch": 0.03, "grad_norm": 1.2038993835449219, "learning_rate": 8.924050632911393e-06, "loss": 0.5966, "step": 423 }, { "epoch": 0.03, "grad_norm": 1.1410913467407227, "learning_rate": 8.945147679324895e-06, "loss": 0.5852, "step": 424 }, { "epoch": 0.03, "grad_norm": 1.1868853569030762, "learning_rate": 8.966244725738398e-06, "loss": 0.5782, "step": 425 }, { "epoch": 0.03, "grad_norm": 1.2283443212509155, "learning_rate": 8.987341772151899e-06, "loss": 0.5799, "step": 426 }, { "epoch": 0.03, "grad_norm": 1.3854525089263916, "learning_rate": 9.008438818565402e-06, "loss": 0.6789, "step": 427 }, { "epoch": 0.03, "grad_norm": 1.2753512859344482, "learning_rate": 9.029535864978903e-06, "loss": 0.5774, "step": 428 }, { "epoch": 0.03, "grad_norm": 1.3836292028427124, "learning_rate": 9.050632911392407e-06, "loss": 0.6171, "step": 429 }, { "epoch": 0.03, "grad_norm": 1.2557188272476196, "learning_rate": 9.071729957805908e-06, "loss": 0.6662, "step": 430 }, { "epoch": 0.03, "grad_norm": 1.2048022747039795, "learning_rate": 9.09282700421941e-06, "loss": 0.6439, "step": 431 }, { "epoch": 0.03, "grad_norm": 1.355612874031067, "learning_rate": 9.113924050632912e-06, "loss": 0.6367, "step": 432 }, { "epoch": 0.03, "grad_norm": 1.238109827041626, "learning_rate": 9.135021097046414e-06, "loss": 0.5903, "step": 433 }, { "epoch": 0.03, "grad_norm": 1.22252357006073, "learning_rate": 9.156118143459917e-06, "loss": 0.6198, "step": 434 }, { "epoch": 0.03, "grad_norm": 1.4150843620300293, "learning_rate": 9.177215189873418e-06, "loss": 0.6179, "step": 435 }, { "epoch": 0.03, "grad_norm": 1.2307286262512207, "learning_rate": 9.198312236286921e-06, "loss": 0.5735, "step": 436 }, { "epoch": 0.03, "grad_norm": 1.193342685699463, "learning_rate": 9.219409282700422e-06, "loss": 0.6004, "step": 437 }, { "epoch": 0.03, "grad_norm": 1.158647894859314, "learning_rate": 9.240506329113925e-06, "loss": 0.6049, "step": 438 }, { "epoch": 0.03, "grad_norm": 1.204749584197998, "learning_rate": 9.261603375527427e-06, "loss": 0.664, "step": 439 }, { "epoch": 0.03, "grad_norm": 1.111306071281433, "learning_rate": 9.28270042194093e-06, "loss": 0.6079, "step": 440 }, { "epoch": 0.03, "grad_norm": 1.299516201019287, "learning_rate": 9.303797468354431e-06, "loss": 0.6563, "step": 441 }, { "epoch": 0.03, "grad_norm": 1.3435544967651367, "learning_rate": 9.324894514767934e-06, "loss": 0.6232, "step": 442 }, { "epoch": 0.03, "grad_norm": 1.2585128545761108, "learning_rate": 9.345991561181435e-06, "loss": 0.6193, "step": 443 }, { "epoch": 0.03, "grad_norm": 1.248583197593689, "learning_rate": 9.367088607594937e-06, "loss": 0.6057, "step": 444 }, { "epoch": 0.03, "grad_norm": 1.1512912511825562, "learning_rate": 9.38818565400844e-06, "loss": 0.6147, "step": 445 }, { "epoch": 0.03, "grad_norm": 1.2142647504806519, "learning_rate": 9.409282700421943e-06, "loss": 0.5901, "step": 446 }, { "epoch": 0.03, "grad_norm": 1.3141642808914185, "learning_rate": 9.430379746835444e-06, "loss": 0.6572, "step": 447 }, { "epoch": 0.03, "grad_norm": 1.09758722782135, "learning_rate": 9.451476793248946e-06, "loss": 0.5999, "step": 448 }, { "epoch": 0.03, "grad_norm": 1.1424005031585693, "learning_rate": 9.472573839662449e-06, "loss": 0.5772, "step": 449 }, { "epoch": 0.03, "grad_norm": 1.171758770942688, "learning_rate": 9.49367088607595e-06, "loss": 0.6537, "step": 450 }, { "epoch": 0.03, "grad_norm": 1.0351412296295166, "learning_rate": 9.514767932489453e-06, "loss": 0.5663, "step": 451 }, { "epoch": 0.03, "grad_norm": 1.1341289281845093, "learning_rate": 9.535864978902954e-06, "loss": 0.6281, "step": 452 }, { "epoch": 0.03, "grad_norm": 1.115142822265625, "learning_rate": 9.556962025316456e-06, "loss": 0.5919, "step": 453 }, { "epoch": 0.03, "grad_norm": 1.2643224000930786, "learning_rate": 9.578059071729959e-06, "loss": 0.6949, "step": 454 }, { "epoch": 0.03, "grad_norm": 1.2069917917251587, "learning_rate": 9.599156118143462e-06, "loss": 0.6319, "step": 455 }, { "epoch": 0.03, "grad_norm": 1.1984665393829346, "learning_rate": 9.620253164556963e-06, "loss": 0.6516, "step": 456 }, { "epoch": 0.03, "grad_norm": 1.4079281091690063, "learning_rate": 9.641350210970464e-06, "loss": 0.6593, "step": 457 }, { "epoch": 0.03, "grad_norm": 1.1656159162521362, "learning_rate": 9.662447257383967e-06, "loss": 0.5894, "step": 458 }, { "epoch": 0.03, "grad_norm": 1.1576460599899292, "learning_rate": 9.68354430379747e-06, "loss": 0.5557, "step": 459 }, { "epoch": 0.03, "grad_norm": 1.2340763807296753, "learning_rate": 9.704641350210972e-06, "loss": 0.6803, "step": 460 }, { "epoch": 0.03, "grad_norm": 1.1936951875686646, "learning_rate": 9.725738396624473e-06, "loss": 0.5841, "step": 461 }, { "epoch": 0.03, "grad_norm": 1.1302146911621094, "learning_rate": 9.746835443037975e-06, "loss": 0.5765, "step": 462 }, { "epoch": 0.03, "grad_norm": 1.3100109100341797, "learning_rate": 9.767932489451478e-06, "loss": 0.6448, "step": 463 }, { "epoch": 0.03, "grad_norm": 1.223763346672058, "learning_rate": 9.78902953586498e-06, "loss": 0.6135, "step": 464 }, { "epoch": 0.03, "grad_norm": 1.2290325164794922, "learning_rate": 9.810126582278482e-06, "loss": 0.6517, "step": 465 }, { "epoch": 0.03, "grad_norm": 1.3180433511734009, "learning_rate": 9.831223628691983e-06, "loss": 0.6086, "step": 466 }, { "epoch": 0.03, "grad_norm": 1.0901906490325928, "learning_rate": 9.852320675105486e-06, "loss": 0.5328, "step": 467 }, { "epoch": 0.03, "grad_norm": 1.3592183589935303, "learning_rate": 9.87341772151899e-06, "loss": 0.6652, "step": 468 }, { "epoch": 0.03, "grad_norm": 1.3109347820281982, "learning_rate": 9.89451476793249e-06, "loss": 0.6091, "step": 469 }, { "epoch": 0.03, "grad_norm": 1.2154662609100342, "learning_rate": 9.915611814345992e-06, "loss": 0.6194, "step": 470 }, { "epoch": 0.03, "grad_norm": 1.1480965614318848, "learning_rate": 9.936708860759493e-06, "loss": 0.6695, "step": 471 }, { "epoch": 0.03, "grad_norm": 1.3060542345046997, "learning_rate": 9.957805907172996e-06, "loss": 0.6688, "step": 472 }, { "epoch": 0.03, "grad_norm": 1.5538403987884521, "learning_rate": 9.9789029535865e-06, "loss": 0.6385, "step": 473 }, { "epoch": 0.03, "grad_norm": 1.187518835067749, "learning_rate": 1e-05, "loss": 0.6299, "step": 474 }, { "epoch": 0.03, "grad_norm": 1.238229751586914, "learning_rate": 9.999999894733699e-06, "loss": 0.6207, "step": 475 }, { "epoch": 0.03, "grad_norm": 1.2438807487487793, "learning_rate": 9.999999578934793e-06, "loss": 0.6678, "step": 476 }, { "epoch": 0.03, "grad_norm": 1.2306510210037231, "learning_rate": 9.9999990526033e-06, "loss": 0.5857, "step": 477 }, { "epoch": 0.03, "grad_norm": 1.2313143014907837, "learning_rate": 9.99999831573924e-06, "loss": 0.5777, "step": 478 }, { "epoch": 0.03, "grad_norm": 1.2812029123306274, "learning_rate": 9.999997368342644e-06, "loss": 0.6478, "step": 479 }, { "epoch": 0.03, "grad_norm": 1.1557166576385498, "learning_rate": 9.999996210413553e-06, "loss": 0.6285, "step": 480 }, { "epoch": 0.03, "grad_norm": 1.1735140085220337, "learning_rate": 9.999994841952016e-06, "loss": 0.6189, "step": 481 }, { "epoch": 0.03, "grad_norm": 1.2090628147125244, "learning_rate": 9.99999326295809e-06, "loss": 0.672, "step": 482 }, { "epoch": 0.03, "grad_norm": 1.2201446294784546, "learning_rate": 9.99999147343184e-06, "loss": 0.5718, "step": 483 }, { "epoch": 0.03, "grad_norm": 1.196315884590149, "learning_rate": 9.999989473373344e-06, "loss": 0.5772, "step": 484 }, { "epoch": 0.03, "grad_norm": 1.229958176612854, "learning_rate": 9.999987262782684e-06, "loss": 0.6194, "step": 485 }, { "epoch": 0.03, "grad_norm": 1.2189887762069702, "learning_rate": 9.999984841659955e-06, "loss": 0.6379, "step": 486 }, { "epoch": 0.03, "grad_norm": 1.4202560186386108, "learning_rate": 9.999982210005258e-06, "loss": 0.6053, "step": 487 }, { "epoch": 0.03, "grad_norm": 1.1163641214370728, "learning_rate": 9.999979367818704e-06, "loss": 0.5955, "step": 488 }, { "epoch": 0.03, "grad_norm": 1.1054067611694336, "learning_rate": 9.999976315100412e-06, "loss": 0.568, "step": 489 }, { "epoch": 0.03, "grad_norm": 1.2214596271514893, "learning_rate": 9.99997305185051e-06, "loss": 0.5809, "step": 490 }, { "epoch": 0.03, "grad_norm": 1.1755683422088623, "learning_rate": 9.999969578069137e-06, "loss": 0.6626, "step": 491 }, { "epoch": 0.03, "grad_norm": 1.170556664466858, "learning_rate": 9.99996589375644e-06, "loss": 0.5958, "step": 492 }, { "epoch": 0.03, "grad_norm": 1.284775733947754, "learning_rate": 9.999961998912573e-06, "loss": 0.6206, "step": 493 }, { "epoch": 0.03, "grad_norm": 1.216280221939087, "learning_rate": 9.999957893537697e-06, "loss": 0.6323, "step": 494 }, { "epoch": 0.03, "grad_norm": 1.2194132804870605, "learning_rate": 9.999953577631991e-06, "loss": 0.6386, "step": 495 }, { "epoch": 0.03, "grad_norm": 1.2110742330551147, "learning_rate": 9.999949051195631e-06, "loss": 0.6262, "step": 496 }, { "epoch": 0.03, "grad_norm": 1.1123095750808716, "learning_rate": 9.999944314228811e-06, "loss": 0.59, "step": 497 }, { "epoch": 0.03, "grad_norm": 1.190177083015442, "learning_rate": 9.99993936673173e-06, "loss": 0.5809, "step": 498 }, { "epoch": 0.03, "grad_norm": 1.1872690916061401, "learning_rate": 9.999934208704595e-06, "loss": 0.5754, "step": 499 }, { "epoch": 0.03, "grad_norm": 1.129944920539856, "learning_rate": 9.999928840147624e-06, "loss": 0.6183, "step": 500 }, { "epoch": 0.03, "grad_norm": 1.1830451488494873, "learning_rate": 9.999923261061043e-06, "loss": 0.6128, "step": 501 }, { "epoch": 0.03, "grad_norm": 1.171960711479187, "learning_rate": 9.999917471445086e-06, "loss": 0.5835, "step": 502 }, { "epoch": 0.03, "grad_norm": 1.1681584119796753, "learning_rate": 9.999911471299998e-06, "loss": 0.617, "step": 503 }, { "epoch": 0.03, "grad_norm": 1.2297478914260864, "learning_rate": 9.999905260626033e-06, "loss": 0.6365, "step": 504 }, { "epoch": 0.03, "grad_norm": 1.1890262365341187, "learning_rate": 9.99989883942345e-06, "loss": 0.6715, "step": 505 }, { "epoch": 0.03, "grad_norm": 1.2099827527999878, "learning_rate": 9.999892207692521e-06, "loss": 0.5958, "step": 506 }, { "epoch": 0.03, "grad_norm": 1.4914437532424927, "learning_rate": 9.999885365433523e-06, "loss": 0.6158, "step": 507 }, { "epoch": 0.03, "grad_norm": 1.2526476383209229, "learning_rate": 9.999878312646748e-06, "loss": 0.6795, "step": 508 }, { "epoch": 0.03, "grad_norm": 1.2908756732940674, "learning_rate": 9.999871049332488e-06, "loss": 0.6971, "step": 509 }, { "epoch": 0.03, "grad_norm": 1.206207036972046, "learning_rate": 9.999863575491053e-06, "loss": 0.6511, "step": 510 }, { "epoch": 0.03, "grad_norm": 1.1960170269012451, "learning_rate": 9.999855891122754e-06, "loss": 0.6687, "step": 511 }, { "epoch": 0.03, "grad_norm": 1.252910852432251, "learning_rate": 9.999847996227918e-06, "loss": 0.6606, "step": 512 }, { "epoch": 0.03, "grad_norm": 1.2176874876022339, "learning_rate": 9.999839890806877e-06, "loss": 0.6424, "step": 513 }, { "epoch": 0.03, "grad_norm": 1.1943899393081665, "learning_rate": 9.99983157485997e-06, "loss": 0.632, "step": 514 }, { "epoch": 0.03, "grad_norm": 1.1325303316116333, "learning_rate": 9.99982304838755e-06, "loss": 0.6118, "step": 515 }, { "epoch": 0.03, "grad_norm": 1.2069593667984009, "learning_rate": 9.999814311389973e-06, "loss": 0.5657, "step": 516 }, { "epoch": 0.03, "grad_norm": 1.1481478214263916, "learning_rate": 9.99980536386761e-06, "loss": 0.6379, "step": 517 }, { "epoch": 0.03, "grad_norm": 1.1180585622787476, "learning_rate": 9.999796205820835e-06, "loss": 0.6024, "step": 518 }, { "epoch": 0.03, "grad_norm": 1.2709500789642334, "learning_rate": 9.999786837250034e-06, "loss": 0.6019, "step": 519 }, { "epoch": 0.03, "grad_norm": 1.2685295343399048, "learning_rate": 9.999777258155604e-06, "loss": 0.6268, "step": 520 }, { "epoch": 0.03, "grad_norm": 1.2882782220840454, "learning_rate": 9.999767468537947e-06, "loss": 0.6204, "step": 521 }, { "epoch": 0.03, "grad_norm": 1.2946155071258545, "learning_rate": 9.999757468397473e-06, "loss": 0.6244, "step": 522 }, { "epoch": 0.03, "grad_norm": 1.1825463771820068, "learning_rate": 9.999747257734605e-06, "loss": 0.5921, "step": 523 }, { "epoch": 0.03, "grad_norm": 1.3448220491409302, "learning_rate": 9.999736836549773e-06, "loss": 0.6155, "step": 524 }, { "epoch": 0.03, "grad_norm": 1.0695064067840576, "learning_rate": 9.999726204843417e-06, "loss": 0.573, "step": 525 }, { "epoch": 0.03, "grad_norm": 1.0890889167785645, "learning_rate": 9.999715362615983e-06, "loss": 0.5706, "step": 526 }, { "epoch": 0.03, "grad_norm": 1.129686951637268, "learning_rate": 9.999704309867926e-06, "loss": 0.6327, "step": 527 }, { "epoch": 0.03, "grad_norm": 1.162773847579956, "learning_rate": 9.999693046599715e-06, "loss": 0.5607, "step": 528 }, { "epoch": 0.03, "grad_norm": 1.2680752277374268, "learning_rate": 9.99968157281182e-06, "loss": 0.5889, "step": 529 }, { "epoch": 0.03, "grad_norm": 1.2788618803024292, "learning_rate": 9.999669888504731e-06, "loss": 0.606, "step": 530 }, { "epoch": 0.03, "grad_norm": 1.1362082958221436, "learning_rate": 9.999657993678932e-06, "loss": 0.6007, "step": 531 }, { "epoch": 0.03, "grad_norm": 1.184064269065857, "learning_rate": 9.999645888334927e-06, "loss": 0.6283, "step": 532 }, { "epoch": 0.03, "grad_norm": 1.1241337060928345, "learning_rate": 9.999633572473228e-06, "loss": 0.6264, "step": 533 }, { "epoch": 0.03, "grad_norm": 1.2259488105773926, "learning_rate": 9.999621046094353e-06, "loss": 0.6546, "step": 534 }, { "epoch": 0.03, "grad_norm": 1.1768872737884521, "learning_rate": 9.999608309198827e-06, "loss": 0.5647, "step": 535 }, { "epoch": 0.03, "grad_norm": 1.1303322315216064, "learning_rate": 9.999595361787187e-06, "loss": 0.5865, "step": 536 }, { "epoch": 0.03, "grad_norm": 1.2701512575149536, "learning_rate": 9.999582203859977e-06, "loss": 0.6254, "step": 537 }, { "epoch": 0.03, "grad_norm": 1.2194970846176147, "learning_rate": 9.999568835417755e-06, "loss": 0.6115, "step": 538 }, { "epoch": 0.03, "grad_norm": 1.1903170347213745, "learning_rate": 9.99955525646108e-06, "loss": 0.6354, "step": 539 }, { "epoch": 0.03, "grad_norm": 1.1576377153396606, "learning_rate": 9.999541466990526e-06, "loss": 0.6241, "step": 540 }, { "epoch": 0.03, "grad_norm": 1.1156654357910156, "learning_rate": 9.999527467006674e-06, "loss": 0.5825, "step": 541 }, { "epoch": 0.03, "grad_norm": 1.1373095512390137, "learning_rate": 9.999513256510112e-06, "loss": 0.59, "step": 542 }, { "epoch": 0.03, "grad_norm": 1.4597004652023315, "learning_rate": 9.999498835501438e-06, "loss": 0.641, "step": 543 }, { "epoch": 0.03, "grad_norm": 1.1645809412002563, "learning_rate": 9.99948420398126e-06, "loss": 0.6794, "step": 544 }, { "epoch": 0.03, "grad_norm": 1.2074896097183228, "learning_rate": 9.999469361950195e-06, "loss": 0.6506, "step": 545 }, { "epoch": 0.03, "grad_norm": 1.1769201755523682, "learning_rate": 9.999454309408868e-06, "loss": 0.6291, "step": 546 }, { "epoch": 0.03, "grad_norm": 1.1868668794631958, "learning_rate": 9.999439046357908e-06, "loss": 0.6222, "step": 547 }, { "epoch": 0.03, "grad_norm": 1.2111363410949707, "learning_rate": 9.999423572797964e-06, "loss": 0.5993, "step": 548 }, { "epoch": 0.03, "grad_norm": 1.208024024963379, "learning_rate": 9.999407888729686e-06, "loss": 0.618, "step": 549 }, { "epoch": 0.03, "grad_norm": 1.2100365161895752, "learning_rate": 9.999391994153734e-06, "loss": 0.5905, "step": 550 }, { "epoch": 0.03, "grad_norm": 1.2433186769485474, "learning_rate": 9.999375889070773e-06, "loss": 0.6088, "step": 551 }, { "epoch": 0.03, "grad_norm": 1.0811570882797241, "learning_rate": 9.99935957348149e-06, "loss": 0.6018, "step": 552 }, { "epoch": 0.04, "grad_norm": 1.0921618938446045, "learning_rate": 9.999343047386562e-06, "loss": 0.5659, "step": 553 }, { "epoch": 0.04, "grad_norm": 1.173858880996704, "learning_rate": 9.999326310786692e-06, "loss": 0.5816, "step": 554 }, { "epoch": 0.04, "grad_norm": 1.1979855298995972, "learning_rate": 9.999309363682582e-06, "loss": 0.5685, "step": 555 }, { "epoch": 0.04, "grad_norm": 1.1871007680892944, "learning_rate": 9.999292206074946e-06, "loss": 0.5614, "step": 556 }, { "epoch": 0.04, "grad_norm": 1.2109037637710571, "learning_rate": 9.999274837964507e-06, "loss": 0.6184, "step": 557 }, { "epoch": 0.04, "grad_norm": 1.3156616687774658, "learning_rate": 9.999257259351995e-06, "loss": 0.6321, "step": 558 }, { "epoch": 0.04, "grad_norm": 1.1706651449203491, "learning_rate": 9.999239470238151e-06, "loss": 0.5902, "step": 559 }, { "epoch": 0.04, "grad_norm": 1.1345263719558716, "learning_rate": 9.999221470623726e-06, "loss": 0.6015, "step": 560 }, { "epoch": 0.04, "grad_norm": 1.209594488143921, "learning_rate": 9.999203260509473e-06, "loss": 0.5741, "step": 561 }, { "epoch": 0.04, "grad_norm": 1.112541913986206, "learning_rate": 9.999184839896163e-06, "loss": 0.6346, "step": 562 }, { "epoch": 0.04, "grad_norm": 1.1392542123794556, "learning_rate": 9.99916620878457e-06, "loss": 0.5987, "step": 563 }, { "epoch": 0.04, "grad_norm": 1.2119113206863403, "learning_rate": 9.99914736717548e-06, "loss": 0.6329, "step": 564 }, { "epoch": 0.04, "grad_norm": 1.1497561931610107, "learning_rate": 9.999128315069684e-06, "loss": 0.6207, "step": 565 }, { "epoch": 0.04, "grad_norm": 1.2501511573791504, "learning_rate": 9.999109052467986e-06, "loss": 0.621, "step": 566 }, { "epoch": 0.04, "grad_norm": 1.1397020816802979, "learning_rate": 9.999089579371195e-06, "loss": 0.594, "step": 567 }, { "epoch": 0.04, "grad_norm": 1.1550371646881104, "learning_rate": 9.999069895780133e-06, "loss": 0.5853, "step": 568 }, { "epoch": 0.04, "grad_norm": 1.1494985818862915, "learning_rate": 9.99905000169563e-06, "loss": 0.5681, "step": 569 }, { "epoch": 0.04, "grad_norm": 1.1133198738098145, "learning_rate": 9.99902989711852e-06, "loss": 0.5421, "step": 570 }, { "epoch": 0.04, "grad_norm": 1.1223570108413696, "learning_rate": 9.99900958204965e-06, "loss": 0.596, "step": 571 }, { "epoch": 0.04, "grad_norm": 1.1596499681472778, "learning_rate": 9.99898905648988e-06, "loss": 0.5999, "step": 572 }, { "epoch": 0.04, "grad_norm": 1.1666220426559448, "learning_rate": 9.998968320440068e-06, "loss": 0.599, "step": 573 }, { "epoch": 0.04, "grad_norm": 1.1381292343139648, "learning_rate": 9.998947373901092e-06, "loss": 0.5885, "step": 574 }, { "epoch": 0.04, "grad_norm": 1.002712607383728, "learning_rate": 9.998926216873833e-06, "loss": 0.5451, "step": 575 }, { "epoch": 0.04, "grad_norm": 1.16362726688385, "learning_rate": 9.998904849359179e-06, "loss": 0.6408, "step": 576 }, { "epoch": 0.04, "grad_norm": 1.1892794370651245, "learning_rate": 9.998883271358033e-06, "loss": 0.6021, "step": 577 }, { "epoch": 0.04, "grad_norm": 1.097153902053833, "learning_rate": 9.998861482871303e-06, "loss": 0.5795, "step": 578 }, { "epoch": 0.04, "grad_norm": 1.1804133653640747, "learning_rate": 9.998839483899904e-06, "loss": 0.6161, "step": 579 }, { "epoch": 0.04, "grad_norm": 1.2267212867736816, "learning_rate": 9.998817274444765e-06, "loss": 0.6092, "step": 580 }, { "epoch": 0.04, "grad_norm": 1.1391352415084839, "learning_rate": 9.998794854506819e-06, "loss": 0.5594, "step": 581 }, { "epoch": 0.04, "grad_norm": 1.1507866382598877, "learning_rate": 9.998772224087011e-06, "loss": 0.6018, "step": 582 }, { "epoch": 0.04, "grad_norm": 1.114465355873108, "learning_rate": 9.998749383186296e-06, "loss": 0.5713, "step": 583 }, { "epoch": 0.04, "grad_norm": 1.209734320640564, "learning_rate": 9.998726331805632e-06, "loss": 0.5776, "step": 584 }, { "epoch": 0.04, "grad_norm": 1.1595596075057983, "learning_rate": 9.998703069945995e-06, "loss": 0.5707, "step": 585 }, { "epoch": 0.04, "grad_norm": 1.2344090938568115, "learning_rate": 9.998679597608357e-06, "loss": 0.6657, "step": 586 }, { "epoch": 0.04, "grad_norm": 1.140102744102478, "learning_rate": 9.998655914793711e-06, "loss": 0.5602, "step": 587 }, { "epoch": 0.04, "grad_norm": 1.185093641281128, "learning_rate": 9.998632021503055e-06, "loss": 0.5627, "step": 588 }, { "epoch": 0.04, "grad_norm": 1.0849400758743286, "learning_rate": 9.998607917737393e-06, "loss": 0.5439, "step": 589 }, { "epoch": 0.04, "grad_norm": 1.2265455722808838, "learning_rate": 9.99858360349774e-06, "loss": 0.5755, "step": 590 }, { "epoch": 0.04, "grad_norm": 1.20005202293396, "learning_rate": 9.99855907878512e-06, "loss": 0.5961, "step": 591 }, { "epoch": 0.04, "grad_norm": 1.128761887550354, "learning_rate": 9.998534343600567e-06, "loss": 0.5522, "step": 592 }, { "epoch": 0.04, "grad_norm": 1.067212700843811, "learning_rate": 9.99850939794512e-06, "loss": 0.6102, "step": 593 }, { "epoch": 0.04, "grad_norm": 1.1241345405578613, "learning_rate": 9.998484241819833e-06, "loss": 0.5949, "step": 594 }, { "epoch": 0.04, "grad_norm": 1.2213162183761597, "learning_rate": 9.99845887522576e-06, "loss": 0.5527, "step": 595 }, { "epoch": 0.04, "grad_norm": 1.2188106775283813, "learning_rate": 9.998433298163974e-06, "loss": 0.6375, "step": 596 }, { "epoch": 0.04, "grad_norm": 1.0957785844802856, "learning_rate": 9.99840751063555e-06, "loss": 0.6003, "step": 597 }, { "epoch": 0.04, "grad_norm": 1.1837674379348755, "learning_rate": 9.998381512641574e-06, "loss": 0.5664, "step": 598 }, { "epoch": 0.04, "grad_norm": 1.1946494579315186, "learning_rate": 9.99835530418314e-06, "loss": 0.632, "step": 599 }, { "epoch": 0.04, "grad_norm": 1.1648926734924316, "learning_rate": 9.998328885261352e-06, "loss": 0.6483, "step": 600 }, { "epoch": 0.04, "grad_norm": 1.1611378192901611, "learning_rate": 9.998302255877323e-06, "loss": 0.6712, "step": 601 }, { "epoch": 0.04, "grad_norm": 1.1499131917953491, "learning_rate": 9.998275416032176e-06, "loss": 0.5657, "step": 602 }, { "epoch": 0.04, "grad_norm": 1.2018954753875732, "learning_rate": 9.998248365727037e-06, "loss": 0.5774, "step": 603 }, { "epoch": 0.04, "grad_norm": 1.237338662147522, "learning_rate": 9.998221104963047e-06, "loss": 0.6025, "step": 604 }, { "epoch": 0.04, "grad_norm": 1.0457485914230347, "learning_rate": 9.998193633741353e-06, "loss": 0.5635, "step": 605 }, { "epoch": 0.04, "grad_norm": 1.1704199314117432, "learning_rate": 9.998165952063113e-06, "loss": 0.5775, "step": 606 }, { "epoch": 0.04, "grad_norm": 1.0876002311706543, "learning_rate": 9.998138059929493e-06, "loss": 0.5717, "step": 607 }, { "epoch": 0.04, "grad_norm": 1.2615928649902344, "learning_rate": 9.998109957341665e-06, "loss": 0.6188, "step": 608 }, { "epoch": 0.04, "grad_norm": 1.1969835758209229, "learning_rate": 9.998081644300815e-06, "loss": 0.6108, "step": 609 }, { "epoch": 0.04, "grad_norm": 1.231022834777832, "learning_rate": 9.998053120808133e-06, "loss": 0.5429, "step": 610 }, { "epoch": 0.04, "grad_norm": 1.2075358629226685, "learning_rate": 9.998024386864821e-06, "loss": 0.5969, "step": 611 }, { "epoch": 0.04, "grad_norm": 1.1285358667373657, "learning_rate": 9.99799544247209e-06, "loss": 0.5597, "step": 612 }, { "epoch": 0.04, "grad_norm": 1.1488717794418335, "learning_rate": 9.997966287631157e-06, "loss": 0.575, "step": 613 }, { "epoch": 0.04, "grad_norm": 1.1601454019546509, "learning_rate": 9.997936922343253e-06, "loss": 0.5865, "step": 614 }, { "epoch": 0.04, "grad_norm": 1.2259072065353394, "learning_rate": 9.997907346609608e-06, "loss": 0.6072, "step": 615 }, { "epoch": 0.04, "grad_norm": 1.1978145837783813, "learning_rate": 9.997877560431472e-06, "loss": 0.613, "step": 616 }, { "epoch": 0.04, "grad_norm": 1.1502573490142822, "learning_rate": 9.9978475638101e-06, "loss": 0.6282, "step": 617 }, { "epoch": 0.04, "grad_norm": 1.1207703351974487, "learning_rate": 9.997817356746751e-06, "loss": 0.5915, "step": 618 }, { "epoch": 0.04, "grad_norm": 1.049208641052246, "learning_rate": 9.9977869392427e-06, "loss": 0.5555, "step": 619 }, { "epoch": 0.04, "grad_norm": 1.2138818502426147, "learning_rate": 9.997756311299229e-06, "loss": 0.6416, "step": 620 }, { "epoch": 0.04, "grad_norm": 1.236164927482605, "learning_rate": 9.997725472917623e-06, "loss": 0.6295, "step": 621 }, { "epoch": 0.04, "grad_norm": 1.1690360307693481, "learning_rate": 9.997694424099184e-06, "loss": 0.5542, "step": 622 }, { "epoch": 0.04, "grad_norm": 1.0889374017715454, "learning_rate": 9.99766316484522e-06, "loss": 0.5348, "step": 623 }, { "epoch": 0.04, "grad_norm": 1.1452760696411133, "learning_rate": 9.997631695157043e-06, "loss": 0.5982, "step": 624 }, { "epoch": 0.04, "grad_norm": 1.1351194381713867, "learning_rate": 9.997600015035982e-06, "loss": 0.6595, "step": 625 }, { "epoch": 0.04, "grad_norm": 1.218604326248169, "learning_rate": 9.99756812448337e-06, "loss": 0.5595, "step": 626 }, { "epoch": 0.04, "grad_norm": 1.171190857887268, "learning_rate": 9.99753602350055e-06, "loss": 0.6254, "step": 627 }, { "epoch": 0.04, "grad_norm": 1.0584040880203247, "learning_rate": 9.997503712088873e-06, "loss": 0.5831, "step": 628 }, { "epoch": 0.04, "grad_norm": 1.1942036151885986, "learning_rate": 9.9974711902497e-06, "loss": 0.6383, "step": 629 }, { "epoch": 0.04, "grad_norm": 1.178575873374939, "learning_rate": 9.997438457984398e-06, "loss": 0.6528, "step": 630 }, { "epoch": 0.04, "grad_norm": 1.159193992614746, "learning_rate": 9.997405515294349e-06, "loss": 0.5805, "step": 631 }, { "epoch": 0.04, "grad_norm": 1.1333950757980347, "learning_rate": 9.99737236218094e-06, "loss": 0.6203, "step": 632 }, { "epoch": 0.04, "grad_norm": 1.2014952898025513, "learning_rate": 9.997338998645562e-06, "loss": 0.6107, "step": 633 }, { "epoch": 0.04, "grad_norm": 1.1212387084960938, "learning_rate": 9.997305424689626e-06, "loss": 0.6099, "step": 634 }, { "epoch": 0.04, "grad_norm": 1.1277556419372559, "learning_rate": 9.997271640314542e-06, "loss": 0.6042, "step": 635 }, { "epoch": 0.04, "grad_norm": 1.149415135383606, "learning_rate": 9.997237645521733e-06, "loss": 0.614, "step": 636 }, { "epoch": 0.04, "grad_norm": 1.1585355997085571, "learning_rate": 9.997203440312632e-06, "loss": 0.5867, "step": 637 }, { "epoch": 0.04, "grad_norm": 1.165755271911621, "learning_rate": 9.997169024688678e-06, "loss": 0.57, "step": 638 }, { "epoch": 0.04, "grad_norm": 1.1480127573013306, "learning_rate": 9.997134398651318e-06, "loss": 0.5741, "step": 639 }, { "epoch": 0.04, "grad_norm": 1.1376844644546509, "learning_rate": 9.997099562202015e-06, "loss": 0.6307, "step": 640 }, { "epoch": 0.04, "grad_norm": 1.2017743587493896, "learning_rate": 9.997064515342232e-06, "loss": 0.5886, "step": 641 }, { "epoch": 0.04, "grad_norm": 1.2070648670196533, "learning_rate": 9.997029258073445e-06, "loss": 0.6148, "step": 642 }, { "epoch": 0.04, "grad_norm": 1.1960395574569702, "learning_rate": 9.99699379039714e-06, "loss": 0.6255, "step": 643 }, { "epoch": 0.04, "grad_norm": 1.232494831085205, "learning_rate": 9.996958112314811e-06, "loss": 0.5936, "step": 644 }, { "epoch": 0.04, "grad_norm": 1.1607786417007446, "learning_rate": 9.996922223827958e-06, "loss": 0.5937, "step": 645 }, { "epoch": 0.04, "grad_norm": 1.5826513767242432, "learning_rate": 9.996886124938092e-06, "loss": 0.6248, "step": 646 }, { "epoch": 0.04, "grad_norm": 1.1372714042663574, "learning_rate": 9.996849815646736e-06, "loss": 0.5862, "step": 647 }, { "epoch": 0.04, "grad_norm": 1.1420936584472656, "learning_rate": 9.996813295955417e-06, "loss": 0.6175, "step": 648 }, { "epoch": 0.04, "grad_norm": 1.1022953987121582, "learning_rate": 9.996776565865671e-06, "loss": 0.5774, "step": 649 }, { "epoch": 0.04, "grad_norm": 1.1705585718154907, "learning_rate": 9.996739625379049e-06, "loss": 0.595, "step": 650 }, { "epoch": 0.04, "grad_norm": 1.2827461957931519, "learning_rate": 9.9967024744971e-06, "loss": 0.5825, "step": 651 }, { "epoch": 0.04, "grad_norm": 1.2297942638397217, "learning_rate": 9.996665113221396e-06, "loss": 0.6021, "step": 652 }, { "epoch": 0.04, "grad_norm": 1.0891423225402832, "learning_rate": 9.996627541553504e-06, "loss": 0.5903, "step": 653 }, { "epoch": 0.04, "grad_norm": 1.1050660610198975, "learning_rate": 9.996589759495008e-06, "loss": 0.5593, "step": 654 }, { "epoch": 0.04, "grad_norm": 1.292536973953247, "learning_rate": 9.9965517670475e-06, "loss": 0.6653, "step": 655 }, { "epoch": 0.04, "grad_norm": 1.2416012287139893, "learning_rate": 9.996513564212577e-06, "loss": 0.5872, "step": 656 }, { "epoch": 0.04, "grad_norm": 1.0946153402328491, "learning_rate": 9.996475150991852e-06, "loss": 0.5968, "step": 657 }, { "epoch": 0.04, "grad_norm": 1.259873628616333, "learning_rate": 9.99643652738694e-06, "loss": 0.6341, "step": 658 }, { "epoch": 0.04, "grad_norm": 1.1964812278747559, "learning_rate": 9.996397693399465e-06, "loss": 0.6192, "step": 659 }, { "epoch": 0.04, "grad_norm": 1.1455782651901245, "learning_rate": 9.996358649031066e-06, "loss": 0.6043, "step": 660 }, { "epoch": 0.04, "grad_norm": 1.1354379653930664, "learning_rate": 9.996319394283384e-06, "loss": 0.5798, "step": 661 }, { "epoch": 0.04, "grad_norm": 1.1652181148529053, "learning_rate": 9.996279929158074e-06, "loss": 0.6829, "step": 662 }, { "epoch": 0.04, "grad_norm": 1.167168378829956, "learning_rate": 9.996240253656796e-06, "loss": 0.5979, "step": 663 }, { "epoch": 0.04, "grad_norm": 1.1037861108779907, "learning_rate": 9.996200367781224e-06, "loss": 0.5988, "step": 664 }, { "epoch": 0.04, "grad_norm": 1.1305545568466187, "learning_rate": 9.996160271533033e-06, "loss": 0.6001, "step": 665 }, { "epoch": 0.04, "grad_norm": 1.2025097608566284, "learning_rate": 9.996119964913914e-06, "loss": 0.5899, "step": 666 }, { "epoch": 0.04, "grad_norm": 1.2237567901611328, "learning_rate": 9.996079447925563e-06, "loss": 0.6244, "step": 667 }, { "epoch": 0.04, "grad_norm": 1.1735132932662964, "learning_rate": 9.996038720569688e-06, "loss": 0.5966, "step": 668 }, { "epoch": 0.04, "grad_norm": 1.0479474067687988, "learning_rate": 9.995997782848e-06, "loss": 0.6146, "step": 669 }, { "epoch": 0.04, "grad_norm": 1.071723222732544, "learning_rate": 9.995956634762227e-06, "loss": 0.5648, "step": 670 }, { "epoch": 0.04, "grad_norm": 1.1734130382537842, "learning_rate": 9.995915276314099e-06, "loss": 0.605, "step": 671 }, { "epoch": 0.04, "grad_norm": 1.209862232208252, "learning_rate": 9.995873707505358e-06, "loss": 0.5932, "step": 672 }, { "epoch": 0.04, "grad_norm": 1.1562873125076294, "learning_rate": 9.995831928337756e-06, "loss": 0.5997, "step": 673 }, { "epoch": 0.04, "grad_norm": 1.2242332696914673, "learning_rate": 9.99578993881305e-06, "loss": 0.6525, "step": 674 }, { "epoch": 0.04, "grad_norm": 1.2193008661270142, "learning_rate": 9.995747738933009e-06, "loss": 0.5475, "step": 675 }, { "epoch": 0.04, "grad_norm": 1.106239914894104, "learning_rate": 9.995705328699408e-06, "loss": 0.6182, "step": 676 }, { "epoch": 0.04, "grad_norm": 1.2187772989273071, "learning_rate": 9.995662708114036e-06, "loss": 0.6567, "step": 677 }, { "epoch": 0.04, "grad_norm": 1.192084789276123, "learning_rate": 9.995619877178685e-06, "loss": 0.636, "step": 678 }, { "epoch": 0.04, "grad_norm": 1.1521915197372437, "learning_rate": 9.99557683589516e-06, "loss": 0.6575, "step": 679 }, { "epoch": 0.04, "grad_norm": 1.2543455362319946, "learning_rate": 9.995533584265273e-06, "loss": 0.6198, "step": 680 }, { "epoch": 0.04, "grad_norm": 1.362525224685669, "learning_rate": 9.995490122290845e-06, "loss": 0.6835, "step": 681 }, { "epoch": 0.04, "grad_norm": 1.1788954734802246, "learning_rate": 9.995446449973705e-06, "loss": 0.6033, "step": 682 }, { "epoch": 0.04, "grad_norm": 1.065611720085144, "learning_rate": 9.995402567315695e-06, "loss": 0.5724, "step": 683 }, { "epoch": 0.04, "grad_norm": 1.1058067083358765, "learning_rate": 9.99535847431866e-06, "loss": 0.6266, "step": 684 }, { "epoch": 0.04, "grad_norm": 1.1485073566436768, "learning_rate": 9.995314170984457e-06, "loss": 0.5928, "step": 685 }, { "epoch": 0.04, "grad_norm": 1.17409348487854, "learning_rate": 9.99526965731495e-06, "loss": 0.5886, "step": 686 }, { "epoch": 0.04, "grad_norm": 1.1527072191238403, "learning_rate": 9.995224933312016e-06, "loss": 0.6152, "step": 687 }, { "epoch": 0.04, "grad_norm": 1.2115527391433716, "learning_rate": 9.995179998977537e-06, "loss": 0.5918, "step": 688 }, { "epoch": 0.04, "grad_norm": 1.0342247486114502, "learning_rate": 9.995134854313407e-06, "loss": 0.5787, "step": 689 }, { "epoch": 0.04, "grad_norm": 1.2421724796295166, "learning_rate": 9.995089499321521e-06, "loss": 0.6279, "step": 690 }, { "epoch": 0.04, "grad_norm": 1.2693426609039307, "learning_rate": 9.995043934003796e-06, "loss": 0.5894, "step": 691 }, { "epoch": 0.04, "grad_norm": 1.1222983598709106, "learning_rate": 9.994998158362148e-06, "loss": 0.6051, "step": 692 }, { "epoch": 0.04, "grad_norm": 1.3039426803588867, "learning_rate": 9.994952172398502e-06, "loss": 0.6284, "step": 693 }, { "epoch": 0.04, "grad_norm": 1.194825530052185, "learning_rate": 9.994905976114799e-06, "loss": 0.6795, "step": 694 }, { "epoch": 0.04, "grad_norm": 1.4020371437072754, "learning_rate": 9.994859569512978e-06, "loss": 0.5985, "step": 695 }, { "epoch": 0.04, "grad_norm": 2.410729169845581, "learning_rate": 9.994812952594998e-06, "loss": 0.6006, "step": 696 }, { "epoch": 0.04, "grad_norm": 1.3058116436004639, "learning_rate": 9.994766125362821e-06, "loss": 0.5779, "step": 697 }, { "epoch": 0.04, "grad_norm": 1.1895015239715576, "learning_rate": 9.994719087818416e-06, "loss": 0.6208, "step": 698 }, { "epoch": 0.04, "grad_norm": 1.1635770797729492, "learning_rate": 9.994671839963766e-06, "loss": 0.586, "step": 699 }, { "epoch": 0.04, "grad_norm": 1.1304101943969727, "learning_rate": 9.994624381800861e-06, "loss": 0.5658, "step": 700 }, { "epoch": 0.04, "grad_norm": 1.0639395713806152, "learning_rate": 9.994576713331699e-06, "loss": 0.5744, "step": 701 }, { "epoch": 0.04, "grad_norm": 1.1864066123962402, "learning_rate": 9.994528834558285e-06, "loss": 0.6339, "step": 702 }, { "epoch": 0.04, "grad_norm": 1.1535203456878662, "learning_rate": 9.994480745482636e-06, "loss": 0.6583, "step": 703 }, { "epoch": 0.04, "grad_norm": 1.063400149345398, "learning_rate": 9.99443244610678e-06, "loss": 0.5663, "step": 704 }, { "epoch": 0.04, "grad_norm": 1.0646471977233887, "learning_rate": 9.994383936432745e-06, "loss": 0.5485, "step": 705 }, { "epoch": 0.04, "grad_norm": 1.2647340297698975, "learning_rate": 9.994335216462579e-06, "loss": 0.5776, "step": 706 }, { "epoch": 0.04, "grad_norm": 1.1197046041488647, "learning_rate": 9.99428628619833e-06, "loss": 0.5988, "step": 707 }, { "epoch": 0.04, "grad_norm": 1.2443758249282837, "learning_rate": 9.994237145642058e-06, "loss": 0.6535, "step": 708 }, { "epoch": 0.04, "grad_norm": 1.0635206699371338, "learning_rate": 9.994187794795835e-06, "loss": 0.5706, "step": 709 }, { "epoch": 0.04, "grad_norm": 1.1187067031860352, "learning_rate": 9.994138233661737e-06, "loss": 0.6307, "step": 710 }, { "epoch": 0.05, "grad_norm": 1.2295868396759033, "learning_rate": 9.994088462241851e-06, "loss": 0.6248, "step": 711 }, { "epoch": 0.05, "grad_norm": 1.2285339832305908, "learning_rate": 9.994038480538274e-06, "loss": 0.5898, "step": 712 }, { "epoch": 0.05, "grad_norm": 1.2357820272445679, "learning_rate": 9.993988288553109e-06, "loss": 0.6122, "step": 713 }, { "epoch": 0.05, "grad_norm": 1.1227229833602905, "learning_rate": 9.993937886288471e-06, "loss": 0.5517, "step": 714 }, { "epoch": 0.05, "grad_norm": 1.25047767162323, "learning_rate": 9.99388727374648e-06, "loss": 0.6434, "step": 715 }, { "epoch": 0.05, "grad_norm": 1.0810105800628662, "learning_rate": 9.993836450929268e-06, "loss": 0.5894, "step": 716 }, { "epoch": 0.05, "grad_norm": 1.1047571897506714, "learning_rate": 9.993785417838978e-06, "loss": 0.5227, "step": 717 }, { "epoch": 0.05, "grad_norm": 1.249957799911499, "learning_rate": 9.993734174477752e-06, "loss": 0.5827, "step": 718 }, { "epoch": 0.05, "grad_norm": 1.2992061376571655, "learning_rate": 9.993682720847755e-06, "loss": 0.5683, "step": 719 }, { "epoch": 0.05, "grad_norm": 1.4556405544281006, "learning_rate": 9.99363105695115e-06, "loss": 0.6054, "step": 720 }, { "epoch": 0.05, "grad_norm": 1.3055137395858765, "learning_rate": 9.993579182790111e-06, "loss": 0.6127, "step": 721 }, { "epoch": 0.05, "grad_norm": 1.1818618774414062, "learning_rate": 9.993527098366826e-06, "loss": 0.554, "step": 722 }, { "epoch": 0.05, "grad_norm": 1.219436526298523, "learning_rate": 9.993474803683486e-06, "loss": 0.6292, "step": 723 }, { "epoch": 0.05, "grad_norm": 1.216051697731018, "learning_rate": 9.993422298742293e-06, "loss": 0.576, "step": 724 }, { "epoch": 0.05, "grad_norm": 1.2530943155288696, "learning_rate": 9.993369583545456e-06, "loss": 0.5972, "step": 725 }, { "epoch": 0.05, "grad_norm": 1.2716203927993774, "learning_rate": 9.993316658095198e-06, "loss": 0.6217, "step": 726 }, { "epoch": 0.05, "grad_norm": 1.1261982917785645, "learning_rate": 9.993263522393745e-06, "loss": 0.6376, "step": 727 }, { "epoch": 0.05, "grad_norm": 1.1981035470962524, "learning_rate": 9.993210176443338e-06, "loss": 0.611, "step": 728 }, { "epoch": 0.05, "grad_norm": 1.3648395538330078, "learning_rate": 9.993156620246219e-06, "loss": 0.6541, "step": 729 }, { "epoch": 0.05, "grad_norm": 1.226815938949585, "learning_rate": 9.993102853804643e-06, "loss": 0.6128, "step": 730 }, { "epoch": 0.05, "grad_norm": 1.1845619678497314, "learning_rate": 9.993048877120876e-06, "loss": 0.5807, "step": 731 }, { "epoch": 0.05, "grad_norm": 1.221136450767517, "learning_rate": 9.992994690197192e-06, "loss": 0.6016, "step": 732 }, { "epoch": 0.05, "grad_norm": 1.1222420930862427, "learning_rate": 9.992940293035871e-06, "loss": 0.6127, "step": 733 }, { "epoch": 0.05, "grad_norm": 1.106492042541504, "learning_rate": 9.992885685639203e-06, "loss": 0.5583, "step": 734 }, { "epoch": 0.05, "grad_norm": 1.1257810592651367, "learning_rate": 9.992830868009487e-06, "loss": 0.6227, "step": 735 }, { "epoch": 0.05, "grad_norm": 1.1145000457763672, "learning_rate": 9.992775840149031e-06, "loss": 0.5552, "step": 736 }, { "epoch": 0.05, "grad_norm": 1.205623745918274, "learning_rate": 9.992720602060155e-06, "loss": 0.6107, "step": 737 }, { "epoch": 0.05, "grad_norm": 1.2029021978378296, "learning_rate": 9.992665153745182e-06, "loss": 0.6487, "step": 738 }, { "epoch": 0.05, "grad_norm": 1.222591519355774, "learning_rate": 9.992609495206448e-06, "loss": 0.648, "step": 739 }, { "epoch": 0.05, "grad_norm": 1.11492121219635, "learning_rate": 9.992553626446296e-06, "loss": 0.5802, "step": 740 }, { "epoch": 0.05, "grad_norm": 1.2474178075790405, "learning_rate": 9.992497547467079e-06, "loss": 0.6533, "step": 741 }, { "epoch": 0.05, "grad_norm": 1.1707470417022705, "learning_rate": 9.992441258271157e-06, "loss": 0.6291, "step": 742 }, { "epoch": 0.05, "grad_norm": 1.2481380701065063, "learning_rate": 9.992384758860902e-06, "loss": 0.6109, "step": 743 }, { "epoch": 0.05, "grad_norm": 1.1987773180007935, "learning_rate": 9.99232804923869e-06, "loss": 0.605, "step": 744 }, { "epoch": 0.05, "grad_norm": 1.2029579877853394, "learning_rate": 9.992271129406914e-06, "loss": 0.6233, "step": 745 }, { "epoch": 0.05, "grad_norm": 1.165914535522461, "learning_rate": 9.992213999367965e-06, "loss": 0.5919, "step": 746 }, { "epoch": 0.05, "grad_norm": 1.1722065210342407, "learning_rate": 9.992156659124253e-06, "loss": 0.5918, "step": 747 }, { "epoch": 0.05, "grad_norm": 1.137799620628357, "learning_rate": 9.99209910867819e-06, "loss": 0.6184, "step": 748 }, { "epoch": 0.05, "grad_norm": 1.033355951309204, "learning_rate": 9.9920413480322e-06, "loss": 0.5688, "step": 749 }, { "epoch": 0.05, "grad_norm": 1.2363529205322266, "learning_rate": 9.991983377188715e-06, "loss": 0.5838, "step": 750 }, { "epoch": 0.05, "grad_norm": 1.1456830501556396, "learning_rate": 9.991925196150174e-06, "loss": 0.5965, "step": 751 }, { "epoch": 0.05, "grad_norm": 1.098024606704712, "learning_rate": 9.99186680491903e-06, "loss": 0.5547, "step": 752 }, { "epoch": 0.05, "grad_norm": 1.1140855550765991, "learning_rate": 9.99180820349774e-06, "loss": 0.6416, "step": 753 }, { "epoch": 0.05, "grad_norm": 1.2580140829086304, "learning_rate": 9.991749391888772e-06, "loss": 0.6367, "step": 754 }, { "epoch": 0.05, "grad_norm": 1.2655225992202759, "learning_rate": 9.991690370094603e-06, "loss": 0.6472, "step": 755 }, { "epoch": 0.05, "grad_norm": 1.2652753591537476, "learning_rate": 9.991631138117715e-06, "loss": 0.6705, "step": 756 }, { "epoch": 0.05, "grad_norm": 1.1617118120193481, "learning_rate": 9.991571695960606e-06, "loss": 0.6106, "step": 757 }, { "epoch": 0.05, "grad_norm": 0.9705402851104736, "learning_rate": 9.991512043625777e-06, "loss": 0.5495, "step": 758 }, { "epoch": 0.05, "grad_norm": 1.1828492879867554, "learning_rate": 9.991452181115739e-06, "loss": 0.5876, "step": 759 }, { "epoch": 0.05, "grad_norm": 1.1426823139190674, "learning_rate": 9.991392108433016e-06, "loss": 0.6483, "step": 760 }, { "epoch": 0.05, "grad_norm": 1.100878119468689, "learning_rate": 9.991331825580132e-06, "loss": 0.6322, "step": 761 }, { "epoch": 0.05, "grad_norm": 1.1711697578430176, "learning_rate": 9.99127133255963e-06, "loss": 0.6244, "step": 762 }, { "epoch": 0.05, "grad_norm": 1.1956875324249268, "learning_rate": 9.991210629374058e-06, "loss": 0.5852, "step": 763 }, { "epoch": 0.05, "grad_norm": 1.176194429397583, "learning_rate": 9.991149716025967e-06, "loss": 0.606, "step": 764 }, { "epoch": 0.05, "grad_norm": 1.2652812004089355, "learning_rate": 9.991088592517924e-06, "loss": 0.6008, "step": 765 }, { "epoch": 0.05, "grad_norm": 1.122599720954895, "learning_rate": 9.991027258852505e-06, "loss": 0.596, "step": 766 }, { "epoch": 0.05, "grad_norm": 1.0915685892105103, "learning_rate": 9.990965715032289e-06, "loss": 0.5382, "step": 767 }, { "epoch": 0.05, "grad_norm": 1.0834283828735352, "learning_rate": 9.99090396105987e-06, "loss": 0.5706, "step": 768 }, { "epoch": 0.05, "grad_norm": 1.1483625173568726, "learning_rate": 9.990841996937846e-06, "loss": 0.6083, "step": 769 }, { "epoch": 0.05, "grad_norm": 1.1269500255584717, "learning_rate": 9.990779822668827e-06, "loss": 0.6419, "step": 770 }, { "epoch": 0.05, "grad_norm": 1.0941485166549683, "learning_rate": 9.990717438255435e-06, "loss": 0.5956, "step": 771 }, { "epoch": 0.05, "grad_norm": 1.1730438470840454, "learning_rate": 9.99065484370029e-06, "loss": 0.6609, "step": 772 }, { "epoch": 0.05, "grad_norm": 1.1437172889709473, "learning_rate": 9.99059203900603e-06, "loss": 0.5855, "step": 773 }, { "epoch": 0.05, "grad_norm": 1.1737104654312134, "learning_rate": 9.990529024175303e-06, "loss": 0.514, "step": 774 }, { "epoch": 0.05, "grad_norm": 1.113284945487976, "learning_rate": 9.990465799210757e-06, "loss": 0.5614, "step": 775 }, { "epoch": 0.05, "grad_norm": 1.1417404413223267, "learning_rate": 9.99040236411506e-06, "loss": 0.6639, "step": 776 }, { "epoch": 0.05, "grad_norm": 1.1533658504486084, "learning_rate": 9.990338718890878e-06, "loss": 0.6042, "step": 777 }, { "epoch": 0.05, "grad_norm": 1.1291682720184326, "learning_rate": 9.990274863540891e-06, "loss": 0.5664, "step": 778 }, { "epoch": 0.05, "grad_norm": 1.1921929121017456, "learning_rate": 9.990210798067792e-06, "loss": 0.6371, "step": 779 }, { "epoch": 0.05, "grad_norm": 1.2903766632080078, "learning_rate": 9.990146522474273e-06, "loss": 0.6215, "step": 780 }, { "epoch": 0.05, "grad_norm": 1.139093279838562, "learning_rate": 9.990082036763046e-06, "loss": 0.6203, "step": 781 }, { "epoch": 0.05, "grad_norm": 1.0165280103683472, "learning_rate": 9.990017340936823e-06, "loss": 0.5773, "step": 782 }, { "epoch": 0.05, "grad_norm": 1.0505095720291138, "learning_rate": 9.989952434998328e-06, "loss": 0.6327, "step": 783 }, { "epoch": 0.05, "grad_norm": 1.1323219537734985, "learning_rate": 9.989887318950295e-06, "loss": 0.6173, "step": 784 }, { "epoch": 0.05, "grad_norm": 1.1180750131607056, "learning_rate": 9.989821992795467e-06, "loss": 0.5911, "step": 785 }, { "epoch": 0.05, "grad_norm": 1.067559003829956, "learning_rate": 9.989756456536593e-06, "loss": 0.555, "step": 786 }, { "epoch": 0.05, "grad_norm": 1.2159489393234253, "learning_rate": 9.989690710176433e-06, "loss": 0.603, "step": 787 }, { "epoch": 0.05, "grad_norm": 1.1859744787216187, "learning_rate": 9.989624753717752e-06, "loss": 0.632, "step": 788 }, { "epoch": 0.05, "grad_norm": 1.1191562414169312, "learning_rate": 9.989558587163332e-06, "loss": 0.5882, "step": 789 }, { "epoch": 0.05, "grad_norm": 1.043337106704712, "learning_rate": 9.989492210515958e-06, "loss": 0.5937, "step": 790 }, { "epoch": 0.05, "grad_norm": 1.1230212450027466, "learning_rate": 9.989425623778423e-06, "loss": 0.611, "step": 791 }, { "epoch": 0.05, "grad_norm": 1.199364423751831, "learning_rate": 9.989358826953533e-06, "loss": 0.6173, "step": 792 }, { "epoch": 0.05, "grad_norm": 1.087085247039795, "learning_rate": 9.989291820044099e-06, "loss": 0.5921, "step": 793 }, { "epoch": 0.05, "grad_norm": 1.1019243001937866, "learning_rate": 9.989224603052943e-06, "loss": 0.5381, "step": 794 }, { "epoch": 0.05, "grad_norm": 1.1220672130584717, "learning_rate": 9.989157175982896e-06, "loss": 0.6278, "step": 795 }, { "epoch": 0.05, "grad_norm": 1.167831540107727, "learning_rate": 9.989089538836795e-06, "loss": 0.5562, "step": 796 }, { "epoch": 0.05, "grad_norm": 1.1575336456298828, "learning_rate": 9.98902169161749e-06, "loss": 0.6192, "step": 797 }, { "epoch": 0.05, "grad_norm": 1.1505603790283203, "learning_rate": 9.988953634327836e-06, "loss": 0.6589, "step": 798 }, { "epoch": 0.05, "grad_norm": 1.0694489479064941, "learning_rate": 9.988885366970701e-06, "loss": 0.6314, "step": 799 }, { "epoch": 0.05, "grad_norm": 1.1580840349197388, "learning_rate": 9.988816889548958e-06, "loss": 0.6169, "step": 800 }, { "epoch": 0.05, "grad_norm": 1.1379700899124146, "learning_rate": 9.98874820206549e-06, "loss": 0.597, "step": 801 }, { "epoch": 0.05, "grad_norm": 1.1595264673233032, "learning_rate": 9.988679304523192e-06, "loss": 0.5541, "step": 802 }, { "epoch": 0.05, "grad_norm": 1.1024327278137207, "learning_rate": 9.988610196924962e-06, "loss": 0.6035, "step": 803 }, { "epoch": 0.05, "grad_norm": 1.074629306793213, "learning_rate": 9.98854087927371e-06, "loss": 0.5977, "step": 804 }, { "epoch": 0.05, "grad_norm": 1.0339958667755127, "learning_rate": 9.988471351572355e-06, "loss": 0.5759, "step": 805 }, { "epoch": 0.05, "grad_norm": 1.179978609085083, "learning_rate": 9.988401613823825e-06, "loss": 0.6151, "step": 806 }, { "epoch": 0.05, "grad_norm": 1.2351336479187012, "learning_rate": 9.988331666031056e-06, "loss": 0.6234, "step": 807 }, { "epoch": 0.05, "grad_norm": 1.1425678730010986, "learning_rate": 9.988261508196994e-06, "loss": 0.5846, "step": 808 }, { "epoch": 0.05, "grad_norm": 1.1734411716461182, "learning_rate": 9.988191140324595e-06, "loss": 0.5806, "step": 809 }, { "epoch": 0.05, "grad_norm": 0.999897837638855, "learning_rate": 9.988120562416817e-06, "loss": 0.5956, "step": 810 }, { "epoch": 0.05, "grad_norm": 1.0977915525436401, "learning_rate": 9.988049774476636e-06, "loss": 0.5887, "step": 811 }, { "epoch": 0.05, "grad_norm": 1.0584194660186768, "learning_rate": 9.98797877650703e-06, "loss": 0.5919, "step": 812 }, { "epoch": 0.05, "grad_norm": 1.0896602869033813, "learning_rate": 9.987907568510991e-06, "loss": 0.5705, "step": 813 }, { "epoch": 0.05, "grad_norm": 1.2355543375015259, "learning_rate": 9.987836150491515e-06, "loss": 0.6367, "step": 814 }, { "epoch": 0.05, "grad_norm": 1.2684205770492554, "learning_rate": 9.98776452245161e-06, "loss": 0.6017, "step": 815 }, { "epoch": 0.05, "grad_norm": 0.9456241726875305, "learning_rate": 9.987692684394294e-06, "loss": 0.5532, "step": 816 }, { "epoch": 0.05, "grad_norm": 1.085779070854187, "learning_rate": 9.987620636322589e-06, "loss": 0.5819, "step": 817 }, { "epoch": 0.05, "grad_norm": 1.2396453619003296, "learning_rate": 9.987548378239529e-06, "loss": 0.6707, "step": 818 }, { "epoch": 0.05, "grad_norm": 1.173922061920166, "learning_rate": 9.987475910148156e-06, "loss": 0.6068, "step": 819 }, { "epoch": 0.05, "grad_norm": 0.970253050327301, "learning_rate": 9.987403232051525e-06, "loss": 0.5501, "step": 820 }, { "epoch": 0.05, "grad_norm": 1.0929089784622192, "learning_rate": 9.987330343952692e-06, "loss": 0.5669, "step": 821 }, { "epoch": 0.05, "grad_norm": 1.085546851158142, "learning_rate": 9.987257245854729e-06, "loss": 0.5842, "step": 822 }, { "epoch": 0.05, "grad_norm": 1.1120977401733398, "learning_rate": 9.987183937760713e-06, "loss": 0.6162, "step": 823 }, { "epoch": 0.05, "grad_norm": 1.1035264730453491, "learning_rate": 9.98711041967373e-06, "loss": 0.6099, "step": 824 }, { "epoch": 0.05, "grad_norm": 1.0656368732452393, "learning_rate": 9.987036691596877e-06, "loss": 0.5894, "step": 825 }, { "epoch": 0.05, "grad_norm": 1.1329158544540405, "learning_rate": 9.986962753533257e-06, "loss": 0.6243, "step": 826 }, { "epoch": 0.05, "grad_norm": 1.0665289163589478, "learning_rate": 9.986888605485983e-06, "loss": 0.5545, "step": 827 }, { "epoch": 0.05, "grad_norm": 1.1567240953445435, "learning_rate": 9.986814247458177e-06, "loss": 0.583, "step": 828 }, { "epoch": 0.05, "grad_norm": 1.1955913305282593, "learning_rate": 9.986739679452973e-06, "loss": 0.6278, "step": 829 }, { "epoch": 0.05, "grad_norm": 1.0708664655685425, "learning_rate": 9.986664901473508e-06, "loss": 0.5302, "step": 830 }, { "epoch": 0.05, "grad_norm": 1.0710406303405762, "learning_rate": 9.98658991352293e-06, "loss": 0.5877, "step": 831 }, { "epoch": 0.05, "grad_norm": 1.0962172746658325, "learning_rate": 9.986514715604401e-06, "loss": 0.5817, "step": 832 }, { "epoch": 0.05, "grad_norm": 1.1612870693206787, "learning_rate": 9.986439307721083e-06, "loss": 0.6012, "step": 833 }, { "epoch": 0.05, "grad_norm": 1.1956734657287598, "learning_rate": 9.98636368987615e-06, "loss": 0.604, "step": 834 }, { "epoch": 0.05, "grad_norm": 1.098853588104248, "learning_rate": 9.98628786207279e-06, "loss": 0.5551, "step": 835 }, { "epoch": 0.05, "grad_norm": 1.0404162406921387, "learning_rate": 9.986211824314193e-06, "loss": 0.5897, "step": 836 }, { "epoch": 0.05, "grad_norm": 1.105139136314392, "learning_rate": 9.986135576603564e-06, "loss": 0.6254, "step": 837 }, { "epoch": 0.05, "grad_norm": 1.0726534128189087, "learning_rate": 9.98605911894411e-06, "loss": 0.5993, "step": 838 }, { "epoch": 0.05, "grad_norm": 1.0231831073760986, "learning_rate": 9.985982451339054e-06, "loss": 0.5989, "step": 839 }, { "epoch": 0.05, "grad_norm": 1.1226348876953125, "learning_rate": 9.985905573791619e-06, "loss": 0.6273, "step": 840 }, { "epoch": 0.05, "grad_norm": 1.0960124731063843, "learning_rate": 9.985828486305046e-06, "loss": 0.6176, "step": 841 }, { "epoch": 0.05, "grad_norm": 1.1282395124435425, "learning_rate": 9.98575118888258e-06, "loss": 0.5881, "step": 842 }, { "epoch": 0.05, "grad_norm": 1.2027688026428223, "learning_rate": 9.985673681527474e-06, "loss": 0.6038, "step": 843 }, { "epoch": 0.05, "grad_norm": 1.2403696775436401, "learning_rate": 9.985595964242996e-06, "loss": 0.5738, "step": 844 }, { "epoch": 0.05, "grad_norm": 1.21244478225708, "learning_rate": 9.985518037032413e-06, "loss": 0.6733, "step": 845 }, { "epoch": 0.05, "grad_norm": 1.1472803354263306, "learning_rate": 9.98543989989901e-06, "loss": 0.6156, "step": 846 }, { "epoch": 0.05, "grad_norm": 1.2124862670898438, "learning_rate": 9.985361552846076e-06, "loss": 0.5946, "step": 847 }, { "epoch": 0.05, "grad_norm": 1.091420292854309, "learning_rate": 9.98528299587691e-06, "loss": 0.5529, "step": 848 }, { "epoch": 0.05, "grad_norm": 1.020264744758606, "learning_rate": 9.98520422899482e-06, "loss": 0.612, "step": 849 }, { "epoch": 0.05, "grad_norm": 1.190901756286621, "learning_rate": 9.985125252203122e-06, "loss": 0.5835, "step": 850 }, { "epoch": 0.05, "grad_norm": 1.1265788078308105, "learning_rate": 9.985046065505141e-06, "loss": 0.5982, "step": 851 }, { "epoch": 0.05, "grad_norm": 1.1043763160705566, "learning_rate": 9.984966668904211e-06, "loss": 0.557, "step": 852 }, { "epoch": 0.05, "grad_norm": 1.1575721502304077, "learning_rate": 9.984887062403678e-06, "loss": 0.6205, "step": 853 }, { "epoch": 0.05, "grad_norm": 1.1319472789764404, "learning_rate": 9.984807246006891e-06, "loss": 0.5921, "step": 854 }, { "epoch": 0.05, "grad_norm": 1.1670777797698975, "learning_rate": 9.984727219717212e-06, "loss": 0.608, "step": 855 }, { "epoch": 0.05, "grad_norm": 1.0812734365463257, "learning_rate": 9.984646983538009e-06, "loss": 0.6118, "step": 856 }, { "epoch": 0.05, "grad_norm": 1.2277408838272095, "learning_rate": 9.984566537472662e-06, "loss": 0.6244, "step": 857 }, { "epoch": 0.05, "grad_norm": 1.131480097770691, "learning_rate": 9.98448588152456e-06, "loss": 0.6142, "step": 858 }, { "epoch": 0.05, "grad_norm": 1.1145899295806885, "learning_rate": 9.984405015697097e-06, "loss": 0.5584, "step": 859 }, { "epoch": 0.05, "grad_norm": 1.1443415880203247, "learning_rate": 9.984323939993678e-06, "loss": 0.5902, "step": 860 }, { "epoch": 0.05, "grad_norm": 1.162580966949463, "learning_rate": 9.984242654417716e-06, "loss": 0.6336, "step": 861 }, { "epoch": 0.05, "grad_norm": 1.0649712085723877, "learning_rate": 9.984161158972636e-06, "loss": 0.5705, "step": 862 }, { "epoch": 0.05, "grad_norm": 1.1186282634735107, "learning_rate": 9.984079453661869e-06, "loss": 0.6159, "step": 863 }, { "epoch": 0.05, "grad_norm": 1.147649884223938, "learning_rate": 9.983997538488851e-06, "loss": 0.593, "step": 864 }, { "epoch": 0.05, "grad_norm": 1.0798815488815308, "learning_rate": 9.983915413457036e-06, "loss": 0.5644, "step": 865 }, { "epoch": 0.05, "grad_norm": 1.051112174987793, "learning_rate": 9.983833078569883e-06, "loss": 0.6032, "step": 866 }, { "epoch": 0.05, "grad_norm": 1.1565731763839722, "learning_rate": 9.983750533830856e-06, "loss": 0.6559, "step": 867 }, { "epoch": 0.05, "grad_norm": 1.2471048831939697, "learning_rate": 9.98366777924343e-06, "loss": 0.6315, "step": 868 }, { "epoch": 0.06, "grad_norm": 1.1234080791473389, "learning_rate": 9.983584814811092e-06, "loss": 0.5931, "step": 869 }, { "epoch": 0.06, "grad_norm": 1.1437253952026367, "learning_rate": 9.983501640537333e-06, "loss": 0.6697, "step": 870 }, { "epoch": 0.06, "grad_norm": 1.1277947425842285, "learning_rate": 9.983418256425656e-06, "loss": 0.6253, "step": 871 }, { "epoch": 0.06, "grad_norm": 1.1978877782821655, "learning_rate": 9.983334662479572e-06, "loss": 0.5755, "step": 872 }, { "epoch": 0.06, "grad_norm": 1.0907119512557983, "learning_rate": 9.983250858702603e-06, "loss": 0.5602, "step": 873 }, { "epoch": 0.06, "grad_norm": 1.0816748142242432, "learning_rate": 9.983166845098275e-06, "loss": 0.5799, "step": 874 }, { "epoch": 0.06, "grad_norm": 1.0800241231918335, "learning_rate": 9.983082621670126e-06, "loss": 0.541, "step": 875 }, { "epoch": 0.06, "grad_norm": 1.234094500541687, "learning_rate": 9.982998188421702e-06, "loss": 0.6351, "step": 876 }, { "epoch": 0.06, "grad_norm": 1.1599239110946655, "learning_rate": 9.98291354535656e-06, "loss": 0.6103, "step": 877 }, { "epoch": 0.06, "grad_norm": 1.1438367366790771, "learning_rate": 9.982828692478261e-06, "loss": 0.5765, "step": 878 }, { "epoch": 0.06, "grad_norm": 1.3228342533111572, "learning_rate": 9.982743629790382e-06, "loss": 0.5715, "step": 879 }, { "epoch": 0.06, "grad_norm": 1.1042391061782837, "learning_rate": 9.982658357296502e-06, "loss": 0.5674, "step": 880 }, { "epoch": 0.06, "grad_norm": 1.194968819618225, "learning_rate": 9.982572875000212e-06, "loss": 0.5971, "step": 881 }, { "epoch": 0.06, "grad_norm": 1.1787186861038208, "learning_rate": 9.98248718290511e-06, "loss": 0.6431, "step": 882 }, { "epoch": 0.06, "grad_norm": 0.9943352341651917, "learning_rate": 9.982401281014806e-06, "loss": 0.5489, "step": 883 }, { "epoch": 0.06, "grad_norm": 1.1775997877120972, "learning_rate": 9.982315169332918e-06, "loss": 0.655, "step": 884 }, { "epoch": 0.06, "grad_norm": 1.1457655429840088, "learning_rate": 9.982228847863069e-06, "loss": 0.5942, "step": 885 }, { "epoch": 0.06, "grad_norm": 1.0527290105819702, "learning_rate": 9.982142316608897e-06, "loss": 0.5787, "step": 886 }, { "epoch": 0.06, "grad_norm": 1.2122856378555298, "learning_rate": 9.982055575574042e-06, "loss": 0.6067, "step": 887 }, { "epoch": 0.06, "grad_norm": 1.1700632572174072, "learning_rate": 9.981968624762159e-06, "loss": 0.6277, "step": 888 }, { "epoch": 0.06, "grad_norm": 1.1444690227508545, "learning_rate": 9.981881464176908e-06, "loss": 0.5792, "step": 889 }, { "epoch": 0.06, "grad_norm": 1.1081745624542236, "learning_rate": 9.981794093821957e-06, "loss": 0.5623, "step": 890 }, { "epoch": 0.06, "grad_norm": 1.0724091529846191, "learning_rate": 9.981706513700989e-06, "loss": 0.6175, "step": 891 }, { "epoch": 0.06, "grad_norm": 1.0415534973144531, "learning_rate": 9.98161872381769e-06, "loss": 0.5494, "step": 892 }, { "epoch": 0.06, "grad_norm": 1.1281497478485107, "learning_rate": 9.981530724175756e-06, "loss": 0.5118, "step": 893 }, { "epoch": 0.06, "grad_norm": 1.136470079421997, "learning_rate": 9.981442514778892e-06, "loss": 0.5983, "step": 894 }, { "epoch": 0.06, "grad_norm": 1.0326050519943237, "learning_rate": 9.981354095630816e-06, "loss": 0.5643, "step": 895 }, { "epoch": 0.06, "grad_norm": 1.1196739673614502, "learning_rate": 9.981265466735244e-06, "loss": 0.5643, "step": 896 }, { "epoch": 0.06, "grad_norm": 1.0784188508987427, "learning_rate": 9.981176628095913e-06, "loss": 0.595, "step": 897 }, { "epoch": 0.06, "grad_norm": 1.1706982851028442, "learning_rate": 9.981087579716564e-06, "loss": 0.5883, "step": 898 }, { "epoch": 0.06, "grad_norm": 1.096197485923767, "learning_rate": 9.980998321600944e-06, "loss": 0.5545, "step": 899 }, { "epoch": 0.06, "grad_norm": 1.0704962015151978, "learning_rate": 9.98090885375281e-06, "loss": 0.6562, "step": 900 }, { "epoch": 0.06, "grad_norm": 1.2794032096862793, "learning_rate": 9.980819176175932e-06, "loss": 0.6009, "step": 901 }, { "epoch": 0.06, "grad_norm": 1.1802647113800049, "learning_rate": 9.980729288874088e-06, "loss": 0.6218, "step": 902 }, { "epoch": 0.06, "grad_norm": 1.1883010864257812, "learning_rate": 9.98063919185106e-06, "loss": 0.5624, "step": 903 }, { "epoch": 0.06, "grad_norm": 1.2000126838684082, "learning_rate": 9.980548885110641e-06, "loss": 0.5604, "step": 904 }, { "epoch": 0.06, "grad_norm": 1.0881659984588623, "learning_rate": 9.980458368656635e-06, "loss": 0.6595, "step": 905 }, { "epoch": 0.06, "grad_norm": 1.1219079494476318, "learning_rate": 9.980367642492852e-06, "loss": 0.6681, "step": 906 }, { "epoch": 0.06, "grad_norm": 1.2020022869110107, "learning_rate": 9.980276706623114e-06, "loss": 0.5726, "step": 907 }, { "epoch": 0.06, "grad_norm": 1.061338186264038, "learning_rate": 9.98018556105125e-06, "loss": 0.5565, "step": 908 }, { "epoch": 0.06, "grad_norm": 1.218387484550476, "learning_rate": 9.980094205781094e-06, "loss": 0.5792, "step": 909 }, { "epoch": 0.06, "grad_norm": 1.1016128063201904, "learning_rate": 9.980002640816498e-06, "loss": 0.58, "step": 910 }, { "epoch": 0.06, "grad_norm": 1.0820120573043823, "learning_rate": 9.979910866161313e-06, "loss": 0.6373, "step": 911 }, { "epoch": 0.06, "grad_norm": 1.1650376319885254, "learning_rate": 9.979818881819408e-06, "loss": 0.5965, "step": 912 }, { "epoch": 0.06, "grad_norm": 1.1960431337356567, "learning_rate": 9.979726687794651e-06, "loss": 0.6847, "step": 913 }, { "epoch": 0.06, "grad_norm": 1.202374815940857, "learning_rate": 9.979634284090927e-06, "loss": 0.5545, "step": 914 }, { "epoch": 0.06, "grad_norm": 1.1402519941329956, "learning_rate": 9.979541670712125e-06, "loss": 0.6338, "step": 915 }, { "epoch": 0.06, "grad_norm": 1.1344772577285767, "learning_rate": 9.979448847662148e-06, "loss": 0.5729, "step": 916 }, { "epoch": 0.06, "grad_norm": 1.156521201133728, "learning_rate": 9.979355814944901e-06, "loss": 0.6434, "step": 917 }, { "epoch": 0.06, "grad_norm": 1.120430588722229, "learning_rate": 9.979262572564303e-06, "loss": 0.552, "step": 918 }, { "epoch": 0.06, "grad_norm": 1.125915288925171, "learning_rate": 9.979169120524279e-06, "loss": 0.57, "step": 919 }, { "epoch": 0.06, "grad_norm": 1.1396992206573486, "learning_rate": 9.979075458828765e-06, "loss": 0.6027, "step": 920 }, { "epoch": 0.06, "grad_norm": 1.1143865585327148, "learning_rate": 9.978981587481705e-06, "loss": 0.6111, "step": 921 }, { "epoch": 0.06, "grad_norm": 1.3239092826843262, "learning_rate": 9.978887506487049e-06, "loss": 0.6774, "step": 922 }, { "epoch": 0.06, "grad_norm": 1.0860577821731567, "learning_rate": 9.978793215848763e-06, "loss": 0.577, "step": 923 }, { "epoch": 0.06, "grad_norm": 1.1568961143493652, "learning_rate": 9.978698715570814e-06, "loss": 0.6195, "step": 924 }, { "epoch": 0.06, "grad_norm": 1.1322360038757324, "learning_rate": 9.97860400565718e-06, "loss": 0.5702, "step": 925 }, { "epoch": 0.06, "grad_norm": 1.077203392982483, "learning_rate": 9.978509086111852e-06, "loss": 0.6116, "step": 926 }, { "epoch": 0.06, "grad_norm": 1.0534157752990723, "learning_rate": 9.978413956938824e-06, "loss": 0.5759, "step": 927 }, { "epoch": 0.06, "grad_norm": 1.1379859447479248, "learning_rate": 9.978318618142104e-06, "loss": 0.5823, "step": 928 }, { "epoch": 0.06, "grad_norm": 1.1714255809783936, "learning_rate": 9.978223069725706e-06, "loss": 0.5625, "step": 929 }, { "epoch": 0.06, "grad_norm": 1.1636358499526978, "learning_rate": 9.978127311693653e-06, "loss": 0.5302, "step": 930 }, { "epoch": 0.06, "grad_norm": 1.116036295890808, "learning_rate": 9.978031344049975e-06, "loss": 0.5863, "step": 931 }, { "epoch": 0.06, "grad_norm": 1.1864908933639526, "learning_rate": 9.977935166798714e-06, "loss": 0.6192, "step": 932 }, { "epoch": 0.06, "grad_norm": 1.0718437433242798, "learning_rate": 9.977838779943921e-06, "loss": 0.5689, "step": 933 }, { "epoch": 0.06, "grad_norm": 1.1931647062301636, "learning_rate": 9.977742183489653e-06, "loss": 0.6382, "step": 934 }, { "epoch": 0.06, "grad_norm": 1.1988134384155273, "learning_rate": 9.977645377439979e-06, "loss": 0.5826, "step": 935 }, { "epoch": 0.06, "grad_norm": 1.1773120164871216, "learning_rate": 9.977548361798975e-06, "loss": 0.5932, "step": 936 }, { "epoch": 0.06, "grad_norm": 1.1375964879989624, "learning_rate": 9.977451136570722e-06, "loss": 0.6283, "step": 937 }, { "epoch": 0.06, "grad_norm": 1.1152372360229492, "learning_rate": 9.97735370175932e-06, "loss": 0.5704, "step": 938 }, { "epoch": 0.06, "grad_norm": 1.0861738920211792, "learning_rate": 9.977256057368865e-06, "loss": 0.6062, "step": 939 }, { "epoch": 0.06, "grad_norm": 1.097822904586792, "learning_rate": 9.977158203403475e-06, "loss": 0.6039, "step": 940 }, { "epoch": 0.06, "grad_norm": 1.0957974195480347, "learning_rate": 9.977060139867268e-06, "loss": 0.5616, "step": 941 }, { "epoch": 0.06, "grad_norm": 1.0065202713012695, "learning_rate": 9.97696186676437e-06, "loss": 0.5236, "step": 942 }, { "epoch": 0.06, "grad_norm": 1.1139683723449707, "learning_rate": 9.976863384098923e-06, "loss": 0.6152, "step": 943 }, { "epoch": 0.06, "grad_norm": 1.1870875358581543, "learning_rate": 9.976764691875072e-06, "loss": 0.6356, "step": 944 }, { "epoch": 0.06, "grad_norm": 1.1149030923843384, "learning_rate": 9.976665790096971e-06, "loss": 0.5808, "step": 945 }, { "epoch": 0.06, "grad_norm": 1.0395978689193726, "learning_rate": 9.976566678768787e-06, "loss": 0.5415, "step": 946 }, { "epoch": 0.06, "grad_norm": 1.148126244544983, "learning_rate": 9.976467357894693e-06, "loss": 0.5846, "step": 947 }, { "epoch": 0.06, "grad_norm": 1.151173710823059, "learning_rate": 9.97636782747887e-06, "loss": 0.6315, "step": 948 }, { "epoch": 0.06, "grad_norm": 1.1681454181671143, "learning_rate": 9.976268087525509e-06, "loss": 0.6539, "step": 949 }, { "epoch": 0.06, "grad_norm": 1.097345232963562, "learning_rate": 9.976168138038812e-06, "loss": 0.6086, "step": 950 }, { "epoch": 0.06, "grad_norm": 1.06402587890625, "learning_rate": 9.976067979022983e-06, "loss": 0.6146, "step": 951 }, { "epoch": 0.06, "grad_norm": 1.0805296897888184, "learning_rate": 9.975967610482243e-06, "loss": 0.5981, "step": 952 }, { "epoch": 0.06, "grad_norm": 1.0387953519821167, "learning_rate": 9.975867032420816e-06, "loss": 0.5773, "step": 953 }, { "epoch": 0.06, "grad_norm": 1.0913708209991455, "learning_rate": 9.97576624484294e-06, "loss": 0.6334, "step": 954 }, { "epoch": 0.06, "grad_norm": 1.1604536771774292, "learning_rate": 9.975665247752855e-06, "loss": 0.6094, "step": 955 }, { "epoch": 0.06, "grad_norm": 1.125901460647583, "learning_rate": 9.975564041154817e-06, "loss": 0.5946, "step": 956 }, { "epoch": 0.06, "grad_norm": 1.1342413425445557, "learning_rate": 9.975462625053085e-06, "loss": 0.6266, "step": 957 }, { "epoch": 0.06, "grad_norm": 2.1175763607025146, "learning_rate": 9.97536099945193e-06, "loss": 0.5808, "step": 958 }, { "epoch": 0.06, "grad_norm": 1.0494226217269897, "learning_rate": 9.975259164355632e-06, "loss": 0.6203, "step": 959 }, { "epoch": 0.06, "grad_norm": 1.1396679878234863, "learning_rate": 9.97515711976848e-06, "loss": 0.6191, "step": 960 }, { "epoch": 0.06, "grad_norm": 1.1368567943572998, "learning_rate": 9.975054865694767e-06, "loss": 0.6091, "step": 961 }, { "epoch": 0.06, "grad_norm": 1.1227906942367554, "learning_rate": 9.9749524021388e-06, "loss": 0.6355, "step": 962 }, { "epoch": 0.06, "grad_norm": 1.091934323310852, "learning_rate": 9.974849729104894e-06, "loss": 0.6362, "step": 963 }, { "epoch": 0.06, "grad_norm": 1.120595097541809, "learning_rate": 9.974746846597373e-06, "loss": 0.6677, "step": 964 }, { "epoch": 0.06, "grad_norm": 1.110969066619873, "learning_rate": 9.974643754620567e-06, "loss": 0.6195, "step": 965 }, { "epoch": 0.06, "grad_norm": 1.074960708618164, "learning_rate": 9.97454045317882e-06, "loss": 0.5969, "step": 966 }, { "epoch": 0.06, "grad_norm": 1.1078239679336548, "learning_rate": 9.974436942276477e-06, "loss": 0.5878, "step": 967 }, { "epoch": 0.06, "grad_norm": 1.0919568538665771, "learning_rate": 9.974333221917903e-06, "loss": 0.6343, "step": 968 }, { "epoch": 0.06, "grad_norm": 1.0447242259979248, "learning_rate": 9.974229292107458e-06, "loss": 0.5797, "step": 969 }, { "epoch": 0.06, "grad_norm": 1.1111451387405396, "learning_rate": 9.974125152849523e-06, "loss": 0.627, "step": 970 }, { "epoch": 0.06, "grad_norm": 1.1359940767288208, "learning_rate": 9.974020804148482e-06, "loss": 0.5759, "step": 971 }, { "epoch": 0.06, "grad_norm": 1.1420283317565918, "learning_rate": 9.973916246008727e-06, "loss": 0.5921, "step": 972 }, { "epoch": 0.06, "grad_norm": 1.0663940906524658, "learning_rate": 9.973811478434662e-06, "loss": 0.616, "step": 973 }, { "epoch": 0.06, "grad_norm": 1.1082817316055298, "learning_rate": 9.9737065014307e-06, "loss": 0.611, "step": 974 }, { "epoch": 0.06, "grad_norm": 1.0665665864944458, "learning_rate": 9.973601315001258e-06, "loss": 0.6119, "step": 975 }, { "epoch": 0.06, "grad_norm": 1.1018037796020508, "learning_rate": 9.973495919150766e-06, "loss": 0.6169, "step": 976 }, { "epoch": 0.06, "grad_norm": 1.0560823678970337, "learning_rate": 9.973390313883664e-06, "loss": 0.5525, "step": 977 }, { "epoch": 0.06, "grad_norm": 1.0385541915893555, "learning_rate": 9.973284499204396e-06, "loss": 0.5556, "step": 978 }, { "epoch": 0.06, "grad_norm": 1.1537460088729858, "learning_rate": 9.973178475117419e-06, "loss": 0.6003, "step": 979 }, { "epoch": 0.06, "grad_norm": 1.0920456647872925, "learning_rate": 9.973072241627196e-06, "loss": 0.6364, "step": 980 }, { "epoch": 0.06, "grad_norm": 1.1587941646575928, "learning_rate": 9.972965798738202e-06, "loss": 0.6213, "step": 981 }, { "epoch": 0.06, "grad_norm": 1.0415784120559692, "learning_rate": 9.972859146454917e-06, "loss": 0.5436, "step": 982 }, { "epoch": 0.06, "grad_norm": 1.1108245849609375, "learning_rate": 9.972752284781832e-06, "loss": 0.6119, "step": 983 }, { "epoch": 0.06, "grad_norm": 1.1475266218185425, "learning_rate": 9.97264521372345e-06, "loss": 0.5819, "step": 984 }, { "epoch": 0.06, "grad_norm": 1.013890027999878, "learning_rate": 9.972537933284274e-06, "loss": 0.5579, "step": 985 }, { "epoch": 0.06, "grad_norm": 1.0834392309188843, "learning_rate": 9.972430443468826e-06, "loss": 0.617, "step": 986 }, { "epoch": 0.06, "grad_norm": 1.0244008302688599, "learning_rate": 9.972322744281628e-06, "loss": 0.537, "step": 987 }, { "epoch": 0.06, "grad_norm": 1.0993061065673828, "learning_rate": 9.972214835727218e-06, "loss": 0.6344, "step": 988 }, { "epoch": 0.06, "grad_norm": 1.1424047946929932, "learning_rate": 9.972106717810137e-06, "loss": 0.5968, "step": 989 }, { "epoch": 0.06, "grad_norm": 1.1044074296951294, "learning_rate": 9.97199839053494e-06, "loss": 0.6462, "step": 990 }, { "epoch": 0.06, "grad_norm": 1.1467039585113525, "learning_rate": 9.971889853906186e-06, "loss": 0.6162, "step": 991 }, { "epoch": 0.06, "grad_norm": 1.0741792917251587, "learning_rate": 9.971781107928447e-06, "loss": 0.556, "step": 992 }, { "epoch": 0.06, "grad_norm": 1.1318819522857666, "learning_rate": 9.9716721526063e-06, "loss": 0.5789, "step": 993 }, { "epoch": 0.06, "grad_norm": 1.0687830448150635, "learning_rate": 9.971562987944336e-06, "loss": 0.6205, "step": 994 }, { "epoch": 0.06, "grad_norm": 1.0508983135223389, "learning_rate": 9.971453613947147e-06, "loss": 0.6119, "step": 995 }, { "epoch": 0.06, "grad_norm": 1.043202519416809, "learning_rate": 9.971344030619342e-06, "loss": 0.5733, "step": 996 }, { "epoch": 0.06, "grad_norm": 1.0849485397338867, "learning_rate": 9.971234237965534e-06, "loss": 0.5942, "step": 997 }, { "epoch": 0.06, "grad_norm": 1.1442948579788208, "learning_rate": 9.971124235990346e-06, "loss": 0.62, "step": 998 }, { "epoch": 0.06, "grad_norm": 1.1188938617706299, "learning_rate": 9.971014024698408e-06, "loss": 0.5871, "step": 999 }, { "epoch": 0.06, "grad_norm": 1.1602274179458618, "learning_rate": 9.970903604094365e-06, "loss": 0.6137, "step": 1000 }, { "epoch": 0.06, "grad_norm": 1.1121653318405151, "learning_rate": 9.970792974182863e-06, "loss": 0.6155, "step": 1001 }, { "epoch": 0.06, "grad_norm": 1.0772852897644043, "learning_rate": 9.97068213496856e-06, "loss": 0.5483, "step": 1002 }, { "epoch": 0.06, "grad_norm": 1.1640626192092896, "learning_rate": 9.970571086456124e-06, "loss": 0.5862, "step": 1003 }, { "epoch": 0.06, "grad_norm": 1.1127629280090332, "learning_rate": 9.970459828650232e-06, "loss": 0.5686, "step": 1004 }, { "epoch": 0.06, "grad_norm": 1.2149871587753296, "learning_rate": 9.970348361555566e-06, "loss": 0.5699, "step": 1005 }, { "epoch": 0.06, "grad_norm": 1.1070051193237305, "learning_rate": 9.970236685176821e-06, "loss": 0.6147, "step": 1006 }, { "epoch": 0.06, "grad_norm": 1.0872983932495117, "learning_rate": 9.9701247995187e-06, "loss": 0.5657, "step": 1007 }, { "epoch": 0.06, "grad_norm": 1.1832880973815918, "learning_rate": 9.970012704585916e-06, "loss": 0.6123, "step": 1008 }, { "epoch": 0.06, "grad_norm": 1.1965460777282715, "learning_rate": 9.969900400383183e-06, "loss": 0.6197, "step": 1009 }, { "epoch": 0.06, "grad_norm": 1.1256316900253296, "learning_rate": 9.969787886915236e-06, "loss": 0.5836, "step": 1010 }, { "epoch": 0.06, "grad_norm": 1.096758484840393, "learning_rate": 9.969675164186807e-06, "loss": 0.6016, "step": 1011 }, { "epoch": 0.06, "grad_norm": 1.0753967761993408, "learning_rate": 9.969562232202647e-06, "loss": 0.5918, "step": 1012 }, { "epoch": 0.06, "grad_norm": 1.175142765045166, "learning_rate": 9.969449090967509e-06, "loss": 0.6166, "step": 1013 }, { "epoch": 0.06, "grad_norm": 1.0600279569625854, "learning_rate": 9.969335740486157e-06, "loss": 0.5064, "step": 1014 }, { "epoch": 0.06, "grad_norm": 1.0868827104568481, "learning_rate": 9.969222180763363e-06, "loss": 0.5997, "step": 1015 }, { "epoch": 0.06, "grad_norm": 1.199386477470398, "learning_rate": 9.96910841180391e-06, "loss": 0.5554, "step": 1016 }, { "epoch": 0.06, "grad_norm": 1.175727367401123, "learning_rate": 9.968994433612589e-06, "loss": 0.6348, "step": 1017 }, { "epoch": 0.06, "grad_norm": 1.0635806322097778, "learning_rate": 9.968880246194198e-06, "loss": 0.6146, "step": 1018 }, { "epoch": 0.06, "grad_norm": 1.1115741729736328, "learning_rate": 9.968765849553544e-06, "loss": 0.6164, "step": 1019 }, { "epoch": 0.06, "grad_norm": 1.145827054977417, "learning_rate": 9.968651243695446e-06, "loss": 0.5657, "step": 1020 }, { "epoch": 0.06, "grad_norm": 1.1853764057159424, "learning_rate": 9.968536428624729e-06, "loss": 0.6122, "step": 1021 }, { "epoch": 0.06, "grad_norm": 1.1163513660430908, "learning_rate": 9.968421404346228e-06, "loss": 0.6144, "step": 1022 }, { "epoch": 0.06, "grad_norm": 1.1911154985427856, "learning_rate": 9.968306170864786e-06, "loss": 0.6815, "step": 1023 }, { "epoch": 0.06, "grad_norm": 1.1843432188034058, "learning_rate": 9.968190728185251e-06, "loss": 0.5854, "step": 1024 }, { "epoch": 0.06, "grad_norm": 1.0040591955184937, "learning_rate": 9.968075076312492e-06, "loss": 0.5802, "step": 1025 }, { "epoch": 0.07, "grad_norm": 1.1693812608718872, "learning_rate": 9.96795921525137e-06, "loss": 0.6038, "step": 1026 }, { "epoch": 0.07, "grad_norm": 1.116028070449829, "learning_rate": 9.967843145006771e-06, "loss": 0.5952, "step": 1027 }, { "epoch": 0.07, "grad_norm": 1.187366247177124, "learning_rate": 9.967726865583578e-06, "loss": 0.5987, "step": 1028 }, { "epoch": 0.07, "grad_norm": 1.1540720462799072, "learning_rate": 9.967610376986687e-06, "loss": 0.5899, "step": 1029 }, { "epoch": 0.07, "grad_norm": 1.3024755716323853, "learning_rate": 9.967493679221006e-06, "loss": 0.6473, "step": 1030 }, { "epoch": 0.07, "grad_norm": 1.0868054628372192, "learning_rate": 9.967376772291446e-06, "loss": 0.5728, "step": 1031 }, { "epoch": 0.07, "grad_norm": 1.077175498008728, "learning_rate": 9.96725965620293e-06, "loss": 0.5796, "step": 1032 }, { "epoch": 0.07, "grad_norm": 1.1268564462661743, "learning_rate": 9.96714233096039e-06, "loss": 0.5815, "step": 1033 }, { "epoch": 0.07, "grad_norm": 1.1633411645889282, "learning_rate": 9.967024796568766e-06, "loss": 0.6436, "step": 1034 }, { "epoch": 0.07, "grad_norm": 1.1669288873672485, "learning_rate": 9.966907053033006e-06, "loss": 0.6238, "step": 1035 }, { "epoch": 0.07, "grad_norm": 1.12864351272583, "learning_rate": 9.96678910035807e-06, "loss": 0.6005, "step": 1036 }, { "epoch": 0.07, "grad_norm": 1.0559660196304321, "learning_rate": 9.966670938548923e-06, "loss": 0.6212, "step": 1037 }, { "epoch": 0.07, "grad_norm": 1.215419054031372, "learning_rate": 9.96655256761054e-06, "loss": 0.58, "step": 1038 }, { "epoch": 0.07, "grad_norm": 1.1690610647201538, "learning_rate": 9.966433987547906e-06, "loss": 0.5528, "step": 1039 }, { "epoch": 0.07, "grad_norm": 1.0882534980773926, "learning_rate": 9.966315198366011e-06, "loss": 0.6244, "step": 1040 }, { "epoch": 0.07, "grad_norm": 1.1955314874649048, "learning_rate": 9.966196200069863e-06, "loss": 0.6262, "step": 1041 }, { "epoch": 0.07, "grad_norm": 1.2565194368362427, "learning_rate": 9.966076992664469e-06, "loss": 0.6175, "step": 1042 }, { "epoch": 0.07, "grad_norm": 1.1895841360092163, "learning_rate": 9.965957576154848e-06, "loss": 0.5811, "step": 1043 }, { "epoch": 0.07, "grad_norm": 1.1305845975875854, "learning_rate": 9.96583795054603e-06, "loss": 0.6107, "step": 1044 }, { "epoch": 0.07, "grad_norm": 1.1758750677108765, "learning_rate": 9.965718115843048e-06, "loss": 0.6219, "step": 1045 }, { "epoch": 0.07, "grad_norm": 1.077109456062317, "learning_rate": 9.965598072050953e-06, "loss": 0.6318, "step": 1046 }, { "epoch": 0.07, "grad_norm": 1.064382553100586, "learning_rate": 9.965477819174796e-06, "loss": 0.5972, "step": 1047 }, { "epoch": 0.07, "grad_norm": 1.0851179361343384, "learning_rate": 9.96535735721964e-06, "loss": 0.5786, "step": 1048 }, { "epoch": 0.07, "grad_norm": 1.0553301572799683, "learning_rate": 9.965236686190563e-06, "loss": 0.6608, "step": 1049 }, { "epoch": 0.07, "grad_norm": 1.1511121988296509, "learning_rate": 9.965115806092638e-06, "loss": 0.5651, "step": 1050 }, { "epoch": 0.07, "grad_norm": 1.1849722862243652, "learning_rate": 9.96499471693096e-06, "loss": 0.5827, "step": 1051 }, { "epoch": 0.07, "grad_norm": 1.0399516820907593, "learning_rate": 9.964873418710628e-06, "loss": 0.5149, "step": 1052 }, { "epoch": 0.07, "grad_norm": 1.1090704202651978, "learning_rate": 9.964751911436748e-06, "loss": 0.5881, "step": 1053 }, { "epoch": 0.07, "grad_norm": 1.10074782371521, "learning_rate": 9.964630195114432e-06, "loss": 0.5828, "step": 1054 }, { "epoch": 0.07, "grad_norm": 1.1069676876068115, "learning_rate": 9.964508269748814e-06, "loss": 0.6137, "step": 1055 }, { "epoch": 0.07, "grad_norm": 1.06825590133667, "learning_rate": 9.96438613534502e-06, "loss": 0.5847, "step": 1056 }, { "epoch": 0.07, "grad_norm": 1.0860706567764282, "learning_rate": 9.964263791908198e-06, "loss": 0.5926, "step": 1057 }, { "epoch": 0.07, "grad_norm": 1.1424225568771362, "learning_rate": 9.964141239443497e-06, "loss": 0.557, "step": 1058 }, { "epoch": 0.07, "grad_norm": 1.1204310655593872, "learning_rate": 9.964018477956075e-06, "loss": 0.6212, "step": 1059 }, { "epoch": 0.07, "grad_norm": 1.011555552482605, "learning_rate": 9.963895507451104e-06, "loss": 0.5961, "step": 1060 }, { "epoch": 0.07, "grad_norm": 1.108712077140808, "learning_rate": 9.963772327933764e-06, "loss": 0.6081, "step": 1061 }, { "epoch": 0.07, "grad_norm": 1.1001176834106445, "learning_rate": 9.963648939409236e-06, "loss": 0.6096, "step": 1062 }, { "epoch": 0.07, "grad_norm": 1.0480148792266846, "learning_rate": 9.96352534188272e-06, "loss": 0.5524, "step": 1063 }, { "epoch": 0.07, "grad_norm": 1.0474708080291748, "learning_rate": 9.963401535359418e-06, "loss": 0.565, "step": 1064 }, { "epoch": 0.07, "grad_norm": 1.0563637018203735, "learning_rate": 9.963277519844544e-06, "loss": 0.5545, "step": 1065 }, { "epoch": 0.07, "grad_norm": 1.2024121284484863, "learning_rate": 9.963153295343319e-06, "loss": 0.6513, "step": 1066 }, { "epoch": 0.07, "grad_norm": 1.0970734357833862, "learning_rate": 9.963028861860975e-06, "loss": 0.616, "step": 1067 }, { "epoch": 0.07, "grad_norm": 1.1616771221160889, "learning_rate": 9.962904219402752e-06, "loss": 0.6491, "step": 1068 }, { "epoch": 0.07, "grad_norm": 1.1157522201538086, "learning_rate": 9.962779367973896e-06, "loss": 0.6063, "step": 1069 }, { "epoch": 0.07, "grad_norm": 1.102754831314087, "learning_rate": 9.962654307579665e-06, "loss": 0.5964, "step": 1070 }, { "epoch": 0.07, "grad_norm": 1.0813874006271362, "learning_rate": 9.962529038225324e-06, "loss": 0.6397, "step": 1071 }, { "epoch": 0.07, "grad_norm": 1.0415786504745483, "learning_rate": 9.962403559916149e-06, "loss": 0.5828, "step": 1072 }, { "epoch": 0.07, "grad_norm": 1.076107144355774, "learning_rate": 9.962277872657422e-06, "loss": 0.6111, "step": 1073 }, { "epoch": 0.07, "grad_norm": 1.0587031841278076, "learning_rate": 9.962151976454439e-06, "loss": 0.5755, "step": 1074 }, { "epoch": 0.07, "grad_norm": 1.2270575761795044, "learning_rate": 9.962025871312497e-06, "loss": 0.6434, "step": 1075 }, { "epoch": 0.07, "grad_norm": 1.1191643476486206, "learning_rate": 9.961899557236907e-06, "loss": 0.62, "step": 1076 }, { "epoch": 0.07, "grad_norm": 1.0456963777542114, "learning_rate": 9.961773034232987e-06, "loss": 0.582, "step": 1077 }, { "epoch": 0.07, "grad_norm": 1.031314730644226, "learning_rate": 9.961646302306066e-06, "loss": 0.5834, "step": 1078 }, { "epoch": 0.07, "grad_norm": 0.999323308467865, "learning_rate": 9.961519361461481e-06, "loss": 0.5689, "step": 1079 }, { "epoch": 0.07, "grad_norm": 1.1052522659301758, "learning_rate": 9.961392211704573e-06, "loss": 0.602, "step": 1080 }, { "epoch": 0.07, "grad_norm": 1.1391727924346924, "learning_rate": 9.9612648530407e-06, "loss": 0.5811, "step": 1081 }, { "epoch": 0.07, "grad_norm": 1.1579862833023071, "learning_rate": 9.961137285475223e-06, "loss": 0.5907, "step": 1082 }, { "epoch": 0.07, "grad_norm": 1.085680365562439, "learning_rate": 9.961009509013512e-06, "loss": 0.6132, "step": 1083 }, { "epoch": 0.07, "grad_norm": 1.0278522968292236, "learning_rate": 9.96088152366095e-06, "loss": 0.6039, "step": 1084 }, { "epoch": 0.07, "grad_norm": 1.0606110095977783, "learning_rate": 9.960753329422925e-06, "loss": 0.5905, "step": 1085 }, { "epoch": 0.07, "grad_norm": 1.0981287956237793, "learning_rate": 9.960624926304834e-06, "loss": 0.5811, "step": 1086 }, { "epoch": 0.07, "grad_norm": 1.0174564123153687, "learning_rate": 9.960496314312085e-06, "loss": 0.5961, "step": 1087 }, { "epoch": 0.07, "grad_norm": 1.0941609144210815, "learning_rate": 9.96036749345009e-06, "loss": 0.5961, "step": 1088 }, { "epoch": 0.07, "grad_norm": 1.0558466911315918, "learning_rate": 9.960238463724278e-06, "loss": 0.5721, "step": 1089 }, { "epoch": 0.07, "grad_norm": 1.0658026933670044, "learning_rate": 9.96010922514008e-06, "loss": 0.5975, "step": 1090 }, { "epoch": 0.07, "grad_norm": 1.031471848487854, "learning_rate": 9.959979777702935e-06, "loss": 0.5878, "step": 1091 }, { "epoch": 0.07, "grad_norm": 1.068110704421997, "learning_rate": 9.959850121418298e-06, "loss": 0.5674, "step": 1092 }, { "epoch": 0.07, "grad_norm": 1.0213265419006348, "learning_rate": 9.959720256291626e-06, "loss": 0.5638, "step": 1093 }, { "epoch": 0.07, "grad_norm": 1.1408964395523071, "learning_rate": 9.959590182328387e-06, "loss": 0.621, "step": 1094 }, { "epoch": 0.07, "grad_norm": 1.088255763053894, "learning_rate": 9.95945989953406e-06, "loss": 0.6143, "step": 1095 }, { "epoch": 0.07, "grad_norm": 1.1432324647903442, "learning_rate": 9.959329407914129e-06, "loss": 0.584, "step": 1096 }, { "epoch": 0.07, "grad_norm": 1.1209784746170044, "learning_rate": 9.959198707474087e-06, "loss": 0.5808, "step": 1097 }, { "epoch": 0.07, "grad_norm": 1.054780125617981, "learning_rate": 9.959067798219442e-06, "loss": 0.6093, "step": 1098 }, { "epoch": 0.07, "grad_norm": 1.0760698318481445, "learning_rate": 9.958936680155702e-06, "loss": 0.5989, "step": 1099 }, { "epoch": 0.07, "grad_norm": 1.042184591293335, "learning_rate": 9.958805353288388e-06, "loss": 0.5551, "step": 1100 }, { "epoch": 0.07, "grad_norm": 1.1320393085479736, "learning_rate": 9.958673817623033e-06, "loss": 0.5596, "step": 1101 }, { "epoch": 0.07, "grad_norm": 1.1195068359375, "learning_rate": 9.958542073165172e-06, "loss": 0.6096, "step": 1102 }, { "epoch": 0.07, "grad_norm": 1.1104859113693237, "learning_rate": 9.958410119920355e-06, "loss": 0.5914, "step": 1103 }, { "epoch": 0.07, "grad_norm": 1.078932285308838, "learning_rate": 9.958277957894137e-06, "loss": 0.5668, "step": 1104 }, { "epoch": 0.07, "grad_norm": 1.0539746284484863, "learning_rate": 9.958145587092082e-06, "loss": 0.5268, "step": 1105 }, { "epoch": 0.07, "grad_norm": 1.1207184791564941, "learning_rate": 9.958013007519764e-06, "loss": 0.6298, "step": 1106 }, { "epoch": 0.07, "grad_norm": 1.040810465812683, "learning_rate": 9.957880219182767e-06, "loss": 0.525, "step": 1107 }, { "epoch": 0.07, "grad_norm": 1.1117539405822754, "learning_rate": 9.957747222086682e-06, "loss": 0.6414, "step": 1108 }, { "epoch": 0.07, "grad_norm": 1.1745656728744507, "learning_rate": 9.957614016237106e-06, "loss": 0.627, "step": 1109 }, { "epoch": 0.07, "grad_norm": 1.0769981145858765, "learning_rate": 9.957480601639652e-06, "loss": 0.6363, "step": 1110 }, { "epoch": 0.07, "grad_norm": 1.079752802848816, "learning_rate": 9.957346978299935e-06, "loss": 0.586, "step": 1111 }, { "epoch": 0.07, "grad_norm": 1.1357930898666382, "learning_rate": 9.957213146223581e-06, "loss": 0.607, "step": 1112 }, { "epoch": 0.07, "grad_norm": 1.0285855531692505, "learning_rate": 9.957079105416228e-06, "loss": 0.5827, "step": 1113 }, { "epoch": 0.07, "grad_norm": 1.126513123512268, "learning_rate": 9.956944855883516e-06, "loss": 0.6163, "step": 1114 }, { "epoch": 0.07, "grad_norm": 1.0914958715438843, "learning_rate": 9.956810397631103e-06, "loss": 0.6203, "step": 1115 }, { "epoch": 0.07, "grad_norm": 1.1428629159927368, "learning_rate": 9.956675730664646e-06, "loss": 0.5632, "step": 1116 }, { "epoch": 0.07, "grad_norm": 1.1023683547973633, "learning_rate": 9.956540854989817e-06, "loss": 0.5896, "step": 1117 }, { "epoch": 0.07, "grad_norm": 1.1191551685333252, "learning_rate": 9.956405770612295e-06, "loss": 0.606, "step": 1118 }, { "epoch": 0.07, "grad_norm": 1.077361822128296, "learning_rate": 9.956270477537768e-06, "loss": 0.6225, "step": 1119 }, { "epoch": 0.07, "grad_norm": 1.1346423625946045, "learning_rate": 9.956134975771934e-06, "loss": 0.622, "step": 1120 }, { "epoch": 0.07, "grad_norm": 1.2580161094665527, "learning_rate": 9.955999265320495e-06, "loss": 0.6432, "step": 1121 }, { "epoch": 0.07, "grad_norm": 1.0496320724487305, "learning_rate": 9.95586334618917e-06, "loss": 0.6011, "step": 1122 }, { "epoch": 0.07, "grad_norm": 1.081202745437622, "learning_rate": 9.95572721838368e-06, "loss": 0.5669, "step": 1123 }, { "epoch": 0.07, "grad_norm": 1.075972318649292, "learning_rate": 9.955590881909753e-06, "loss": 0.5716, "step": 1124 }, { "epoch": 0.07, "grad_norm": 1.0538547039031982, "learning_rate": 9.955454336773136e-06, "loss": 0.546, "step": 1125 }, { "epoch": 0.07, "grad_norm": 0.9653191566467285, "learning_rate": 9.955317582979575e-06, "loss": 0.5264, "step": 1126 }, { "epoch": 0.07, "grad_norm": 1.1157087087631226, "learning_rate": 9.95518062053483e-06, "loss": 0.5734, "step": 1127 }, { "epoch": 0.07, "grad_norm": 1.1833845376968384, "learning_rate": 9.955043449444665e-06, "loss": 0.6188, "step": 1128 }, { "epoch": 0.07, "grad_norm": 1.036758303642273, "learning_rate": 9.95490606971486e-06, "loss": 0.5751, "step": 1129 }, { "epoch": 0.07, "grad_norm": 0.9968557357788086, "learning_rate": 9.954768481351196e-06, "loss": 0.5704, "step": 1130 }, { "epoch": 0.07, "grad_norm": 1.187659740447998, "learning_rate": 9.954630684359468e-06, "loss": 0.5832, "step": 1131 }, { "epoch": 0.07, "grad_norm": 1.1751303672790527, "learning_rate": 9.954492678745477e-06, "loss": 0.601, "step": 1132 }, { "epoch": 0.07, "grad_norm": 1.05357825756073, "learning_rate": 9.954354464515035e-06, "loss": 0.5443, "step": 1133 }, { "epoch": 0.07, "grad_norm": 1.0725443363189697, "learning_rate": 9.95421604167396e-06, "loss": 0.5463, "step": 1134 }, { "epoch": 0.07, "grad_norm": 1.2123327255249023, "learning_rate": 9.954077410228084e-06, "loss": 0.6595, "step": 1135 }, { "epoch": 0.07, "grad_norm": 1.052491545677185, "learning_rate": 9.95393857018324e-06, "loss": 0.588, "step": 1136 }, { "epoch": 0.07, "grad_norm": 1.1696809530258179, "learning_rate": 9.95379952154528e-06, "loss": 0.571, "step": 1137 }, { "epoch": 0.07, "grad_norm": 1.3228956460952759, "learning_rate": 9.953660264320053e-06, "loss": 0.6355, "step": 1138 }, { "epoch": 0.07, "grad_norm": 1.166395902633667, "learning_rate": 9.953520798513425e-06, "loss": 0.6844, "step": 1139 }, { "epoch": 0.07, "grad_norm": 1.0823599100112915, "learning_rate": 9.953381124131269e-06, "loss": 0.5826, "step": 1140 }, { "epoch": 0.07, "grad_norm": 1.1569372415542603, "learning_rate": 9.953241241179462e-06, "loss": 0.5752, "step": 1141 }, { "epoch": 0.07, "grad_norm": 1.0618703365325928, "learning_rate": 9.953101149663902e-06, "loss": 0.5845, "step": 1142 }, { "epoch": 0.07, "grad_norm": 1.0329277515411377, "learning_rate": 9.95296084959048e-06, "loss": 0.5383, "step": 1143 }, { "epoch": 0.07, "grad_norm": 1.1169556379318237, "learning_rate": 9.952820340965109e-06, "loss": 0.588, "step": 1144 }, { "epoch": 0.07, "grad_norm": 1.0022128820419312, "learning_rate": 9.952679623793702e-06, "loss": 0.5851, "step": 1145 }, { "epoch": 0.07, "grad_norm": 1.05393385887146, "learning_rate": 9.952538698082185e-06, "loss": 0.601, "step": 1146 }, { "epoch": 0.07, "grad_norm": 1.0881694555282593, "learning_rate": 9.95239756383649e-06, "loss": 0.6145, "step": 1147 }, { "epoch": 0.07, "grad_norm": 1.06650972366333, "learning_rate": 9.952256221062566e-06, "loss": 0.5194, "step": 1148 }, { "epoch": 0.07, "grad_norm": 1.181684970855713, "learning_rate": 9.952114669766357e-06, "loss": 0.5987, "step": 1149 }, { "epoch": 0.07, "grad_norm": 1.0362557172775269, "learning_rate": 9.951972909953828e-06, "loss": 0.535, "step": 1150 }, { "epoch": 0.07, "grad_norm": 1.1614176034927368, "learning_rate": 9.951830941630946e-06, "loss": 0.6163, "step": 1151 }, { "epoch": 0.07, "grad_norm": 1.1739448308944702, "learning_rate": 9.951688764803689e-06, "loss": 0.6276, "step": 1152 }, { "epoch": 0.07, "grad_norm": 1.2421715259552002, "learning_rate": 9.951546379478044e-06, "loss": 0.6289, "step": 1153 }, { "epoch": 0.07, "grad_norm": 1.1939436197280884, "learning_rate": 9.951403785660005e-06, "loss": 0.6502, "step": 1154 }, { "epoch": 0.07, "grad_norm": 1.0989609956741333, "learning_rate": 9.95126098335558e-06, "loss": 0.63, "step": 1155 }, { "epoch": 0.07, "grad_norm": 1.0621522665023804, "learning_rate": 9.951117972570776e-06, "loss": 0.5724, "step": 1156 }, { "epoch": 0.07, "grad_norm": 1.1018246412277222, "learning_rate": 9.95097475331162e-06, "loss": 0.6329, "step": 1157 }, { "epoch": 0.07, "grad_norm": 1.0743603706359863, "learning_rate": 9.950831325584138e-06, "loss": 0.5943, "step": 1158 }, { "epoch": 0.07, "grad_norm": 1.0884958505630493, "learning_rate": 9.950687689394373e-06, "loss": 0.5876, "step": 1159 }, { "epoch": 0.07, "grad_norm": 1.0881589651107788, "learning_rate": 9.950543844748372e-06, "loss": 0.6191, "step": 1160 }, { "epoch": 0.07, "grad_norm": 1.0929887294769287, "learning_rate": 9.950399791652193e-06, "loss": 0.6043, "step": 1161 }, { "epoch": 0.07, "grad_norm": 1.2016053199768066, "learning_rate": 9.950255530111896e-06, "loss": 0.6424, "step": 1162 }, { "epoch": 0.07, "grad_norm": 1.0238940715789795, "learning_rate": 9.950111060133562e-06, "loss": 0.5765, "step": 1163 }, { "epoch": 0.07, "grad_norm": 0.9373612999916077, "learning_rate": 9.94996638172327e-06, "loss": 0.5776, "step": 1164 }, { "epoch": 0.07, "grad_norm": 1.0880025625228882, "learning_rate": 9.949821494887116e-06, "loss": 0.6174, "step": 1165 }, { "epoch": 0.07, "grad_norm": 1.065089225769043, "learning_rate": 9.949676399631197e-06, "loss": 0.5716, "step": 1166 }, { "epoch": 0.07, "grad_norm": 1.1390105485916138, "learning_rate": 9.949531095961621e-06, "loss": 0.6253, "step": 1167 }, { "epoch": 0.07, "grad_norm": 1.0902137756347656, "learning_rate": 9.94938558388451e-06, "loss": 0.5996, "step": 1168 }, { "epoch": 0.07, "grad_norm": 1.119095802307129, "learning_rate": 9.94923986340599e-06, "loss": 0.5923, "step": 1169 }, { "epoch": 0.07, "grad_norm": 1.0107195377349854, "learning_rate": 9.949093934532196e-06, "loss": 0.5501, "step": 1170 }, { "epoch": 0.07, "grad_norm": 1.0740958452224731, "learning_rate": 9.948947797269275e-06, "loss": 0.5829, "step": 1171 }, { "epoch": 0.07, "grad_norm": 1.0222264528274536, "learning_rate": 9.948801451623376e-06, "loss": 0.5853, "step": 1172 }, { "epoch": 0.07, "grad_norm": 1.1215888261795044, "learning_rate": 9.948654897600664e-06, "loss": 0.6162, "step": 1173 }, { "epoch": 0.07, "grad_norm": 1.0881133079528809, "learning_rate": 9.94850813520731e-06, "loss": 0.5895, "step": 1174 }, { "epoch": 0.07, "grad_norm": 1.0932121276855469, "learning_rate": 9.948361164449493e-06, "loss": 0.5604, "step": 1175 }, { "epoch": 0.07, "grad_norm": 1.211357593536377, "learning_rate": 9.948213985333403e-06, "loss": 0.6113, "step": 1176 }, { "epoch": 0.07, "grad_norm": 1.2227258682250977, "learning_rate": 9.948066597865234e-06, "loss": 0.5909, "step": 1177 }, { "epoch": 0.07, "grad_norm": 1.0413223505020142, "learning_rate": 9.947919002051194e-06, "loss": 0.5908, "step": 1178 }, { "epoch": 0.07, "grad_norm": 1.1543649435043335, "learning_rate": 9.947771197897495e-06, "loss": 0.588, "step": 1179 }, { "epoch": 0.07, "grad_norm": 1.0861040353775024, "learning_rate": 9.947623185410366e-06, "loss": 0.5964, "step": 1180 }, { "epoch": 0.07, "grad_norm": 1.1817620992660522, "learning_rate": 9.947474964596036e-06, "loss": 0.6389, "step": 1181 }, { "epoch": 0.07, "grad_norm": 1.1223866939544678, "learning_rate": 9.947326535460744e-06, "loss": 0.5958, "step": 1182 }, { "epoch": 0.07, "grad_norm": 1.0928974151611328, "learning_rate": 9.947177898010745e-06, "loss": 0.5861, "step": 1183 }, { "epoch": 0.08, "grad_norm": 1.0815249681472778, "learning_rate": 9.947029052252293e-06, "loss": 0.5637, "step": 1184 }, { "epoch": 0.08, "grad_norm": 1.0170737504959106, "learning_rate": 9.946879998191656e-06, "loss": 0.5843, "step": 1185 }, { "epoch": 0.08, "grad_norm": 1.1782039403915405, "learning_rate": 9.946730735835112e-06, "loss": 0.6589, "step": 1186 }, { "epoch": 0.08, "grad_norm": 1.1698777675628662, "learning_rate": 9.946581265188947e-06, "loss": 0.5822, "step": 1187 }, { "epoch": 0.08, "grad_norm": 1.050987958908081, "learning_rate": 9.946431586259451e-06, "loss": 0.5849, "step": 1188 }, { "epoch": 0.08, "grad_norm": 1.0946595668792725, "learning_rate": 9.946281699052928e-06, "loss": 0.6106, "step": 1189 }, { "epoch": 0.08, "grad_norm": 1.1560449600219727, "learning_rate": 9.946131603575691e-06, "loss": 0.6777, "step": 1190 }, { "epoch": 0.08, "grad_norm": 1.1142282485961914, "learning_rate": 9.945981299834058e-06, "loss": 0.606, "step": 1191 }, { "epoch": 0.08, "grad_norm": 1.2300156354904175, "learning_rate": 9.945830787834358e-06, "loss": 0.6271, "step": 1192 }, { "epoch": 0.08, "grad_norm": 1.1337618827819824, "learning_rate": 9.945680067582928e-06, "loss": 0.5671, "step": 1193 }, { "epoch": 0.08, "grad_norm": 1.0776329040527344, "learning_rate": 9.945529139086116e-06, "loss": 0.5696, "step": 1194 }, { "epoch": 0.08, "grad_norm": 1.1155674457550049, "learning_rate": 9.945378002350277e-06, "loss": 0.6009, "step": 1195 }, { "epoch": 0.08, "grad_norm": 1.103833794593811, "learning_rate": 9.945226657381773e-06, "loss": 0.6125, "step": 1196 }, { "epoch": 0.08, "grad_norm": 0.9649964570999146, "learning_rate": 9.945075104186978e-06, "loss": 0.557, "step": 1197 }, { "epoch": 0.08, "grad_norm": 0.988289475440979, "learning_rate": 9.944923342772272e-06, "loss": 0.5916, "step": 1198 }, { "epoch": 0.08, "grad_norm": 1.1717194318771362, "learning_rate": 9.944771373144047e-06, "loss": 0.5698, "step": 1199 }, { "epoch": 0.08, "grad_norm": 1.0416381359100342, "learning_rate": 9.944619195308701e-06, "loss": 0.5561, "step": 1200 }, { "epoch": 0.08, "grad_norm": 1.1190505027770996, "learning_rate": 9.944466809272642e-06, "loss": 0.5663, "step": 1201 }, { "epoch": 0.08, "grad_norm": 1.0151177644729614, "learning_rate": 9.944314215042286e-06, "loss": 0.5623, "step": 1202 }, { "epoch": 0.08, "grad_norm": 1.0351370573043823, "learning_rate": 9.944161412624059e-06, "loss": 0.5844, "step": 1203 }, { "epoch": 0.08, "grad_norm": 1.0102064609527588, "learning_rate": 9.944008402024395e-06, "loss": 0.6177, "step": 1204 }, { "epoch": 0.08, "grad_norm": 1.182949423789978, "learning_rate": 9.943855183249734e-06, "loss": 0.6217, "step": 1205 }, { "epoch": 0.08, "grad_norm": 1.0727534294128418, "learning_rate": 9.94370175630653e-06, "loss": 0.6389, "step": 1206 }, { "epoch": 0.08, "grad_norm": 1.1393942832946777, "learning_rate": 9.943548121201243e-06, "loss": 0.6057, "step": 1207 }, { "epoch": 0.08, "grad_norm": 1.0908935070037842, "learning_rate": 9.943394277940344e-06, "loss": 0.5453, "step": 1208 }, { "epoch": 0.08, "grad_norm": 1.0910968780517578, "learning_rate": 9.943240226530306e-06, "loss": 0.6553, "step": 1209 }, { "epoch": 0.08, "grad_norm": 1.0977840423583984, "learning_rate": 9.94308596697762e-06, "loss": 0.5948, "step": 1210 }, { "epoch": 0.08, "grad_norm": 1.1505475044250488, "learning_rate": 9.942931499288779e-06, "loss": 0.6209, "step": 1211 }, { "epoch": 0.08, "grad_norm": 1.0492063760757446, "learning_rate": 9.942776823470288e-06, "loss": 0.5464, "step": 1212 }, { "epoch": 0.08, "grad_norm": 1.0802040100097656, "learning_rate": 9.94262193952866e-06, "loss": 0.5941, "step": 1213 }, { "epoch": 0.08, "grad_norm": 1.1597340106964111, "learning_rate": 9.942466847470415e-06, "loss": 0.594, "step": 1214 }, { "epoch": 0.08, "grad_norm": 0.9489971399307251, "learning_rate": 9.942311547302087e-06, "loss": 0.5147, "step": 1215 }, { "epoch": 0.08, "grad_norm": 1.1232295036315918, "learning_rate": 9.94215603903021e-06, "loss": 0.5816, "step": 1216 }, { "epoch": 0.08, "grad_norm": 1.1503745317459106, "learning_rate": 9.942000322661339e-06, "loss": 0.5968, "step": 1217 }, { "epoch": 0.08, "grad_norm": 1.1849077939987183, "learning_rate": 9.941844398202022e-06, "loss": 0.5556, "step": 1218 }, { "epoch": 0.08, "grad_norm": 1.0492712259292603, "learning_rate": 9.941688265658832e-06, "loss": 0.5506, "step": 1219 }, { "epoch": 0.08, "grad_norm": 1.1029094457626343, "learning_rate": 9.941531925038337e-06, "loss": 0.5889, "step": 1220 }, { "epoch": 0.08, "grad_norm": 1.1172541379928589, "learning_rate": 9.941375376347124e-06, "loss": 0.6159, "step": 1221 }, { "epoch": 0.08, "grad_norm": 1.0389809608459473, "learning_rate": 9.941218619591783e-06, "loss": 0.6011, "step": 1222 }, { "epoch": 0.08, "grad_norm": 1.18142569065094, "learning_rate": 9.941061654778917e-06, "loss": 0.5498, "step": 1223 }, { "epoch": 0.08, "grad_norm": 1.2676929235458374, "learning_rate": 9.940904481915132e-06, "loss": 0.5948, "step": 1224 }, { "epoch": 0.08, "grad_norm": 1.0667624473571777, "learning_rate": 9.940747101007049e-06, "loss": 0.5855, "step": 1225 }, { "epoch": 0.08, "grad_norm": 1.029423475265503, "learning_rate": 9.940589512061292e-06, "loss": 0.574, "step": 1226 }, { "epoch": 0.08, "grad_norm": 1.1424000263214111, "learning_rate": 9.940431715084498e-06, "loss": 0.5789, "step": 1227 }, { "epoch": 0.08, "grad_norm": 1.0252925157546997, "learning_rate": 9.94027371008331e-06, "loss": 0.5667, "step": 1228 }, { "epoch": 0.08, "grad_norm": 1.1843030452728271, "learning_rate": 9.940115497064383e-06, "loss": 0.6177, "step": 1229 }, { "epoch": 0.08, "grad_norm": 1.3379067182540894, "learning_rate": 9.939957076034379e-06, "loss": 0.6186, "step": 1230 }, { "epoch": 0.08, "grad_norm": 1.186153769493103, "learning_rate": 9.939798446999965e-06, "loss": 0.5924, "step": 1231 }, { "epoch": 0.08, "grad_norm": 1.1364245414733887, "learning_rate": 9.939639609967825e-06, "loss": 0.617, "step": 1232 }, { "epoch": 0.08, "grad_norm": 1.0593096017837524, "learning_rate": 9.939480564944642e-06, "loss": 0.5803, "step": 1233 }, { "epoch": 0.08, "grad_norm": 1.1644937992095947, "learning_rate": 9.939321311937117e-06, "loss": 0.5769, "step": 1234 }, { "epoch": 0.08, "grad_norm": 1.1884273290634155, "learning_rate": 9.939161850951955e-06, "loss": 0.6122, "step": 1235 }, { "epoch": 0.08, "grad_norm": 1.0510512590408325, "learning_rate": 9.939002181995869e-06, "loss": 0.5604, "step": 1236 }, { "epoch": 0.08, "grad_norm": 1.0551425218582153, "learning_rate": 9.938842305075583e-06, "loss": 0.5719, "step": 1237 }, { "epoch": 0.08, "grad_norm": 1.1491656303405762, "learning_rate": 9.938682220197828e-06, "loss": 0.5445, "step": 1238 }, { "epoch": 0.08, "grad_norm": 1.017390251159668, "learning_rate": 9.938521927369344e-06, "loss": 0.554, "step": 1239 }, { "epoch": 0.08, "grad_norm": 1.0788060426712036, "learning_rate": 9.938361426596883e-06, "loss": 0.5958, "step": 1240 }, { "epoch": 0.08, "grad_norm": 1.1188304424285889, "learning_rate": 9.938200717887202e-06, "loss": 0.5762, "step": 1241 }, { "epoch": 0.08, "grad_norm": 1.1995320320129395, "learning_rate": 9.938039801247066e-06, "loss": 0.5796, "step": 1242 }, { "epoch": 0.08, "grad_norm": 1.2387287616729736, "learning_rate": 9.937878676683254e-06, "loss": 0.6327, "step": 1243 }, { "epoch": 0.08, "grad_norm": 1.0643160343170166, "learning_rate": 9.937717344202548e-06, "loss": 0.5255, "step": 1244 }, { "epoch": 0.08, "grad_norm": 1.1715853214263916, "learning_rate": 9.93755580381174e-06, "loss": 0.6288, "step": 1245 }, { "epoch": 0.08, "grad_norm": 1.027347445487976, "learning_rate": 9.937394055517635e-06, "loss": 0.591, "step": 1246 }, { "epoch": 0.08, "grad_norm": 1.1184815168380737, "learning_rate": 9.937232099327044e-06, "loss": 0.5302, "step": 1247 }, { "epoch": 0.08, "grad_norm": 1.129241943359375, "learning_rate": 9.937069935246782e-06, "loss": 0.5922, "step": 1248 }, { "epoch": 0.08, "grad_norm": 1.2312674522399902, "learning_rate": 9.93690756328368e-06, "loss": 0.5578, "step": 1249 }, { "epoch": 0.08, "grad_norm": 1.081891655921936, "learning_rate": 9.936744983444576e-06, "loss": 0.6104, "step": 1250 }, { "epoch": 0.08, "grad_norm": 1.0679914951324463, "learning_rate": 9.936582195736314e-06, "loss": 0.5763, "step": 1251 }, { "epoch": 0.08, "grad_norm": 1.1055591106414795, "learning_rate": 9.936419200165748e-06, "loss": 0.5504, "step": 1252 }, { "epoch": 0.08, "grad_norm": 1.1147511005401611, "learning_rate": 9.936255996739743e-06, "loss": 0.574, "step": 1253 }, { "epoch": 0.08, "grad_norm": 1.0842479467391968, "learning_rate": 9.93609258546517e-06, "loss": 0.6122, "step": 1254 }, { "epoch": 0.08, "grad_norm": 1.0599509477615356, "learning_rate": 9.93592896634891e-06, "loss": 0.6165, "step": 1255 }, { "epoch": 0.08, "grad_norm": 1.1376314163208008, "learning_rate": 9.93576513939785e-06, "loss": 0.5898, "step": 1256 }, { "epoch": 0.08, "grad_norm": 1.0422910451889038, "learning_rate": 9.935601104618892e-06, "loss": 0.6605, "step": 1257 }, { "epoch": 0.08, "grad_norm": 1.023500919342041, "learning_rate": 9.93543686201894e-06, "loss": 0.5934, "step": 1258 }, { "epoch": 0.08, "grad_norm": 1.1389398574829102, "learning_rate": 9.935272411604913e-06, "loss": 0.6282, "step": 1259 }, { "epoch": 0.08, "grad_norm": 1.0828518867492676, "learning_rate": 9.935107753383733e-06, "loss": 0.5374, "step": 1260 }, { "epoch": 0.08, "grad_norm": 1.1542357206344604, "learning_rate": 9.93494288736233e-06, "loss": 0.6057, "step": 1261 }, { "epoch": 0.08, "grad_norm": 1.2249398231506348, "learning_rate": 9.934777813547653e-06, "loss": 0.5892, "step": 1262 }, { "epoch": 0.08, "grad_norm": 1.1498955488204956, "learning_rate": 9.934612531946648e-06, "loss": 0.5485, "step": 1263 }, { "epoch": 0.08, "grad_norm": 1.1004254817962646, "learning_rate": 9.934447042566275e-06, "loss": 0.5916, "step": 1264 }, { "epoch": 0.08, "grad_norm": 1.1617814302444458, "learning_rate": 9.934281345413504e-06, "loss": 0.6255, "step": 1265 }, { "epoch": 0.08, "grad_norm": 1.06760835647583, "learning_rate": 9.934115440495311e-06, "loss": 0.6103, "step": 1266 }, { "epoch": 0.08, "grad_norm": 1.1368690729141235, "learning_rate": 9.93394932781868e-06, "loss": 0.6045, "step": 1267 }, { "epoch": 0.08, "grad_norm": 1.1010313034057617, "learning_rate": 9.933783007390608e-06, "loss": 0.6089, "step": 1268 }, { "epoch": 0.08, "grad_norm": 1.1142655611038208, "learning_rate": 9.933616479218095e-06, "loss": 0.576, "step": 1269 }, { "epoch": 0.08, "grad_norm": 1.0613747835159302, "learning_rate": 9.933449743308155e-06, "loss": 0.6072, "step": 1270 }, { "epoch": 0.08, "grad_norm": 1.1401052474975586, "learning_rate": 9.93328279966781e-06, "loss": 0.6172, "step": 1271 }, { "epoch": 0.08, "grad_norm": 1.1933995485305786, "learning_rate": 9.933115648304087e-06, "loss": 0.5495, "step": 1272 }, { "epoch": 0.08, "grad_norm": 0.9564380645751953, "learning_rate": 9.932948289224025e-06, "loss": 0.569, "step": 1273 }, { "epoch": 0.08, "grad_norm": 1.006587028503418, "learning_rate": 9.932780722434671e-06, "loss": 0.5972, "step": 1274 }, { "epoch": 0.08, "grad_norm": 1.0362249612808228, "learning_rate": 9.932612947943084e-06, "loss": 0.5653, "step": 1275 }, { "epoch": 0.08, "grad_norm": 1.0872751474380493, "learning_rate": 9.932444965756321e-06, "loss": 0.5605, "step": 1276 }, { "epoch": 0.08, "grad_norm": 1.135711431503296, "learning_rate": 9.93227677588146e-06, "loss": 0.6212, "step": 1277 }, { "epoch": 0.08, "grad_norm": 1.0566486120224, "learning_rate": 9.932108378325582e-06, "loss": 0.5669, "step": 1278 }, { "epoch": 0.08, "grad_norm": 1.031200885772705, "learning_rate": 9.931939773095779e-06, "loss": 0.5683, "step": 1279 }, { "epoch": 0.08, "grad_norm": 1.1347169876098633, "learning_rate": 9.93177096019915e-06, "loss": 0.5966, "step": 1280 }, { "epoch": 0.08, "grad_norm": 1.0358705520629883, "learning_rate": 9.9316019396428e-06, "loss": 0.6257, "step": 1281 }, { "epoch": 0.08, "grad_norm": 1.0359333753585815, "learning_rate": 9.931432711433849e-06, "loss": 0.6045, "step": 1282 }, { "epoch": 0.08, "grad_norm": 1.1425514221191406, "learning_rate": 9.93126327557942e-06, "loss": 0.6403, "step": 1283 }, { "epoch": 0.08, "grad_norm": 1.1299494504928589, "learning_rate": 9.931093632086651e-06, "loss": 0.5855, "step": 1284 }, { "epoch": 0.08, "grad_norm": 1.0689961910247803, "learning_rate": 9.930923780962683e-06, "loss": 0.5565, "step": 1285 }, { "epoch": 0.08, "grad_norm": 1.1040059328079224, "learning_rate": 9.930753722214668e-06, "loss": 0.6126, "step": 1286 }, { "epoch": 0.08, "grad_norm": 0.9784469604492188, "learning_rate": 9.930583455849766e-06, "loss": 0.5312, "step": 1287 }, { "epoch": 0.08, "grad_norm": 1.1913020610809326, "learning_rate": 9.930412981875148e-06, "loss": 0.6228, "step": 1288 }, { "epoch": 0.08, "grad_norm": 1.05826735496521, "learning_rate": 9.93024230029799e-06, "loss": 0.6485, "step": 1289 }, { "epoch": 0.08, "grad_norm": 1.064443826675415, "learning_rate": 9.93007141112548e-06, "loss": 0.5932, "step": 1290 }, { "epoch": 0.08, "grad_norm": 1.113539218902588, "learning_rate": 9.929900314364813e-06, "loss": 0.6224, "step": 1291 }, { "epoch": 0.08, "grad_norm": 1.0498301982879639, "learning_rate": 9.929729010023195e-06, "loss": 0.601, "step": 1292 }, { "epoch": 0.08, "grad_norm": 1.1142476797103882, "learning_rate": 9.929557498107836e-06, "loss": 0.6249, "step": 1293 }, { "epoch": 0.08, "grad_norm": 1.0876604318618774, "learning_rate": 9.929385778625959e-06, "loss": 0.6059, "step": 1294 }, { "epoch": 0.08, "grad_norm": 1.0598934888839722, "learning_rate": 9.929213851584798e-06, "loss": 0.6144, "step": 1295 }, { "epoch": 0.08, "grad_norm": 1.0487703084945679, "learning_rate": 9.929041716991587e-06, "loss": 0.5732, "step": 1296 }, { "epoch": 0.08, "grad_norm": 1.0446213483810425, "learning_rate": 9.928869374853576e-06, "loss": 0.5645, "step": 1297 }, { "epoch": 0.08, "grad_norm": 1.1094882488250732, "learning_rate": 9.928696825178021e-06, "loss": 0.5925, "step": 1298 }, { "epoch": 0.08, "grad_norm": 1.0140398740768433, "learning_rate": 9.92852406797219e-06, "loss": 0.6101, "step": 1299 }, { "epoch": 0.08, "grad_norm": 1.152634859085083, "learning_rate": 9.928351103243356e-06, "loss": 0.6287, "step": 1300 }, { "epoch": 0.08, "grad_norm": 1.0498493909835815, "learning_rate": 9.928177930998801e-06, "loss": 0.5741, "step": 1301 }, { "epoch": 0.08, "grad_norm": 1.1100025177001953, "learning_rate": 9.928004551245818e-06, "loss": 0.5462, "step": 1302 }, { "epoch": 0.08, "grad_norm": 1.0770983695983887, "learning_rate": 9.927830963991704e-06, "loss": 0.581, "step": 1303 }, { "epoch": 0.08, "grad_norm": 1.0831111669540405, "learning_rate": 9.927657169243773e-06, "loss": 0.6322, "step": 1304 }, { "epoch": 0.08, "grad_norm": 1.0878232717514038, "learning_rate": 9.92748316700934e-06, "loss": 0.6321, "step": 1305 }, { "epoch": 0.08, "grad_norm": 1.059654712677002, "learning_rate": 9.927308957295733e-06, "loss": 0.572, "step": 1306 }, { "epoch": 0.08, "grad_norm": 1.0426253080368042, "learning_rate": 9.927134540110286e-06, "loss": 0.5878, "step": 1307 }, { "epoch": 0.08, "grad_norm": 0.9689581394195557, "learning_rate": 9.926959915460344e-06, "loss": 0.5506, "step": 1308 }, { "epoch": 0.08, "grad_norm": 1.0300748348236084, "learning_rate": 9.926785083353258e-06, "loss": 0.5293, "step": 1309 }, { "epoch": 0.08, "grad_norm": 1.1261236667633057, "learning_rate": 9.926610043796394e-06, "loss": 0.5615, "step": 1310 }, { "epoch": 0.08, "grad_norm": 1.0079063177108765, "learning_rate": 9.926434796797117e-06, "loss": 0.561, "step": 1311 }, { "epoch": 0.08, "grad_norm": 1.2044016122817993, "learning_rate": 9.92625934236281e-06, "loss": 0.6302, "step": 1312 }, { "epoch": 0.08, "grad_norm": 1.182726263999939, "learning_rate": 9.92608368050086e-06, "loss": 0.6028, "step": 1313 }, { "epoch": 0.08, "grad_norm": 1.0777925252914429, "learning_rate": 9.925907811218661e-06, "loss": 0.5818, "step": 1314 }, { "epoch": 0.08, "grad_norm": 1.1328976154327393, "learning_rate": 9.925731734523621e-06, "loss": 0.6168, "step": 1315 }, { "epoch": 0.08, "grad_norm": 1.0863475799560547, "learning_rate": 9.925555450423153e-06, "loss": 0.5425, "step": 1316 }, { "epoch": 0.08, "grad_norm": 1.1228018999099731, "learning_rate": 9.92537895892468e-06, "loss": 0.5735, "step": 1317 }, { "epoch": 0.08, "grad_norm": 1.138264536857605, "learning_rate": 9.925202260035632e-06, "loss": 0.6271, "step": 1318 }, { "epoch": 0.08, "grad_norm": 1.0448517799377441, "learning_rate": 9.925025353763452e-06, "loss": 0.5501, "step": 1319 }, { "epoch": 0.08, "grad_norm": 1.146152138710022, "learning_rate": 9.924848240115585e-06, "loss": 0.6045, "step": 1320 }, { "epoch": 0.08, "grad_norm": 1.060131311416626, "learning_rate": 9.924670919099493e-06, "loss": 0.5421, "step": 1321 }, { "epoch": 0.08, "grad_norm": 1.1032346487045288, "learning_rate": 9.92449339072264e-06, "loss": 0.5954, "step": 1322 }, { "epoch": 0.08, "grad_norm": 1.110339879989624, "learning_rate": 9.924315654992501e-06, "loss": 0.5808, "step": 1323 }, { "epoch": 0.08, "grad_norm": 1.0799542665481567, "learning_rate": 9.924137711916559e-06, "loss": 0.568, "step": 1324 }, { "epoch": 0.08, "grad_norm": 1.0312904119491577, "learning_rate": 9.92395956150231e-06, "loss": 0.5882, "step": 1325 }, { "epoch": 0.08, "grad_norm": 1.0492658615112305, "learning_rate": 9.923781203757253e-06, "loss": 0.5699, "step": 1326 }, { "epoch": 0.08, "grad_norm": 1.170120358467102, "learning_rate": 9.923602638688897e-06, "loss": 0.5883, "step": 1327 }, { "epoch": 0.08, "grad_norm": 1.1021220684051514, "learning_rate": 9.923423866304761e-06, "loss": 0.5832, "step": 1328 }, { "epoch": 0.08, "grad_norm": 1.0523005723953247, "learning_rate": 9.923244886612375e-06, "loss": 0.6064, "step": 1329 }, { "epoch": 0.08, "grad_norm": 1.0522792339324951, "learning_rate": 9.923065699619273e-06, "loss": 0.5573, "step": 1330 }, { "epoch": 0.08, "grad_norm": 1.0156751871109009, "learning_rate": 9.922886305333e-06, "loss": 0.575, "step": 1331 }, { "epoch": 0.08, "grad_norm": 1.085269808769226, "learning_rate": 9.922706703761111e-06, "loss": 0.5673, "step": 1332 }, { "epoch": 0.08, "grad_norm": 1.0672869682312012, "learning_rate": 9.922526894911166e-06, "loss": 0.567, "step": 1333 }, { "epoch": 0.08, "grad_norm": 1.0538702011108398, "learning_rate": 9.922346878790739e-06, "loss": 0.6103, "step": 1334 }, { "epoch": 0.08, "grad_norm": 1.0380349159240723, "learning_rate": 9.922166655407408e-06, "loss": 0.6059, "step": 1335 }, { "epoch": 0.08, "grad_norm": 1.0586581230163574, "learning_rate": 9.921986224768762e-06, "loss": 0.573, "step": 1336 }, { "epoch": 0.08, "grad_norm": 1.1432409286499023, "learning_rate": 9.9218055868824e-06, "loss": 0.6387, "step": 1337 }, { "epoch": 0.08, "grad_norm": 1.0543259382247925, "learning_rate": 9.921624741755924e-06, "loss": 0.5625, "step": 1338 }, { "epoch": 0.08, "grad_norm": 1.185113549232483, "learning_rate": 9.921443689396952e-06, "loss": 0.5878, "step": 1339 }, { "epoch": 0.08, "grad_norm": 1.0658711194992065, "learning_rate": 9.921262429813107e-06, "loss": 0.539, "step": 1340 }, { "epoch": 0.08, "grad_norm": 1.1457328796386719, "learning_rate": 9.921080963012021e-06, "loss": 0.5799, "step": 1341 }, { "epoch": 0.09, "grad_norm": 1.0452888011932373, "learning_rate": 9.920899289001335e-06, "loss": 0.6373, "step": 1342 }, { "epoch": 0.09, "grad_norm": 1.0156432390213013, "learning_rate": 9.9207174077887e-06, "loss": 0.5544, "step": 1343 }, { "epoch": 0.09, "grad_norm": 1.1745818853378296, "learning_rate": 9.92053531938177e-06, "loss": 0.5478, "step": 1344 }, { "epoch": 0.09, "grad_norm": 1.074892520904541, "learning_rate": 9.920353023788216e-06, "loss": 0.6117, "step": 1345 }, { "epoch": 0.09, "grad_norm": 1.111828327178955, "learning_rate": 9.920170521015714e-06, "loss": 0.5599, "step": 1346 }, { "epoch": 0.09, "grad_norm": 1.118725299835205, "learning_rate": 9.919987811071946e-06, "loss": 0.6519, "step": 1347 }, { "epoch": 0.09, "grad_norm": 1.1201225519180298, "learning_rate": 9.919804893964607e-06, "loss": 0.6582, "step": 1348 }, { "epoch": 0.09, "grad_norm": 1.105054259300232, "learning_rate": 9.9196217697014e-06, "loss": 0.6288, "step": 1349 }, { "epoch": 0.09, "grad_norm": 1.117285132408142, "learning_rate": 9.919438438290032e-06, "loss": 0.6296, "step": 1350 }, { "epoch": 0.09, "grad_norm": 1.033210277557373, "learning_rate": 9.919254899738227e-06, "loss": 0.575, "step": 1351 }, { "epoch": 0.09, "grad_norm": 1.0252848863601685, "learning_rate": 9.91907115405371e-06, "loss": 0.5776, "step": 1352 }, { "epoch": 0.09, "grad_norm": 0.9571742415428162, "learning_rate": 9.918887201244219e-06, "loss": 0.5863, "step": 1353 }, { "epoch": 0.09, "grad_norm": 1.0897350311279297, "learning_rate": 9.918703041317498e-06, "loss": 0.6026, "step": 1354 }, { "epoch": 0.09, "grad_norm": 1.0628808736801147, "learning_rate": 9.918518674281305e-06, "loss": 0.5792, "step": 1355 }, { "epoch": 0.09, "grad_norm": 1.1064058542251587, "learning_rate": 9.9183341001434e-06, "loss": 0.6186, "step": 1356 }, { "epoch": 0.09, "grad_norm": 1.124521255493164, "learning_rate": 9.918149318911557e-06, "loss": 0.5886, "step": 1357 }, { "epoch": 0.09, "grad_norm": 1.1114414930343628, "learning_rate": 9.917964330593553e-06, "loss": 0.5855, "step": 1358 }, { "epoch": 0.09, "grad_norm": 1.0328484773635864, "learning_rate": 9.917779135197181e-06, "loss": 0.5917, "step": 1359 }, { "epoch": 0.09, "grad_norm": 1.0707226991653442, "learning_rate": 9.917593732730236e-06, "loss": 0.5941, "step": 1360 }, { "epoch": 0.09, "grad_norm": 1.1221344470977783, "learning_rate": 9.917408123200527e-06, "loss": 0.6313, "step": 1361 }, { "epoch": 0.09, "grad_norm": 1.060730218887329, "learning_rate": 9.917222306615868e-06, "loss": 0.5358, "step": 1362 }, { "epoch": 0.09, "grad_norm": 1.1551522016525269, "learning_rate": 9.917036282984084e-06, "loss": 0.6038, "step": 1363 }, { "epoch": 0.09, "grad_norm": 1.0436571836471558, "learning_rate": 9.916850052313007e-06, "loss": 0.617, "step": 1364 }, { "epoch": 0.09, "grad_norm": 0.9944663643836975, "learning_rate": 9.916663614610478e-06, "loss": 0.5487, "step": 1365 }, { "epoch": 0.09, "grad_norm": 1.0552699565887451, "learning_rate": 9.916476969884348e-06, "loss": 0.5983, "step": 1366 }, { "epoch": 0.09, "grad_norm": 1.0966901779174805, "learning_rate": 9.916290118142478e-06, "loss": 0.5831, "step": 1367 }, { "epoch": 0.09, "grad_norm": 1.059079647064209, "learning_rate": 9.91610305939273e-06, "loss": 0.5876, "step": 1368 }, { "epoch": 0.09, "grad_norm": 1.0341980457305908, "learning_rate": 9.915915793642987e-06, "loss": 0.5768, "step": 1369 }, { "epoch": 0.09, "grad_norm": 1.1835851669311523, "learning_rate": 9.91572832090113e-06, "loss": 0.5718, "step": 1370 }, { "epoch": 0.09, "grad_norm": 1.1765505075454712, "learning_rate": 9.915540641175055e-06, "loss": 0.613, "step": 1371 }, { "epoch": 0.09, "grad_norm": 1.0547789335250854, "learning_rate": 9.915352754472662e-06, "loss": 0.5908, "step": 1372 }, { "epoch": 0.09, "grad_norm": 1.1082268953323364, "learning_rate": 9.915164660801865e-06, "loss": 0.5664, "step": 1373 }, { "epoch": 0.09, "grad_norm": 1.1055669784545898, "learning_rate": 9.914976360170583e-06, "loss": 0.5496, "step": 1374 }, { "epoch": 0.09, "grad_norm": 1.0965393781661987, "learning_rate": 9.914787852586744e-06, "loss": 0.5578, "step": 1375 }, { "epoch": 0.09, "grad_norm": 1.1463366746902466, "learning_rate": 9.914599138058285e-06, "loss": 0.5767, "step": 1376 }, { "epoch": 0.09, "grad_norm": 1.0385583639144897, "learning_rate": 9.914410216593154e-06, "loss": 0.5495, "step": 1377 }, { "epoch": 0.09, "grad_norm": 1.1779868602752686, "learning_rate": 9.914221088199304e-06, "loss": 0.5763, "step": 1378 }, { "epoch": 0.09, "grad_norm": 1.1094473600387573, "learning_rate": 9.9140317528847e-06, "loss": 0.5735, "step": 1379 }, { "epoch": 0.09, "grad_norm": 1.0603336095809937, "learning_rate": 9.913842210657314e-06, "loss": 0.585, "step": 1380 }, { "epoch": 0.09, "grad_norm": 1.1335699558258057, "learning_rate": 9.913652461525126e-06, "loss": 0.6302, "step": 1381 }, { "epoch": 0.09, "grad_norm": 1.112284541130066, "learning_rate": 9.913462505496126e-06, "loss": 0.5903, "step": 1382 }, { "epoch": 0.09, "grad_norm": 1.1852585077285767, "learning_rate": 9.913272342578312e-06, "loss": 0.5635, "step": 1383 }, { "epoch": 0.09, "grad_norm": 1.136877179145813, "learning_rate": 9.913081972779692e-06, "loss": 0.5878, "step": 1384 }, { "epoch": 0.09, "grad_norm": 1.0918060541152954, "learning_rate": 9.912891396108281e-06, "loss": 0.5938, "step": 1385 }, { "epoch": 0.09, "grad_norm": 1.0967477560043335, "learning_rate": 9.912700612572106e-06, "loss": 0.5435, "step": 1386 }, { "epoch": 0.09, "grad_norm": 1.1458653211593628, "learning_rate": 9.912509622179197e-06, "loss": 0.6112, "step": 1387 }, { "epoch": 0.09, "grad_norm": 1.0895942449569702, "learning_rate": 9.912318424937596e-06, "loss": 0.6049, "step": 1388 }, { "epoch": 0.09, "grad_norm": 1.0798182487487793, "learning_rate": 9.912127020855356e-06, "loss": 0.6021, "step": 1389 }, { "epoch": 0.09, "grad_norm": 1.1087937355041504, "learning_rate": 9.911935409940536e-06, "loss": 0.6122, "step": 1390 }, { "epoch": 0.09, "grad_norm": 1.1287997961044312, "learning_rate": 9.911743592201203e-06, "loss": 0.5944, "step": 1391 }, { "epoch": 0.09, "grad_norm": 1.0558440685272217, "learning_rate": 9.911551567645433e-06, "loss": 0.5626, "step": 1392 }, { "epoch": 0.09, "grad_norm": 1.1150189638137817, "learning_rate": 9.911359336281312e-06, "loss": 0.6096, "step": 1393 }, { "epoch": 0.09, "grad_norm": 1.1409717798233032, "learning_rate": 9.911166898116935e-06, "loss": 0.6173, "step": 1394 }, { "epoch": 0.09, "grad_norm": 1.0079151391983032, "learning_rate": 9.910974253160405e-06, "loss": 0.5734, "step": 1395 }, { "epoch": 0.09, "grad_norm": 1.0540634393692017, "learning_rate": 9.910781401419835e-06, "loss": 0.5882, "step": 1396 }, { "epoch": 0.09, "grad_norm": 1.054129958152771, "learning_rate": 9.910588342903342e-06, "loss": 0.6047, "step": 1397 }, { "epoch": 0.09, "grad_norm": 1.0486072301864624, "learning_rate": 9.910395077619057e-06, "loss": 0.5836, "step": 1398 }, { "epoch": 0.09, "grad_norm": 1.0438059568405151, "learning_rate": 9.910201605575116e-06, "loss": 0.4912, "step": 1399 }, { "epoch": 0.09, "grad_norm": 1.0598132610321045, "learning_rate": 9.910007926779669e-06, "loss": 0.5759, "step": 1400 }, { "epoch": 0.09, "grad_norm": 1.0906940698623657, "learning_rate": 9.909814041240867e-06, "loss": 0.6319, "step": 1401 }, { "epoch": 0.09, "grad_norm": 1.1012886762619019, "learning_rate": 9.909619948966875e-06, "loss": 0.5676, "step": 1402 }, { "epoch": 0.09, "grad_norm": 1.0419704914093018, "learning_rate": 9.909425649965869e-06, "loss": 0.5474, "step": 1403 }, { "epoch": 0.09, "grad_norm": 1.0559276342391968, "learning_rate": 9.909231144246026e-06, "loss": 0.5799, "step": 1404 }, { "epoch": 0.09, "grad_norm": 1.1001919507980347, "learning_rate": 9.909036431815538e-06, "loss": 0.5837, "step": 1405 }, { "epoch": 0.09, "grad_norm": 1.1216944456100464, "learning_rate": 9.908841512682602e-06, "loss": 0.5731, "step": 1406 }, { "epoch": 0.09, "grad_norm": 1.017932653427124, "learning_rate": 9.908646386855427e-06, "loss": 0.5706, "step": 1407 }, { "epoch": 0.09, "grad_norm": 1.0552278757095337, "learning_rate": 9.90845105434223e-06, "loss": 0.6122, "step": 1408 }, { "epoch": 0.09, "grad_norm": 1.1080493927001953, "learning_rate": 9.908255515151232e-06, "loss": 0.6026, "step": 1409 }, { "epoch": 0.09, "grad_norm": 1.0634651184082031, "learning_rate": 9.90805976929067e-06, "loss": 0.5535, "step": 1410 }, { "epoch": 0.09, "grad_norm": 1.1341966390609741, "learning_rate": 9.907863816768786e-06, "loss": 0.6083, "step": 1411 }, { "epoch": 0.09, "grad_norm": 1.068042278289795, "learning_rate": 9.907667657593828e-06, "loss": 0.579, "step": 1412 }, { "epoch": 0.09, "grad_norm": 1.038496732711792, "learning_rate": 9.907471291774058e-06, "loss": 0.5722, "step": 1413 }, { "epoch": 0.09, "grad_norm": 1.072607159614563, "learning_rate": 9.907274719317746e-06, "loss": 0.5825, "step": 1414 }, { "epoch": 0.09, "grad_norm": 1.0538018941879272, "learning_rate": 9.907077940233162e-06, "loss": 0.5521, "step": 1415 }, { "epoch": 0.09, "grad_norm": 1.216343879699707, "learning_rate": 9.906880954528601e-06, "loss": 0.6778, "step": 1416 }, { "epoch": 0.09, "grad_norm": 1.142187237739563, "learning_rate": 9.90668376221235e-06, "loss": 0.6006, "step": 1417 }, { "epoch": 0.09, "grad_norm": 1.062034010887146, "learning_rate": 9.906486363292718e-06, "loss": 0.5901, "step": 1418 }, { "epoch": 0.09, "grad_norm": 1.0838314294815063, "learning_rate": 9.906288757778012e-06, "loss": 0.6226, "step": 1419 }, { "epoch": 0.09, "grad_norm": 1.0943511724472046, "learning_rate": 9.906090945676552e-06, "loss": 0.571, "step": 1420 }, { "epoch": 0.09, "grad_norm": 1.1141382455825806, "learning_rate": 9.905892926996672e-06, "loss": 0.5907, "step": 1421 }, { "epoch": 0.09, "grad_norm": 0.9890863299369812, "learning_rate": 9.905694701746706e-06, "loss": 0.6038, "step": 1422 }, { "epoch": 0.09, "grad_norm": 1.0769665241241455, "learning_rate": 9.905496269935002e-06, "loss": 0.6017, "step": 1423 }, { "epoch": 0.09, "grad_norm": 0.9992973208427429, "learning_rate": 9.905297631569915e-06, "loss": 0.5853, "step": 1424 }, { "epoch": 0.09, "grad_norm": 1.0979317426681519, "learning_rate": 9.905098786659809e-06, "loss": 0.5775, "step": 1425 }, { "epoch": 0.09, "grad_norm": 1.0635846853256226, "learning_rate": 9.904899735213058e-06, "loss": 0.6112, "step": 1426 }, { "epoch": 0.09, "grad_norm": 1.0106208324432373, "learning_rate": 9.90470047723804e-06, "loss": 0.5671, "step": 1427 }, { "epoch": 0.09, "grad_norm": 0.9854536056518555, "learning_rate": 9.904501012743149e-06, "loss": 0.5015, "step": 1428 }, { "epoch": 0.09, "grad_norm": 1.1171709299087524, "learning_rate": 9.90430134173678e-06, "loss": 0.5806, "step": 1429 }, { "epoch": 0.09, "grad_norm": 1.0839475393295288, "learning_rate": 9.904101464227342e-06, "loss": 0.5587, "step": 1430 }, { "epoch": 0.09, "grad_norm": 1.0651477575302124, "learning_rate": 9.903901380223254e-06, "loss": 0.5947, "step": 1431 }, { "epoch": 0.09, "grad_norm": 1.050728440284729, "learning_rate": 9.903701089732937e-06, "loss": 0.5744, "step": 1432 }, { "epoch": 0.09, "grad_norm": 1.0288559198379517, "learning_rate": 9.903500592764825e-06, "loss": 0.5407, "step": 1433 }, { "epoch": 0.09, "grad_norm": 1.1495304107666016, "learning_rate": 9.903299889327362e-06, "loss": 0.5824, "step": 1434 }, { "epoch": 0.09, "grad_norm": 1.187200903892517, "learning_rate": 9.903098979428998e-06, "loss": 0.5627, "step": 1435 }, { "epoch": 0.09, "grad_norm": 1.1792247295379639, "learning_rate": 9.902897863078192e-06, "loss": 0.5171, "step": 1436 }, { "epoch": 0.09, "grad_norm": 1.0808206796646118, "learning_rate": 9.902696540283414e-06, "loss": 0.6191, "step": 1437 }, { "epoch": 0.09, "grad_norm": 1.024139404296875, "learning_rate": 9.90249501105314e-06, "loss": 0.5325, "step": 1438 }, { "epoch": 0.09, "grad_norm": 1.0681065320968628, "learning_rate": 9.902293275395854e-06, "loss": 0.5886, "step": 1439 }, { "epoch": 0.09, "grad_norm": 1.2235044240951538, "learning_rate": 9.902091333320053e-06, "loss": 0.5487, "step": 1440 }, { "epoch": 0.09, "grad_norm": 1.1250619888305664, "learning_rate": 9.90188918483424e-06, "loss": 0.6068, "step": 1441 }, { "epoch": 0.09, "grad_norm": 1.2732850313186646, "learning_rate": 9.901686829946924e-06, "loss": 0.6125, "step": 1442 }, { "epoch": 0.09, "grad_norm": 1.1575543880462646, "learning_rate": 9.901484268666628e-06, "loss": 0.5941, "step": 1443 }, { "epoch": 0.09, "grad_norm": 1.035243034362793, "learning_rate": 9.90128150100188e-06, "loss": 0.6241, "step": 1444 }, { "epoch": 0.09, "grad_norm": 1.078845739364624, "learning_rate": 9.90107852696122e-06, "loss": 0.5723, "step": 1445 }, { "epoch": 0.09, "grad_norm": 1.1393600702285767, "learning_rate": 9.900875346553192e-06, "loss": 0.6079, "step": 1446 }, { "epoch": 0.09, "grad_norm": 1.1262428760528564, "learning_rate": 9.900671959786352e-06, "loss": 0.5905, "step": 1447 }, { "epoch": 0.09, "grad_norm": 1.2057019472122192, "learning_rate": 9.900468366669264e-06, "loss": 0.5814, "step": 1448 }, { "epoch": 0.09, "grad_norm": 1.1640501022338867, "learning_rate": 9.900264567210501e-06, "loss": 0.5562, "step": 1449 }, { "epoch": 0.09, "grad_norm": 0.9975053071975708, "learning_rate": 9.900060561418643e-06, "loss": 0.5484, "step": 1450 }, { "epoch": 0.09, "grad_norm": 1.0894286632537842, "learning_rate": 9.89985634930228e-06, "loss": 0.6026, "step": 1451 }, { "epoch": 0.09, "grad_norm": 1.0827438831329346, "learning_rate": 9.899651930870014e-06, "loss": 0.5798, "step": 1452 }, { "epoch": 0.09, "grad_norm": 1.058447241783142, "learning_rate": 9.899447306130447e-06, "loss": 0.5445, "step": 1453 }, { "epoch": 0.09, "grad_norm": 1.3698556423187256, "learning_rate": 9.8992424750922e-06, "loss": 0.6093, "step": 1454 }, { "epoch": 0.09, "grad_norm": 1.1240925788879395, "learning_rate": 9.899037437763894e-06, "loss": 0.5647, "step": 1455 }, { "epoch": 0.09, "grad_norm": 1.1698404550552368, "learning_rate": 9.898832194154165e-06, "loss": 0.5791, "step": 1456 }, { "epoch": 0.09, "grad_norm": 1.0323277711868286, "learning_rate": 9.898626744271654e-06, "loss": 0.5268, "step": 1457 }, { "epoch": 0.09, "grad_norm": 1.0751768350601196, "learning_rate": 9.898421088125012e-06, "loss": 0.5584, "step": 1458 }, { "epoch": 0.09, "grad_norm": 1.116385817527771, "learning_rate": 9.898215225722899e-06, "loss": 0.6126, "step": 1459 }, { "epoch": 0.09, "grad_norm": 1.0593472719192505, "learning_rate": 9.898009157073982e-06, "loss": 0.608, "step": 1460 }, { "epoch": 0.09, "grad_norm": 1.0496641397476196, "learning_rate": 9.897802882186938e-06, "loss": 0.5774, "step": 1461 }, { "epoch": 0.09, "grad_norm": 1.032878041267395, "learning_rate": 9.897596401070452e-06, "loss": 0.5633, "step": 1462 }, { "epoch": 0.09, "grad_norm": 1.102704644203186, "learning_rate": 9.89738971373322e-06, "loss": 0.5533, "step": 1463 }, { "epoch": 0.09, "grad_norm": 1.116784691810608, "learning_rate": 9.897182820183944e-06, "loss": 0.6055, "step": 1464 }, { "epoch": 0.09, "grad_norm": 1.0715374946594238, "learning_rate": 9.896975720431334e-06, "loss": 0.5584, "step": 1465 }, { "epoch": 0.09, "grad_norm": 1.0144976377487183, "learning_rate": 9.896768414484115e-06, "loss": 0.5891, "step": 1466 }, { "epoch": 0.09, "grad_norm": 1.0812373161315918, "learning_rate": 9.896560902351009e-06, "loss": 0.5897, "step": 1467 }, { "epoch": 0.09, "grad_norm": 1.1514919996261597, "learning_rate": 9.89635318404076e-06, "loss": 0.6151, "step": 1468 }, { "epoch": 0.09, "grad_norm": 1.000230073928833, "learning_rate": 9.896145259562111e-06, "loss": 0.5991, "step": 1469 }, { "epoch": 0.09, "grad_norm": 1.0490227937698364, "learning_rate": 9.895937128923816e-06, "loss": 0.5768, "step": 1470 }, { "epoch": 0.09, "grad_norm": 1.0367496013641357, "learning_rate": 9.895728792134642e-06, "loss": 0.5864, "step": 1471 }, { "epoch": 0.09, "grad_norm": 1.0676687955856323, "learning_rate": 9.895520249203358e-06, "loss": 0.6064, "step": 1472 }, { "epoch": 0.09, "grad_norm": 1.0376131534576416, "learning_rate": 9.895311500138749e-06, "loss": 0.5652, "step": 1473 }, { "epoch": 0.09, "grad_norm": 1.0196382999420166, "learning_rate": 9.8951025449496e-06, "loss": 0.5893, "step": 1474 }, { "epoch": 0.09, "grad_norm": 1.0555390119552612, "learning_rate": 9.894893383644713e-06, "loss": 0.5366, "step": 1475 }, { "epoch": 0.09, "grad_norm": 1.116921067237854, "learning_rate": 9.894684016232893e-06, "loss": 0.5993, "step": 1476 }, { "epoch": 0.09, "grad_norm": 1.143212914466858, "learning_rate": 9.894474442722956e-06, "loss": 0.6333, "step": 1477 }, { "epoch": 0.09, "grad_norm": 1.0427284240722656, "learning_rate": 9.89426466312373e-06, "loss": 0.5722, "step": 1478 }, { "epoch": 0.09, "grad_norm": 1.1189481019973755, "learning_rate": 9.89405467744404e-06, "loss": 0.6584, "step": 1479 }, { "epoch": 0.09, "grad_norm": 1.0095423460006714, "learning_rate": 9.893844485692736e-06, "loss": 0.5483, "step": 1480 }, { "epoch": 0.09, "grad_norm": 1.0975712537765503, "learning_rate": 9.893634087878665e-06, "loss": 0.5858, "step": 1481 }, { "epoch": 0.09, "grad_norm": 1.0731724500656128, "learning_rate": 9.893423484010685e-06, "loss": 0.5715, "step": 1482 }, { "epoch": 0.09, "grad_norm": 1.0699676275253296, "learning_rate": 9.893212674097666e-06, "loss": 0.5669, "step": 1483 }, { "epoch": 0.09, "grad_norm": 0.993610680103302, "learning_rate": 9.893001658148482e-06, "loss": 0.5196, "step": 1484 }, { "epoch": 0.09, "grad_norm": 0.9979503750801086, "learning_rate": 9.892790436172022e-06, "loss": 0.5465, "step": 1485 }, { "epoch": 0.09, "grad_norm": 1.0339465141296387, "learning_rate": 9.892579008177176e-06, "loss": 0.595, "step": 1486 }, { "epoch": 0.09, "grad_norm": 1.0727976560592651, "learning_rate": 9.892367374172849e-06, "loss": 0.6281, "step": 1487 }, { "epoch": 0.09, "grad_norm": 1.01498281955719, "learning_rate": 9.89215553416795e-06, "loss": 0.5745, "step": 1488 }, { "epoch": 0.09, "grad_norm": 1.0790021419525146, "learning_rate": 9.8919434881714e-06, "loss": 0.5655, "step": 1489 }, { "epoch": 0.09, "grad_norm": 0.9969554543495178, "learning_rate": 9.891731236192127e-06, "loss": 0.6151, "step": 1490 }, { "epoch": 0.09, "grad_norm": 1.0288242101669312, "learning_rate": 9.89151877823907e-06, "loss": 0.5744, "step": 1491 }, { "epoch": 0.09, "grad_norm": 1.0811976194381714, "learning_rate": 9.891306114321175e-06, "loss": 0.5768, "step": 1492 }, { "epoch": 0.09, "grad_norm": 1.0616317987442017, "learning_rate": 9.891093244447393e-06, "loss": 0.5762, "step": 1493 }, { "epoch": 0.09, "grad_norm": 1.1114658117294312, "learning_rate": 9.890880168626691e-06, "loss": 0.5799, "step": 1494 }, { "epoch": 0.09, "grad_norm": 1.052419662475586, "learning_rate": 9.890666886868038e-06, "loss": 0.5779, "step": 1495 }, { "epoch": 0.09, "grad_norm": 0.951212465763092, "learning_rate": 9.890453399180415e-06, "loss": 0.5546, "step": 1496 }, { "epoch": 0.09, "grad_norm": 1.1681593656539917, "learning_rate": 9.890239705572815e-06, "loss": 0.6047, "step": 1497 }, { "epoch": 0.09, "grad_norm": 1.0570625066757202, "learning_rate": 9.89002580605423e-06, "loss": 0.5864, "step": 1498 }, { "epoch": 0.09, "grad_norm": 1.1256753206253052, "learning_rate": 9.88981170063367e-06, "loss": 0.5726, "step": 1499 }, { "epoch": 0.1, "grad_norm": 1.0458052158355713, "learning_rate": 9.88959738932015e-06, "loss": 0.5125, "step": 1500 }, { "epoch": 0.1, "grad_norm": 1.1477687358856201, "learning_rate": 9.889382872122693e-06, "loss": 0.6334, "step": 1501 }, { "epoch": 0.1, "grad_norm": 1.1043277978897095, "learning_rate": 9.889168149050334e-06, "loss": 0.5889, "step": 1502 }, { "epoch": 0.1, "grad_norm": 1.1527658700942993, "learning_rate": 9.88895322011211e-06, "loss": 0.6627, "step": 1503 }, { "epoch": 0.1, "grad_norm": 1.1285358667373657, "learning_rate": 9.888738085317075e-06, "loss": 0.632, "step": 1504 }, { "epoch": 0.1, "grad_norm": 1.0251461267471313, "learning_rate": 9.888522744674286e-06, "loss": 0.5727, "step": 1505 }, { "epoch": 0.1, "grad_norm": 1.096802830696106, "learning_rate": 9.888307198192808e-06, "loss": 0.5645, "step": 1506 }, { "epoch": 0.1, "grad_norm": 1.0713390111923218, "learning_rate": 9.888091445881723e-06, "loss": 0.5978, "step": 1507 }, { "epoch": 0.1, "grad_norm": 1.0598140954971313, "learning_rate": 9.887875487750108e-06, "loss": 0.5756, "step": 1508 }, { "epoch": 0.1, "grad_norm": 1.0787073373794556, "learning_rate": 9.887659323807062e-06, "loss": 0.5913, "step": 1509 }, { "epoch": 0.1, "grad_norm": 1.0909522771835327, "learning_rate": 9.887442954061684e-06, "loss": 0.6353, "step": 1510 }, { "epoch": 0.1, "grad_norm": 1.0643187761306763, "learning_rate": 9.887226378523085e-06, "loss": 0.578, "step": 1511 }, { "epoch": 0.1, "grad_norm": 1.0913480520248413, "learning_rate": 9.887009597200385e-06, "loss": 0.5661, "step": 1512 }, { "epoch": 0.1, "grad_norm": 1.0126315355300903, "learning_rate": 9.88679261010271e-06, "loss": 0.5045, "step": 1513 }, { "epoch": 0.1, "grad_norm": 1.0305216312408447, "learning_rate": 9.886575417239202e-06, "loss": 0.5679, "step": 1514 }, { "epoch": 0.1, "grad_norm": 1.0515673160552979, "learning_rate": 9.886358018619e-06, "loss": 0.5889, "step": 1515 }, { "epoch": 0.1, "grad_norm": 1.0206800699234009, "learning_rate": 9.886140414251259e-06, "loss": 0.5633, "step": 1516 }, { "epoch": 0.1, "grad_norm": 1.1397572755813599, "learning_rate": 9.885922604145143e-06, "loss": 0.6153, "step": 1517 }, { "epoch": 0.1, "grad_norm": 1.0395382642745972, "learning_rate": 9.885704588309825e-06, "loss": 0.5592, "step": 1518 }, { "epoch": 0.1, "grad_norm": 1.1337170600891113, "learning_rate": 9.885486366754482e-06, "loss": 0.6303, "step": 1519 }, { "epoch": 0.1, "grad_norm": 1.12993323802948, "learning_rate": 9.885267939488303e-06, "loss": 0.6014, "step": 1520 }, { "epoch": 0.1, "grad_norm": 1.0222861766815186, "learning_rate": 9.885049306520487e-06, "loss": 0.5193, "step": 1521 }, { "epoch": 0.1, "grad_norm": 1.1298507452011108, "learning_rate": 9.884830467860238e-06, "loss": 0.5602, "step": 1522 }, { "epoch": 0.1, "grad_norm": 1.1178910732269287, "learning_rate": 9.88461142351677e-06, "loss": 0.6111, "step": 1523 }, { "epoch": 0.1, "grad_norm": 0.9981340765953064, "learning_rate": 9.884392173499308e-06, "loss": 0.5449, "step": 1524 }, { "epoch": 0.1, "grad_norm": 1.1069059371948242, "learning_rate": 9.884172717817085e-06, "loss": 0.601, "step": 1525 }, { "epoch": 0.1, "grad_norm": 1.0417429208755493, "learning_rate": 9.883953056479336e-06, "loss": 0.5891, "step": 1526 }, { "epoch": 0.1, "grad_norm": 1.144789457321167, "learning_rate": 9.883733189495316e-06, "loss": 0.6119, "step": 1527 }, { "epoch": 0.1, "grad_norm": 1.0751010179519653, "learning_rate": 9.88351311687428e-06, "loss": 0.5567, "step": 1528 }, { "epoch": 0.1, "grad_norm": 1.0349299907684326, "learning_rate": 9.883292838625495e-06, "loss": 0.5613, "step": 1529 }, { "epoch": 0.1, "grad_norm": 1.0517909526824951, "learning_rate": 9.883072354758237e-06, "loss": 0.5476, "step": 1530 }, { "epoch": 0.1, "grad_norm": 1.044845461845398, "learning_rate": 9.88285166528179e-06, "loss": 0.6208, "step": 1531 }, { "epoch": 0.1, "grad_norm": 1.0600078105926514, "learning_rate": 9.882630770205444e-06, "loss": 0.5358, "step": 1532 }, { "epoch": 0.1, "grad_norm": 1.0708039999008179, "learning_rate": 9.882409669538503e-06, "loss": 0.5602, "step": 1533 }, { "epoch": 0.1, "grad_norm": 1.1167932748794556, "learning_rate": 9.882188363290273e-06, "loss": 0.5941, "step": 1534 }, { "epoch": 0.1, "grad_norm": 1.1405528783798218, "learning_rate": 9.881966851470077e-06, "loss": 0.5814, "step": 1535 }, { "epoch": 0.1, "grad_norm": 1.0730372667312622, "learning_rate": 9.881745134087239e-06, "loss": 0.5897, "step": 1536 }, { "epoch": 0.1, "grad_norm": 1.1045047044754028, "learning_rate": 9.881523211151097e-06, "loss": 0.582, "step": 1537 }, { "epoch": 0.1, "grad_norm": 1.1381570100784302, "learning_rate": 9.881301082670992e-06, "loss": 0.5851, "step": 1538 }, { "epoch": 0.1, "grad_norm": 1.1380250453948975, "learning_rate": 9.881078748656282e-06, "loss": 0.6164, "step": 1539 }, { "epoch": 0.1, "grad_norm": 1.0847206115722656, "learning_rate": 9.880856209116324e-06, "loss": 0.562, "step": 1540 }, { "epoch": 0.1, "grad_norm": 1.1043504476547241, "learning_rate": 9.880633464060492e-06, "loss": 0.5713, "step": 1541 }, { "epoch": 0.1, "grad_norm": 1.0896272659301758, "learning_rate": 9.880410513498163e-06, "loss": 0.5626, "step": 1542 }, { "epoch": 0.1, "grad_norm": 1.1659765243530273, "learning_rate": 9.880187357438722e-06, "loss": 0.5996, "step": 1543 }, { "epoch": 0.1, "grad_norm": 1.1116617918014526, "learning_rate": 9.87996399589157e-06, "loss": 0.5873, "step": 1544 }, { "epoch": 0.1, "grad_norm": 1.0397089719772339, "learning_rate": 9.87974042886611e-06, "loss": 0.5574, "step": 1545 }, { "epoch": 0.1, "grad_norm": 0.982353925704956, "learning_rate": 9.879516656371758e-06, "loss": 0.5099, "step": 1546 }, { "epoch": 0.1, "grad_norm": 1.1460829973220825, "learning_rate": 9.879292678417934e-06, "loss": 0.6181, "step": 1547 }, { "epoch": 0.1, "grad_norm": 1.0767769813537598, "learning_rate": 9.879068495014068e-06, "loss": 0.5771, "step": 1548 }, { "epoch": 0.1, "grad_norm": 1.0753417015075684, "learning_rate": 9.878844106169601e-06, "loss": 0.6469, "step": 1549 }, { "epoch": 0.1, "grad_norm": 1.1642966270446777, "learning_rate": 9.87861951189398e-06, "loss": 0.6111, "step": 1550 }, { "epoch": 0.1, "grad_norm": 1.0993573665618896, "learning_rate": 9.878394712196665e-06, "loss": 0.6171, "step": 1551 }, { "epoch": 0.1, "grad_norm": 1.0451866388320923, "learning_rate": 9.878169707087116e-06, "loss": 0.5912, "step": 1552 }, { "epoch": 0.1, "grad_norm": 0.9942476749420166, "learning_rate": 9.877944496574813e-06, "loss": 0.5278, "step": 1553 }, { "epoch": 0.1, "grad_norm": 1.0837749242782593, "learning_rate": 9.877719080669235e-06, "loss": 0.5482, "step": 1554 }, { "epoch": 0.1, "grad_norm": 1.187171220779419, "learning_rate": 9.877493459379876e-06, "loss": 0.6004, "step": 1555 }, { "epoch": 0.1, "grad_norm": 1.1294541358947754, "learning_rate": 9.877267632716235e-06, "loss": 0.6328, "step": 1556 }, { "epoch": 0.1, "grad_norm": 1.1029921770095825, "learning_rate": 9.87704160068782e-06, "loss": 0.5721, "step": 1557 }, { "epoch": 0.1, "grad_norm": 1.1500641107559204, "learning_rate": 9.87681536330415e-06, "loss": 0.5731, "step": 1558 }, { "epoch": 0.1, "grad_norm": 1.218934416770935, "learning_rate": 9.87658892057475e-06, "loss": 0.6042, "step": 1559 }, { "epoch": 0.1, "grad_norm": 1.0704762935638428, "learning_rate": 9.876362272509154e-06, "loss": 0.6435, "step": 1560 }, { "epoch": 0.1, "grad_norm": 1.109381914138794, "learning_rate": 9.876135419116908e-06, "loss": 0.5703, "step": 1561 }, { "epoch": 0.1, "grad_norm": 1.0808093547821045, "learning_rate": 9.87590836040756e-06, "loss": 0.5565, "step": 1562 }, { "epoch": 0.1, "grad_norm": 1.0995346307754517, "learning_rate": 9.875681096390676e-06, "loss": 0.5791, "step": 1563 }, { "epoch": 0.1, "grad_norm": 1.0648128986358643, "learning_rate": 9.87545362707582e-06, "loss": 0.5752, "step": 1564 }, { "epoch": 0.1, "grad_norm": 1.0962284803390503, "learning_rate": 9.875225952472574e-06, "loss": 0.5804, "step": 1565 }, { "epoch": 0.1, "grad_norm": 1.032683253288269, "learning_rate": 9.874998072590521e-06, "loss": 0.6071, "step": 1566 }, { "epoch": 0.1, "grad_norm": 1.0552096366882324, "learning_rate": 9.874769987439259e-06, "loss": 0.611, "step": 1567 }, { "epoch": 0.1, "grad_norm": 1.04661226272583, "learning_rate": 9.87454169702839e-06, "loss": 0.5924, "step": 1568 }, { "epoch": 0.1, "grad_norm": 1.108020544052124, "learning_rate": 9.87431320136753e-06, "loss": 0.5242, "step": 1569 }, { "epoch": 0.1, "grad_norm": 1.098593831062317, "learning_rate": 9.874084500466295e-06, "loss": 0.5901, "step": 1570 }, { "epoch": 0.1, "grad_norm": 1.0470693111419678, "learning_rate": 9.873855594334319e-06, "loss": 0.6177, "step": 1571 }, { "epoch": 0.1, "grad_norm": 1.0568416118621826, "learning_rate": 9.873626482981238e-06, "loss": 0.5679, "step": 1572 }, { "epoch": 0.1, "grad_norm": 1.067253589630127, "learning_rate": 9.873397166416698e-06, "loss": 0.5495, "step": 1573 }, { "epoch": 0.1, "grad_norm": 0.996239960193634, "learning_rate": 9.87316764465036e-06, "loss": 0.5381, "step": 1574 }, { "epoch": 0.1, "grad_norm": 1.1866554021835327, "learning_rate": 9.872937917691883e-06, "loss": 0.5227, "step": 1575 }, { "epoch": 0.1, "grad_norm": 1.0590612888336182, "learning_rate": 9.872707985550942e-06, "loss": 0.5189, "step": 1576 }, { "epoch": 0.1, "grad_norm": 1.1451884508132935, "learning_rate": 9.872477848237221e-06, "loss": 0.588, "step": 1577 }, { "epoch": 0.1, "grad_norm": 1.1452605724334717, "learning_rate": 9.872247505760405e-06, "loss": 0.5537, "step": 1578 }, { "epoch": 0.1, "grad_norm": 1.2145410776138306, "learning_rate": 9.872016958130197e-06, "loss": 0.5707, "step": 1579 }, { "epoch": 0.1, "grad_norm": 1.0972182750701904, "learning_rate": 9.871786205356303e-06, "loss": 0.5754, "step": 1580 }, { "epoch": 0.1, "grad_norm": 1.0583133697509766, "learning_rate": 9.871555247448442e-06, "loss": 0.6119, "step": 1581 }, { "epoch": 0.1, "grad_norm": 1.1397663354873657, "learning_rate": 9.871324084416332e-06, "loss": 0.5531, "step": 1582 }, { "epoch": 0.1, "grad_norm": 1.2657743692398071, "learning_rate": 9.871092716269714e-06, "loss": 0.5982, "step": 1583 }, { "epoch": 0.1, "grad_norm": 1.0894304513931274, "learning_rate": 9.870861143018327e-06, "loss": 0.5701, "step": 1584 }, { "epoch": 0.1, "grad_norm": 1.0850940942764282, "learning_rate": 9.87062936467192e-06, "loss": 0.5933, "step": 1585 }, { "epoch": 0.1, "grad_norm": 1.080039381980896, "learning_rate": 9.870397381240256e-06, "loss": 0.5614, "step": 1586 }, { "epoch": 0.1, "grad_norm": 1.0982593297958374, "learning_rate": 9.870165192733101e-06, "loss": 0.5794, "step": 1587 }, { "epoch": 0.1, "grad_norm": 1.0418531894683838, "learning_rate": 9.869932799160232e-06, "loss": 0.6145, "step": 1588 }, { "epoch": 0.1, "grad_norm": 1.0308923721313477, "learning_rate": 9.869700200531431e-06, "loss": 0.5992, "step": 1589 }, { "epoch": 0.1, "grad_norm": 1.0501437187194824, "learning_rate": 9.869467396856499e-06, "loss": 0.5468, "step": 1590 }, { "epoch": 0.1, "grad_norm": 1.054578423500061, "learning_rate": 9.869234388145232e-06, "loss": 0.6308, "step": 1591 }, { "epoch": 0.1, "grad_norm": 1.2149218320846558, "learning_rate": 9.869001174407444e-06, "loss": 0.5391, "step": 1592 }, { "epoch": 0.1, "grad_norm": 0.9719024300575256, "learning_rate": 9.868767755652955e-06, "loss": 0.5596, "step": 1593 }, { "epoch": 0.1, "grad_norm": 1.015842080116272, "learning_rate": 9.868534131891594e-06, "loss": 0.5588, "step": 1594 }, { "epoch": 0.1, "grad_norm": 1.0591557025909424, "learning_rate": 9.868300303133195e-06, "loss": 0.6244, "step": 1595 }, { "epoch": 0.1, "grad_norm": 1.069869875907898, "learning_rate": 9.868066269387609e-06, "loss": 0.5485, "step": 1596 }, { "epoch": 0.1, "grad_norm": 1.0887794494628906, "learning_rate": 9.867832030664685e-06, "loss": 0.5765, "step": 1597 }, { "epoch": 0.1, "grad_norm": 1.2797043323516846, "learning_rate": 9.867597586974288e-06, "loss": 0.607, "step": 1598 }, { "epoch": 0.1, "grad_norm": 1.023294448852539, "learning_rate": 9.86736293832629e-06, "loss": 0.5669, "step": 1599 }, { "epoch": 0.1, "grad_norm": 1.0748317241668701, "learning_rate": 9.86712808473057e-06, "loss": 0.6009, "step": 1600 }, { "epoch": 0.1, "grad_norm": 1.0411274433135986, "learning_rate": 9.86689302619702e-06, "loss": 0.5721, "step": 1601 }, { "epoch": 0.1, "grad_norm": 1.059205412864685, "learning_rate": 9.866657762735534e-06, "loss": 0.5968, "step": 1602 }, { "epoch": 0.1, "grad_norm": 1.054073452949524, "learning_rate": 9.866422294356019e-06, "loss": 0.6236, "step": 1603 }, { "epoch": 0.1, "grad_norm": 0.9893490076065063, "learning_rate": 9.866186621068391e-06, "loss": 0.5158, "step": 1604 }, { "epoch": 0.1, "grad_norm": 1.1270335912704468, "learning_rate": 9.865950742882574e-06, "loss": 0.548, "step": 1605 }, { "epoch": 0.1, "grad_norm": 1.09773588180542, "learning_rate": 9.865714659808497e-06, "loss": 0.5779, "step": 1606 }, { "epoch": 0.1, "grad_norm": 1.0233718156814575, "learning_rate": 9.865478371856102e-06, "loss": 0.5779, "step": 1607 }, { "epoch": 0.1, "grad_norm": 1.0285214185714722, "learning_rate": 9.86524187903534e-06, "loss": 0.6001, "step": 1608 }, { "epoch": 0.1, "grad_norm": 1.0676337480545044, "learning_rate": 9.865005181356166e-06, "loss": 0.587, "step": 1609 }, { "epoch": 0.1, "grad_norm": 1.0382795333862305, "learning_rate": 9.864768278828548e-06, "loss": 0.5448, "step": 1610 }, { "epoch": 0.1, "grad_norm": 1.0121814012527466, "learning_rate": 9.864531171462462e-06, "loss": 0.5373, "step": 1611 }, { "epoch": 0.1, "grad_norm": 1.0314677953720093, "learning_rate": 9.86429385926789e-06, "loss": 0.6135, "step": 1612 }, { "epoch": 0.1, "grad_norm": 1.1360394954681396, "learning_rate": 9.864056342254827e-06, "loss": 0.5943, "step": 1613 }, { "epoch": 0.1, "grad_norm": 1.08867609500885, "learning_rate": 9.86381862043327e-06, "loss": 0.6075, "step": 1614 }, { "epoch": 0.1, "grad_norm": 1.1060529947280884, "learning_rate": 9.863580693813232e-06, "loss": 0.5825, "step": 1615 }, { "epoch": 0.1, "grad_norm": 1.0926846265792847, "learning_rate": 9.86334256240473e-06, "loss": 0.598, "step": 1616 }, { "epoch": 0.1, "grad_norm": 1.007134199142456, "learning_rate": 9.86310422621779e-06, "loss": 0.5843, "step": 1617 }, { "epoch": 0.1, "grad_norm": 1.060012698173523, "learning_rate": 9.86286568526245e-06, "loss": 0.6095, "step": 1618 }, { "epoch": 0.1, "grad_norm": 1.0170177221298218, "learning_rate": 9.862626939548751e-06, "loss": 0.6206, "step": 1619 }, { "epoch": 0.1, "grad_norm": 1.03775155544281, "learning_rate": 9.862387989086749e-06, "loss": 0.5696, "step": 1620 }, { "epoch": 0.1, "grad_norm": 1.032655119895935, "learning_rate": 9.862148833886504e-06, "loss": 0.5922, "step": 1621 }, { "epoch": 0.1, "grad_norm": 1.0248726606369019, "learning_rate": 9.861909473958084e-06, "loss": 0.5668, "step": 1622 }, { "epoch": 0.1, "grad_norm": 1.0710545778274536, "learning_rate": 9.861669909311571e-06, "loss": 0.567, "step": 1623 }, { "epoch": 0.1, "grad_norm": 0.9834234118461609, "learning_rate": 9.861430139957052e-06, "loss": 0.5864, "step": 1624 }, { "epoch": 0.1, "grad_norm": 1.1512020826339722, "learning_rate": 9.861190165904617e-06, "loss": 0.6048, "step": 1625 }, { "epoch": 0.1, "grad_norm": 1.1149070262908936, "learning_rate": 9.860949987164379e-06, "loss": 0.5597, "step": 1626 }, { "epoch": 0.1, "grad_norm": 1.0812784433364868, "learning_rate": 9.860709603746445e-06, "loss": 0.592, "step": 1627 }, { "epoch": 0.1, "grad_norm": 1.0941449403762817, "learning_rate": 9.86046901566094e-06, "loss": 0.575, "step": 1628 }, { "epoch": 0.1, "grad_norm": 1.1726043224334717, "learning_rate": 9.860228222917992e-06, "loss": 0.5678, "step": 1629 }, { "epoch": 0.1, "grad_norm": 1.0753968954086304, "learning_rate": 9.859987225527742e-06, "loss": 0.5704, "step": 1630 }, { "epoch": 0.1, "grad_norm": 1.135108470916748, "learning_rate": 9.859746023500337e-06, "loss": 0.6198, "step": 1631 }, { "epoch": 0.1, "grad_norm": 1.0584384202957153, "learning_rate": 9.85950461684593e-06, "loss": 0.5889, "step": 1632 }, { "epoch": 0.1, "grad_norm": 1.0583887100219727, "learning_rate": 9.85926300557469e-06, "loss": 0.5879, "step": 1633 }, { "epoch": 0.1, "grad_norm": 1.0071109533309937, "learning_rate": 9.85902118969679e-06, "loss": 0.5767, "step": 1634 }, { "epoch": 0.1, "grad_norm": 1.0471796989440918, "learning_rate": 9.85877916922241e-06, "loss": 0.5655, "step": 1635 }, { "epoch": 0.1, "grad_norm": 1.1224594116210938, "learning_rate": 9.858536944161743e-06, "loss": 0.6142, "step": 1636 }, { "epoch": 0.1, "grad_norm": 1.083262324333191, "learning_rate": 9.858294514524987e-06, "loss": 0.5738, "step": 1637 }, { "epoch": 0.1, "grad_norm": 1.0365420579910278, "learning_rate": 9.858051880322347e-06, "loss": 0.5379, "step": 1638 }, { "epoch": 0.1, "grad_norm": 1.0462414026260376, "learning_rate": 9.857809041564044e-06, "loss": 0.592, "step": 1639 }, { "epoch": 0.1, "grad_norm": 1.0216164588928223, "learning_rate": 9.857565998260302e-06, "loss": 0.6077, "step": 1640 }, { "epoch": 0.1, "grad_norm": 1.0120216608047485, "learning_rate": 9.857322750421353e-06, "loss": 0.5515, "step": 1641 }, { "epoch": 0.1, "grad_norm": 1.0469900369644165, "learning_rate": 9.857079298057442e-06, "loss": 0.5897, "step": 1642 }, { "epoch": 0.1, "grad_norm": 1.0659940242767334, "learning_rate": 9.856835641178816e-06, "loss": 0.6174, "step": 1643 }, { "epoch": 0.1, "grad_norm": 1.0688961744308472, "learning_rate": 9.856591779795738e-06, "loss": 0.584, "step": 1644 }, { "epoch": 0.1, "grad_norm": 1.0651742219924927, "learning_rate": 9.856347713918475e-06, "loss": 0.6084, "step": 1645 }, { "epoch": 0.1, "grad_norm": 0.9851531386375427, "learning_rate": 9.856103443557304e-06, "loss": 0.5539, "step": 1646 }, { "epoch": 0.1, "grad_norm": 1.1691269874572754, "learning_rate": 9.85585896872251e-06, "loss": 0.5984, "step": 1647 }, { "epoch": 0.1, "grad_norm": 1.0338088274002075, "learning_rate": 9.855614289424386e-06, "loss": 0.5695, "step": 1648 }, { "epoch": 0.1, "grad_norm": 1.0682861804962158, "learning_rate": 9.855369405673236e-06, "loss": 0.5168, "step": 1649 }, { "epoch": 0.1, "grad_norm": 1.0562790632247925, "learning_rate": 9.855124317479372e-06, "loss": 0.5706, "step": 1650 }, { "epoch": 0.1, "grad_norm": 1.1324230432510376, "learning_rate": 9.854879024853113e-06, "loss": 0.5845, "step": 1651 }, { "epoch": 0.1, "grad_norm": 1.1204321384429932, "learning_rate": 9.854633527804787e-06, "loss": 0.5954, "step": 1652 }, { "epoch": 0.1, "grad_norm": 0.9424839019775391, "learning_rate": 9.85438782634473e-06, "loss": 0.5087, "step": 1653 }, { "epoch": 0.1, "grad_norm": 0.9705440998077393, "learning_rate": 9.854141920483289e-06, "loss": 0.5626, "step": 1654 }, { "epoch": 0.1, "grad_norm": 1.1535807847976685, "learning_rate": 9.853895810230818e-06, "loss": 0.5677, "step": 1655 }, { "epoch": 0.1, "grad_norm": 1.1456506252288818, "learning_rate": 9.853649495597682e-06, "loss": 0.6238, "step": 1656 }, { "epoch": 0.1, "grad_norm": 1.103397011756897, "learning_rate": 9.853402976594248e-06, "loss": 0.6069, "step": 1657 }, { "epoch": 0.11, "grad_norm": 1.0972193479537964, "learning_rate": 9.8531562532309e-06, "loss": 0.5484, "step": 1658 }, { "epoch": 0.11, "grad_norm": 1.0873202085494995, "learning_rate": 9.852909325518022e-06, "loss": 0.6027, "step": 1659 }, { "epoch": 0.11, "grad_norm": 1.017956256866455, "learning_rate": 9.852662193466019e-06, "loss": 0.5701, "step": 1660 }, { "epoch": 0.11, "grad_norm": 1.097472906112671, "learning_rate": 9.852414857085288e-06, "loss": 0.6403, "step": 1661 }, { "epoch": 0.11, "grad_norm": 1.1169770956039429, "learning_rate": 9.85216731638625e-06, "loss": 0.5763, "step": 1662 }, { "epoch": 0.11, "grad_norm": 1.0574263334274292, "learning_rate": 9.851919571379326e-06, "loss": 0.6297, "step": 1663 }, { "epoch": 0.11, "grad_norm": 1.0826281309127808, "learning_rate": 9.851671622074947e-06, "loss": 0.5682, "step": 1664 }, { "epoch": 0.11, "grad_norm": 1.1474690437316895, "learning_rate": 9.851423468483554e-06, "loss": 0.6094, "step": 1665 }, { "epoch": 0.11, "grad_norm": 1.1534571647644043, "learning_rate": 9.851175110615594e-06, "loss": 0.5387, "step": 1666 }, { "epoch": 0.11, "grad_norm": 1.0650722980499268, "learning_rate": 9.850926548481528e-06, "loss": 0.5899, "step": 1667 }, { "epoch": 0.11, "grad_norm": 1.1716303825378418, "learning_rate": 9.850677782091818e-06, "loss": 0.5806, "step": 1668 }, { "epoch": 0.11, "grad_norm": 1.033402442932129, "learning_rate": 9.850428811456943e-06, "loss": 0.5319, "step": 1669 }, { "epoch": 0.11, "grad_norm": 1.113662600517273, "learning_rate": 9.850179636587383e-06, "loss": 0.5838, "step": 1670 }, { "epoch": 0.11, "grad_norm": 1.1086348295211792, "learning_rate": 9.849930257493632e-06, "loss": 0.5578, "step": 1671 }, { "epoch": 0.11, "grad_norm": 1.1946237087249756, "learning_rate": 9.849680674186188e-06, "loss": 0.6278, "step": 1672 }, { "epoch": 0.11, "grad_norm": 1.0539735555648804, "learning_rate": 9.849430886675564e-06, "loss": 0.5906, "step": 1673 }, { "epoch": 0.11, "grad_norm": 1.0256834030151367, "learning_rate": 9.849180894972272e-06, "loss": 0.5803, "step": 1674 }, { "epoch": 0.11, "grad_norm": 0.9212612509727478, "learning_rate": 9.848930699086846e-06, "loss": 0.577, "step": 1675 }, { "epoch": 0.11, "grad_norm": 1.0151751041412354, "learning_rate": 9.848680299029813e-06, "loss": 0.5676, "step": 1676 }, { "epoch": 0.11, "grad_norm": 1.0259836912155151, "learning_rate": 9.848429694811721e-06, "loss": 0.528, "step": 1677 }, { "epoch": 0.11, "grad_norm": 1.1119564771652222, "learning_rate": 9.84817888644312e-06, "loss": 0.5951, "step": 1678 }, { "epoch": 0.11, "grad_norm": 1.052847981452942, "learning_rate": 9.847927873934573e-06, "loss": 0.5732, "step": 1679 }, { "epoch": 0.11, "grad_norm": 1.1761375665664673, "learning_rate": 9.847676657296647e-06, "loss": 0.5885, "step": 1680 }, { "epoch": 0.11, "grad_norm": 1.073297142982483, "learning_rate": 9.847425236539922e-06, "loss": 0.5852, "step": 1681 }, { "epoch": 0.11, "grad_norm": 1.0152772665023804, "learning_rate": 9.847173611674982e-06, "loss": 0.5493, "step": 1682 }, { "epoch": 0.11, "grad_norm": 1.1353917121887207, "learning_rate": 9.846921782712424e-06, "loss": 0.5567, "step": 1683 }, { "epoch": 0.11, "grad_norm": 1.0314478874206543, "learning_rate": 9.846669749662851e-06, "loss": 0.5533, "step": 1684 }, { "epoch": 0.11, "grad_norm": 1.0658608675003052, "learning_rate": 9.846417512536874e-06, "loss": 0.5625, "step": 1685 }, { "epoch": 0.11, "grad_norm": 1.116602897644043, "learning_rate": 9.846165071345118e-06, "loss": 0.6012, "step": 1686 }, { "epoch": 0.11, "grad_norm": 1.1800258159637451, "learning_rate": 9.845912426098206e-06, "loss": 0.6041, "step": 1687 }, { "epoch": 0.11, "grad_norm": 1.0146981477737427, "learning_rate": 9.845659576806781e-06, "loss": 0.5607, "step": 1688 }, { "epoch": 0.11, "grad_norm": 1.0185143947601318, "learning_rate": 9.845406523481488e-06, "loss": 0.5872, "step": 1689 }, { "epoch": 0.11, "grad_norm": 1.0636152029037476, "learning_rate": 9.845153266132981e-06, "loss": 0.54, "step": 1690 }, { "epoch": 0.11, "grad_norm": 1.0499494075775146, "learning_rate": 9.844899804771927e-06, "loss": 0.5694, "step": 1691 }, { "epoch": 0.11, "grad_norm": 1.1946227550506592, "learning_rate": 9.844646139408995e-06, "loss": 0.593, "step": 1692 }, { "epoch": 0.11, "grad_norm": 0.9941321015357971, "learning_rate": 9.844392270054868e-06, "loss": 0.5635, "step": 1693 }, { "epoch": 0.11, "grad_norm": 1.088547706604004, "learning_rate": 9.844138196720236e-06, "loss": 0.615, "step": 1694 }, { "epoch": 0.11, "grad_norm": 1.0187320709228516, "learning_rate": 9.843883919415795e-06, "loss": 0.6168, "step": 1695 }, { "epoch": 0.11, "grad_norm": 1.0746089220046997, "learning_rate": 9.843629438152252e-06, "loss": 0.5816, "step": 1696 }, { "epoch": 0.11, "grad_norm": 1.0524471998214722, "learning_rate": 9.843374752940323e-06, "loss": 0.5683, "step": 1697 }, { "epoch": 0.11, "grad_norm": 1.0667989253997803, "learning_rate": 9.843119863790733e-06, "loss": 0.6443, "step": 1698 }, { "epoch": 0.11, "grad_norm": 1.1471716165542603, "learning_rate": 9.842864770714213e-06, "loss": 0.5554, "step": 1699 }, { "epoch": 0.11, "grad_norm": 1.115005612373352, "learning_rate": 9.842609473721505e-06, "loss": 0.5617, "step": 1700 }, { "epoch": 0.11, "grad_norm": 1.0730042457580566, "learning_rate": 9.842353972823358e-06, "loss": 0.5653, "step": 1701 }, { "epoch": 0.11, "grad_norm": 1.0105443000793457, "learning_rate": 9.842098268030532e-06, "loss": 0.5291, "step": 1702 }, { "epoch": 0.11, "grad_norm": 1.1139228343963623, "learning_rate": 9.84184235935379e-06, "loss": 0.5783, "step": 1703 }, { "epoch": 0.11, "grad_norm": 1.0208879709243774, "learning_rate": 9.84158624680391e-06, "loss": 0.5841, "step": 1704 }, { "epoch": 0.11, "grad_norm": 1.0257160663604736, "learning_rate": 9.841329930391678e-06, "loss": 0.5669, "step": 1705 }, { "epoch": 0.11, "grad_norm": 1.3174355030059814, "learning_rate": 9.841073410127884e-06, "loss": 0.6071, "step": 1706 }, { "epoch": 0.11, "grad_norm": 1.1715240478515625, "learning_rate": 9.840816686023329e-06, "loss": 0.5801, "step": 1707 }, { "epoch": 0.11, "grad_norm": 0.9790030121803284, "learning_rate": 9.840559758088821e-06, "loss": 0.5325, "step": 1708 }, { "epoch": 0.11, "grad_norm": 1.0704594850540161, "learning_rate": 9.840302626335182e-06, "loss": 0.5574, "step": 1709 }, { "epoch": 0.11, "grad_norm": 1.012350082397461, "learning_rate": 9.84004529077324e-06, "loss": 0.5779, "step": 1710 }, { "epoch": 0.11, "grad_norm": 0.9783762693405151, "learning_rate": 9.839787751413825e-06, "loss": 0.5429, "step": 1711 }, { "epoch": 0.11, "grad_norm": 1.0305521488189697, "learning_rate": 9.839530008267785e-06, "loss": 0.5534, "step": 1712 }, { "epoch": 0.11, "grad_norm": 1.0552220344543457, "learning_rate": 9.839272061345974e-06, "loss": 0.5872, "step": 1713 }, { "epoch": 0.11, "grad_norm": 1.0879662036895752, "learning_rate": 9.839013910659249e-06, "loss": 0.5924, "step": 1714 }, { "epoch": 0.11, "grad_norm": 1.1004040241241455, "learning_rate": 9.838755556218483e-06, "loss": 0.5854, "step": 1715 }, { "epoch": 0.11, "grad_norm": 1.0836282968521118, "learning_rate": 9.838496998034552e-06, "loss": 0.6243, "step": 1716 }, { "epoch": 0.11, "grad_norm": 1.1114145517349243, "learning_rate": 9.838238236118344e-06, "loss": 0.5672, "step": 1717 }, { "epoch": 0.11, "grad_norm": 1.0886434316635132, "learning_rate": 9.837979270480758e-06, "loss": 0.5725, "step": 1718 }, { "epoch": 0.11, "grad_norm": 0.9595142602920532, "learning_rate": 9.837720101132692e-06, "loss": 0.6296, "step": 1719 }, { "epoch": 0.11, "grad_norm": 1.0831856727600098, "learning_rate": 9.837460728085062e-06, "loss": 0.5772, "step": 1720 }, { "epoch": 0.11, "grad_norm": 1.0812630653381348, "learning_rate": 9.83720115134879e-06, "loss": 0.639, "step": 1721 }, { "epoch": 0.11, "grad_norm": 0.9859281778335571, "learning_rate": 9.836941370934806e-06, "loss": 0.556, "step": 1722 }, { "epoch": 0.11, "grad_norm": 1.047037959098816, "learning_rate": 9.836681386854045e-06, "loss": 0.5702, "step": 1723 }, { "epoch": 0.11, "grad_norm": 1.0959748029708862, "learning_rate": 9.836421199117456e-06, "loss": 0.5845, "step": 1724 }, { "epoch": 0.11, "grad_norm": 1.0794752836227417, "learning_rate": 9.836160807735997e-06, "loss": 0.6148, "step": 1725 }, { "epoch": 0.11, "grad_norm": 1.0430015325546265, "learning_rate": 9.83590021272063e-06, "loss": 0.559, "step": 1726 }, { "epoch": 0.11, "grad_norm": 1.1524336338043213, "learning_rate": 9.835639414082327e-06, "loss": 0.583, "step": 1727 }, { "epoch": 0.11, "grad_norm": 1.0416326522827148, "learning_rate": 9.83537841183207e-06, "loss": 0.5753, "step": 1728 }, { "epoch": 0.11, "grad_norm": 1.080689549446106, "learning_rate": 9.83511720598085e-06, "loss": 0.5485, "step": 1729 }, { "epoch": 0.11, "grad_norm": 1.15471351146698, "learning_rate": 9.834855796539665e-06, "loss": 0.5558, "step": 1730 }, { "epoch": 0.11, "grad_norm": 1.0688234567642212, "learning_rate": 9.834594183519521e-06, "loss": 0.5585, "step": 1731 }, { "epoch": 0.11, "grad_norm": 1.0754145383834839, "learning_rate": 9.834332366931435e-06, "loss": 0.5745, "step": 1732 }, { "epoch": 0.11, "grad_norm": 1.0678609609603882, "learning_rate": 9.834070346786428e-06, "loss": 0.5691, "step": 1733 }, { "epoch": 0.11, "grad_norm": 1.062833309173584, "learning_rate": 9.833808123095538e-06, "loss": 0.5819, "step": 1734 }, { "epoch": 0.11, "grad_norm": 1.1182148456573486, "learning_rate": 9.833545695869802e-06, "loss": 0.6185, "step": 1735 }, { "epoch": 0.11, "grad_norm": 1.1162580251693726, "learning_rate": 9.833283065120272e-06, "loss": 0.5718, "step": 1736 }, { "epoch": 0.11, "grad_norm": 1.0736720561981201, "learning_rate": 9.833020230858005e-06, "loss": 0.6132, "step": 1737 }, { "epoch": 0.11, "grad_norm": 1.0325653553009033, "learning_rate": 9.832757193094072e-06, "loss": 0.5879, "step": 1738 }, { "epoch": 0.11, "grad_norm": 0.9937186241149902, "learning_rate": 9.832493951839541e-06, "loss": 0.5699, "step": 1739 }, { "epoch": 0.11, "grad_norm": 1.101816177368164, "learning_rate": 9.832230507105504e-06, "loss": 0.5643, "step": 1740 }, { "epoch": 0.11, "grad_norm": 1.0643540620803833, "learning_rate": 9.831966858903049e-06, "loss": 0.591, "step": 1741 }, { "epoch": 0.11, "grad_norm": 0.9806624054908752, "learning_rate": 9.83170300724328e-06, "loss": 0.5871, "step": 1742 }, { "epoch": 0.11, "grad_norm": 1.0082716941833496, "learning_rate": 9.831438952137304e-06, "loss": 0.5493, "step": 1743 }, { "epoch": 0.11, "grad_norm": 1.0105382204055786, "learning_rate": 9.831174693596241e-06, "loss": 0.5441, "step": 1744 }, { "epoch": 0.11, "grad_norm": 1.1338965892791748, "learning_rate": 9.83091023163122e-06, "loss": 0.6051, "step": 1745 }, { "epoch": 0.11, "grad_norm": 1.0223342180252075, "learning_rate": 9.830645566253374e-06, "loss": 0.5614, "step": 1746 }, { "epoch": 0.11, "grad_norm": 1.089674711227417, "learning_rate": 9.830380697473848e-06, "loss": 0.5997, "step": 1747 }, { "epoch": 0.11, "grad_norm": 1.0419564247131348, "learning_rate": 9.830115625303793e-06, "loss": 0.6263, "step": 1748 }, { "epoch": 0.11, "grad_norm": 1.0378230810165405, "learning_rate": 9.829850349754373e-06, "loss": 0.5786, "step": 1749 }, { "epoch": 0.11, "grad_norm": 1.0939370393753052, "learning_rate": 9.829584870836756e-06, "loss": 0.6189, "step": 1750 }, { "epoch": 0.11, "grad_norm": 1.079451084136963, "learning_rate": 9.82931918856212e-06, "loss": 0.5546, "step": 1751 }, { "epoch": 0.11, "grad_norm": 1.10829496383667, "learning_rate": 9.829053302941656e-06, "loss": 0.5974, "step": 1752 }, { "epoch": 0.11, "grad_norm": 1.0517836809158325, "learning_rate": 9.828787213986554e-06, "loss": 0.6085, "step": 1753 }, { "epoch": 0.11, "grad_norm": 1.0262974500656128, "learning_rate": 9.82852092170802e-06, "loss": 0.5534, "step": 1754 }, { "epoch": 0.11, "grad_norm": 1.0490713119506836, "learning_rate": 9.82825442611727e-06, "loss": 0.5534, "step": 1755 }, { "epoch": 0.11, "grad_norm": 1.1421825885772705, "learning_rate": 9.82798772722552e-06, "loss": 0.6172, "step": 1756 }, { "epoch": 0.11, "grad_norm": 1.0918395519256592, "learning_rate": 9.827720825044003e-06, "loss": 0.5853, "step": 1757 }, { "epoch": 0.11, "grad_norm": 1.0919643640518188, "learning_rate": 9.827453719583957e-06, "loss": 0.5493, "step": 1758 }, { "epoch": 0.11, "grad_norm": 1.004308819770813, "learning_rate": 9.827186410856627e-06, "loss": 0.5275, "step": 1759 }, { "epoch": 0.11, "grad_norm": 1.0599530935287476, "learning_rate": 9.82691889887327e-06, "loss": 0.5225, "step": 1760 }, { "epoch": 0.11, "grad_norm": 1.0359498262405396, "learning_rate": 9.82665118364515e-06, "loss": 0.5409, "step": 1761 }, { "epoch": 0.11, "grad_norm": 1.1455105543136597, "learning_rate": 9.82638326518354e-06, "loss": 0.5428, "step": 1762 }, { "epoch": 0.11, "grad_norm": 1.1107579469680786, "learning_rate": 9.826115143499721e-06, "loss": 0.6133, "step": 1763 }, { "epoch": 0.11, "grad_norm": 1.0757449865341187, "learning_rate": 9.82584681860498e-06, "loss": 0.5667, "step": 1764 }, { "epoch": 0.11, "grad_norm": 1.064499855041504, "learning_rate": 9.82557829051062e-06, "loss": 0.5712, "step": 1765 }, { "epoch": 0.11, "grad_norm": 1.087536334991455, "learning_rate": 9.825309559227944e-06, "loss": 0.6336, "step": 1766 }, { "epoch": 0.11, "grad_norm": 1.1417089700698853, "learning_rate": 9.825040624768267e-06, "loss": 0.5945, "step": 1767 }, { "epoch": 0.11, "grad_norm": 1.0964943170547485, "learning_rate": 9.824771487142917e-06, "loss": 0.6186, "step": 1768 }, { "epoch": 0.11, "grad_norm": 1.009015440940857, "learning_rate": 9.824502146363222e-06, "loss": 0.5442, "step": 1769 }, { "epoch": 0.11, "grad_norm": 1.0028083324432373, "learning_rate": 9.824232602440524e-06, "loss": 0.5788, "step": 1770 }, { "epoch": 0.11, "grad_norm": 0.9938005805015564, "learning_rate": 9.823962855386175e-06, "loss": 0.597, "step": 1771 }, { "epoch": 0.11, "grad_norm": 1.0942137241363525, "learning_rate": 9.823692905211533e-06, "loss": 0.5339, "step": 1772 }, { "epoch": 0.11, "grad_norm": 1.0058943033218384, "learning_rate": 9.823422751927961e-06, "loss": 0.5573, "step": 1773 }, { "epoch": 0.11, "grad_norm": 1.0122864246368408, "learning_rate": 9.823152395546836e-06, "loss": 0.5873, "step": 1774 }, { "epoch": 0.11, "grad_norm": 1.0176011323928833, "learning_rate": 9.822881836079543e-06, "loss": 0.5539, "step": 1775 }, { "epoch": 0.11, "grad_norm": 1.0070388317108154, "learning_rate": 9.822611073537474e-06, "loss": 0.5853, "step": 1776 }, { "epoch": 0.11, "grad_norm": 1.0597985982894897, "learning_rate": 9.822340107932028e-06, "loss": 0.5133, "step": 1777 }, { "epoch": 0.11, "grad_norm": 1.1082496643066406, "learning_rate": 9.822068939274616e-06, "loss": 0.5884, "step": 1778 }, { "epoch": 0.11, "grad_norm": 1.0341330766677856, "learning_rate": 9.821797567576656e-06, "loss": 0.5426, "step": 1779 }, { "epoch": 0.11, "grad_norm": 1.0901429653167725, "learning_rate": 9.821525992849575e-06, "loss": 0.6166, "step": 1780 }, { "epoch": 0.11, "grad_norm": 1.1223825216293335, "learning_rate": 9.821254215104808e-06, "loss": 0.6138, "step": 1781 }, { "epoch": 0.11, "grad_norm": 1.0060911178588867, "learning_rate": 9.820982234353795e-06, "loss": 0.5476, "step": 1782 }, { "epoch": 0.11, "grad_norm": 1.1112552881240845, "learning_rate": 9.820710050607994e-06, "loss": 0.5866, "step": 1783 }, { "epoch": 0.11, "grad_norm": 1.0314642190933228, "learning_rate": 9.820437663878862e-06, "loss": 0.5702, "step": 1784 }, { "epoch": 0.11, "grad_norm": 1.0946764945983887, "learning_rate": 9.820165074177867e-06, "loss": 0.6082, "step": 1785 }, { "epoch": 0.11, "grad_norm": 1.0887361764907837, "learning_rate": 9.819892281516491e-06, "loss": 0.6036, "step": 1786 }, { "epoch": 0.11, "grad_norm": 1.141646146774292, "learning_rate": 9.819619285906217e-06, "loss": 0.5512, "step": 1787 }, { "epoch": 0.11, "grad_norm": 1.1829429864883423, "learning_rate": 9.819346087358542e-06, "loss": 0.6157, "step": 1788 }, { "epoch": 0.11, "grad_norm": 1.1907756328582764, "learning_rate": 9.819072685884969e-06, "loss": 0.5512, "step": 1789 }, { "epoch": 0.11, "grad_norm": 1.1075764894485474, "learning_rate": 9.818799081497008e-06, "loss": 0.6017, "step": 1790 }, { "epoch": 0.11, "grad_norm": 1.0502817630767822, "learning_rate": 9.818525274206184e-06, "loss": 0.5918, "step": 1791 }, { "epoch": 0.11, "grad_norm": 1.004138469696045, "learning_rate": 9.818251264024018e-06, "loss": 0.6126, "step": 1792 }, { "epoch": 0.11, "grad_norm": 1.0952894687652588, "learning_rate": 9.817977050962058e-06, "loss": 0.5419, "step": 1793 }, { "epoch": 0.11, "grad_norm": 1.0882782936096191, "learning_rate": 9.817702635031842e-06, "loss": 0.591, "step": 1794 }, { "epoch": 0.11, "grad_norm": 1.0567734241485596, "learning_rate": 9.817428016244928e-06, "loss": 0.5676, "step": 1795 }, { "epoch": 0.11, "grad_norm": 1.0806546211242676, "learning_rate": 9.81715319461288e-06, "loss": 0.6661, "step": 1796 }, { "epoch": 0.11, "grad_norm": 0.9864494204521179, "learning_rate": 9.816878170147268e-06, "loss": 0.6047, "step": 1797 }, { "epoch": 0.11, "grad_norm": 1.1121294498443604, "learning_rate": 9.816602942859672e-06, "loss": 0.6089, "step": 1798 }, { "epoch": 0.11, "grad_norm": 1.1498067378997803, "learning_rate": 9.816327512761683e-06, "loss": 0.5679, "step": 1799 }, { "epoch": 0.11, "grad_norm": 1.135475993156433, "learning_rate": 9.816051879864896e-06, "loss": 0.5329, "step": 1800 }, { "epoch": 0.11, "grad_norm": 1.0519531965255737, "learning_rate": 9.81577604418092e-06, "loss": 0.5243, "step": 1801 }, { "epoch": 0.11, "grad_norm": 1.032646894454956, "learning_rate": 9.815500005721365e-06, "loss": 0.575, "step": 1802 }, { "epoch": 0.11, "grad_norm": 1.1332684755325317, "learning_rate": 9.815223764497859e-06, "loss": 0.5767, "step": 1803 }, { "epoch": 0.11, "grad_norm": 0.9929540753364563, "learning_rate": 9.814947320522031e-06, "loss": 0.559, "step": 1804 }, { "epoch": 0.11, "grad_norm": 1.1531939506530762, "learning_rate": 9.81467067380552e-06, "loss": 0.5943, "step": 1805 }, { "epoch": 0.11, "grad_norm": 1.0956352949142456, "learning_rate": 9.814393824359975e-06, "loss": 0.5878, "step": 1806 }, { "epoch": 0.11, "grad_norm": 0.9945195913314819, "learning_rate": 9.814116772197058e-06, "loss": 0.5425, "step": 1807 }, { "epoch": 0.11, "grad_norm": 1.1511505842208862, "learning_rate": 9.813839517328428e-06, "loss": 0.5698, "step": 1808 }, { "epoch": 0.11, "grad_norm": 1.056097388267517, "learning_rate": 9.813562059765762e-06, "loss": 0.5904, "step": 1809 }, { "epoch": 0.11, "grad_norm": 1.0813708305358887, "learning_rate": 9.813284399520744e-06, "loss": 0.5949, "step": 1810 }, { "epoch": 0.11, "grad_norm": 1.0767086744308472, "learning_rate": 9.813006536605063e-06, "loss": 0.584, "step": 1811 }, { "epoch": 0.11, "grad_norm": 1.1276978254318237, "learning_rate": 9.812728471030421e-06, "loss": 0.606, "step": 1812 }, { "epoch": 0.11, "grad_norm": 1.0942857265472412, "learning_rate": 9.812450202808525e-06, "loss": 0.5775, "step": 1813 }, { "epoch": 0.11, "grad_norm": 0.9871548414230347, "learning_rate": 9.812171731951092e-06, "loss": 0.5556, "step": 1814 }, { "epoch": 0.11, "grad_norm": 1.0339603424072266, "learning_rate": 9.811893058469848e-06, "loss": 0.5699, "step": 1815 }, { "epoch": 0.12, "grad_norm": 1.0050936937332153, "learning_rate": 9.811614182376527e-06, "loss": 0.6113, "step": 1816 }, { "epoch": 0.12, "grad_norm": 1.0542008876800537, "learning_rate": 9.811335103682872e-06, "loss": 0.5385, "step": 1817 }, { "epoch": 0.12, "grad_norm": 0.9927748441696167, "learning_rate": 9.81105582240063e-06, "loss": 0.597, "step": 1818 }, { "epoch": 0.12, "grad_norm": 1.0616050958633423, "learning_rate": 9.810776338541566e-06, "loss": 0.6027, "step": 1819 }, { "epoch": 0.12, "grad_norm": 1.0716493129730225, "learning_rate": 9.810496652117445e-06, "loss": 0.5842, "step": 1820 }, { "epoch": 0.12, "grad_norm": 0.9858418107032776, "learning_rate": 9.810216763140046e-06, "loss": 0.5235, "step": 1821 }, { "epoch": 0.12, "grad_norm": 1.0361227989196777, "learning_rate": 9.809936671621151e-06, "loss": 0.5259, "step": 1822 }, { "epoch": 0.12, "grad_norm": 1.0617575645446777, "learning_rate": 9.809656377572556e-06, "loss": 0.6021, "step": 1823 }, { "epoch": 0.12, "grad_norm": 1.0023410320281982, "learning_rate": 9.809375881006063e-06, "loss": 0.5129, "step": 1824 }, { "epoch": 0.12, "grad_norm": 0.9909228682518005, "learning_rate": 9.809095181933482e-06, "loss": 0.5426, "step": 1825 }, { "epoch": 0.12, "grad_norm": 1.1261311769485474, "learning_rate": 9.808814280366632e-06, "loss": 0.6176, "step": 1826 }, { "epoch": 0.12, "grad_norm": 1.1135318279266357, "learning_rate": 9.808533176317341e-06, "loss": 0.6037, "step": 1827 }, { "epoch": 0.12, "grad_norm": 1.1510497331619263, "learning_rate": 9.808251869797445e-06, "loss": 0.6103, "step": 1828 }, { "epoch": 0.12, "grad_norm": 1.0855205059051514, "learning_rate": 9.807970360818791e-06, "loss": 0.5569, "step": 1829 }, { "epoch": 0.12, "grad_norm": 1.1300290822982788, "learning_rate": 9.80768864939323e-06, "loss": 0.6362, "step": 1830 }, { "epoch": 0.12, "grad_norm": 1.05135178565979, "learning_rate": 9.807406735532625e-06, "loss": 0.5439, "step": 1831 }, { "epoch": 0.12, "grad_norm": 1.057826042175293, "learning_rate": 9.807124619248847e-06, "loss": 0.6045, "step": 1832 }, { "epoch": 0.12, "grad_norm": 1.2218470573425293, "learning_rate": 9.806842300553772e-06, "loss": 0.5813, "step": 1833 }, { "epoch": 0.12, "grad_norm": 1.0777051448822021, "learning_rate": 9.806559779459291e-06, "loss": 0.6229, "step": 1834 }, { "epoch": 0.12, "grad_norm": 0.9983317255973816, "learning_rate": 9.806277055977299e-06, "loss": 0.5392, "step": 1835 }, { "epoch": 0.12, "grad_norm": 1.1468912363052368, "learning_rate": 9.8059941301197e-06, "loss": 0.5798, "step": 1836 }, { "epoch": 0.12, "grad_norm": 1.0529228448867798, "learning_rate": 9.805711001898406e-06, "loss": 0.5773, "step": 1837 }, { "epoch": 0.12, "grad_norm": 1.017897605895996, "learning_rate": 9.805427671325339e-06, "loss": 0.5667, "step": 1838 }, { "epoch": 0.12, "grad_norm": 1.046511173248291, "learning_rate": 9.80514413841243e-06, "loss": 0.5286, "step": 1839 }, { "epoch": 0.12, "grad_norm": 1.2047935724258423, "learning_rate": 9.804860403171617e-06, "loss": 0.6145, "step": 1840 }, { "epoch": 0.12, "grad_norm": 1.1296573877334595, "learning_rate": 9.804576465614848e-06, "loss": 0.5614, "step": 1841 }, { "epoch": 0.12, "grad_norm": 1.065781593322754, "learning_rate": 9.804292325754079e-06, "loss": 0.6024, "step": 1842 }, { "epoch": 0.12, "grad_norm": 1.0602785348892212, "learning_rate": 9.804007983601271e-06, "loss": 0.5905, "step": 1843 }, { "epoch": 0.12, "grad_norm": 1.0478906631469727, "learning_rate": 9.8037234391684e-06, "loss": 0.5914, "step": 1844 }, { "epoch": 0.12, "grad_norm": 1.0980864763259888, "learning_rate": 9.803438692467446e-06, "loss": 0.5525, "step": 1845 }, { "epoch": 0.12, "grad_norm": 1.053962230682373, "learning_rate": 9.8031537435104e-06, "loss": 0.5454, "step": 1846 }, { "epoch": 0.12, "grad_norm": 1.0422577857971191, "learning_rate": 9.802868592309255e-06, "loss": 0.5807, "step": 1847 }, { "epoch": 0.12, "grad_norm": 1.1173555850982666, "learning_rate": 9.802583238876024e-06, "loss": 0.5915, "step": 1848 }, { "epoch": 0.12, "grad_norm": 1.0564782619476318, "learning_rate": 9.80229768322272e-06, "loss": 0.5566, "step": 1849 }, { "epoch": 0.12, "grad_norm": 1.0177700519561768, "learning_rate": 9.802011925361366e-06, "loss": 0.5884, "step": 1850 }, { "epoch": 0.12, "grad_norm": 1.0435445308685303, "learning_rate": 9.801725965303995e-06, "loss": 0.5258, "step": 1851 }, { "epoch": 0.12, "grad_norm": 1.1057952642440796, "learning_rate": 9.801439803062646e-06, "loss": 0.5988, "step": 1852 }, { "epoch": 0.12, "grad_norm": 0.9988928437232971, "learning_rate": 9.801153438649371e-06, "loss": 0.5789, "step": 1853 }, { "epoch": 0.12, "grad_norm": 1.096474051475525, "learning_rate": 9.800866872076227e-06, "loss": 0.578, "step": 1854 }, { "epoch": 0.12, "grad_norm": 1.0434516668319702, "learning_rate": 9.80058010335528e-06, "loss": 0.5812, "step": 1855 }, { "epoch": 0.12, "grad_norm": 1.1052302122116089, "learning_rate": 9.800293132498603e-06, "loss": 0.5821, "step": 1856 }, { "epoch": 0.12, "grad_norm": 1.084574580192566, "learning_rate": 9.800005959518284e-06, "loss": 0.5597, "step": 1857 }, { "epoch": 0.12, "grad_norm": 1.1198168992996216, "learning_rate": 9.79971858442641e-06, "loss": 0.6072, "step": 1858 }, { "epoch": 0.12, "grad_norm": 1.142138957977295, "learning_rate": 9.799431007235086e-06, "loss": 0.5874, "step": 1859 }, { "epoch": 0.12, "grad_norm": 1.0381388664245605, "learning_rate": 9.799143227956416e-06, "loss": 0.6147, "step": 1860 }, { "epoch": 0.12, "grad_norm": 1.1210684776306152, "learning_rate": 9.798855246602522e-06, "loss": 0.6059, "step": 1861 }, { "epoch": 0.12, "grad_norm": 1.0363600254058838, "learning_rate": 9.798567063185525e-06, "loss": 0.549, "step": 1862 }, { "epoch": 0.12, "grad_norm": 1.0627700090408325, "learning_rate": 9.798278677717562e-06, "loss": 0.5713, "step": 1863 }, { "epoch": 0.12, "grad_norm": 1.0229395627975464, "learning_rate": 9.797990090210777e-06, "loss": 0.5821, "step": 1864 }, { "epoch": 0.12, "grad_norm": 1.0014104843139648, "learning_rate": 9.79770130067732e-06, "loss": 0.6146, "step": 1865 }, { "epoch": 0.12, "grad_norm": 0.9721370339393616, "learning_rate": 9.797412309129351e-06, "loss": 0.5623, "step": 1866 }, { "epoch": 0.12, "grad_norm": 1.0202564001083374, "learning_rate": 9.79712311557904e-06, "loss": 0.5536, "step": 1867 }, { "epoch": 0.12, "grad_norm": 1.028006911277771, "learning_rate": 9.79683372003856e-06, "loss": 0.5573, "step": 1868 }, { "epoch": 0.12, "grad_norm": 1.0714778900146484, "learning_rate": 9.796544122520101e-06, "loss": 0.5889, "step": 1869 }, { "epoch": 0.12, "grad_norm": 1.029428482055664, "learning_rate": 9.796254323035854e-06, "loss": 0.5298, "step": 1870 }, { "epoch": 0.12, "grad_norm": 1.0187203884124756, "learning_rate": 9.795964321598023e-06, "loss": 0.5737, "step": 1871 }, { "epoch": 0.12, "grad_norm": 0.9776306748390198, "learning_rate": 9.795674118218819e-06, "loss": 0.4964, "step": 1872 }, { "epoch": 0.12, "grad_norm": 1.0021920204162598, "learning_rate": 9.795383712910458e-06, "loss": 0.5602, "step": 1873 }, { "epoch": 0.12, "grad_norm": 1.052433729171753, "learning_rate": 9.795093105685175e-06, "loss": 0.5904, "step": 1874 }, { "epoch": 0.12, "grad_norm": 1.0357136726379395, "learning_rate": 9.794802296555198e-06, "loss": 0.5716, "step": 1875 }, { "epoch": 0.12, "grad_norm": 0.9910592436790466, "learning_rate": 9.79451128553278e-06, "loss": 0.5199, "step": 1876 }, { "epoch": 0.12, "grad_norm": 1.1537259817123413, "learning_rate": 9.794220072630168e-06, "loss": 0.6142, "step": 1877 }, { "epoch": 0.12, "grad_norm": 1.2031731605529785, "learning_rate": 9.793928657859627e-06, "loss": 0.6193, "step": 1878 }, { "epoch": 0.12, "grad_norm": 1.170706868171692, "learning_rate": 9.793637041233428e-06, "loss": 0.6113, "step": 1879 }, { "epoch": 0.12, "grad_norm": 1.0904954671859741, "learning_rate": 9.793345222763847e-06, "loss": 0.5832, "step": 1880 }, { "epoch": 0.12, "grad_norm": 1.0491715669631958, "learning_rate": 9.793053202463176e-06, "loss": 0.5481, "step": 1881 }, { "epoch": 0.12, "grad_norm": 1.0722602605819702, "learning_rate": 9.792760980343708e-06, "loss": 0.5864, "step": 1882 }, { "epoch": 0.12, "grad_norm": 1.1385523080825806, "learning_rate": 9.792468556417746e-06, "loss": 0.5359, "step": 1883 }, { "epoch": 0.12, "grad_norm": 1.0855581760406494, "learning_rate": 9.792175930697608e-06, "loss": 0.6192, "step": 1884 }, { "epoch": 0.12, "grad_norm": 1.002265453338623, "learning_rate": 9.79188310319561e-06, "loss": 0.5793, "step": 1885 }, { "epoch": 0.12, "grad_norm": 1.0687613487243652, "learning_rate": 9.791590073924086e-06, "loss": 0.6658, "step": 1886 }, { "epoch": 0.12, "grad_norm": 1.0373985767364502, "learning_rate": 9.79129684289537e-06, "loss": 0.5612, "step": 1887 }, { "epoch": 0.12, "grad_norm": 0.9813857674598694, "learning_rate": 9.791003410121815e-06, "loss": 0.5701, "step": 1888 }, { "epoch": 0.12, "grad_norm": 1.0157337188720703, "learning_rate": 9.79070977561577e-06, "loss": 0.5682, "step": 1889 }, { "epoch": 0.12, "grad_norm": 0.9713268876075745, "learning_rate": 9.790415939389604e-06, "loss": 0.57, "step": 1890 }, { "epoch": 0.12, "grad_norm": 1.0119757652282715, "learning_rate": 9.790121901455687e-06, "loss": 0.5453, "step": 1891 }, { "epoch": 0.12, "grad_norm": 1.069533348083496, "learning_rate": 9.7898276618264e-06, "loss": 0.544, "step": 1892 }, { "epoch": 0.12, "grad_norm": 0.9527726173400879, "learning_rate": 9.789533220514132e-06, "loss": 0.5392, "step": 1893 }, { "epoch": 0.12, "grad_norm": 1.0631836652755737, "learning_rate": 9.789238577531284e-06, "loss": 0.6161, "step": 1894 }, { "epoch": 0.12, "grad_norm": 1.0867925882339478, "learning_rate": 9.788943732890258e-06, "loss": 0.5382, "step": 1895 }, { "epoch": 0.12, "grad_norm": 1.0896707773208618, "learning_rate": 9.788648686603472e-06, "loss": 0.5852, "step": 1896 }, { "epoch": 0.12, "grad_norm": 1.0752573013305664, "learning_rate": 9.788353438683346e-06, "loss": 0.5952, "step": 1897 }, { "epoch": 0.12, "grad_norm": 1.0917840003967285, "learning_rate": 9.788057989142317e-06, "loss": 0.6199, "step": 1898 }, { "epoch": 0.12, "grad_norm": 0.9953743815422058, "learning_rate": 9.787762337992821e-06, "loss": 0.5669, "step": 1899 }, { "epoch": 0.12, "grad_norm": 1.1098178625106812, "learning_rate": 9.78746648524731e-06, "loss": 0.6031, "step": 1900 }, { "epoch": 0.12, "grad_norm": 1.0404541492462158, "learning_rate": 9.787170430918239e-06, "loss": 0.5569, "step": 1901 }, { "epoch": 0.12, "grad_norm": 1.1098657846450806, "learning_rate": 9.786874175018073e-06, "loss": 0.5853, "step": 1902 }, { "epoch": 0.12, "grad_norm": 1.0735530853271484, "learning_rate": 9.78657771755929e-06, "loss": 0.5813, "step": 1903 }, { "epoch": 0.12, "grad_norm": 0.9700368642807007, "learning_rate": 9.786281058554369e-06, "loss": 0.5414, "step": 1904 }, { "epoch": 0.12, "grad_norm": 0.979625940322876, "learning_rate": 9.785984198015804e-06, "loss": 0.5367, "step": 1905 }, { "epoch": 0.12, "grad_norm": 1.0052965879440308, "learning_rate": 9.785687135956092e-06, "loss": 0.5731, "step": 1906 }, { "epoch": 0.12, "grad_norm": 1.0384953022003174, "learning_rate": 9.785389872387745e-06, "loss": 0.5167, "step": 1907 }, { "epoch": 0.12, "grad_norm": 1.0627541542053223, "learning_rate": 9.785092407323276e-06, "loss": 0.6133, "step": 1908 }, { "epoch": 0.12, "grad_norm": 1.117289423942566, "learning_rate": 9.784794740775212e-06, "loss": 0.5983, "step": 1909 }, { "epoch": 0.12, "grad_norm": 1.080491542816162, "learning_rate": 9.784496872756086e-06, "loss": 0.5615, "step": 1910 }, { "epoch": 0.12, "grad_norm": 1.043665885925293, "learning_rate": 9.784198803278442e-06, "loss": 0.5818, "step": 1911 }, { "epoch": 0.12, "grad_norm": 1.0099648237228394, "learning_rate": 9.78390053235483e-06, "loss": 0.5487, "step": 1912 }, { "epoch": 0.12, "grad_norm": 1.1236435174942017, "learning_rate": 9.783602059997808e-06, "loss": 0.5977, "step": 1913 }, { "epoch": 0.12, "grad_norm": 1.0785053968429565, "learning_rate": 9.783303386219942e-06, "loss": 0.5789, "step": 1914 }, { "epoch": 0.12, "grad_norm": 1.0824027061462402, "learning_rate": 9.783004511033814e-06, "loss": 0.6267, "step": 1915 }, { "epoch": 0.12, "grad_norm": 1.1740599870681763, "learning_rate": 9.782705434452002e-06, "loss": 0.633, "step": 1916 }, { "epoch": 0.12, "grad_norm": 1.023955225944519, "learning_rate": 9.782406156487104e-06, "loss": 0.598, "step": 1917 }, { "epoch": 0.12, "grad_norm": 1.1687668561935425, "learning_rate": 9.782106677151717e-06, "loss": 0.6052, "step": 1918 }, { "epoch": 0.12, "grad_norm": 1.0945203304290771, "learning_rate": 9.781806996458456e-06, "loss": 0.5986, "step": 1919 }, { "epoch": 0.12, "grad_norm": 1.1078828573226929, "learning_rate": 9.781507114419937e-06, "loss": 0.5578, "step": 1920 }, { "epoch": 0.12, "grad_norm": 1.0594239234924316, "learning_rate": 9.781207031048785e-06, "loss": 0.5605, "step": 1921 }, { "epoch": 0.12, "grad_norm": 1.0740500688552856, "learning_rate": 9.78090674635764e-06, "loss": 0.5738, "step": 1922 }, { "epoch": 0.12, "grad_norm": 1.0668847560882568, "learning_rate": 9.780606260359141e-06, "loss": 0.5996, "step": 1923 }, { "epoch": 0.12, "grad_norm": 1.119848608970642, "learning_rate": 9.780305573065945e-06, "loss": 0.6466, "step": 1924 }, { "epoch": 0.12, "grad_norm": 0.9590592384338379, "learning_rate": 9.78000468449071e-06, "loss": 0.5362, "step": 1925 }, { "epoch": 0.12, "grad_norm": 1.09816575050354, "learning_rate": 9.779703594646106e-06, "loss": 0.6452, "step": 1926 }, { "epoch": 0.12, "grad_norm": 1.1173896789550781, "learning_rate": 9.779402303544811e-06, "loss": 0.5727, "step": 1927 }, { "epoch": 0.12, "grad_norm": 1.010911464691162, "learning_rate": 9.77910081119951e-06, "loss": 0.5318, "step": 1928 }, { "epoch": 0.12, "grad_norm": 1.2026013135910034, "learning_rate": 9.7787991176229e-06, "loss": 0.6171, "step": 1929 }, { "epoch": 0.12, "grad_norm": 0.9925667643547058, "learning_rate": 9.778497222827685e-06, "loss": 0.547, "step": 1930 }, { "epoch": 0.12, "grad_norm": 1.0244348049163818, "learning_rate": 9.778195126826574e-06, "loss": 0.6138, "step": 1931 }, { "epoch": 0.12, "grad_norm": 1.0038400888442993, "learning_rate": 9.777892829632288e-06, "loss": 0.5836, "step": 1932 }, { "epoch": 0.12, "grad_norm": 1.0449421405792236, "learning_rate": 9.777590331257557e-06, "loss": 0.6516, "step": 1933 }, { "epoch": 0.12, "grad_norm": 1.0840104818344116, "learning_rate": 9.777287631715117e-06, "loss": 0.5375, "step": 1934 }, { "epoch": 0.12, "grad_norm": 1.0799813270568848, "learning_rate": 9.776984731017714e-06, "loss": 0.6516, "step": 1935 }, { "epoch": 0.12, "grad_norm": 1.014874815940857, "learning_rate": 9.7766816291781e-06, "loss": 0.536, "step": 1936 }, { "epoch": 0.12, "grad_norm": 1.1212245225906372, "learning_rate": 9.77637832620904e-06, "loss": 0.598, "step": 1937 }, { "epoch": 0.12, "grad_norm": 1.0710190534591675, "learning_rate": 9.776074822123306e-06, "loss": 0.5727, "step": 1938 }, { "epoch": 0.12, "grad_norm": 1.0462238788604736, "learning_rate": 9.775771116933674e-06, "loss": 0.5412, "step": 1939 }, { "epoch": 0.12, "grad_norm": 1.0557196140289307, "learning_rate": 9.775467210652936e-06, "loss": 0.5848, "step": 1940 }, { "epoch": 0.12, "grad_norm": 1.044198751449585, "learning_rate": 9.775163103293885e-06, "loss": 0.5564, "step": 1941 }, { "epoch": 0.12, "grad_norm": 1.0481051206588745, "learning_rate": 9.774858794869328e-06, "loss": 0.6043, "step": 1942 }, { "epoch": 0.12, "grad_norm": 1.0210567712783813, "learning_rate": 9.774554285392078e-06, "loss": 0.5476, "step": 1943 }, { "epoch": 0.12, "grad_norm": 1.0004888772964478, "learning_rate": 9.774249574874957e-06, "loss": 0.5579, "step": 1944 }, { "epoch": 0.12, "grad_norm": 0.9968801736831665, "learning_rate": 9.773944663330793e-06, "loss": 0.5568, "step": 1945 }, { "epoch": 0.12, "grad_norm": 1.0612200498580933, "learning_rate": 9.773639550772428e-06, "loss": 0.5521, "step": 1946 }, { "epoch": 0.12, "grad_norm": 1.0417052507400513, "learning_rate": 9.773334237212707e-06, "loss": 0.5736, "step": 1947 }, { "epoch": 0.12, "grad_norm": 1.0855478048324585, "learning_rate": 9.773028722664486e-06, "loss": 0.5767, "step": 1948 }, { "epoch": 0.12, "grad_norm": 1.009902834892273, "learning_rate": 9.77272300714063e-06, "loss": 0.6113, "step": 1949 }, { "epoch": 0.12, "grad_norm": 1.0125980377197266, "learning_rate": 9.77241709065401e-06, "loss": 0.5467, "step": 1950 }, { "epoch": 0.12, "grad_norm": 1.0341662168502808, "learning_rate": 9.772110973217512e-06, "loss": 0.578, "step": 1951 }, { "epoch": 0.12, "grad_norm": 1.0842918157577515, "learning_rate": 9.77180465484402e-06, "loss": 0.5719, "step": 1952 }, { "epoch": 0.12, "grad_norm": 0.9895298480987549, "learning_rate": 9.771498135546433e-06, "loss": 0.5673, "step": 1953 }, { "epoch": 0.12, "grad_norm": 1.0436815023422241, "learning_rate": 9.77119141533766e-06, "loss": 0.5575, "step": 1954 }, { "epoch": 0.12, "grad_norm": 1.059710144996643, "learning_rate": 9.770884494230614e-06, "loss": 0.5649, "step": 1955 }, { "epoch": 0.12, "grad_norm": 0.9809030890464783, "learning_rate": 9.770577372238217e-06, "loss": 0.5313, "step": 1956 }, { "epoch": 0.12, "grad_norm": 1.0639938116073608, "learning_rate": 9.770270049373403e-06, "loss": 0.5909, "step": 1957 }, { "epoch": 0.12, "grad_norm": 1.0074024200439453, "learning_rate": 9.769962525649112e-06, "loss": 0.5131, "step": 1958 }, { "epoch": 0.12, "grad_norm": 1.0866878032684326, "learning_rate": 9.769654801078294e-06, "loss": 0.6138, "step": 1959 }, { "epoch": 0.12, "grad_norm": 1.0164234638214111, "learning_rate": 9.769346875673903e-06, "loss": 0.5365, "step": 1960 }, { "epoch": 0.12, "grad_norm": 1.0905537605285645, "learning_rate": 9.769038749448907e-06, "loss": 0.5678, "step": 1961 }, { "epoch": 0.12, "grad_norm": 1.0142149925231934, "learning_rate": 9.76873042241628e-06, "loss": 0.5378, "step": 1962 }, { "epoch": 0.12, "grad_norm": 1.0266367197036743, "learning_rate": 9.768421894589003e-06, "loss": 0.5878, "step": 1963 }, { "epoch": 0.12, "grad_norm": 1.1256475448608398, "learning_rate": 9.76811316598007e-06, "loss": 0.5872, "step": 1964 }, { "epoch": 0.12, "grad_norm": 1.02944016456604, "learning_rate": 9.767804236602476e-06, "loss": 0.5647, "step": 1965 }, { "epoch": 0.12, "grad_norm": 0.980645477771759, "learning_rate": 9.767495106469233e-06, "loss": 0.573, "step": 1966 }, { "epoch": 0.12, "grad_norm": 1.1279990673065186, "learning_rate": 9.767185775593356e-06, "loss": 0.5803, "step": 1967 }, { "epoch": 0.12, "grad_norm": 1.1235218048095703, "learning_rate": 9.76687624398787e-06, "loss": 0.5421, "step": 1968 }, { "epoch": 0.12, "grad_norm": 1.0594100952148438, "learning_rate": 9.766566511665808e-06, "loss": 0.5899, "step": 1969 }, { "epoch": 0.12, "grad_norm": 1.0320079326629639, "learning_rate": 9.766256578640212e-06, "loss": 0.5809, "step": 1970 }, { "epoch": 0.12, "grad_norm": 1.0690884590148926, "learning_rate": 9.76594644492413e-06, "loss": 0.5566, "step": 1971 }, { "epoch": 0.12, "grad_norm": 1.002901554107666, "learning_rate": 9.765636110530626e-06, "loss": 0.5923, "step": 1972 }, { "epoch": 0.12, "grad_norm": 1.1202044486999512, "learning_rate": 9.765325575472761e-06, "loss": 0.571, "step": 1973 }, { "epoch": 0.13, "grad_norm": 1.0211033821105957, "learning_rate": 9.765014839763616e-06, "loss": 0.589, "step": 1974 }, { "epoch": 0.13, "grad_norm": 1.101736307144165, "learning_rate": 9.764703903416271e-06, "loss": 0.545, "step": 1975 }, { "epoch": 0.13, "grad_norm": 1.0086767673492432, "learning_rate": 9.76439276644382e-06, "loss": 0.5584, "step": 1976 }, { "epoch": 0.13, "grad_norm": 1.0515953302383423, "learning_rate": 9.764081428859363e-06, "loss": 0.6101, "step": 1977 }, { "epoch": 0.13, "grad_norm": 1.0618976354599, "learning_rate": 9.763769890676011e-06, "loss": 0.5512, "step": 1978 }, { "epoch": 0.13, "grad_norm": 1.1033425331115723, "learning_rate": 9.76345815190688e-06, "loss": 0.6218, "step": 1979 }, { "epoch": 0.13, "grad_norm": 1.13877272605896, "learning_rate": 9.763146212565097e-06, "loss": 0.5939, "step": 1980 }, { "epoch": 0.13, "grad_norm": 1.0750939846038818, "learning_rate": 9.762834072663798e-06, "loss": 0.5518, "step": 1981 }, { "epoch": 0.13, "grad_norm": 0.9559831023216248, "learning_rate": 9.762521732216124e-06, "loss": 0.5434, "step": 1982 }, { "epoch": 0.13, "grad_norm": 1.0375754833221436, "learning_rate": 9.762209191235227e-06, "loss": 0.6282, "step": 1983 }, { "epoch": 0.13, "grad_norm": 1.04959237575531, "learning_rate": 9.761896449734269e-06, "loss": 0.5665, "step": 1984 }, { "epoch": 0.13, "grad_norm": 1.168744683265686, "learning_rate": 9.761583507726416e-06, "loss": 0.5913, "step": 1985 }, { "epoch": 0.13, "grad_norm": 1.2400338649749756, "learning_rate": 9.761270365224846e-06, "loss": 0.5966, "step": 1986 }, { "epoch": 0.13, "grad_norm": 0.9545580744743347, "learning_rate": 9.760957022242746e-06, "loss": 0.5043, "step": 1987 }, { "epoch": 0.13, "grad_norm": 1.086821436882019, "learning_rate": 9.760643478793305e-06, "loss": 0.6122, "step": 1988 }, { "epoch": 0.13, "grad_norm": 1.0779457092285156, "learning_rate": 9.760329734889729e-06, "loss": 0.5736, "step": 1989 }, { "epoch": 0.13, "grad_norm": 1.0653327703475952, "learning_rate": 9.760015790545227e-06, "loss": 0.552, "step": 1990 }, { "epoch": 0.13, "grad_norm": 1.119644284248352, "learning_rate": 9.759701645773022e-06, "loss": 0.5557, "step": 1991 }, { "epoch": 0.13, "grad_norm": 1.195134162902832, "learning_rate": 9.759387300586336e-06, "loss": 0.5542, "step": 1992 }, { "epoch": 0.13, "grad_norm": 0.8945276141166687, "learning_rate": 9.759072754998407e-06, "loss": 0.5006, "step": 1993 }, { "epoch": 0.13, "grad_norm": 1.1034178733825684, "learning_rate": 9.758758009022482e-06, "loss": 0.612, "step": 1994 }, { "epoch": 0.13, "grad_norm": 0.9998924732208252, "learning_rate": 9.758443062671809e-06, "loss": 0.5873, "step": 1995 }, { "epoch": 0.13, "grad_norm": 1.0209602117538452, "learning_rate": 9.758127915959655e-06, "loss": 0.5756, "step": 1996 }, { "epoch": 0.13, "grad_norm": 1.1128679513931274, "learning_rate": 9.757812568899285e-06, "loss": 0.6099, "step": 1997 }, { "epoch": 0.13, "grad_norm": 0.9929051399230957, "learning_rate": 9.75749702150398e-06, "loss": 0.5368, "step": 1998 }, { "epoch": 0.13, "grad_norm": 1.0805941820144653, "learning_rate": 9.757181273787024e-06, "loss": 0.5681, "step": 1999 }, { "epoch": 0.13, "grad_norm": 1.0125678777694702, "learning_rate": 9.756865325761715e-06, "loss": 0.5911, "step": 2000 }, { "epoch": 0.13, "grad_norm": 1.0813782215118408, "learning_rate": 9.756549177441354e-06, "loss": 0.5496, "step": 2001 }, { "epoch": 0.13, "grad_norm": 1.0482062101364136, "learning_rate": 9.756232828839256e-06, "loss": 0.5561, "step": 2002 }, { "epoch": 0.13, "grad_norm": 1.0184236764907837, "learning_rate": 9.755916279968738e-06, "loss": 0.583, "step": 2003 }, { "epoch": 0.13, "grad_norm": 1.0551586151123047, "learning_rate": 9.75559953084313e-06, "loss": 0.5462, "step": 2004 }, { "epoch": 0.13, "grad_norm": 1.135002851486206, "learning_rate": 9.755282581475769e-06, "loss": 0.5428, "step": 2005 }, { "epoch": 0.13, "grad_norm": 1.0241683721542358, "learning_rate": 9.75496543188e-06, "loss": 0.5505, "step": 2006 }, { "epoch": 0.13, "grad_norm": 1.01645827293396, "learning_rate": 9.754648082069181e-06, "loss": 0.5413, "step": 2007 }, { "epoch": 0.13, "grad_norm": 1.0453730821609497, "learning_rate": 9.75433053205667e-06, "loss": 0.5514, "step": 2008 }, { "epoch": 0.13, "grad_norm": 1.0232735872268677, "learning_rate": 9.754012781855837e-06, "loss": 0.5932, "step": 2009 }, { "epoch": 0.13, "grad_norm": 1.0743303298950195, "learning_rate": 9.753694831480067e-06, "loss": 0.5519, "step": 2010 }, { "epoch": 0.13, "grad_norm": 1.1090643405914307, "learning_rate": 9.753376680942744e-06, "loss": 0.5891, "step": 2011 }, { "epoch": 0.13, "grad_norm": 1.0725889205932617, "learning_rate": 9.753058330257263e-06, "loss": 0.5544, "step": 2012 }, { "epoch": 0.13, "grad_norm": 1.1309471130371094, "learning_rate": 9.752739779437032e-06, "loss": 0.573, "step": 2013 }, { "epoch": 0.13, "grad_norm": 1.1250039339065552, "learning_rate": 9.752421028495461e-06, "loss": 0.6168, "step": 2014 }, { "epoch": 0.13, "grad_norm": 1.0606460571289062, "learning_rate": 9.752102077445974e-06, "loss": 0.5707, "step": 2015 }, { "epoch": 0.13, "grad_norm": 1.0642883777618408, "learning_rate": 9.751782926302e-06, "loss": 0.5621, "step": 2016 }, { "epoch": 0.13, "grad_norm": 1.0262463092803955, "learning_rate": 9.751463575076977e-06, "loss": 0.5777, "step": 2017 }, { "epoch": 0.13, "grad_norm": 1.0222927331924438, "learning_rate": 9.75114402378435e-06, "loss": 0.5698, "step": 2018 }, { "epoch": 0.13, "grad_norm": 1.1447890996932983, "learning_rate": 9.75082427243758e-06, "loss": 0.6233, "step": 2019 }, { "epoch": 0.13, "grad_norm": 1.1772607564926147, "learning_rate": 9.750504321050126e-06, "loss": 0.6242, "step": 2020 }, { "epoch": 0.13, "grad_norm": 1.0555652379989624, "learning_rate": 9.75018416963546e-06, "loss": 0.5834, "step": 2021 }, { "epoch": 0.13, "grad_norm": 1.1782764196395874, "learning_rate": 9.749863818207061e-06, "loss": 0.5901, "step": 2022 }, { "epoch": 0.13, "grad_norm": 1.0955591201782227, "learning_rate": 9.749543266778424e-06, "loss": 0.5761, "step": 2023 }, { "epoch": 0.13, "grad_norm": 1.125753402709961, "learning_rate": 9.749222515363041e-06, "loss": 0.5906, "step": 2024 }, { "epoch": 0.13, "grad_norm": 1.0516302585601807, "learning_rate": 9.748901563974418e-06, "loss": 0.5582, "step": 2025 }, { "epoch": 0.13, "grad_norm": 1.1180061101913452, "learning_rate": 9.748580412626072e-06, "loss": 0.6122, "step": 2026 }, { "epoch": 0.13, "grad_norm": 1.0065104961395264, "learning_rate": 9.748259061331524e-06, "loss": 0.5759, "step": 2027 }, { "epoch": 0.13, "grad_norm": 1.0845080614089966, "learning_rate": 9.747937510104305e-06, "loss": 0.5855, "step": 2028 }, { "epoch": 0.13, "grad_norm": 1.0901507139205933, "learning_rate": 9.74761575895795e-06, "loss": 0.5857, "step": 2029 }, { "epoch": 0.13, "grad_norm": 1.1333222389221191, "learning_rate": 9.747293807906017e-06, "loss": 0.5956, "step": 2030 }, { "epoch": 0.13, "grad_norm": 0.9672915935516357, "learning_rate": 9.746971656962053e-06, "loss": 0.5722, "step": 2031 }, { "epoch": 0.13, "grad_norm": 1.082801342010498, "learning_rate": 9.746649306139627e-06, "loss": 0.5835, "step": 2032 }, { "epoch": 0.13, "grad_norm": 1.1733567714691162, "learning_rate": 9.74632675545231e-06, "loss": 0.5665, "step": 2033 }, { "epoch": 0.13, "grad_norm": 1.02162504196167, "learning_rate": 9.746004004913688e-06, "loss": 0.5609, "step": 2034 }, { "epoch": 0.13, "grad_norm": 0.9524051547050476, "learning_rate": 9.745681054537345e-06, "loss": 0.5042, "step": 2035 }, { "epoch": 0.13, "grad_norm": 1.0968283414840698, "learning_rate": 9.745357904336882e-06, "loss": 0.5785, "step": 2036 }, { "epoch": 0.13, "grad_norm": 0.9993317723274231, "learning_rate": 9.745034554325905e-06, "loss": 0.5537, "step": 2037 }, { "epoch": 0.13, "grad_norm": 1.0851517915725708, "learning_rate": 9.74471100451803e-06, "loss": 0.5703, "step": 2038 }, { "epoch": 0.13, "grad_norm": 1.0083051919937134, "learning_rate": 9.744387254926882e-06, "loss": 0.559, "step": 2039 }, { "epoch": 0.13, "grad_norm": 1.1442487239837646, "learning_rate": 9.74406330556609e-06, "loss": 0.6185, "step": 2040 }, { "epoch": 0.13, "grad_norm": 1.1060651540756226, "learning_rate": 9.743739156449294e-06, "loss": 0.6161, "step": 2041 }, { "epoch": 0.13, "grad_norm": 1.0019123554229736, "learning_rate": 9.743414807590145e-06, "loss": 0.5745, "step": 2042 }, { "epoch": 0.13, "grad_norm": 1.0129235982894897, "learning_rate": 9.743090259002302e-06, "loss": 0.549, "step": 2043 }, { "epoch": 0.13, "grad_norm": 1.088689923286438, "learning_rate": 9.742765510699425e-06, "loss": 0.5491, "step": 2044 }, { "epoch": 0.13, "grad_norm": 1.0228464603424072, "learning_rate": 9.742440562695194e-06, "loss": 0.5514, "step": 2045 }, { "epoch": 0.13, "grad_norm": 1.0378639698028564, "learning_rate": 9.742115415003288e-06, "loss": 0.553, "step": 2046 }, { "epoch": 0.13, "grad_norm": 1.0331372022628784, "learning_rate": 9.741790067637398e-06, "loss": 0.6551, "step": 2047 }, { "epoch": 0.13, "grad_norm": 1.0934799909591675, "learning_rate": 9.741464520611223e-06, "loss": 0.5955, "step": 2048 }, { "epoch": 0.13, "grad_norm": 1.070549488067627, "learning_rate": 9.741138773938472e-06, "loss": 0.5467, "step": 2049 }, { "epoch": 0.13, "grad_norm": 1.067488670349121, "learning_rate": 9.74081282763286e-06, "loss": 0.5599, "step": 2050 }, { "epoch": 0.13, "grad_norm": 1.0350698232650757, "learning_rate": 9.740486681708114e-06, "loss": 0.6225, "step": 2051 }, { "epoch": 0.13, "grad_norm": 1.0792306661605835, "learning_rate": 9.740160336177962e-06, "loss": 0.6127, "step": 2052 }, { "epoch": 0.13, "grad_norm": 1.0747098922729492, "learning_rate": 9.73983379105615e-06, "loss": 0.5754, "step": 2053 }, { "epoch": 0.13, "grad_norm": 1.069152593612671, "learning_rate": 9.739507046356424e-06, "loss": 0.5852, "step": 2054 }, { "epoch": 0.13, "grad_norm": 0.9734413623809814, "learning_rate": 9.739180102092544e-06, "loss": 0.5735, "step": 2055 }, { "epoch": 0.13, "grad_norm": 1.1087932586669922, "learning_rate": 9.738852958278278e-06, "loss": 0.5878, "step": 2056 }, { "epoch": 0.13, "grad_norm": 1.0658721923828125, "learning_rate": 9.738525614927399e-06, "loss": 0.5843, "step": 2057 }, { "epoch": 0.13, "grad_norm": 0.9961588382720947, "learning_rate": 9.73819807205369e-06, "loss": 0.5266, "step": 2058 }, { "epoch": 0.13, "grad_norm": 1.0801833868026733, "learning_rate": 9.737870329670942e-06, "loss": 0.5932, "step": 2059 }, { "epoch": 0.13, "grad_norm": 1.1166014671325684, "learning_rate": 9.737542387792957e-06, "loss": 0.6081, "step": 2060 }, { "epoch": 0.13, "grad_norm": 1.0558866262435913, "learning_rate": 9.737214246433544e-06, "loss": 0.5787, "step": 2061 }, { "epoch": 0.13, "grad_norm": 1.1056846380233765, "learning_rate": 9.736885905606516e-06, "loss": 0.5789, "step": 2062 }, { "epoch": 0.13, "grad_norm": 1.0813485383987427, "learning_rate": 9.736557365325703e-06, "loss": 0.5877, "step": 2063 }, { "epoch": 0.13, "grad_norm": 1.005872368812561, "learning_rate": 9.736228625604938e-06, "loss": 0.5553, "step": 2064 }, { "epoch": 0.13, "grad_norm": 1.0798795223236084, "learning_rate": 9.735899686458059e-06, "loss": 0.5727, "step": 2065 }, { "epoch": 0.13, "grad_norm": 0.9533892273902893, "learning_rate": 9.73557054789892e-06, "loss": 0.5545, "step": 2066 }, { "epoch": 0.13, "grad_norm": 0.9775428771972656, "learning_rate": 9.73524120994138e-06, "loss": 0.5799, "step": 2067 }, { "epoch": 0.13, "grad_norm": 1.0594699382781982, "learning_rate": 9.734911672599304e-06, "loss": 0.5669, "step": 2068 }, { "epoch": 0.13, "grad_norm": 1.021916151046753, "learning_rate": 9.73458193588657e-06, "loss": 0.6152, "step": 2069 }, { "epoch": 0.13, "grad_norm": 1.0601035356521606, "learning_rate": 9.734251999817061e-06, "loss": 0.5698, "step": 2070 }, { "epoch": 0.13, "grad_norm": 1.082911491394043, "learning_rate": 9.733921864404669e-06, "loss": 0.5278, "step": 2071 }, { "epoch": 0.13, "grad_norm": 1.1061042547225952, "learning_rate": 9.733591529663295e-06, "loss": 0.5808, "step": 2072 }, { "epoch": 0.13, "grad_norm": 1.0210843086242676, "learning_rate": 9.73326099560685e-06, "loss": 0.5361, "step": 2073 }, { "epoch": 0.13, "grad_norm": 0.9831228852272034, "learning_rate": 9.732930262249249e-06, "loss": 0.5207, "step": 2074 }, { "epoch": 0.13, "grad_norm": 1.057084083557129, "learning_rate": 9.73259932960442e-06, "loss": 0.5674, "step": 2075 }, { "epoch": 0.13, "grad_norm": 1.0501519441604614, "learning_rate": 9.732268197686296e-06, "loss": 0.5809, "step": 2076 }, { "epoch": 0.13, "grad_norm": 1.0547102689743042, "learning_rate": 9.731936866508822e-06, "loss": 0.5508, "step": 2077 }, { "epoch": 0.13, "grad_norm": 1.1186622381210327, "learning_rate": 9.731605336085947e-06, "loss": 0.6001, "step": 2078 }, { "epoch": 0.13, "grad_norm": 1.0614229440689087, "learning_rate": 9.73127360643163e-06, "loss": 0.5409, "step": 2079 }, { "epoch": 0.13, "grad_norm": 1.057842493057251, "learning_rate": 9.730941677559843e-06, "loss": 0.5829, "step": 2080 }, { "epoch": 0.13, "grad_norm": 1.0404309034347534, "learning_rate": 9.730609549484558e-06, "loss": 0.6032, "step": 2081 }, { "epoch": 0.13, "grad_norm": 0.9369000792503357, "learning_rate": 9.730277222219762e-06, "loss": 0.4888, "step": 2082 }, { "epoch": 0.13, "grad_norm": 1.1000438928604126, "learning_rate": 9.729944695779448e-06, "loss": 0.5853, "step": 2083 }, { "epoch": 0.13, "grad_norm": 1.0321012735366821, "learning_rate": 9.729611970177615e-06, "loss": 0.5321, "step": 2084 }, { "epoch": 0.13, "grad_norm": 1.0978355407714844, "learning_rate": 9.729279045428277e-06, "loss": 0.6109, "step": 2085 }, { "epoch": 0.13, "grad_norm": 1.0012578964233398, "learning_rate": 9.72894592154545e-06, "loss": 0.5483, "step": 2086 }, { "epoch": 0.13, "grad_norm": 1.0384838581085205, "learning_rate": 9.72861259854316e-06, "loss": 0.5335, "step": 2087 }, { "epoch": 0.13, "grad_norm": 1.0041470527648926, "learning_rate": 9.728279076435446e-06, "loss": 0.5387, "step": 2088 }, { "epoch": 0.13, "grad_norm": 1.126891851425171, "learning_rate": 9.727945355236345e-06, "loss": 0.5387, "step": 2089 }, { "epoch": 0.13, "grad_norm": 1.1620920896530151, "learning_rate": 9.727611434959914e-06, "loss": 0.5745, "step": 2090 }, { "epoch": 0.13, "grad_norm": 1.0596435070037842, "learning_rate": 9.727277315620212e-06, "loss": 0.6089, "step": 2091 }, { "epoch": 0.13, "grad_norm": 0.9877673983573914, "learning_rate": 9.726942997231308e-06, "loss": 0.5555, "step": 2092 }, { "epoch": 0.13, "grad_norm": 1.0126142501831055, "learning_rate": 9.726608479807278e-06, "loss": 0.5542, "step": 2093 }, { "epoch": 0.13, "grad_norm": 1.0659465789794922, "learning_rate": 9.726273763362206e-06, "loss": 0.6002, "step": 2094 }, { "epoch": 0.13, "grad_norm": 1.177112340927124, "learning_rate": 9.725938847910187e-06, "loss": 0.5862, "step": 2095 }, { "epoch": 0.13, "grad_norm": 1.0119010210037231, "learning_rate": 9.725603733465325e-06, "loss": 0.557, "step": 2096 }, { "epoch": 0.13, "grad_norm": 1.0778146982192993, "learning_rate": 9.725268420041728e-06, "loss": 0.6207, "step": 2097 }, { "epoch": 0.13, "grad_norm": 0.9919752478599548, "learning_rate": 9.724932907653516e-06, "loss": 0.5706, "step": 2098 }, { "epoch": 0.13, "grad_norm": 1.003469705581665, "learning_rate": 9.724597196314817e-06, "loss": 0.5379, "step": 2099 }, { "epoch": 0.13, "grad_norm": 1.0249968767166138, "learning_rate": 9.724261286039766e-06, "loss": 0.55, "step": 2100 }, { "epoch": 0.13, "grad_norm": 1.1211967468261719, "learning_rate": 9.723925176842506e-06, "loss": 0.5692, "step": 2101 }, { "epoch": 0.13, "grad_norm": 1.1254615783691406, "learning_rate": 9.72358886873719e-06, "loss": 0.5689, "step": 2102 }, { "epoch": 0.13, "grad_norm": 0.9981034398078918, "learning_rate": 9.723252361737977e-06, "loss": 0.5425, "step": 2103 }, { "epoch": 0.13, "grad_norm": 0.9972788095474243, "learning_rate": 9.722915655859042e-06, "loss": 0.6044, "step": 2104 }, { "epoch": 0.13, "grad_norm": 1.0443592071533203, "learning_rate": 9.722578751114556e-06, "loss": 0.5497, "step": 2105 }, { "epoch": 0.13, "grad_norm": 1.0376752614974976, "learning_rate": 9.72224164751871e-06, "loss": 0.6096, "step": 2106 }, { "epoch": 0.13, "grad_norm": 1.2066547870635986, "learning_rate": 9.721904345085692e-06, "loss": 0.6374, "step": 2107 }, { "epoch": 0.13, "grad_norm": 1.0088324546813965, "learning_rate": 9.72156684382971e-06, "loss": 0.5344, "step": 2108 }, { "epoch": 0.13, "grad_norm": 1.0367845296859741, "learning_rate": 9.721229143764975e-06, "loss": 0.5277, "step": 2109 }, { "epoch": 0.13, "grad_norm": 1.0177240371704102, "learning_rate": 9.720891244905701e-06, "loss": 0.5958, "step": 2110 }, { "epoch": 0.13, "grad_norm": 1.14991295337677, "learning_rate": 9.720553147266123e-06, "loss": 0.6143, "step": 2111 }, { "epoch": 0.13, "grad_norm": 1.1234478950500488, "learning_rate": 9.720214850860473e-06, "loss": 0.6052, "step": 2112 }, { "epoch": 0.13, "grad_norm": 1.1023095846176147, "learning_rate": 9.719876355702993e-06, "loss": 0.5674, "step": 2113 }, { "epoch": 0.13, "grad_norm": 1.0658434629440308, "learning_rate": 9.719537661807942e-06, "loss": 0.5703, "step": 2114 }, { "epoch": 0.13, "grad_norm": 1.0622062683105469, "learning_rate": 9.719198769189577e-06, "loss": 0.5308, "step": 2115 }, { "epoch": 0.13, "grad_norm": 1.0702844858169556, "learning_rate": 9.718859677862169e-06, "loss": 0.5833, "step": 2116 }, { "epoch": 0.13, "grad_norm": 1.0853898525238037, "learning_rate": 9.718520387839996e-06, "loss": 0.5786, "step": 2117 }, { "epoch": 0.13, "grad_norm": 1.0653886795043945, "learning_rate": 9.718180899137344e-06, "loss": 0.5863, "step": 2118 }, { "epoch": 0.13, "grad_norm": 1.0895500183105469, "learning_rate": 9.717841211768505e-06, "loss": 0.5423, "step": 2119 }, { "epoch": 0.13, "grad_norm": 1.0399247407913208, "learning_rate": 9.71750132574779e-06, "loss": 0.5686, "step": 2120 }, { "epoch": 0.13, "grad_norm": 1.1106464862823486, "learning_rate": 9.717161241089501e-06, "loss": 0.5797, "step": 2121 }, { "epoch": 0.13, "grad_norm": 1.0631537437438965, "learning_rate": 9.716820957807963e-06, "loss": 0.5606, "step": 2122 }, { "epoch": 0.13, "grad_norm": 1.048399806022644, "learning_rate": 9.716480475917504e-06, "loss": 0.566, "step": 2123 }, { "epoch": 0.13, "grad_norm": 1.0127686262130737, "learning_rate": 9.716139795432459e-06, "loss": 0.5521, "step": 2124 }, { "epoch": 0.13, "grad_norm": 1.0522278547286987, "learning_rate": 9.715798916367174e-06, "loss": 0.571, "step": 2125 }, { "epoch": 0.13, "grad_norm": 1.0320959091186523, "learning_rate": 9.715457838736e-06, "loss": 0.5768, "step": 2126 }, { "epoch": 0.13, "grad_norm": 1.0515543222427368, "learning_rate": 9.715116562553302e-06, "loss": 0.5936, "step": 2127 }, { "epoch": 0.13, "grad_norm": 1.0121512413024902, "learning_rate": 9.714775087833446e-06, "loss": 0.5614, "step": 2128 }, { "epoch": 0.13, "grad_norm": 1.0705124139785767, "learning_rate": 9.714433414590816e-06, "loss": 0.565, "step": 2129 }, { "epoch": 0.13, "grad_norm": 1.0589114427566528, "learning_rate": 9.714091542839792e-06, "loss": 0.5875, "step": 2130 }, { "epoch": 0.14, "grad_norm": 0.98140549659729, "learning_rate": 9.713749472594773e-06, "loss": 0.569, "step": 2131 }, { "epoch": 0.14, "grad_norm": 1.0898550748825073, "learning_rate": 9.713407203870163e-06, "loss": 0.5749, "step": 2132 }, { "epoch": 0.14, "grad_norm": 0.9971396923065186, "learning_rate": 9.713064736680372e-06, "loss": 0.5178, "step": 2133 }, { "epoch": 0.14, "grad_norm": 1.0567504167556763, "learning_rate": 9.71272207103982e-06, "loss": 0.536, "step": 2134 }, { "epoch": 0.14, "grad_norm": 1.1251214742660522, "learning_rate": 9.712379206962936e-06, "loss": 0.6351, "step": 2135 }, { "epoch": 0.14, "grad_norm": 0.9792860150337219, "learning_rate": 9.712036144464157e-06, "loss": 0.5423, "step": 2136 }, { "epoch": 0.14, "grad_norm": 1.0028667449951172, "learning_rate": 9.711692883557928e-06, "loss": 0.6023, "step": 2137 }, { "epoch": 0.14, "grad_norm": 1.046597957611084, "learning_rate": 9.711349424258702e-06, "loss": 0.5918, "step": 2138 }, { "epoch": 0.14, "grad_norm": 1.0695263147354126, "learning_rate": 9.711005766580942e-06, "loss": 0.5404, "step": 2139 }, { "epoch": 0.14, "grad_norm": 1.0932008028030396, "learning_rate": 9.710661910539117e-06, "loss": 0.5448, "step": 2140 }, { "epoch": 0.14, "grad_norm": 1.0518196821212769, "learning_rate": 9.710317856147707e-06, "loss": 0.5634, "step": 2141 }, { "epoch": 0.14, "grad_norm": 1.0272433757781982, "learning_rate": 9.709973603421196e-06, "loss": 0.5293, "step": 2142 }, { "epoch": 0.14, "grad_norm": 1.0365588665008545, "learning_rate": 9.709629152374084e-06, "loss": 0.5599, "step": 2143 }, { "epoch": 0.14, "grad_norm": 1.0788600444793701, "learning_rate": 9.70928450302087e-06, "loss": 0.6063, "step": 2144 }, { "epoch": 0.14, "grad_norm": 0.985220193862915, "learning_rate": 9.708939655376069e-06, "loss": 0.522, "step": 2145 }, { "epoch": 0.14, "grad_norm": 1.0706517696380615, "learning_rate": 9.708594609454201e-06, "loss": 0.5709, "step": 2146 }, { "epoch": 0.14, "grad_norm": 0.9971744418144226, "learning_rate": 9.708249365269793e-06, "loss": 0.5983, "step": 2147 }, { "epoch": 0.14, "grad_norm": 1.0823099613189697, "learning_rate": 9.707903922837382e-06, "loss": 0.599, "step": 2148 }, { "epoch": 0.14, "grad_norm": 1.09287691116333, "learning_rate": 9.707558282171517e-06, "loss": 0.6628, "step": 2149 }, { "epoch": 0.14, "grad_norm": 0.9843802452087402, "learning_rate": 9.707212443286746e-06, "loss": 0.5796, "step": 2150 }, { "epoch": 0.14, "grad_norm": 1.0742542743682861, "learning_rate": 9.706866406197637e-06, "loss": 0.5566, "step": 2151 }, { "epoch": 0.14, "grad_norm": 1.0588560104370117, "learning_rate": 9.706520170918756e-06, "loss": 0.6191, "step": 2152 }, { "epoch": 0.14, "grad_norm": 1.0245299339294434, "learning_rate": 9.706173737464683e-06, "loss": 0.5365, "step": 2153 }, { "epoch": 0.14, "grad_norm": 1.029465913772583, "learning_rate": 9.705827105850008e-06, "loss": 0.5687, "step": 2154 }, { "epoch": 0.14, "grad_norm": 1.1049184799194336, "learning_rate": 9.705480276089323e-06, "loss": 0.5191, "step": 2155 }, { "epoch": 0.14, "grad_norm": 1.0191420316696167, "learning_rate": 9.705133248197232e-06, "loss": 0.5487, "step": 2156 }, { "epoch": 0.14, "grad_norm": 1.0053186416625977, "learning_rate": 9.704786022188346e-06, "loss": 0.5622, "step": 2157 }, { "epoch": 0.14, "grad_norm": 1.0235744714736938, "learning_rate": 9.704438598077291e-06, "loss": 0.5317, "step": 2158 }, { "epoch": 0.14, "grad_norm": 1.0682446956634521, "learning_rate": 9.70409097587869e-06, "loss": 0.5349, "step": 2159 }, { "epoch": 0.14, "grad_norm": 1.011461615562439, "learning_rate": 9.703743155607182e-06, "loss": 0.5326, "step": 2160 }, { "epoch": 0.14, "grad_norm": 1.0195093154907227, "learning_rate": 9.703395137277414e-06, "loss": 0.5612, "step": 2161 }, { "epoch": 0.14, "grad_norm": 1.0611745119094849, "learning_rate": 9.703046920904038e-06, "loss": 0.5622, "step": 2162 }, { "epoch": 0.14, "grad_norm": 1.2104378938674927, "learning_rate": 9.702698506501717e-06, "loss": 0.5635, "step": 2163 }, { "epoch": 0.14, "grad_norm": 1.056689739227295, "learning_rate": 9.702349894085122e-06, "loss": 0.559, "step": 2164 }, { "epoch": 0.14, "grad_norm": 1.0749422311782837, "learning_rate": 9.702001083668931e-06, "loss": 0.5979, "step": 2165 }, { "epoch": 0.14, "grad_norm": 1.1210516691207886, "learning_rate": 9.701652075267832e-06, "loss": 0.6176, "step": 2166 }, { "epoch": 0.14, "grad_norm": 1.1960784196853638, "learning_rate": 9.701302868896518e-06, "loss": 0.5733, "step": 2167 }, { "epoch": 0.14, "grad_norm": 1.0504810810089111, "learning_rate": 9.700953464569698e-06, "loss": 0.5869, "step": 2168 }, { "epoch": 0.14, "grad_norm": 1.0147689580917358, "learning_rate": 9.700603862302078e-06, "loss": 0.5408, "step": 2169 }, { "epoch": 0.14, "grad_norm": 1.0584076642990112, "learning_rate": 9.700254062108383e-06, "loss": 0.5806, "step": 2170 }, { "epoch": 0.14, "grad_norm": 1.0558146238327026, "learning_rate": 9.69990406400334e-06, "loss": 0.5633, "step": 2171 }, { "epoch": 0.14, "grad_norm": 1.0875517129898071, "learning_rate": 9.699553868001688e-06, "loss": 0.6023, "step": 2172 }, { "epoch": 0.14, "grad_norm": 1.1330591440200806, "learning_rate": 9.699203474118168e-06, "loss": 0.5357, "step": 2173 }, { "epoch": 0.14, "grad_norm": 1.0651309490203857, "learning_rate": 9.69885288236754e-06, "loss": 0.5559, "step": 2174 }, { "epoch": 0.14, "grad_norm": 1.2233500480651855, "learning_rate": 9.698502092764562e-06, "loss": 0.5752, "step": 2175 }, { "epoch": 0.14, "grad_norm": 1.0270609855651855, "learning_rate": 9.698151105324006e-06, "loss": 0.5416, "step": 2176 }, { "epoch": 0.14, "grad_norm": 1.0004528760910034, "learning_rate": 9.697799920060651e-06, "loss": 0.5924, "step": 2177 }, { "epoch": 0.14, "grad_norm": 1.0074985027313232, "learning_rate": 9.697448536989284e-06, "loss": 0.5565, "step": 2178 }, { "epoch": 0.14, "grad_norm": 1.044296145439148, "learning_rate": 9.697096956124699e-06, "loss": 0.5802, "step": 2179 }, { "epoch": 0.14, "grad_norm": 1.1109081506729126, "learning_rate": 9.696745177481703e-06, "loss": 0.5349, "step": 2180 }, { "epoch": 0.14, "grad_norm": 0.9400250911712646, "learning_rate": 9.696393201075105e-06, "loss": 0.5466, "step": 2181 }, { "epoch": 0.14, "grad_norm": 1.1142268180847168, "learning_rate": 9.696041026919727e-06, "loss": 0.535, "step": 2182 }, { "epoch": 0.14, "grad_norm": 1.0524247884750366, "learning_rate": 9.695688655030397e-06, "loss": 0.616, "step": 2183 }, { "epoch": 0.14, "grad_norm": 1.0073829889297485, "learning_rate": 9.695336085421953e-06, "loss": 0.5719, "step": 2184 }, { "epoch": 0.14, "grad_norm": 1.0474032163619995, "learning_rate": 9.694983318109242e-06, "loss": 0.5499, "step": 2185 }, { "epoch": 0.14, "grad_norm": 1.0479087829589844, "learning_rate": 9.694630353107115e-06, "loss": 0.616, "step": 2186 }, { "epoch": 0.14, "grad_norm": 1.073878288269043, "learning_rate": 9.694277190430437e-06, "loss": 0.5741, "step": 2187 }, { "epoch": 0.14, "grad_norm": 1.0107192993164062, "learning_rate": 9.693923830094074e-06, "loss": 0.5786, "step": 2188 }, { "epoch": 0.14, "grad_norm": 1.0605086088180542, "learning_rate": 9.693570272112908e-06, "loss": 0.5929, "step": 2189 }, { "epoch": 0.14, "grad_norm": 1.0389643907546997, "learning_rate": 9.693216516501827e-06, "loss": 0.5538, "step": 2190 }, { "epoch": 0.14, "grad_norm": 1.0293034315109253, "learning_rate": 9.692862563275725e-06, "loss": 0.6193, "step": 2191 }, { "epoch": 0.14, "grad_norm": 1.003514051437378, "learning_rate": 9.692508412449505e-06, "loss": 0.5897, "step": 2192 }, { "epoch": 0.14, "grad_norm": 1.0086450576782227, "learning_rate": 9.692154064038079e-06, "loss": 0.5623, "step": 2193 }, { "epoch": 0.14, "grad_norm": 1.0342178344726562, "learning_rate": 9.691799518056369e-06, "loss": 0.6051, "step": 2194 }, { "epoch": 0.14, "grad_norm": 1.0912590026855469, "learning_rate": 9.691444774519302e-06, "loss": 0.5518, "step": 2195 }, { "epoch": 0.14, "grad_norm": 1.103579044342041, "learning_rate": 9.691089833441818e-06, "loss": 0.6017, "step": 2196 }, { "epoch": 0.14, "grad_norm": 1.0409106016159058, "learning_rate": 9.69073469483886e-06, "loss": 0.5614, "step": 2197 }, { "epoch": 0.14, "grad_norm": 1.062774896621704, "learning_rate": 9.690379358725379e-06, "loss": 0.6216, "step": 2198 }, { "epoch": 0.14, "grad_norm": 1.027420997619629, "learning_rate": 9.69002382511634e-06, "loss": 0.5999, "step": 2199 }, { "epoch": 0.14, "grad_norm": 1.0598483085632324, "learning_rate": 9.689668094026716e-06, "loss": 0.5388, "step": 2200 }, { "epoch": 0.14, "grad_norm": 1.1370553970336914, "learning_rate": 9.689312165471483e-06, "loss": 0.5359, "step": 2201 }, { "epoch": 0.14, "grad_norm": 1.0518770217895508, "learning_rate": 9.688956039465626e-06, "loss": 0.567, "step": 2202 }, { "epoch": 0.14, "grad_norm": 1.046511173248291, "learning_rate": 9.688599716024141e-06, "loss": 0.569, "step": 2203 }, { "epoch": 0.14, "grad_norm": 1.0349102020263672, "learning_rate": 9.688243195162033e-06, "loss": 0.5654, "step": 2204 }, { "epoch": 0.14, "grad_norm": 1.0427666902542114, "learning_rate": 9.687886476894314e-06, "loss": 0.5174, "step": 2205 }, { "epoch": 0.14, "grad_norm": 1.213161587715149, "learning_rate": 9.687529561236004e-06, "loss": 0.6052, "step": 2206 }, { "epoch": 0.14, "grad_norm": 1.065405249595642, "learning_rate": 9.687172448202129e-06, "loss": 0.5917, "step": 2207 }, { "epoch": 0.14, "grad_norm": 0.9554116725921631, "learning_rate": 9.68681513780773e-06, "loss": 0.5801, "step": 2208 }, { "epoch": 0.14, "grad_norm": 1.1155883073806763, "learning_rate": 9.686457630067848e-06, "loss": 0.5936, "step": 2209 }, { "epoch": 0.14, "grad_norm": 1.0245048999786377, "learning_rate": 9.686099924997538e-06, "loss": 0.5474, "step": 2210 }, { "epoch": 0.14, "grad_norm": 0.9610018730163574, "learning_rate": 9.685742022611864e-06, "loss": 0.5173, "step": 2211 }, { "epoch": 0.14, "grad_norm": 1.137954592704773, "learning_rate": 9.685383922925892e-06, "loss": 0.5747, "step": 2212 }, { "epoch": 0.14, "grad_norm": 1.0669362545013428, "learning_rate": 9.685025625954703e-06, "loss": 0.5971, "step": 2213 }, { "epoch": 0.14, "grad_norm": 1.135469675064087, "learning_rate": 9.684667131713381e-06, "loss": 0.5644, "step": 2214 }, { "epoch": 0.14, "grad_norm": 1.065121054649353, "learning_rate": 9.684308440217026e-06, "loss": 0.6075, "step": 2215 }, { "epoch": 0.14, "grad_norm": 1.0996694564819336, "learning_rate": 9.683949551480736e-06, "loss": 0.515, "step": 2216 }, { "epoch": 0.14, "grad_norm": 1.113033413887024, "learning_rate": 9.683590465519625e-06, "loss": 0.6083, "step": 2217 }, { "epoch": 0.14, "grad_norm": 1.0447050333023071, "learning_rate": 9.683231182348813e-06, "loss": 0.5751, "step": 2218 }, { "epoch": 0.14, "grad_norm": 0.9936871528625488, "learning_rate": 9.682871701983428e-06, "loss": 0.5505, "step": 2219 }, { "epoch": 0.14, "grad_norm": 1.1078193187713623, "learning_rate": 9.682512024438607e-06, "loss": 0.5774, "step": 2220 }, { "epoch": 0.14, "grad_norm": 1.0609996318817139, "learning_rate": 9.682152149729491e-06, "loss": 0.6056, "step": 2221 }, { "epoch": 0.14, "grad_norm": 1.070388674736023, "learning_rate": 9.681792077871238e-06, "loss": 0.5829, "step": 2222 }, { "epoch": 0.14, "grad_norm": 1.1255348920822144, "learning_rate": 9.681431808879007e-06, "loss": 0.5679, "step": 2223 }, { "epoch": 0.14, "grad_norm": 1.1114442348480225, "learning_rate": 9.681071342767967e-06, "loss": 0.592, "step": 2224 }, { "epoch": 0.14, "grad_norm": 1.1285667419433594, "learning_rate": 9.6807106795533e-06, "loss": 0.5819, "step": 2225 }, { "epoch": 0.14, "grad_norm": 1.109213948249817, "learning_rate": 9.680349819250185e-06, "loss": 0.5491, "step": 2226 }, { "epoch": 0.14, "grad_norm": 1.0387630462646484, "learning_rate": 9.679988761873824e-06, "loss": 0.5465, "step": 2227 }, { "epoch": 0.14, "grad_norm": 1.0273832082748413, "learning_rate": 9.679627507439416e-06, "loss": 0.581, "step": 2228 }, { "epoch": 0.14, "grad_norm": 1.0205281972885132, "learning_rate": 9.679266055962174e-06, "loss": 0.61, "step": 2229 }, { "epoch": 0.14, "grad_norm": 0.9858378171920776, "learning_rate": 9.678904407457314e-06, "loss": 0.555, "step": 2230 }, { "epoch": 0.14, "grad_norm": 1.1034581661224365, "learning_rate": 9.678542561940067e-06, "loss": 0.6115, "step": 2231 }, { "epoch": 0.14, "grad_norm": 1.1124613285064697, "learning_rate": 9.678180519425669e-06, "loss": 0.5678, "step": 2232 }, { "epoch": 0.14, "grad_norm": 1.0323601961135864, "learning_rate": 9.677818279929363e-06, "loss": 0.5149, "step": 2233 }, { "epoch": 0.14, "grad_norm": 0.946808934211731, "learning_rate": 9.677455843466402e-06, "loss": 0.5688, "step": 2234 }, { "epoch": 0.14, "grad_norm": 1.0611376762390137, "learning_rate": 9.677093210052048e-06, "loss": 0.5322, "step": 2235 }, { "epoch": 0.14, "grad_norm": 1.0465165376663208, "learning_rate": 9.676730379701567e-06, "loss": 0.5893, "step": 2236 }, { "epoch": 0.14, "grad_norm": 1.0118218660354614, "learning_rate": 9.676367352430242e-06, "loss": 0.6004, "step": 2237 }, { "epoch": 0.14, "grad_norm": 1.0092300176620483, "learning_rate": 9.676004128253354e-06, "loss": 0.571, "step": 2238 }, { "epoch": 0.14, "grad_norm": 1.0186326503753662, "learning_rate": 9.675640707186199e-06, "loss": 0.5769, "step": 2239 }, { "epoch": 0.14, "grad_norm": 1.0467184782028198, "learning_rate": 9.67527708924408e-06, "loss": 0.5376, "step": 2240 }, { "epoch": 0.14, "grad_norm": 1.021779179573059, "learning_rate": 9.674913274442305e-06, "loss": 0.6079, "step": 2241 }, { "epoch": 0.14, "grad_norm": 0.9716930985450745, "learning_rate": 9.674549262796196e-06, "loss": 0.5796, "step": 2242 }, { "epoch": 0.14, "grad_norm": 0.9714882969856262, "learning_rate": 9.674185054321079e-06, "loss": 0.5152, "step": 2243 }, { "epoch": 0.14, "grad_norm": 1.0956681966781616, "learning_rate": 9.67382064903229e-06, "loss": 0.5704, "step": 2244 }, { "epoch": 0.14, "grad_norm": 1.1594781875610352, "learning_rate": 9.67345604694517e-06, "loss": 0.5553, "step": 2245 }, { "epoch": 0.14, "grad_norm": 1.062707543373108, "learning_rate": 9.673091248075077e-06, "loss": 0.5664, "step": 2246 }, { "epoch": 0.14, "grad_norm": 1.0908327102661133, "learning_rate": 9.672726252437368e-06, "loss": 0.566, "step": 2247 }, { "epoch": 0.14, "grad_norm": 1.0008841753005981, "learning_rate": 9.67236106004741e-06, "loss": 0.5607, "step": 2248 }, { "epoch": 0.14, "grad_norm": 0.9752715229988098, "learning_rate": 9.671995670920582e-06, "loss": 0.5441, "step": 2249 }, { "epoch": 0.14, "grad_norm": 1.0136269330978394, "learning_rate": 9.671630085072268e-06, "loss": 0.5602, "step": 2250 }, { "epoch": 0.14, "grad_norm": 1.0056018829345703, "learning_rate": 9.671264302517864e-06, "loss": 0.5857, "step": 2251 }, { "epoch": 0.14, "grad_norm": 0.9630345106124878, "learning_rate": 9.67089832327277e-06, "loss": 0.5591, "step": 2252 }, { "epoch": 0.14, "grad_norm": 1.1637920141220093, "learning_rate": 9.670532147352399e-06, "loss": 0.6148, "step": 2253 }, { "epoch": 0.14, "grad_norm": 1.0402402877807617, "learning_rate": 9.670165774772164e-06, "loss": 0.5934, "step": 2254 }, { "epoch": 0.14, "grad_norm": 1.0115119218826294, "learning_rate": 9.669799205547494e-06, "loss": 0.5944, "step": 2255 }, { "epoch": 0.14, "grad_norm": 1.013441562652588, "learning_rate": 9.669432439693827e-06, "loss": 0.6018, "step": 2256 }, { "epoch": 0.14, "grad_norm": 1.0764124393463135, "learning_rate": 9.669065477226602e-06, "loss": 0.612, "step": 2257 }, { "epoch": 0.14, "grad_norm": 1.0073105096817017, "learning_rate": 9.668698318161271e-06, "loss": 0.5506, "step": 2258 }, { "epoch": 0.14, "grad_norm": 1.0922359228134155, "learning_rate": 9.668330962513297e-06, "loss": 0.5587, "step": 2259 }, { "epoch": 0.14, "grad_norm": 1.2077372074127197, "learning_rate": 9.667963410298147e-06, "loss": 0.5915, "step": 2260 }, { "epoch": 0.14, "grad_norm": 1.0957609415054321, "learning_rate": 9.667595661531294e-06, "loss": 0.5817, "step": 2261 }, { "epoch": 0.14, "grad_norm": 0.9836629033088684, "learning_rate": 9.667227716228228e-06, "loss": 0.5945, "step": 2262 }, { "epoch": 0.14, "grad_norm": 0.9690822958946228, "learning_rate": 9.666859574404434e-06, "loss": 0.5601, "step": 2263 }, { "epoch": 0.14, "grad_norm": 1.0340832471847534, "learning_rate": 9.666491236075423e-06, "loss": 0.5599, "step": 2264 }, { "epoch": 0.14, "grad_norm": 1.0833946466445923, "learning_rate": 9.666122701256697e-06, "loss": 0.6062, "step": 2265 }, { "epoch": 0.14, "grad_norm": 1.0714364051818848, "learning_rate": 9.665753969963779e-06, "loss": 0.5708, "step": 2266 }, { "epoch": 0.14, "grad_norm": 1.003904104232788, "learning_rate": 9.66538504221219e-06, "loss": 0.6108, "step": 2267 }, { "epoch": 0.14, "grad_norm": 0.98911052942276, "learning_rate": 9.665015918017467e-06, "loss": 0.5416, "step": 2268 }, { "epoch": 0.14, "grad_norm": 1.054664969444275, "learning_rate": 9.664646597395151e-06, "loss": 0.5697, "step": 2269 }, { "epoch": 0.14, "grad_norm": 1.0036427974700928, "learning_rate": 9.664277080360796e-06, "loss": 0.6049, "step": 2270 }, { "epoch": 0.14, "grad_norm": 1.0508859157562256, "learning_rate": 9.663907366929958e-06, "loss": 0.5559, "step": 2271 }, { "epoch": 0.14, "grad_norm": 1.055727243423462, "learning_rate": 9.663537457118206e-06, "loss": 0.6226, "step": 2272 }, { "epoch": 0.14, "grad_norm": 1.1699100732803345, "learning_rate": 9.663167350941114e-06, "loss": 0.6372, "step": 2273 }, { "epoch": 0.14, "grad_norm": 1.0462695360183716, "learning_rate": 9.662797048414267e-06, "loss": 0.6348, "step": 2274 }, { "epoch": 0.14, "grad_norm": 1.1070884466171265, "learning_rate": 9.662426549553257e-06, "loss": 0.5869, "step": 2275 }, { "epoch": 0.14, "grad_norm": 1.0312139987945557, "learning_rate": 9.662055854373684e-06, "loss": 0.6051, "step": 2276 }, { "epoch": 0.14, "grad_norm": 0.9913875460624695, "learning_rate": 9.661684962891158e-06, "loss": 0.5672, "step": 2277 }, { "epoch": 0.14, "grad_norm": 0.9986532926559448, "learning_rate": 9.661313875121294e-06, "loss": 0.5188, "step": 2278 }, { "epoch": 0.14, "grad_norm": 1.0187432765960693, "learning_rate": 9.66094259107972e-06, "loss": 0.6049, "step": 2279 }, { "epoch": 0.14, "grad_norm": 1.1970367431640625, "learning_rate": 9.660571110782066e-06, "loss": 0.5716, "step": 2280 }, { "epoch": 0.14, "grad_norm": 1.0717835426330566, "learning_rate": 9.660199434243977e-06, "loss": 0.5828, "step": 2281 }, { "epoch": 0.14, "grad_norm": 1.001078486442566, "learning_rate": 9.6598275614811e-06, "loss": 0.5683, "step": 2282 }, { "epoch": 0.14, "grad_norm": 1.0684194564819336, "learning_rate": 9.659455492509096e-06, "loss": 0.5513, "step": 2283 }, { "epoch": 0.14, "grad_norm": 1.065119981765747, "learning_rate": 9.659083227343628e-06, "loss": 0.5637, "step": 2284 }, { "epoch": 0.14, "grad_norm": 1.1782958507537842, "learning_rate": 9.658710766000375e-06, "loss": 0.5661, "step": 2285 }, { "epoch": 0.14, "grad_norm": 1.0934861898422241, "learning_rate": 9.658338108495018e-06, "loss": 0.5459, "step": 2286 }, { "epoch": 0.14, "grad_norm": 1.0259177684783936, "learning_rate": 9.65796525484325e-06, "loss": 0.5708, "step": 2287 }, { "epoch": 0.14, "grad_norm": 1.0861341953277588, "learning_rate": 9.657592205060766e-06, "loss": 0.5637, "step": 2288 }, { "epoch": 0.15, "grad_norm": 1.0681545734405518, "learning_rate": 9.657218959163278e-06, "loss": 0.5259, "step": 2289 }, { "epoch": 0.15, "grad_norm": 1.044212818145752, "learning_rate": 9.656845517166502e-06, "loss": 0.5527, "step": 2290 }, { "epoch": 0.15, "grad_norm": 1.1174644231796265, "learning_rate": 9.656471879086158e-06, "loss": 0.57, "step": 2291 }, { "epoch": 0.15, "grad_norm": 1.0047990083694458, "learning_rate": 9.656098044937985e-06, "loss": 0.5822, "step": 2292 }, { "epoch": 0.15, "grad_norm": 1.07584547996521, "learning_rate": 9.65572401473772e-06, "loss": 0.6041, "step": 2293 }, { "epoch": 0.15, "grad_norm": 1.054582118988037, "learning_rate": 9.655349788501112e-06, "loss": 0.5843, "step": 2294 }, { "epoch": 0.15, "grad_norm": 1.116593599319458, "learning_rate": 9.654975366243919e-06, "loss": 0.5613, "step": 2295 }, { "epoch": 0.15, "grad_norm": 0.9938434362411499, "learning_rate": 9.654600747981908e-06, "loss": 0.5017, "step": 2296 }, { "epoch": 0.15, "grad_norm": 1.0228008031845093, "learning_rate": 9.654225933730852e-06, "loss": 0.5806, "step": 2297 }, { "epoch": 0.15, "grad_norm": 1.0263339281082153, "learning_rate": 9.653850923506532e-06, "loss": 0.5742, "step": 2298 }, { "epoch": 0.15, "grad_norm": 1.0496147871017456, "learning_rate": 9.653475717324739e-06, "loss": 0.5722, "step": 2299 }, { "epoch": 0.15, "grad_norm": 1.0120099782943726, "learning_rate": 9.65310031520127e-06, "loss": 0.6022, "step": 2300 }, { "epoch": 0.15, "grad_norm": 1.0658786296844482, "learning_rate": 9.652724717151938e-06, "loss": 0.6067, "step": 2301 }, { "epoch": 0.15, "grad_norm": 1.041401743888855, "learning_rate": 9.652348923192551e-06, "loss": 0.5763, "step": 2302 }, { "epoch": 0.15, "grad_norm": 0.9860716462135315, "learning_rate": 9.651972933338935e-06, "loss": 0.5306, "step": 2303 }, { "epoch": 0.15, "grad_norm": 1.0807832479476929, "learning_rate": 9.651596747606924e-06, "loss": 0.6203, "step": 2304 }, { "epoch": 0.15, "grad_norm": 1.0927594900131226, "learning_rate": 9.651220366012354e-06, "loss": 0.566, "step": 2305 }, { "epoch": 0.15, "grad_norm": 1.0677021741867065, "learning_rate": 9.650843788571076e-06, "loss": 0.582, "step": 2306 }, { "epoch": 0.15, "grad_norm": 1.1915913820266724, "learning_rate": 9.650467015298943e-06, "loss": 0.5704, "step": 2307 }, { "epoch": 0.15, "grad_norm": 1.0427172183990479, "learning_rate": 9.650090046211822e-06, "loss": 0.6025, "step": 2308 }, { "epoch": 0.15, "grad_norm": 0.9778934121131897, "learning_rate": 9.649712881325587e-06, "loss": 0.518, "step": 2309 }, { "epoch": 0.15, "grad_norm": 1.0721766948699951, "learning_rate": 9.649335520656118e-06, "loss": 0.6194, "step": 2310 }, { "epoch": 0.15, "grad_norm": 1.088017463684082, "learning_rate": 9.648957964219303e-06, "loss": 0.6091, "step": 2311 }, { "epoch": 0.15, "grad_norm": 1.0366744995117188, "learning_rate": 9.64858021203104e-06, "loss": 0.589, "step": 2312 }, { "epoch": 0.15, "grad_norm": 0.9899066686630249, "learning_rate": 9.648202264107239e-06, "loss": 0.5605, "step": 2313 }, { "epoch": 0.15, "grad_norm": 1.0178853273391724, "learning_rate": 9.647824120463806e-06, "loss": 0.5583, "step": 2314 }, { "epoch": 0.15, "grad_norm": 1.095003604888916, "learning_rate": 9.64744578111667e-06, "loss": 0.5185, "step": 2315 }, { "epoch": 0.15, "grad_norm": 1.1024019718170166, "learning_rate": 9.647067246081761e-06, "loss": 0.5716, "step": 2316 }, { "epoch": 0.15, "grad_norm": 1.0408539772033691, "learning_rate": 9.646688515375014e-06, "loss": 0.5683, "step": 2317 }, { "epoch": 0.15, "grad_norm": 1.0439146757125854, "learning_rate": 9.646309589012379e-06, "loss": 0.547, "step": 2318 }, { "epoch": 0.15, "grad_norm": 0.9975500106811523, "learning_rate": 9.64593046700981e-06, "loss": 0.5251, "step": 2319 }, { "epoch": 0.15, "grad_norm": 1.0552396774291992, "learning_rate": 9.645551149383272e-06, "loss": 0.5575, "step": 2320 }, { "epoch": 0.15, "grad_norm": 0.975261926651001, "learning_rate": 9.645171636148736e-06, "loss": 0.5052, "step": 2321 }, { "epoch": 0.15, "grad_norm": 1.0283842086791992, "learning_rate": 9.644791927322182e-06, "loss": 0.5466, "step": 2322 }, { "epoch": 0.15, "grad_norm": 1.044140338897705, "learning_rate": 9.644412022919597e-06, "loss": 0.5374, "step": 2323 }, { "epoch": 0.15, "grad_norm": 1.0010985136032104, "learning_rate": 9.644031922956979e-06, "loss": 0.5567, "step": 2324 }, { "epoch": 0.15, "grad_norm": 1.0314868688583374, "learning_rate": 9.64365162745033e-06, "loss": 0.5878, "step": 2325 }, { "epoch": 0.15, "grad_norm": 0.9741953015327454, "learning_rate": 9.643271136415668e-06, "loss": 0.5589, "step": 2326 }, { "epoch": 0.15, "grad_norm": 1.0657076835632324, "learning_rate": 9.642890449869008e-06, "loss": 0.5935, "step": 2327 }, { "epoch": 0.15, "grad_norm": 0.9860471487045288, "learning_rate": 9.642509567826386e-06, "loss": 0.5435, "step": 2328 }, { "epoch": 0.15, "grad_norm": 1.027422308921814, "learning_rate": 9.642128490303834e-06, "loss": 0.5383, "step": 2329 }, { "epoch": 0.15, "grad_norm": 0.9340013265609741, "learning_rate": 9.6417472173174e-06, "loss": 0.5688, "step": 2330 }, { "epoch": 0.15, "grad_norm": 0.9668096899986267, "learning_rate": 9.64136574888314e-06, "loss": 0.5719, "step": 2331 }, { "epoch": 0.15, "grad_norm": 1.0264288187026978, "learning_rate": 9.640984085017113e-06, "loss": 0.5957, "step": 2332 }, { "epoch": 0.15, "grad_norm": 0.9968881011009216, "learning_rate": 9.640602225735391e-06, "loss": 0.6009, "step": 2333 }, { "epoch": 0.15, "grad_norm": 0.9973297119140625, "learning_rate": 9.640220171054054e-06, "loss": 0.5623, "step": 2334 }, { "epoch": 0.15, "grad_norm": 1.2151033878326416, "learning_rate": 9.639837920989188e-06, "loss": 0.5627, "step": 2335 }, { "epoch": 0.15, "grad_norm": 1.1902861595153809, "learning_rate": 9.639455475556887e-06, "loss": 0.5959, "step": 2336 }, { "epoch": 0.15, "grad_norm": 1.0719233751296997, "learning_rate": 9.639072834773254e-06, "loss": 0.5984, "step": 2337 }, { "epoch": 0.15, "grad_norm": 1.039047360420227, "learning_rate": 9.638689998654404e-06, "loss": 0.5333, "step": 2338 }, { "epoch": 0.15, "grad_norm": 1.0445502996444702, "learning_rate": 9.638306967216453e-06, "loss": 0.584, "step": 2339 }, { "epoch": 0.15, "grad_norm": 0.9414918422698975, "learning_rate": 9.637923740475534e-06, "loss": 0.5178, "step": 2340 }, { "epoch": 0.15, "grad_norm": 1.0898582935333252, "learning_rate": 9.637540318447778e-06, "loss": 0.5821, "step": 2341 }, { "epoch": 0.15, "grad_norm": 1.0142707824707031, "learning_rate": 9.637156701149333e-06, "loss": 0.5971, "step": 2342 }, { "epoch": 0.15, "grad_norm": 1.0771269798278809, "learning_rate": 9.636772888596352e-06, "loss": 0.5993, "step": 2343 }, { "epoch": 0.15, "grad_norm": 0.9535529613494873, "learning_rate": 9.636388880804991e-06, "loss": 0.5602, "step": 2344 }, { "epoch": 0.15, "grad_norm": 1.0608489513397217, "learning_rate": 9.636004677791427e-06, "loss": 0.5983, "step": 2345 }, { "epoch": 0.15, "grad_norm": 1.132565975189209, "learning_rate": 9.635620279571833e-06, "loss": 0.6043, "step": 2346 }, { "epoch": 0.15, "grad_norm": 1.0791984796524048, "learning_rate": 9.635235686162395e-06, "loss": 0.5979, "step": 2347 }, { "epoch": 0.15, "grad_norm": 0.9959856867790222, "learning_rate": 9.634850897579304e-06, "loss": 0.5548, "step": 2348 }, { "epoch": 0.15, "grad_norm": 1.0113996267318726, "learning_rate": 9.63446591383877e-06, "loss": 0.5516, "step": 2349 }, { "epoch": 0.15, "grad_norm": 1.0375350713729858, "learning_rate": 9.634080734956993e-06, "loss": 0.5548, "step": 2350 }, { "epoch": 0.15, "grad_norm": 1.0703330039978027, "learning_rate": 9.633695360950202e-06, "loss": 0.5933, "step": 2351 }, { "epoch": 0.15, "grad_norm": 1.0566142797470093, "learning_rate": 9.633309791834617e-06, "loss": 0.5393, "step": 2352 }, { "epoch": 0.15, "grad_norm": 1.2384819984436035, "learning_rate": 9.632924027626474e-06, "loss": 0.5905, "step": 2353 }, { "epoch": 0.15, "grad_norm": 1.102569341659546, "learning_rate": 9.632538068342018e-06, "loss": 0.5665, "step": 2354 }, { "epoch": 0.15, "grad_norm": 1.0893784761428833, "learning_rate": 9.632151913997498e-06, "loss": 0.5453, "step": 2355 }, { "epoch": 0.15, "grad_norm": 1.1036884784698486, "learning_rate": 9.631765564609177e-06, "loss": 0.5205, "step": 2356 }, { "epoch": 0.15, "grad_norm": 1.1188225746154785, "learning_rate": 9.63137902019332e-06, "loss": 0.5471, "step": 2357 }, { "epoch": 0.15, "grad_norm": 1.0827534198760986, "learning_rate": 9.630992280766202e-06, "loss": 0.536, "step": 2358 }, { "epoch": 0.15, "grad_norm": 1.1823593378067017, "learning_rate": 9.630605346344113e-06, "loss": 0.5388, "step": 2359 }, { "epoch": 0.15, "grad_norm": 1.026418924331665, "learning_rate": 9.630218216943338e-06, "loss": 0.6167, "step": 2360 }, { "epoch": 0.15, "grad_norm": 1.1206985712051392, "learning_rate": 9.629830892580183e-06, "loss": 0.6046, "step": 2361 }, { "epoch": 0.15, "grad_norm": 1.1533458232879639, "learning_rate": 9.629443373270954e-06, "loss": 0.5696, "step": 2362 }, { "epoch": 0.15, "grad_norm": 1.19341242313385, "learning_rate": 9.62905565903197e-06, "loss": 0.6008, "step": 2363 }, { "epoch": 0.15, "grad_norm": 1.0197237730026245, "learning_rate": 9.628667749879555e-06, "loss": 0.5629, "step": 2364 }, { "epoch": 0.15, "grad_norm": 1.0176202058792114, "learning_rate": 9.628279645830044e-06, "loss": 0.5628, "step": 2365 }, { "epoch": 0.15, "grad_norm": 1.0141323804855347, "learning_rate": 9.627891346899775e-06, "loss": 0.5293, "step": 2366 }, { "epoch": 0.15, "grad_norm": 1.0090693235397339, "learning_rate": 9.627502853105104e-06, "loss": 0.5858, "step": 2367 }, { "epoch": 0.15, "grad_norm": 1.059515357017517, "learning_rate": 9.627114164462385e-06, "loss": 0.5623, "step": 2368 }, { "epoch": 0.15, "grad_norm": 1.0840575695037842, "learning_rate": 9.626725280987985e-06, "loss": 0.6196, "step": 2369 }, { "epoch": 0.15, "grad_norm": 1.001650333404541, "learning_rate": 9.626336202698277e-06, "loss": 0.5397, "step": 2370 }, { "epoch": 0.15, "grad_norm": 1.0970371961593628, "learning_rate": 9.625946929609647e-06, "loss": 0.537, "step": 2371 }, { "epoch": 0.15, "grad_norm": 1.096764087677002, "learning_rate": 9.625557461738484e-06, "loss": 0.6295, "step": 2372 }, { "epoch": 0.15, "grad_norm": 1.0875904560089111, "learning_rate": 9.625167799101188e-06, "loss": 0.6304, "step": 2373 }, { "epoch": 0.15, "grad_norm": 1.1247825622558594, "learning_rate": 9.624777941714165e-06, "loss": 0.6608, "step": 2374 }, { "epoch": 0.15, "grad_norm": 1.0841033458709717, "learning_rate": 9.624387889593832e-06, "loss": 0.5454, "step": 2375 }, { "epoch": 0.15, "grad_norm": 0.9857283234596252, "learning_rate": 9.62399764275661e-06, "loss": 0.5596, "step": 2376 }, { "epoch": 0.15, "grad_norm": 1.0509493350982666, "learning_rate": 9.623607201218934e-06, "loss": 0.587, "step": 2377 }, { "epoch": 0.15, "grad_norm": 1.0771265029907227, "learning_rate": 9.623216564997244e-06, "loss": 0.6089, "step": 2378 }, { "epoch": 0.15, "grad_norm": 0.9977293014526367, "learning_rate": 9.622825734107987e-06, "loss": 0.5601, "step": 2379 }, { "epoch": 0.15, "grad_norm": 1.1041595935821533, "learning_rate": 9.62243470856762e-06, "loss": 0.6054, "step": 2380 }, { "epoch": 0.15, "grad_norm": 1.0072989463806152, "learning_rate": 9.622043488392607e-06, "loss": 0.5584, "step": 2381 }, { "epoch": 0.15, "grad_norm": 0.9942018985748291, "learning_rate": 9.621652073599423e-06, "loss": 0.5868, "step": 2382 }, { "epoch": 0.15, "grad_norm": 0.9523398280143738, "learning_rate": 9.621260464204548e-06, "loss": 0.5531, "step": 2383 }, { "epoch": 0.15, "grad_norm": 0.998120129108429, "learning_rate": 9.620868660224468e-06, "loss": 0.5677, "step": 2384 }, { "epoch": 0.15, "grad_norm": 1.0065315961837769, "learning_rate": 9.620476661675685e-06, "loss": 0.5598, "step": 2385 }, { "epoch": 0.15, "grad_norm": 1.123533844947815, "learning_rate": 9.620084468574704e-06, "loss": 0.5741, "step": 2386 }, { "epoch": 0.15, "grad_norm": 1.026013731956482, "learning_rate": 9.619692080938039e-06, "loss": 0.532, "step": 2387 }, { "epoch": 0.15, "grad_norm": 1.102170467376709, "learning_rate": 9.61929949878221e-06, "loss": 0.608, "step": 2388 }, { "epoch": 0.15, "grad_norm": 1.0668294429779053, "learning_rate": 9.618906722123748e-06, "loss": 0.5807, "step": 2389 }, { "epoch": 0.15, "grad_norm": 1.1137778759002686, "learning_rate": 9.618513750979193e-06, "loss": 0.5795, "step": 2390 }, { "epoch": 0.15, "grad_norm": 1.0429565906524658, "learning_rate": 9.61812058536509e-06, "loss": 0.5191, "step": 2391 }, { "epoch": 0.15, "grad_norm": 1.0552812814712524, "learning_rate": 9.617727225297994e-06, "loss": 0.5823, "step": 2392 }, { "epoch": 0.15, "grad_norm": 1.0765811204910278, "learning_rate": 9.617333670794468e-06, "loss": 0.5655, "step": 2393 }, { "epoch": 0.15, "grad_norm": 1.030846357345581, "learning_rate": 9.616939921871087e-06, "loss": 0.5564, "step": 2394 }, { "epoch": 0.15, "grad_norm": 1.1203022003173828, "learning_rate": 9.616545978544424e-06, "loss": 0.5641, "step": 2395 }, { "epoch": 0.15, "grad_norm": 1.0004371404647827, "learning_rate": 9.616151840831069e-06, "loss": 0.5221, "step": 2396 }, { "epoch": 0.15, "grad_norm": 1.0611863136291504, "learning_rate": 9.61575750874762e-06, "loss": 0.5545, "step": 2397 }, { "epoch": 0.15, "grad_norm": 0.9985150098800659, "learning_rate": 9.615362982310679e-06, "loss": 0.4831, "step": 2398 }, { "epoch": 0.15, "grad_norm": 1.041325330734253, "learning_rate": 9.614968261536858e-06, "loss": 0.5563, "step": 2399 }, { "epoch": 0.15, "grad_norm": 1.217852234840393, "learning_rate": 9.61457334644278e-06, "loss": 0.617, "step": 2400 }, { "epoch": 0.15, "grad_norm": 1.1040374040603638, "learning_rate": 9.61417823704507e-06, "loss": 0.5679, "step": 2401 }, { "epoch": 0.15, "grad_norm": 1.114501953125, "learning_rate": 9.613782933360365e-06, "loss": 0.608, "step": 2402 }, { "epoch": 0.15, "grad_norm": 1.0527658462524414, "learning_rate": 9.613387435405312e-06, "loss": 0.4935, "step": 2403 }, { "epoch": 0.15, "grad_norm": 1.1038916110992432, "learning_rate": 9.612991743196562e-06, "loss": 0.5531, "step": 2404 }, { "epoch": 0.15, "grad_norm": 0.9707751870155334, "learning_rate": 9.612595856750776e-06, "loss": 0.5677, "step": 2405 }, { "epoch": 0.15, "grad_norm": 1.034013271331787, "learning_rate": 9.612199776084627e-06, "loss": 0.5734, "step": 2406 }, { "epoch": 0.15, "grad_norm": 1.1263413429260254, "learning_rate": 9.611803501214789e-06, "loss": 0.6142, "step": 2407 }, { "epoch": 0.15, "grad_norm": 0.9992231726646423, "learning_rate": 9.61140703215795e-06, "loss": 0.5411, "step": 2408 }, { "epoch": 0.15, "grad_norm": 1.0508081912994385, "learning_rate": 9.611010368930801e-06, "loss": 0.5605, "step": 2409 }, { "epoch": 0.15, "grad_norm": 1.0325145721435547, "learning_rate": 9.610613511550047e-06, "loss": 0.5872, "step": 2410 }, { "epoch": 0.15, "grad_norm": 1.0452853441238403, "learning_rate": 9.610216460032398e-06, "loss": 0.5944, "step": 2411 }, { "epoch": 0.15, "grad_norm": 1.0934975147247314, "learning_rate": 9.60981921439457e-06, "loss": 0.6453, "step": 2412 }, { "epoch": 0.15, "grad_norm": 1.0208979845046997, "learning_rate": 9.609421774653291e-06, "loss": 0.622, "step": 2413 }, { "epoch": 0.15, "grad_norm": 1.0178954601287842, "learning_rate": 9.609024140825299e-06, "loss": 0.5192, "step": 2414 }, { "epoch": 0.15, "grad_norm": 1.1866627931594849, "learning_rate": 9.608626312927331e-06, "loss": 0.6222, "step": 2415 }, { "epoch": 0.15, "grad_norm": 1.0078473091125488, "learning_rate": 9.608228290976143e-06, "loss": 0.5801, "step": 2416 }, { "epoch": 0.15, "grad_norm": 1.073379397392273, "learning_rate": 9.607830074988491e-06, "loss": 0.6375, "step": 2417 }, { "epoch": 0.15, "grad_norm": 1.0799641609191895, "learning_rate": 9.607431664981144e-06, "loss": 0.5561, "step": 2418 }, { "epoch": 0.15, "grad_norm": 1.1281019449234009, "learning_rate": 9.607033060970878e-06, "loss": 0.5997, "step": 2419 }, { "epoch": 0.15, "grad_norm": 1.0619001388549805, "learning_rate": 9.606634262974477e-06, "loss": 0.5907, "step": 2420 }, { "epoch": 0.15, "grad_norm": 0.9821775555610657, "learning_rate": 9.606235271008732e-06, "loss": 0.5351, "step": 2421 }, { "epoch": 0.15, "grad_norm": 1.1251587867736816, "learning_rate": 9.605836085090445e-06, "loss": 0.5694, "step": 2422 }, { "epoch": 0.15, "grad_norm": 1.228594183921814, "learning_rate": 9.605436705236421e-06, "loss": 0.5574, "step": 2423 }, { "epoch": 0.15, "grad_norm": 0.9809057712554932, "learning_rate": 9.60503713146348e-06, "loss": 0.577, "step": 2424 }, { "epoch": 0.15, "grad_norm": 1.1365041732788086, "learning_rate": 9.604637363788444e-06, "loss": 0.574, "step": 2425 }, { "epoch": 0.15, "grad_norm": 0.9982814788818359, "learning_rate": 9.604237402228149e-06, "loss": 0.622, "step": 2426 }, { "epoch": 0.15, "grad_norm": 1.141382098197937, "learning_rate": 9.603837246799431e-06, "loss": 0.5937, "step": 2427 }, { "epoch": 0.15, "grad_norm": 1.042654275894165, "learning_rate": 9.603436897519145e-06, "loss": 0.5735, "step": 2428 }, { "epoch": 0.15, "grad_norm": 1.0142533779144287, "learning_rate": 9.603036354404145e-06, "loss": 0.5442, "step": 2429 }, { "epoch": 0.15, "grad_norm": 0.9460806846618652, "learning_rate": 9.602635617471295e-06, "loss": 0.5689, "step": 2430 }, { "epoch": 0.15, "grad_norm": 0.9694622159004211, "learning_rate": 9.602234686737473e-06, "loss": 0.6085, "step": 2431 }, { "epoch": 0.15, "grad_norm": 0.9917210340499878, "learning_rate": 9.601833562219556e-06, "loss": 0.562, "step": 2432 }, { "epoch": 0.15, "grad_norm": 0.9378957152366638, "learning_rate": 9.601432243934437e-06, "loss": 0.5098, "step": 2433 }, { "epoch": 0.15, "grad_norm": 1.0544147491455078, "learning_rate": 9.601030731899014e-06, "loss": 0.5531, "step": 2434 }, { "epoch": 0.15, "grad_norm": 0.9746567010879517, "learning_rate": 9.600629026130192e-06, "loss": 0.5252, "step": 2435 }, { "epoch": 0.15, "grad_norm": 1.0485756397247314, "learning_rate": 9.600227126644887e-06, "loss": 0.6327, "step": 2436 }, { "epoch": 0.15, "grad_norm": 1.0036715269088745, "learning_rate": 9.59982503346002e-06, "loss": 0.5437, "step": 2437 }, { "epoch": 0.15, "grad_norm": 1.0754265785217285, "learning_rate": 9.599422746592522e-06, "loss": 0.6003, "step": 2438 }, { "epoch": 0.15, "grad_norm": 1.062813639640808, "learning_rate": 9.599020266059334e-06, "loss": 0.6354, "step": 2439 }, { "epoch": 0.15, "grad_norm": 0.9763906002044678, "learning_rate": 9.5986175918774e-06, "loss": 0.5905, "step": 2440 }, { "epoch": 0.15, "grad_norm": 1.0599114894866943, "learning_rate": 9.598214724063678e-06, "loss": 0.6055, "step": 2441 }, { "epoch": 0.15, "grad_norm": 0.984308123588562, "learning_rate": 9.597811662635128e-06, "loss": 0.5885, "step": 2442 }, { "epoch": 0.15, "grad_norm": 1.2546172142028809, "learning_rate": 9.597408407608725e-06, "loss": 0.6079, "step": 2443 }, { "epoch": 0.15, "grad_norm": 1.020815134048462, "learning_rate": 9.597004959001447e-06, "loss": 0.5654, "step": 2444 }, { "epoch": 0.15, "grad_norm": 1.1067922115325928, "learning_rate": 9.596601316830282e-06, "loss": 0.5599, "step": 2445 }, { "epoch": 0.15, "grad_norm": 1.0436384677886963, "learning_rate": 9.596197481112225e-06, "loss": 0.5519, "step": 2446 }, { "epoch": 0.16, "grad_norm": 0.9777306914329529, "learning_rate": 9.59579345186428e-06, "loss": 0.5408, "step": 2447 }, { "epoch": 0.16, "grad_norm": 1.1045746803283691, "learning_rate": 9.595389229103464e-06, "loss": 0.5752, "step": 2448 }, { "epoch": 0.16, "grad_norm": 0.973228931427002, "learning_rate": 9.594984812846792e-06, "loss": 0.5467, "step": 2449 }, { "epoch": 0.16, "grad_norm": 1.0757728815078735, "learning_rate": 9.594580203111294e-06, "loss": 0.5394, "step": 2450 }, { "epoch": 0.16, "grad_norm": 1.0427677631378174, "learning_rate": 9.594175399914008e-06, "loss": 0.5478, "step": 2451 }, { "epoch": 0.16, "grad_norm": 1.0544366836547852, "learning_rate": 9.593770403271977e-06, "loss": 0.5963, "step": 2452 }, { "epoch": 0.16, "grad_norm": 1.0926449298858643, "learning_rate": 9.593365213202255e-06, "loss": 0.5747, "step": 2453 }, { "epoch": 0.16, "grad_norm": 0.9562935829162598, "learning_rate": 9.592959829721903e-06, "loss": 0.5514, "step": 2454 }, { "epoch": 0.16, "grad_norm": 1.050257682800293, "learning_rate": 9.59255425284799e-06, "loss": 0.5929, "step": 2455 }, { "epoch": 0.16, "grad_norm": 1.0073059797286987, "learning_rate": 9.592148482597595e-06, "loss": 0.5564, "step": 2456 }, { "epoch": 0.16, "grad_norm": 1.0695605278015137, "learning_rate": 9.591742518987802e-06, "loss": 0.6154, "step": 2457 }, { "epoch": 0.16, "grad_norm": 0.957906186580658, "learning_rate": 9.591336362035703e-06, "loss": 0.5076, "step": 2458 }, { "epoch": 0.16, "grad_norm": 0.9516271352767944, "learning_rate": 9.590930011758403e-06, "loss": 0.5469, "step": 2459 }, { "epoch": 0.16, "grad_norm": 1.0699506998062134, "learning_rate": 9.590523468173011e-06, "loss": 0.5738, "step": 2460 }, { "epoch": 0.16, "grad_norm": 1.1135766506195068, "learning_rate": 9.590116731296646e-06, "loss": 0.5231, "step": 2461 }, { "epoch": 0.16, "grad_norm": 1.0049141645431519, "learning_rate": 9.589709801146432e-06, "loss": 0.5416, "step": 2462 }, { "epoch": 0.16, "grad_norm": 1.034530758857727, "learning_rate": 9.589302677739506e-06, "loss": 0.5573, "step": 2463 }, { "epoch": 0.16, "grad_norm": 1.0589345693588257, "learning_rate": 9.588895361093009e-06, "loss": 0.5911, "step": 2464 }, { "epoch": 0.16, "grad_norm": 1.033722162246704, "learning_rate": 9.588487851224091e-06, "loss": 0.5711, "step": 2465 }, { "epoch": 0.16, "grad_norm": 1.143248200416565, "learning_rate": 9.588080148149912e-06, "loss": 0.5856, "step": 2466 }, { "epoch": 0.16, "grad_norm": 1.1562327146530151, "learning_rate": 9.587672251887639e-06, "loss": 0.6477, "step": 2467 }, { "epoch": 0.16, "grad_norm": 0.9303533434867859, "learning_rate": 9.587264162454447e-06, "loss": 0.5134, "step": 2468 }, { "epoch": 0.16, "grad_norm": 1.045608639717102, "learning_rate": 9.586855879867519e-06, "loss": 0.5697, "step": 2469 }, { "epoch": 0.16, "grad_norm": 1.0757079124450684, "learning_rate": 9.586447404144046e-06, "loss": 0.6234, "step": 2470 }, { "epoch": 0.16, "grad_norm": 1.096331000328064, "learning_rate": 9.58603873530123e-06, "loss": 0.5649, "step": 2471 }, { "epoch": 0.16, "grad_norm": 1.0169209241867065, "learning_rate": 9.585629873356273e-06, "loss": 0.4537, "step": 2472 }, { "epoch": 0.16, "grad_norm": 1.0567876100540161, "learning_rate": 9.585220818326395e-06, "loss": 0.5838, "step": 2473 }, { "epoch": 0.16, "grad_norm": 1.066560983657837, "learning_rate": 9.58481157022882e-06, "loss": 0.5419, "step": 2474 }, { "epoch": 0.16, "grad_norm": 1.0556069612503052, "learning_rate": 9.584402129080779e-06, "loss": 0.575, "step": 2475 }, { "epoch": 0.16, "grad_norm": 0.9920236468315125, "learning_rate": 9.583992494899513e-06, "loss": 0.55, "step": 2476 }, { "epoch": 0.16, "grad_norm": 1.0362834930419922, "learning_rate": 9.583582667702269e-06, "loss": 0.5787, "step": 2477 }, { "epoch": 0.16, "grad_norm": 1.0171829462051392, "learning_rate": 9.583172647506305e-06, "loss": 0.546, "step": 2478 }, { "epoch": 0.16, "grad_norm": 1.0156619548797607, "learning_rate": 9.582762434328883e-06, "loss": 0.5497, "step": 2479 }, { "epoch": 0.16, "grad_norm": 0.990680456161499, "learning_rate": 9.582352028187278e-06, "loss": 0.6005, "step": 2480 }, { "epoch": 0.16, "grad_norm": 0.9822958111763, "learning_rate": 9.581941429098769e-06, "loss": 0.5028, "step": 2481 }, { "epoch": 0.16, "grad_norm": 1.0820964574813843, "learning_rate": 9.581530637080647e-06, "loss": 0.5871, "step": 2482 }, { "epoch": 0.16, "grad_norm": 1.1711864471435547, "learning_rate": 9.581119652150208e-06, "loss": 0.5747, "step": 2483 }, { "epoch": 0.16, "grad_norm": 1.1426204442977905, "learning_rate": 9.580708474324755e-06, "loss": 0.5489, "step": 2484 }, { "epoch": 0.16, "grad_norm": 1.0703874826431274, "learning_rate": 9.580297103621605e-06, "loss": 0.5749, "step": 2485 }, { "epoch": 0.16, "grad_norm": 1.0250228643417358, "learning_rate": 9.579885540058079e-06, "loss": 0.5777, "step": 2486 }, { "epoch": 0.16, "grad_norm": 1.002596139907837, "learning_rate": 9.579473783651503e-06, "loss": 0.5351, "step": 2487 }, { "epoch": 0.16, "grad_norm": 1.1740162372589111, "learning_rate": 9.579061834419217e-06, "loss": 0.6129, "step": 2488 }, { "epoch": 0.16, "grad_norm": 1.1330641508102417, "learning_rate": 9.578649692378567e-06, "loss": 0.6367, "step": 2489 }, { "epoch": 0.16, "grad_norm": 1.1116493940353394, "learning_rate": 9.578237357546907e-06, "loss": 0.5447, "step": 2490 }, { "epoch": 0.16, "grad_norm": 1.0301001071929932, "learning_rate": 9.577824829941597e-06, "loss": 0.5909, "step": 2491 }, { "epoch": 0.16, "grad_norm": 1.0472018718719482, "learning_rate": 9.577412109580009e-06, "loss": 0.5484, "step": 2492 }, { "epoch": 0.16, "grad_norm": 1.0376631021499634, "learning_rate": 9.57699919647952e-06, "loss": 0.6091, "step": 2493 }, { "epoch": 0.16, "grad_norm": 1.1155240535736084, "learning_rate": 9.576586090657519e-06, "loss": 0.5844, "step": 2494 }, { "epoch": 0.16, "grad_norm": 1.144724726676941, "learning_rate": 9.576172792131397e-06, "loss": 0.5911, "step": 2495 }, { "epoch": 0.16, "grad_norm": 1.0386004447937012, "learning_rate": 9.57575930091856e-06, "loss": 0.5678, "step": 2496 }, { "epoch": 0.16, "grad_norm": 1.0411202907562256, "learning_rate": 9.575345617036413e-06, "loss": 0.5939, "step": 2497 }, { "epoch": 0.16, "grad_norm": 1.019171118736267, "learning_rate": 9.574931740502383e-06, "loss": 0.567, "step": 2498 }, { "epoch": 0.16, "grad_norm": 1.0830127000808716, "learning_rate": 9.57451767133389e-06, "loss": 0.5962, "step": 2499 }, { "epoch": 0.16, "grad_norm": 1.036673665046692, "learning_rate": 9.57410340954837e-06, "loss": 0.5726, "step": 2500 }, { "epoch": 0.16, "grad_norm": 0.9850932955741882, "learning_rate": 9.57368895516327e-06, "loss": 0.5759, "step": 2501 }, { "epoch": 0.16, "grad_norm": 1.1040841341018677, "learning_rate": 9.573274308196037e-06, "loss": 0.554, "step": 2502 }, { "epoch": 0.16, "grad_norm": 0.9730511903762817, "learning_rate": 9.572859468664133e-06, "loss": 0.5287, "step": 2503 }, { "epoch": 0.16, "grad_norm": 1.0098161697387695, "learning_rate": 9.572444436585025e-06, "loss": 0.6103, "step": 2504 }, { "epoch": 0.16, "grad_norm": 1.0337492227554321, "learning_rate": 9.572029211976189e-06, "loss": 0.5723, "step": 2505 }, { "epoch": 0.16, "grad_norm": 0.9878047704696655, "learning_rate": 9.571613794855105e-06, "loss": 0.5733, "step": 2506 }, { "epoch": 0.16, "grad_norm": 1.0235744714736938, "learning_rate": 9.57119818523927e-06, "loss": 0.547, "step": 2507 }, { "epoch": 0.16, "grad_norm": 1.025876522064209, "learning_rate": 9.570782383146183e-06, "loss": 0.5847, "step": 2508 }, { "epoch": 0.16, "grad_norm": 1.1984429359436035, "learning_rate": 9.570366388593347e-06, "loss": 0.6102, "step": 2509 }, { "epoch": 0.16, "grad_norm": 1.136934518814087, "learning_rate": 9.569950201598283e-06, "loss": 0.5599, "step": 2510 }, { "epoch": 0.16, "grad_norm": 1.0273244380950928, "learning_rate": 9.569533822178513e-06, "loss": 0.5903, "step": 2511 }, { "epoch": 0.16, "grad_norm": 0.9874576330184937, "learning_rate": 9.569117250351571e-06, "loss": 0.5544, "step": 2512 }, { "epoch": 0.16, "grad_norm": 1.027095079421997, "learning_rate": 9.568700486134996e-06, "loss": 0.5663, "step": 2513 }, { "epoch": 0.16, "grad_norm": 1.0826996564865112, "learning_rate": 9.568283529546336e-06, "loss": 0.5274, "step": 2514 }, { "epoch": 0.16, "grad_norm": 1.0489357709884644, "learning_rate": 9.56786638060315e-06, "loss": 0.5844, "step": 2515 }, { "epoch": 0.16, "grad_norm": 1.0651934146881104, "learning_rate": 9.567449039323e-06, "loss": 0.6068, "step": 2516 }, { "epoch": 0.16, "grad_norm": 0.9960207939147949, "learning_rate": 9.56703150572346e-06, "loss": 0.5678, "step": 2517 }, { "epoch": 0.16, "grad_norm": 1.075217843055725, "learning_rate": 9.56661377982211e-06, "loss": 0.5541, "step": 2518 }, { "epoch": 0.16, "grad_norm": 1.0845835208892822, "learning_rate": 9.566195861636542e-06, "loss": 0.6073, "step": 2519 }, { "epoch": 0.16, "grad_norm": 1.0700103044509888, "learning_rate": 9.56577775118435e-06, "loss": 0.5778, "step": 2520 }, { "epoch": 0.16, "grad_norm": 1.1105780601501465, "learning_rate": 9.56535944848314e-06, "loss": 0.6404, "step": 2521 }, { "epoch": 0.16, "grad_norm": 0.961463987827301, "learning_rate": 9.564940953550525e-06, "loss": 0.5808, "step": 2522 }, { "epoch": 0.16, "grad_norm": 0.9934834837913513, "learning_rate": 9.564522266404127e-06, "loss": 0.5474, "step": 2523 }, { "epoch": 0.16, "grad_norm": 1.0321897268295288, "learning_rate": 9.564103387061575e-06, "loss": 0.5176, "step": 2524 }, { "epoch": 0.16, "grad_norm": 1.2021677494049072, "learning_rate": 9.563684315540507e-06, "loss": 0.543, "step": 2525 }, { "epoch": 0.16, "grad_norm": 1.1043858528137207, "learning_rate": 9.563265051858569e-06, "loss": 0.5861, "step": 2526 }, { "epoch": 0.16, "grad_norm": 1.026528239250183, "learning_rate": 9.562845596033413e-06, "loss": 0.612, "step": 2527 }, { "epoch": 0.16, "grad_norm": 1.0612083673477173, "learning_rate": 9.562425948082702e-06, "loss": 0.5459, "step": 2528 }, { "epoch": 0.16, "grad_norm": 0.9829614758491516, "learning_rate": 9.562006108024106e-06, "loss": 0.5407, "step": 2529 }, { "epoch": 0.16, "grad_norm": 1.1292877197265625, "learning_rate": 9.561586075875304e-06, "loss": 0.5878, "step": 2530 }, { "epoch": 0.16, "grad_norm": 1.096125602722168, "learning_rate": 9.56116585165398e-06, "loss": 0.5269, "step": 2531 }, { "epoch": 0.16, "grad_norm": 1.1311432123184204, "learning_rate": 9.560745435377828e-06, "loss": 0.5386, "step": 2532 }, { "epoch": 0.16, "grad_norm": 1.0907361507415771, "learning_rate": 9.560324827064553e-06, "loss": 0.5681, "step": 2533 }, { "epoch": 0.16, "grad_norm": 1.0219597816467285, "learning_rate": 9.559904026731862e-06, "loss": 0.5279, "step": 2534 }, { "epoch": 0.16, "grad_norm": 0.9601796865463257, "learning_rate": 9.559483034397477e-06, "loss": 0.5515, "step": 2535 }, { "epoch": 0.16, "grad_norm": 0.9654351472854614, "learning_rate": 9.559061850079121e-06, "loss": 0.5096, "step": 2536 }, { "epoch": 0.16, "grad_norm": 1.0824660062789917, "learning_rate": 9.558640473794533e-06, "loss": 0.5346, "step": 2537 }, { "epoch": 0.16, "grad_norm": 1.0912461280822754, "learning_rate": 9.558218905561452e-06, "loss": 0.5576, "step": 2538 }, { "epoch": 0.16, "grad_norm": 1.0075145959854126, "learning_rate": 9.557797145397629e-06, "loss": 0.5801, "step": 2539 }, { "epoch": 0.16, "grad_norm": 1.053517460823059, "learning_rate": 9.557375193320824e-06, "loss": 0.5874, "step": 2540 }, { "epoch": 0.16, "grad_norm": 1.0725799798965454, "learning_rate": 9.556953049348803e-06, "loss": 0.5453, "step": 2541 }, { "epoch": 0.16, "grad_norm": 1.0406394004821777, "learning_rate": 9.556530713499341e-06, "loss": 0.5786, "step": 2542 }, { "epoch": 0.16, "grad_norm": 1.1403875350952148, "learning_rate": 9.556108185790223e-06, "loss": 0.639, "step": 2543 }, { "epoch": 0.16, "grad_norm": 1.0901226997375488, "learning_rate": 9.55568546623924e-06, "loss": 0.613, "step": 2544 }, { "epoch": 0.16, "grad_norm": 1.057421326637268, "learning_rate": 9.555262554864188e-06, "loss": 0.5585, "step": 2545 }, { "epoch": 0.16, "grad_norm": 1.002384901046753, "learning_rate": 9.554839451682876e-06, "loss": 0.5107, "step": 2546 }, { "epoch": 0.16, "grad_norm": 0.971389889717102, "learning_rate": 9.554416156713121e-06, "loss": 0.5524, "step": 2547 }, { "epoch": 0.16, "grad_norm": 1.013134241104126, "learning_rate": 9.553992669972744e-06, "loss": 0.5461, "step": 2548 }, { "epoch": 0.16, "grad_norm": 0.9925364255905151, "learning_rate": 9.55356899147958e-06, "loss": 0.568, "step": 2549 }, { "epoch": 0.16, "grad_norm": 1.0054972171783447, "learning_rate": 9.553145121251465e-06, "loss": 0.5726, "step": 2550 }, { "epoch": 0.16, "grad_norm": 1.061987042427063, "learning_rate": 9.552721059306248e-06, "loss": 0.5742, "step": 2551 }, { "epoch": 0.16, "grad_norm": 1.0200176239013672, "learning_rate": 9.552296805661787e-06, "loss": 0.5705, "step": 2552 }, { "epoch": 0.16, "grad_norm": 1.1196894645690918, "learning_rate": 9.551872360335941e-06, "loss": 0.5836, "step": 2553 }, { "epoch": 0.16, "grad_norm": 1.0350748300552368, "learning_rate": 9.551447723346587e-06, "loss": 0.5628, "step": 2554 }, { "epoch": 0.16, "grad_norm": 1.0239752531051636, "learning_rate": 9.5510228947116e-06, "loss": 0.572, "step": 2555 }, { "epoch": 0.16, "grad_norm": 1.016221284866333, "learning_rate": 9.550597874448874e-06, "loss": 0.5742, "step": 2556 }, { "epoch": 0.16, "grad_norm": 1.0235182046890259, "learning_rate": 9.5501726625763e-06, "loss": 0.5836, "step": 2557 }, { "epoch": 0.16, "grad_norm": 1.0047327280044556, "learning_rate": 9.549747259111786e-06, "loss": 0.5531, "step": 2558 }, { "epoch": 0.16, "grad_norm": 1.0238168239593506, "learning_rate": 9.54932166407324e-06, "loss": 0.5564, "step": 2559 }, { "epoch": 0.16, "grad_norm": 1.0174661874771118, "learning_rate": 9.548895877478585e-06, "loss": 0.5585, "step": 2560 }, { "epoch": 0.16, "grad_norm": 1.0724985599517822, "learning_rate": 9.54846989934575e-06, "loss": 0.6014, "step": 2561 }, { "epoch": 0.16, "grad_norm": 1.3581907749176025, "learning_rate": 9.54804372969267e-06, "loss": 0.573, "step": 2562 }, { "epoch": 0.16, "grad_norm": 1.0653820037841797, "learning_rate": 9.54761736853729e-06, "loss": 0.5453, "step": 2563 }, { "epoch": 0.16, "grad_norm": 0.9831539392471313, "learning_rate": 9.547190815897563e-06, "loss": 0.5298, "step": 2564 }, { "epoch": 0.16, "grad_norm": 1.1154634952545166, "learning_rate": 9.54676407179145e-06, "loss": 0.6071, "step": 2565 }, { "epoch": 0.16, "grad_norm": 1.0205329656600952, "learning_rate": 9.546337136236916e-06, "loss": 0.5596, "step": 2566 }, { "epoch": 0.16, "grad_norm": 1.0515823364257812, "learning_rate": 9.545910009251945e-06, "loss": 0.5866, "step": 2567 }, { "epoch": 0.16, "grad_norm": 1.0403087139129639, "learning_rate": 9.545482690854513e-06, "loss": 0.5738, "step": 2568 }, { "epoch": 0.16, "grad_norm": 1.0300084352493286, "learning_rate": 9.545055181062621e-06, "loss": 0.5697, "step": 2569 }, { "epoch": 0.16, "grad_norm": 1.0895107984542847, "learning_rate": 9.544627479894264e-06, "loss": 0.5992, "step": 2570 }, { "epoch": 0.16, "grad_norm": 0.9808819890022278, "learning_rate": 9.544199587367455e-06, "loss": 0.5548, "step": 2571 }, { "epoch": 0.16, "grad_norm": 0.9981071352958679, "learning_rate": 9.54377150350021e-06, "loss": 0.586, "step": 2572 }, { "epoch": 0.16, "grad_norm": 1.0077358484268188, "learning_rate": 9.543343228310551e-06, "loss": 0.5069, "step": 2573 }, { "epoch": 0.16, "grad_norm": 1.0661660432815552, "learning_rate": 9.542914761816518e-06, "loss": 0.5892, "step": 2574 }, { "epoch": 0.16, "grad_norm": 1.0031508207321167, "learning_rate": 9.542486104036143e-06, "loss": 0.5557, "step": 2575 }, { "epoch": 0.16, "grad_norm": 1.002708077430725, "learning_rate": 9.542057254987485e-06, "loss": 0.5689, "step": 2576 }, { "epoch": 0.16, "grad_norm": 1.0708394050598145, "learning_rate": 9.541628214688595e-06, "loss": 0.5536, "step": 2577 }, { "epoch": 0.16, "grad_norm": 0.9945730566978455, "learning_rate": 9.541198983157538e-06, "loss": 0.5941, "step": 2578 }, { "epoch": 0.16, "grad_norm": 1.0138047933578491, "learning_rate": 9.54076956041239e-06, "loss": 0.5696, "step": 2579 }, { "epoch": 0.16, "grad_norm": 0.9891455769538879, "learning_rate": 9.540339946471235e-06, "loss": 0.5547, "step": 2580 }, { "epoch": 0.16, "grad_norm": 0.969589114189148, "learning_rate": 9.539910141352156e-06, "loss": 0.5695, "step": 2581 }, { "epoch": 0.16, "grad_norm": 0.9683725833892822, "learning_rate": 9.539480145073257e-06, "loss": 0.534, "step": 2582 }, { "epoch": 0.16, "grad_norm": 1.0561916828155518, "learning_rate": 9.53904995765264e-06, "loss": 0.5461, "step": 2583 }, { "epoch": 0.16, "grad_norm": 1.119350790977478, "learning_rate": 9.538619579108417e-06, "loss": 0.6128, "step": 2584 }, { "epoch": 0.16, "grad_norm": 1.0886105298995972, "learning_rate": 9.538189009458715e-06, "loss": 0.6064, "step": 2585 }, { "epoch": 0.16, "grad_norm": 1.058983564376831, "learning_rate": 9.53775824872166e-06, "loss": 0.5322, "step": 2586 }, { "epoch": 0.16, "grad_norm": 1.0209020376205444, "learning_rate": 9.53732729691539e-06, "loss": 0.5659, "step": 2587 }, { "epoch": 0.16, "grad_norm": 1.0355514287948608, "learning_rate": 9.536896154058053e-06, "loss": 0.559, "step": 2588 }, { "epoch": 0.16, "grad_norm": 1.0672528743743896, "learning_rate": 9.536464820167804e-06, "loss": 0.5699, "step": 2589 }, { "epoch": 0.16, "grad_norm": 0.9848209619522095, "learning_rate": 9.536033295262799e-06, "loss": 0.5106, "step": 2590 }, { "epoch": 0.16, "grad_norm": 1.0069193840026855, "learning_rate": 9.535601579361214e-06, "loss": 0.5202, "step": 2591 }, { "epoch": 0.16, "grad_norm": 1.0515129566192627, "learning_rate": 9.535169672481222e-06, "loss": 0.5657, "step": 2592 }, { "epoch": 0.16, "grad_norm": 1.049230933189392, "learning_rate": 9.534737574641014e-06, "loss": 0.5897, "step": 2593 }, { "epoch": 0.16, "grad_norm": 1.0274021625518799, "learning_rate": 9.53430528585878e-06, "loss": 0.5719, "step": 2594 }, { "epoch": 0.16, "grad_norm": 0.9516419172286987, "learning_rate": 9.533872806152727e-06, "loss": 0.5474, "step": 2595 }, { "epoch": 0.16, "grad_norm": 1.0155837535858154, "learning_rate": 9.533440135541059e-06, "loss": 0.5569, "step": 2596 }, { "epoch": 0.16, "grad_norm": 1.0431528091430664, "learning_rate": 9.533007274042e-06, "loss": 0.6354, "step": 2597 }, { "epoch": 0.16, "grad_norm": 1.1031321287155151, "learning_rate": 9.532574221673772e-06, "loss": 0.563, "step": 2598 }, { "epoch": 0.16, "grad_norm": 1.1353846788406372, "learning_rate": 9.532140978454614e-06, "loss": 0.5598, "step": 2599 }, { "epoch": 0.16, "grad_norm": 1.140213131904602, "learning_rate": 9.531707544402762e-06, "loss": 0.5997, "step": 2600 }, { "epoch": 0.16, "grad_norm": 1.0954878330230713, "learning_rate": 9.531273919536473e-06, "loss": 0.5664, "step": 2601 }, { "epoch": 0.16, "grad_norm": 0.9809207916259766, "learning_rate": 9.530840103874001e-06, "loss": 0.5823, "step": 2602 }, { "epoch": 0.16, "grad_norm": 1.0627355575561523, "learning_rate": 9.530406097433615e-06, "loss": 0.514, "step": 2603 }, { "epoch": 0.16, "grad_norm": 1.0106245279312134, "learning_rate": 9.529971900233587e-06, "loss": 0.5331, "step": 2604 }, { "epoch": 0.17, "grad_norm": 1.0271943807601929, "learning_rate": 9.529537512292201e-06, "loss": 0.5586, "step": 2605 }, { "epoch": 0.17, "grad_norm": 1.0283169746398926, "learning_rate": 9.529102933627747e-06, "loss": 0.58, "step": 2606 }, { "epoch": 0.17, "grad_norm": 1.0217669010162354, "learning_rate": 9.528668164258525e-06, "loss": 0.572, "step": 2607 }, { "epoch": 0.17, "grad_norm": 0.9806956052780151, "learning_rate": 9.528233204202842e-06, "loss": 0.5579, "step": 2608 }, { "epoch": 0.17, "grad_norm": 0.9664952158927917, "learning_rate": 9.527798053479009e-06, "loss": 0.5279, "step": 2609 }, { "epoch": 0.17, "grad_norm": 1.0128159523010254, "learning_rate": 9.527362712105353e-06, "loss": 0.5462, "step": 2610 }, { "epoch": 0.17, "grad_norm": 1.0592056512832642, "learning_rate": 9.5269271801002e-06, "loss": 0.5338, "step": 2611 }, { "epoch": 0.17, "grad_norm": 1.0758192539215088, "learning_rate": 9.526491457481895e-06, "loss": 0.5544, "step": 2612 }, { "epoch": 0.17, "grad_norm": 0.9766221046447754, "learning_rate": 9.526055544268778e-06, "loss": 0.547, "step": 2613 }, { "epoch": 0.17, "grad_norm": 0.972629964351654, "learning_rate": 9.525619440479209e-06, "loss": 0.5373, "step": 2614 }, { "epoch": 0.17, "grad_norm": 1.006587028503418, "learning_rate": 9.525183146131549e-06, "loss": 0.6073, "step": 2615 }, { "epoch": 0.17, "grad_norm": 1.0332380533218384, "learning_rate": 9.524746661244166e-06, "loss": 0.5731, "step": 2616 }, { "epoch": 0.17, "grad_norm": 1.0720937252044678, "learning_rate": 9.524309985835444e-06, "loss": 0.5515, "step": 2617 }, { "epoch": 0.17, "grad_norm": 1.0120248794555664, "learning_rate": 9.523873119923768e-06, "loss": 0.5364, "step": 2618 }, { "epoch": 0.17, "grad_norm": 1.0338369607925415, "learning_rate": 9.523436063527531e-06, "loss": 0.532, "step": 2619 }, { "epoch": 0.17, "grad_norm": 1.073209524154663, "learning_rate": 9.522998816665137e-06, "loss": 0.5833, "step": 2620 }, { "epoch": 0.17, "grad_norm": 0.9193693995475769, "learning_rate": 9.522561379354997e-06, "loss": 0.5559, "step": 2621 }, { "epoch": 0.17, "grad_norm": 1.032959222793579, "learning_rate": 9.522123751615532e-06, "loss": 0.5919, "step": 2622 }, { "epoch": 0.17, "grad_norm": 0.9943687319755554, "learning_rate": 9.521685933465166e-06, "loss": 0.5811, "step": 2623 }, { "epoch": 0.17, "grad_norm": 1.0560085773468018, "learning_rate": 9.521247924922334e-06, "loss": 0.562, "step": 2624 }, { "epoch": 0.17, "grad_norm": 1.0780750513076782, "learning_rate": 9.520809726005481e-06, "loss": 0.5474, "step": 2625 }, { "epoch": 0.17, "grad_norm": 1.0348384380340576, "learning_rate": 9.52037133673306e-06, "loss": 0.5089, "step": 2626 }, { "epoch": 0.17, "grad_norm": 0.9498501420021057, "learning_rate": 9.519932757123523e-06, "loss": 0.5973, "step": 2627 }, { "epoch": 0.17, "grad_norm": 1.070290446281433, "learning_rate": 9.519493987195343e-06, "loss": 0.5405, "step": 2628 }, { "epoch": 0.17, "grad_norm": 0.9983610510826111, "learning_rate": 9.519055026966995e-06, "loss": 0.5381, "step": 2629 }, { "epoch": 0.17, "grad_norm": 1.0487020015716553, "learning_rate": 9.518615876456958e-06, "loss": 0.5699, "step": 2630 }, { "epoch": 0.17, "grad_norm": 0.965333878993988, "learning_rate": 9.518176535683727e-06, "loss": 0.5298, "step": 2631 }, { "epoch": 0.17, "grad_norm": 0.9889441728591919, "learning_rate": 9.5177370046658e-06, "loss": 0.5948, "step": 2632 }, { "epoch": 0.17, "grad_norm": 1.0800920724868774, "learning_rate": 9.517297283421681e-06, "loss": 0.5983, "step": 2633 }, { "epoch": 0.17, "grad_norm": 1.0012084245681763, "learning_rate": 9.51685737196989e-06, "loss": 0.56, "step": 2634 }, { "epoch": 0.17, "grad_norm": 0.9588029980659485, "learning_rate": 9.516417270328948e-06, "loss": 0.5488, "step": 2635 }, { "epoch": 0.17, "grad_norm": 1.028010606765747, "learning_rate": 9.515976978517387e-06, "loss": 0.5601, "step": 2636 }, { "epoch": 0.17, "grad_norm": 0.9502065181732178, "learning_rate": 9.515536496553744e-06, "loss": 0.5276, "step": 2637 }, { "epoch": 0.17, "grad_norm": 1.0674821138381958, "learning_rate": 9.515095824456568e-06, "loss": 0.5827, "step": 2638 }, { "epoch": 0.17, "grad_norm": 0.9957113862037659, "learning_rate": 9.514654962244414e-06, "loss": 0.5788, "step": 2639 }, { "epoch": 0.17, "grad_norm": 0.9970918893814087, "learning_rate": 9.514213909935843e-06, "loss": 0.5072, "step": 2640 }, { "epoch": 0.17, "grad_norm": 1.0630422830581665, "learning_rate": 9.51377266754943e-06, "loss": 0.5901, "step": 2641 }, { "epoch": 0.17, "grad_norm": 1.049082636833191, "learning_rate": 9.513331235103751e-06, "loss": 0.556, "step": 2642 }, { "epoch": 0.17, "grad_norm": 1.1107585430145264, "learning_rate": 9.512889612617397e-06, "loss": 0.5573, "step": 2643 }, { "epoch": 0.17, "grad_norm": 0.990415096282959, "learning_rate": 9.512447800108958e-06, "loss": 0.5591, "step": 2644 }, { "epoch": 0.17, "grad_norm": 1.0263445377349854, "learning_rate": 9.512005797597042e-06, "loss": 0.5828, "step": 2645 }, { "epoch": 0.17, "grad_norm": 1.073521375656128, "learning_rate": 9.511563605100255e-06, "loss": 0.5681, "step": 2646 }, { "epoch": 0.17, "grad_norm": 1.095017671585083, "learning_rate": 9.511121222637222e-06, "loss": 0.5948, "step": 2647 }, { "epoch": 0.17, "grad_norm": 0.9400439858436584, "learning_rate": 9.510678650226567e-06, "loss": 0.5077, "step": 2648 }, { "epoch": 0.17, "grad_norm": 1.0273232460021973, "learning_rate": 9.510235887886923e-06, "loss": 0.5506, "step": 2649 }, { "epoch": 0.17, "grad_norm": 1.04499089717865, "learning_rate": 9.509792935636939e-06, "loss": 0.5314, "step": 2650 }, { "epoch": 0.17, "grad_norm": 0.9724092483520508, "learning_rate": 9.50934979349526e-06, "loss": 0.5165, "step": 2651 }, { "epoch": 0.17, "grad_norm": 1.1168352365493774, "learning_rate": 9.508906461480549e-06, "loss": 0.6275, "step": 2652 }, { "epoch": 0.17, "grad_norm": 1.1057885885238647, "learning_rate": 9.508462939611473e-06, "loss": 0.5357, "step": 2653 }, { "epoch": 0.17, "grad_norm": 1.1065791845321655, "learning_rate": 9.508019227906706e-06, "loss": 0.5996, "step": 2654 }, { "epoch": 0.17, "grad_norm": 1.092964768409729, "learning_rate": 9.507575326384932e-06, "loss": 0.5046, "step": 2655 }, { "epoch": 0.17, "grad_norm": 1.1059108972549438, "learning_rate": 9.507131235064842e-06, "loss": 0.5753, "step": 2656 }, { "epoch": 0.17, "grad_norm": 1.0257097482681274, "learning_rate": 9.506686953965134e-06, "loss": 0.5683, "step": 2657 }, { "epoch": 0.17, "grad_norm": 0.9293850064277649, "learning_rate": 9.506242483104517e-06, "loss": 0.4773, "step": 2658 }, { "epoch": 0.17, "grad_norm": 1.008527159690857, "learning_rate": 9.505797822501704e-06, "loss": 0.5724, "step": 2659 }, { "epoch": 0.17, "grad_norm": 1.0008771419525146, "learning_rate": 9.505352972175419e-06, "loss": 0.5916, "step": 2660 }, { "epoch": 0.17, "grad_norm": 1.0681480169296265, "learning_rate": 9.504907932144394e-06, "loss": 0.5959, "step": 2661 }, { "epoch": 0.17, "grad_norm": 1.0716049671173096, "learning_rate": 9.504462702427369e-06, "loss": 0.5705, "step": 2662 }, { "epoch": 0.17, "grad_norm": 1.114532470703125, "learning_rate": 9.504017283043087e-06, "loss": 0.5862, "step": 2663 }, { "epoch": 0.17, "grad_norm": 1.0168551206588745, "learning_rate": 9.503571674010305e-06, "loss": 0.5815, "step": 2664 }, { "epoch": 0.17, "grad_norm": 0.9963028430938721, "learning_rate": 9.503125875347789e-06, "loss": 0.5591, "step": 2665 }, { "epoch": 0.17, "grad_norm": 1.1091727018356323, "learning_rate": 9.502679887074306e-06, "loss": 0.5815, "step": 2666 }, { "epoch": 0.17, "grad_norm": 1.0106605291366577, "learning_rate": 9.502233709208637e-06, "loss": 0.6212, "step": 2667 }, { "epoch": 0.17, "grad_norm": 1.087552547454834, "learning_rate": 9.50178734176957e-06, "loss": 0.5382, "step": 2668 }, { "epoch": 0.17, "grad_norm": 1.003647804260254, "learning_rate": 9.501340784775896e-06, "loss": 0.5655, "step": 2669 }, { "epoch": 0.17, "grad_norm": 1.0045603513717651, "learning_rate": 9.500894038246424e-06, "loss": 0.5292, "step": 2670 }, { "epoch": 0.17, "grad_norm": 1.0269829034805298, "learning_rate": 9.50044710219996e-06, "loss": 0.5717, "step": 2671 }, { "epoch": 0.17, "grad_norm": 1.0510656833648682, "learning_rate": 9.499999976655324e-06, "loss": 0.5478, "step": 2672 }, { "epoch": 0.17, "grad_norm": 0.9509247541427612, "learning_rate": 9.499552661631342e-06, "loss": 0.547, "step": 2673 }, { "epoch": 0.17, "grad_norm": 1.1670129299163818, "learning_rate": 9.49910515714685e-06, "loss": 0.535, "step": 2674 }, { "epoch": 0.17, "grad_norm": 0.9437034726142883, "learning_rate": 9.498657463220694e-06, "loss": 0.5333, "step": 2675 }, { "epoch": 0.17, "grad_norm": 1.115921139717102, "learning_rate": 9.49820957987172e-06, "loss": 0.5674, "step": 2676 }, { "epoch": 0.17, "grad_norm": 1.0253336429595947, "learning_rate": 9.49776150711879e-06, "loss": 0.5207, "step": 2677 }, { "epoch": 0.17, "grad_norm": 0.922319769859314, "learning_rate": 9.497313244980768e-06, "loss": 0.5107, "step": 2678 }, { "epoch": 0.17, "grad_norm": 1.033787727355957, "learning_rate": 9.496864793476532e-06, "loss": 0.5775, "step": 2679 }, { "epoch": 0.17, "grad_norm": 1.230092167854309, "learning_rate": 9.49641615262496e-06, "loss": 0.5708, "step": 2680 }, { "epoch": 0.17, "grad_norm": 0.9727022051811218, "learning_rate": 9.49596732244495e-06, "loss": 0.5258, "step": 2681 }, { "epoch": 0.17, "grad_norm": 1.0040794610977173, "learning_rate": 9.495518302955393e-06, "loss": 0.5962, "step": 2682 }, { "epoch": 0.17, "grad_norm": 1.0190296173095703, "learning_rate": 9.4950690941752e-06, "loss": 0.5857, "step": 2683 }, { "epoch": 0.17, "grad_norm": 1.1475651264190674, "learning_rate": 9.494619696123286e-06, "loss": 0.5803, "step": 2684 }, { "epoch": 0.17, "grad_norm": 1.0474387407302856, "learning_rate": 9.49417010881857e-06, "loss": 0.5363, "step": 2685 }, { "epoch": 0.17, "grad_norm": 0.9686734080314636, "learning_rate": 9.493720332279987e-06, "loss": 0.5116, "step": 2686 }, { "epoch": 0.17, "grad_norm": 1.0977166891098022, "learning_rate": 9.493270366526471e-06, "loss": 0.5734, "step": 2687 }, { "epoch": 0.17, "grad_norm": 1.140429139137268, "learning_rate": 9.492820211576971e-06, "loss": 0.552, "step": 2688 }, { "epoch": 0.17, "grad_norm": 1.088618278503418, "learning_rate": 9.492369867450444e-06, "loss": 0.5414, "step": 2689 }, { "epoch": 0.17, "grad_norm": 1.1019575595855713, "learning_rate": 9.491919334165846e-06, "loss": 0.5658, "step": 2690 }, { "epoch": 0.17, "grad_norm": 1.0340977907180786, "learning_rate": 9.491468611742154e-06, "loss": 0.6331, "step": 2691 }, { "epoch": 0.17, "grad_norm": 1.086870789527893, "learning_rate": 9.491017700198343e-06, "loss": 0.6129, "step": 2692 }, { "epoch": 0.17, "grad_norm": 1.0830391645431519, "learning_rate": 9.490566599553399e-06, "loss": 0.5659, "step": 2693 }, { "epoch": 0.17, "grad_norm": 1.0409371852874756, "learning_rate": 9.490115309826317e-06, "loss": 0.5554, "step": 2694 }, { "epoch": 0.17, "grad_norm": 1.0426156520843506, "learning_rate": 9.4896638310361e-06, "loss": 0.5665, "step": 2695 }, { "epoch": 0.17, "grad_norm": 1.0904855728149414, "learning_rate": 9.489212163201758e-06, "loss": 0.5656, "step": 2696 }, { "epoch": 0.17, "grad_norm": 1.0347099304199219, "learning_rate": 9.488760306342307e-06, "loss": 0.5764, "step": 2697 }, { "epoch": 0.17, "grad_norm": 1.05708909034729, "learning_rate": 9.488308260476776e-06, "loss": 0.5354, "step": 2698 }, { "epoch": 0.17, "grad_norm": 1.0091209411621094, "learning_rate": 9.487856025624196e-06, "loss": 0.5512, "step": 2699 }, { "epoch": 0.17, "grad_norm": 1.0588990449905396, "learning_rate": 9.487403601803614e-06, "loss": 0.5239, "step": 2700 }, { "epoch": 0.17, "grad_norm": 1.0110063552856445, "learning_rate": 9.486950989034074e-06, "loss": 0.5683, "step": 2701 }, { "epoch": 0.17, "grad_norm": 0.9574355483055115, "learning_rate": 9.486498187334636e-06, "loss": 0.5331, "step": 2702 }, { "epoch": 0.17, "grad_norm": 1.005077838897705, "learning_rate": 9.48604519672437e-06, "loss": 0.5988, "step": 2703 }, { "epoch": 0.17, "grad_norm": 1.0744143724441528, "learning_rate": 9.485592017222344e-06, "loss": 0.6271, "step": 2704 }, { "epoch": 0.17, "grad_norm": 1.054773211479187, "learning_rate": 9.485138648847643e-06, "loss": 0.5351, "step": 2705 }, { "epoch": 0.17, "grad_norm": 0.9953201413154602, "learning_rate": 9.484685091619358e-06, "loss": 0.5487, "step": 2706 }, { "epoch": 0.17, "grad_norm": 1.0761560201644897, "learning_rate": 9.484231345556582e-06, "loss": 0.57, "step": 2707 }, { "epoch": 0.17, "grad_norm": 1.0151156187057495, "learning_rate": 9.483777410678427e-06, "loss": 0.5624, "step": 2708 }, { "epoch": 0.17, "grad_norm": 0.977360188961029, "learning_rate": 9.483323287004001e-06, "loss": 0.5196, "step": 2709 }, { "epoch": 0.17, "grad_norm": 1.0837485790252686, "learning_rate": 9.482868974552427e-06, "loss": 0.5727, "step": 2710 }, { "epoch": 0.17, "grad_norm": 1.1661759614944458, "learning_rate": 9.482414473342835e-06, "loss": 0.6477, "step": 2711 }, { "epoch": 0.17, "grad_norm": 1.0656156539916992, "learning_rate": 9.481959783394365e-06, "loss": 0.6108, "step": 2712 }, { "epoch": 0.17, "grad_norm": 1.1635092496871948, "learning_rate": 9.48150490472616e-06, "loss": 0.5768, "step": 2713 }, { "epoch": 0.17, "grad_norm": 1.0615007877349854, "learning_rate": 9.481049837357371e-06, "loss": 0.5624, "step": 2714 }, { "epoch": 0.17, "grad_norm": 0.9256333112716675, "learning_rate": 9.480594581307164e-06, "loss": 0.5176, "step": 2715 }, { "epoch": 0.17, "grad_norm": 0.9532365202903748, "learning_rate": 9.480139136594706e-06, "loss": 0.5315, "step": 2716 }, { "epoch": 0.17, "grad_norm": 1.0971673727035522, "learning_rate": 9.479683503239172e-06, "loss": 0.5831, "step": 2717 }, { "epoch": 0.17, "grad_norm": 1.0594340562820435, "learning_rate": 9.479227681259751e-06, "loss": 0.5991, "step": 2718 }, { "epoch": 0.17, "grad_norm": 0.9971533417701721, "learning_rate": 9.478771670675635e-06, "loss": 0.5632, "step": 2719 }, { "epoch": 0.17, "grad_norm": 0.9794123768806458, "learning_rate": 9.478315471506023e-06, "loss": 0.5368, "step": 2720 }, { "epoch": 0.17, "grad_norm": 0.9903360605239868, "learning_rate": 9.477859083770126e-06, "loss": 0.5879, "step": 2721 }, { "epoch": 0.17, "grad_norm": 1.0230356454849243, "learning_rate": 9.477402507487162e-06, "loss": 0.5509, "step": 2722 }, { "epoch": 0.17, "grad_norm": 1.0338963270187378, "learning_rate": 9.476945742676352e-06, "loss": 0.5207, "step": 2723 }, { "epoch": 0.17, "grad_norm": 1.096906304359436, "learning_rate": 9.476488789356933e-06, "loss": 0.5387, "step": 2724 }, { "epoch": 0.17, "grad_norm": 1.0951502323150635, "learning_rate": 9.47603164754814e-06, "loss": 0.6059, "step": 2725 }, { "epoch": 0.17, "grad_norm": 0.9961447715759277, "learning_rate": 9.47557431726923e-06, "loss": 0.5728, "step": 2726 }, { "epoch": 0.17, "grad_norm": 1.1282474994659424, "learning_rate": 9.475116798539451e-06, "loss": 0.6234, "step": 2727 }, { "epoch": 0.17, "grad_norm": 1.0593305826187134, "learning_rate": 9.474659091378074e-06, "loss": 0.5426, "step": 2728 }, { "epoch": 0.17, "grad_norm": 1.0862617492675781, "learning_rate": 9.474201195804367e-06, "loss": 0.6038, "step": 2729 }, { "epoch": 0.17, "grad_norm": 1.0345958471298218, "learning_rate": 9.473743111837612e-06, "loss": 0.577, "step": 2730 }, { "epoch": 0.17, "grad_norm": 0.9704797267913818, "learning_rate": 9.4732848394971e-06, "loss": 0.5234, "step": 2731 }, { "epoch": 0.17, "grad_norm": 0.9978111386299133, "learning_rate": 9.472826378802122e-06, "loss": 0.5623, "step": 2732 }, { "epoch": 0.17, "grad_norm": 1.0373756885528564, "learning_rate": 9.472367729771987e-06, "loss": 0.578, "step": 2733 }, { "epoch": 0.17, "grad_norm": 1.0830659866333008, "learning_rate": 9.471908892426005e-06, "loss": 0.5932, "step": 2734 }, { "epoch": 0.17, "grad_norm": 1.0066198110580444, "learning_rate": 9.471449866783495e-06, "loss": 0.5549, "step": 2735 }, { "epoch": 0.17, "grad_norm": 1.0600615739822388, "learning_rate": 9.470990652863787e-06, "loss": 0.6033, "step": 2736 }, { "epoch": 0.17, "grad_norm": 1.1532150506973267, "learning_rate": 9.470531250686216e-06, "loss": 0.5851, "step": 2737 }, { "epoch": 0.17, "grad_norm": 1.1124008893966675, "learning_rate": 9.470071660270126e-06, "loss": 0.5786, "step": 2738 }, { "epoch": 0.17, "grad_norm": 0.9894224405288696, "learning_rate": 9.469611881634868e-06, "loss": 0.5439, "step": 2739 }, { "epoch": 0.17, "grad_norm": 1.0182647705078125, "learning_rate": 9.469151914799803e-06, "loss": 0.5386, "step": 2740 }, { "epoch": 0.17, "grad_norm": 0.978829562664032, "learning_rate": 9.468691759784298e-06, "loss": 0.5712, "step": 2741 }, { "epoch": 0.17, "grad_norm": 1.0401105880737305, "learning_rate": 9.468231416607727e-06, "loss": 0.5204, "step": 2742 }, { "epoch": 0.17, "grad_norm": 1.068730115890503, "learning_rate": 9.467770885289477e-06, "loss": 0.5169, "step": 2743 }, { "epoch": 0.17, "grad_norm": 1.023269772529602, "learning_rate": 9.467310165848935e-06, "loss": 0.5856, "step": 2744 }, { "epoch": 0.17, "grad_norm": 1.0364032983779907, "learning_rate": 9.466849258305504e-06, "loss": 0.586, "step": 2745 }, { "epoch": 0.17, "grad_norm": 1.0057346820831299, "learning_rate": 9.46638816267859e-06, "loss": 0.5624, "step": 2746 }, { "epoch": 0.17, "grad_norm": 1.052634835243225, "learning_rate": 9.465926878987609e-06, "loss": 0.5905, "step": 2747 }, { "epoch": 0.17, "grad_norm": 0.9901856780052185, "learning_rate": 9.46546540725198e-06, "loss": 0.5594, "step": 2748 }, { "epoch": 0.17, "grad_norm": 1.0854971408843994, "learning_rate": 9.465003747491138e-06, "loss": 0.5894, "step": 2749 }, { "epoch": 0.17, "grad_norm": 1.0599004030227661, "learning_rate": 9.464541899724522e-06, "loss": 0.5523, "step": 2750 }, { "epoch": 0.17, "grad_norm": 1.1270490884780884, "learning_rate": 9.464079863971576e-06, "loss": 0.5561, "step": 2751 }, { "epoch": 0.17, "grad_norm": 1.1302123069763184, "learning_rate": 9.463617640251756e-06, "loss": 0.5472, "step": 2752 }, { "epoch": 0.17, "grad_norm": 1.055801510810852, "learning_rate": 9.463155228584526e-06, "loss": 0.5548, "step": 2753 }, { "epoch": 0.17, "grad_norm": 1.117521047592163, "learning_rate": 9.462692628989356e-06, "loss": 0.5113, "step": 2754 }, { "epoch": 0.17, "grad_norm": 1.0038899183273315, "learning_rate": 9.462229841485723e-06, "loss": 0.5977, "step": 2755 }, { "epoch": 0.17, "grad_norm": 1.06837797164917, "learning_rate": 9.461766866093117e-06, "loss": 0.5819, "step": 2756 }, { "epoch": 0.17, "grad_norm": 1.0148512125015259, "learning_rate": 9.461303702831026e-06, "loss": 0.5387, "step": 2757 }, { "epoch": 0.17, "grad_norm": 1.059126377105713, "learning_rate": 9.460840351718958e-06, "loss": 0.5427, "step": 2758 }, { "epoch": 0.17, "grad_norm": 1.0758804082870483, "learning_rate": 9.46037681277642e-06, "loss": 0.5507, "step": 2759 }, { "epoch": 0.17, "grad_norm": 1.0124850273132324, "learning_rate": 9.459913086022931e-06, "loss": 0.5496, "step": 2760 }, { "epoch": 0.17, "grad_norm": 0.9992918968200684, "learning_rate": 9.459449171478017e-06, "loss": 0.4975, "step": 2761 }, { "epoch": 0.17, "grad_norm": 0.9648446440696716, "learning_rate": 9.458985069161212e-06, "loss": 0.5103, "step": 2762 }, { "epoch": 0.18, "grad_norm": 1.1720576286315918, "learning_rate": 9.458520779092057e-06, "loss": 0.5341, "step": 2763 }, { "epoch": 0.18, "grad_norm": 1.0725833177566528, "learning_rate": 9.458056301290102e-06, "loss": 0.6193, "step": 2764 }, { "epoch": 0.18, "grad_norm": 0.9970990419387817, "learning_rate": 9.457591635774905e-06, "loss": 0.6286, "step": 2765 }, { "epoch": 0.18, "grad_norm": 1.0215911865234375, "learning_rate": 9.457126782566031e-06, "loss": 0.6008, "step": 2766 }, { "epoch": 0.18, "grad_norm": 1.0754748582839966, "learning_rate": 9.456661741683054e-06, "loss": 0.5929, "step": 2767 }, { "epoch": 0.18, "grad_norm": 1.061965823173523, "learning_rate": 9.456196513145553e-06, "loss": 0.5575, "step": 2768 }, { "epoch": 0.18, "grad_norm": 1.0222315788269043, "learning_rate": 9.455731096973119e-06, "loss": 0.566, "step": 2769 }, { "epoch": 0.18, "grad_norm": 1.079120397567749, "learning_rate": 9.455265493185349e-06, "loss": 0.5987, "step": 2770 }, { "epoch": 0.18, "grad_norm": 1.013046383857727, "learning_rate": 9.454799701801849e-06, "loss": 0.5493, "step": 2771 }, { "epoch": 0.18, "grad_norm": 1.0657896995544434, "learning_rate": 9.45433372284223e-06, "loss": 0.5631, "step": 2772 }, { "epoch": 0.18, "grad_norm": 0.9840913414955139, "learning_rate": 9.453867556326113e-06, "loss": 0.5552, "step": 2773 }, { "epoch": 0.18, "grad_norm": 1.123887062072754, "learning_rate": 9.453401202273127e-06, "loss": 0.5443, "step": 2774 }, { "epoch": 0.18, "grad_norm": 1.0064541101455688, "learning_rate": 9.45293466070291e-06, "loss": 0.5392, "step": 2775 }, { "epoch": 0.18, "grad_norm": 1.1079277992248535, "learning_rate": 9.452467931635104e-06, "loss": 0.595, "step": 2776 }, { "epoch": 0.18, "grad_norm": 0.9934383034706116, "learning_rate": 9.452001015089363e-06, "loss": 0.5407, "step": 2777 }, { "epoch": 0.18, "grad_norm": 1.04298996925354, "learning_rate": 9.451533911085346e-06, "loss": 0.5536, "step": 2778 }, { "epoch": 0.18, "grad_norm": 1.1786850690841675, "learning_rate": 9.451066619642721e-06, "loss": 0.5682, "step": 2779 }, { "epoch": 0.18, "grad_norm": 1.0571482181549072, "learning_rate": 9.450599140781166e-06, "loss": 0.5803, "step": 2780 }, { "epoch": 0.18, "grad_norm": 0.9489169716835022, "learning_rate": 9.450131474520364e-06, "loss": 0.5458, "step": 2781 }, { "epoch": 0.18, "grad_norm": 0.9808425903320312, "learning_rate": 9.449663620880006e-06, "loss": 0.5421, "step": 2782 }, { "epoch": 0.18, "grad_norm": 0.9377076625823975, "learning_rate": 9.449195579879793e-06, "loss": 0.5443, "step": 2783 }, { "epoch": 0.18, "grad_norm": 1.0947315692901611, "learning_rate": 9.448727351539431e-06, "loss": 0.5892, "step": 2784 }, { "epoch": 0.18, "grad_norm": 1.0158424377441406, "learning_rate": 9.448258935878635e-06, "loss": 0.5569, "step": 2785 }, { "epoch": 0.18, "grad_norm": 1.0767693519592285, "learning_rate": 9.44779033291713e-06, "loss": 0.5791, "step": 2786 }, { "epoch": 0.18, "grad_norm": 1.0848816633224487, "learning_rate": 9.447321542674647e-06, "loss": 0.5863, "step": 2787 }, { "epoch": 0.18, "grad_norm": 1.111331582069397, "learning_rate": 9.446852565170928e-06, "loss": 0.5767, "step": 2788 }, { "epoch": 0.18, "grad_norm": 1.0502716302871704, "learning_rate": 9.446383400425713e-06, "loss": 0.6005, "step": 2789 }, { "epoch": 0.18, "grad_norm": 1.0394223928451538, "learning_rate": 9.445914048458764e-06, "loss": 0.5051, "step": 2790 }, { "epoch": 0.18, "grad_norm": 0.9824061393737793, "learning_rate": 9.445444509289838e-06, "loss": 0.5472, "step": 2791 }, { "epoch": 0.18, "grad_norm": 1.0843420028686523, "learning_rate": 9.44497478293871e-06, "loss": 0.5515, "step": 2792 }, { "epoch": 0.18, "grad_norm": 1.0395381450653076, "learning_rate": 9.444504869425154e-06, "loss": 0.577, "step": 2793 }, { "epoch": 0.18, "grad_norm": 1.0165070295333862, "learning_rate": 9.44403476876896e-06, "loss": 0.5736, "step": 2794 }, { "epoch": 0.18, "grad_norm": 0.9988610148429871, "learning_rate": 9.443564480989924e-06, "loss": 0.55, "step": 2795 }, { "epoch": 0.18, "grad_norm": 0.9660263061523438, "learning_rate": 9.443094006107844e-06, "loss": 0.5494, "step": 2796 }, { "epoch": 0.18, "grad_norm": 0.9972324967384338, "learning_rate": 9.442623344142534e-06, "loss": 0.545, "step": 2797 }, { "epoch": 0.18, "grad_norm": 1.038231611251831, "learning_rate": 9.442152495113808e-06, "loss": 0.5573, "step": 2798 }, { "epoch": 0.18, "grad_norm": 0.9843300580978394, "learning_rate": 9.441681459041494e-06, "loss": 0.5879, "step": 2799 }, { "epoch": 0.18, "grad_norm": 1.1102025508880615, "learning_rate": 9.441210235945425e-06, "loss": 0.5593, "step": 2800 }, { "epoch": 0.18, "grad_norm": 0.9363767504692078, "learning_rate": 9.440738825845441e-06, "loss": 0.5394, "step": 2801 }, { "epoch": 0.18, "grad_norm": 1.1123851537704468, "learning_rate": 9.440267228761395e-06, "loss": 0.5985, "step": 2802 }, { "epoch": 0.18, "grad_norm": 1.0247012376785278, "learning_rate": 9.439795444713143e-06, "loss": 0.5928, "step": 2803 }, { "epoch": 0.18, "grad_norm": 1.0402313470840454, "learning_rate": 9.43932347372055e-06, "loss": 0.5614, "step": 2804 }, { "epoch": 0.18, "grad_norm": 1.025115728378296, "learning_rate": 9.438851315803488e-06, "loss": 0.5049, "step": 2805 }, { "epoch": 0.18, "grad_norm": 0.9654614329338074, "learning_rate": 9.438378970981839e-06, "loss": 0.5418, "step": 2806 }, { "epoch": 0.18, "grad_norm": 1.0685309171676636, "learning_rate": 9.43790643927549e-06, "loss": 0.6401, "step": 2807 }, { "epoch": 0.18, "grad_norm": 1.0241663455963135, "learning_rate": 9.437433720704342e-06, "loss": 0.5294, "step": 2808 }, { "epoch": 0.18, "grad_norm": 1.1073213815689087, "learning_rate": 9.436960815288294e-06, "loss": 0.5465, "step": 2809 }, { "epoch": 0.18, "grad_norm": 1.0069595575332642, "learning_rate": 9.436487723047263e-06, "loss": 0.5578, "step": 2810 }, { "epoch": 0.18, "grad_norm": 1.0204834938049316, "learning_rate": 9.436014444001167e-06, "loss": 0.5714, "step": 2811 }, { "epoch": 0.18, "grad_norm": 1.0144668817520142, "learning_rate": 9.435540978169933e-06, "loss": 0.5524, "step": 2812 }, { "epoch": 0.18, "grad_norm": 1.0032633543014526, "learning_rate": 9.435067325573499e-06, "loss": 0.5966, "step": 2813 }, { "epoch": 0.18, "grad_norm": 0.9835605621337891, "learning_rate": 9.43459348623181e-06, "loss": 0.6052, "step": 2814 }, { "epoch": 0.18, "grad_norm": 1.0459119081497192, "learning_rate": 9.434119460164816e-06, "loss": 0.5217, "step": 2815 }, { "epoch": 0.18, "grad_norm": 1.0917023420333862, "learning_rate": 9.433645247392476e-06, "loss": 0.543, "step": 2816 }, { "epoch": 0.18, "grad_norm": 1.0550017356872559, "learning_rate": 9.433170847934759e-06, "loss": 0.6093, "step": 2817 }, { "epoch": 0.18, "grad_norm": 1.098501443862915, "learning_rate": 9.432696261811637e-06, "loss": 0.5914, "step": 2818 }, { "epoch": 0.18, "grad_norm": 1.0482356548309326, "learning_rate": 9.432221489043097e-06, "loss": 0.5857, "step": 2819 }, { "epoch": 0.18, "grad_norm": 1.0572627782821655, "learning_rate": 9.43174652964913e-06, "loss": 0.5746, "step": 2820 }, { "epoch": 0.18, "grad_norm": 1.1536352634429932, "learning_rate": 9.431271383649731e-06, "loss": 0.5707, "step": 2821 }, { "epoch": 0.18, "grad_norm": 0.993578314781189, "learning_rate": 9.430796051064913e-06, "loss": 0.5792, "step": 2822 }, { "epoch": 0.18, "grad_norm": 1.0542477369308472, "learning_rate": 9.430320531914683e-06, "loss": 0.5783, "step": 2823 }, { "epoch": 0.18, "grad_norm": 1.0634816884994507, "learning_rate": 9.42984482621907e-06, "loss": 0.5851, "step": 2824 }, { "epoch": 0.18, "grad_norm": 0.9990572333335876, "learning_rate": 9.4293689339981e-06, "loss": 0.5078, "step": 2825 }, { "epoch": 0.18, "grad_norm": 1.158766269683838, "learning_rate": 9.428892855271813e-06, "loss": 0.6321, "step": 2826 }, { "epoch": 0.18, "grad_norm": 1.0206190347671509, "learning_rate": 9.428416590060256e-06, "loss": 0.5622, "step": 2827 }, { "epoch": 0.18, "grad_norm": 1.0901415348052979, "learning_rate": 9.427940138383482e-06, "loss": 0.6017, "step": 2828 }, { "epoch": 0.18, "grad_norm": 1.0550830364227295, "learning_rate": 9.427463500261551e-06, "loss": 0.6087, "step": 2829 }, { "epoch": 0.18, "grad_norm": 1.0264674425125122, "learning_rate": 9.426986675714535e-06, "loss": 0.523, "step": 2830 }, { "epoch": 0.18, "grad_norm": 0.9756189584732056, "learning_rate": 9.426509664762509e-06, "loss": 0.4892, "step": 2831 }, { "epoch": 0.18, "grad_norm": 1.1184206008911133, "learning_rate": 9.42603246742556e-06, "loss": 0.5733, "step": 2832 }, { "epoch": 0.18, "grad_norm": 1.1206648349761963, "learning_rate": 9.425555083723783e-06, "loss": 0.5927, "step": 2833 }, { "epoch": 0.18, "grad_norm": 1.0198521614074707, "learning_rate": 9.425077513677276e-06, "loss": 0.5436, "step": 2834 }, { "epoch": 0.18, "grad_norm": 0.9669837355613708, "learning_rate": 9.424599757306148e-06, "loss": 0.5645, "step": 2835 }, { "epoch": 0.18, "grad_norm": 1.0391610860824585, "learning_rate": 9.424121814630516e-06, "loss": 0.5046, "step": 2836 }, { "epoch": 0.18, "grad_norm": 1.0172545909881592, "learning_rate": 9.423643685670504e-06, "loss": 0.5923, "step": 2837 }, { "epoch": 0.18, "grad_norm": 1.0235159397125244, "learning_rate": 9.423165370446249e-06, "loss": 0.5294, "step": 2838 }, { "epoch": 0.18, "grad_norm": 1.0332740545272827, "learning_rate": 9.422686868977884e-06, "loss": 0.5247, "step": 2839 }, { "epoch": 0.18, "grad_norm": 1.1540343761444092, "learning_rate": 9.42220818128556e-06, "loss": 0.5839, "step": 2840 }, { "epoch": 0.18, "grad_norm": 1.0458693504333496, "learning_rate": 9.421729307389435e-06, "loss": 0.5583, "step": 2841 }, { "epoch": 0.18, "grad_norm": 1.0047495365142822, "learning_rate": 9.42125024730967e-06, "loss": 0.5742, "step": 2842 }, { "epoch": 0.18, "grad_norm": 1.1194878816604614, "learning_rate": 9.420771001066439e-06, "loss": 0.6002, "step": 2843 }, { "epoch": 0.18, "grad_norm": 1.068681240081787, "learning_rate": 9.420291568679917e-06, "loss": 0.6148, "step": 2844 }, { "epoch": 0.18, "grad_norm": 1.0517293214797974, "learning_rate": 9.419811950170294e-06, "loss": 0.5744, "step": 2845 }, { "epoch": 0.18, "grad_norm": 1.161916971206665, "learning_rate": 9.419332145557768e-06, "loss": 0.5964, "step": 2846 }, { "epoch": 0.18, "grad_norm": 1.14397394657135, "learning_rate": 9.418852154862538e-06, "loss": 0.5905, "step": 2847 }, { "epoch": 0.18, "grad_norm": 1.0074042081832886, "learning_rate": 9.418371978104816e-06, "loss": 0.5582, "step": 2848 }, { "epoch": 0.18, "grad_norm": 1.015149474143982, "learning_rate": 9.41789161530482e-06, "loss": 0.5931, "step": 2849 }, { "epoch": 0.18, "grad_norm": 1.0828512907028198, "learning_rate": 9.417411066482777e-06, "loss": 0.5581, "step": 2850 }, { "epoch": 0.18, "grad_norm": 1.0007314682006836, "learning_rate": 9.41693033165892e-06, "loss": 0.5666, "step": 2851 }, { "epoch": 0.18, "grad_norm": 1.0738846063613892, "learning_rate": 9.416449410853495e-06, "loss": 0.5609, "step": 2852 }, { "epoch": 0.18, "grad_norm": 1.1264728307724, "learning_rate": 9.415968304086746e-06, "loss": 0.5753, "step": 2853 }, { "epoch": 0.18, "grad_norm": 1.0523388385772705, "learning_rate": 9.415487011378935e-06, "loss": 0.5625, "step": 2854 }, { "epoch": 0.18, "grad_norm": 1.051973581314087, "learning_rate": 9.415005532750326e-06, "loss": 0.5322, "step": 2855 }, { "epoch": 0.18, "grad_norm": 1.1547439098358154, "learning_rate": 9.414523868221192e-06, "loss": 0.623, "step": 2856 }, { "epoch": 0.18, "grad_norm": 0.9445652365684509, "learning_rate": 9.414042017811817e-06, "loss": 0.5596, "step": 2857 }, { "epoch": 0.18, "grad_norm": 0.9419575333595276, "learning_rate": 9.413559981542486e-06, "loss": 0.5771, "step": 2858 }, { "epoch": 0.18, "grad_norm": 1.0547152757644653, "learning_rate": 9.413077759433498e-06, "loss": 0.5862, "step": 2859 }, { "epoch": 0.18, "grad_norm": 1.1181983947753906, "learning_rate": 9.412595351505158e-06, "loss": 0.5518, "step": 2860 }, { "epoch": 0.18, "grad_norm": 0.9860160946846008, "learning_rate": 9.412112757777777e-06, "loss": 0.5856, "step": 2861 }, { "epoch": 0.18, "grad_norm": 1.0584853887557983, "learning_rate": 9.411629978271679e-06, "loss": 0.5884, "step": 2862 }, { "epoch": 0.18, "grad_norm": 1.132488489151001, "learning_rate": 9.411147013007188e-06, "loss": 0.5753, "step": 2863 }, { "epoch": 0.18, "grad_norm": 1.0454785823822021, "learning_rate": 9.41066386200464e-06, "loss": 0.5617, "step": 2864 }, { "epoch": 0.18, "grad_norm": 0.9679178595542908, "learning_rate": 9.410180525284384e-06, "loss": 0.5144, "step": 2865 }, { "epoch": 0.18, "grad_norm": 0.9169051051139832, "learning_rate": 9.409697002866765e-06, "loss": 0.5357, "step": 2866 }, { "epoch": 0.18, "grad_norm": 1.045176386833191, "learning_rate": 9.409213294772147e-06, "loss": 0.5865, "step": 2867 }, { "epoch": 0.18, "grad_norm": 1.0670503377914429, "learning_rate": 9.408729401020896e-06, "loss": 0.5824, "step": 2868 }, { "epoch": 0.18, "grad_norm": 1.10625422000885, "learning_rate": 9.408245321633385e-06, "loss": 0.5535, "step": 2869 }, { "epoch": 0.18, "grad_norm": 0.9990093111991882, "learning_rate": 9.407761056629999e-06, "loss": 0.5219, "step": 2870 }, { "epoch": 0.18, "grad_norm": 1.0798190832138062, "learning_rate": 9.40727660603113e-06, "loss": 0.5537, "step": 2871 }, { "epoch": 0.18, "grad_norm": 1.19645094871521, "learning_rate": 9.406791969857173e-06, "loss": 0.6066, "step": 2872 }, { "epoch": 0.18, "grad_norm": 1.039581537246704, "learning_rate": 9.406307148128537e-06, "loss": 0.5903, "step": 2873 }, { "epoch": 0.18, "grad_norm": 1.1147894859313965, "learning_rate": 9.405822140865636e-06, "loss": 0.5871, "step": 2874 }, { "epoch": 0.18, "grad_norm": 1.0248534679412842, "learning_rate": 9.40533694808889e-06, "loss": 0.5069, "step": 2875 }, { "epoch": 0.18, "grad_norm": 1.0174291133880615, "learning_rate": 9.404851569818731e-06, "loss": 0.5817, "step": 2876 }, { "epoch": 0.18, "grad_norm": 1.1911200284957886, "learning_rate": 9.404366006075596e-06, "loss": 0.645, "step": 2877 }, { "epoch": 0.18, "grad_norm": 1.1297792196273804, "learning_rate": 9.403880256879931e-06, "loss": 0.6082, "step": 2878 }, { "epoch": 0.18, "grad_norm": 1.1312628984451294, "learning_rate": 9.403394322252186e-06, "loss": 0.5478, "step": 2879 }, { "epoch": 0.18, "grad_norm": 0.9603383541107178, "learning_rate": 9.402908202212826e-06, "loss": 0.5066, "step": 2880 }, { "epoch": 0.18, "grad_norm": 0.9875933527946472, "learning_rate": 9.402421896782319e-06, "loss": 0.5505, "step": 2881 }, { "epoch": 0.18, "grad_norm": 0.9948558807373047, "learning_rate": 9.401935405981138e-06, "loss": 0.5821, "step": 2882 }, { "epoch": 0.18, "grad_norm": 1.1072025299072266, "learning_rate": 9.401448729829773e-06, "loss": 0.5781, "step": 2883 }, { "epoch": 0.18, "grad_norm": 1.002524495124817, "learning_rate": 9.400961868348713e-06, "loss": 0.5846, "step": 2884 }, { "epoch": 0.18, "grad_norm": 1.030574917793274, "learning_rate": 9.400474821558457e-06, "loss": 0.5437, "step": 2885 }, { "epoch": 0.18, "grad_norm": 1.0898710489273071, "learning_rate": 9.399987589479516e-06, "loss": 0.5841, "step": 2886 }, { "epoch": 0.18, "grad_norm": 1.0254002809524536, "learning_rate": 9.399500172132403e-06, "loss": 0.5442, "step": 2887 }, { "epoch": 0.18, "grad_norm": 1.0504629611968994, "learning_rate": 9.399012569537643e-06, "loss": 0.5413, "step": 2888 }, { "epoch": 0.18, "grad_norm": 1.114540457725525, "learning_rate": 9.398524781715767e-06, "loss": 0.5611, "step": 2889 }, { "epoch": 0.18, "grad_norm": 1.0113818645477295, "learning_rate": 9.398036808687314e-06, "loss": 0.5544, "step": 2890 }, { "epoch": 0.18, "grad_norm": 1.0500532388687134, "learning_rate": 9.39754865047283e-06, "loss": 0.5893, "step": 2891 }, { "epoch": 0.18, "grad_norm": 0.9870203733444214, "learning_rate": 9.39706030709287e-06, "loss": 0.4995, "step": 2892 }, { "epoch": 0.18, "grad_norm": 1.0355092287063599, "learning_rate": 9.396571778567997e-06, "loss": 0.5499, "step": 2893 }, { "epoch": 0.18, "grad_norm": 1.0265161991119385, "learning_rate": 9.396083064918782e-06, "loss": 0.5549, "step": 2894 }, { "epoch": 0.18, "grad_norm": 1.060455560684204, "learning_rate": 9.3955941661658e-06, "loss": 0.603, "step": 2895 }, { "epoch": 0.18, "grad_norm": 1.0171563625335693, "learning_rate": 9.39510508232964e-06, "loss": 0.5435, "step": 2896 }, { "epoch": 0.18, "grad_norm": 0.9958793520927429, "learning_rate": 9.394615813430895e-06, "loss": 0.5376, "step": 2897 }, { "epoch": 0.18, "grad_norm": 1.0014652013778687, "learning_rate": 9.394126359490166e-06, "loss": 0.5651, "step": 2898 }, { "epoch": 0.18, "grad_norm": 1.0729966163635254, "learning_rate": 9.393636720528061e-06, "loss": 0.5663, "step": 2899 }, { "epoch": 0.18, "grad_norm": 1.0536006689071655, "learning_rate": 9.393146896565197e-06, "loss": 0.5999, "step": 2900 }, { "epoch": 0.18, "grad_norm": 1.0769306421279907, "learning_rate": 9.392656887622202e-06, "loss": 0.5778, "step": 2901 }, { "epoch": 0.18, "grad_norm": 1.0698580741882324, "learning_rate": 9.392166693719706e-06, "loss": 0.5471, "step": 2902 }, { "epoch": 0.18, "grad_norm": 0.9738646745681763, "learning_rate": 9.391676314878348e-06, "loss": 0.5611, "step": 2903 }, { "epoch": 0.18, "grad_norm": 1.0991955995559692, "learning_rate": 9.391185751118782e-06, "loss": 0.6088, "step": 2904 }, { "epoch": 0.18, "grad_norm": 1.0504570007324219, "learning_rate": 9.390695002461657e-06, "loss": 0.5671, "step": 2905 }, { "epoch": 0.18, "grad_norm": 0.9739258885383606, "learning_rate": 9.390204068927638e-06, "loss": 0.5751, "step": 2906 }, { "epoch": 0.18, "grad_norm": 1.022497296333313, "learning_rate": 9.389712950537399e-06, "loss": 0.5703, "step": 2907 }, { "epoch": 0.18, "grad_norm": 1.0213046073913574, "learning_rate": 9.38922164731162e-06, "loss": 0.5869, "step": 2908 }, { "epoch": 0.18, "grad_norm": 0.9645665287971497, "learning_rate": 9.388730159270984e-06, "loss": 0.4713, "step": 2909 }, { "epoch": 0.18, "grad_norm": 1.0854631662368774, "learning_rate": 9.38823848643619e-06, "loss": 0.5605, "step": 2910 }, { "epoch": 0.18, "grad_norm": 1.1496248245239258, "learning_rate": 9.38774662882794e-06, "loss": 0.676, "step": 2911 }, { "epoch": 0.18, "grad_norm": 0.986517071723938, "learning_rate": 9.387254586466942e-06, "loss": 0.5374, "step": 2912 }, { "epoch": 0.18, "grad_norm": 0.9511660933494568, "learning_rate": 9.386762359373915e-06, "loss": 0.5468, "step": 2913 }, { "epoch": 0.18, "grad_norm": 0.9129959940910339, "learning_rate": 9.386269947569585e-06, "loss": 0.5195, "step": 2914 }, { "epoch": 0.18, "grad_norm": 1.0946681499481201, "learning_rate": 9.385777351074688e-06, "loss": 0.5783, "step": 2915 }, { "epoch": 0.18, "grad_norm": 1.0663743019104004, "learning_rate": 9.385284569909963e-06, "loss": 0.5628, "step": 2916 }, { "epoch": 0.18, "grad_norm": 0.9548754096031189, "learning_rate": 9.38479160409616e-06, "loss": 0.5574, "step": 2917 }, { "epoch": 0.18, "grad_norm": 1.0848325490951538, "learning_rate": 9.384298453654037e-06, "loss": 0.5916, "step": 2918 }, { "epoch": 0.18, "grad_norm": 0.934111475944519, "learning_rate": 9.383805118604357e-06, "loss": 0.5702, "step": 2919 }, { "epoch": 0.18, "grad_norm": 1.025260090827942, "learning_rate": 9.383311598967892e-06, "loss": 0.5516, "step": 2920 }, { "epoch": 0.19, "grad_norm": 1.0210864543914795, "learning_rate": 9.382817894765426e-06, "loss": 0.6516, "step": 2921 }, { "epoch": 0.19, "grad_norm": 1.0006023645401, "learning_rate": 9.382324006017746e-06, "loss": 0.5987, "step": 2922 }, { "epoch": 0.19, "grad_norm": 1.0651023387908936, "learning_rate": 9.381829932745646e-06, "loss": 0.5347, "step": 2923 }, { "epoch": 0.19, "grad_norm": 1.056115746498108, "learning_rate": 9.38133567496993e-06, "loss": 0.5696, "step": 2924 }, { "epoch": 0.19, "grad_norm": 1.0541669130325317, "learning_rate": 9.380841232711412e-06, "loss": 0.5691, "step": 2925 }, { "epoch": 0.19, "grad_norm": 1.1002367734909058, "learning_rate": 9.38034660599091e-06, "loss": 0.5373, "step": 2926 }, { "epoch": 0.19, "grad_norm": 1.0014314651489258, "learning_rate": 9.379851794829247e-06, "loss": 0.5326, "step": 2927 }, { "epoch": 0.19, "grad_norm": 1.0438270568847656, "learning_rate": 9.379356799247263e-06, "loss": 0.6348, "step": 2928 }, { "epoch": 0.19, "grad_norm": 1.0377657413482666, "learning_rate": 9.3788616192658e-06, "loss": 0.5942, "step": 2929 }, { "epoch": 0.19, "grad_norm": 1.029208779335022, "learning_rate": 9.378366254905706e-06, "loss": 0.5666, "step": 2930 }, { "epoch": 0.19, "grad_norm": 1.0012551546096802, "learning_rate": 9.377870706187841e-06, "loss": 0.521, "step": 2931 }, { "epoch": 0.19, "grad_norm": 1.096712589263916, "learning_rate": 9.37737497313307e-06, "loss": 0.5228, "step": 2932 }, { "epoch": 0.19, "grad_norm": 0.932421863079071, "learning_rate": 9.376879055762267e-06, "loss": 0.4983, "step": 2933 }, { "epoch": 0.19, "grad_norm": 1.041609287261963, "learning_rate": 9.376382954096312e-06, "loss": 0.5816, "step": 2934 }, { "epoch": 0.19, "grad_norm": 0.9358463287353516, "learning_rate": 9.375886668156095e-06, "loss": 0.5758, "step": 2935 }, { "epoch": 0.19, "grad_norm": 1.065076231956482, "learning_rate": 9.375390197962514e-06, "loss": 0.5325, "step": 2936 }, { "epoch": 0.19, "grad_norm": 1.0218020677566528, "learning_rate": 9.374893543536471e-06, "loss": 0.538, "step": 2937 }, { "epoch": 0.19, "grad_norm": 1.0039360523223877, "learning_rate": 9.374396704898883e-06, "loss": 0.5959, "step": 2938 }, { "epoch": 0.19, "grad_norm": 1.0219570398330688, "learning_rate": 9.373899682070664e-06, "loss": 0.5818, "step": 2939 }, { "epoch": 0.19, "grad_norm": 1.084692358970642, "learning_rate": 9.373402475072746e-06, "loss": 0.5754, "step": 2940 }, { "epoch": 0.19, "grad_norm": 0.9906976819038391, "learning_rate": 9.372905083926064e-06, "loss": 0.5624, "step": 2941 }, { "epoch": 0.19, "grad_norm": 1.009021520614624, "learning_rate": 9.372407508651561e-06, "loss": 0.5557, "step": 2942 }, { "epoch": 0.19, "grad_norm": 0.9522027373313904, "learning_rate": 9.371909749270189e-06, "loss": 0.5369, "step": 2943 }, { "epoch": 0.19, "grad_norm": 1.0123964548110962, "learning_rate": 9.371411805802907e-06, "loss": 0.5701, "step": 2944 }, { "epoch": 0.19, "grad_norm": 1.0369518995285034, "learning_rate": 9.370913678270678e-06, "loss": 0.5329, "step": 2945 }, { "epoch": 0.19, "grad_norm": 1.0261355638504028, "learning_rate": 9.370415366694483e-06, "loss": 0.5628, "step": 2946 }, { "epoch": 0.19, "grad_norm": 1.0193971395492554, "learning_rate": 9.369916871095299e-06, "loss": 0.5684, "step": 2947 }, { "epoch": 0.19, "grad_norm": 0.948062002658844, "learning_rate": 9.369418191494117e-06, "loss": 0.5665, "step": 2948 }, { "epoch": 0.19, "grad_norm": 1.036242961883545, "learning_rate": 9.368919327911934e-06, "loss": 0.5659, "step": 2949 }, { "epoch": 0.19, "grad_norm": 1.047235131263733, "learning_rate": 9.368420280369759e-06, "loss": 0.5915, "step": 2950 }, { "epoch": 0.19, "grad_norm": 1.0826328992843628, "learning_rate": 9.3679210488886e-06, "loss": 0.567, "step": 2951 }, { "epoch": 0.19, "grad_norm": 0.9877567887306213, "learning_rate": 9.367421633489482e-06, "loss": 0.6275, "step": 2952 }, { "epoch": 0.19, "grad_norm": 0.9576612710952759, "learning_rate": 9.366922034193431e-06, "loss": 0.5483, "step": 2953 }, { "epoch": 0.19, "grad_norm": 1.0604145526885986, "learning_rate": 9.366422251021486e-06, "loss": 0.562, "step": 2954 }, { "epoch": 0.19, "grad_norm": 0.986191987991333, "learning_rate": 9.365922283994689e-06, "loss": 0.5494, "step": 2955 }, { "epoch": 0.19, "grad_norm": 1.1650367975234985, "learning_rate": 9.365422133134093e-06, "loss": 0.5687, "step": 2956 }, { "epoch": 0.19, "grad_norm": 1.0348550081253052, "learning_rate": 9.364921798460756e-06, "loss": 0.558, "step": 2957 }, { "epoch": 0.19, "grad_norm": 1.052424430847168, "learning_rate": 9.364421279995747e-06, "loss": 0.593, "step": 2958 }, { "epoch": 0.19, "grad_norm": 0.9829449653625488, "learning_rate": 9.36392057776014e-06, "loss": 0.5745, "step": 2959 }, { "epoch": 0.19, "grad_norm": 1.0334904193878174, "learning_rate": 9.36341969177502e-06, "loss": 0.51, "step": 2960 }, { "epoch": 0.19, "grad_norm": 1.0335619449615479, "learning_rate": 9.362918622061475e-06, "loss": 0.535, "step": 2961 }, { "epoch": 0.19, "grad_norm": 0.9990548491477966, "learning_rate": 9.362417368640604e-06, "loss": 0.5253, "step": 2962 }, { "epoch": 0.19, "grad_norm": 0.9685349464416504, "learning_rate": 9.361915931533513e-06, "loss": 0.5622, "step": 2963 }, { "epoch": 0.19, "grad_norm": 0.9835405349731445, "learning_rate": 9.36141431076132e-06, "loss": 0.5842, "step": 2964 }, { "epoch": 0.19, "grad_norm": 1.0301471948623657, "learning_rate": 9.360912506345139e-06, "loss": 0.5497, "step": 2965 }, { "epoch": 0.19, "grad_norm": 0.9751991033554077, "learning_rate": 9.360410518306103e-06, "loss": 0.5084, "step": 2966 }, { "epoch": 0.19, "grad_norm": 0.9731338024139404, "learning_rate": 9.359908346665349e-06, "loss": 0.5161, "step": 2967 }, { "epoch": 0.19, "grad_norm": 0.9955142140388489, "learning_rate": 9.359405991444022e-06, "loss": 0.6021, "step": 2968 }, { "epoch": 0.19, "grad_norm": 1.101694107055664, "learning_rate": 9.358903452663273e-06, "loss": 0.6029, "step": 2969 }, { "epoch": 0.19, "grad_norm": 1.1504160165786743, "learning_rate": 9.358400730344265e-06, "loss": 0.5574, "step": 2970 }, { "epoch": 0.19, "grad_norm": 0.9698602557182312, "learning_rate": 9.357897824508163e-06, "loss": 0.5278, "step": 2971 }, { "epoch": 0.19, "grad_norm": 1.0260837078094482, "learning_rate": 9.357394735176144e-06, "loss": 0.5825, "step": 2972 }, { "epoch": 0.19, "grad_norm": 0.9875388741493225, "learning_rate": 9.356891462369391e-06, "loss": 0.545, "step": 2973 }, { "epoch": 0.19, "grad_norm": 1.0932499170303345, "learning_rate": 9.356388006109094e-06, "loss": 0.5889, "step": 2974 }, { "epoch": 0.19, "grad_norm": 0.8903037309646606, "learning_rate": 9.355884366416454e-06, "loss": 0.542, "step": 2975 }, { "epoch": 0.19, "grad_norm": 1.0137654542922974, "learning_rate": 9.355380543312676e-06, "loss": 0.6136, "step": 2976 }, { "epoch": 0.19, "grad_norm": 0.9530428051948547, "learning_rate": 9.354876536818974e-06, "loss": 0.5744, "step": 2977 }, { "epoch": 0.19, "grad_norm": 0.9858264923095703, "learning_rate": 9.35437234695657e-06, "loss": 0.564, "step": 2978 }, { "epoch": 0.19, "grad_norm": 1.0544084310531616, "learning_rate": 9.353867973746696e-06, "loss": 0.5591, "step": 2979 }, { "epoch": 0.19, "grad_norm": 1.011245846748352, "learning_rate": 9.353363417210587e-06, "loss": 0.5045, "step": 2980 }, { "epoch": 0.19, "grad_norm": 1.0920368432998657, "learning_rate": 9.352858677369488e-06, "loss": 0.5875, "step": 2981 }, { "epoch": 0.19, "grad_norm": 0.9856042861938477, "learning_rate": 9.352353754244654e-06, "loss": 0.5399, "step": 2982 }, { "epoch": 0.19, "grad_norm": 0.958921492099762, "learning_rate": 9.351848647857343e-06, "loss": 0.5416, "step": 2983 }, { "epoch": 0.19, "grad_norm": 0.9829522967338562, "learning_rate": 9.351343358228825e-06, "loss": 0.5362, "step": 2984 }, { "epoch": 0.19, "grad_norm": 1.0200073719024658, "learning_rate": 9.350837885380375e-06, "loss": 0.5788, "step": 2985 }, { "epoch": 0.19, "grad_norm": 1.0267620086669922, "learning_rate": 9.350332229333276e-06, "loss": 0.55, "step": 2986 }, { "epoch": 0.19, "grad_norm": 0.9330299496650696, "learning_rate": 9.349826390108823e-06, "loss": 0.5397, "step": 2987 }, { "epoch": 0.19, "grad_norm": 0.913159191608429, "learning_rate": 9.349320367728312e-06, "loss": 0.5045, "step": 2988 }, { "epoch": 0.19, "grad_norm": 1.0245423316955566, "learning_rate": 9.34881416221305e-06, "loss": 0.5595, "step": 2989 }, { "epoch": 0.19, "grad_norm": 0.9669381976127625, "learning_rate": 9.348307773584351e-06, "loss": 0.5488, "step": 2990 }, { "epoch": 0.19, "grad_norm": 1.0299818515777588, "learning_rate": 9.34780120186354e-06, "loss": 0.5729, "step": 2991 }, { "epoch": 0.19, "grad_norm": 0.9824283719062805, "learning_rate": 9.347294447071945e-06, "loss": 0.5487, "step": 2992 }, { "epoch": 0.19, "grad_norm": 1.0039525032043457, "learning_rate": 9.346787509230903e-06, "loss": 0.5602, "step": 2993 }, { "epoch": 0.19, "grad_norm": 0.9912124276161194, "learning_rate": 9.346280388361761e-06, "loss": 0.5508, "step": 2994 }, { "epoch": 0.19, "grad_norm": 1.016176462173462, "learning_rate": 9.34577308448587e-06, "loss": 0.5502, "step": 2995 }, { "epoch": 0.19, "grad_norm": 1.0755479335784912, "learning_rate": 9.345265597624595e-06, "loss": 0.5636, "step": 2996 }, { "epoch": 0.19, "grad_norm": 1.038004994392395, "learning_rate": 9.344757927799299e-06, "loss": 0.5851, "step": 2997 }, { "epoch": 0.19, "grad_norm": 1.0409660339355469, "learning_rate": 9.344250075031362e-06, "loss": 0.5517, "step": 2998 }, { "epoch": 0.19, "grad_norm": 0.9538517594337463, "learning_rate": 9.343742039342168e-06, "loss": 0.5182, "step": 2999 }, { "epoch": 0.19, "grad_norm": 0.941325306892395, "learning_rate": 9.343233820753107e-06, "loss": 0.5663, "step": 3000 }, { "epoch": 0.19, "grad_norm": 0.998930811882019, "learning_rate": 9.34272541928558e-06, "loss": 0.5677, "step": 3001 }, { "epoch": 0.19, "grad_norm": 0.9965593814849854, "learning_rate": 9.34221683496099e-06, "loss": 0.5179, "step": 3002 }, { "epoch": 0.19, "grad_norm": 1.0460489988327026, "learning_rate": 9.341708067800757e-06, "loss": 0.55, "step": 3003 }, { "epoch": 0.19, "grad_norm": 1.0359423160552979, "learning_rate": 9.341199117826298e-06, "loss": 0.5749, "step": 3004 }, { "epoch": 0.19, "grad_norm": 0.9712229371070862, "learning_rate": 9.340689985059048e-06, "loss": 0.5765, "step": 3005 }, { "epoch": 0.19, "grad_norm": 1.0309653282165527, "learning_rate": 9.340180669520443e-06, "loss": 0.5637, "step": 3006 }, { "epoch": 0.19, "grad_norm": 1.0827995538711548, "learning_rate": 9.339671171231929e-06, "loss": 0.5617, "step": 3007 }, { "epoch": 0.19, "grad_norm": 0.9527615904808044, "learning_rate": 9.339161490214957e-06, "loss": 0.5781, "step": 3008 }, { "epoch": 0.19, "grad_norm": 0.983928918838501, "learning_rate": 9.33865162649099e-06, "loss": 0.534, "step": 3009 }, { "epoch": 0.19, "grad_norm": 0.9210485816001892, "learning_rate": 9.338141580081496e-06, "loss": 0.509, "step": 3010 }, { "epoch": 0.19, "grad_norm": 1.131502389907837, "learning_rate": 9.337631351007953e-06, "loss": 0.523, "step": 3011 }, { "epoch": 0.19, "grad_norm": 1.0279563665390015, "learning_rate": 9.337120939291842e-06, "loss": 0.5423, "step": 3012 }, { "epoch": 0.19, "grad_norm": 1.0946483612060547, "learning_rate": 9.336610344954656e-06, "loss": 0.5313, "step": 3013 }, { "epoch": 0.19, "grad_norm": 1.0433611869812012, "learning_rate": 9.336099568017895e-06, "loss": 0.5194, "step": 3014 }, { "epoch": 0.19, "grad_norm": 1.0965982675552368, "learning_rate": 9.335588608503065e-06, "loss": 0.5643, "step": 3015 }, { "epoch": 0.19, "grad_norm": 1.0462230443954468, "learning_rate": 9.33507746643168e-06, "loss": 0.545, "step": 3016 }, { "epoch": 0.19, "grad_norm": 1.0247931480407715, "learning_rate": 9.334566141825266e-06, "loss": 0.5885, "step": 3017 }, { "epoch": 0.19, "grad_norm": 0.9968416094779968, "learning_rate": 9.334054634705347e-06, "loss": 0.5317, "step": 3018 }, { "epoch": 0.19, "grad_norm": 1.109590768814087, "learning_rate": 9.333542945093468e-06, "loss": 0.5622, "step": 3019 }, { "epoch": 0.19, "grad_norm": 1.0385175943374634, "learning_rate": 9.333031073011169e-06, "loss": 0.5883, "step": 3020 }, { "epoch": 0.19, "grad_norm": 1.0782296657562256, "learning_rate": 9.332519018480005e-06, "loss": 0.6121, "step": 3021 }, { "epoch": 0.19, "grad_norm": 1.1267576217651367, "learning_rate": 9.332006781521537e-06, "loss": 0.5382, "step": 3022 }, { "epoch": 0.19, "grad_norm": 1.0112955570220947, "learning_rate": 9.331494362157335e-06, "loss": 0.5186, "step": 3023 }, { "epoch": 0.19, "grad_norm": 1.0713708400726318, "learning_rate": 9.330981760408972e-06, "loss": 0.55, "step": 3024 }, { "epoch": 0.19, "grad_norm": 0.981164813041687, "learning_rate": 9.330468976298033e-06, "loss": 0.5468, "step": 3025 }, { "epoch": 0.19, "grad_norm": 1.0624793767929077, "learning_rate": 9.329956009846111e-06, "loss": 0.573, "step": 3026 }, { "epoch": 0.19, "grad_norm": 1.0366548299789429, "learning_rate": 9.329442861074803e-06, "loss": 0.5816, "step": 3027 }, { "epoch": 0.19, "grad_norm": 1.0300683975219727, "learning_rate": 9.328929530005717e-06, "loss": 0.565, "step": 3028 }, { "epoch": 0.19, "grad_norm": 1.0933263301849365, "learning_rate": 9.328416016660471e-06, "loss": 0.5633, "step": 3029 }, { "epoch": 0.19, "grad_norm": 1.003050684928894, "learning_rate": 9.327902321060681e-06, "loss": 0.5039, "step": 3030 }, { "epoch": 0.19, "grad_norm": 1.0441280603408813, "learning_rate": 9.327388443227981e-06, "loss": 0.5935, "step": 3031 }, { "epoch": 0.19, "grad_norm": 1.143961787223816, "learning_rate": 9.326874383184006e-06, "loss": 0.5781, "step": 3032 }, { "epoch": 0.19, "grad_norm": 1.0665271282196045, "learning_rate": 9.326360140950406e-06, "loss": 0.5428, "step": 3033 }, { "epoch": 0.19, "grad_norm": 1.0226956605911255, "learning_rate": 9.325845716548827e-06, "loss": 0.5855, "step": 3034 }, { "epoch": 0.19, "grad_norm": 1.1042506694793701, "learning_rate": 9.325331110000937e-06, "loss": 0.5883, "step": 3035 }, { "epoch": 0.19, "grad_norm": 1.0352132320404053, "learning_rate": 9.324816321328398e-06, "loss": 0.5992, "step": 3036 }, { "epoch": 0.19, "grad_norm": 0.9326235055923462, "learning_rate": 9.324301350552889e-06, "loss": 0.566, "step": 3037 }, { "epoch": 0.19, "grad_norm": 1.04964280128479, "learning_rate": 9.323786197696094e-06, "loss": 0.5552, "step": 3038 }, { "epoch": 0.19, "grad_norm": 1.1173388957977295, "learning_rate": 9.323270862779704e-06, "loss": 0.5909, "step": 3039 }, { "epoch": 0.19, "grad_norm": 1.0002803802490234, "learning_rate": 9.322755345825418e-06, "loss": 0.4963, "step": 3040 }, { "epoch": 0.19, "grad_norm": 0.9863665699958801, "learning_rate": 9.32223964685494e-06, "loss": 0.5456, "step": 3041 }, { "epoch": 0.19, "grad_norm": 1.0843548774719238, "learning_rate": 9.321723765889987e-06, "loss": 0.5545, "step": 3042 }, { "epoch": 0.19, "grad_norm": 1.0747976303100586, "learning_rate": 9.321207702952281e-06, "loss": 0.5945, "step": 3043 }, { "epoch": 0.19, "grad_norm": 1.2692376375198364, "learning_rate": 9.320691458063552e-06, "loss": 0.5809, "step": 3044 }, { "epoch": 0.19, "grad_norm": 0.9482994079589844, "learning_rate": 9.320175031245535e-06, "loss": 0.5497, "step": 3045 }, { "epoch": 0.19, "grad_norm": 1.094792366027832, "learning_rate": 9.319658422519977e-06, "loss": 0.6102, "step": 3046 }, { "epoch": 0.19, "grad_norm": 1.2238280773162842, "learning_rate": 9.319141631908628e-06, "loss": 0.6002, "step": 3047 }, { "epoch": 0.19, "grad_norm": 0.9731894135475159, "learning_rate": 9.318624659433254e-06, "loss": 0.5756, "step": 3048 }, { "epoch": 0.19, "grad_norm": 1.0275579690933228, "learning_rate": 9.318107505115615e-06, "loss": 0.5615, "step": 3049 }, { "epoch": 0.19, "grad_norm": 1.0008189678192139, "learning_rate": 9.317590168977492e-06, "loss": 0.5839, "step": 3050 }, { "epoch": 0.19, "grad_norm": 1.0680350065231323, "learning_rate": 9.317072651040666e-06, "loss": 0.5497, "step": 3051 }, { "epoch": 0.19, "grad_norm": 1.161730170249939, "learning_rate": 9.31655495132693e-06, "loss": 0.5489, "step": 3052 }, { "epoch": 0.19, "grad_norm": 1.0339173078536987, "learning_rate": 9.31603706985808e-06, "loss": 0.5424, "step": 3053 }, { "epoch": 0.19, "grad_norm": 1.064933180809021, "learning_rate": 9.315519006655925e-06, "loss": 0.6057, "step": 3054 }, { "epoch": 0.19, "grad_norm": 1.0022574663162231, "learning_rate": 9.315000761742276e-06, "loss": 0.5506, "step": 3055 }, { "epoch": 0.19, "grad_norm": 1.0306645631790161, "learning_rate": 9.314482335138954e-06, "loss": 0.5416, "step": 3056 }, { "epoch": 0.19, "grad_norm": 1.0025889873504639, "learning_rate": 9.313963726867793e-06, "loss": 0.5841, "step": 3057 }, { "epoch": 0.19, "grad_norm": 1.0571597814559937, "learning_rate": 9.313444936950626e-06, "loss": 0.5199, "step": 3058 }, { "epoch": 0.19, "grad_norm": 1.082176923751831, "learning_rate": 9.312925965409297e-06, "loss": 0.5846, "step": 3059 }, { "epoch": 0.19, "grad_norm": 1.1681592464447021, "learning_rate": 9.312406812265659e-06, "loss": 0.5624, "step": 3060 }, { "epoch": 0.19, "grad_norm": 1.0278209447860718, "learning_rate": 9.311887477541574e-06, "loss": 0.553, "step": 3061 }, { "epoch": 0.19, "grad_norm": 0.9465845227241516, "learning_rate": 9.311367961258906e-06, "loss": 0.5733, "step": 3062 }, { "epoch": 0.19, "grad_norm": 0.9811533093452454, "learning_rate": 9.31084826343953e-06, "loss": 0.541, "step": 3063 }, { "epoch": 0.19, "grad_norm": 1.119778037071228, "learning_rate": 9.310328384105331e-06, "loss": 0.5966, "step": 3064 }, { "epoch": 0.19, "grad_norm": 1.0411384105682373, "learning_rate": 9.309808323278199e-06, "loss": 0.5322, "step": 3065 }, { "epoch": 0.19, "grad_norm": 0.9605517387390137, "learning_rate": 9.30928808098003e-06, "loss": 0.5559, "step": 3066 }, { "epoch": 0.19, "grad_norm": 1.074902057647705, "learning_rate": 9.308767657232733e-06, "loss": 0.6172, "step": 3067 }, { "epoch": 0.19, "grad_norm": 1.038602590560913, "learning_rate": 9.308247052058217e-06, "loss": 0.5566, "step": 3068 }, { "epoch": 0.19, "grad_norm": 1.026792287826538, "learning_rate": 9.307726265478405e-06, "loss": 0.5894, "step": 3069 }, { "epoch": 0.19, "grad_norm": 1.0388686656951904, "learning_rate": 9.307205297515225e-06, "loss": 0.6314, "step": 3070 }, { "epoch": 0.19, "grad_norm": 1.082331657409668, "learning_rate": 9.306684148190616e-06, "loss": 0.607, "step": 3071 }, { "epoch": 0.19, "grad_norm": 1.00520920753479, "learning_rate": 9.306162817526519e-06, "loss": 0.6214, "step": 3072 }, { "epoch": 0.19, "grad_norm": 0.9240366220474243, "learning_rate": 9.305641305544884e-06, "loss": 0.5031, "step": 3073 }, { "epoch": 0.19, "grad_norm": 0.9548230171203613, "learning_rate": 9.305119612267673e-06, "loss": 0.555, "step": 3074 }, { "epoch": 0.19, "grad_norm": 1.0587652921676636, "learning_rate": 9.30459773771685e-06, "loss": 0.588, "step": 3075 }, { "epoch": 0.19, "grad_norm": 1.0245496034622192, "learning_rate": 9.304075681914392e-06, "loss": 0.5027, "step": 3076 }, { "epoch": 0.19, "grad_norm": 1.0220906734466553, "learning_rate": 9.30355344488228e-06, "loss": 0.5349, "step": 3077 }, { "epoch": 0.2, "grad_norm": 1.0830450057983398, "learning_rate": 9.303031026642504e-06, "loss": 0.5247, "step": 3078 }, { "epoch": 0.2, "grad_norm": 0.9544553756713867, "learning_rate": 9.302508427217059e-06, "loss": 0.5561, "step": 3079 }, { "epoch": 0.2, "grad_norm": 1.0785131454467773, "learning_rate": 9.301985646627953e-06, "loss": 0.587, "step": 3080 }, { "epoch": 0.2, "grad_norm": 0.9493861198425293, "learning_rate": 9.301462684897195e-06, "loss": 0.5444, "step": 3081 }, { "epoch": 0.2, "grad_norm": 0.9934033751487732, "learning_rate": 9.300939542046808e-06, "loss": 0.5378, "step": 3082 }, { "epoch": 0.2, "grad_norm": 1.0883805751800537, "learning_rate": 9.30041621809882e-06, "loss": 0.5908, "step": 3083 }, { "epoch": 0.2, "grad_norm": 1.0969899892807007, "learning_rate": 9.299892713075263e-06, "loss": 0.6121, "step": 3084 }, { "epoch": 0.2, "grad_norm": 1.0503705739974976, "learning_rate": 9.299369026998184e-06, "loss": 0.5734, "step": 3085 }, { "epoch": 0.2, "grad_norm": 1.048270344734192, "learning_rate": 9.298845159889632e-06, "loss": 0.5618, "step": 3086 }, { "epoch": 0.2, "grad_norm": 1.0619757175445557, "learning_rate": 9.298321111771664e-06, "loss": 0.5845, "step": 3087 }, { "epoch": 0.2, "grad_norm": 1.0658220052719116, "learning_rate": 9.297796882666346e-06, "loss": 0.5824, "step": 3088 }, { "epoch": 0.2, "grad_norm": 1.0340075492858887, "learning_rate": 9.297272472595753e-06, "loss": 0.5979, "step": 3089 }, { "epoch": 0.2, "grad_norm": 1.0165023803710938, "learning_rate": 9.296747881581965e-06, "loss": 0.56, "step": 3090 }, { "epoch": 0.2, "grad_norm": 0.9769580364227295, "learning_rate": 9.29622310964707e-06, "loss": 0.559, "step": 3091 }, { "epoch": 0.2, "grad_norm": 1.1017225980758667, "learning_rate": 9.295698156813167e-06, "loss": 0.5867, "step": 3092 }, { "epoch": 0.2, "grad_norm": 1.227968454360962, "learning_rate": 9.295173023102358e-06, "loss": 0.5888, "step": 3093 }, { "epoch": 0.2, "grad_norm": 1.072351336479187, "learning_rate": 9.294647708536754e-06, "loss": 0.5908, "step": 3094 }, { "epoch": 0.2, "grad_norm": 1.027206540107727, "learning_rate": 9.294122213138475e-06, "loss": 0.5276, "step": 3095 }, { "epoch": 0.2, "grad_norm": 1.1481643915176392, "learning_rate": 9.29359653692965e-06, "loss": 0.5594, "step": 3096 }, { "epoch": 0.2, "grad_norm": 0.9869410991668701, "learning_rate": 9.293070679932407e-06, "loss": 0.6166, "step": 3097 }, { "epoch": 0.2, "grad_norm": 1.051023244857788, "learning_rate": 9.292544642168896e-06, "loss": 0.523, "step": 3098 }, { "epoch": 0.2, "grad_norm": 1.0125713348388672, "learning_rate": 9.292018423661261e-06, "loss": 0.5428, "step": 3099 }, { "epoch": 0.2, "grad_norm": 1.1149213314056396, "learning_rate": 9.291492024431661e-06, "loss": 0.5854, "step": 3100 }, { "epoch": 0.2, "grad_norm": 1.1189168691635132, "learning_rate": 9.290965444502263e-06, "loss": 0.6149, "step": 3101 }, { "epoch": 0.2, "grad_norm": 0.9649505019187927, "learning_rate": 9.290438683895236e-06, "loss": 0.5618, "step": 3102 }, { "epoch": 0.2, "grad_norm": 1.0438565015792847, "learning_rate": 9.28991174263276e-06, "loss": 0.5917, "step": 3103 }, { "epoch": 0.2, "grad_norm": 1.2185555696487427, "learning_rate": 9.289384620737025e-06, "loss": 0.6123, "step": 3104 }, { "epoch": 0.2, "grad_norm": 1.0851901769638062, "learning_rate": 9.288857318230225e-06, "loss": 0.5576, "step": 3105 }, { "epoch": 0.2, "grad_norm": 1.0335770845413208, "learning_rate": 9.288329835134563e-06, "loss": 0.6028, "step": 3106 }, { "epoch": 0.2, "grad_norm": 1.0511239767074585, "learning_rate": 9.28780217147225e-06, "loss": 0.5304, "step": 3107 }, { "epoch": 0.2, "grad_norm": 1.0801007747650146, "learning_rate": 9.287274327265505e-06, "loss": 0.5758, "step": 3108 }, { "epoch": 0.2, "grad_norm": 1.0751490592956543, "learning_rate": 9.286746302536551e-06, "loss": 0.5577, "step": 3109 }, { "epoch": 0.2, "grad_norm": 1.0887144804000854, "learning_rate": 9.286218097307623e-06, "loss": 0.5569, "step": 3110 }, { "epoch": 0.2, "grad_norm": 1.0135072469711304, "learning_rate": 9.285689711600961e-06, "loss": 0.5706, "step": 3111 }, { "epoch": 0.2, "grad_norm": 1.0304912328720093, "learning_rate": 9.285161145438815e-06, "loss": 0.5775, "step": 3112 }, { "epoch": 0.2, "grad_norm": 0.9967204332351685, "learning_rate": 9.284632398843439e-06, "loss": 0.5224, "step": 3113 }, { "epoch": 0.2, "grad_norm": 1.1232625246047974, "learning_rate": 9.284103471837097e-06, "loss": 0.577, "step": 3114 }, { "epoch": 0.2, "grad_norm": 1.0335229635238647, "learning_rate": 9.283574364442066e-06, "loss": 0.5093, "step": 3115 }, { "epoch": 0.2, "grad_norm": 1.074714183807373, "learning_rate": 9.283045076680614e-06, "loss": 0.6136, "step": 3116 }, { "epoch": 0.2, "grad_norm": 1.0689712762832642, "learning_rate": 9.282515608575038e-06, "loss": 0.5831, "step": 3117 }, { "epoch": 0.2, "grad_norm": 0.9729591608047485, "learning_rate": 9.281985960147625e-06, "loss": 0.5608, "step": 3118 }, { "epoch": 0.2, "grad_norm": 1.0159952640533447, "learning_rate": 9.28145613142068e-06, "loss": 0.5936, "step": 3119 }, { "epoch": 0.2, "grad_norm": 1.0457758903503418, "learning_rate": 9.28092612241651e-06, "loss": 0.5497, "step": 3120 }, { "epoch": 0.2, "grad_norm": 1.0730384588241577, "learning_rate": 9.280395933157436e-06, "loss": 0.5271, "step": 3121 }, { "epoch": 0.2, "grad_norm": 0.9636267423629761, "learning_rate": 9.279865563665778e-06, "loss": 0.5305, "step": 3122 }, { "epoch": 0.2, "grad_norm": 0.9577395915985107, "learning_rate": 9.27933501396387e-06, "loss": 0.5282, "step": 3123 }, { "epoch": 0.2, "grad_norm": 0.9931013584136963, "learning_rate": 9.27880428407405e-06, "loss": 0.578, "step": 3124 }, { "epoch": 0.2, "grad_norm": 0.9747065305709839, "learning_rate": 9.278273374018669e-06, "loss": 0.5649, "step": 3125 }, { "epoch": 0.2, "grad_norm": 1.026253342628479, "learning_rate": 9.277742283820077e-06, "loss": 0.5885, "step": 3126 }, { "epoch": 0.2, "grad_norm": 1.0813730955123901, "learning_rate": 9.27721101350064e-06, "loss": 0.598, "step": 3127 }, { "epoch": 0.2, "grad_norm": 1.0212422609329224, "learning_rate": 9.276679563082726e-06, "loss": 0.5821, "step": 3128 }, { "epoch": 0.2, "grad_norm": 0.9639314413070679, "learning_rate": 9.276147932588712e-06, "loss": 0.5182, "step": 3129 }, { "epoch": 0.2, "grad_norm": 1.0123671293258667, "learning_rate": 9.275616122040985e-06, "loss": 0.5395, "step": 3130 }, { "epoch": 0.2, "grad_norm": 1.0312772989273071, "learning_rate": 9.275084131461938e-06, "loss": 0.5851, "step": 3131 }, { "epoch": 0.2, "grad_norm": 1.109800100326538, "learning_rate": 9.27455196087397e-06, "loss": 0.5442, "step": 3132 }, { "epoch": 0.2, "grad_norm": 1.0117939710617065, "learning_rate": 9.274019610299487e-06, "loss": 0.5628, "step": 3133 }, { "epoch": 0.2, "grad_norm": 0.981764018535614, "learning_rate": 9.273487079760908e-06, "loss": 0.5369, "step": 3134 }, { "epoch": 0.2, "grad_norm": 1.0255862474441528, "learning_rate": 9.272954369280654e-06, "loss": 0.5556, "step": 3135 }, { "epoch": 0.2, "grad_norm": 0.9613683819770813, "learning_rate": 9.272421478881158e-06, "loss": 0.5672, "step": 3136 }, { "epoch": 0.2, "grad_norm": 1.2361464500427246, "learning_rate": 9.271888408584852e-06, "loss": 0.5917, "step": 3137 }, { "epoch": 0.2, "grad_norm": 0.9689004421234131, "learning_rate": 9.27135515841419e-06, "loss": 0.5511, "step": 3138 }, { "epoch": 0.2, "grad_norm": 1.0080313682556152, "learning_rate": 9.27082172839162e-06, "loss": 0.548, "step": 3139 }, { "epoch": 0.2, "grad_norm": 1.0074330568313599, "learning_rate": 9.270288118539603e-06, "loss": 0.5812, "step": 3140 }, { "epoch": 0.2, "grad_norm": 1.0355607271194458, "learning_rate": 9.26975432888061e-06, "loss": 0.5253, "step": 3141 }, { "epoch": 0.2, "grad_norm": 0.9730377793312073, "learning_rate": 9.269220359437114e-06, "loss": 0.5238, "step": 3142 }, { "epoch": 0.2, "grad_norm": 1.0539954900741577, "learning_rate": 9.2686862102316e-06, "loss": 0.589, "step": 3143 }, { "epoch": 0.2, "grad_norm": 1.0077118873596191, "learning_rate": 9.268151881286561e-06, "loss": 0.5713, "step": 3144 }, { "epoch": 0.2, "grad_norm": 0.9483577013015747, "learning_rate": 9.267617372624494e-06, "loss": 0.5718, "step": 3145 }, { "epoch": 0.2, "grad_norm": 1.081387996673584, "learning_rate": 9.267082684267905e-06, "loss": 0.5745, "step": 3146 }, { "epoch": 0.2, "grad_norm": 1.1054128408432007, "learning_rate": 9.266547816239309e-06, "loss": 0.602, "step": 3147 }, { "epoch": 0.2, "grad_norm": 1.0518232583999634, "learning_rate": 9.266012768561225e-06, "loss": 0.62, "step": 3148 }, { "epoch": 0.2, "grad_norm": 0.9769362211227417, "learning_rate": 9.265477541256184e-06, "loss": 0.5942, "step": 3149 }, { "epoch": 0.2, "grad_norm": 0.9312898516654968, "learning_rate": 9.264942134346723e-06, "loss": 0.5428, "step": 3150 }, { "epoch": 0.2, "grad_norm": 0.9657699465751648, "learning_rate": 9.264406547855386e-06, "loss": 0.5389, "step": 3151 }, { "epoch": 0.2, "grad_norm": 1.0395054817199707, "learning_rate": 9.263870781804723e-06, "loss": 0.5474, "step": 3152 }, { "epoch": 0.2, "grad_norm": 1.0954946279525757, "learning_rate": 9.263334836217295e-06, "loss": 0.5394, "step": 3153 }, { "epoch": 0.2, "grad_norm": 0.9262494444847107, "learning_rate": 9.262798711115667e-06, "loss": 0.5598, "step": 3154 }, { "epoch": 0.2, "grad_norm": 0.9422882199287415, "learning_rate": 9.262262406522415e-06, "loss": 0.4883, "step": 3155 }, { "epoch": 0.2, "grad_norm": 1.115454912185669, "learning_rate": 9.261725922460121e-06, "loss": 0.5713, "step": 3156 }, { "epoch": 0.2, "grad_norm": 1.1434146165847778, "learning_rate": 9.261189258951372e-06, "loss": 0.5763, "step": 3157 }, { "epoch": 0.2, "grad_norm": 0.9961076974868774, "learning_rate": 9.26065241601877e-06, "loss": 0.6126, "step": 3158 }, { "epoch": 0.2, "grad_norm": 0.9965333938598633, "learning_rate": 9.260115393684914e-06, "loss": 0.5771, "step": 3159 }, { "epoch": 0.2, "grad_norm": 0.9651138186454773, "learning_rate": 9.25957819197242e-06, "loss": 0.5508, "step": 3160 }, { "epoch": 0.2, "grad_norm": 1.0352110862731934, "learning_rate": 9.259040810903906e-06, "loss": 0.5425, "step": 3161 }, { "epoch": 0.2, "grad_norm": 1.0079376697540283, "learning_rate": 9.258503250501998e-06, "loss": 0.5814, "step": 3162 }, { "epoch": 0.2, "grad_norm": 1.0181200504302979, "learning_rate": 9.257965510789334e-06, "loss": 0.5294, "step": 3163 }, { "epoch": 0.2, "grad_norm": 1.0838521718978882, "learning_rate": 9.257427591788555e-06, "loss": 0.6009, "step": 3164 }, { "epoch": 0.2, "grad_norm": 0.9461649060249329, "learning_rate": 9.25688949352231e-06, "loss": 0.5694, "step": 3165 }, { "epoch": 0.2, "grad_norm": 0.8991617560386658, "learning_rate": 9.256351216013257e-06, "loss": 0.5699, "step": 3166 }, { "epoch": 0.2, "grad_norm": 0.9943413138389587, "learning_rate": 9.255812759284062e-06, "loss": 0.5118, "step": 3167 }, { "epoch": 0.2, "grad_norm": 0.92888343334198, "learning_rate": 9.255274123357396e-06, "loss": 0.5091, "step": 3168 }, { "epoch": 0.2, "grad_norm": 1.0817584991455078, "learning_rate": 9.254735308255937e-06, "loss": 0.6395, "step": 3169 }, { "epoch": 0.2, "grad_norm": 1.0437732934951782, "learning_rate": 9.254196314002379e-06, "loss": 0.5639, "step": 3170 }, { "epoch": 0.2, "grad_norm": 1.1044567823410034, "learning_rate": 9.253657140619412e-06, "loss": 0.5903, "step": 3171 }, { "epoch": 0.2, "grad_norm": 1.1140618324279785, "learning_rate": 9.25311778812974e-06, "loss": 0.5645, "step": 3172 }, { "epoch": 0.2, "grad_norm": 1.1418890953063965, "learning_rate": 9.252578256556075e-06, "loss": 0.6061, "step": 3173 }, { "epoch": 0.2, "grad_norm": 1.003599762916565, "learning_rate": 9.252038545921131e-06, "loss": 0.5037, "step": 3174 }, { "epoch": 0.2, "grad_norm": 1.2493435144424438, "learning_rate": 9.251498656247636e-06, "loss": 0.602, "step": 3175 }, { "epoch": 0.2, "grad_norm": 1.0258667469024658, "learning_rate": 9.250958587558326e-06, "loss": 0.5372, "step": 3176 }, { "epoch": 0.2, "grad_norm": 1.0604716539382935, "learning_rate": 9.250418339875934e-06, "loss": 0.5791, "step": 3177 }, { "epoch": 0.2, "grad_norm": 1.565056562423706, "learning_rate": 9.249877913223213e-06, "loss": 0.5609, "step": 3178 }, { "epoch": 0.2, "grad_norm": 1.0567054748535156, "learning_rate": 9.249337307622916e-06, "loss": 0.5614, "step": 3179 }, { "epoch": 0.2, "grad_norm": 0.9943719506263733, "learning_rate": 9.24879652309781e-06, "loss": 0.576, "step": 3180 }, { "epoch": 0.2, "grad_norm": 0.998900294303894, "learning_rate": 9.248255559670661e-06, "loss": 0.5364, "step": 3181 }, { "epoch": 0.2, "grad_norm": 0.9154987335205078, "learning_rate": 9.247714417364251e-06, "loss": 0.5405, "step": 3182 }, { "epoch": 0.2, "grad_norm": 0.9305417537689209, "learning_rate": 9.24717309620136e-06, "loss": 0.5161, "step": 3183 }, { "epoch": 0.2, "grad_norm": 1.028682827949524, "learning_rate": 9.246631596204788e-06, "loss": 0.5365, "step": 3184 }, { "epoch": 0.2, "grad_norm": 1.061288595199585, "learning_rate": 9.246089917397332e-06, "loss": 0.5778, "step": 3185 }, { "epoch": 0.2, "grad_norm": 1.063146710395813, "learning_rate": 9.2455480598018e-06, "loss": 0.6103, "step": 3186 }, { "epoch": 0.2, "grad_norm": 1.1085253953933716, "learning_rate": 9.245006023441008e-06, "loss": 0.5487, "step": 3187 }, { "epoch": 0.2, "grad_norm": 1.0992125272750854, "learning_rate": 9.24446380833778e-06, "loss": 0.6294, "step": 3188 }, { "epoch": 0.2, "grad_norm": 0.9414675831794739, "learning_rate": 9.243921414514947e-06, "loss": 0.5078, "step": 3189 }, { "epoch": 0.2, "grad_norm": 0.9629780650138855, "learning_rate": 9.243378841995346e-06, "loss": 0.5164, "step": 3190 }, { "epoch": 0.2, "grad_norm": 1.040055513381958, "learning_rate": 9.242836090801823e-06, "loss": 0.5511, "step": 3191 }, { "epoch": 0.2, "grad_norm": 0.8983215689659119, "learning_rate": 9.242293160957231e-06, "loss": 0.5451, "step": 3192 }, { "epoch": 0.2, "grad_norm": 1.0140423774719238, "learning_rate": 9.241750052484435e-06, "loss": 0.5477, "step": 3193 }, { "epoch": 0.2, "grad_norm": 1.0713225603103638, "learning_rate": 9.241206765406298e-06, "loss": 0.5152, "step": 3194 }, { "epoch": 0.2, "grad_norm": 1.0715563297271729, "learning_rate": 9.2406632997457e-06, "loss": 0.5458, "step": 3195 }, { "epoch": 0.2, "grad_norm": 1.0324233770370483, "learning_rate": 9.240119655525522e-06, "loss": 0.5821, "step": 3196 }, { "epoch": 0.2, "grad_norm": 1.0402965545654297, "learning_rate": 9.239575832768655e-06, "loss": 0.6, "step": 3197 }, { "epoch": 0.2, "grad_norm": 0.9679100513458252, "learning_rate": 9.239031831498e-06, "loss": 0.5253, "step": 3198 }, { "epoch": 0.2, "grad_norm": 1.125067114830017, "learning_rate": 9.238487651736458e-06, "loss": 0.5982, "step": 3199 }, { "epoch": 0.2, "grad_norm": 1.075700283050537, "learning_rate": 9.237943293506948e-06, "loss": 0.5908, "step": 3200 }, { "epoch": 0.2, "grad_norm": 0.9718443751335144, "learning_rate": 9.237398756832387e-06, "loss": 0.5514, "step": 3201 }, { "epoch": 0.2, "grad_norm": 1.0474004745483398, "learning_rate": 9.236854041735706e-06, "loss": 0.5362, "step": 3202 }, { "epoch": 0.2, "grad_norm": 1.0741773843765259, "learning_rate": 9.236309148239839e-06, "loss": 0.5573, "step": 3203 }, { "epoch": 0.2, "grad_norm": 1.0043758153915405, "learning_rate": 9.235764076367732e-06, "loss": 0.5263, "step": 3204 }, { "epoch": 0.2, "grad_norm": 1.0209715366363525, "learning_rate": 9.235218826142337e-06, "loss": 0.5497, "step": 3205 }, { "epoch": 0.2, "grad_norm": 0.9901759028434753, "learning_rate": 9.234673397586606e-06, "loss": 0.5489, "step": 3206 }, { "epoch": 0.2, "grad_norm": 5.522571563720703, "learning_rate": 9.234127790723512e-06, "loss": 0.5618, "step": 3207 }, { "epoch": 0.2, "grad_norm": 4.058837413787842, "learning_rate": 9.233582005576028e-06, "loss": 0.5912, "step": 3208 }, { "epoch": 0.2, "grad_norm": 16.545076370239258, "learning_rate": 9.233036042167131e-06, "loss": 0.6748, "step": 3209 }, { "epoch": 0.2, "grad_norm": 8.660017967224121, "learning_rate": 9.232489900519812e-06, "loss": 0.6371, "step": 3210 }, { "epoch": 0.2, "grad_norm": 1.4097462892532349, "learning_rate": 9.231943580657069e-06, "loss": 0.6299, "step": 3211 }, { "epoch": 0.2, "grad_norm": 1.1483733654022217, "learning_rate": 9.2313970826019e-06, "loss": 0.5256, "step": 3212 }, { "epoch": 0.2, "grad_norm": 2.201183557510376, "learning_rate": 9.230850406377323e-06, "loss": 0.5777, "step": 3213 }, { "epoch": 0.2, "grad_norm": 1.2220267057418823, "learning_rate": 9.230303552006352e-06, "loss": 0.589, "step": 3214 }, { "epoch": 0.2, "grad_norm": 1.1205120086669922, "learning_rate": 9.229756519512014e-06, "loss": 0.5937, "step": 3215 }, { "epoch": 0.2, "grad_norm": 1.0527418851852417, "learning_rate": 9.229209308917343e-06, "loss": 0.5534, "step": 3216 }, { "epoch": 0.2, "grad_norm": 2.3884944915771484, "learning_rate": 9.228661920245383e-06, "loss": 0.5374, "step": 3217 }, { "epoch": 0.2, "grad_norm": 1.0388891696929932, "learning_rate": 9.22811435351918e-06, "loss": 0.5414, "step": 3218 }, { "epoch": 0.2, "grad_norm": 1.263106107711792, "learning_rate": 9.227566608761786e-06, "loss": 0.6034, "step": 3219 }, { "epoch": 0.2, "grad_norm": 1.2488805055618286, "learning_rate": 9.227018685996272e-06, "loss": 0.5911, "step": 3220 }, { "epoch": 0.2, "grad_norm": 2.839228391647339, "learning_rate": 9.226470585245706e-06, "loss": 0.599, "step": 3221 }, { "epoch": 0.2, "grad_norm": 1.4514201879501343, "learning_rate": 9.225922306533164e-06, "loss": 0.6333, "step": 3222 }, { "epoch": 0.2, "grad_norm": 1.2319753170013428, "learning_rate": 9.225373849881739e-06, "loss": 0.5535, "step": 3223 }, { "epoch": 0.2, "grad_norm": 0.9874230027198792, "learning_rate": 9.224825215314515e-06, "loss": 0.56, "step": 3224 }, { "epoch": 0.2, "grad_norm": 1.7198013067245483, "learning_rate": 9.224276402854601e-06, "loss": 0.5781, "step": 3225 }, { "epoch": 0.2, "grad_norm": 1.7840505838394165, "learning_rate": 9.223727412525103e-06, "loss": 0.5775, "step": 3226 }, { "epoch": 0.2, "grad_norm": 1.298826813697815, "learning_rate": 9.223178244349135e-06, "loss": 0.5766, "step": 3227 }, { "epoch": 0.2, "grad_norm": 1.0350885391235352, "learning_rate": 9.222628898349825e-06, "loss": 0.5495, "step": 3228 }, { "epoch": 0.2, "grad_norm": 0.9768742918968201, "learning_rate": 9.2220793745503e-06, "loss": 0.5383, "step": 3229 }, { "epoch": 0.2, "grad_norm": 0.9744670987129211, "learning_rate": 9.221529672973701e-06, "loss": 0.5271, "step": 3230 }, { "epoch": 0.2, "grad_norm": 1.1338422298431396, "learning_rate": 9.220979793643173e-06, "loss": 0.5691, "step": 3231 }, { "epoch": 0.2, "grad_norm": 1.0255298614501953, "learning_rate": 9.220429736581869e-06, "loss": 0.5825, "step": 3232 }, { "epoch": 0.2, "grad_norm": 1.0402343273162842, "learning_rate": 9.219879501812952e-06, "loss": 0.5294, "step": 3233 }, { "epoch": 0.2, "grad_norm": 1.087755560874939, "learning_rate": 9.219329089359588e-06, "loss": 0.5842, "step": 3234 }, { "epoch": 0.2, "grad_norm": 1.099476933479309, "learning_rate": 9.218778499244953e-06, "loss": 0.6201, "step": 3235 }, { "epoch": 0.21, "grad_norm": 1.0949242115020752, "learning_rate": 9.218227731492234e-06, "loss": 0.6195, "step": 3236 }, { "epoch": 0.21, "grad_norm": 1.0557842254638672, "learning_rate": 9.217676786124616e-06, "loss": 0.5638, "step": 3237 }, { "epoch": 0.21, "grad_norm": 1.1988122463226318, "learning_rate": 9.217125663165303e-06, "loss": 0.5939, "step": 3238 }, { "epoch": 0.21, "grad_norm": 1.1216857433319092, "learning_rate": 9.216574362637498e-06, "loss": 0.6367, "step": 3239 }, { "epoch": 0.21, "grad_norm": 0.9827113151550293, "learning_rate": 9.216022884564414e-06, "loss": 0.5675, "step": 3240 }, { "epoch": 0.21, "grad_norm": 1.0841467380523682, "learning_rate": 9.215471228969275e-06, "loss": 0.574, "step": 3241 }, { "epoch": 0.21, "grad_norm": 1.0059484243392944, "learning_rate": 9.214919395875306e-06, "loss": 0.61, "step": 3242 }, { "epoch": 0.21, "grad_norm": 0.9875774383544922, "learning_rate": 9.214367385305744e-06, "loss": 0.4961, "step": 3243 }, { "epoch": 0.21, "grad_norm": 1.0952807664871216, "learning_rate": 9.213815197283834e-06, "loss": 0.5547, "step": 3244 }, { "epoch": 0.21, "grad_norm": 1.1362330913543701, "learning_rate": 9.21326283183282e-06, "loss": 0.5441, "step": 3245 }, { "epoch": 0.21, "grad_norm": 1.0097522735595703, "learning_rate": 9.21271028897597e-06, "loss": 0.5455, "step": 3246 }, { "epoch": 0.21, "grad_norm": 1.0766572952270508, "learning_rate": 9.212157568736542e-06, "loss": 0.5419, "step": 3247 }, { "epoch": 0.21, "grad_norm": 0.9383859038352966, "learning_rate": 9.211604671137812e-06, "loss": 0.5496, "step": 3248 }, { "epoch": 0.21, "grad_norm": 1.0516395568847656, "learning_rate": 9.211051596203061e-06, "loss": 0.6164, "step": 3249 }, { "epoch": 0.21, "grad_norm": 1.0035028457641602, "learning_rate": 9.210498343955576e-06, "loss": 0.5573, "step": 3250 }, { "epoch": 0.21, "grad_norm": 1.065867304801941, "learning_rate": 9.209944914418653e-06, "loss": 0.6218, "step": 3251 }, { "epoch": 0.21, "grad_norm": 1.0126872062683105, "learning_rate": 9.209391307615596e-06, "loss": 0.5129, "step": 3252 }, { "epoch": 0.21, "grad_norm": 1.238006591796875, "learning_rate": 9.208837523569713e-06, "loss": 0.5923, "step": 3253 }, { "epoch": 0.21, "grad_norm": 1.0148457288742065, "learning_rate": 9.208283562304326e-06, "loss": 0.5192, "step": 3254 }, { "epoch": 0.21, "grad_norm": 1.1629726886749268, "learning_rate": 9.207729423842755e-06, "loss": 0.5576, "step": 3255 }, { "epoch": 0.21, "grad_norm": 1.10166335105896, "learning_rate": 9.207175108208334e-06, "loss": 0.5832, "step": 3256 }, { "epoch": 0.21, "grad_norm": 1.0380483865737915, "learning_rate": 9.20662061542441e-06, "loss": 0.5944, "step": 3257 }, { "epoch": 0.21, "grad_norm": 1.0519050359725952, "learning_rate": 9.206065945514321e-06, "loss": 0.5601, "step": 3258 }, { "epoch": 0.21, "grad_norm": 1.071286678314209, "learning_rate": 9.20551109850143e-06, "loss": 0.5497, "step": 3259 }, { "epoch": 0.21, "grad_norm": 1.0530121326446533, "learning_rate": 9.204956074409095e-06, "loss": 0.5615, "step": 3260 }, { "epoch": 0.21, "grad_norm": 1.0857264995574951, "learning_rate": 9.204400873260688e-06, "loss": 0.5625, "step": 3261 }, { "epoch": 0.21, "grad_norm": 0.9711153507232666, "learning_rate": 9.203845495079587e-06, "loss": 0.5684, "step": 3262 }, { "epoch": 0.21, "grad_norm": 0.9729705452919006, "learning_rate": 9.203289939889175e-06, "loss": 0.5455, "step": 3263 }, { "epoch": 0.21, "grad_norm": 0.9644930958747864, "learning_rate": 9.202734207712847e-06, "loss": 0.55, "step": 3264 }, { "epoch": 0.21, "grad_norm": 1.0248082876205444, "learning_rate": 9.202178298574e-06, "loss": 0.5271, "step": 3265 }, { "epoch": 0.21, "grad_norm": 1.0923707485198975, "learning_rate": 9.201622212496043e-06, "loss": 0.5625, "step": 3266 }, { "epoch": 0.21, "grad_norm": 1.037908673286438, "learning_rate": 9.201065949502394e-06, "loss": 0.6094, "step": 3267 }, { "epoch": 0.21, "grad_norm": 1.0656263828277588, "learning_rate": 9.20050950961647e-06, "loss": 0.5385, "step": 3268 }, { "epoch": 0.21, "grad_norm": 0.9770526885986328, "learning_rate": 9.199952892861706e-06, "loss": 0.5493, "step": 3269 }, { "epoch": 0.21, "grad_norm": 1.1895619630813599, "learning_rate": 9.199396099261532e-06, "loss": 0.5696, "step": 3270 }, { "epoch": 0.21, "grad_norm": 1.0683916807174683, "learning_rate": 9.198839128839399e-06, "loss": 0.5782, "step": 3271 }, { "epoch": 0.21, "grad_norm": 1.0089161396026611, "learning_rate": 9.198281981618757e-06, "loss": 0.5446, "step": 3272 }, { "epoch": 0.21, "grad_norm": 1.0737895965576172, "learning_rate": 9.197724657623066e-06, "loss": 0.5417, "step": 3273 }, { "epoch": 0.21, "grad_norm": 1.0453969240188599, "learning_rate": 9.197167156875793e-06, "loss": 0.5761, "step": 3274 }, { "epoch": 0.21, "grad_norm": 1.1445963382720947, "learning_rate": 9.19660947940041e-06, "loss": 0.5683, "step": 3275 }, { "epoch": 0.21, "grad_norm": 0.9256425499916077, "learning_rate": 9.196051625220401e-06, "loss": 0.5287, "step": 3276 }, { "epoch": 0.21, "grad_norm": 0.9852249622344971, "learning_rate": 9.195493594359254e-06, "loss": 0.5401, "step": 3277 }, { "epoch": 0.21, "grad_norm": 1.0960800647735596, "learning_rate": 9.19493538684047e-06, "loss": 0.5993, "step": 3278 }, { "epoch": 0.21, "grad_norm": 0.9759271740913391, "learning_rate": 9.194377002687547e-06, "loss": 0.5518, "step": 3279 }, { "epoch": 0.21, "grad_norm": 1.0800795555114746, "learning_rate": 9.193818441924003e-06, "loss": 0.5577, "step": 3280 }, { "epoch": 0.21, "grad_norm": 1.0303105115890503, "learning_rate": 9.19325970457335e-06, "loss": 0.5192, "step": 3281 }, { "epoch": 0.21, "grad_norm": 0.977532684803009, "learning_rate": 9.192700790659121e-06, "loss": 0.4839, "step": 3282 }, { "epoch": 0.21, "grad_norm": 1.118839144706726, "learning_rate": 9.192141700204844e-06, "loss": 0.6311, "step": 3283 }, { "epoch": 0.21, "grad_norm": 1.0272563695907593, "learning_rate": 9.191582433234067e-06, "loss": 0.5859, "step": 3284 }, { "epoch": 0.21, "grad_norm": 0.9714250564575195, "learning_rate": 9.191022989770332e-06, "loss": 0.5556, "step": 3285 }, { "epoch": 0.21, "grad_norm": 1.1177606582641602, "learning_rate": 9.1904633698372e-06, "loss": 0.5937, "step": 3286 }, { "epoch": 0.21, "grad_norm": 0.9881129860877991, "learning_rate": 9.189903573458234e-06, "loss": 0.5656, "step": 3287 }, { "epoch": 0.21, "grad_norm": 1.031219244003296, "learning_rate": 9.189343600657002e-06, "loss": 0.5645, "step": 3288 }, { "epoch": 0.21, "grad_norm": 1.0255357027053833, "learning_rate": 9.188783451457086e-06, "loss": 0.5764, "step": 3289 }, { "epoch": 0.21, "grad_norm": 1.020890235900879, "learning_rate": 9.18822312588207e-06, "loss": 0.5423, "step": 3290 }, { "epoch": 0.21, "grad_norm": 0.9295765161514282, "learning_rate": 9.187662623955548e-06, "loss": 0.4904, "step": 3291 }, { "epoch": 0.21, "grad_norm": 1.1230452060699463, "learning_rate": 9.18710194570112e-06, "loss": 0.557, "step": 3292 }, { "epoch": 0.21, "grad_norm": 0.9984371066093445, "learning_rate": 9.186541091142397e-06, "loss": 0.4955, "step": 3293 }, { "epoch": 0.21, "grad_norm": 1.1656453609466553, "learning_rate": 9.18598006030299e-06, "loss": 0.6019, "step": 3294 }, { "epoch": 0.21, "grad_norm": 1.0436992645263672, "learning_rate": 9.185418853206528e-06, "loss": 0.5177, "step": 3295 }, { "epoch": 0.21, "grad_norm": 1.035557508468628, "learning_rate": 9.184857469876635e-06, "loss": 0.5584, "step": 3296 }, { "epoch": 0.21, "grad_norm": 1.0965100526809692, "learning_rate": 9.184295910336953e-06, "loss": 0.558, "step": 3297 }, { "epoch": 0.21, "grad_norm": 1.0783100128173828, "learning_rate": 9.183734174611125e-06, "loss": 0.6263, "step": 3298 }, { "epoch": 0.21, "grad_norm": 1.103131651878357, "learning_rate": 9.183172262722807e-06, "loss": 0.6347, "step": 3299 }, { "epoch": 0.21, "grad_norm": 1.0050467252731323, "learning_rate": 9.182610174695656e-06, "loss": 0.5465, "step": 3300 }, { "epoch": 0.21, "grad_norm": 1.054924726486206, "learning_rate": 9.182047910553342e-06, "loss": 0.5739, "step": 3301 }, { "epoch": 0.21, "grad_norm": 0.973746120929718, "learning_rate": 9.181485470319537e-06, "loss": 0.5741, "step": 3302 }, { "epoch": 0.21, "grad_norm": 1.0047938823699951, "learning_rate": 9.180922854017927e-06, "loss": 0.5378, "step": 3303 }, { "epoch": 0.21, "grad_norm": 1.0483931303024292, "learning_rate": 9.1803600616722e-06, "loss": 0.5843, "step": 3304 }, { "epoch": 0.21, "grad_norm": 1.1777387857437134, "learning_rate": 9.179797093306053e-06, "loss": 0.6227, "step": 3305 }, { "epoch": 0.21, "grad_norm": 1.0783026218414307, "learning_rate": 9.17923394894319e-06, "loss": 0.5663, "step": 3306 }, { "epoch": 0.21, "grad_norm": 0.9983446598052979, "learning_rate": 9.178670628607325e-06, "loss": 0.563, "step": 3307 }, { "epoch": 0.21, "grad_norm": 0.9895806312561035, "learning_rate": 9.178107132322174e-06, "loss": 0.5261, "step": 3308 }, { "epoch": 0.21, "grad_norm": 1.0566648244857788, "learning_rate": 9.177543460111469e-06, "loss": 0.5581, "step": 3309 }, { "epoch": 0.21, "grad_norm": 1.0616939067840576, "learning_rate": 9.17697961199894e-06, "loss": 0.5691, "step": 3310 }, { "epoch": 0.21, "grad_norm": 1.002230167388916, "learning_rate": 9.176415588008332e-06, "loss": 0.5919, "step": 3311 }, { "epoch": 0.21, "grad_norm": 1.095953345298767, "learning_rate": 9.175851388163391e-06, "loss": 0.5681, "step": 3312 }, { "epoch": 0.21, "grad_norm": 1.043747067451477, "learning_rate": 9.175287012487874e-06, "loss": 0.5646, "step": 3313 }, { "epoch": 0.21, "grad_norm": 0.9811042547225952, "learning_rate": 9.174722461005546e-06, "loss": 0.5461, "step": 3314 }, { "epoch": 0.21, "grad_norm": 1.125097632408142, "learning_rate": 9.174157733740178e-06, "loss": 0.5883, "step": 3315 }, { "epoch": 0.21, "grad_norm": 1.0411174297332764, "learning_rate": 9.173592830715548e-06, "loss": 0.5333, "step": 3316 }, { "epoch": 0.21, "grad_norm": 1.0620931386947632, "learning_rate": 9.173027751955444e-06, "loss": 0.5858, "step": 3317 }, { "epoch": 0.21, "grad_norm": 0.960599422454834, "learning_rate": 9.172462497483658e-06, "loss": 0.4991, "step": 3318 }, { "epoch": 0.21, "grad_norm": 1.133169412612915, "learning_rate": 9.17189706732399e-06, "loss": 0.5543, "step": 3319 }, { "epoch": 0.21, "grad_norm": 1.2463678121566772, "learning_rate": 9.171331461500253e-06, "loss": 0.596, "step": 3320 }, { "epoch": 0.21, "grad_norm": 1.0226194858551025, "learning_rate": 9.170765680036256e-06, "loss": 0.5472, "step": 3321 }, { "epoch": 0.21, "grad_norm": 1.02968168258667, "learning_rate": 9.170199722955825e-06, "loss": 0.5627, "step": 3322 }, { "epoch": 0.21, "grad_norm": 0.9317070245742798, "learning_rate": 9.169633590282793e-06, "loss": 0.549, "step": 3323 }, { "epoch": 0.21, "grad_norm": 1.1055186986923218, "learning_rate": 9.169067282040994e-06, "loss": 0.579, "step": 3324 }, { "epoch": 0.21, "grad_norm": 1.012923002243042, "learning_rate": 9.168500798254275e-06, "loss": 0.5193, "step": 3325 }, { "epoch": 0.21, "grad_norm": 1.0518181324005127, "learning_rate": 9.167934138946489e-06, "loss": 0.5923, "step": 3326 }, { "epoch": 0.21, "grad_norm": 1.1636402606964111, "learning_rate": 9.167367304141494e-06, "loss": 0.5886, "step": 3327 }, { "epoch": 0.21, "grad_norm": 1.1672029495239258, "learning_rate": 9.166800293863161e-06, "loss": 0.6024, "step": 3328 }, { "epoch": 0.21, "grad_norm": 1.037725806236267, "learning_rate": 9.166233108135362e-06, "loss": 0.5351, "step": 3329 }, { "epoch": 0.21, "grad_norm": 1.094976544380188, "learning_rate": 9.165665746981982e-06, "loss": 0.5662, "step": 3330 }, { "epoch": 0.21, "grad_norm": 1.0267653465270996, "learning_rate": 9.165098210426905e-06, "loss": 0.5907, "step": 3331 }, { "epoch": 0.21, "grad_norm": 1.0563652515411377, "learning_rate": 9.164530498494035e-06, "loss": 0.5794, "step": 3332 }, { "epoch": 0.21, "grad_norm": 1.0362412929534912, "learning_rate": 9.163962611207272e-06, "loss": 0.573, "step": 3333 }, { "epoch": 0.21, "grad_norm": 1.0745573043823242, "learning_rate": 9.163394548590529e-06, "loss": 0.5796, "step": 3334 }, { "epoch": 0.21, "grad_norm": 1.067790150642395, "learning_rate": 9.162826310667725e-06, "loss": 0.5735, "step": 3335 }, { "epoch": 0.21, "grad_norm": 1.0531738996505737, "learning_rate": 9.162257897462784e-06, "loss": 0.5827, "step": 3336 }, { "epoch": 0.21, "grad_norm": 1.0059359073638916, "learning_rate": 9.161689308999646e-06, "loss": 0.5369, "step": 3337 }, { "epoch": 0.21, "grad_norm": 1.201866865158081, "learning_rate": 9.161120545302246e-06, "loss": 0.5747, "step": 3338 }, { "epoch": 0.21, "grad_norm": 1.0412216186523438, "learning_rate": 9.160551606394537e-06, "loss": 0.6078, "step": 3339 }, { "epoch": 0.21, "grad_norm": 1.0173043012619019, "learning_rate": 9.159982492300473e-06, "loss": 0.5753, "step": 3340 }, { "epoch": 0.21, "grad_norm": 1.0637174844741821, "learning_rate": 9.159413203044017e-06, "loss": 0.5532, "step": 3341 }, { "epoch": 0.21, "grad_norm": 1.042886734008789, "learning_rate": 9.158843738649141e-06, "loss": 0.5398, "step": 3342 }, { "epoch": 0.21, "grad_norm": 1.0245264768600464, "learning_rate": 9.158274099139823e-06, "loss": 0.542, "step": 3343 }, { "epoch": 0.21, "grad_norm": 0.9581206440925598, "learning_rate": 9.157704284540047e-06, "loss": 0.5205, "step": 3344 }, { "epoch": 0.21, "grad_norm": 1.0562598705291748, "learning_rate": 9.15713429487381e-06, "loss": 0.574, "step": 3345 }, { "epoch": 0.21, "grad_norm": 1.0625674724578857, "learning_rate": 9.156564130165106e-06, "loss": 0.5444, "step": 3346 }, { "epoch": 0.21, "grad_norm": 1.0383031368255615, "learning_rate": 9.155993790437949e-06, "loss": 0.5663, "step": 3347 }, { "epoch": 0.21, "grad_norm": 0.969848096370697, "learning_rate": 9.155423275716351e-06, "loss": 0.5429, "step": 3348 }, { "epoch": 0.21, "grad_norm": 1.017959713935852, "learning_rate": 9.154852586024332e-06, "loss": 0.5594, "step": 3349 }, { "epoch": 0.21, "grad_norm": 0.9905710816383362, "learning_rate": 9.154281721385928e-06, "loss": 0.5361, "step": 3350 }, { "epoch": 0.21, "grad_norm": 1.1172434091567993, "learning_rate": 9.153710681825169e-06, "loss": 0.5811, "step": 3351 }, { "epoch": 0.21, "grad_norm": 1.0339463949203491, "learning_rate": 9.153139467366103e-06, "loss": 0.5715, "step": 3352 }, { "epoch": 0.21, "grad_norm": 0.9711489081382751, "learning_rate": 9.152568078032783e-06, "loss": 0.579, "step": 3353 }, { "epoch": 0.21, "grad_norm": 1.065482258796692, "learning_rate": 9.151996513849267e-06, "loss": 0.5532, "step": 3354 }, { "epoch": 0.21, "grad_norm": 0.8725598454475403, "learning_rate": 9.151424774839622e-06, "loss": 0.508, "step": 3355 }, { "epoch": 0.21, "grad_norm": 1.0380140542984009, "learning_rate": 9.15085286102792e-06, "loss": 0.5271, "step": 3356 }, { "epoch": 0.21, "grad_norm": 0.9468619227409363, "learning_rate": 9.150280772438245e-06, "loss": 0.5565, "step": 3357 }, { "epoch": 0.21, "grad_norm": 0.9295430779457092, "learning_rate": 9.149708509094684e-06, "loss": 0.5414, "step": 3358 }, { "epoch": 0.21, "grad_norm": 1.0121490955352783, "learning_rate": 9.149136071021333e-06, "loss": 0.5603, "step": 3359 }, { "epoch": 0.21, "grad_norm": 1.0209137201309204, "learning_rate": 9.148563458242296e-06, "loss": 0.5791, "step": 3360 }, { "epoch": 0.21, "grad_norm": 0.954666018486023, "learning_rate": 9.147990670781683e-06, "loss": 0.5656, "step": 3361 }, { "epoch": 0.21, "grad_norm": 0.9398199319839478, "learning_rate": 9.147417708663615e-06, "loss": 0.4706, "step": 3362 }, { "epoch": 0.21, "grad_norm": 1.1586277484893799, "learning_rate": 9.146844571912213e-06, "loss": 0.561, "step": 3363 }, { "epoch": 0.21, "grad_norm": 1.058401107788086, "learning_rate": 9.146271260551614e-06, "loss": 0.5768, "step": 3364 }, { "epoch": 0.21, "grad_norm": 1.0301518440246582, "learning_rate": 9.145697774605953e-06, "loss": 0.5582, "step": 3365 }, { "epoch": 0.21, "grad_norm": 1.0140354633331299, "learning_rate": 9.145124114099382e-06, "loss": 0.5525, "step": 3366 }, { "epoch": 0.21, "grad_norm": 1.1071993112564087, "learning_rate": 9.144550279056055e-06, "loss": 0.5199, "step": 3367 }, { "epoch": 0.21, "grad_norm": 1.0806834697723389, "learning_rate": 9.143976269500133e-06, "loss": 0.5089, "step": 3368 }, { "epoch": 0.21, "grad_norm": 1.0306366682052612, "learning_rate": 9.143402085455785e-06, "loss": 0.5495, "step": 3369 }, { "epoch": 0.21, "grad_norm": 0.9952037930488586, "learning_rate": 9.142827726947193e-06, "loss": 0.5691, "step": 3370 }, { "epoch": 0.21, "grad_norm": 1.0687406063079834, "learning_rate": 9.142253193998533e-06, "loss": 0.5451, "step": 3371 }, { "epoch": 0.21, "grad_norm": 0.9995650053024292, "learning_rate": 9.141678486634002e-06, "loss": 0.5679, "step": 3372 }, { "epoch": 0.21, "grad_norm": 0.9620756506919861, "learning_rate": 9.1411036048778e-06, "loss": 0.5671, "step": 3373 }, { "epoch": 0.21, "grad_norm": 1.0601290464401245, "learning_rate": 9.140528548754128e-06, "loss": 0.5516, "step": 3374 }, { "epoch": 0.21, "grad_norm": 0.9143772721290588, "learning_rate": 9.139953318287204e-06, "loss": 0.4883, "step": 3375 }, { "epoch": 0.21, "grad_norm": 1.1627060174942017, "learning_rate": 9.139377913501247e-06, "loss": 0.563, "step": 3376 }, { "epoch": 0.21, "grad_norm": 1.0104249715805054, "learning_rate": 9.138802334420486e-06, "loss": 0.5254, "step": 3377 }, { "epoch": 0.21, "grad_norm": 0.9357181191444397, "learning_rate": 9.138226581069158e-06, "loss": 0.5523, "step": 3378 }, { "epoch": 0.21, "grad_norm": 1.0746586322784424, "learning_rate": 9.137650653471505e-06, "loss": 0.6169, "step": 3379 }, { "epoch": 0.21, "grad_norm": 1.0098421573638916, "learning_rate": 9.137074551651774e-06, "loss": 0.5593, "step": 3380 }, { "epoch": 0.21, "grad_norm": 1.048406720161438, "learning_rate": 9.136498275634226e-06, "loss": 0.5816, "step": 3381 }, { "epoch": 0.21, "grad_norm": 0.9700804948806763, "learning_rate": 9.135921825443125e-06, "loss": 0.58, "step": 3382 }, { "epoch": 0.21, "grad_norm": 1.0831280946731567, "learning_rate": 9.135345201102745e-06, "loss": 0.5414, "step": 3383 }, { "epoch": 0.21, "grad_norm": 1.141015648841858, "learning_rate": 9.134768402637366e-06, "loss": 0.6002, "step": 3384 }, { "epoch": 0.21, "grad_norm": 0.9244864583015442, "learning_rate": 9.13419143007127e-06, "loss": 0.5431, "step": 3385 }, { "epoch": 0.21, "grad_norm": 0.9787561297416687, "learning_rate": 9.133614283428757e-06, "loss": 0.5202, "step": 3386 }, { "epoch": 0.21, "grad_norm": 1.0705188512802124, "learning_rate": 9.133036962734127e-06, "loss": 0.5641, "step": 3387 }, { "epoch": 0.21, "grad_norm": 0.9717293381690979, "learning_rate": 9.132459468011686e-06, "loss": 0.5757, "step": 3388 }, { "epoch": 0.21, "grad_norm": 1.0418078899383545, "learning_rate": 9.131881799285754e-06, "loss": 0.5758, "step": 3389 }, { "epoch": 0.21, "grad_norm": 1.106235384941101, "learning_rate": 9.131303956580653e-06, "loss": 0.6181, "step": 3390 }, { "epoch": 0.21, "grad_norm": 0.9898834824562073, "learning_rate": 9.130725939920712e-06, "loss": 0.5413, "step": 3391 }, { "epoch": 0.21, "grad_norm": 1.0746660232543945, "learning_rate": 9.130147749330275e-06, "loss": 0.5748, "step": 3392 }, { "epoch": 0.21, "grad_norm": 0.9885543584823608, "learning_rate": 9.129569384833682e-06, "loss": 0.5702, "step": 3393 }, { "epoch": 0.22, "grad_norm": 1.0890642404556274, "learning_rate": 9.128990846455287e-06, "loss": 0.5958, "step": 3394 }, { "epoch": 0.22, "grad_norm": 1.0893391370773315, "learning_rate": 9.128412134219453e-06, "loss": 0.5275, "step": 3395 }, { "epoch": 0.22, "grad_norm": 1.0430580377578735, "learning_rate": 9.127833248150546e-06, "loss": 0.5993, "step": 3396 }, { "epoch": 0.22, "grad_norm": 0.997788667678833, "learning_rate": 9.12725418827294e-06, "loss": 0.5298, "step": 3397 }, { "epoch": 0.22, "grad_norm": 0.9546692371368408, "learning_rate": 9.126674954611016e-06, "loss": 0.5516, "step": 3398 }, { "epoch": 0.22, "grad_norm": 1.0463684797286987, "learning_rate": 9.12609554718917e-06, "loss": 0.5576, "step": 3399 }, { "epoch": 0.22, "grad_norm": 0.9936715960502625, "learning_rate": 9.12551596603179e-06, "loss": 0.5558, "step": 3400 }, { "epoch": 0.22, "grad_norm": 1.0408931970596313, "learning_rate": 9.124936211163284e-06, "loss": 0.5593, "step": 3401 }, { "epoch": 0.22, "grad_norm": 1.0101039409637451, "learning_rate": 9.124356282608065e-06, "loss": 0.5265, "step": 3402 }, { "epoch": 0.22, "grad_norm": 1.1082333326339722, "learning_rate": 9.123776180390552e-06, "loss": 0.5835, "step": 3403 }, { "epoch": 0.22, "grad_norm": 1.0752975940704346, "learning_rate": 9.123195904535167e-06, "loss": 0.5165, "step": 3404 }, { "epoch": 0.22, "grad_norm": 1.0898516178131104, "learning_rate": 9.122615455066348e-06, "loss": 0.5818, "step": 3405 }, { "epoch": 0.22, "grad_norm": 1.1162127256393433, "learning_rate": 9.122034832008532e-06, "loss": 0.5463, "step": 3406 }, { "epoch": 0.22, "grad_norm": 1.0598623752593994, "learning_rate": 9.12145403538617e-06, "loss": 0.5618, "step": 3407 }, { "epoch": 0.22, "grad_norm": 1.0100927352905273, "learning_rate": 9.120873065223716e-06, "loss": 0.5602, "step": 3408 }, { "epoch": 0.22, "grad_norm": 1.0553600788116455, "learning_rate": 9.120291921545633e-06, "loss": 0.5251, "step": 3409 }, { "epoch": 0.22, "grad_norm": 1.0603712797164917, "learning_rate": 9.11971060437639e-06, "loss": 0.6051, "step": 3410 }, { "epoch": 0.22, "grad_norm": 1.015459656715393, "learning_rate": 9.119129113740463e-06, "loss": 0.5398, "step": 3411 }, { "epoch": 0.22, "grad_norm": 0.9911149144172668, "learning_rate": 9.118547449662342e-06, "loss": 0.534, "step": 3412 }, { "epoch": 0.22, "grad_norm": 1.0005937814712524, "learning_rate": 9.117965612166514e-06, "loss": 0.5691, "step": 3413 }, { "epoch": 0.22, "grad_norm": 0.947547435760498, "learning_rate": 9.117383601277478e-06, "loss": 0.5596, "step": 3414 }, { "epoch": 0.22, "grad_norm": 1.0150891542434692, "learning_rate": 9.116801417019744e-06, "loss": 0.5266, "step": 3415 }, { "epoch": 0.22, "grad_norm": 0.959476113319397, "learning_rate": 9.116219059417821e-06, "loss": 0.5825, "step": 3416 }, { "epoch": 0.22, "grad_norm": 1.0116640329360962, "learning_rate": 9.115636528496236e-06, "loss": 0.5495, "step": 3417 }, { "epoch": 0.22, "grad_norm": 0.9909448027610779, "learning_rate": 9.115053824279511e-06, "loss": 0.5555, "step": 3418 }, { "epoch": 0.22, "grad_norm": 0.9822895526885986, "learning_rate": 9.114470946792187e-06, "loss": 0.5671, "step": 3419 }, { "epoch": 0.22, "grad_norm": 0.9034964442253113, "learning_rate": 9.113887896058805e-06, "loss": 0.5463, "step": 3420 }, { "epoch": 0.22, "grad_norm": 1.0540194511413574, "learning_rate": 9.11330467210391e-06, "loss": 0.5678, "step": 3421 }, { "epoch": 0.22, "grad_norm": 1.0437064170837402, "learning_rate": 9.11272127495207e-06, "loss": 0.5398, "step": 3422 }, { "epoch": 0.22, "grad_norm": 1.0747253894805908, "learning_rate": 9.112137704627842e-06, "loss": 0.6188, "step": 3423 }, { "epoch": 0.22, "grad_norm": 0.9856347441673279, "learning_rate": 9.1115539611558e-06, "loss": 0.544, "step": 3424 }, { "epoch": 0.22, "grad_norm": 1.0398938655853271, "learning_rate": 9.110970044560524e-06, "loss": 0.5679, "step": 3425 }, { "epoch": 0.22, "grad_norm": 1.0096843242645264, "learning_rate": 9.1103859548666e-06, "loss": 0.5964, "step": 3426 }, { "epoch": 0.22, "grad_norm": 1.0516611337661743, "learning_rate": 9.109801692098624e-06, "loss": 0.5566, "step": 3427 }, { "epoch": 0.22, "grad_norm": 0.9619464874267578, "learning_rate": 9.109217256281196e-06, "loss": 0.5298, "step": 3428 }, { "epoch": 0.22, "grad_norm": 1.08042311668396, "learning_rate": 9.108632647438922e-06, "loss": 0.585, "step": 3429 }, { "epoch": 0.22, "grad_norm": 1.047071933746338, "learning_rate": 9.108047865596421e-06, "loss": 0.5438, "step": 3430 }, { "epoch": 0.22, "grad_norm": 1.0152941942214966, "learning_rate": 9.107462910778316e-06, "loss": 0.5251, "step": 3431 }, { "epoch": 0.22, "grad_norm": 0.9794025421142578, "learning_rate": 9.106877783009236e-06, "loss": 0.5746, "step": 3432 }, { "epoch": 0.22, "grad_norm": 1.0107927322387695, "learning_rate": 9.106292482313819e-06, "loss": 0.557, "step": 3433 }, { "epoch": 0.22, "grad_norm": 0.936278760433197, "learning_rate": 9.105707008716712e-06, "loss": 0.5274, "step": 3434 }, { "epoch": 0.22, "grad_norm": 1.0086160898208618, "learning_rate": 9.105121362242564e-06, "loss": 0.5522, "step": 3435 }, { "epoch": 0.22, "grad_norm": 1.0340970754623413, "learning_rate": 9.104535542916035e-06, "loss": 0.5361, "step": 3436 }, { "epoch": 0.22, "grad_norm": 0.9634861946105957, "learning_rate": 9.103949550761795e-06, "loss": 0.5679, "step": 3437 }, { "epoch": 0.22, "grad_norm": 1.0549044609069824, "learning_rate": 9.103363385804516e-06, "loss": 0.5772, "step": 3438 }, { "epoch": 0.22, "grad_norm": 1.0148675441741943, "learning_rate": 9.102777048068878e-06, "loss": 0.5776, "step": 3439 }, { "epoch": 0.22, "grad_norm": 1.0089242458343506, "learning_rate": 9.102190537579572e-06, "loss": 0.5738, "step": 3440 }, { "epoch": 0.22, "grad_norm": 0.9703475832939148, "learning_rate": 9.101603854361291e-06, "loss": 0.5424, "step": 3441 }, { "epoch": 0.22, "grad_norm": 1.0789226293563843, "learning_rate": 9.101016998438743e-06, "loss": 0.5257, "step": 3442 }, { "epoch": 0.22, "grad_norm": 1.1241742372512817, "learning_rate": 9.100429969836636e-06, "loss": 0.5992, "step": 3443 }, { "epoch": 0.22, "grad_norm": 1.0380760431289673, "learning_rate": 9.099842768579685e-06, "loss": 0.5483, "step": 3444 }, { "epoch": 0.22, "grad_norm": 0.958708643913269, "learning_rate": 9.099255394692618e-06, "loss": 0.4942, "step": 3445 }, { "epoch": 0.22, "grad_norm": 0.9943519830703735, "learning_rate": 9.098667848200167e-06, "loss": 0.521, "step": 3446 }, { "epoch": 0.22, "grad_norm": 1.0717390775680542, "learning_rate": 9.09808012912707e-06, "loss": 0.58, "step": 3447 }, { "epoch": 0.22, "grad_norm": 0.9994247555732727, "learning_rate": 9.097492237498076e-06, "loss": 0.5667, "step": 3448 }, { "epoch": 0.22, "grad_norm": 1.003158450126648, "learning_rate": 9.096904173337937e-06, "loss": 0.5413, "step": 3449 }, { "epoch": 0.22, "grad_norm": 1.1830018758773804, "learning_rate": 9.096315936671416e-06, "loss": 0.5561, "step": 3450 }, { "epoch": 0.22, "grad_norm": 1.113922119140625, "learning_rate": 9.095727527523282e-06, "loss": 0.5983, "step": 3451 }, { "epoch": 0.22, "grad_norm": 1.1049774885177612, "learning_rate": 9.095138945918309e-06, "loss": 0.5698, "step": 3452 }, { "epoch": 0.22, "grad_norm": 0.987943172454834, "learning_rate": 9.094550191881281e-06, "loss": 0.511, "step": 3453 }, { "epoch": 0.22, "grad_norm": 0.9769154787063599, "learning_rate": 9.093961265436988e-06, "loss": 0.5826, "step": 3454 }, { "epoch": 0.22, "grad_norm": 0.9816447496414185, "learning_rate": 9.093372166610229e-06, "loss": 0.5603, "step": 3455 }, { "epoch": 0.22, "grad_norm": 1.0525661706924438, "learning_rate": 9.092782895425806e-06, "loss": 0.5196, "step": 3456 }, { "epoch": 0.22, "grad_norm": 1.0274732112884521, "learning_rate": 9.092193451908533e-06, "loss": 0.5814, "step": 3457 }, { "epoch": 0.22, "grad_norm": 1.0459325313568115, "learning_rate": 9.091603836083231e-06, "loss": 0.6087, "step": 3458 }, { "epoch": 0.22, "grad_norm": 1.008176565170288, "learning_rate": 9.091014047974725e-06, "loss": 0.4993, "step": 3459 }, { "epoch": 0.22, "grad_norm": 1.0283236503601074, "learning_rate": 9.090424087607848e-06, "loss": 0.5641, "step": 3460 }, { "epoch": 0.22, "grad_norm": 0.9741441011428833, "learning_rate": 9.089833955007443e-06, "loss": 0.5211, "step": 3461 }, { "epoch": 0.22, "grad_norm": 1.0388422012329102, "learning_rate": 9.089243650198359e-06, "loss": 0.5607, "step": 3462 }, { "epoch": 0.22, "grad_norm": 0.9902390837669373, "learning_rate": 9.088653173205449e-06, "loss": 0.5419, "step": 3463 }, { "epoch": 0.22, "grad_norm": 1.1023180484771729, "learning_rate": 9.088062524053575e-06, "loss": 0.5474, "step": 3464 }, { "epoch": 0.22, "grad_norm": 1.0138156414031982, "learning_rate": 9.087471702767612e-06, "loss": 0.5287, "step": 3465 }, { "epoch": 0.22, "grad_norm": 1.0068479776382446, "learning_rate": 9.086880709372434e-06, "loss": 0.5632, "step": 3466 }, { "epoch": 0.22, "grad_norm": 0.9584867358207703, "learning_rate": 9.086289543892928e-06, "loss": 0.507, "step": 3467 }, { "epoch": 0.22, "grad_norm": 0.9820325970649719, "learning_rate": 9.085698206353983e-06, "loss": 0.5424, "step": 3468 }, { "epoch": 0.22, "grad_norm": 1.0293880701065063, "learning_rate": 9.085106696780499e-06, "loss": 0.5286, "step": 3469 }, { "epoch": 0.22, "grad_norm": 1.1403424739837646, "learning_rate": 9.084515015197384e-06, "loss": 0.6048, "step": 3470 }, { "epoch": 0.22, "grad_norm": 1.075882911682129, "learning_rate": 9.08392316162955e-06, "loss": 0.5827, "step": 3471 }, { "epoch": 0.22, "grad_norm": 0.9643628001213074, "learning_rate": 9.083331136101921e-06, "loss": 0.549, "step": 3472 }, { "epoch": 0.22, "grad_norm": 1.06292724609375, "learning_rate": 9.08273893863942e-06, "loss": 0.5346, "step": 3473 }, { "epoch": 0.22, "grad_norm": 0.9991394281387329, "learning_rate": 9.082146569266988e-06, "loss": 0.5242, "step": 3474 }, { "epoch": 0.22, "grad_norm": 1.058954119682312, "learning_rate": 9.081554028009562e-06, "loss": 0.5945, "step": 3475 }, { "epoch": 0.22, "grad_norm": 0.9722981452941895, "learning_rate": 9.080961314892096e-06, "loss": 0.554, "step": 3476 }, { "epoch": 0.22, "grad_norm": 1.054129719734192, "learning_rate": 9.080368429939546e-06, "loss": 0.5414, "step": 3477 }, { "epoch": 0.22, "grad_norm": 1.144458293914795, "learning_rate": 9.079775373176874e-06, "loss": 0.5698, "step": 3478 }, { "epoch": 0.22, "grad_norm": 1.02845299243927, "learning_rate": 9.079182144629055e-06, "loss": 0.5586, "step": 3479 }, { "epoch": 0.22, "grad_norm": 0.9779374599456787, "learning_rate": 9.078588744321067e-06, "loss": 0.5392, "step": 3480 }, { "epoch": 0.22, "grad_norm": 1.0148526430130005, "learning_rate": 9.077995172277894e-06, "loss": 0.5343, "step": 3481 }, { "epoch": 0.22, "grad_norm": 0.9910771250724792, "learning_rate": 9.07740142852453e-06, "loss": 0.5702, "step": 3482 }, { "epoch": 0.22, "grad_norm": 1.0904290676116943, "learning_rate": 9.076807513085976e-06, "loss": 0.5666, "step": 3483 }, { "epoch": 0.22, "grad_norm": 1.087215781211853, "learning_rate": 9.076213425987242e-06, "loss": 0.5903, "step": 3484 }, { "epoch": 0.22, "grad_norm": 1.0257717370986938, "learning_rate": 9.07561916725334e-06, "loss": 0.5993, "step": 3485 }, { "epoch": 0.22, "grad_norm": 1.1014600992202759, "learning_rate": 9.075024736909292e-06, "loss": 0.5377, "step": 3486 }, { "epoch": 0.22, "grad_norm": 1.0270522832870483, "learning_rate": 9.074430134980129e-06, "loss": 0.567, "step": 3487 }, { "epoch": 0.22, "grad_norm": 1.1104378700256348, "learning_rate": 9.073835361490885e-06, "loss": 0.6008, "step": 3488 }, { "epoch": 0.22, "grad_norm": 1.0529365539550781, "learning_rate": 9.073240416466609e-06, "loss": 0.577, "step": 3489 }, { "epoch": 0.22, "grad_norm": 1.0253465175628662, "learning_rate": 9.072645299932347e-06, "loss": 0.5496, "step": 3490 }, { "epoch": 0.22, "grad_norm": 1.056884527206421, "learning_rate": 9.07205001191316e-06, "loss": 0.5844, "step": 3491 }, { "epoch": 0.22, "grad_norm": 0.9805004000663757, "learning_rate": 9.071454552434111e-06, "loss": 0.5795, "step": 3492 }, { "epoch": 0.22, "grad_norm": 1.0552654266357422, "learning_rate": 9.070858921520276e-06, "loss": 0.5736, "step": 3493 }, { "epoch": 0.22, "grad_norm": 0.9912766814231873, "learning_rate": 9.070263119196734e-06, "loss": 0.5463, "step": 3494 }, { "epoch": 0.22, "grad_norm": 1.0589691400527954, "learning_rate": 9.06966714548857e-06, "loss": 0.5687, "step": 3495 }, { "epoch": 0.22, "grad_norm": 1.0722336769104004, "learning_rate": 9.069071000420879e-06, "loss": 0.5335, "step": 3496 }, { "epoch": 0.22, "grad_norm": 1.0283418893814087, "learning_rate": 9.068474684018765e-06, "loss": 0.5064, "step": 3497 }, { "epoch": 0.22, "grad_norm": 1.0223890542984009, "learning_rate": 9.067878196307334e-06, "loss": 0.5515, "step": 3498 }, { "epoch": 0.22, "grad_norm": 1.053024172782898, "learning_rate": 9.067281537311705e-06, "loss": 0.5376, "step": 3499 }, { "epoch": 0.22, "grad_norm": 1.0045981407165527, "learning_rate": 9.066684707056999e-06, "loss": 0.5207, "step": 3500 }, { "epoch": 0.22, "grad_norm": 1.0248801708221436, "learning_rate": 9.066087705568346e-06, "loss": 0.5377, "step": 3501 }, { "epoch": 0.22, "grad_norm": 1.0563328266143799, "learning_rate": 9.065490532870884e-06, "loss": 0.5073, "step": 3502 }, { "epoch": 0.22, "grad_norm": 1.0533092021942139, "learning_rate": 9.06489318898976e-06, "loss": 0.5845, "step": 3503 }, { "epoch": 0.22, "grad_norm": 1.1314165592193604, "learning_rate": 9.064295673950125e-06, "loss": 0.5801, "step": 3504 }, { "epoch": 0.22, "grad_norm": 1.0564184188842773, "learning_rate": 9.063697987777136e-06, "loss": 0.5964, "step": 3505 }, { "epoch": 0.22, "grad_norm": 1.0110223293304443, "learning_rate": 9.063100130495962e-06, "loss": 0.5694, "step": 3506 }, { "epoch": 0.22, "grad_norm": 0.9970470666885376, "learning_rate": 9.062502102131777e-06, "loss": 0.5601, "step": 3507 }, { "epoch": 0.22, "grad_norm": 1.0728727579116821, "learning_rate": 9.06190390270976e-06, "loss": 0.5879, "step": 3508 }, { "epoch": 0.22, "grad_norm": 0.9721595048904419, "learning_rate": 9.0613055322551e-06, "loss": 0.5765, "step": 3509 }, { "epoch": 0.22, "grad_norm": 1.0315375328063965, "learning_rate": 9.060706990792993e-06, "loss": 0.5413, "step": 3510 }, { "epoch": 0.22, "grad_norm": 0.981389045715332, "learning_rate": 9.06010827834864e-06, "loss": 0.5462, "step": 3511 }, { "epoch": 0.22, "grad_norm": 1.0459198951721191, "learning_rate": 9.059509394947252e-06, "loss": 0.546, "step": 3512 }, { "epoch": 0.22, "grad_norm": 1.0360572338104248, "learning_rate": 9.058910340614045e-06, "loss": 0.5462, "step": 3513 }, { "epoch": 0.22, "grad_norm": 1.0255624055862427, "learning_rate": 9.058311115374244e-06, "loss": 0.5757, "step": 3514 }, { "epoch": 0.22, "grad_norm": 0.991475522518158, "learning_rate": 9.057711719253077e-06, "loss": 0.5359, "step": 3515 }, { "epoch": 0.22, "grad_norm": 1.0928494930267334, "learning_rate": 9.057112152275788e-06, "loss": 0.5771, "step": 3516 }, { "epoch": 0.22, "grad_norm": 0.9886402487754822, "learning_rate": 9.05651241446762e-06, "loss": 0.5433, "step": 3517 }, { "epoch": 0.22, "grad_norm": 0.9913406372070312, "learning_rate": 9.055912505853826e-06, "loss": 0.5207, "step": 3518 }, { "epoch": 0.22, "grad_norm": 1.0736424922943115, "learning_rate": 9.055312426459663e-06, "loss": 0.5671, "step": 3519 }, { "epoch": 0.22, "grad_norm": 1.0118083953857422, "learning_rate": 9.054712176310405e-06, "loss": 0.6284, "step": 3520 }, { "epoch": 0.22, "grad_norm": 1.0888111591339111, "learning_rate": 9.05411175543132e-06, "loss": 0.5334, "step": 3521 }, { "epoch": 0.22, "grad_norm": 1.0824137926101685, "learning_rate": 9.053511163847694e-06, "loss": 0.5147, "step": 3522 }, { "epoch": 0.22, "grad_norm": 1.1179592609405518, "learning_rate": 9.052910401584812e-06, "loss": 0.6117, "step": 3523 }, { "epoch": 0.22, "grad_norm": 0.9232814311981201, "learning_rate": 9.052309468667974e-06, "loss": 0.5319, "step": 3524 }, { "epoch": 0.22, "grad_norm": 0.9559396505355835, "learning_rate": 9.05170836512248e-06, "loss": 0.505, "step": 3525 }, { "epoch": 0.22, "grad_norm": 1.0154615640640259, "learning_rate": 9.051107090973642e-06, "loss": 0.5432, "step": 3526 }, { "epoch": 0.22, "grad_norm": 1.0286099910736084, "learning_rate": 9.050505646246777e-06, "loss": 0.6052, "step": 3527 }, { "epoch": 0.22, "grad_norm": 0.9501967430114746, "learning_rate": 9.04990403096721e-06, "loss": 0.524, "step": 3528 }, { "epoch": 0.22, "grad_norm": 1.0195550918579102, "learning_rate": 9.049302245160273e-06, "loss": 0.5972, "step": 3529 }, { "epoch": 0.22, "grad_norm": 1.0528881549835205, "learning_rate": 9.048700288851305e-06, "loss": 0.5714, "step": 3530 }, { "epoch": 0.22, "grad_norm": 0.9923539161682129, "learning_rate": 9.048098162065652e-06, "loss": 0.5882, "step": 3531 }, { "epoch": 0.22, "grad_norm": 1.0214570760726929, "learning_rate": 9.047495864828668e-06, "loss": 0.5554, "step": 3532 }, { "epoch": 0.22, "grad_norm": 1.0018694400787354, "learning_rate": 9.046893397165713e-06, "loss": 0.5691, "step": 3533 }, { "epoch": 0.22, "grad_norm": 1.0348128080368042, "learning_rate": 9.046290759102155e-06, "loss": 0.5482, "step": 3534 }, { "epoch": 0.22, "grad_norm": 0.9383018612861633, "learning_rate": 9.04568795066337e-06, "loss": 0.5379, "step": 3535 }, { "epoch": 0.22, "grad_norm": 1.0389997959136963, "learning_rate": 9.045084971874738e-06, "loss": 0.5788, "step": 3536 }, { "epoch": 0.22, "grad_norm": 1.0366233587265015, "learning_rate": 9.044481822761651e-06, "loss": 0.5706, "step": 3537 }, { "epoch": 0.22, "grad_norm": 1.0714468955993652, "learning_rate": 9.043878503349503e-06, "loss": 0.555, "step": 3538 }, { "epoch": 0.22, "grad_norm": 0.9625380635261536, "learning_rate": 9.043275013663699e-06, "loss": 0.5278, "step": 3539 }, { "epoch": 0.22, "grad_norm": 1.0158138275146484, "learning_rate": 9.04267135372965e-06, "loss": 0.5828, "step": 3540 }, { "epoch": 0.22, "grad_norm": 1.1385537385940552, "learning_rate": 9.042067523572775e-06, "loss": 0.539, "step": 3541 }, { "epoch": 0.22, "grad_norm": 1.0848937034606934, "learning_rate": 9.041463523218496e-06, "loss": 0.6276, "step": 3542 }, { "epoch": 0.22, "grad_norm": 1.0919116735458374, "learning_rate": 9.040859352692249e-06, "loss": 0.612, "step": 3543 }, { "epoch": 0.22, "grad_norm": 1.0339999198913574, "learning_rate": 9.04025501201947e-06, "loss": 0.5891, "step": 3544 }, { "epoch": 0.22, "grad_norm": 0.9081238508224487, "learning_rate": 9.039650501225608e-06, "loss": 0.5463, "step": 3545 }, { "epoch": 0.22, "grad_norm": 1.077070951461792, "learning_rate": 9.039045820336116e-06, "loss": 0.5707, "step": 3546 }, { "epoch": 0.22, "grad_norm": 0.9889419078826904, "learning_rate": 9.038440969376456e-06, "loss": 0.5492, "step": 3547 }, { "epoch": 0.22, "grad_norm": 1.0376169681549072, "learning_rate": 9.037835948372095e-06, "loss": 0.5193, "step": 3548 }, { "epoch": 0.22, "grad_norm": 1.0179705619812012, "learning_rate": 9.03723075734851e-06, "loss": 0.5445, "step": 3549 }, { "epoch": 0.22, "grad_norm": 1.0555214881896973, "learning_rate": 9.03662539633118e-06, "loss": 0.5367, "step": 3550 }, { "epoch": 0.22, "grad_norm": 1.040494680404663, "learning_rate": 9.0360198653456e-06, "loss": 0.5805, "step": 3551 }, { "epoch": 0.23, "grad_norm": 0.985089898109436, "learning_rate": 9.035414164417262e-06, "loss": 0.5218, "step": 3552 }, { "epoch": 0.23, "grad_norm": 1.0762962102890015, "learning_rate": 9.034808293571672e-06, "loss": 0.5925, "step": 3553 }, { "epoch": 0.23, "grad_norm": 1.0686595439910889, "learning_rate": 9.03420225283434e-06, "loss": 0.5909, "step": 3554 }, { "epoch": 0.23, "grad_norm": 0.9033122658729553, "learning_rate": 9.033596042230788e-06, "loss": 0.4947, "step": 3555 }, { "epoch": 0.23, "grad_norm": 1.055325984954834, "learning_rate": 9.032989661786535e-06, "loss": 0.5935, "step": 3556 }, { "epoch": 0.23, "grad_norm": 0.9803203344345093, "learning_rate": 9.032383111527119e-06, "loss": 0.5634, "step": 3557 }, { "epoch": 0.23, "grad_norm": 1.0444809198379517, "learning_rate": 9.031776391478077e-06, "loss": 0.5677, "step": 3558 }, { "epoch": 0.23, "grad_norm": 1.0093274116516113, "learning_rate": 9.031169501664958e-06, "loss": 0.5633, "step": 3559 }, { "epoch": 0.23, "grad_norm": 1.0077446699142456, "learning_rate": 9.030562442113313e-06, "loss": 0.5624, "step": 3560 }, { "epoch": 0.23, "grad_norm": 0.9775265455245972, "learning_rate": 9.029955212848706e-06, "loss": 0.5783, "step": 3561 }, { "epoch": 0.23, "grad_norm": 1.0331493616104126, "learning_rate": 9.029347813896704e-06, "loss": 0.5234, "step": 3562 }, { "epoch": 0.23, "grad_norm": 0.9706992506980896, "learning_rate": 9.028740245282881e-06, "loss": 0.5356, "step": 3563 }, { "epoch": 0.23, "grad_norm": 0.9969547986984253, "learning_rate": 9.028132507032823e-06, "loss": 0.5537, "step": 3564 }, { "epoch": 0.23, "grad_norm": 0.9665756225585938, "learning_rate": 9.027524599172117e-06, "loss": 0.5453, "step": 3565 }, { "epoch": 0.23, "grad_norm": 1.1190989017486572, "learning_rate": 9.026916521726361e-06, "loss": 0.6168, "step": 3566 }, { "epoch": 0.23, "grad_norm": 1.0341004133224487, "learning_rate": 9.026308274721161e-06, "loss": 0.528, "step": 3567 }, { "epoch": 0.23, "grad_norm": 1.0381500720977783, "learning_rate": 9.025699858182125e-06, "loss": 0.5352, "step": 3568 }, { "epoch": 0.23, "grad_norm": 0.9730229377746582, "learning_rate": 9.02509127213487e-06, "loss": 0.5532, "step": 3569 }, { "epoch": 0.23, "grad_norm": 0.9915220737457275, "learning_rate": 9.024482516605026e-06, "loss": 0.5211, "step": 3570 }, { "epoch": 0.23, "grad_norm": 1.0444742441177368, "learning_rate": 9.023873591618224e-06, "loss": 0.5641, "step": 3571 }, { "epoch": 0.23, "grad_norm": 1.0250283479690552, "learning_rate": 9.023264497200102e-06, "loss": 0.5628, "step": 3572 }, { "epoch": 0.23, "grad_norm": 0.9876275062561035, "learning_rate": 9.022655233376308e-06, "loss": 0.5419, "step": 3573 }, { "epoch": 0.23, "grad_norm": 0.9815468788146973, "learning_rate": 9.022045800172493e-06, "loss": 0.5366, "step": 3574 }, { "epoch": 0.23, "grad_norm": 0.9828575849533081, "learning_rate": 9.021436197614326e-06, "loss": 0.5414, "step": 3575 }, { "epoch": 0.23, "grad_norm": 1.1150814294815063, "learning_rate": 9.020826425727468e-06, "loss": 0.5584, "step": 3576 }, { "epoch": 0.23, "grad_norm": 1.0118638277053833, "learning_rate": 9.020216484537595e-06, "loss": 0.5998, "step": 3577 }, { "epoch": 0.23, "grad_norm": 1.1020475625991821, "learning_rate": 9.019606374070394e-06, "loss": 0.5849, "step": 3578 }, { "epoch": 0.23, "grad_norm": 1.0934542417526245, "learning_rate": 9.01899609435155e-06, "loss": 0.5789, "step": 3579 }, { "epoch": 0.23, "grad_norm": 1.0679789781570435, "learning_rate": 9.018385645406765e-06, "loss": 0.5612, "step": 3580 }, { "epoch": 0.23, "grad_norm": 0.9859147071838379, "learning_rate": 9.017775027261735e-06, "loss": 0.5722, "step": 3581 }, { "epoch": 0.23, "grad_norm": 1.018585205078125, "learning_rate": 9.017164239942178e-06, "loss": 0.5516, "step": 3582 }, { "epoch": 0.23, "grad_norm": 0.986737847328186, "learning_rate": 9.016553283473808e-06, "loss": 0.576, "step": 3583 }, { "epoch": 0.23, "grad_norm": 0.925209105014801, "learning_rate": 9.015942157882353e-06, "loss": 0.5147, "step": 3584 }, { "epoch": 0.23, "grad_norm": 0.9873782992362976, "learning_rate": 9.015330863193543e-06, "loss": 0.5421, "step": 3585 }, { "epoch": 0.23, "grad_norm": 1.02249014377594, "learning_rate": 9.01471939943312e-06, "loss": 0.5569, "step": 3586 }, { "epoch": 0.23, "grad_norm": 1.0699585676193237, "learning_rate": 9.014107766626828e-06, "loss": 0.5106, "step": 3587 }, { "epoch": 0.23, "grad_norm": 0.9971165060997009, "learning_rate": 9.013495964800423e-06, "loss": 0.5997, "step": 3588 }, { "epoch": 0.23, "grad_norm": 0.9946675896644592, "learning_rate": 9.012883993979663e-06, "loss": 0.6373, "step": 3589 }, { "epoch": 0.23, "grad_norm": 0.9921069145202637, "learning_rate": 9.01227185419032e-06, "loss": 0.5683, "step": 3590 }, { "epoch": 0.23, "grad_norm": 1.1467154026031494, "learning_rate": 9.011659545458167e-06, "loss": 0.5316, "step": 3591 }, { "epoch": 0.23, "grad_norm": 1.0683609247207642, "learning_rate": 9.011047067808985e-06, "loss": 0.5763, "step": 3592 }, { "epoch": 0.23, "grad_norm": 0.9084008932113647, "learning_rate": 9.010434421268564e-06, "loss": 0.5203, "step": 3593 }, { "epoch": 0.23, "grad_norm": 1.1025091409683228, "learning_rate": 9.009821605862701e-06, "loss": 0.5671, "step": 3594 }, { "epoch": 0.23, "grad_norm": 0.9929082989692688, "learning_rate": 9.0092086216172e-06, "loss": 0.5385, "step": 3595 }, { "epoch": 0.23, "grad_norm": 0.9512858986854553, "learning_rate": 9.00859546855787e-06, "loss": 0.5357, "step": 3596 }, { "epoch": 0.23, "grad_norm": 0.9521564841270447, "learning_rate": 9.007982146710533e-06, "loss": 0.5221, "step": 3597 }, { "epoch": 0.23, "grad_norm": 1.0225049257278442, "learning_rate": 9.007368656101006e-06, "loss": 0.5682, "step": 3598 }, { "epoch": 0.23, "grad_norm": 0.9886544942855835, "learning_rate": 9.006754996755129e-06, "loss": 0.5341, "step": 3599 }, { "epoch": 0.23, "grad_norm": 1.0694276094436646, "learning_rate": 9.006141168698735e-06, "loss": 0.5978, "step": 3600 }, { "epoch": 0.23, "grad_norm": 0.9691435694694519, "learning_rate": 9.005527171957676e-06, "loss": 0.5321, "step": 3601 }, { "epoch": 0.23, "grad_norm": 1.0941452980041504, "learning_rate": 9.004913006557798e-06, "loss": 0.5851, "step": 3602 }, { "epoch": 0.23, "grad_norm": 0.9894156455993652, "learning_rate": 9.004298672524967e-06, "loss": 0.5904, "step": 3603 }, { "epoch": 0.23, "grad_norm": 0.9922523498535156, "learning_rate": 9.003684169885049e-06, "loss": 0.5586, "step": 3604 }, { "epoch": 0.23, "grad_norm": 0.9479037523269653, "learning_rate": 9.00306949866392e-06, "loss": 0.5359, "step": 3605 }, { "epoch": 0.23, "grad_norm": 0.9888592958450317, "learning_rate": 9.002454658887458e-06, "loss": 0.5346, "step": 3606 }, { "epoch": 0.23, "grad_norm": 1.0730727910995483, "learning_rate": 9.001839650581554e-06, "loss": 0.5736, "step": 3607 }, { "epoch": 0.23, "grad_norm": 1.0897572040557861, "learning_rate": 9.001224473772104e-06, "loss": 0.5993, "step": 3608 }, { "epoch": 0.23, "grad_norm": 1.0272395610809326, "learning_rate": 9.000609128485011e-06, "loss": 0.501, "step": 3609 }, { "epoch": 0.23, "grad_norm": 1.076352596282959, "learning_rate": 8.999993614746184e-06, "loss": 0.6239, "step": 3610 }, { "epoch": 0.23, "grad_norm": 0.9634479880332947, "learning_rate": 8.999377932581541e-06, "loss": 0.5195, "step": 3611 }, { "epoch": 0.23, "grad_norm": 1.0180588960647583, "learning_rate": 8.998762082017006e-06, "loss": 0.5528, "step": 3612 }, { "epoch": 0.23, "grad_norm": 1.1297459602355957, "learning_rate": 8.998146063078512e-06, "loss": 0.6002, "step": 3613 }, { "epoch": 0.23, "grad_norm": 1.0049798488616943, "learning_rate": 8.997529875791993e-06, "loss": 0.5378, "step": 3614 }, { "epoch": 0.23, "grad_norm": 0.9840946793556213, "learning_rate": 8.9969135201834e-06, "loss": 0.5592, "step": 3615 }, { "epoch": 0.23, "grad_norm": 1.0649902820587158, "learning_rate": 8.996296996278682e-06, "loss": 0.6086, "step": 3616 }, { "epoch": 0.23, "grad_norm": 0.9876604080200195, "learning_rate": 8.9956803041038e-06, "loss": 0.5474, "step": 3617 }, { "epoch": 0.23, "grad_norm": 0.9918978214263916, "learning_rate": 8.99506344368472e-06, "loss": 0.4996, "step": 3618 }, { "epoch": 0.23, "grad_norm": 0.9975392818450928, "learning_rate": 8.994446415047415e-06, "loss": 0.5084, "step": 3619 }, { "epoch": 0.23, "grad_norm": 1.0040109157562256, "learning_rate": 8.993829218217867e-06, "loss": 0.5604, "step": 3620 }, { "epoch": 0.23, "grad_norm": 1.0073280334472656, "learning_rate": 8.993211853222065e-06, "loss": 0.5751, "step": 3621 }, { "epoch": 0.23, "grad_norm": 1.0646840333938599, "learning_rate": 8.992594320086005e-06, "loss": 0.5922, "step": 3622 }, { "epoch": 0.23, "grad_norm": 0.9566529989242554, "learning_rate": 8.991976618835685e-06, "loss": 0.556, "step": 3623 }, { "epoch": 0.23, "grad_norm": 1.0741969347000122, "learning_rate": 8.991358749497117e-06, "loss": 0.5727, "step": 3624 }, { "epoch": 0.23, "grad_norm": 1.0362106561660767, "learning_rate": 8.990740712096317e-06, "loss": 0.5696, "step": 3625 }, { "epoch": 0.23, "grad_norm": 0.9603510499000549, "learning_rate": 8.99012250665931e-06, "loss": 0.5189, "step": 3626 }, { "epoch": 0.23, "grad_norm": 1.1036278009414673, "learning_rate": 8.989504133212123e-06, "loss": 0.6535, "step": 3627 }, { "epoch": 0.23, "grad_norm": 1.18490469455719, "learning_rate": 8.988885591780795e-06, "loss": 0.5566, "step": 3628 }, { "epoch": 0.23, "grad_norm": 1.0526707172393799, "learning_rate": 8.988266882391374e-06, "loss": 0.5357, "step": 3629 }, { "epoch": 0.23, "grad_norm": 1.0246859788894653, "learning_rate": 8.987648005069907e-06, "loss": 0.5803, "step": 3630 }, { "epoch": 0.23, "grad_norm": 1.0607661008834839, "learning_rate": 8.987028959842454e-06, "loss": 0.5744, "step": 3631 }, { "epoch": 0.23, "grad_norm": 1.073825478553772, "learning_rate": 8.986409746735084e-06, "loss": 0.5299, "step": 3632 }, { "epoch": 0.23, "grad_norm": 1.0737742185592651, "learning_rate": 8.985790365773864e-06, "loss": 0.5551, "step": 3633 }, { "epoch": 0.23, "grad_norm": 1.0232653617858887, "learning_rate": 8.985170816984878e-06, "loss": 0.5318, "step": 3634 }, { "epoch": 0.23, "grad_norm": 0.9758453369140625, "learning_rate": 8.984551100394212e-06, "loss": 0.5652, "step": 3635 }, { "epoch": 0.23, "grad_norm": 1.0398926734924316, "learning_rate": 8.98393121602796e-06, "loss": 0.5543, "step": 3636 }, { "epoch": 0.23, "grad_norm": 0.9773332476615906, "learning_rate": 8.983311163912227e-06, "loss": 0.5269, "step": 3637 }, { "epoch": 0.23, "grad_norm": 1.0076240301132202, "learning_rate": 8.982690944073113e-06, "loss": 0.5352, "step": 3638 }, { "epoch": 0.23, "grad_norm": 1.0406144857406616, "learning_rate": 8.982070556536741e-06, "loss": 0.5671, "step": 3639 }, { "epoch": 0.23, "grad_norm": 1.0525169372558594, "learning_rate": 8.98145000132923e-06, "loss": 0.5772, "step": 3640 }, { "epoch": 0.23, "grad_norm": 1.0494791269302368, "learning_rate": 8.980829278476711e-06, "loss": 0.5476, "step": 3641 }, { "epoch": 0.23, "grad_norm": 1.0154716968536377, "learning_rate": 8.980208388005318e-06, "loss": 0.5468, "step": 3642 }, { "epoch": 0.23, "grad_norm": 0.9966602325439453, "learning_rate": 8.979587329941197e-06, "loss": 0.5753, "step": 3643 }, { "epoch": 0.23, "grad_norm": 1.0978286266326904, "learning_rate": 8.978966104310497e-06, "loss": 0.6039, "step": 3644 }, { "epoch": 0.23, "grad_norm": 0.9729987978935242, "learning_rate": 8.978344711139374e-06, "loss": 0.5369, "step": 3645 }, { "epoch": 0.23, "grad_norm": 1.1959797143936157, "learning_rate": 8.977723150453999e-06, "loss": 0.5857, "step": 3646 }, { "epoch": 0.23, "grad_norm": 1.0096715688705444, "learning_rate": 8.977101422280536e-06, "loss": 0.5511, "step": 3647 }, { "epoch": 0.23, "grad_norm": 0.9775328040122986, "learning_rate": 8.97647952664517e-06, "loss": 0.517, "step": 3648 }, { "epoch": 0.23, "grad_norm": 0.944109320640564, "learning_rate": 8.975857463574082e-06, "loss": 0.4827, "step": 3649 }, { "epoch": 0.23, "grad_norm": 1.0979822874069214, "learning_rate": 8.97523523309347e-06, "loss": 0.5146, "step": 3650 }, { "epoch": 0.23, "grad_norm": 1.0472910404205322, "learning_rate": 8.974612835229528e-06, "loss": 0.5592, "step": 3651 }, { "epoch": 0.23, "grad_norm": 0.9543821811676025, "learning_rate": 8.973990270008467e-06, "loss": 0.5554, "step": 3652 }, { "epoch": 0.23, "grad_norm": 0.9596877694129944, "learning_rate": 8.973367537456502e-06, "loss": 0.5711, "step": 3653 }, { "epoch": 0.23, "grad_norm": 1.1064952611923218, "learning_rate": 8.97274463759985e-06, "loss": 0.6165, "step": 3654 }, { "epoch": 0.23, "grad_norm": 1.1261231899261475, "learning_rate": 8.972121570464744e-06, "loss": 0.5903, "step": 3655 }, { "epoch": 0.23, "grad_norm": 1.0051484107971191, "learning_rate": 8.971498336077415e-06, "loss": 0.5356, "step": 3656 }, { "epoch": 0.23, "grad_norm": 1.0241944789886475, "learning_rate": 8.970874934464108e-06, "loss": 0.532, "step": 3657 }, { "epoch": 0.23, "grad_norm": 1.0307494401931763, "learning_rate": 8.970251365651071e-06, "loss": 0.5779, "step": 3658 }, { "epoch": 0.23, "grad_norm": 1.0298916101455688, "learning_rate": 8.969627629664559e-06, "loss": 0.4963, "step": 3659 }, { "epoch": 0.23, "grad_norm": 0.992840051651001, "learning_rate": 8.969003726530838e-06, "loss": 0.5462, "step": 3660 }, { "epoch": 0.23, "grad_norm": 0.9768746495246887, "learning_rate": 8.968379656276177e-06, "loss": 0.5891, "step": 3661 }, { "epoch": 0.23, "grad_norm": 1.003993034362793, "learning_rate": 8.967755418926854e-06, "loss": 0.5489, "step": 3662 }, { "epoch": 0.23, "grad_norm": 1.04116690158844, "learning_rate": 8.967131014509152e-06, "loss": 0.5331, "step": 3663 }, { "epoch": 0.23, "grad_norm": 1.098099708557129, "learning_rate": 8.966506443049366e-06, "loss": 0.5863, "step": 3664 }, { "epoch": 0.23, "grad_norm": 1.0058609247207642, "learning_rate": 8.965881704573789e-06, "loss": 0.5659, "step": 3665 }, { "epoch": 0.23, "grad_norm": 1.147666573524475, "learning_rate": 8.965256799108733e-06, "loss": 0.5475, "step": 3666 }, { "epoch": 0.23, "grad_norm": 1.0394258499145508, "learning_rate": 8.964631726680504e-06, "loss": 0.591, "step": 3667 }, { "epoch": 0.23, "grad_norm": 1.0518441200256348, "learning_rate": 8.964006487315426e-06, "loss": 0.6134, "step": 3668 }, { "epoch": 0.23, "grad_norm": 1.028096318244934, "learning_rate": 8.963381081039826e-06, "loss": 0.5475, "step": 3669 }, { "epoch": 0.23, "grad_norm": 0.9712651371955872, "learning_rate": 8.962755507880036e-06, "loss": 0.5356, "step": 3670 }, { "epoch": 0.23, "grad_norm": 1.001176357269287, "learning_rate": 8.962129767862395e-06, "loss": 0.5402, "step": 3671 }, { "epoch": 0.23, "grad_norm": 0.9954277276992798, "learning_rate": 8.961503861013255e-06, "loss": 0.5845, "step": 3672 }, { "epoch": 0.23, "grad_norm": 1.0858553647994995, "learning_rate": 8.960877787358968e-06, "loss": 0.547, "step": 3673 }, { "epoch": 0.23, "grad_norm": 1.068434476852417, "learning_rate": 8.960251546925895e-06, "loss": 0.572, "step": 3674 }, { "epoch": 0.23, "grad_norm": 0.898658812046051, "learning_rate": 8.959625139740407e-06, "loss": 0.4826, "step": 3675 }, { "epoch": 0.23, "grad_norm": 1.0096508264541626, "learning_rate": 8.95899856582888e-06, "loss": 0.5776, "step": 3676 }, { "epoch": 0.23, "grad_norm": 0.932982325553894, "learning_rate": 8.958371825217693e-06, "loss": 0.5157, "step": 3677 }, { "epoch": 0.23, "grad_norm": 1.0382206439971924, "learning_rate": 8.957744917933241e-06, "loss": 0.5287, "step": 3678 }, { "epoch": 0.23, "grad_norm": 1.0119495391845703, "learning_rate": 8.957117844001919e-06, "loss": 0.5399, "step": 3679 }, { "epoch": 0.23, "grad_norm": 1.061253547668457, "learning_rate": 8.956490603450128e-06, "loss": 0.5403, "step": 3680 }, { "epoch": 0.23, "grad_norm": 1.0558768510818481, "learning_rate": 8.955863196304282e-06, "loss": 0.5512, "step": 3681 }, { "epoch": 0.23, "grad_norm": 1.0816651582717896, "learning_rate": 8.9552356225908e-06, "loss": 0.5424, "step": 3682 }, { "epoch": 0.23, "grad_norm": 0.955443799495697, "learning_rate": 8.954607882336105e-06, "loss": 0.5196, "step": 3683 }, { "epoch": 0.23, "grad_norm": 1.108539342880249, "learning_rate": 8.953979975566626e-06, "loss": 0.5846, "step": 3684 }, { "epoch": 0.23, "grad_norm": 1.037209391593933, "learning_rate": 8.953351902308807e-06, "loss": 0.5725, "step": 3685 }, { "epoch": 0.23, "grad_norm": 1.0951324701309204, "learning_rate": 8.952723662589093e-06, "loss": 0.5924, "step": 3686 }, { "epoch": 0.23, "grad_norm": 1.0157463550567627, "learning_rate": 8.952095256433934e-06, "loss": 0.5943, "step": 3687 }, { "epoch": 0.23, "grad_norm": 0.954087495803833, "learning_rate": 8.951466683869795e-06, "loss": 0.4879, "step": 3688 }, { "epoch": 0.23, "grad_norm": 1.0344793796539307, "learning_rate": 8.950837944923138e-06, "loss": 0.525, "step": 3689 }, { "epoch": 0.23, "grad_norm": 1.050633192062378, "learning_rate": 8.95020903962044e-06, "loss": 0.5889, "step": 3690 }, { "epoch": 0.23, "grad_norm": 1.2160776853561401, "learning_rate": 8.94957996798818e-06, "loss": 0.5498, "step": 3691 }, { "epoch": 0.23, "grad_norm": 0.988061249256134, "learning_rate": 8.948950730052847e-06, "loss": 0.5514, "step": 3692 }, { "epoch": 0.23, "grad_norm": 1.0714434385299683, "learning_rate": 8.948321325840937e-06, "loss": 0.6054, "step": 3693 }, { "epoch": 0.23, "grad_norm": 1.0225694179534912, "learning_rate": 8.94769175537895e-06, "loss": 0.5472, "step": 3694 }, { "epoch": 0.23, "grad_norm": 1.0245158672332764, "learning_rate": 8.9470620186934e-06, "loss": 0.5868, "step": 3695 }, { "epoch": 0.23, "grad_norm": 1.0072945356369019, "learning_rate": 8.946432115810795e-06, "loss": 0.5886, "step": 3696 }, { "epoch": 0.23, "grad_norm": 1.1089059114456177, "learning_rate": 8.945802046757666e-06, "loss": 0.6025, "step": 3697 }, { "epoch": 0.23, "grad_norm": 1.119722604751587, "learning_rate": 8.945171811560535e-06, "loss": 0.5642, "step": 3698 }, { "epoch": 0.23, "grad_norm": 1.1201279163360596, "learning_rate": 8.944541410245947e-06, "loss": 0.5244, "step": 3699 }, { "epoch": 0.23, "grad_norm": 1.0516458749771118, "learning_rate": 8.943910842840439e-06, "loss": 0.5755, "step": 3700 }, { "epoch": 0.23, "grad_norm": 1.0389517545700073, "learning_rate": 8.943280109370568e-06, "loss": 0.5597, "step": 3701 }, { "epoch": 0.23, "grad_norm": 0.9651356935501099, "learning_rate": 8.942649209862888e-06, "loss": 0.5182, "step": 3702 }, { "epoch": 0.23, "grad_norm": 0.9806023240089417, "learning_rate": 8.942018144343965e-06, "loss": 0.5615, "step": 3703 }, { "epoch": 0.23, "grad_norm": 1.080400824546814, "learning_rate": 8.941386912840372e-06, "loss": 0.5771, "step": 3704 }, { "epoch": 0.23, "grad_norm": 0.9524903893470764, "learning_rate": 8.940755515378687e-06, "loss": 0.5383, "step": 3705 }, { "epoch": 0.23, "grad_norm": 1.0646077394485474, "learning_rate": 8.940123951985495e-06, "loss": 0.547, "step": 3706 }, { "epoch": 0.23, "grad_norm": 1.1073490381240845, "learning_rate": 8.939492222687392e-06, "loss": 0.5689, "step": 3707 }, { "epoch": 0.23, "grad_norm": 1.01206374168396, "learning_rate": 8.938860327510975e-06, "loss": 0.5544, "step": 3708 }, { "epoch": 0.23, "grad_norm": 1.0279171466827393, "learning_rate": 8.938228266482852e-06, "loss": 0.5748, "step": 3709 }, { "epoch": 0.24, "grad_norm": 1.0886095762252808, "learning_rate": 8.937596039629637e-06, "loss": 0.5399, "step": 3710 }, { "epoch": 0.24, "grad_norm": 1.0480284690856934, "learning_rate": 8.93696364697795e-06, "loss": 0.566, "step": 3711 }, { "epoch": 0.24, "grad_norm": 0.9844214916229248, "learning_rate": 8.936331088554419e-06, "loss": 0.5417, "step": 3712 }, { "epoch": 0.24, "grad_norm": 0.9495744705200195, "learning_rate": 8.93569836438568e-06, "loss": 0.5301, "step": 3713 }, { "epoch": 0.24, "grad_norm": 1.0315167903900146, "learning_rate": 8.935065474498375e-06, "loss": 0.543, "step": 3714 }, { "epoch": 0.24, "grad_norm": 1.0174928903579712, "learning_rate": 8.934432418919153e-06, "loss": 0.5741, "step": 3715 }, { "epoch": 0.24, "grad_norm": 1.0040485858917236, "learning_rate": 8.933799197674667e-06, "loss": 0.537, "step": 3716 }, { "epoch": 0.24, "grad_norm": 1.072627305984497, "learning_rate": 8.933165810791579e-06, "loss": 0.5555, "step": 3717 }, { "epoch": 0.24, "grad_norm": 0.9701638221740723, "learning_rate": 8.932532258296565e-06, "loss": 0.5739, "step": 3718 }, { "epoch": 0.24, "grad_norm": 1.0700533390045166, "learning_rate": 8.931898540216297e-06, "loss": 0.5862, "step": 3719 }, { "epoch": 0.24, "grad_norm": 1.0565009117126465, "learning_rate": 8.931264656577459e-06, "loss": 0.5621, "step": 3720 }, { "epoch": 0.24, "grad_norm": 1.0032659769058228, "learning_rate": 8.930630607406743e-06, "loss": 0.5661, "step": 3721 }, { "epoch": 0.24, "grad_norm": 1.0463674068450928, "learning_rate": 8.929996392730844e-06, "loss": 0.5696, "step": 3722 }, { "epoch": 0.24, "grad_norm": 1.0808651447296143, "learning_rate": 8.92936201257647e-06, "loss": 0.5766, "step": 3723 }, { "epoch": 0.24, "grad_norm": 1.0453593730926514, "learning_rate": 8.928727466970331e-06, "loss": 0.5321, "step": 3724 }, { "epoch": 0.24, "grad_norm": 1.1240564584732056, "learning_rate": 8.928092755939145e-06, "loss": 0.5606, "step": 3725 }, { "epoch": 0.24, "grad_norm": 1.1166630983352661, "learning_rate": 8.927457879509638e-06, "loss": 0.5597, "step": 3726 }, { "epoch": 0.24, "grad_norm": 1.0657453536987305, "learning_rate": 8.926822837708542e-06, "loss": 0.5721, "step": 3727 }, { "epoch": 0.24, "grad_norm": 0.9846172332763672, "learning_rate": 8.926187630562597e-06, "loss": 0.5524, "step": 3728 }, { "epoch": 0.24, "grad_norm": 1.0242836475372314, "learning_rate": 8.925552258098549e-06, "loss": 0.5224, "step": 3729 }, { "epoch": 0.24, "grad_norm": 1.0484217405319214, "learning_rate": 8.924916720343151e-06, "loss": 0.5579, "step": 3730 }, { "epoch": 0.24, "grad_norm": 1.1069062948226929, "learning_rate": 8.924281017323164e-06, "loss": 0.5332, "step": 3731 }, { "epoch": 0.24, "grad_norm": 0.9859890341758728, "learning_rate": 8.923645149065354e-06, "loss": 0.5214, "step": 3732 }, { "epoch": 0.24, "grad_norm": 1.0303205251693726, "learning_rate": 8.923009115596498e-06, "loss": 0.5266, "step": 3733 }, { "epoch": 0.24, "grad_norm": 0.9743627309799194, "learning_rate": 8.922372916943374e-06, "loss": 0.5929, "step": 3734 }, { "epoch": 0.24, "grad_norm": 1.0966832637786865, "learning_rate": 8.921736553132772e-06, "loss": 0.5618, "step": 3735 }, { "epoch": 0.24, "grad_norm": 1.0996084213256836, "learning_rate": 8.921100024191486e-06, "loss": 0.5385, "step": 3736 }, { "epoch": 0.24, "grad_norm": 1.0357041358947754, "learning_rate": 8.920463330146318e-06, "loss": 0.5574, "step": 3737 }, { "epoch": 0.24, "grad_norm": 1.0976433753967285, "learning_rate": 8.919826471024078e-06, "loss": 0.5603, "step": 3738 }, { "epoch": 0.24, "grad_norm": 1.055237054824829, "learning_rate": 8.919189446851583e-06, "loss": 0.5319, "step": 3739 }, { "epoch": 0.24, "grad_norm": 1.0910391807556152, "learning_rate": 8.918552257655652e-06, "loss": 0.5432, "step": 3740 }, { "epoch": 0.24, "grad_norm": 1.0958404541015625, "learning_rate": 8.917914903463119e-06, "loss": 0.6082, "step": 3741 }, { "epoch": 0.24, "grad_norm": 1.0359296798706055, "learning_rate": 8.917277384300817e-06, "loss": 0.5747, "step": 3742 }, { "epoch": 0.24, "grad_norm": 0.9910179376602173, "learning_rate": 8.916639700195593e-06, "loss": 0.5327, "step": 3743 }, { "epoch": 0.24, "grad_norm": 1.0079916715621948, "learning_rate": 8.916001851174296e-06, "loss": 0.5484, "step": 3744 }, { "epoch": 0.24, "grad_norm": 1.047631859779358, "learning_rate": 8.915363837263782e-06, "loss": 0.5795, "step": 3745 }, { "epoch": 0.24, "grad_norm": 0.9888326525688171, "learning_rate": 8.91472565849092e-06, "loss": 0.5323, "step": 3746 }, { "epoch": 0.24, "grad_norm": 0.9907726049423218, "learning_rate": 8.914087314882578e-06, "loss": 0.5223, "step": 3747 }, { "epoch": 0.24, "grad_norm": 1.0509141683578491, "learning_rate": 8.913448806465634e-06, "loss": 0.5716, "step": 3748 }, { "epoch": 0.24, "grad_norm": 0.9748091697692871, "learning_rate": 8.912810133266976e-06, "loss": 0.5324, "step": 3749 }, { "epoch": 0.24, "grad_norm": 1.0037689208984375, "learning_rate": 8.912171295313493e-06, "loss": 0.5359, "step": 3750 }, { "epoch": 0.24, "grad_norm": 0.970689058303833, "learning_rate": 8.911532292632089e-06, "loss": 0.5731, "step": 3751 }, { "epoch": 0.24, "grad_norm": 1.1158560514450073, "learning_rate": 8.910893125249666e-06, "loss": 0.5589, "step": 3752 }, { "epoch": 0.24, "grad_norm": 1.082456350326538, "learning_rate": 8.91025379319314e-06, "loss": 0.5883, "step": 3753 }, { "epoch": 0.24, "grad_norm": 0.9528515338897705, "learning_rate": 8.909614296489428e-06, "loss": 0.5179, "step": 3754 }, { "epoch": 0.24, "grad_norm": 1.0401341915130615, "learning_rate": 8.908974635165458e-06, "loss": 0.5784, "step": 3755 }, { "epoch": 0.24, "grad_norm": 0.9220054745674133, "learning_rate": 8.908334809248165e-06, "loss": 0.5363, "step": 3756 }, { "epoch": 0.24, "grad_norm": 1.0673452615737915, "learning_rate": 8.90769481876449e-06, "loss": 0.6093, "step": 3757 }, { "epoch": 0.24, "grad_norm": 1.0971558094024658, "learning_rate": 8.90705466374138e-06, "loss": 0.5634, "step": 3758 }, { "epoch": 0.24, "grad_norm": 1.1336888074874878, "learning_rate": 8.906414344205789e-06, "loss": 0.5662, "step": 3759 }, { "epoch": 0.24, "grad_norm": 1.0179674625396729, "learning_rate": 8.905773860184679e-06, "loss": 0.5693, "step": 3760 }, { "epoch": 0.24, "grad_norm": 0.9385787844657898, "learning_rate": 8.905133211705019e-06, "loss": 0.5391, "step": 3761 }, { "epoch": 0.24, "grad_norm": 0.9712759256362915, "learning_rate": 8.904492398793785e-06, "loss": 0.5767, "step": 3762 }, { "epoch": 0.24, "grad_norm": 0.9476292729377747, "learning_rate": 8.903851421477959e-06, "loss": 0.5587, "step": 3763 }, { "epoch": 0.24, "grad_norm": 0.9753716588020325, "learning_rate": 8.90321027978453e-06, "loss": 0.5619, "step": 3764 }, { "epoch": 0.24, "grad_norm": 1.0432862043380737, "learning_rate": 8.902568973740495e-06, "loss": 0.5288, "step": 3765 }, { "epoch": 0.24, "grad_norm": 1.0975617170333862, "learning_rate": 8.901927503372855e-06, "loss": 0.5406, "step": 3766 }, { "epoch": 0.24, "grad_norm": 1.0205985307693481, "learning_rate": 8.901285868708622e-06, "loss": 0.5532, "step": 3767 }, { "epoch": 0.24, "grad_norm": 0.9458399415016174, "learning_rate": 8.900644069774815e-06, "loss": 0.5218, "step": 3768 }, { "epoch": 0.24, "grad_norm": 1.0280425548553467, "learning_rate": 8.900002106598453e-06, "loss": 0.5456, "step": 3769 }, { "epoch": 0.24, "grad_norm": 1.0532450675964355, "learning_rate": 8.89935997920657e-06, "loss": 0.5718, "step": 3770 }, { "epoch": 0.24, "grad_norm": 1.1915584802627563, "learning_rate": 8.898717687626203e-06, "loss": 0.5831, "step": 3771 }, { "epoch": 0.24, "grad_norm": 1.0443774461746216, "learning_rate": 8.898075231884397e-06, "loss": 0.5566, "step": 3772 }, { "epoch": 0.24, "grad_norm": 0.9577083587646484, "learning_rate": 8.897432612008206e-06, "loss": 0.5656, "step": 3773 }, { "epoch": 0.24, "grad_norm": 0.9909464120864868, "learning_rate": 8.896789828024682e-06, "loss": 0.5585, "step": 3774 }, { "epoch": 0.24, "grad_norm": 1.0283769369125366, "learning_rate": 8.896146879960896e-06, "loss": 0.5992, "step": 3775 }, { "epoch": 0.24, "grad_norm": 1.1171537637710571, "learning_rate": 8.895503767843918e-06, "loss": 0.5796, "step": 3776 }, { "epoch": 0.24, "grad_norm": 1.1655315160751343, "learning_rate": 8.89486049170083e-06, "loss": 0.5953, "step": 3777 }, { "epoch": 0.24, "grad_norm": 0.9457029104232788, "learning_rate": 8.894217051558713e-06, "loss": 0.5288, "step": 3778 }, { "epoch": 0.24, "grad_norm": 0.9799229502677917, "learning_rate": 8.893573447444663e-06, "loss": 0.5732, "step": 3779 }, { "epoch": 0.24, "grad_norm": 1.0082273483276367, "learning_rate": 8.892929679385783e-06, "loss": 0.5022, "step": 3780 }, { "epoch": 0.24, "grad_norm": 1.0606393814086914, "learning_rate": 8.892285747409172e-06, "loss": 0.5212, "step": 3781 }, { "epoch": 0.24, "grad_norm": 1.0303373336791992, "learning_rate": 8.891641651541953e-06, "loss": 0.5262, "step": 3782 }, { "epoch": 0.24, "grad_norm": 1.126741647720337, "learning_rate": 8.89099739181124e-06, "loss": 0.5396, "step": 3783 }, { "epoch": 0.24, "grad_norm": 1.0784149169921875, "learning_rate": 8.890352968244162e-06, "loss": 0.5377, "step": 3784 }, { "epoch": 0.24, "grad_norm": 1.032558798789978, "learning_rate": 8.889708380867856e-06, "loss": 0.5923, "step": 3785 }, { "epoch": 0.24, "grad_norm": 0.9988136291503906, "learning_rate": 8.88906362970946e-06, "loss": 0.5248, "step": 3786 }, { "epoch": 0.24, "grad_norm": 0.9523390531539917, "learning_rate": 8.888418714796124e-06, "loss": 0.556, "step": 3787 }, { "epoch": 0.24, "grad_norm": 1.0088428258895874, "learning_rate": 8.887773636155002e-06, "loss": 0.5189, "step": 3788 }, { "epoch": 0.24, "grad_norm": 1.0588726997375488, "learning_rate": 8.887128393813257e-06, "loss": 0.5795, "step": 3789 }, { "epoch": 0.24, "grad_norm": 1.0953381061553955, "learning_rate": 8.886482987798059e-06, "loss": 0.5686, "step": 3790 }, { "epoch": 0.24, "grad_norm": 1.1218312978744507, "learning_rate": 8.885837418136581e-06, "loss": 0.534, "step": 3791 }, { "epoch": 0.24, "grad_norm": 1.00978422164917, "learning_rate": 8.885191684856007e-06, "loss": 0.4912, "step": 3792 }, { "epoch": 0.24, "grad_norm": 1.0210072994232178, "learning_rate": 8.884545787983528e-06, "loss": 0.5406, "step": 3793 }, { "epoch": 0.24, "grad_norm": 1.0807887315750122, "learning_rate": 8.88389972754634e-06, "loss": 0.5797, "step": 3794 }, { "epoch": 0.24, "grad_norm": 1.090825080871582, "learning_rate": 8.883253503571643e-06, "loss": 0.5606, "step": 3795 }, { "epoch": 0.24, "grad_norm": 1.0245369672775269, "learning_rate": 8.882607116086651e-06, "loss": 0.5506, "step": 3796 }, { "epoch": 0.24, "grad_norm": 0.9361253380775452, "learning_rate": 8.881960565118581e-06, "loss": 0.4825, "step": 3797 }, { "epoch": 0.24, "grad_norm": 1.1194905042648315, "learning_rate": 8.881313850694653e-06, "loss": 0.5873, "step": 3798 }, { "epoch": 0.24, "grad_norm": 1.1902475357055664, "learning_rate": 8.880666972842105e-06, "loss": 0.5689, "step": 3799 }, { "epoch": 0.24, "grad_norm": 0.98819500207901, "learning_rate": 8.880019931588167e-06, "loss": 0.5874, "step": 3800 }, { "epoch": 0.24, "grad_norm": 1.106116771697998, "learning_rate": 8.87937272696009e-06, "loss": 0.5685, "step": 3801 }, { "epoch": 0.24, "grad_norm": 1.007765531539917, "learning_rate": 8.878725358985121e-06, "loss": 0.5097, "step": 3802 }, { "epoch": 0.24, "grad_norm": 1.0698480606079102, "learning_rate": 8.87807782769052e-06, "loss": 0.6116, "step": 3803 }, { "epoch": 0.24, "grad_norm": 1.0733602046966553, "learning_rate": 8.877430133103555e-06, "loss": 0.5521, "step": 3804 }, { "epoch": 0.24, "grad_norm": 1.0455069541931152, "learning_rate": 8.876782275251491e-06, "loss": 0.5661, "step": 3805 }, { "epoch": 0.24, "grad_norm": 1.0989000797271729, "learning_rate": 8.876134254161617e-06, "loss": 0.5475, "step": 3806 }, { "epoch": 0.24, "grad_norm": 1.0597692728042603, "learning_rate": 8.87548606986121e-06, "loss": 0.5844, "step": 3807 }, { "epoch": 0.24, "grad_norm": 1.148749589920044, "learning_rate": 8.874837722377568e-06, "loss": 0.5301, "step": 3808 }, { "epoch": 0.24, "grad_norm": 1.0871633291244507, "learning_rate": 8.87418921173799e-06, "loss": 0.5459, "step": 3809 }, { "epoch": 0.24, "grad_norm": 0.9634197354316711, "learning_rate": 8.87354053796978e-06, "loss": 0.5407, "step": 3810 }, { "epoch": 0.24, "grad_norm": 0.9858682155609131, "learning_rate": 8.872891701100253e-06, "loss": 0.5547, "step": 3811 }, { "epoch": 0.24, "grad_norm": 1.064706563949585, "learning_rate": 8.872242701156731e-06, "loss": 0.5557, "step": 3812 }, { "epoch": 0.24, "grad_norm": 1.0465726852416992, "learning_rate": 8.871593538166538e-06, "loss": 0.5593, "step": 3813 }, { "epoch": 0.24, "grad_norm": 1.292360544204712, "learning_rate": 8.870944212157008e-06, "loss": 0.5751, "step": 3814 }, { "epoch": 0.24, "grad_norm": 1.0987080335617065, "learning_rate": 8.870294723155486e-06, "loss": 0.5797, "step": 3815 }, { "epoch": 0.24, "grad_norm": 1.0372346639633179, "learning_rate": 8.869645071189316e-06, "loss": 0.5913, "step": 3816 }, { "epoch": 0.24, "grad_norm": 1.109615445137024, "learning_rate": 8.868995256285853e-06, "loss": 0.5662, "step": 3817 }, { "epoch": 0.24, "grad_norm": 1.04058837890625, "learning_rate": 8.868345278472458e-06, "loss": 0.5612, "step": 3818 }, { "epoch": 0.24, "grad_norm": 1.0476531982421875, "learning_rate": 8.867695137776503e-06, "loss": 0.5716, "step": 3819 }, { "epoch": 0.24, "grad_norm": 1.0954654216766357, "learning_rate": 8.86704483422536e-06, "loss": 0.5752, "step": 3820 }, { "epoch": 0.24, "grad_norm": 1.0084110498428345, "learning_rate": 8.86639436784641e-06, "loss": 0.5679, "step": 3821 }, { "epoch": 0.24, "grad_norm": 0.9866928458213806, "learning_rate": 8.865743738667045e-06, "loss": 0.4968, "step": 3822 }, { "epoch": 0.24, "grad_norm": 0.9854780435562134, "learning_rate": 8.865092946714657e-06, "loss": 0.5099, "step": 3823 }, { "epoch": 0.24, "grad_norm": 0.969935953617096, "learning_rate": 8.864441992016653e-06, "loss": 0.5269, "step": 3824 }, { "epoch": 0.24, "grad_norm": 1.0676237344741821, "learning_rate": 8.863790874600438e-06, "loss": 0.547, "step": 3825 }, { "epoch": 0.24, "grad_norm": 0.9443878531455994, "learning_rate": 8.863139594493432e-06, "loss": 0.5228, "step": 3826 }, { "epoch": 0.24, "grad_norm": 1.0739343166351318, "learning_rate": 8.862488151723055e-06, "loss": 0.5478, "step": 3827 }, { "epoch": 0.24, "grad_norm": 1.0631316900253296, "learning_rate": 8.86183654631674e-06, "loss": 0.5672, "step": 3828 }, { "epoch": 0.24, "grad_norm": 0.9959419965744019, "learning_rate": 8.861184778301921e-06, "loss": 0.5609, "step": 3829 }, { "epoch": 0.24, "grad_norm": 1.0151301622390747, "learning_rate": 8.860532847706046e-06, "loss": 0.5751, "step": 3830 }, { "epoch": 0.24, "grad_norm": 1.0595030784606934, "learning_rate": 8.85988075455656e-06, "loss": 0.5408, "step": 3831 }, { "epoch": 0.24, "grad_norm": 1.0776829719543457, "learning_rate": 8.859228498880923e-06, "loss": 0.5859, "step": 3832 }, { "epoch": 0.24, "grad_norm": 1.0213021039962769, "learning_rate": 8.8585760807066e-06, "loss": 0.5698, "step": 3833 }, { "epoch": 0.24, "grad_norm": 1.074037790298462, "learning_rate": 8.85792350006106e-06, "loss": 0.6023, "step": 3834 }, { "epoch": 0.24, "grad_norm": 0.9719785451889038, "learning_rate": 8.857270756971785e-06, "loss": 0.54, "step": 3835 }, { "epoch": 0.24, "grad_norm": 1.0435043573379517, "learning_rate": 8.856617851466254e-06, "loss": 0.5853, "step": 3836 }, { "epoch": 0.24, "grad_norm": 0.9525250792503357, "learning_rate": 8.855964783571963e-06, "loss": 0.5082, "step": 3837 }, { "epoch": 0.24, "grad_norm": 1.0572185516357422, "learning_rate": 8.855311553316409e-06, "loss": 0.5296, "step": 3838 }, { "epoch": 0.24, "grad_norm": 1.005979061126709, "learning_rate": 8.854658160727096e-06, "loss": 0.5716, "step": 3839 }, { "epoch": 0.24, "grad_norm": 0.9383775591850281, "learning_rate": 8.854004605831536e-06, "loss": 0.4758, "step": 3840 }, { "epoch": 0.24, "grad_norm": 1.0103214979171753, "learning_rate": 8.853350888657251e-06, "loss": 0.5514, "step": 3841 }, { "epoch": 0.24, "grad_norm": 1.0398743152618408, "learning_rate": 8.852697009231766e-06, "loss": 0.5314, "step": 3842 }, { "epoch": 0.24, "grad_norm": 1.0169912576675415, "learning_rate": 8.852042967582611e-06, "loss": 0.5853, "step": 3843 }, { "epoch": 0.24, "grad_norm": 1.0007593631744385, "learning_rate": 8.851388763737328e-06, "loss": 0.4835, "step": 3844 }, { "epoch": 0.24, "grad_norm": 1.0083773136138916, "learning_rate": 8.850734397723461e-06, "loss": 0.5283, "step": 3845 }, { "epoch": 0.24, "grad_norm": 1.2991793155670166, "learning_rate": 8.850079869568565e-06, "loss": 0.594, "step": 3846 }, { "epoch": 0.24, "grad_norm": 0.9768350720405579, "learning_rate": 8.849425179300197e-06, "loss": 0.5486, "step": 3847 }, { "epoch": 0.24, "grad_norm": 1.0022505521774292, "learning_rate": 8.848770326945927e-06, "loss": 0.5329, "step": 3848 }, { "epoch": 0.24, "grad_norm": 1.0250089168548584, "learning_rate": 8.84811531253333e-06, "loss": 0.5185, "step": 3849 }, { "epoch": 0.24, "grad_norm": 1.1248204708099365, "learning_rate": 8.847460136089982e-06, "loss": 0.5634, "step": 3850 }, { "epoch": 0.24, "grad_norm": 1.0407178401947021, "learning_rate": 8.846804797643472e-06, "loss": 0.5423, "step": 3851 }, { "epoch": 0.24, "grad_norm": 0.9828975796699524, "learning_rate": 8.846149297221394e-06, "loss": 0.5327, "step": 3852 }, { "epoch": 0.24, "grad_norm": 1.0683937072753906, "learning_rate": 8.845493634851348e-06, "loss": 0.6222, "step": 3853 }, { "epoch": 0.24, "grad_norm": 0.9499751925468445, "learning_rate": 8.844837810560943e-06, "loss": 0.5243, "step": 3854 }, { "epoch": 0.24, "grad_norm": 1.0025689601898193, "learning_rate": 8.844181824377793e-06, "loss": 0.5128, "step": 3855 }, { "epoch": 0.24, "grad_norm": 0.9878833293914795, "learning_rate": 8.843525676329521e-06, "loss": 0.5791, "step": 3856 }, { "epoch": 0.24, "grad_norm": 0.9180898666381836, "learning_rate": 8.842869366443751e-06, "loss": 0.5351, "step": 3857 }, { "epoch": 0.24, "grad_norm": 0.9682977795600891, "learning_rate": 8.842212894748122e-06, "loss": 0.5325, "step": 3858 }, { "epoch": 0.24, "grad_norm": 0.9068602919578552, "learning_rate": 8.841556261270272e-06, "loss": 0.5159, "step": 3859 }, { "epoch": 0.24, "grad_norm": 1.0087989568710327, "learning_rate": 8.840899466037854e-06, "loss": 0.5818, "step": 3860 }, { "epoch": 0.24, "grad_norm": 0.9543938040733337, "learning_rate": 8.840242509078521e-06, "loss": 0.5371, "step": 3861 }, { "epoch": 0.24, "grad_norm": 0.9779382944107056, "learning_rate": 8.839585390419933e-06, "loss": 0.5227, "step": 3862 }, { "epoch": 0.24, "grad_norm": 0.9689169526100159, "learning_rate": 8.838928110089763e-06, "loss": 0.5444, "step": 3863 }, { "epoch": 0.24, "grad_norm": 1.093464970588684, "learning_rate": 8.838270668115685e-06, "loss": 0.584, "step": 3864 }, { "epoch": 0.24, "grad_norm": 0.9167064428329468, "learning_rate": 8.837613064525381e-06, "loss": 0.5368, "step": 3865 }, { "epoch": 0.24, "grad_norm": 1.0776047706604004, "learning_rate": 8.83695529934654e-06, "loss": 0.5563, "step": 3866 }, { "epoch": 0.24, "grad_norm": 0.9500616788864136, "learning_rate": 8.83629737260686e-06, "loss": 0.5357, "step": 3867 }, { "epoch": 0.25, "grad_norm": 1.008630633354187, "learning_rate": 8.835639284334043e-06, "loss": 0.5135, "step": 3868 }, { "epoch": 0.25, "grad_norm": 1.0061575174331665, "learning_rate": 8.834981034555799e-06, "loss": 0.5408, "step": 3869 }, { "epoch": 0.25, "grad_norm": 0.9785071611404419, "learning_rate": 8.834322623299844e-06, "loss": 0.5372, "step": 3870 }, { "epoch": 0.25, "grad_norm": 0.997992217540741, "learning_rate": 8.833664050593904e-06, "loss": 0.5252, "step": 3871 }, { "epoch": 0.25, "grad_norm": 1.0702146291732788, "learning_rate": 8.833005316465706e-06, "loss": 0.5157, "step": 3872 }, { "epoch": 0.25, "grad_norm": 0.964113712310791, "learning_rate": 8.832346420942987e-06, "loss": 0.5289, "step": 3873 }, { "epoch": 0.25, "grad_norm": 0.9863677620887756, "learning_rate": 8.831687364053493e-06, "loss": 0.5532, "step": 3874 }, { "epoch": 0.25, "grad_norm": 1.1146938800811768, "learning_rate": 8.831028145824974e-06, "loss": 0.5715, "step": 3875 }, { "epoch": 0.25, "grad_norm": 1.073601484298706, "learning_rate": 8.830368766285186e-06, "loss": 0.5765, "step": 3876 }, { "epoch": 0.25, "grad_norm": 1.0261064767837524, "learning_rate": 8.829709225461894e-06, "loss": 0.5395, "step": 3877 }, { "epoch": 0.25, "grad_norm": 1.164169192314148, "learning_rate": 8.829049523382871e-06, "loss": 0.5879, "step": 3878 }, { "epoch": 0.25, "grad_norm": 0.9541839361190796, "learning_rate": 8.828389660075891e-06, "loss": 0.5518, "step": 3879 }, { "epoch": 0.25, "grad_norm": 0.9897672533988953, "learning_rate": 8.82772963556874e-06, "loss": 0.5619, "step": 3880 }, { "epoch": 0.25, "grad_norm": 1.0605300664901733, "learning_rate": 8.827069449889211e-06, "loss": 0.5179, "step": 3881 }, { "epoch": 0.25, "grad_norm": 1.0233609676361084, "learning_rate": 8.8264091030651e-06, "loss": 0.555, "step": 3882 }, { "epoch": 0.25, "grad_norm": 0.999971866607666, "learning_rate": 8.825748595124214e-06, "loss": 0.5514, "step": 3883 }, { "epoch": 0.25, "grad_norm": 1.0390762090682983, "learning_rate": 8.825087926094363e-06, "loss": 0.52, "step": 3884 }, { "epoch": 0.25, "grad_norm": 0.9346777200698853, "learning_rate": 8.824427096003367e-06, "loss": 0.5183, "step": 3885 }, { "epoch": 0.25, "grad_norm": 1.0920510292053223, "learning_rate": 8.823766104879047e-06, "loss": 0.5254, "step": 3886 }, { "epoch": 0.25, "grad_norm": 0.9973437190055847, "learning_rate": 8.823104952749242e-06, "loss": 0.505, "step": 3887 }, { "epoch": 0.25, "grad_norm": 0.9639909267425537, "learning_rate": 8.822443639641785e-06, "loss": 0.4865, "step": 3888 }, { "epoch": 0.25, "grad_norm": 1.0465211868286133, "learning_rate": 8.821782165584524e-06, "loss": 0.5365, "step": 3889 }, { "epoch": 0.25, "grad_norm": 1.0439425706863403, "learning_rate": 8.82112053060531e-06, "loss": 0.5753, "step": 3890 }, { "epoch": 0.25, "grad_norm": 1.057376742362976, "learning_rate": 8.820458734732004e-06, "loss": 0.6169, "step": 3891 }, { "epoch": 0.25, "grad_norm": 1.064401626586914, "learning_rate": 8.819796777992471e-06, "loss": 0.5196, "step": 3892 }, { "epoch": 0.25, "grad_norm": 1.0528103113174438, "learning_rate": 8.819134660414585e-06, "loss": 0.5504, "step": 3893 }, { "epoch": 0.25, "grad_norm": 1.065921425819397, "learning_rate": 8.818472382026222e-06, "loss": 0.5123, "step": 3894 }, { "epoch": 0.25, "grad_norm": 0.990153968334198, "learning_rate": 8.817809942855272e-06, "loss": 0.524, "step": 3895 }, { "epoch": 0.25, "grad_norm": 0.9567554593086243, "learning_rate": 8.817147342929626e-06, "loss": 0.5169, "step": 3896 }, { "epoch": 0.25, "grad_norm": 1.169144630432129, "learning_rate": 8.816484582277184e-06, "loss": 0.5749, "step": 3897 }, { "epoch": 0.25, "grad_norm": 1.0559519529342651, "learning_rate": 8.815821660925853e-06, "loss": 0.5486, "step": 3898 }, { "epoch": 0.25, "grad_norm": 1.0077050924301147, "learning_rate": 8.815158578903548e-06, "loss": 0.5413, "step": 3899 }, { "epoch": 0.25, "grad_norm": 0.9848203659057617, "learning_rate": 8.814495336238185e-06, "loss": 0.5758, "step": 3900 }, { "epoch": 0.25, "grad_norm": 1.0258076190948486, "learning_rate": 8.813831932957696e-06, "loss": 0.5552, "step": 3901 }, { "epoch": 0.25, "grad_norm": 0.9313562512397766, "learning_rate": 8.813168369090007e-06, "loss": 0.4832, "step": 3902 }, { "epoch": 0.25, "grad_norm": 0.9199298620223999, "learning_rate": 8.812504644663066e-06, "loss": 0.5393, "step": 3903 }, { "epoch": 0.25, "grad_norm": 1.1476012468338013, "learning_rate": 8.811840759704816e-06, "loss": 0.5524, "step": 3904 }, { "epoch": 0.25, "grad_norm": 0.9469196200370789, "learning_rate": 8.811176714243213e-06, "loss": 0.5498, "step": 3905 }, { "epoch": 0.25, "grad_norm": 1.152643084526062, "learning_rate": 8.810512508306216e-06, "loss": 0.5241, "step": 3906 }, { "epoch": 0.25, "grad_norm": 0.9760724902153015, "learning_rate": 8.809848141921793e-06, "loss": 0.5174, "step": 3907 }, { "epoch": 0.25, "grad_norm": 1.0510609149932861, "learning_rate": 8.809183615117919e-06, "loss": 0.5741, "step": 3908 }, { "epoch": 0.25, "grad_norm": 1.0153599977493286, "learning_rate": 8.808518927922574e-06, "loss": 0.5575, "step": 3909 }, { "epoch": 0.25, "grad_norm": 1.0817402601242065, "learning_rate": 8.807854080363745e-06, "loss": 0.5589, "step": 3910 }, { "epoch": 0.25, "grad_norm": 1.1702064275741577, "learning_rate": 8.807189072469428e-06, "loss": 0.5604, "step": 3911 }, { "epoch": 0.25, "grad_norm": 0.9715337753295898, "learning_rate": 8.806523904267623e-06, "loss": 0.5388, "step": 3912 }, { "epoch": 0.25, "grad_norm": 1.10527765750885, "learning_rate": 8.80585857578634e-06, "loss": 0.588, "step": 3913 }, { "epoch": 0.25, "grad_norm": 0.9401335120201111, "learning_rate": 8.80519308705359e-06, "loss": 0.5018, "step": 3914 }, { "epoch": 0.25, "grad_norm": 1.0677719116210938, "learning_rate": 8.804527438097396e-06, "loss": 0.5637, "step": 3915 }, { "epoch": 0.25, "grad_norm": 1.0230120420455933, "learning_rate": 8.803861628945787e-06, "loss": 0.5323, "step": 3916 }, { "epoch": 0.25, "grad_norm": 1.1242260932922363, "learning_rate": 8.803195659626798e-06, "loss": 0.597, "step": 3917 }, { "epoch": 0.25, "grad_norm": 0.9725340604782104, "learning_rate": 8.802529530168469e-06, "loss": 0.5711, "step": 3918 }, { "epoch": 0.25, "grad_norm": 0.9721158742904663, "learning_rate": 8.801863240598851e-06, "loss": 0.532, "step": 3919 }, { "epoch": 0.25, "grad_norm": 1.0960687398910522, "learning_rate": 8.801196790945999e-06, "loss": 0.5299, "step": 3920 }, { "epoch": 0.25, "grad_norm": 1.0951040983200073, "learning_rate": 8.800530181237971e-06, "loss": 0.5936, "step": 3921 }, { "epoch": 0.25, "grad_norm": 1.0156009197235107, "learning_rate": 8.799863411502838e-06, "loss": 0.5214, "step": 3922 }, { "epoch": 0.25, "grad_norm": 0.9303261637687683, "learning_rate": 8.799196481768677e-06, "loss": 0.5371, "step": 3923 }, { "epoch": 0.25, "grad_norm": 1.001246452331543, "learning_rate": 8.798529392063569e-06, "loss": 0.5324, "step": 3924 }, { "epoch": 0.25, "grad_norm": 0.9814727902412415, "learning_rate": 8.7978621424156e-06, "loss": 0.515, "step": 3925 }, { "epoch": 0.25, "grad_norm": 1.0274420976638794, "learning_rate": 8.79719473285287e-06, "loss": 0.5823, "step": 3926 }, { "epoch": 0.25, "grad_norm": 1.0317124128341675, "learning_rate": 8.796527163403479e-06, "loss": 0.5664, "step": 3927 }, { "epoch": 0.25, "grad_norm": 1.1095083951950073, "learning_rate": 8.795859434095535e-06, "loss": 0.5479, "step": 3928 }, { "epoch": 0.25, "grad_norm": 1.0397136211395264, "learning_rate": 8.795191544957156e-06, "loss": 0.6244, "step": 3929 }, { "epoch": 0.25, "grad_norm": 1.0844511985778809, "learning_rate": 8.794523496016465e-06, "loss": 0.6134, "step": 3930 }, { "epoch": 0.25, "grad_norm": 1.0013247728347778, "learning_rate": 8.793855287301588e-06, "loss": 0.5852, "step": 3931 }, { "epoch": 0.25, "grad_norm": 0.9349052906036377, "learning_rate": 8.793186918840661e-06, "loss": 0.4861, "step": 3932 }, { "epoch": 0.25, "grad_norm": 1.006710171699524, "learning_rate": 8.792518390661831e-06, "loss": 0.552, "step": 3933 }, { "epoch": 0.25, "grad_norm": 1.0546201467514038, "learning_rate": 8.791849702793245e-06, "loss": 0.5596, "step": 3934 }, { "epoch": 0.25, "grad_norm": 1.0243421792984009, "learning_rate": 8.791180855263057e-06, "loss": 0.5695, "step": 3935 }, { "epoch": 0.25, "grad_norm": 1.0765653848648071, "learning_rate": 8.790511848099433e-06, "loss": 0.5773, "step": 3936 }, { "epoch": 0.25, "grad_norm": 0.9342508316040039, "learning_rate": 8.789842681330543e-06, "loss": 0.5546, "step": 3937 }, { "epoch": 0.25, "grad_norm": 1.0190470218658447, "learning_rate": 8.789173354984557e-06, "loss": 0.5331, "step": 3938 }, { "epoch": 0.25, "grad_norm": 1.0095438957214355, "learning_rate": 8.788503869089667e-06, "loss": 0.6117, "step": 3939 }, { "epoch": 0.25, "grad_norm": 1.0041183233261108, "learning_rate": 8.787834223674056e-06, "loss": 0.5361, "step": 3940 }, { "epoch": 0.25, "grad_norm": 1.0229700803756714, "learning_rate": 8.787164418765923e-06, "loss": 0.5201, "step": 3941 }, { "epoch": 0.25, "grad_norm": 1.0589138269424438, "learning_rate": 8.786494454393472e-06, "loss": 0.5675, "step": 3942 }, { "epoch": 0.25, "grad_norm": 1.0750982761383057, "learning_rate": 8.785824330584912e-06, "loss": 0.5354, "step": 3943 }, { "epoch": 0.25, "grad_norm": 1.0840270519256592, "learning_rate": 8.785154047368459e-06, "loss": 0.5619, "step": 3944 }, { "epoch": 0.25, "grad_norm": 1.048685073852539, "learning_rate": 8.784483604772336e-06, "loss": 0.5506, "step": 3945 }, { "epoch": 0.25, "grad_norm": 1.044411301612854, "learning_rate": 8.783813002824773e-06, "loss": 0.5588, "step": 3946 }, { "epoch": 0.25, "grad_norm": 1.0236300230026245, "learning_rate": 8.783142241554009e-06, "loss": 0.5374, "step": 3947 }, { "epoch": 0.25, "grad_norm": 1.0593035221099854, "learning_rate": 8.782471320988284e-06, "loss": 0.5684, "step": 3948 }, { "epoch": 0.25, "grad_norm": 0.996816873550415, "learning_rate": 8.781800241155851e-06, "loss": 0.5102, "step": 3949 }, { "epoch": 0.25, "grad_norm": 1.0247293710708618, "learning_rate": 8.781129002084965e-06, "loss": 0.5328, "step": 3950 }, { "epoch": 0.25, "grad_norm": 1.0288833379745483, "learning_rate": 8.780457603803892e-06, "loss": 0.5485, "step": 3951 }, { "epoch": 0.25, "grad_norm": 1.0378539562225342, "learning_rate": 8.779786046340898e-06, "loss": 0.5774, "step": 3952 }, { "epoch": 0.25, "grad_norm": 1.0052213668823242, "learning_rate": 8.779114329724265e-06, "loss": 0.5665, "step": 3953 }, { "epoch": 0.25, "grad_norm": 0.9967714548110962, "learning_rate": 8.778442453982272e-06, "loss": 0.5205, "step": 3954 }, { "epoch": 0.25, "grad_norm": 0.982059121131897, "learning_rate": 8.777770419143214e-06, "loss": 0.5836, "step": 3955 }, { "epoch": 0.25, "grad_norm": 1.0280338525772095, "learning_rate": 8.777098225235384e-06, "loss": 0.5844, "step": 3956 }, { "epoch": 0.25, "grad_norm": 1.0566469430923462, "learning_rate": 8.776425872287087e-06, "loss": 0.5494, "step": 3957 }, { "epoch": 0.25, "grad_norm": 1.08198881149292, "learning_rate": 8.775753360326635e-06, "loss": 0.5845, "step": 3958 }, { "epoch": 0.25, "grad_norm": 1.0469781160354614, "learning_rate": 8.775080689382342e-06, "loss": 0.5745, "step": 3959 }, { "epoch": 0.25, "grad_norm": 0.9827989935874939, "learning_rate": 8.774407859482537e-06, "loss": 0.5656, "step": 3960 }, { "epoch": 0.25, "grad_norm": 0.9459227919578552, "learning_rate": 8.773734870655544e-06, "loss": 0.5435, "step": 3961 }, { "epoch": 0.25, "grad_norm": 1.0413237810134888, "learning_rate": 8.773061722929704e-06, "loss": 0.5485, "step": 3962 }, { "epoch": 0.25, "grad_norm": 0.9192708730697632, "learning_rate": 8.772388416333361e-06, "loss": 0.4988, "step": 3963 }, { "epoch": 0.25, "grad_norm": 1.0438575744628906, "learning_rate": 8.771714950894865e-06, "loss": 0.5377, "step": 3964 }, { "epoch": 0.25, "grad_norm": 1.0168156623840332, "learning_rate": 8.771041326642572e-06, "loss": 0.5476, "step": 3965 }, { "epoch": 0.25, "grad_norm": 0.9964485168457031, "learning_rate": 8.770367543604849e-06, "loss": 0.5807, "step": 3966 }, { "epoch": 0.25, "grad_norm": 1.0404568910598755, "learning_rate": 8.769693601810066e-06, "loss": 0.5739, "step": 3967 }, { "epoch": 0.25, "grad_norm": 1.0175347328186035, "learning_rate": 8.769019501286598e-06, "loss": 0.5346, "step": 3968 }, { "epoch": 0.25, "grad_norm": 0.9991718530654907, "learning_rate": 8.768345242062828e-06, "loss": 0.5996, "step": 3969 }, { "epoch": 0.25, "grad_norm": 1.0009833574295044, "learning_rate": 8.767670824167151e-06, "loss": 0.6081, "step": 3970 }, { "epoch": 0.25, "grad_norm": 0.9895312786102295, "learning_rate": 8.766996247627963e-06, "loss": 0.5379, "step": 3971 }, { "epoch": 0.25, "grad_norm": 0.9684005379676819, "learning_rate": 8.766321512473666e-06, "loss": 0.5624, "step": 3972 }, { "epoch": 0.25, "grad_norm": 0.9689050912857056, "learning_rate": 8.765646618732672e-06, "loss": 0.5462, "step": 3973 }, { "epoch": 0.25, "grad_norm": 1.0098981857299805, "learning_rate": 8.7649715664334e-06, "loss": 0.5476, "step": 3974 }, { "epoch": 0.25, "grad_norm": 1.0482535362243652, "learning_rate": 8.764296355604273e-06, "loss": 0.5777, "step": 3975 }, { "epoch": 0.25, "grad_norm": 0.9891552329063416, "learning_rate": 8.76362098627372e-06, "loss": 0.5396, "step": 3976 }, { "epoch": 0.25, "grad_norm": 1.1055810451507568, "learning_rate": 8.76294545847018e-06, "loss": 0.6089, "step": 3977 }, { "epoch": 0.25, "grad_norm": 1.0154608488082886, "learning_rate": 8.762269772222099e-06, "loss": 0.5323, "step": 3978 }, { "epoch": 0.25, "grad_norm": 1.0198290348052979, "learning_rate": 8.761593927557923e-06, "loss": 0.5662, "step": 3979 }, { "epoch": 0.25, "grad_norm": 0.971450924873352, "learning_rate": 8.760917924506114e-06, "loss": 0.5111, "step": 3980 }, { "epoch": 0.25, "grad_norm": 1.1433659791946411, "learning_rate": 8.760241763095135e-06, "loss": 0.5972, "step": 3981 }, { "epoch": 0.25, "grad_norm": 1.1008565425872803, "learning_rate": 8.759565443353454e-06, "loss": 0.5831, "step": 3982 }, { "epoch": 0.25, "grad_norm": 1.2331944704055786, "learning_rate": 8.758888965309554e-06, "loss": 0.5825, "step": 3983 }, { "epoch": 0.25, "grad_norm": 1.0712262392044067, "learning_rate": 8.758212328991913e-06, "loss": 0.5521, "step": 3984 }, { "epoch": 0.25, "grad_norm": 1.0214992761611938, "learning_rate": 8.757535534429027e-06, "loss": 0.5681, "step": 3985 }, { "epoch": 0.25, "grad_norm": 1.002253532409668, "learning_rate": 8.756858581649391e-06, "loss": 0.5614, "step": 3986 }, { "epoch": 0.25, "grad_norm": 0.9840101599693298, "learning_rate": 8.756181470681507e-06, "loss": 0.5775, "step": 3987 }, { "epoch": 0.25, "grad_norm": 1.0549638271331787, "learning_rate": 8.755504201553889e-06, "loss": 0.5666, "step": 3988 }, { "epoch": 0.25, "grad_norm": 1.0759743452072144, "learning_rate": 8.754826774295056e-06, "loss": 0.5767, "step": 3989 }, { "epoch": 0.25, "grad_norm": 1.0006356239318848, "learning_rate": 8.754149188933527e-06, "loss": 0.532, "step": 3990 }, { "epoch": 0.25, "grad_norm": 1.030730128288269, "learning_rate": 8.753471445497837e-06, "loss": 0.4923, "step": 3991 }, { "epoch": 0.25, "grad_norm": 0.9614889025688171, "learning_rate": 8.752793544016519e-06, "loss": 0.5366, "step": 3992 }, { "epoch": 0.25, "grad_norm": 0.9809851050376892, "learning_rate": 8.752115484518123e-06, "loss": 0.5864, "step": 3993 }, { "epoch": 0.25, "grad_norm": 1.1068106889724731, "learning_rate": 8.751437267031194e-06, "loss": 0.5662, "step": 3994 }, { "epoch": 0.25, "grad_norm": 1.0743038654327393, "learning_rate": 8.750758891584293e-06, "loss": 0.5702, "step": 3995 }, { "epoch": 0.25, "grad_norm": 0.9942998290061951, "learning_rate": 8.750080358205983e-06, "loss": 0.5074, "step": 3996 }, { "epoch": 0.25, "grad_norm": 0.923958957195282, "learning_rate": 8.749401666924834e-06, "loss": 0.4981, "step": 3997 }, { "epoch": 0.25, "grad_norm": 0.925460934638977, "learning_rate": 8.748722817769426e-06, "loss": 0.5518, "step": 3998 }, { "epoch": 0.25, "grad_norm": 1.0104377269744873, "learning_rate": 8.74804381076834e-06, "loss": 0.5347, "step": 3999 }, { "epoch": 0.25, "grad_norm": 1.0946342945098877, "learning_rate": 8.747364645950168e-06, "loss": 0.5968, "step": 4000 }, { "epoch": 0.25, "grad_norm": 1.0404244661331177, "learning_rate": 8.746685323343507e-06, "loss": 0.5761, "step": 4001 }, { "epoch": 0.25, "grad_norm": 0.9306938648223877, "learning_rate": 8.74600584297696e-06, "loss": 0.5315, "step": 4002 }, { "epoch": 0.25, "grad_norm": 1.0647470951080322, "learning_rate": 8.745326204879139e-06, "loss": 0.5447, "step": 4003 }, { "epoch": 0.25, "grad_norm": 0.9752390384674072, "learning_rate": 8.74464640907866e-06, "loss": 0.5313, "step": 4004 }, { "epoch": 0.25, "grad_norm": 1.1666275262832642, "learning_rate": 8.743966455604147e-06, "loss": 0.5659, "step": 4005 }, { "epoch": 0.25, "grad_norm": 1.0324372053146362, "learning_rate": 8.743286344484232e-06, "loss": 0.4939, "step": 4006 }, { "epoch": 0.25, "grad_norm": 0.951132595539093, "learning_rate": 8.74260607574755e-06, "loss": 0.507, "step": 4007 }, { "epoch": 0.25, "grad_norm": 1.0060588121414185, "learning_rate": 8.741925649422746e-06, "loss": 0.5316, "step": 4008 }, { "epoch": 0.25, "grad_norm": 1.0284314155578613, "learning_rate": 8.741245065538471e-06, "loss": 0.5578, "step": 4009 }, { "epoch": 0.25, "grad_norm": 0.935452401638031, "learning_rate": 8.74056432412338e-06, "loss": 0.5039, "step": 4010 }, { "epoch": 0.25, "grad_norm": 0.9619759917259216, "learning_rate": 8.739883425206138e-06, "loss": 0.5432, "step": 4011 }, { "epoch": 0.25, "grad_norm": 1.0670369863510132, "learning_rate": 8.739202368815416e-06, "loss": 0.5651, "step": 4012 }, { "epoch": 0.25, "grad_norm": 1.0108201503753662, "learning_rate": 8.738521154979889e-06, "loss": 0.5562, "step": 4013 }, { "epoch": 0.25, "grad_norm": 0.9825449585914612, "learning_rate": 8.737839783728242e-06, "loss": 0.5479, "step": 4014 }, { "epoch": 0.25, "grad_norm": 0.9974722862243652, "learning_rate": 8.737158255089164e-06, "loss": 0.5501, "step": 4015 }, { "epoch": 0.25, "grad_norm": 0.9536095261573792, "learning_rate": 8.736476569091352e-06, "loss": 0.5288, "step": 4016 }, { "epoch": 0.25, "grad_norm": 0.9635430574417114, "learning_rate": 8.735794725763512e-06, "loss": 0.5658, "step": 4017 }, { "epoch": 0.25, "grad_norm": 1.066859245300293, "learning_rate": 8.735112725134352e-06, "loss": 0.5349, "step": 4018 }, { "epoch": 0.25, "grad_norm": 1.0659446716308594, "learning_rate": 8.734430567232585e-06, "loss": 0.5434, "step": 4019 }, { "epoch": 0.25, "grad_norm": 0.9656541347503662, "learning_rate": 8.733748252086943e-06, "loss": 0.5587, "step": 4020 }, { "epoch": 0.25, "grad_norm": 1.0233099460601807, "learning_rate": 8.733065779726146e-06, "loss": 0.6173, "step": 4021 }, { "epoch": 0.25, "grad_norm": 0.9754229187965393, "learning_rate": 8.732383150178938e-06, "loss": 0.5712, "step": 4022 }, { "epoch": 0.25, "grad_norm": 1.0898586511611938, "learning_rate": 8.73170036347406e-06, "loss": 0.6084, "step": 4023 }, { "epoch": 0.25, "grad_norm": 1.0394037961959839, "learning_rate": 8.731017419640261e-06, "loss": 0.5765, "step": 4024 }, { "epoch": 0.26, "grad_norm": 1.0213582515716553, "learning_rate": 8.730334318706297e-06, "loss": 0.5323, "step": 4025 }, { "epoch": 0.26, "grad_norm": 1.0330605506896973, "learning_rate": 8.729651060700932e-06, "loss": 0.5812, "step": 4026 }, { "epoch": 0.26, "grad_norm": 0.9591134786605835, "learning_rate": 8.728967645652936e-06, "loss": 0.5151, "step": 4027 }, { "epoch": 0.26, "grad_norm": 0.9901994466781616, "learning_rate": 8.728284073591083e-06, "loss": 0.5721, "step": 4028 }, { "epoch": 0.26, "grad_norm": 1.09280264377594, "learning_rate": 8.727600344544159e-06, "loss": 0.5826, "step": 4029 }, { "epoch": 0.26, "grad_norm": 0.9844223260879517, "learning_rate": 8.72691645854095e-06, "loss": 0.5586, "step": 4030 }, { "epoch": 0.26, "grad_norm": 1.013307809829712, "learning_rate": 8.726232415610257e-06, "loss": 0.5962, "step": 4031 }, { "epoch": 0.26, "grad_norm": 1.01223623752594, "learning_rate": 8.725548215780877e-06, "loss": 0.5316, "step": 4032 }, { "epoch": 0.26, "grad_norm": 1.052016019821167, "learning_rate": 8.724863859081622e-06, "loss": 0.5531, "step": 4033 }, { "epoch": 0.26, "grad_norm": 0.9758538007736206, "learning_rate": 8.724179345541308e-06, "loss": 0.5258, "step": 4034 }, { "epoch": 0.26, "grad_norm": 0.9941028356552124, "learning_rate": 8.72349467518876e-06, "loss": 0.6022, "step": 4035 }, { "epoch": 0.26, "grad_norm": 0.9477452039718628, "learning_rate": 8.7228098480528e-06, "loss": 0.5549, "step": 4036 }, { "epoch": 0.26, "grad_norm": 1.0785270929336548, "learning_rate": 8.72212486416227e-06, "loss": 0.5849, "step": 4037 }, { "epoch": 0.26, "grad_norm": 1.1175118684768677, "learning_rate": 8.721439723546012e-06, "loss": 0.6015, "step": 4038 }, { "epoch": 0.26, "grad_norm": 1.0209541320800781, "learning_rate": 8.720754426232871e-06, "loss": 0.5323, "step": 4039 }, { "epoch": 0.26, "grad_norm": 1.0251224040985107, "learning_rate": 8.720068972251705e-06, "loss": 0.5495, "step": 4040 }, { "epoch": 0.26, "grad_norm": 0.9551291465759277, "learning_rate": 8.719383361631376e-06, "loss": 0.5908, "step": 4041 }, { "epoch": 0.26, "grad_norm": 1.0323892831802368, "learning_rate": 8.718697594400753e-06, "loss": 0.5788, "step": 4042 }, { "epoch": 0.26, "grad_norm": 0.9632778167724609, "learning_rate": 8.71801167058871e-06, "loss": 0.5492, "step": 4043 }, { "epoch": 0.26, "grad_norm": 1.033238172531128, "learning_rate": 8.717325590224129e-06, "loss": 0.5496, "step": 4044 }, { "epoch": 0.26, "grad_norm": 1.1615618467330933, "learning_rate": 8.7166393533359e-06, "loss": 0.5668, "step": 4045 }, { "epoch": 0.26, "grad_norm": 1.0879848003387451, "learning_rate": 8.715952959952917e-06, "loss": 0.5658, "step": 4046 }, { "epoch": 0.26, "grad_norm": 0.9767739772796631, "learning_rate": 8.715266410104081e-06, "loss": 0.5787, "step": 4047 }, { "epoch": 0.26, "grad_norm": 0.992115318775177, "learning_rate": 8.714579703818301e-06, "loss": 0.6004, "step": 4048 }, { "epoch": 0.26, "grad_norm": 1.0199464559555054, "learning_rate": 8.713892841124492e-06, "loss": 0.5387, "step": 4049 }, { "epoch": 0.26, "grad_norm": 1.0499255657196045, "learning_rate": 8.713205822051576e-06, "loss": 0.5862, "step": 4050 }, { "epoch": 0.26, "grad_norm": 1.0221071243286133, "learning_rate": 8.71251864662848e-06, "loss": 0.5519, "step": 4051 }, { "epoch": 0.26, "grad_norm": 1.016858696937561, "learning_rate": 8.711831314884137e-06, "loss": 0.5548, "step": 4052 }, { "epoch": 0.26, "grad_norm": 0.9993886947631836, "learning_rate": 8.711143826847491e-06, "loss": 0.5586, "step": 4053 }, { "epoch": 0.26, "grad_norm": 0.9646019339561462, "learning_rate": 8.71045618254749e-06, "loss": 0.5213, "step": 4054 }, { "epoch": 0.26, "grad_norm": 1.0232441425323486, "learning_rate": 8.709768382013084e-06, "loss": 0.5256, "step": 4055 }, { "epoch": 0.26, "grad_norm": 1.1045476198196411, "learning_rate": 8.709080425273238e-06, "loss": 0.5761, "step": 4056 }, { "epoch": 0.26, "grad_norm": 1.0643750429153442, "learning_rate": 8.708392312356919e-06, "loss": 0.5942, "step": 4057 }, { "epoch": 0.26, "grad_norm": 1.1480286121368408, "learning_rate": 8.7077040432931e-06, "loss": 0.6403, "step": 4058 }, { "epoch": 0.26, "grad_norm": 1.0662150382995605, "learning_rate": 8.707015618110761e-06, "loss": 0.5633, "step": 4059 }, { "epoch": 0.26, "grad_norm": 1.0232387781143188, "learning_rate": 8.706327036838891e-06, "loss": 0.5391, "step": 4060 }, { "epoch": 0.26, "grad_norm": 1.0814896821975708, "learning_rate": 8.705638299506482e-06, "loss": 0.5135, "step": 4061 }, { "epoch": 0.26, "grad_norm": 1.1783430576324463, "learning_rate": 8.704949406142536e-06, "loss": 0.5899, "step": 4062 }, { "epoch": 0.26, "grad_norm": 1.0899817943572998, "learning_rate": 8.70426035677606e-06, "loss": 0.6027, "step": 4063 }, { "epoch": 0.26, "grad_norm": 0.9908663034439087, "learning_rate": 8.703571151436064e-06, "loss": 0.4875, "step": 4064 }, { "epoch": 0.26, "grad_norm": 1.1074823141098022, "learning_rate": 8.702881790151572e-06, "loss": 0.5187, "step": 4065 }, { "epoch": 0.26, "grad_norm": 1.0565532445907593, "learning_rate": 8.70219227295161e-06, "loss": 0.6422, "step": 4066 }, { "epoch": 0.26, "grad_norm": 0.9629092216491699, "learning_rate": 8.70150259986521e-06, "loss": 0.5202, "step": 4067 }, { "epoch": 0.26, "grad_norm": 1.0577901601791382, "learning_rate": 8.70081277092141e-06, "loss": 0.5438, "step": 4068 }, { "epoch": 0.26, "grad_norm": 0.9888391494750977, "learning_rate": 8.700122786149261e-06, "loss": 0.5352, "step": 4069 }, { "epoch": 0.26, "grad_norm": 1.0722532272338867, "learning_rate": 8.699432645577812e-06, "loss": 0.622, "step": 4070 }, { "epoch": 0.26, "grad_norm": 0.9681339263916016, "learning_rate": 8.698742349236124e-06, "loss": 0.5098, "step": 4071 }, { "epoch": 0.26, "grad_norm": 1.0090398788452148, "learning_rate": 8.698051897153264e-06, "loss": 0.5901, "step": 4072 }, { "epoch": 0.26, "grad_norm": 1.0440917015075684, "learning_rate": 8.697361289358302e-06, "loss": 0.5435, "step": 4073 }, { "epoch": 0.26, "grad_norm": 1.1044894456863403, "learning_rate": 8.696670525880318e-06, "loss": 0.5635, "step": 4074 }, { "epoch": 0.26, "grad_norm": 1.0052359104156494, "learning_rate": 8.695979606748398e-06, "loss": 0.5303, "step": 4075 }, { "epoch": 0.26, "grad_norm": 1.1398298740386963, "learning_rate": 8.695288531991633e-06, "loss": 0.5506, "step": 4076 }, { "epoch": 0.26, "grad_norm": 1.0224276781082153, "learning_rate": 8.694597301639125e-06, "loss": 0.5696, "step": 4077 }, { "epoch": 0.26, "grad_norm": 1.1038308143615723, "learning_rate": 8.693905915719976e-06, "loss": 0.5572, "step": 4078 }, { "epoch": 0.26, "grad_norm": 1.01349937915802, "learning_rate": 8.693214374263298e-06, "loss": 0.5485, "step": 4079 }, { "epoch": 0.26, "grad_norm": 1.0743470191955566, "learning_rate": 8.692522677298213e-06, "loss": 0.5594, "step": 4080 }, { "epoch": 0.26, "grad_norm": 0.9773702621459961, "learning_rate": 8.691830824853843e-06, "loss": 0.5573, "step": 4081 }, { "epoch": 0.26, "grad_norm": 0.9347333312034607, "learning_rate": 8.691138816959318e-06, "loss": 0.566, "step": 4082 }, { "epoch": 0.26, "grad_norm": 1.0899261236190796, "learning_rate": 8.690446653643778e-06, "loss": 0.5562, "step": 4083 }, { "epoch": 0.26, "grad_norm": 1.035556674003601, "learning_rate": 8.68975433493637e-06, "loss": 0.5625, "step": 4084 }, { "epoch": 0.26, "grad_norm": 1.104180932044983, "learning_rate": 8.689061860866242e-06, "loss": 0.5346, "step": 4085 }, { "epoch": 0.26, "grad_norm": 1.0645705461502075, "learning_rate": 8.68836923146255e-06, "loss": 0.6061, "step": 4086 }, { "epoch": 0.26, "grad_norm": 1.0560028553009033, "learning_rate": 8.687676446754464e-06, "loss": 0.5779, "step": 4087 }, { "epoch": 0.26, "grad_norm": 1.142768383026123, "learning_rate": 8.686983506771149e-06, "loss": 0.5514, "step": 4088 }, { "epoch": 0.26, "grad_norm": 0.9595299959182739, "learning_rate": 8.686290411541785e-06, "loss": 0.5195, "step": 4089 }, { "epoch": 0.26, "grad_norm": 1.0044152736663818, "learning_rate": 8.685597161095555e-06, "loss": 0.5095, "step": 4090 }, { "epoch": 0.26, "grad_norm": 0.9950119256973267, "learning_rate": 8.68490375546165e-06, "loss": 0.538, "step": 4091 }, { "epoch": 0.26, "grad_norm": 1.0089884996414185, "learning_rate": 8.684210194669269e-06, "loss": 0.5648, "step": 4092 }, { "epoch": 0.26, "grad_norm": 1.1214574575424194, "learning_rate": 8.68351647874761e-06, "loss": 0.5571, "step": 4093 }, { "epoch": 0.26, "grad_norm": 1.0534110069274902, "learning_rate": 8.682822607725887e-06, "loss": 0.5782, "step": 4094 }, { "epoch": 0.26, "grad_norm": 1.0555564165115356, "learning_rate": 8.682128581633316e-06, "loss": 0.5501, "step": 4095 }, { "epoch": 0.26, "grad_norm": 0.9699358344078064, "learning_rate": 8.68143440049912e-06, "loss": 0.552, "step": 4096 }, { "epoch": 0.26, "grad_norm": 0.959537148475647, "learning_rate": 8.68074006435253e-06, "loss": 0.5549, "step": 4097 }, { "epoch": 0.26, "grad_norm": 1.134029746055603, "learning_rate": 8.680045573222776e-06, "loss": 0.6056, "step": 4098 }, { "epoch": 0.26, "grad_norm": 1.0371557474136353, "learning_rate": 8.679350927139108e-06, "loss": 0.5999, "step": 4099 }, { "epoch": 0.26, "grad_norm": 0.9098154306411743, "learning_rate": 8.678656126130768e-06, "loss": 0.5404, "step": 4100 }, { "epoch": 0.26, "grad_norm": 1.038533329963684, "learning_rate": 8.677961170227021e-06, "loss": 0.5811, "step": 4101 }, { "epoch": 0.26, "grad_norm": 0.9490874409675598, "learning_rate": 8.677266059457121e-06, "loss": 0.5481, "step": 4102 }, { "epoch": 0.26, "grad_norm": 1.037155270576477, "learning_rate": 8.67657079385034e-06, "loss": 0.5693, "step": 4103 }, { "epoch": 0.26, "grad_norm": 1.065422773361206, "learning_rate": 8.675875373435951e-06, "loss": 0.5286, "step": 4104 }, { "epoch": 0.26, "grad_norm": 1.0294665098190308, "learning_rate": 8.67517979824324e-06, "loss": 0.5112, "step": 4105 }, { "epoch": 0.26, "grad_norm": 1.0523234605789185, "learning_rate": 8.674484068301492e-06, "loss": 0.5605, "step": 4106 }, { "epoch": 0.26, "grad_norm": 1.013358473777771, "learning_rate": 8.673788183640001e-06, "loss": 0.6047, "step": 4107 }, { "epoch": 0.26, "grad_norm": 1.1127411127090454, "learning_rate": 8.673092144288071e-06, "loss": 0.5797, "step": 4108 }, { "epoch": 0.26, "grad_norm": 0.998127818107605, "learning_rate": 8.672395950275008e-06, "loss": 0.5816, "step": 4109 }, { "epoch": 0.26, "grad_norm": 1.053147554397583, "learning_rate": 8.671699601630127e-06, "loss": 0.5526, "step": 4110 }, { "epoch": 0.26, "grad_norm": 0.9740100502967834, "learning_rate": 8.67100309838275e-06, "loss": 0.5408, "step": 4111 }, { "epoch": 0.26, "grad_norm": 1.016816258430481, "learning_rate": 8.670306440562202e-06, "loss": 0.5728, "step": 4112 }, { "epoch": 0.26, "grad_norm": 0.9359219074249268, "learning_rate": 8.669609628197817e-06, "loss": 0.517, "step": 4113 }, { "epoch": 0.26, "grad_norm": 1.0418291091918945, "learning_rate": 8.668912661318938e-06, "loss": 0.5534, "step": 4114 }, { "epoch": 0.26, "grad_norm": 1.0207910537719727, "learning_rate": 8.66821553995491e-06, "loss": 0.5825, "step": 4115 }, { "epoch": 0.26, "grad_norm": 0.9650876522064209, "learning_rate": 8.667518264135085e-06, "loss": 0.5182, "step": 4116 }, { "epoch": 0.26, "grad_norm": 1.0291123390197754, "learning_rate": 8.666820833888825e-06, "loss": 0.5197, "step": 4117 }, { "epoch": 0.26, "grad_norm": 1.034320592880249, "learning_rate": 8.666123249245495e-06, "loss": 0.5744, "step": 4118 }, { "epoch": 0.26, "grad_norm": 0.9790547490119934, "learning_rate": 8.665425510234469e-06, "loss": 0.5938, "step": 4119 }, { "epoch": 0.26, "grad_norm": 1.018493413925171, "learning_rate": 8.664727616885126e-06, "loss": 0.5469, "step": 4120 }, { "epoch": 0.26, "grad_norm": 1.008954405784607, "learning_rate": 8.66402956922685e-06, "loss": 0.5409, "step": 4121 }, { "epoch": 0.26, "grad_norm": 1.0612338781356812, "learning_rate": 8.663331367289038e-06, "loss": 0.6004, "step": 4122 }, { "epoch": 0.26, "grad_norm": 0.946708083152771, "learning_rate": 8.662633011101084e-06, "loss": 0.5077, "step": 4123 }, { "epoch": 0.26, "grad_norm": 0.9556915760040283, "learning_rate": 8.661934500692395e-06, "loss": 0.5697, "step": 4124 }, { "epoch": 0.26, "grad_norm": 0.9655421376228333, "learning_rate": 8.661235836092385e-06, "loss": 0.6102, "step": 4125 }, { "epoch": 0.26, "grad_norm": 0.9389283061027527, "learning_rate": 8.660537017330468e-06, "loss": 0.5537, "step": 4126 }, { "epoch": 0.26, "grad_norm": 1.082326054573059, "learning_rate": 8.659838044436074e-06, "loss": 0.5764, "step": 4127 }, { "epoch": 0.26, "grad_norm": 0.9879428744316101, "learning_rate": 8.65913891743863e-06, "loss": 0.4938, "step": 4128 }, { "epoch": 0.26, "grad_norm": 0.9940571784973145, "learning_rate": 8.658439636367574e-06, "loss": 0.5274, "step": 4129 }, { "epoch": 0.26, "grad_norm": 1.0283193588256836, "learning_rate": 8.657740201252353e-06, "loss": 0.6089, "step": 4130 }, { "epoch": 0.26, "grad_norm": 1.1298843622207642, "learning_rate": 8.657040612122418e-06, "loss": 0.5857, "step": 4131 }, { "epoch": 0.26, "grad_norm": 1.1527552604675293, "learning_rate": 8.656340869007225e-06, "loss": 0.6263, "step": 4132 }, { "epoch": 0.26, "grad_norm": 1.0959068536758423, "learning_rate": 8.655640971936236e-06, "loss": 0.5546, "step": 4133 }, { "epoch": 0.26, "grad_norm": 1.0256848335266113, "learning_rate": 8.654940920938922e-06, "loss": 0.5583, "step": 4134 }, { "epoch": 0.26, "grad_norm": 0.949115514755249, "learning_rate": 8.654240716044762e-06, "loss": 0.5556, "step": 4135 }, { "epoch": 0.26, "grad_norm": 1.077285885810852, "learning_rate": 8.653540357283236e-06, "loss": 0.5281, "step": 4136 }, { "epoch": 0.26, "grad_norm": 1.0292668342590332, "learning_rate": 8.652839844683836e-06, "loss": 0.5358, "step": 4137 }, { "epoch": 0.26, "grad_norm": 0.9717956185340881, "learning_rate": 8.652139178276058e-06, "loss": 0.5689, "step": 4138 }, { "epoch": 0.26, "grad_norm": 1.0240721702575684, "learning_rate": 8.651438358089403e-06, "loss": 0.5914, "step": 4139 }, { "epoch": 0.26, "grad_norm": 1.0816338062286377, "learning_rate": 8.650737384153382e-06, "loss": 0.6093, "step": 4140 }, { "epoch": 0.26, "grad_norm": 1.1217504739761353, "learning_rate": 8.65003625649751e-06, "loss": 0.5252, "step": 4141 }, { "epoch": 0.26, "grad_norm": 1.0407133102416992, "learning_rate": 8.649334975151307e-06, "loss": 0.567, "step": 4142 }, { "epoch": 0.26, "grad_norm": 1.0991817712783813, "learning_rate": 8.648633540144304e-06, "loss": 0.5795, "step": 4143 }, { "epoch": 0.26, "grad_norm": 0.9913834929466248, "learning_rate": 8.647931951506037e-06, "loss": 0.5412, "step": 4144 }, { "epoch": 0.26, "grad_norm": 1.0170899629592896, "learning_rate": 8.647230209266043e-06, "loss": 0.564, "step": 4145 }, { "epoch": 0.26, "grad_norm": 1.0658817291259766, "learning_rate": 8.646528313453876e-06, "loss": 0.5481, "step": 4146 }, { "epoch": 0.26, "grad_norm": 0.9929413795471191, "learning_rate": 8.645826264099085e-06, "loss": 0.5121, "step": 4147 }, { "epoch": 0.26, "grad_norm": 1.0202983617782593, "learning_rate": 8.645124061231234e-06, "loss": 0.5619, "step": 4148 }, { "epoch": 0.26, "grad_norm": 1.0099726915359497, "learning_rate": 8.644421704879889e-06, "loss": 0.4819, "step": 4149 }, { "epoch": 0.26, "grad_norm": 0.9748672842979431, "learning_rate": 8.643719195074622e-06, "loss": 0.535, "step": 4150 }, { "epoch": 0.26, "grad_norm": 1.0206111669540405, "learning_rate": 8.643016531845017e-06, "loss": 0.5487, "step": 4151 }, { "epoch": 0.26, "grad_norm": 1.2215276956558228, "learning_rate": 8.642313715220659e-06, "loss": 0.5484, "step": 4152 }, { "epoch": 0.26, "grad_norm": 0.9748323559761047, "learning_rate": 8.641610745231142e-06, "loss": 0.5357, "step": 4153 }, { "epoch": 0.26, "grad_norm": 1.0382490158081055, "learning_rate": 8.640907621906062e-06, "loss": 0.5182, "step": 4154 }, { "epoch": 0.26, "grad_norm": 1.0930920839309692, "learning_rate": 8.640204345275029e-06, "loss": 0.5927, "step": 4155 }, { "epoch": 0.26, "grad_norm": 1.106478214263916, "learning_rate": 8.639500915367656e-06, "loss": 0.5741, "step": 4156 }, { "epoch": 0.26, "grad_norm": 1.0560954809188843, "learning_rate": 8.63879733221356e-06, "loss": 0.5941, "step": 4157 }, { "epoch": 0.26, "grad_norm": 1.0674949884414673, "learning_rate": 8.638093595842366e-06, "loss": 0.5958, "step": 4158 }, { "epoch": 0.26, "grad_norm": 0.994944155216217, "learning_rate": 8.637389706283705e-06, "loss": 0.5292, "step": 4159 }, { "epoch": 0.26, "grad_norm": 1.093483328819275, "learning_rate": 8.636685663567219e-06, "loss": 0.5908, "step": 4160 }, { "epoch": 0.26, "grad_norm": 0.9695919156074524, "learning_rate": 8.635981467722552e-06, "loss": 0.5486, "step": 4161 }, { "epoch": 0.26, "grad_norm": 0.9622272253036499, "learning_rate": 8.635277118779353e-06, "loss": 0.5281, "step": 4162 }, { "epoch": 0.26, "grad_norm": 0.9899247288703918, "learning_rate": 8.63457261676728e-06, "loss": 0.5503, "step": 4163 }, { "epoch": 0.26, "grad_norm": 1.0092194080352783, "learning_rate": 8.633867961715998e-06, "loss": 0.5343, "step": 4164 }, { "epoch": 0.26, "grad_norm": 1.0169821977615356, "learning_rate": 8.633163153655178e-06, "loss": 0.5738, "step": 4165 }, { "epoch": 0.26, "grad_norm": 1.0969207286834717, "learning_rate": 8.632458192614495e-06, "loss": 0.6267, "step": 4166 }, { "epoch": 0.26, "grad_norm": 1.0135194063186646, "learning_rate": 8.631753078623635e-06, "loss": 0.5283, "step": 4167 }, { "epoch": 0.26, "grad_norm": 1.0597193241119385, "learning_rate": 8.631047811712288e-06, "loss": 0.6067, "step": 4168 }, { "epoch": 0.26, "grad_norm": 1.0797663927078247, "learning_rate": 8.630342391910147e-06, "loss": 0.594, "step": 4169 }, { "epoch": 0.26, "grad_norm": 1.0816011428833008, "learning_rate": 8.629636819246919e-06, "loss": 0.5519, "step": 4170 }, { "epoch": 0.26, "grad_norm": 1.047973871231079, "learning_rate": 8.628931093752308e-06, "loss": 0.5402, "step": 4171 }, { "epoch": 0.26, "grad_norm": 1.0002282857894897, "learning_rate": 8.628225215456037e-06, "loss": 0.5496, "step": 4172 }, { "epoch": 0.26, "grad_norm": 1.0012603998184204, "learning_rate": 8.627519184387821e-06, "loss": 0.5941, "step": 4173 }, { "epoch": 0.26, "grad_norm": 1.0786024332046509, "learning_rate": 8.626813000577393e-06, "loss": 0.5951, "step": 4174 }, { "epoch": 0.26, "grad_norm": 1.1156187057495117, "learning_rate": 8.626106664054483e-06, "loss": 0.5681, "step": 4175 }, { "epoch": 0.26, "grad_norm": 1.0088963508605957, "learning_rate": 8.62540017484884e-06, "loss": 0.5534, "step": 4176 }, { "epoch": 0.26, "grad_norm": 1.043418526649475, "learning_rate": 8.624693532990205e-06, "loss": 0.5366, "step": 4177 }, { "epoch": 0.26, "grad_norm": 1.1141891479492188, "learning_rate": 8.623986738508334e-06, "loss": 0.5752, "step": 4178 }, { "epoch": 0.26, "grad_norm": 1.0138537883758545, "learning_rate": 8.62327979143299e-06, "loss": 0.537, "step": 4179 }, { "epoch": 0.26, "grad_norm": 0.9940417408943176, "learning_rate": 8.622572691793937e-06, "loss": 0.5016, "step": 4180 }, { "epoch": 0.26, "grad_norm": 1.0246286392211914, "learning_rate": 8.621865439620952e-06, "loss": 0.5365, "step": 4181 }, { "epoch": 0.26, "grad_norm": 0.9923083782196045, "learning_rate": 8.621158034943812e-06, "loss": 0.544, "step": 4182 }, { "epoch": 0.27, "grad_norm": 0.9740118384361267, "learning_rate": 8.620450477792303e-06, "loss": 0.5447, "step": 4183 }, { "epoch": 0.27, "grad_norm": 1.0033918619155884, "learning_rate": 8.619742768196221e-06, "loss": 0.522, "step": 4184 }, { "epoch": 0.27, "grad_norm": 1.0502849817276, "learning_rate": 8.619034906185362e-06, "loss": 0.6178, "step": 4185 }, { "epoch": 0.27, "grad_norm": 1.0749516487121582, "learning_rate": 8.618326891789534e-06, "loss": 0.5715, "step": 4186 }, { "epoch": 0.27, "grad_norm": 1.1323692798614502, "learning_rate": 8.617618725038545e-06, "loss": 0.5474, "step": 4187 }, { "epoch": 0.27, "grad_norm": 1.0600783824920654, "learning_rate": 8.61691040596222e-06, "loss": 0.5766, "step": 4188 }, { "epoch": 0.27, "grad_norm": 1.0678776502609253, "learning_rate": 8.616201934590379e-06, "loss": 0.5907, "step": 4189 }, { "epoch": 0.27, "grad_norm": 1.1253869533538818, "learning_rate": 8.615493310952852e-06, "loss": 0.5871, "step": 4190 }, { "epoch": 0.27, "grad_norm": 0.9809844493865967, "learning_rate": 8.614784535079482e-06, "loss": 0.5472, "step": 4191 }, { "epoch": 0.27, "grad_norm": 1.0507099628448486, "learning_rate": 8.614075607000108e-06, "loss": 0.5836, "step": 4192 }, { "epoch": 0.27, "grad_norm": 1.0916355848312378, "learning_rate": 8.613366526744584e-06, "loss": 0.5245, "step": 4193 }, { "epoch": 0.27, "grad_norm": 1.0502519607543945, "learning_rate": 8.612657294342765e-06, "loss": 0.5358, "step": 4194 }, { "epoch": 0.27, "grad_norm": 1.0014053583145142, "learning_rate": 8.611947909824514e-06, "loss": 0.5794, "step": 4195 }, { "epoch": 0.27, "grad_norm": 0.9775264263153076, "learning_rate": 8.611238373219703e-06, "loss": 0.5321, "step": 4196 }, { "epoch": 0.27, "grad_norm": 1.1208726167678833, "learning_rate": 8.610528684558206e-06, "loss": 0.5351, "step": 4197 }, { "epoch": 0.27, "grad_norm": 1.0999410152435303, "learning_rate": 8.609818843869907e-06, "loss": 0.5385, "step": 4198 }, { "epoch": 0.27, "grad_norm": 0.9840497374534607, "learning_rate": 8.609108851184693e-06, "loss": 0.5264, "step": 4199 }, { "epoch": 0.27, "grad_norm": 0.9287744760513306, "learning_rate": 8.608398706532462e-06, "loss": 0.5643, "step": 4200 }, { "epoch": 0.27, "grad_norm": 0.9510626792907715, "learning_rate": 8.607688409943112e-06, "loss": 0.5107, "step": 4201 }, { "epoch": 0.27, "grad_norm": 0.9852222204208374, "learning_rate": 8.606977961446554e-06, "loss": 0.5483, "step": 4202 }, { "epoch": 0.27, "grad_norm": 0.9964022636413574, "learning_rate": 8.606267361072704e-06, "loss": 0.5655, "step": 4203 }, { "epoch": 0.27, "grad_norm": 0.9797468781471252, "learning_rate": 8.605556608851478e-06, "loss": 0.5991, "step": 4204 }, { "epoch": 0.27, "grad_norm": 1.094444990158081, "learning_rate": 8.604845704812808e-06, "loss": 0.5936, "step": 4205 }, { "epoch": 0.27, "grad_norm": 1.0043718814849854, "learning_rate": 8.604134648986625e-06, "loss": 0.5328, "step": 4206 }, { "epoch": 0.27, "grad_norm": 1.0017262697219849, "learning_rate": 8.603423441402868e-06, "loss": 0.558, "step": 4207 }, { "epoch": 0.27, "grad_norm": 1.0760914087295532, "learning_rate": 8.602712082091487e-06, "loss": 0.5093, "step": 4208 }, { "epoch": 0.27, "grad_norm": 1.0324655771255493, "learning_rate": 8.602000571082432e-06, "loss": 0.5664, "step": 4209 }, { "epoch": 0.27, "grad_norm": 0.9881264567375183, "learning_rate": 8.601288908405665e-06, "loss": 0.5565, "step": 4210 }, { "epoch": 0.27, "grad_norm": 0.9853984713554382, "learning_rate": 8.60057709409115e-06, "loss": 0.5614, "step": 4211 }, { "epoch": 0.27, "grad_norm": 0.9987500309944153, "learning_rate": 8.599865128168858e-06, "loss": 0.5252, "step": 4212 }, { "epoch": 0.27, "grad_norm": 0.9709988832473755, "learning_rate": 8.599153010668768e-06, "loss": 0.5274, "step": 4213 }, { "epoch": 0.27, "grad_norm": 1.00737464427948, "learning_rate": 8.598440741620868e-06, "loss": 0.5745, "step": 4214 }, { "epoch": 0.27, "grad_norm": 0.9636495113372803, "learning_rate": 8.597728321055144e-06, "loss": 0.5178, "step": 4215 }, { "epoch": 0.27, "grad_norm": 0.9868621230125427, "learning_rate": 8.597015749001596e-06, "loss": 0.5453, "step": 4216 }, { "epoch": 0.27, "grad_norm": 0.9736589193344116, "learning_rate": 8.59630302549023e-06, "loss": 0.5534, "step": 4217 }, { "epoch": 0.27, "grad_norm": 1.090067744255066, "learning_rate": 8.595590150551052e-06, "loss": 0.5462, "step": 4218 }, { "epoch": 0.27, "grad_norm": 1.1201854944229126, "learning_rate": 8.59487712421408e-06, "loss": 0.553, "step": 4219 }, { "epoch": 0.27, "grad_norm": 1.06673264503479, "learning_rate": 8.594163946509339e-06, "loss": 0.5585, "step": 4220 }, { "epoch": 0.27, "grad_norm": 1.0092447996139526, "learning_rate": 8.593450617466859e-06, "loss": 0.5357, "step": 4221 }, { "epoch": 0.27, "grad_norm": 1.069238305091858, "learning_rate": 8.592737137116673e-06, "loss": 0.5405, "step": 4222 }, { "epoch": 0.27, "grad_norm": 1.0110065937042236, "learning_rate": 8.592023505488825e-06, "loss": 0.5798, "step": 4223 }, { "epoch": 0.27, "grad_norm": 0.8846083283424377, "learning_rate": 8.591309722613362e-06, "loss": 0.5074, "step": 4224 }, { "epoch": 0.27, "grad_norm": 0.9890139102935791, "learning_rate": 8.590595788520342e-06, "loss": 0.517, "step": 4225 }, { "epoch": 0.27, "grad_norm": 0.9960560202598572, "learning_rate": 8.589881703239821e-06, "loss": 0.4919, "step": 4226 }, { "epoch": 0.27, "grad_norm": 0.9747819900512695, "learning_rate": 8.58916746680187e-06, "loss": 0.544, "step": 4227 }, { "epoch": 0.27, "grad_norm": 1.0688061714172363, "learning_rate": 8.588453079236565e-06, "loss": 0.6009, "step": 4228 }, { "epoch": 0.27, "grad_norm": 1.0313738584518433, "learning_rate": 8.587738540573984e-06, "loss": 0.5276, "step": 4229 }, { "epoch": 0.27, "grad_norm": 0.9333264231681824, "learning_rate": 8.587023850844212e-06, "loss": 0.5402, "step": 4230 }, { "epoch": 0.27, "grad_norm": 1.0281132459640503, "learning_rate": 8.586309010077345e-06, "loss": 0.6009, "step": 4231 }, { "epoch": 0.27, "grad_norm": 0.923550009727478, "learning_rate": 8.585594018303482e-06, "loss": 0.5505, "step": 4232 }, { "epoch": 0.27, "grad_norm": 0.9499755501747131, "learning_rate": 8.584878875552727e-06, "loss": 0.5427, "step": 4233 }, { "epoch": 0.27, "grad_norm": 0.9870060086250305, "learning_rate": 8.584163581855194e-06, "loss": 0.5592, "step": 4234 }, { "epoch": 0.27, "grad_norm": 1.0823084115982056, "learning_rate": 8.583448137241002e-06, "loss": 0.5651, "step": 4235 }, { "epoch": 0.27, "grad_norm": 1.0290069580078125, "learning_rate": 8.582732541740273e-06, "loss": 0.5512, "step": 4236 }, { "epoch": 0.27, "grad_norm": 0.9916083812713623, "learning_rate": 8.582016795383142e-06, "loss": 0.5345, "step": 4237 }, { "epoch": 0.27, "grad_norm": 0.997864305973053, "learning_rate": 8.581300898199743e-06, "loss": 0.5139, "step": 4238 }, { "epoch": 0.27, "grad_norm": 1.0194896459579468, "learning_rate": 8.580584850220222e-06, "loss": 0.5385, "step": 4239 }, { "epoch": 0.27, "grad_norm": 1.0112195014953613, "learning_rate": 8.57986865147473e-06, "loss": 0.5308, "step": 4240 }, { "epoch": 0.27, "grad_norm": 1.0058863162994385, "learning_rate": 8.57915230199342e-06, "loss": 0.5421, "step": 4241 }, { "epoch": 0.27, "grad_norm": 1.0418747663497925, "learning_rate": 8.578435801806461e-06, "loss": 0.5858, "step": 4242 }, { "epoch": 0.27, "grad_norm": 0.9853035807609558, "learning_rate": 8.577719150944017e-06, "loss": 0.5623, "step": 4243 }, { "epoch": 0.27, "grad_norm": 1.0543568134307861, "learning_rate": 8.577002349436264e-06, "loss": 0.5407, "step": 4244 }, { "epoch": 0.27, "grad_norm": 1.1742209196090698, "learning_rate": 8.57628539731339e-06, "loss": 0.6039, "step": 4245 }, { "epoch": 0.27, "grad_norm": 0.9908335208892822, "learning_rate": 8.575568294605574e-06, "loss": 0.589, "step": 4246 }, { "epoch": 0.27, "grad_norm": 1.0744788646697998, "learning_rate": 8.574851041343018e-06, "loss": 0.5337, "step": 4247 }, { "epoch": 0.27, "grad_norm": 0.9883583188056946, "learning_rate": 8.574133637555921e-06, "loss": 0.5498, "step": 4248 }, { "epoch": 0.27, "grad_norm": 0.9712103009223938, "learning_rate": 8.57341608327449e-06, "loss": 0.5297, "step": 4249 }, { "epoch": 0.27, "grad_norm": 1.036982774734497, "learning_rate": 8.572698378528937e-06, "loss": 0.5845, "step": 4250 }, { "epoch": 0.27, "grad_norm": 0.9253586530685425, "learning_rate": 8.571980523349485e-06, "loss": 0.5515, "step": 4251 }, { "epoch": 0.27, "grad_norm": 1.1356269121170044, "learning_rate": 8.57126251776636e-06, "loss": 0.593, "step": 4252 }, { "epoch": 0.27, "grad_norm": 1.0142786502838135, "learning_rate": 8.570544361809792e-06, "loss": 0.6091, "step": 4253 }, { "epoch": 0.27, "grad_norm": 0.9978875517845154, "learning_rate": 8.569826055510025e-06, "loss": 0.5441, "step": 4254 }, { "epoch": 0.27, "grad_norm": 0.9625915288925171, "learning_rate": 8.569107598897296e-06, "loss": 0.4728, "step": 4255 }, { "epoch": 0.27, "grad_norm": 1.1040743589401245, "learning_rate": 8.568388992001868e-06, "loss": 0.5302, "step": 4256 }, { "epoch": 0.27, "grad_norm": 1.0358699560165405, "learning_rate": 8.56767023485399e-06, "loss": 0.5349, "step": 4257 }, { "epoch": 0.27, "grad_norm": 1.031915545463562, "learning_rate": 8.56695132748393e-06, "loss": 0.591, "step": 4258 }, { "epoch": 0.27, "grad_norm": 1.0375105142593384, "learning_rate": 8.566232269921957e-06, "loss": 0.6325, "step": 4259 }, { "epoch": 0.27, "grad_norm": 0.9367465972900391, "learning_rate": 8.565513062198351e-06, "loss": 0.5572, "step": 4260 }, { "epoch": 0.27, "grad_norm": 0.9881364107131958, "learning_rate": 8.564793704343392e-06, "loss": 0.511, "step": 4261 }, { "epoch": 0.27, "grad_norm": 1.3143302202224731, "learning_rate": 8.564074196387371e-06, "loss": 0.6167, "step": 4262 }, { "epoch": 0.27, "grad_norm": 0.9849549531936646, "learning_rate": 8.563354538360585e-06, "loss": 0.5401, "step": 4263 }, { "epoch": 0.27, "grad_norm": 0.9980334043502808, "learning_rate": 8.562634730293335e-06, "loss": 0.537, "step": 4264 }, { "epoch": 0.27, "grad_norm": 1.069611668586731, "learning_rate": 8.56191477221593e-06, "loss": 0.5925, "step": 4265 }, { "epoch": 0.27, "grad_norm": 1.0978347063064575, "learning_rate": 8.561194664158685e-06, "loss": 0.6103, "step": 4266 }, { "epoch": 0.27, "grad_norm": 1.1114164590835571, "learning_rate": 8.560474406151921e-06, "loss": 0.5989, "step": 4267 }, { "epoch": 0.27, "grad_norm": 1.0410330295562744, "learning_rate": 8.559753998225965e-06, "loss": 0.5932, "step": 4268 }, { "epoch": 0.27, "grad_norm": 1.0758030414581299, "learning_rate": 8.559033440411155e-06, "loss": 0.5336, "step": 4269 }, { "epoch": 0.27, "grad_norm": 0.9148129820823669, "learning_rate": 8.558312732737825e-06, "loss": 0.512, "step": 4270 }, { "epoch": 0.27, "grad_norm": 0.9755415320396423, "learning_rate": 8.557591875236323e-06, "loss": 0.5372, "step": 4271 }, { "epoch": 0.27, "grad_norm": 0.9766758680343628, "learning_rate": 8.556870867937006e-06, "loss": 0.527, "step": 4272 }, { "epoch": 0.27, "grad_norm": 0.9935266375541687, "learning_rate": 8.55614971087023e-06, "loss": 0.5456, "step": 4273 }, { "epoch": 0.27, "grad_norm": 1.0467207431793213, "learning_rate": 8.555428404066359e-06, "loss": 0.5415, "step": 4274 }, { "epoch": 0.27, "grad_norm": 0.9002679586410522, "learning_rate": 8.554706947555766e-06, "loss": 0.5279, "step": 4275 }, { "epoch": 0.27, "grad_norm": 1.0087589025497437, "learning_rate": 8.553985341368832e-06, "loss": 0.6003, "step": 4276 }, { "epoch": 0.27, "grad_norm": 0.8842843770980835, "learning_rate": 8.553263585535937e-06, "loss": 0.5594, "step": 4277 }, { "epoch": 0.27, "grad_norm": 1.0302597284317017, "learning_rate": 8.552541680087472e-06, "loss": 0.5525, "step": 4278 }, { "epoch": 0.27, "grad_norm": 1.047193169593811, "learning_rate": 8.551819625053837e-06, "loss": 0.5872, "step": 4279 }, { "epoch": 0.27, "grad_norm": 1.0108321905136108, "learning_rate": 8.551097420465432e-06, "loss": 0.5376, "step": 4280 }, { "epoch": 0.27, "grad_norm": 1.0369234085083008, "learning_rate": 8.55037506635267e-06, "loss": 0.5298, "step": 4281 }, { "epoch": 0.27, "grad_norm": 1.0145535469055176, "learning_rate": 8.549652562745963e-06, "loss": 0.5513, "step": 4282 }, { "epoch": 0.27, "grad_norm": 1.0360575914382935, "learning_rate": 8.548929909675736e-06, "loss": 0.5499, "step": 4283 }, { "epoch": 0.27, "grad_norm": 1.1149938106536865, "learning_rate": 8.548207107172417e-06, "loss": 0.5757, "step": 4284 }, { "epoch": 0.27, "grad_norm": 1.109709620475769, "learning_rate": 8.547484155266439e-06, "loss": 0.5682, "step": 4285 }, { "epoch": 0.27, "grad_norm": 1.0783034563064575, "learning_rate": 8.546761053988244e-06, "loss": 0.614, "step": 4286 }, { "epoch": 0.27, "grad_norm": 1.0046306848526, "learning_rate": 8.546037803368279e-06, "loss": 0.5357, "step": 4287 }, { "epoch": 0.27, "grad_norm": 0.9678237438201904, "learning_rate": 8.545314403436998e-06, "loss": 0.506, "step": 4288 }, { "epoch": 0.27, "grad_norm": 1.1249730587005615, "learning_rate": 8.54459085422486e-06, "loss": 0.5468, "step": 4289 }, { "epoch": 0.27, "grad_norm": 1.0614315271377563, "learning_rate": 8.543867155762335e-06, "loss": 0.5888, "step": 4290 }, { "epoch": 0.27, "grad_norm": 1.1124675273895264, "learning_rate": 8.543143308079888e-06, "loss": 0.5672, "step": 4291 }, { "epoch": 0.27, "grad_norm": 1.035871148109436, "learning_rate": 8.542419311208006e-06, "loss": 0.5639, "step": 4292 }, { "epoch": 0.27, "grad_norm": 0.9853594899177551, "learning_rate": 8.541695165177169e-06, "loss": 0.5951, "step": 4293 }, { "epoch": 0.27, "grad_norm": 1.0269336700439453, "learning_rate": 8.540970870017867e-06, "loss": 0.5504, "step": 4294 }, { "epoch": 0.27, "grad_norm": 0.9964527487754822, "learning_rate": 8.540246425760602e-06, "loss": 0.5321, "step": 4295 }, { "epoch": 0.27, "grad_norm": 1.0525084733963013, "learning_rate": 8.539521832435874e-06, "loss": 0.5324, "step": 4296 }, { "epoch": 0.27, "grad_norm": 1.0360336303710938, "learning_rate": 8.538797090074196e-06, "loss": 0.5566, "step": 4297 }, { "epoch": 0.27, "grad_norm": 1.0745279788970947, "learning_rate": 8.538072198706081e-06, "loss": 0.5588, "step": 4298 }, { "epoch": 0.27, "grad_norm": 1.0289655923843384, "learning_rate": 8.537347158362056e-06, "loss": 0.5235, "step": 4299 }, { "epoch": 0.27, "grad_norm": 1.0566655397415161, "learning_rate": 8.536621969072648e-06, "loss": 0.5395, "step": 4300 }, { "epoch": 0.27, "grad_norm": 1.065811038017273, "learning_rate": 8.53589663086839e-06, "loss": 0.5819, "step": 4301 }, { "epoch": 0.27, "grad_norm": 1.0649478435516357, "learning_rate": 8.535171143779828e-06, "loss": 0.5556, "step": 4302 }, { "epoch": 0.27, "grad_norm": 0.9797233939170837, "learning_rate": 8.534445507837505e-06, "loss": 0.5467, "step": 4303 }, { "epoch": 0.27, "grad_norm": 1.0511661767959595, "learning_rate": 8.533719723071979e-06, "loss": 0.5709, "step": 4304 }, { "epoch": 0.27, "grad_norm": 1.0210515260696411, "learning_rate": 8.532993789513805e-06, "loss": 0.5575, "step": 4305 }, { "epoch": 0.27, "grad_norm": 1.1841431856155396, "learning_rate": 8.532267707193555e-06, "loss": 0.5748, "step": 4306 }, { "epoch": 0.27, "grad_norm": 1.0234102010726929, "learning_rate": 8.5315414761418e-06, "loss": 0.5749, "step": 4307 }, { "epoch": 0.27, "grad_norm": 1.0281085968017578, "learning_rate": 8.530815096389118e-06, "loss": 0.5112, "step": 4308 }, { "epoch": 0.27, "grad_norm": 1.0519088506698608, "learning_rate": 8.530088567966095e-06, "loss": 0.5671, "step": 4309 }, { "epoch": 0.27, "grad_norm": 0.9593432545661926, "learning_rate": 8.529361890903323e-06, "loss": 0.5305, "step": 4310 }, { "epoch": 0.27, "grad_norm": 1.2273467779159546, "learning_rate": 8.5286350652314e-06, "loss": 0.5833, "step": 4311 }, { "epoch": 0.27, "grad_norm": 1.0280483961105347, "learning_rate": 8.527908090980929e-06, "loss": 0.595, "step": 4312 }, { "epoch": 0.27, "grad_norm": 1.0453826189041138, "learning_rate": 8.527180968182522e-06, "loss": 0.5362, "step": 4313 }, { "epoch": 0.27, "grad_norm": 0.9932584762573242, "learning_rate": 8.526453696866794e-06, "loss": 0.5371, "step": 4314 }, { "epoch": 0.27, "grad_norm": 0.9493650197982788, "learning_rate": 8.525726277064368e-06, "loss": 0.5309, "step": 4315 }, { "epoch": 0.27, "grad_norm": 0.9597651958465576, "learning_rate": 8.524998708805874e-06, "loss": 0.5175, "step": 4316 }, { "epoch": 0.27, "grad_norm": 0.9586458206176758, "learning_rate": 8.524270992121948e-06, "loss": 0.5555, "step": 4317 }, { "epoch": 0.27, "grad_norm": 1.064935564994812, "learning_rate": 8.523543127043228e-06, "loss": 0.5609, "step": 4318 }, { "epoch": 0.27, "grad_norm": 0.9935119152069092, "learning_rate": 8.522815113600366e-06, "loss": 0.5312, "step": 4319 }, { "epoch": 0.27, "grad_norm": 1.1048222780227661, "learning_rate": 8.522086951824014e-06, "loss": 0.611, "step": 4320 }, { "epoch": 0.27, "grad_norm": 0.9754494428634644, "learning_rate": 8.521358641744834e-06, "loss": 0.5286, "step": 4321 }, { "epoch": 0.27, "grad_norm": 1.023982048034668, "learning_rate": 8.520630183393492e-06, "loss": 0.5568, "step": 4322 }, { "epoch": 0.27, "grad_norm": 1.0174022912979126, "learning_rate": 8.519901576800657e-06, "loss": 0.5716, "step": 4323 }, { "epoch": 0.27, "grad_norm": 1.0983679294586182, "learning_rate": 8.519172821997015e-06, "loss": 0.5184, "step": 4324 }, { "epoch": 0.27, "grad_norm": 1.0188732147216797, "learning_rate": 8.518443919013247e-06, "loss": 0.5623, "step": 4325 }, { "epoch": 0.27, "grad_norm": 0.9419271945953369, "learning_rate": 8.517714867880044e-06, "loss": 0.5156, "step": 4326 }, { "epoch": 0.27, "grad_norm": 1.0496870279312134, "learning_rate": 8.516985668628105e-06, "loss": 0.4981, "step": 4327 }, { "epoch": 0.27, "grad_norm": 1.0018783807754517, "learning_rate": 8.516256321288136e-06, "loss": 0.5079, "step": 4328 }, { "epoch": 0.27, "grad_norm": 1.0067249536514282, "learning_rate": 8.515526825890845e-06, "loss": 0.4978, "step": 4329 }, { "epoch": 0.27, "grad_norm": 1.0570882558822632, "learning_rate": 8.514797182466948e-06, "loss": 0.532, "step": 4330 }, { "epoch": 0.27, "grad_norm": 1.1334291696548462, "learning_rate": 8.51406739104717e-06, "loss": 0.5654, "step": 4331 }, { "epoch": 0.27, "grad_norm": 1.0686006546020508, "learning_rate": 8.513337451662238e-06, "loss": 0.5888, "step": 4332 }, { "epoch": 0.27, "grad_norm": 1.0836188793182373, "learning_rate": 8.512607364342887e-06, "loss": 0.5513, "step": 4333 }, { "epoch": 0.27, "grad_norm": 1.0141760110855103, "learning_rate": 8.51187712911986e-06, "loss": 0.5344, "step": 4334 }, { "epoch": 0.27, "grad_norm": 1.0270737409591675, "learning_rate": 8.511146746023905e-06, "loss": 0.5242, "step": 4335 }, { "epoch": 0.27, "grad_norm": 1.0236823558807373, "learning_rate": 8.510416215085775e-06, "loss": 0.5676, "step": 4336 }, { "epoch": 0.27, "grad_norm": 0.9484716653823853, "learning_rate": 8.509685536336229e-06, "loss": 0.5009, "step": 4337 }, { "epoch": 0.27, "grad_norm": 1.0540645122528076, "learning_rate": 8.508954709806034e-06, "loss": 0.5761, "step": 4338 }, { "epoch": 0.27, "grad_norm": 1.025154948234558, "learning_rate": 8.508223735525963e-06, "loss": 0.5623, "step": 4339 }, { "epoch": 0.27, "grad_norm": 1.0674318075180054, "learning_rate": 8.507492613526795e-06, "loss": 0.5728, "step": 4340 }, { "epoch": 0.28, "grad_norm": 0.9796115756034851, "learning_rate": 8.506761343839316e-06, "loss": 0.5457, "step": 4341 }, { "epoch": 0.28, "grad_norm": 1.0570836067199707, "learning_rate": 8.506029926494315e-06, "loss": 0.5681, "step": 4342 }, { "epoch": 0.28, "grad_norm": 1.0061306953430176, "learning_rate": 8.50529836152259e-06, "loss": 0.6148, "step": 4343 }, { "epoch": 0.28, "grad_norm": 0.9319235682487488, "learning_rate": 8.504566648954947e-06, "loss": 0.5147, "step": 4344 }, { "epoch": 0.28, "grad_norm": 1.0190160274505615, "learning_rate": 8.503834788822191e-06, "loss": 0.5375, "step": 4345 }, { "epoch": 0.28, "grad_norm": 0.9299525022506714, "learning_rate": 8.503102781155141e-06, "loss": 0.5279, "step": 4346 }, { "epoch": 0.28, "grad_norm": 0.9546197652816772, "learning_rate": 8.502370625984622e-06, "loss": 0.529, "step": 4347 }, { "epoch": 0.28, "grad_norm": 1.046825885772705, "learning_rate": 8.501638323341459e-06, "loss": 0.59, "step": 4348 }, { "epoch": 0.28, "grad_norm": 0.9880673289299011, "learning_rate": 8.500905873256486e-06, "loss": 0.5394, "step": 4349 }, { "epoch": 0.28, "grad_norm": 1.1426104307174683, "learning_rate": 8.500173275760546e-06, "loss": 0.5538, "step": 4350 }, { "epoch": 0.28, "grad_norm": 0.9619871377944946, "learning_rate": 8.499440530884486e-06, "loss": 0.5246, "step": 4351 }, { "epoch": 0.28, "grad_norm": 1.0348474979400635, "learning_rate": 8.498707638659159e-06, "loss": 0.5367, "step": 4352 }, { "epoch": 0.28, "grad_norm": 0.9267213344573975, "learning_rate": 8.497974599115424e-06, "loss": 0.5001, "step": 4353 }, { "epoch": 0.28, "grad_norm": 1.062888503074646, "learning_rate": 8.497241412284147e-06, "loss": 0.5501, "step": 4354 }, { "epoch": 0.28, "grad_norm": 1.0686131715774536, "learning_rate": 8.496508078196202e-06, "loss": 0.574, "step": 4355 }, { "epoch": 0.28, "grad_norm": 0.9900593161582947, "learning_rate": 8.495774596882462e-06, "loss": 0.5111, "step": 4356 }, { "epoch": 0.28, "grad_norm": 0.933822751045227, "learning_rate": 8.495040968373817e-06, "loss": 0.5655, "step": 4357 }, { "epoch": 0.28, "grad_norm": 1.038696050643921, "learning_rate": 8.494307192701154e-06, "loss": 0.5161, "step": 4358 }, { "epoch": 0.28, "grad_norm": 0.9698143601417542, "learning_rate": 8.493573269895372e-06, "loss": 0.5127, "step": 4359 }, { "epoch": 0.28, "grad_norm": 0.945016086101532, "learning_rate": 8.492839199987373e-06, "loss": 0.529, "step": 4360 }, { "epoch": 0.28, "grad_norm": 1.0088963508605957, "learning_rate": 8.492104983008065e-06, "loss": 0.5339, "step": 4361 }, { "epoch": 0.28, "grad_norm": 0.9921668767929077, "learning_rate": 8.491370618988367e-06, "loss": 0.5159, "step": 4362 }, { "epoch": 0.28, "grad_norm": 1.0715415477752686, "learning_rate": 8.490636107959194e-06, "loss": 0.5341, "step": 4363 }, { "epoch": 0.28, "grad_norm": 1.040958046913147, "learning_rate": 8.489901449951478e-06, "loss": 0.5823, "step": 4364 }, { "epoch": 0.28, "grad_norm": 1.0582530498504639, "learning_rate": 8.489166644996154e-06, "loss": 0.5688, "step": 4365 }, { "epoch": 0.28, "grad_norm": 1.1656434535980225, "learning_rate": 8.48843169312416e-06, "loss": 0.5707, "step": 4366 }, { "epoch": 0.28, "grad_norm": 1.0881271362304688, "learning_rate": 8.487696594366444e-06, "loss": 0.5661, "step": 4367 }, { "epoch": 0.28, "grad_norm": 0.9278864860534668, "learning_rate": 8.486961348753954e-06, "loss": 0.4697, "step": 4368 }, { "epoch": 0.28, "grad_norm": 1.033172607421875, "learning_rate": 8.486225956317655e-06, "loss": 0.5875, "step": 4369 }, { "epoch": 0.28, "grad_norm": 0.9845580458641052, "learning_rate": 8.48549041708851e-06, "loss": 0.526, "step": 4370 }, { "epoch": 0.28, "grad_norm": 0.9801870584487915, "learning_rate": 8.484754731097484e-06, "loss": 0.5087, "step": 4371 }, { "epoch": 0.28, "grad_norm": 1.0618585348129272, "learning_rate": 8.484018898375561e-06, "loss": 0.5513, "step": 4372 }, { "epoch": 0.28, "grad_norm": 0.9950966835021973, "learning_rate": 8.483282918953723e-06, "loss": 0.5884, "step": 4373 }, { "epoch": 0.28, "grad_norm": 0.942801296710968, "learning_rate": 8.482546792862957e-06, "loss": 0.5631, "step": 4374 }, { "epoch": 0.28, "grad_norm": 1.0492613315582275, "learning_rate": 8.481810520134262e-06, "loss": 0.5348, "step": 4375 }, { "epoch": 0.28, "grad_norm": 0.9315921664237976, "learning_rate": 8.481074100798638e-06, "loss": 0.5685, "step": 4376 }, { "epoch": 0.28, "grad_norm": 1.0836108922958374, "learning_rate": 8.480337534887093e-06, "loss": 0.5692, "step": 4377 }, { "epoch": 0.28, "grad_norm": 1.0537856817245483, "learning_rate": 8.479600822430642e-06, "loss": 0.5957, "step": 4378 }, { "epoch": 0.28, "grad_norm": 1.0087220668792725, "learning_rate": 8.478863963460306e-06, "loss": 0.5093, "step": 4379 }, { "epoch": 0.28, "grad_norm": 1.0664055347442627, "learning_rate": 8.478126958007108e-06, "loss": 0.5462, "step": 4380 }, { "epoch": 0.28, "grad_norm": 0.9335545301437378, "learning_rate": 8.477389806102085e-06, "loss": 0.5823, "step": 4381 }, { "epoch": 0.28, "grad_norm": 1.0260354280471802, "learning_rate": 8.476652507776274e-06, "loss": 0.514, "step": 4382 }, { "epoch": 0.28, "grad_norm": 1.0136860609054565, "learning_rate": 8.475915063060721e-06, "loss": 0.5885, "step": 4383 }, { "epoch": 0.28, "grad_norm": 1.0180233716964722, "learning_rate": 8.475177471986476e-06, "loss": 0.5295, "step": 4384 }, { "epoch": 0.28, "grad_norm": 1.0764186382293701, "learning_rate": 8.474439734584597e-06, "loss": 0.5504, "step": 4385 }, { "epoch": 0.28, "grad_norm": 1.021909236907959, "learning_rate": 8.473701850886147e-06, "loss": 0.5445, "step": 4386 }, { "epoch": 0.28, "grad_norm": 1.0124695301055908, "learning_rate": 8.472963820922195e-06, "loss": 0.5058, "step": 4387 }, { "epoch": 0.28, "grad_norm": 1.0495041608810425, "learning_rate": 8.47222564472382e-06, "loss": 0.5681, "step": 4388 }, { "epoch": 0.28, "grad_norm": 1.0147275924682617, "learning_rate": 8.471487322322101e-06, "loss": 0.5115, "step": 4389 }, { "epoch": 0.28, "grad_norm": 1.0069338083267212, "learning_rate": 8.47074885374813e-06, "loss": 0.58, "step": 4390 }, { "epoch": 0.28, "grad_norm": 1.0648608207702637, "learning_rate": 8.470010239032995e-06, "loss": 0.5825, "step": 4391 }, { "epoch": 0.28, "grad_norm": 1.0082082748413086, "learning_rate": 8.469271478207801e-06, "loss": 0.5018, "step": 4392 }, { "epoch": 0.28, "grad_norm": 0.9308993816375732, "learning_rate": 8.468532571303655e-06, "loss": 0.5125, "step": 4393 }, { "epoch": 0.28, "grad_norm": 1.0136820077896118, "learning_rate": 8.467793518351668e-06, "loss": 0.5703, "step": 4394 }, { "epoch": 0.28, "grad_norm": 1.1096161603927612, "learning_rate": 8.46705431938296e-06, "loss": 0.5736, "step": 4395 }, { "epoch": 0.28, "grad_norm": 0.9819938540458679, "learning_rate": 8.466314974428655e-06, "loss": 0.5676, "step": 4396 }, { "epoch": 0.28, "grad_norm": 1.0787750482559204, "learning_rate": 8.465575483519883e-06, "loss": 0.5306, "step": 4397 }, { "epoch": 0.28, "grad_norm": 1.0179063081741333, "learning_rate": 8.464835846687786e-06, "loss": 0.5864, "step": 4398 }, { "epoch": 0.28, "grad_norm": 0.9123398065567017, "learning_rate": 8.464096063963503e-06, "loss": 0.5144, "step": 4399 }, { "epoch": 0.28, "grad_norm": 1.0560660362243652, "learning_rate": 8.463356135378187e-06, "loss": 0.5308, "step": 4400 }, { "epoch": 0.28, "grad_norm": 1.008315920829773, "learning_rate": 8.462616060962992e-06, "loss": 0.5408, "step": 4401 }, { "epoch": 0.28, "grad_norm": 1.080171823501587, "learning_rate": 8.46187584074908e-06, "loss": 0.6, "step": 4402 }, { "epoch": 0.28, "grad_norm": 1.0192162990570068, "learning_rate": 8.461135474767618e-06, "loss": 0.5419, "step": 4403 }, { "epoch": 0.28, "grad_norm": 1.1443674564361572, "learning_rate": 8.460394963049784e-06, "loss": 0.5712, "step": 4404 }, { "epoch": 0.28, "grad_norm": 1.009781837463379, "learning_rate": 8.459654305626754e-06, "loss": 0.5378, "step": 4405 }, { "epoch": 0.28, "grad_norm": 0.9513635635375977, "learning_rate": 8.458913502529718e-06, "loss": 0.5402, "step": 4406 }, { "epoch": 0.28, "grad_norm": 1.040630578994751, "learning_rate": 8.458172553789866e-06, "loss": 0.5859, "step": 4407 }, { "epoch": 0.28, "grad_norm": 1.0000171661376953, "learning_rate": 8.457431459438398e-06, "loss": 0.5451, "step": 4408 }, { "epoch": 0.28, "grad_norm": 1.0893827676773071, "learning_rate": 8.456690219506519e-06, "loss": 0.5255, "step": 4409 }, { "epoch": 0.28, "grad_norm": 1.0486403703689575, "learning_rate": 8.45594883402544e-06, "loss": 0.5654, "step": 4410 }, { "epoch": 0.28, "grad_norm": 1.0393122434616089, "learning_rate": 8.455207303026378e-06, "loss": 0.5722, "step": 4411 }, { "epoch": 0.28, "grad_norm": 0.9944487810134888, "learning_rate": 8.454465626540555e-06, "loss": 0.5326, "step": 4412 }, { "epoch": 0.28, "grad_norm": 1.0593161582946777, "learning_rate": 8.453723804599203e-06, "loss": 0.5565, "step": 4413 }, { "epoch": 0.28, "grad_norm": 1.0495504140853882, "learning_rate": 8.452981837233555e-06, "loss": 0.5774, "step": 4414 }, { "epoch": 0.28, "grad_norm": 1.0957008600234985, "learning_rate": 8.452239724474856e-06, "loss": 0.5729, "step": 4415 }, { "epoch": 0.28, "grad_norm": 1.020525574684143, "learning_rate": 8.451497466354349e-06, "loss": 0.5112, "step": 4416 }, { "epoch": 0.28, "grad_norm": 1.0432324409484863, "learning_rate": 8.450755062903293e-06, "loss": 0.5541, "step": 4417 }, { "epoch": 0.28, "grad_norm": 1.0233687162399292, "learning_rate": 8.450012514152943e-06, "loss": 0.5548, "step": 4418 }, { "epoch": 0.28, "grad_norm": 0.9818899631500244, "learning_rate": 8.44926982013457e-06, "loss": 0.5535, "step": 4419 }, { "epoch": 0.28, "grad_norm": 1.0102336406707764, "learning_rate": 8.448526980879444e-06, "loss": 0.5317, "step": 4420 }, { "epoch": 0.28, "grad_norm": 1.1956027746200562, "learning_rate": 8.447783996418843e-06, "loss": 0.6164, "step": 4421 }, { "epoch": 0.28, "grad_norm": 1.0055028200149536, "learning_rate": 8.447040866784051e-06, "loss": 0.5325, "step": 4422 }, { "epoch": 0.28, "grad_norm": 1.037794828414917, "learning_rate": 8.446297592006361e-06, "loss": 0.4994, "step": 4423 }, { "epoch": 0.28, "grad_norm": 1.0306740999221802, "learning_rate": 8.445554172117066e-06, "loss": 0.5552, "step": 4424 }, { "epoch": 0.28, "grad_norm": 1.0212527513504028, "learning_rate": 8.444810607147472e-06, "loss": 0.5188, "step": 4425 }, { "epoch": 0.28, "grad_norm": 0.9898123741149902, "learning_rate": 8.444066897128888e-06, "loss": 0.516, "step": 4426 }, { "epoch": 0.28, "grad_norm": 1.4958157539367676, "learning_rate": 8.443323042092625e-06, "loss": 0.5338, "step": 4427 }, { "epoch": 0.28, "grad_norm": 1.0361689329147339, "learning_rate": 8.442579042070011e-06, "loss": 0.5656, "step": 4428 }, { "epoch": 0.28, "grad_norm": 0.9662915468215942, "learning_rate": 8.441834897092366e-06, "loss": 0.5384, "step": 4429 }, { "epoch": 0.28, "grad_norm": 1.1028213500976562, "learning_rate": 8.44109060719103e-06, "loss": 0.5347, "step": 4430 }, { "epoch": 0.28, "grad_norm": 1.0823792219161987, "learning_rate": 8.440346172397338e-06, "loss": 0.6062, "step": 4431 }, { "epoch": 0.28, "grad_norm": 1.0345669984817505, "learning_rate": 8.439601592742637e-06, "loss": 0.6027, "step": 4432 }, { "epoch": 0.28, "grad_norm": 1.0370168685913086, "learning_rate": 8.438856868258278e-06, "loss": 0.5847, "step": 4433 }, { "epoch": 0.28, "grad_norm": 0.9230498671531677, "learning_rate": 8.438111998975618e-06, "loss": 0.5397, "step": 4434 }, { "epoch": 0.28, "grad_norm": 1.0350542068481445, "learning_rate": 8.437366984926023e-06, "loss": 0.5518, "step": 4435 }, { "epoch": 0.28, "grad_norm": 0.9938403367996216, "learning_rate": 8.436621826140863e-06, "loss": 0.5528, "step": 4436 }, { "epoch": 0.28, "grad_norm": 1.030972957611084, "learning_rate": 8.435876522651512e-06, "loss": 0.6143, "step": 4437 }, { "epoch": 0.28, "grad_norm": 1.1263898611068726, "learning_rate": 8.435131074489353e-06, "loss": 0.6138, "step": 4438 }, { "epoch": 0.28, "grad_norm": 0.985565721988678, "learning_rate": 8.434385481685776e-06, "loss": 0.5098, "step": 4439 }, { "epoch": 0.28, "grad_norm": 1.0614559650421143, "learning_rate": 8.43363974427217e-06, "loss": 0.5787, "step": 4440 }, { "epoch": 0.28, "grad_norm": 1.0057392120361328, "learning_rate": 8.432893862279943e-06, "loss": 0.5294, "step": 4441 }, { "epoch": 0.28, "grad_norm": 1.033789038658142, "learning_rate": 8.432147835740496e-06, "loss": 0.5534, "step": 4442 }, { "epoch": 0.28, "grad_norm": 1.0420987606048584, "learning_rate": 8.431401664685244e-06, "loss": 0.5472, "step": 4443 }, { "epoch": 0.28, "grad_norm": 1.0862915515899658, "learning_rate": 8.430655349145604e-06, "loss": 0.5556, "step": 4444 }, { "epoch": 0.28, "grad_norm": 1.0685113668441772, "learning_rate": 8.429908889153003e-06, "loss": 0.596, "step": 4445 }, { "epoch": 0.28, "grad_norm": 0.9933927655220032, "learning_rate": 8.429162284738868e-06, "loss": 0.5705, "step": 4446 }, { "epoch": 0.28, "grad_norm": 0.9709452390670776, "learning_rate": 8.42841553593464e-06, "loss": 0.5502, "step": 4447 }, { "epoch": 0.28, "grad_norm": 1.0400898456573486, "learning_rate": 8.42766864277176e-06, "loss": 0.5348, "step": 4448 }, { "epoch": 0.28, "grad_norm": 0.9806199669837952, "learning_rate": 8.426921605281677e-06, "loss": 0.5603, "step": 4449 }, { "epoch": 0.28, "grad_norm": 1.0576506853103638, "learning_rate": 8.426174423495848e-06, "loss": 0.5572, "step": 4450 }, { "epoch": 0.28, "grad_norm": 1.0964463949203491, "learning_rate": 8.425427097445733e-06, "loss": 0.5739, "step": 4451 }, { "epoch": 0.28, "grad_norm": 1.0517578125, "learning_rate": 8.424679627162798e-06, "loss": 0.5891, "step": 4452 }, { "epoch": 0.28, "grad_norm": 0.9388140439987183, "learning_rate": 8.423932012678516e-06, "loss": 0.5163, "step": 4453 }, { "epoch": 0.28, "grad_norm": 1.0317274332046509, "learning_rate": 8.42318425402437e-06, "loss": 0.6287, "step": 4454 }, { "epoch": 0.28, "grad_norm": 0.9393893480300903, "learning_rate": 8.422436351231843e-06, "loss": 0.5292, "step": 4455 }, { "epoch": 0.28, "grad_norm": 0.9703336358070374, "learning_rate": 8.421688304332428e-06, "loss": 0.5127, "step": 4456 }, { "epoch": 0.28, "grad_norm": 1.0111799240112305, "learning_rate": 8.42094011335762e-06, "loss": 0.51, "step": 4457 }, { "epoch": 0.28, "grad_norm": 1.0073695182800293, "learning_rate": 8.420191778338924e-06, "loss": 0.5102, "step": 4458 }, { "epoch": 0.28, "grad_norm": 0.9900495409965515, "learning_rate": 8.419443299307852e-06, "loss": 0.5851, "step": 4459 }, { "epoch": 0.28, "grad_norm": 1.0441724061965942, "learning_rate": 8.418694676295918e-06, "loss": 0.5308, "step": 4460 }, { "epoch": 0.28, "grad_norm": 0.9689183235168457, "learning_rate": 8.417945909334642e-06, "loss": 0.5456, "step": 4461 }, { "epoch": 0.28, "grad_norm": 1.0017889738082886, "learning_rate": 8.417196998455555e-06, "loss": 0.5471, "step": 4462 }, { "epoch": 0.28, "grad_norm": 1.0057268142700195, "learning_rate": 8.41644794369019e-06, "loss": 0.5082, "step": 4463 }, { "epoch": 0.28, "grad_norm": 0.9479397535324097, "learning_rate": 8.415698745070088e-06, "loss": 0.5388, "step": 4464 }, { "epoch": 0.28, "grad_norm": 1.0385222434997559, "learning_rate": 8.414949402626793e-06, "loss": 0.5791, "step": 4465 }, { "epoch": 0.28, "grad_norm": 0.9783814549446106, "learning_rate": 8.41419991639186e-06, "loss": 0.5192, "step": 4466 }, { "epoch": 0.28, "grad_norm": 1.1291273832321167, "learning_rate": 8.413450286396845e-06, "loss": 0.5347, "step": 4467 }, { "epoch": 0.28, "grad_norm": 0.8909294009208679, "learning_rate": 8.41270051267331e-06, "loss": 0.5149, "step": 4468 }, { "epoch": 0.28, "grad_norm": 1.0434598922729492, "learning_rate": 8.411950595252834e-06, "loss": 0.5483, "step": 4469 }, { "epoch": 0.28, "grad_norm": 1.0784733295440674, "learning_rate": 8.411200534166983e-06, "loss": 0.6113, "step": 4470 }, { "epoch": 0.28, "grad_norm": 1.080352783203125, "learning_rate": 8.410450329447346e-06, "loss": 0.5783, "step": 4471 }, { "epoch": 0.28, "grad_norm": 1.0519651174545288, "learning_rate": 8.409699981125509e-06, "loss": 0.591, "step": 4472 }, { "epoch": 0.28, "grad_norm": 1.0252313613891602, "learning_rate": 8.408949489233068e-06, "loss": 0.5828, "step": 4473 }, { "epoch": 0.28, "grad_norm": 0.9834392666816711, "learning_rate": 8.408198853801623e-06, "loss": 0.5373, "step": 4474 }, { "epoch": 0.28, "grad_norm": 1.0300211906433105, "learning_rate": 8.40744807486278e-06, "loss": 0.5858, "step": 4475 }, { "epoch": 0.28, "grad_norm": 0.9656156301498413, "learning_rate": 8.406697152448152e-06, "loss": 0.5426, "step": 4476 }, { "epoch": 0.28, "grad_norm": 0.9705562591552734, "learning_rate": 8.405946086589359e-06, "loss": 0.5428, "step": 4477 }, { "epoch": 0.28, "grad_norm": 1.0767998695373535, "learning_rate": 8.405194877318023e-06, "loss": 0.6171, "step": 4478 }, { "epoch": 0.28, "grad_norm": 0.9853762984275818, "learning_rate": 8.404443524665777e-06, "loss": 0.5877, "step": 4479 }, { "epoch": 0.28, "grad_norm": 0.9431714415550232, "learning_rate": 8.40369202866426e-06, "loss": 0.5145, "step": 4480 }, { "epoch": 0.28, "grad_norm": 1.0816575288772583, "learning_rate": 8.40294038934511e-06, "loss": 0.5858, "step": 4481 }, { "epoch": 0.28, "grad_norm": 0.9906498193740845, "learning_rate": 8.402188606739977e-06, "loss": 0.5379, "step": 4482 }, { "epoch": 0.28, "grad_norm": 1.089381217956543, "learning_rate": 8.401436680880518e-06, "loss": 0.5514, "step": 4483 }, { "epoch": 0.28, "grad_norm": 1.0199429988861084, "learning_rate": 8.400684611798395e-06, "loss": 0.5221, "step": 4484 }, { "epoch": 0.28, "grad_norm": 0.9694374799728394, "learning_rate": 8.39993239952527e-06, "loss": 0.5128, "step": 4485 }, { "epoch": 0.28, "grad_norm": 1.0346933603286743, "learning_rate": 8.399180044092821e-06, "loss": 0.5198, "step": 4486 }, { "epoch": 0.28, "grad_norm": 1.1518710851669312, "learning_rate": 8.398427545532726e-06, "loss": 0.5294, "step": 4487 }, { "epoch": 0.28, "grad_norm": 1.0096625089645386, "learning_rate": 8.397674903876667e-06, "loss": 0.5433, "step": 4488 }, { "epoch": 0.28, "grad_norm": 1.0959570407867432, "learning_rate": 8.396922119156339e-06, "loss": 0.5901, "step": 4489 }, { "epoch": 0.28, "grad_norm": 1.0759719610214233, "learning_rate": 8.396169191403438e-06, "loss": 0.5404, "step": 4490 }, { "epoch": 0.28, "grad_norm": 0.9815802574157715, "learning_rate": 8.395416120649667e-06, "loss": 0.5193, "step": 4491 }, { "epoch": 0.28, "grad_norm": 0.9859442710876465, "learning_rate": 8.394662906926734e-06, "loss": 0.5087, "step": 4492 }, { "epoch": 0.28, "grad_norm": 0.9523860812187195, "learning_rate": 8.393909550266354e-06, "loss": 0.5617, "step": 4493 }, { "epoch": 0.28, "grad_norm": 0.9746415615081787, "learning_rate": 8.393156050700252e-06, "loss": 0.5675, "step": 4494 }, { "epoch": 0.28, "grad_norm": 1.037996768951416, "learning_rate": 8.39240240826015e-06, "loss": 0.5698, "step": 4495 }, { "epoch": 0.28, "grad_norm": 1.1229304075241089, "learning_rate": 8.391648622977787e-06, "loss": 0.6012, "step": 4496 }, { "epoch": 0.28, "grad_norm": 0.9897786378860474, "learning_rate": 8.390894694884896e-06, "loss": 0.5329, "step": 4497 }, { "epoch": 0.28, "grad_norm": 1.0292725563049316, "learning_rate": 8.390140624013228e-06, "loss": 0.5462, "step": 4498 }, { "epoch": 0.29, "grad_norm": 1.0851386785507202, "learning_rate": 8.38938641039453e-06, "loss": 0.589, "step": 4499 }, { "epoch": 0.29, "grad_norm": 0.9658882021903992, "learning_rate": 8.388632054060562e-06, "loss": 0.5191, "step": 4500 }, { "epoch": 0.29, "grad_norm": 1.016357660293579, "learning_rate": 8.387877555043086e-06, "loss": 0.5857, "step": 4501 }, { "epoch": 0.29, "grad_norm": 0.9577678442001343, "learning_rate": 8.38712291337387e-06, "loss": 0.5332, "step": 4502 }, { "epoch": 0.29, "grad_norm": 1.0035228729248047, "learning_rate": 8.386368129084695e-06, "loss": 0.5338, "step": 4503 }, { "epoch": 0.29, "grad_norm": 1.148341178894043, "learning_rate": 8.385613202207336e-06, "loss": 0.5602, "step": 4504 }, { "epoch": 0.29, "grad_norm": 0.95160973072052, "learning_rate": 8.384858132773582e-06, "loss": 0.5396, "step": 4505 }, { "epoch": 0.29, "grad_norm": 1.062878131866455, "learning_rate": 8.38410292081523e-06, "loss": 0.5118, "step": 4506 }, { "epoch": 0.29, "grad_norm": 0.9612262845039368, "learning_rate": 8.383347566364072e-06, "loss": 0.508, "step": 4507 }, { "epoch": 0.29, "grad_norm": 1.0526068210601807, "learning_rate": 8.38259206945192e-06, "loss": 0.5425, "step": 4508 }, { "epoch": 0.29, "grad_norm": 1.0237740278244019, "learning_rate": 8.381836430110585e-06, "loss": 0.5376, "step": 4509 }, { "epoch": 0.29, "grad_norm": 1.0377681255340576, "learning_rate": 8.38108064837188e-06, "loss": 0.5819, "step": 4510 }, { "epoch": 0.29, "grad_norm": 0.9746587872505188, "learning_rate": 8.380324724267631e-06, "loss": 0.5514, "step": 4511 }, { "epoch": 0.29, "grad_norm": 0.966952383518219, "learning_rate": 8.379568657829669e-06, "loss": 0.5774, "step": 4512 }, { "epoch": 0.29, "grad_norm": 0.9846550226211548, "learning_rate": 8.378812449089826e-06, "loss": 0.5439, "step": 4513 }, { "epoch": 0.29, "grad_norm": 1.055591106414795, "learning_rate": 8.378056098079946e-06, "loss": 0.594, "step": 4514 }, { "epoch": 0.29, "grad_norm": 1.0931600332260132, "learning_rate": 8.377299604831875e-06, "loss": 0.5985, "step": 4515 }, { "epoch": 0.29, "grad_norm": 0.9879661202430725, "learning_rate": 8.376542969377465e-06, "loss": 0.5831, "step": 4516 }, { "epoch": 0.29, "grad_norm": 1.0338890552520752, "learning_rate": 8.375786191748578e-06, "loss": 0.5988, "step": 4517 }, { "epoch": 0.29, "grad_norm": 1.1263805627822876, "learning_rate": 8.375029271977076e-06, "loss": 0.6065, "step": 4518 }, { "epoch": 0.29, "grad_norm": 0.9827618598937988, "learning_rate": 8.374272210094834e-06, "loss": 0.5548, "step": 4519 }, { "epoch": 0.29, "grad_norm": 0.9819245934486389, "learning_rate": 8.373515006133728e-06, "loss": 0.5061, "step": 4520 }, { "epoch": 0.29, "grad_norm": 0.9984309673309326, "learning_rate": 8.372757660125639e-06, "loss": 0.4948, "step": 4521 }, { "epoch": 0.29, "grad_norm": 1.0824671983718872, "learning_rate": 8.372000172102459e-06, "loss": 0.585, "step": 4522 }, { "epoch": 0.29, "grad_norm": 1.0909574031829834, "learning_rate": 8.37124254209608e-06, "loss": 0.5601, "step": 4523 }, { "epoch": 0.29, "grad_norm": 0.9879238605499268, "learning_rate": 8.370484770138407e-06, "loss": 0.5791, "step": 4524 }, { "epoch": 0.29, "grad_norm": 1.0516186952590942, "learning_rate": 8.369726856261346e-06, "loss": 0.5755, "step": 4525 }, { "epoch": 0.29, "grad_norm": 1.0363621711730957, "learning_rate": 8.36896880049681e-06, "loss": 0.5142, "step": 4526 }, { "epoch": 0.29, "grad_norm": 1.0198794603347778, "learning_rate": 8.368210602876716e-06, "loss": 0.5286, "step": 4527 }, { "epoch": 0.29, "grad_norm": 0.9703680872917175, "learning_rate": 8.36745226343299e-06, "loss": 0.5467, "step": 4528 }, { "epoch": 0.29, "grad_norm": 1.0106040239334106, "learning_rate": 8.366693782197566e-06, "loss": 0.5444, "step": 4529 }, { "epoch": 0.29, "grad_norm": 1.034140944480896, "learning_rate": 8.365935159202378e-06, "loss": 0.5487, "step": 4530 }, { "epoch": 0.29, "grad_norm": 1.0963879823684692, "learning_rate": 8.365176394479368e-06, "loss": 0.573, "step": 4531 }, { "epoch": 0.29, "grad_norm": 0.983534038066864, "learning_rate": 8.364417488060488e-06, "loss": 0.5616, "step": 4532 }, { "epoch": 0.29, "grad_norm": 1.0498030185699463, "learning_rate": 8.363658439977693e-06, "loss": 0.5222, "step": 4533 }, { "epoch": 0.29, "grad_norm": 0.9815921187400818, "learning_rate": 8.36289925026294e-06, "loss": 0.5376, "step": 4534 }, { "epoch": 0.29, "grad_norm": 0.9987731575965881, "learning_rate": 8.362139918948198e-06, "loss": 0.5405, "step": 4535 }, { "epoch": 0.29, "grad_norm": 1.0733778476715088, "learning_rate": 8.36138044606544e-06, "loss": 0.5804, "step": 4536 }, { "epoch": 0.29, "grad_norm": 1.0026512145996094, "learning_rate": 8.360620831646647e-06, "loss": 0.5573, "step": 4537 }, { "epoch": 0.29, "grad_norm": 0.978951096534729, "learning_rate": 8.359861075723801e-06, "loss": 0.5474, "step": 4538 }, { "epoch": 0.29, "grad_norm": 1.0572445392608643, "learning_rate": 8.359101178328893e-06, "loss": 0.5784, "step": 4539 }, { "epoch": 0.29, "grad_norm": 0.9558642506599426, "learning_rate": 8.358341139493919e-06, "loss": 0.5252, "step": 4540 }, { "epoch": 0.29, "grad_norm": 1.0329904556274414, "learning_rate": 8.357580959250882e-06, "loss": 0.5189, "step": 4541 }, { "epoch": 0.29, "grad_norm": 1.0733036994934082, "learning_rate": 8.356820637631792e-06, "loss": 0.5723, "step": 4542 }, { "epoch": 0.29, "grad_norm": 1.063766360282898, "learning_rate": 8.356060174668663e-06, "loss": 0.5409, "step": 4543 }, { "epoch": 0.29, "grad_norm": 0.9099135398864746, "learning_rate": 8.355299570393515e-06, "loss": 0.4935, "step": 4544 }, { "epoch": 0.29, "grad_norm": 0.9763380289077759, "learning_rate": 8.354538824838373e-06, "loss": 0.5566, "step": 4545 }, { "epoch": 0.29, "grad_norm": 0.9359689950942993, "learning_rate": 8.353777938035272e-06, "loss": 0.5214, "step": 4546 }, { "epoch": 0.29, "grad_norm": 0.9775251746177673, "learning_rate": 8.353016910016247e-06, "loss": 0.541, "step": 4547 }, { "epoch": 0.29, "grad_norm": 1.0470110177993774, "learning_rate": 8.352255740813347e-06, "loss": 0.5642, "step": 4548 }, { "epoch": 0.29, "grad_norm": 0.9434897303581238, "learning_rate": 8.351494430458617e-06, "loss": 0.5199, "step": 4549 }, { "epoch": 0.29, "grad_norm": 1.0338044166564941, "learning_rate": 8.350732978984116e-06, "loss": 0.5699, "step": 4550 }, { "epoch": 0.29, "grad_norm": 0.9947142601013184, "learning_rate": 8.349971386421906e-06, "loss": 0.556, "step": 4551 }, { "epoch": 0.29, "grad_norm": 0.9455569386482239, "learning_rate": 8.349209652804055e-06, "loss": 0.5067, "step": 4552 }, { "epoch": 0.29, "grad_norm": 0.9736185073852539, "learning_rate": 8.348447778162636e-06, "loss": 0.5689, "step": 4553 }, { "epoch": 0.29, "grad_norm": 1.0313166379928589, "learning_rate": 8.347685762529729e-06, "loss": 0.5833, "step": 4554 }, { "epoch": 0.29, "grad_norm": 0.996852457523346, "learning_rate": 8.34692360593742e-06, "loss": 0.5084, "step": 4555 }, { "epoch": 0.29, "grad_norm": 1.0415087938308716, "learning_rate": 8.346161308417805e-06, "loss": 0.5731, "step": 4556 }, { "epoch": 0.29, "grad_norm": 0.9775435924530029, "learning_rate": 8.345398870002972e-06, "loss": 0.5587, "step": 4557 }, { "epoch": 0.29, "grad_norm": 1.0303115844726562, "learning_rate": 8.344636290725035e-06, "loss": 0.5762, "step": 4558 }, { "epoch": 0.29, "grad_norm": 0.9830759167671204, "learning_rate": 8.343873570616097e-06, "loss": 0.5606, "step": 4559 }, { "epoch": 0.29, "grad_norm": 1.0046902894973755, "learning_rate": 8.343110709708275e-06, "loss": 0.5648, "step": 4560 }, { "epoch": 0.29, "grad_norm": 1.0001426935195923, "learning_rate": 8.342347708033692e-06, "loss": 0.5636, "step": 4561 }, { "epoch": 0.29, "grad_norm": 1.034018874168396, "learning_rate": 8.341584565624471e-06, "loss": 0.5831, "step": 4562 }, { "epoch": 0.29, "grad_norm": 0.9828287363052368, "learning_rate": 8.340821282512753e-06, "loss": 0.5553, "step": 4563 }, { "epoch": 0.29, "grad_norm": 1.0107008218765259, "learning_rate": 8.34005785873067e-06, "loss": 0.5308, "step": 4564 }, { "epoch": 0.29, "grad_norm": 0.993648111820221, "learning_rate": 8.339294294310371e-06, "loss": 0.559, "step": 4565 }, { "epoch": 0.29, "grad_norm": 0.9884464740753174, "learning_rate": 8.338530589284005e-06, "loss": 0.5824, "step": 4566 }, { "epoch": 0.29, "grad_norm": 0.9992973804473877, "learning_rate": 8.33776674368373e-06, "loss": 0.4819, "step": 4567 }, { "epoch": 0.29, "grad_norm": 1.0691200494766235, "learning_rate": 8.337002757541708e-06, "loss": 0.6044, "step": 4568 }, { "epoch": 0.29, "grad_norm": 0.9904386401176453, "learning_rate": 8.33623863089011e-06, "loss": 0.529, "step": 4569 }, { "epoch": 0.29, "grad_norm": 0.9538317918777466, "learning_rate": 8.335474363761109e-06, "loss": 0.5066, "step": 4570 }, { "epoch": 0.29, "grad_norm": 1.0342077016830444, "learning_rate": 8.334709956186884e-06, "loss": 0.55, "step": 4571 }, { "epoch": 0.29, "grad_norm": 0.9945910573005676, "learning_rate": 8.333945408199624e-06, "loss": 0.5278, "step": 4572 }, { "epoch": 0.29, "grad_norm": 1.0571881532669067, "learning_rate": 8.333180719831521e-06, "loss": 0.5608, "step": 4573 }, { "epoch": 0.29, "grad_norm": 1.0826531648635864, "learning_rate": 8.332415891114774e-06, "loss": 0.5684, "step": 4574 }, { "epoch": 0.29, "grad_norm": 1.0074824094772339, "learning_rate": 8.331650922081586e-06, "loss": 0.5156, "step": 4575 }, { "epoch": 0.29, "grad_norm": 1.0355405807495117, "learning_rate": 8.330885812764168e-06, "loss": 0.5606, "step": 4576 }, { "epoch": 0.29, "grad_norm": 0.9580333232879639, "learning_rate": 8.330120563194736e-06, "loss": 0.5699, "step": 4577 }, { "epoch": 0.29, "grad_norm": 1.0509356260299683, "learning_rate": 8.32935517340551e-06, "loss": 0.5693, "step": 4578 }, { "epoch": 0.29, "grad_norm": 0.944019079208374, "learning_rate": 8.328589643428722e-06, "loss": 0.534, "step": 4579 }, { "epoch": 0.29, "grad_norm": 1.0960191488265991, "learning_rate": 8.327823973296601e-06, "loss": 0.5914, "step": 4580 }, { "epoch": 0.29, "grad_norm": 1.0410923957824707, "learning_rate": 8.32705816304139e-06, "loss": 0.5643, "step": 4581 }, { "epoch": 0.29, "grad_norm": 1.0160187482833862, "learning_rate": 8.326292212695335e-06, "loss": 0.5954, "step": 4582 }, { "epoch": 0.29, "grad_norm": 1.0226081609725952, "learning_rate": 8.325526122290685e-06, "loss": 0.5585, "step": 4583 }, { "epoch": 0.29, "grad_norm": 0.9550665020942688, "learning_rate": 8.3247598918597e-06, "loss": 0.5219, "step": 4584 }, { "epoch": 0.29, "grad_norm": 1.024646282196045, "learning_rate": 8.323993521434639e-06, "loss": 0.5408, "step": 4585 }, { "epoch": 0.29, "grad_norm": 0.9836820960044861, "learning_rate": 8.323227011047777e-06, "loss": 0.537, "step": 4586 }, { "epoch": 0.29, "grad_norm": 0.9069527387619019, "learning_rate": 8.322460360731386e-06, "loss": 0.5902, "step": 4587 }, { "epoch": 0.29, "grad_norm": 0.9728847742080688, "learning_rate": 8.321693570517745e-06, "loss": 0.5593, "step": 4588 }, { "epoch": 0.29, "grad_norm": 1.0425338745117188, "learning_rate": 8.320926640439145e-06, "loss": 0.5218, "step": 4589 }, { "epoch": 0.29, "grad_norm": 0.983156681060791, "learning_rate": 8.320159570527876e-06, "loss": 0.5384, "step": 4590 }, { "epoch": 0.29, "grad_norm": 0.932736873626709, "learning_rate": 8.319392360816239e-06, "loss": 0.536, "step": 4591 }, { "epoch": 0.29, "grad_norm": 0.9952834844589233, "learning_rate": 8.318625011336533e-06, "loss": 0.5843, "step": 4592 }, { "epoch": 0.29, "grad_norm": 1.061063289642334, "learning_rate": 8.317857522121078e-06, "loss": 0.5549, "step": 4593 }, { "epoch": 0.29, "grad_norm": 1.0253628492355347, "learning_rate": 8.317089893202181e-06, "loss": 0.5438, "step": 4594 }, { "epoch": 0.29, "grad_norm": 0.9959260821342468, "learning_rate": 8.316322124612169e-06, "loss": 0.6049, "step": 4595 }, { "epoch": 0.29, "grad_norm": 1.0181363821029663, "learning_rate": 8.315554216383368e-06, "loss": 0.5881, "step": 4596 }, { "epoch": 0.29, "grad_norm": 1.103354811668396, "learning_rate": 8.314786168548115e-06, "loss": 0.5834, "step": 4597 }, { "epoch": 0.29, "grad_norm": 1.0442967414855957, "learning_rate": 8.314017981138746e-06, "loss": 0.5289, "step": 4598 }, { "epoch": 0.29, "grad_norm": 1.093725562095642, "learning_rate": 8.31324965418761e-06, "loss": 0.5644, "step": 4599 }, { "epoch": 0.29, "grad_norm": 1.0241702795028687, "learning_rate": 8.312481187727055e-06, "loss": 0.5611, "step": 4600 }, { "epoch": 0.29, "grad_norm": 0.9719318151473999, "learning_rate": 8.311712581789442e-06, "loss": 0.4764, "step": 4601 }, { "epoch": 0.29, "grad_norm": 1.0587811470031738, "learning_rate": 8.310943836407132e-06, "loss": 0.5606, "step": 4602 }, { "epoch": 0.29, "grad_norm": 1.0955878496170044, "learning_rate": 8.310174951612495e-06, "loss": 0.5183, "step": 4603 }, { "epoch": 0.29, "grad_norm": 1.0315818786621094, "learning_rate": 8.309405927437906e-06, "loss": 0.4809, "step": 4604 }, { "epoch": 0.29, "grad_norm": 1.043058156967163, "learning_rate": 8.308636763915746e-06, "loss": 0.559, "step": 4605 }, { "epoch": 0.29, "grad_norm": 1.0042216777801514, "learning_rate": 8.307867461078402e-06, "loss": 0.5702, "step": 4606 }, { "epoch": 0.29, "grad_norm": 1.0011327266693115, "learning_rate": 8.307098018958266e-06, "loss": 0.5797, "step": 4607 }, { "epoch": 0.29, "grad_norm": 0.989730954170227, "learning_rate": 8.306328437587738e-06, "loss": 0.5656, "step": 4608 }, { "epoch": 0.29, "grad_norm": 1.0619161128997803, "learning_rate": 8.305558716999221e-06, "loss": 0.5523, "step": 4609 }, { "epoch": 0.29, "grad_norm": 1.0472158193588257, "learning_rate": 8.304788857225126e-06, "loss": 0.5554, "step": 4610 }, { "epoch": 0.29, "grad_norm": 1.0451831817626953, "learning_rate": 8.304018858297867e-06, "loss": 0.537, "step": 4611 }, { "epoch": 0.29, "grad_norm": 1.0356922149658203, "learning_rate": 8.30324872024987e-06, "loss": 0.549, "step": 4612 }, { "epoch": 0.29, "grad_norm": 1.051112413406372, "learning_rate": 8.30247844311356e-06, "loss": 0.4971, "step": 4613 }, { "epoch": 0.29, "grad_norm": 1.0303791761398315, "learning_rate": 8.301708026921371e-06, "loss": 0.5841, "step": 4614 }, { "epoch": 0.29, "grad_norm": 1.0167726278305054, "learning_rate": 8.300937471705742e-06, "loss": 0.5971, "step": 4615 }, { "epoch": 0.29, "grad_norm": 0.9281720519065857, "learning_rate": 8.300166777499119e-06, "loss": 0.5534, "step": 4616 }, { "epoch": 0.29, "grad_norm": 1.017045497894287, "learning_rate": 8.299395944333955e-06, "loss": 0.6104, "step": 4617 }, { "epoch": 0.29, "grad_norm": 0.9842953085899353, "learning_rate": 8.298624972242704e-06, "loss": 0.5341, "step": 4618 }, { "epoch": 0.29, "grad_norm": 1.0903747081756592, "learning_rate": 8.297853861257831e-06, "loss": 0.5049, "step": 4619 }, { "epoch": 0.29, "grad_norm": 0.9999139904975891, "learning_rate": 8.297082611411805e-06, "loss": 0.5475, "step": 4620 }, { "epoch": 0.29, "grad_norm": 0.9975643157958984, "learning_rate": 8.296311222737099e-06, "loss": 0.511, "step": 4621 }, { "epoch": 0.29, "grad_norm": 1.0004945993423462, "learning_rate": 8.295539695266195e-06, "loss": 0.5207, "step": 4622 }, { "epoch": 0.29, "grad_norm": 0.919409453868866, "learning_rate": 8.29476802903158e-06, "loss": 0.5177, "step": 4623 }, { "epoch": 0.29, "grad_norm": 0.9774051904678345, "learning_rate": 8.293996224065742e-06, "loss": 0.5436, "step": 4624 }, { "epoch": 0.29, "grad_norm": 0.9625217318534851, "learning_rate": 8.293224280401185e-06, "loss": 0.5572, "step": 4625 }, { "epoch": 0.29, "grad_norm": 1.027803659439087, "learning_rate": 8.292452198070406e-06, "loss": 0.5563, "step": 4626 }, { "epoch": 0.29, "grad_norm": 0.9124547839164734, "learning_rate": 8.291679977105922e-06, "loss": 0.5344, "step": 4627 }, { "epoch": 0.29, "grad_norm": 0.9668528437614441, "learning_rate": 8.290907617540244e-06, "loss": 0.5308, "step": 4628 }, { "epoch": 0.29, "grad_norm": 1.0439895391464233, "learning_rate": 8.290135119405894e-06, "loss": 0.5132, "step": 4629 }, { "epoch": 0.29, "grad_norm": 0.924747109413147, "learning_rate": 8.2893624827354e-06, "loss": 0.5168, "step": 4630 }, { "epoch": 0.29, "grad_norm": 0.9718056321144104, "learning_rate": 8.288589707561295e-06, "loss": 0.5367, "step": 4631 }, { "epoch": 0.29, "grad_norm": 1.003222107887268, "learning_rate": 8.287816793916119e-06, "loss": 0.5405, "step": 4632 }, { "epoch": 0.29, "grad_norm": 1.0253312587738037, "learning_rate": 8.287043741832412e-06, "loss": 0.5878, "step": 4633 }, { "epoch": 0.29, "grad_norm": 1.0082941055297852, "learning_rate": 8.28627055134273e-06, "loss": 0.4953, "step": 4634 }, { "epoch": 0.29, "grad_norm": 0.9098595976829529, "learning_rate": 8.285497222479626e-06, "loss": 0.516, "step": 4635 }, { "epoch": 0.29, "grad_norm": 0.9030443429946899, "learning_rate": 8.284723755275666e-06, "loss": 0.5401, "step": 4636 }, { "epoch": 0.29, "grad_norm": 0.9865958094596863, "learning_rate": 8.283950149763413e-06, "loss": 0.5145, "step": 4637 }, { "epoch": 0.29, "grad_norm": 0.9161219000816345, "learning_rate": 8.283176405975444e-06, "loss": 0.5248, "step": 4638 }, { "epoch": 0.29, "grad_norm": 1.0371172428131104, "learning_rate": 8.282402523944338e-06, "loss": 0.5267, "step": 4639 }, { "epoch": 0.29, "grad_norm": 0.9949166774749756, "learning_rate": 8.28162850370268e-06, "loss": 0.5002, "step": 4640 }, { "epoch": 0.29, "grad_norm": 1.0591098070144653, "learning_rate": 8.28085434528306e-06, "loss": 0.5958, "step": 4641 }, { "epoch": 0.29, "grad_norm": 1.0102256536483765, "learning_rate": 8.28008004871808e-06, "loss": 0.5246, "step": 4642 }, { "epoch": 0.29, "grad_norm": 1.0576274394989014, "learning_rate": 8.279305614040337e-06, "loss": 0.541, "step": 4643 }, { "epoch": 0.29, "grad_norm": 1.0965458154678345, "learning_rate": 8.278531041282445e-06, "loss": 0.5362, "step": 4644 }, { "epoch": 0.29, "grad_norm": 1.045287847518921, "learning_rate": 8.277756330477013e-06, "loss": 0.562, "step": 4645 }, { "epoch": 0.29, "grad_norm": 1.0357410907745361, "learning_rate": 8.276981481656668e-06, "loss": 0.5587, "step": 4646 }, { "epoch": 0.29, "grad_norm": 1.0280876159667969, "learning_rate": 8.276206494854029e-06, "loss": 0.5946, "step": 4647 }, { "epoch": 0.29, "grad_norm": 0.971557080745697, "learning_rate": 8.275431370101734e-06, "loss": 0.5241, "step": 4648 }, { "epoch": 0.29, "grad_norm": 1.0403517484664917, "learning_rate": 8.274656107432418e-06, "loss": 0.5568, "step": 4649 }, { "epoch": 0.29, "grad_norm": 1.0311771631240845, "learning_rate": 8.273880706878724e-06, "loss": 0.5656, "step": 4650 }, { "epoch": 0.29, "grad_norm": 1.1487035751342773, "learning_rate": 8.273105168473304e-06, "loss": 0.536, "step": 4651 }, { "epoch": 0.29, "grad_norm": 1.0901763439178467, "learning_rate": 8.27232949224881e-06, "loss": 0.5805, "step": 4652 }, { "epoch": 0.29, "grad_norm": 1.0801087617874146, "learning_rate": 8.271553678237904e-06, "loss": 0.5626, "step": 4653 }, { "epoch": 0.29, "grad_norm": 1.0637428760528564, "learning_rate": 8.270777726473256e-06, "loss": 0.5283, "step": 4654 }, { "epoch": 0.29, "grad_norm": 0.9966744184494019, "learning_rate": 8.270001636987535e-06, "loss": 0.5295, "step": 4655 }, { "epoch": 0.29, "grad_norm": 1.013501524925232, "learning_rate": 8.26922540981342e-06, "loss": 0.5601, "step": 4656 }, { "epoch": 0.3, "grad_norm": 1.0766243934631348, "learning_rate": 8.268449044983598e-06, "loss": 0.5249, "step": 4657 }, { "epoch": 0.3, "grad_norm": 0.9005308151245117, "learning_rate": 8.267672542530753e-06, "loss": 0.5389, "step": 4658 }, { "epoch": 0.3, "grad_norm": 0.9778000116348267, "learning_rate": 8.266895902487588e-06, "loss": 0.5196, "step": 4659 }, { "epoch": 0.3, "grad_norm": 0.9980995655059814, "learning_rate": 8.2661191248868e-06, "loss": 0.5653, "step": 4660 }, { "epoch": 0.3, "grad_norm": 1.0180249214172363, "learning_rate": 8.265342209761098e-06, "loss": 0.5532, "step": 4661 }, { "epoch": 0.3, "grad_norm": 0.9602007865905762, "learning_rate": 8.264565157143194e-06, "loss": 0.5783, "step": 4662 }, { "epoch": 0.3, "grad_norm": 0.979564905166626, "learning_rate": 8.26378796706581e-06, "loss": 0.5458, "step": 4663 }, { "epoch": 0.3, "grad_norm": 1.0173044204711914, "learning_rate": 8.263010639561666e-06, "loss": 0.5548, "step": 4664 }, { "epoch": 0.3, "grad_norm": 0.9330337643623352, "learning_rate": 8.262233174663497e-06, "loss": 0.5383, "step": 4665 }, { "epoch": 0.3, "grad_norm": 1.0098026990890503, "learning_rate": 8.261455572404036e-06, "loss": 0.5428, "step": 4666 }, { "epoch": 0.3, "grad_norm": 1.0305527448654175, "learning_rate": 8.260677832816029e-06, "loss": 0.5413, "step": 4667 }, { "epoch": 0.3, "grad_norm": 1.0110151767730713, "learning_rate": 8.259899955932218e-06, "loss": 0.5382, "step": 4668 }, { "epoch": 0.3, "grad_norm": 1.0421013832092285, "learning_rate": 8.259121941785362e-06, "loss": 0.5617, "step": 4669 }, { "epoch": 0.3, "grad_norm": 0.9988119006156921, "learning_rate": 8.25834379040822e-06, "loss": 0.5215, "step": 4670 }, { "epoch": 0.3, "grad_norm": 0.9611369371414185, "learning_rate": 8.257565501833555e-06, "loss": 0.5705, "step": 4671 }, { "epoch": 0.3, "grad_norm": 0.9753782749176025, "learning_rate": 8.256787076094138e-06, "loss": 0.5314, "step": 4672 }, { "epoch": 0.3, "grad_norm": 1.049460530281067, "learning_rate": 8.256008513222747e-06, "loss": 0.529, "step": 4673 }, { "epoch": 0.3, "grad_norm": 1.0090168714523315, "learning_rate": 8.255229813252167e-06, "loss": 0.5604, "step": 4674 }, { "epoch": 0.3, "grad_norm": 0.9995334148406982, "learning_rate": 8.25445097621518e-06, "loss": 0.536, "step": 4675 }, { "epoch": 0.3, "grad_norm": 1.0167433023452759, "learning_rate": 8.253672002144584e-06, "loss": 0.5293, "step": 4676 }, { "epoch": 0.3, "grad_norm": 0.9629647731781006, "learning_rate": 8.25289289107318e-06, "loss": 0.5628, "step": 4677 }, { "epoch": 0.3, "grad_norm": 0.9389039278030396, "learning_rate": 8.252113643033774e-06, "loss": 0.5432, "step": 4678 }, { "epoch": 0.3, "grad_norm": 1.0852667093276978, "learning_rate": 8.251334258059173e-06, "loss": 0.633, "step": 4679 }, { "epoch": 0.3, "grad_norm": 0.9621460437774658, "learning_rate": 8.250554736182199e-06, "loss": 0.5695, "step": 4680 }, { "epoch": 0.3, "grad_norm": 0.9658225774765015, "learning_rate": 8.249775077435671e-06, "loss": 0.5503, "step": 4681 }, { "epoch": 0.3, "grad_norm": 0.971916139125824, "learning_rate": 8.24899528185242e-06, "loss": 0.4941, "step": 4682 }, { "epoch": 0.3, "grad_norm": 1.0347654819488525, "learning_rate": 8.24821534946528e-06, "loss": 0.5648, "step": 4683 }, { "epoch": 0.3, "grad_norm": 0.9471699595451355, "learning_rate": 8.247435280307093e-06, "loss": 0.557, "step": 4684 }, { "epoch": 0.3, "grad_norm": 0.9978876709938049, "learning_rate": 8.246655074410703e-06, "loss": 0.5893, "step": 4685 }, { "epoch": 0.3, "grad_norm": 1.061025857925415, "learning_rate": 8.24587473180896e-06, "loss": 0.5648, "step": 4686 }, { "epoch": 0.3, "grad_norm": 1.0649787187576294, "learning_rate": 8.245094252534727e-06, "loss": 0.538, "step": 4687 }, { "epoch": 0.3, "grad_norm": 1.0082396268844604, "learning_rate": 8.244313636620862e-06, "loss": 0.5599, "step": 4688 }, { "epoch": 0.3, "grad_norm": 0.9994105696678162, "learning_rate": 8.243532884100236e-06, "loss": 0.546, "step": 4689 }, { "epoch": 0.3, "grad_norm": 1.007741928100586, "learning_rate": 8.242751995005721e-06, "loss": 0.5514, "step": 4690 }, { "epoch": 0.3, "grad_norm": 0.9305049180984497, "learning_rate": 8.241970969370205e-06, "loss": 0.5207, "step": 4691 }, { "epoch": 0.3, "grad_norm": 1.0813062191009521, "learning_rate": 8.241189807226566e-06, "loss": 0.5586, "step": 4692 }, { "epoch": 0.3, "grad_norm": 1.026228427886963, "learning_rate": 8.240408508607703e-06, "loss": 0.5504, "step": 4693 }, { "epoch": 0.3, "grad_norm": 1.02143394947052, "learning_rate": 8.239627073546507e-06, "loss": 0.5425, "step": 4694 }, { "epoch": 0.3, "grad_norm": 0.9124248623847961, "learning_rate": 8.238845502075886e-06, "loss": 0.5542, "step": 4695 }, { "epoch": 0.3, "grad_norm": 0.9577934145927429, "learning_rate": 8.238063794228748e-06, "loss": 0.5093, "step": 4696 }, { "epoch": 0.3, "grad_norm": 0.9729812145233154, "learning_rate": 8.237281950038008e-06, "loss": 0.6089, "step": 4697 }, { "epoch": 0.3, "grad_norm": 1.0045956373214722, "learning_rate": 8.236499969536585e-06, "loss": 0.5789, "step": 4698 }, { "epoch": 0.3, "grad_norm": 0.9919505715370178, "learning_rate": 8.23571785275741e-06, "loss": 0.5477, "step": 4699 }, { "epoch": 0.3, "grad_norm": 1.0783262252807617, "learning_rate": 8.234935599733412e-06, "loss": 0.5961, "step": 4700 }, { "epoch": 0.3, "grad_norm": 0.9970394372940063, "learning_rate": 8.234153210497528e-06, "loss": 0.4814, "step": 4701 }, { "epoch": 0.3, "grad_norm": 1.00620436668396, "learning_rate": 8.233370685082704e-06, "loss": 0.5642, "step": 4702 }, { "epoch": 0.3, "grad_norm": 0.9523754715919495, "learning_rate": 8.232588023521888e-06, "loss": 0.53, "step": 4703 }, { "epoch": 0.3, "grad_norm": 1.0080584287643433, "learning_rate": 8.231805225848035e-06, "loss": 0.5723, "step": 4704 }, { "epoch": 0.3, "grad_norm": 0.9508122205734253, "learning_rate": 8.23102229209411e-06, "loss": 0.512, "step": 4705 }, { "epoch": 0.3, "grad_norm": 1.0608426332473755, "learning_rate": 8.230239222293073e-06, "loss": 0.4941, "step": 4706 }, { "epoch": 0.3, "grad_norm": 1.0919255018234253, "learning_rate": 8.229456016477899e-06, "loss": 0.5382, "step": 4707 }, { "epoch": 0.3, "grad_norm": 1.0042272806167603, "learning_rate": 8.228672674681568e-06, "loss": 0.5456, "step": 4708 }, { "epoch": 0.3, "grad_norm": 1.12455153465271, "learning_rate": 8.227889196937062e-06, "loss": 0.6251, "step": 4709 }, { "epoch": 0.3, "grad_norm": 1.0752999782562256, "learning_rate": 8.227105583277372e-06, "loss": 0.497, "step": 4710 }, { "epoch": 0.3, "grad_norm": 0.9840076565742493, "learning_rate": 8.22632183373549e-06, "loss": 0.5404, "step": 4711 }, { "epoch": 0.3, "grad_norm": 1.017511248588562, "learning_rate": 8.225537948344423e-06, "loss": 0.5328, "step": 4712 }, { "epoch": 0.3, "grad_norm": 0.9743256568908691, "learning_rate": 8.224753927137171e-06, "loss": 0.5761, "step": 4713 }, { "epoch": 0.3, "grad_norm": 1.004582166671753, "learning_rate": 8.22396977014675e-06, "loss": 0.5328, "step": 4714 }, { "epoch": 0.3, "grad_norm": 1.04586923122406, "learning_rate": 8.223185477406175e-06, "loss": 0.569, "step": 4715 }, { "epoch": 0.3, "grad_norm": 1.086186170578003, "learning_rate": 8.222401048948476e-06, "loss": 0.5482, "step": 4716 }, { "epoch": 0.3, "grad_norm": 0.9625822305679321, "learning_rate": 8.221616484806676e-06, "loss": 0.5437, "step": 4717 }, { "epoch": 0.3, "grad_norm": 1.0753564834594727, "learning_rate": 8.220831785013814e-06, "loss": 0.5511, "step": 4718 }, { "epoch": 0.3, "grad_norm": 0.9520816802978516, "learning_rate": 8.22004694960293e-06, "loss": 0.5087, "step": 4719 }, { "epoch": 0.3, "grad_norm": 0.970871090888977, "learning_rate": 8.21926197860707e-06, "loss": 0.5151, "step": 4720 }, { "epoch": 0.3, "grad_norm": 1.0056312084197998, "learning_rate": 8.218476872059288e-06, "loss": 0.5275, "step": 4721 }, { "epoch": 0.3, "grad_norm": 1.066782832145691, "learning_rate": 8.217691629992641e-06, "loss": 0.5548, "step": 4722 }, { "epoch": 0.3, "grad_norm": 0.9306086897850037, "learning_rate": 8.216906252440193e-06, "loss": 0.5172, "step": 4723 }, { "epoch": 0.3, "grad_norm": 1.0483423471450806, "learning_rate": 8.216120739435013e-06, "loss": 0.5743, "step": 4724 }, { "epoch": 0.3, "grad_norm": 1.0240111351013184, "learning_rate": 8.215335091010177e-06, "loss": 0.516, "step": 4725 }, { "epoch": 0.3, "grad_norm": 1.0304317474365234, "learning_rate": 8.214549307198765e-06, "loss": 0.537, "step": 4726 }, { "epoch": 0.3, "grad_norm": 1.0802671909332275, "learning_rate": 8.213763388033867e-06, "loss": 0.5904, "step": 4727 }, { "epoch": 0.3, "grad_norm": 1.0293980836868286, "learning_rate": 8.212977333548569e-06, "loss": 0.5909, "step": 4728 }, { "epoch": 0.3, "grad_norm": 1.0778398513793945, "learning_rate": 8.212191143775973e-06, "loss": 0.5599, "step": 4729 }, { "epoch": 0.3, "grad_norm": 1.0456862449645996, "learning_rate": 8.211404818749184e-06, "loss": 0.6011, "step": 4730 }, { "epoch": 0.3, "grad_norm": 0.9759236574172974, "learning_rate": 8.21061835850131e-06, "loss": 0.5174, "step": 4731 }, { "epoch": 0.3, "grad_norm": 1.0486438274383545, "learning_rate": 8.209831763065465e-06, "loss": 0.5671, "step": 4732 }, { "epoch": 0.3, "grad_norm": 1.0495485067367554, "learning_rate": 8.209045032474773e-06, "loss": 0.5122, "step": 4733 }, { "epoch": 0.3, "grad_norm": 0.9698861241340637, "learning_rate": 8.208258166762355e-06, "loss": 0.5258, "step": 4734 }, { "epoch": 0.3, "grad_norm": 1.0011286735534668, "learning_rate": 8.207471165961347e-06, "loss": 0.5479, "step": 4735 }, { "epoch": 0.3, "grad_norm": 0.9875169992446899, "learning_rate": 8.206684030104886e-06, "loss": 0.565, "step": 4736 }, { "epoch": 0.3, "grad_norm": 0.9940193891525269, "learning_rate": 8.205896759226115e-06, "loss": 0.5387, "step": 4737 }, { "epoch": 0.3, "grad_norm": 0.9382634162902832, "learning_rate": 8.205109353358186e-06, "loss": 0.571, "step": 4738 }, { "epoch": 0.3, "grad_norm": 0.9879447221755981, "learning_rate": 8.20432181253425e-06, "loss": 0.5197, "step": 4739 }, { "epoch": 0.3, "grad_norm": 0.9900509119033813, "learning_rate": 8.203534136787473e-06, "loss": 0.5059, "step": 4740 }, { "epoch": 0.3, "grad_norm": 1.0219135284423828, "learning_rate": 8.202746326151015e-06, "loss": 0.5262, "step": 4741 }, { "epoch": 0.3, "grad_norm": 0.9983688592910767, "learning_rate": 8.20195838065805e-06, "loss": 0.6165, "step": 4742 }, { "epoch": 0.3, "grad_norm": 1.0055594444274902, "learning_rate": 8.201170300341757e-06, "loss": 0.552, "step": 4743 }, { "epoch": 0.3, "grad_norm": 0.9651437997817993, "learning_rate": 8.20038208523532e-06, "loss": 0.5851, "step": 4744 }, { "epoch": 0.3, "grad_norm": 1.1710509061813354, "learning_rate": 8.199593735371924e-06, "loss": 0.5762, "step": 4745 }, { "epoch": 0.3, "grad_norm": 1.0982491970062256, "learning_rate": 8.198805250784769e-06, "loss": 0.5491, "step": 4746 }, { "epoch": 0.3, "grad_norm": 1.0060925483703613, "learning_rate": 8.198016631507053e-06, "loss": 0.5129, "step": 4747 }, { "epoch": 0.3, "grad_norm": 1.0412200689315796, "learning_rate": 8.19722787757198e-06, "loss": 0.5161, "step": 4748 }, { "epoch": 0.3, "grad_norm": 1.054504632949829, "learning_rate": 8.196438989012765e-06, "loss": 0.5459, "step": 4749 }, { "epoch": 0.3, "grad_norm": 1.0147706270217896, "learning_rate": 8.195649965862622e-06, "loss": 0.5291, "step": 4750 }, { "epoch": 0.3, "grad_norm": 0.9944101572036743, "learning_rate": 8.194860808154778e-06, "loss": 0.5146, "step": 4751 }, { "epoch": 0.3, "grad_norm": 1.054218053817749, "learning_rate": 8.194071515922456e-06, "loss": 0.5231, "step": 4752 }, { "epoch": 0.3, "grad_norm": 1.0329952239990234, "learning_rate": 8.193282089198897e-06, "loss": 0.5629, "step": 4753 }, { "epoch": 0.3, "grad_norm": 1.0480854511260986, "learning_rate": 8.192492528017337e-06, "loss": 0.5488, "step": 4754 }, { "epoch": 0.3, "grad_norm": 0.9405835866928101, "learning_rate": 8.191702832411023e-06, "loss": 0.5193, "step": 4755 }, { "epoch": 0.3, "grad_norm": 1.0800672769546509, "learning_rate": 8.190913002413204e-06, "loss": 0.5868, "step": 4756 }, { "epoch": 0.3, "grad_norm": 1.053136944770813, "learning_rate": 8.19012303805714e-06, "loss": 0.5606, "step": 4757 }, { "epoch": 0.3, "grad_norm": 1.0696057081222534, "learning_rate": 8.189332939376092e-06, "loss": 0.5747, "step": 4758 }, { "epoch": 0.3, "grad_norm": 0.9500235319137573, "learning_rate": 8.188542706403331e-06, "loss": 0.499, "step": 4759 }, { "epoch": 0.3, "grad_norm": 1.010105848312378, "learning_rate": 8.187752339172126e-06, "loss": 0.5386, "step": 4760 }, { "epoch": 0.3, "grad_norm": 0.9794458150863647, "learning_rate": 8.18696183771576e-06, "loss": 0.5711, "step": 4761 }, { "epoch": 0.3, "grad_norm": 0.9968729019165039, "learning_rate": 8.18617120206752e-06, "loss": 0.5591, "step": 4762 }, { "epoch": 0.3, "grad_norm": 0.9325933456420898, "learning_rate": 8.185380432260693e-06, "loss": 0.5214, "step": 4763 }, { "epoch": 0.3, "grad_norm": 1.1231193542480469, "learning_rate": 8.184589528328576e-06, "loss": 0.5749, "step": 4764 }, { "epoch": 0.3, "grad_norm": 1.0722376108169556, "learning_rate": 8.183798490304473e-06, "loss": 0.5411, "step": 4765 }, { "epoch": 0.3, "grad_norm": 1.111397385597229, "learning_rate": 8.183007318221691e-06, "loss": 0.6101, "step": 4766 }, { "epoch": 0.3, "grad_norm": 0.991192638874054, "learning_rate": 8.182216012113543e-06, "loss": 0.5273, "step": 4767 }, { "epoch": 0.3, "grad_norm": 0.9147621989250183, "learning_rate": 8.181424572013352e-06, "loss": 0.503, "step": 4768 }, { "epoch": 0.3, "grad_norm": 0.976429283618927, "learning_rate": 8.180632997954437e-06, "loss": 0.4936, "step": 4769 }, { "epoch": 0.3, "grad_norm": 0.9914647936820984, "learning_rate": 8.179841289970132e-06, "loss": 0.5334, "step": 4770 }, { "epoch": 0.3, "grad_norm": 1.0159574747085571, "learning_rate": 8.179049448093771e-06, "loss": 0.5456, "step": 4771 }, { "epoch": 0.3, "grad_norm": 0.998140811920166, "learning_rate": 8.178257472358697e-06, "loss": 0.5649, "step": 4772 }, { "epoch": 0.3, "grad_norm": 1.0405910015106201, "learning_rate": 8.177465362798259e-06, "loss": 0.5688, "step": 4773 }, { "epoch": 0.3, "grad_norm": 1.0000064373016357, "learning_rate": 8.176673119445807e-06, "loss": 0.5521, "step": 4774 }, { "epoch": 0.3, "grad_norm": 0.9818173050880432, "learning_rate": 8.1758807423347e-06, "loss": 0.5873, "step": 4775 }, { "epoch": 0.3, "grad_norm": 0.9706140160560608, "learning_rate": 8.175088231498304e-06, "loss": 0.5542, "step": 4776 }, { "epoch": 0.3, "grad_norm": 1.0345028638839722, "learning_rate": 8.174295586969987e-06, "loss": 0.4983, "step": 4777 }, { "epoch": 0.3, "grad_norm": 1.0244017839431763, "learning_rate": 8.173502808783127e-06, "loss": 0.576, "step": 4778 }, { "epoch": 0.3, "grad_norm": 0.979910671710968, "learning_rate": 8.172709896971103e-06, "loss": 0.5135, "step": 4779 }, { "epoch": 0.3, "grad_norm": 1.1312154531478882, "learning_rate": 8.1719168515673e-06, "loss": 0.6129, "step": 4780 }, { "epoch": 0.3, "grad_norm": 1.084006428718567, "learning_rate": 8.171123672605116e-06, "loss": 0.5455, "step": 4781 }, { "epoch": 0.3, "grad_norm": 1.0457392930984497, "learning_rate": 8.170330360117944e-06, "loss": 0.5901, "step": 4782 }, { "epoch": 0.3, "grad_norm": 1.0750620365142822, "learning_rate": 8.169536914139189e-06, "loss": 0.509, "step": 4783 }, { "epoch": 0.3, "grad_norm": 1.007377028465271, "learning_rate": 8.168743334702262e-06, "loss": 0.5299, "step": 4784 }, { "epoch": 0.3, "grad_norm": 1.197182297706604, "learning_rate": 8.167949621840576e-06, "loss": 0.5613, "step": 4785 }, { "epoch": 0.3, "grad_norm": 1.0056759119033813, "learning_rate": 8.16715577558755e-06, "loss": 0.5237, "step": 4786 }, { "epoch": 0.3, "grad_norm": 0.9658280611038208, "learning_rate": 8.166361795976614e-06, "loss": 0.5577, "step": 4787 }, { "epoch": 0.3, "grad_norm": 1.0165833234786987, "learning_rate": 8.165567683041197e-06, "loss": 0.5531, "step": 4788 }, { "epoch": 0.3, "grad_norm": 0.9536402821540833, "learning_rate": 8.164773436814736e-06, "loss": 0.518, "step": 4789 }, { "epoch": 0.3, "grad_norm": 1.0761758089065552, "learning_rate": 8.163979057330677e-06, "loss": 0.537, "step": 4790 }, { "epoch": 0.3, "grad_norm": 1.0675421953201294, "learning_rate": 8.163184544622467e-06, "loss": 0.4321, "step": 4791 }, { "epoch": 0.3, "grad_norm": 1.0709632635116577, "learning_rate": 8.162389898723558e-06, "loss": 0.5416, "step": 4792 }, { "epoch": 0.3, "grad_norm": 0.984924852848053, "learning_rate": 8.161595119667413e-06, "loss": 0.5235, "step": 4793 }, { "epoch": 0.3, "grad_norm": 1.097145915031433, "learning_rate": 8.160800207487495e-06, "loss": 0.5539, "step": 4794 }, { "epoch": 0.3, "grad_norm": 0.9144260883331299, "learning_rate": 8.160005162217275e-06, "loss": 0.5375, "step": 4795 }, { "epoch": 0.3, "grad_norm": 0.924608051776886, "learning_rate": 8.159209983890232e-06, "loss": 0.5342, "step": 4796 }, { "epoch": 0.3, "grad_norm": 1.0322277545928955, "learning_rate": 8.158414672539845e-06, "loss": 0.5645, "step": 4797 }, { "epoch": 0.3, "grad_norm": 1.0819236040115356, "learning_rate": 8.157619228199605e-06, "loss": 0.535, "step": 4798 }, { "epoch": 0.3, "grad_norm": 1.2202394008636475, "learning_rate": 8.156823650903003e-06, "loss": 0.6181, "step": 4799 }, { "epoch": 0.3, "grad_norm": 1.0472339391708374, "learning_rate": 8.156027940683539e-06, "loss": 0.5069, "step": 4800 }, { "epoch": 0.3, "grad_norm": 1.0055584907531738, "learning_rate": 8.15523209757472e-06, "loss": 0.6032, "step": 4801 }, { "epoch": 0.3, "grad_norm": 1.0556094646453857, "learning_rate": 8.15443612161005e-06, "loss": 0.5541, "step": 4802 }, { "epoch": 0.3, "grad_norm": 0.9796180725097656, "learning_rate": 8.15364001282305e-06, "loss": 0.5515, "step": 4803 }, { "epoch": 0.3, "grad_norm": 1.0150129795074463, "learning_rate": 8.15284377124724e-06, "loss": 0.5023, "step": 4804 }, { "epoch": 0.3, "grad_norm": 0.9804368019104004, "learning_rate": 8.152047396916145e-06, "loss": 0.4709, "step": 4805 }, { "epoch": 0.3, "grad_norm": 1.1215652227401733, "learning_rate": 8.1512508898633e-06, "loss": 0.5868, "step": 4806 }, { "epoch": 0.3, "grad_norm": 0.9547824263572693, "learning_rate": 8.150454250122245e-06, "loss": 0.5221, "step": 4807 }, { "epoch": 0.3, "grad_norm": 1.0794047117233276, "learning_rate": 8.149657477726518e-06, "loss": 0.518, "step": 4808 }, { "epoch": 0.3, "grad_norm": 1.0394928455352783, "learning_rate": 8.148860572709674e-06, "loss": 0.5546, "step": 4809 }, { "epoch": 0.3, "grad_norm": 0.95167076587677, "learning_rate": 8.148063535105261e-06, "loss": 0.5344, "step": 4810 }, { "epoch": 0.3, "grad_norm": 1.120106816291809, "learning_rate": 8.147266364946848e-06, "loss": 0.5504, "step": 4811 }, { "epoch": 0.3, "grad_norm": 1.0547051429748535, "learning_rate": 8.146469062267995e-06, "loss": 0.5594, "step": 4812 }, { "epoch": 0.3, "grad_norm": 1.05607271194458, "learning_rate": 8.145671627102277e-06, "loss": 0.5548, "step": 4813 }, { "epoch": 0.3, "grad_norm": 0.9690526723861694, "learning_rate": 8.14487405948327e-06, "loss": 0.5183, "step": 4814 }, { "epoch": 0.31, "grad_norm": 0.9852856993675232, "learning_rate": 8.144076359444555e-06, "loss": 0.4959, "step": 4815 }, { "epoch": 0.31, "grad_norm": 1.0635384321212769, "learning_rate": 8.143278527019722e-06, "loss": 0.5398, "step": 4816 }, { "epoch": 0.31, "grad_norm": 1.1019240617752075, "learning_rate": 8.142480562242365e-06, "loss": 0.5441, "step": 4817 }, { "epoch": 0.31, "grad_norm": 1.0027168989181519, "learning_rate": 8.141682465146084e-06, "loss": 0.5409, "step": 4818 }, { "epoch": 0.31, "grad_norm": 0.9465576410293579, "learning_rate": 8.140884235764484e-06, "loss": 0.544, "step": 4819 }, { "epoch": 0.31, "grad_norm": 1.001171588897705, "learning_rate": 8.140085874131174e-06, "loss": 0.5424, "step": 4820 }, { "epoch": 0.31, "grad_norm": 0.9729893803596497, "learning_rate": 8.139287380279773e-06, "loss": 0.5462, "step": 4821 }, { "epoch": 0.31, "grad_norm": 1.1184935569763184, "learning_rate": 8.138488754243899e-06, "loss": 0.5888, "step": 4822 }, { "epoch": 0.31, "grad_norm": 1.1190617084503174, "learning_rate": 8.137689996057183e-06, "loss": 0.5694, "step": 4823 }, { "epoch": 0.31, "grad_norm": 0.957481861114502, "learning_rate": 8.136891105753258e-06, "loss": 0.4887, "step": 4824 }, { "epoch": 0.31, "grad_norm": 0.9850767254829407, "learning_rate": 8.136092083365758e-06, "loss": 0.5534, "step": 4825 }, { "epoch": 0.31, "grad_norm": 1.052511215209961, "learning_rate": 8.13529292892833e-06, "loss": 0.5776, "step": 4826 }, { "epoch": 0.31, "grad_norm": 0.9635382294654846, "learning_rate": 8.134493642474625e-06, "loss": 0.5626, "step": 4827 }, { "epoch": 0.31, "grad_norm": 0.9893434047698975, "learning_rate": 8.133694224038297e-06, "loss": 0.5278, "step": 4828 }, { "epoch": 0.31, "grad_norm": 1.0003610849380493, "learning_rate": 8.132894673653007e-06, "loss": 0.5565, "step": 4829 }, { "epoch": 0.31, "grad_norm": 1.0402261018753052, "learning_rate": 8.13209499135242e-06, "loss": 0.5364, "step": 4830 }, { "epoch": 0.31, "grad_norm": 1.0688753128051758, "learning_rate": 8.131295177170208e-06, "loss": 0.5734, "step": 4831 }, { "epoch": 0.31, "grad_norm": 1.0642523765563965, "learning_rate": 8.13049523114005e-06, "loss": 0.5675, "step": 4832 }, { "epoch": 0.31, "grad_norm": 0.9267446398735046, "learning_rate": 8.129695153295627e-06, "loss": 0.5026, "step": 4833 }, { "epoch": 0.31, "grad_norm": 0.9632136821746826, "learning_rate": 8.128894943670631e-06, "loss": 0.5302, "step": 4834 }, { "epoch": 0.31, "grad_norm": 0.9400724172592163, "learning_rate": 8.12809460229875e-06, "loss": 0.507, "step": 4835 }, { "epoch": 0.31, "grad_norm": 0.9593424797058105, "learning_rate": 8.127294129213691e-06, "loss": 0.5504, "step": 4836 }, { "epoch": 0.31, "grad_norm": 1.0008407831192017, "learning_rate": 8.126493524449153e-06, "loss": 0.5677, "step": 4837 }, { "epoch": 0.31, "grad_norm": 1.0743299722671509, "learning_rate": 8.12569278803885e-06, "loss": 0.5388, "step": 4838 }, { "epoch": 0.31, "grad_norm": 1.0238865613937378, "learning_rate": 8.124891920016495e-06, "loss": 0.5762, "step": 4839 }, { "epoch": 0.31, "grad_norm": 1.0223515033721924, "learning_rate": 8.124090920415814e-06, "loss": 0.574, "step": 4840 }, { "epoch": 0.31, "grad_norm": 1.026674509048462, "learning_rate": 8.123289789270532e-06, "loss": 0.5846, "step": 4841 }, { "epoch": 0.31, "grad_norm": 1.0493360757827759, "learning_rate": 8.12248852661438e-06, "loss": 0.5678, "step": 4842 }, { "epoch": 0.31, "grad_norm": 0.9677516222000122, "learning_rate": 8.121687132481101e-06, "loss": 0.5142, "step": 4843 }, { "epoch": 0.31, "grad_norm": 0.9218686819076538, "learning_rate": 8.120885606904436e-06, "loss": 0.5231, "step": 4844 }, { "epoch": 0.31, "grad_norm": 0.9612337350845337, "learning_rate": 8.120083949918137e-06, "loss": 0.5023, "step": 4845 }, { "epoch": 0.31, "grad_norm": 0.932696521282196, "learning_rate": 8.119282161555952e-06, "loss": 0.5063, "step": 4846 }, { "epoch": 0.31, "grad_norm": 1.0821776390075684, "learning_rate": 8.11848024185165e-06, "loss": 0.5861, "step": 4847 }, { "epoch": 0.31, "grad_norm": 1.029322862625122, "learning_rate": 8.117678190838991e-06, "loss": 0.544, "step": 4848 }, { "epoch": 0.31, "grad_norm": 1.0611780881881714, "learning_rate": 8.116876008551751e-06, "loss": 0.5172, "step": 4849 }, { "epoch": 0.31, "grad_norm": 0.9280547499656677, "learning_rate": 8.116073695023704e-06, "loss": 0.5443, "step": 4850 }, { "epoch": 0.31, "grad_norm": 1.0576995611190796, "learning_rate": 8.115271250288635e-06, "loss": 0.5182, "step": 4851 }, { "epoch": 0.31, "grad_norm": 1.1136101484298706, "learning_rate": 8.114468674380328e-06, "loss": 0.5999, "step": 4852 }, { "epoch": 0.31, "grad_norm": 1.0218087434768677, "learning_rate": 8.113665967332582e-06, "loss": 0.5447, "step": 4853 }, { "epoch": 0.31, "grad_norm": 0.9869479537010193, "learning_rate": 8.112863129179194e-06, "loss": 0.5787, "step": 4854 }, { "epoch": 0.31, "grad_norm": 1.043057918548584, "learning_rate": 8.112060159953966e-06, "loss": 0.5773, "step": 4855 }, { "epoch": 0.31, "grad_norm": 1.0893371105194092, "learning_rate": 8.111257059690714e-06, "loss": 0.5851, "step": 4856 }, { "epoch": 0.31, "grad_norm": 1.0911871194839478, "learning_rate": 8.110453828423248e-06, "loss": 0.5698, "step": 4857 }, { "epoch": 0.31, "grad_norm": 1.0049121379852295, "learning_rate": 8.109650466185394e-06, "loss": 0.5398, "step": 4858 }, { "epoch": 0.31, "grad_norm": 1.0009570121765137, "learning_rate": 8.108846973010975e-06, "loss": 0.502, "step": 4859 }, { "epoch": 0.31, "grad_norm": 1.035396695137024, "learning_rate": 8.108043348933825e-06, "loss": 0.5625, "step": 4860 }, { "epoch": 0.31, "grad_norm": 1.0491327047348022, "learning_rate": 8.107239593987781e-06, "loss": 0.5217, "step": 4861 }, { "epoch": 0.31, "grad_norm": 1.0376914739608765, "learning_rate": 8.10643570820669e-06, "loss": 0.5596, "step": 4862 }, { "epoch": 0.31, "grad_norm": 1.05499267578125, "learning_rate": 8.105631691624394e-06, "loss": 0.5712, "step": 4863 }, { "epoch": 0.31, "grad_norm": 1.0017470121383667, "learning_rate": 8.104827544274754e-06, "loss": 0.6343, "step": 4864 }, { "epoch": 0.31, "grad_norm": 0.988599956035614, "learning_rate": 8.104023266191625e-06, "loss": 0.5115, "step": 4865 }, { "epoch": 0.31, "grad_norm": 0.9934467673301697, "learning_rate": 8.103218857408875e-06, "loss": 0.5522, "step": 4866 }, { "epoch": 0.31, "grad_norm": 1.1765915155410767, "learning_rate": 8.102414317960373e-06, "loss": 0.5593, "step": 4867 }, { "epoch": 0.31, "grad_norm": 1.0065627098083496, "learning_rate": 8.10160964788e-06, "loss": 0.5288, "step": 4868 }, { "epoch": 0.31, "grad_norm": 1.084834337234497, "learning_rate": 8.100804847201632e-06, "loss": 0.5505, "step": 4869 }, { "epoch": 0.31, "grad_norm": 1.1313133239746094, "learning_rate": 8.09999991595916e-06, "loss": 0.5272, "step": 4870 }, { "epoch": 0.31, "grad_norm": 1.1298694610595703, "learning_rate": 8.099194854186475e-06, "loss": 0.5865, "step": 4871 }, { "epoch": 0.31, "grad_norm": 0.9154739379882812, "learning_rate": 8.098389661917475e-06, "loss": 0.4787, "step": 4872 }, { "epoch": 0.31, "grad_norm": 1.0516656637191772, "learning_rate": 8.097584339186066e-06, "loss": 0.5343, "step": 4873 }, { "epoch": 0.31, "grad_norm": 1.2043085098266602, "learning_rate": 8.096778886026155e-06, "loss": 0.5639, "step": 4874 }, { "epoch": 0.31, "grad_norm": 1.0819754600524902, "learning_rate": 8.09597330247166e-06, "loss": 0.5108, "step": 4875 }, { "epoch": 0.31, "grad_norm": 1.084563970565796, "learning_rate": 8.095167588556498e-06, "loss": 0.5942, "step": 4876 }, { "epoch": 0.31, "grad_norm": 1.0509734153747559, "learning_rate": 8.094361744314597e-06, "loss": 0.5664, "step": 4877 }, { "epoch": 0.31, "grad_norm": 1.0124056339263916, "learning_rate": 8.093555769779887e-06, "loss": 0.497, "step": 4878 }, { "epoch": 0.31, "grad_norm": 1.034844160079956, "learning_rate": 8.092749664986304e-06, "loss": 0.5735, "step": 4879 }, { "epoch": 0.31, "grad_norm": 0.966972291469574, "learning_rate": 8.091943429967792e-06, "loss": 0.5141, "step": 4880 }, { "epoch": 0.31, "grad_norm": 1.1028048992156982, "learning_rate": 8.0911370647583e-06, "loss": 0.621, "step": 4881 }, { "epoch": 0.31, "grad_norm": 0.9943513870239258, "learning_rate": 8.090330569391778e-06, "loss": 0.5266, "step": 4882 }, { "epoch": 0.31, "grad_norm": 1.095138669013977, "learning_rate": 8.089523943902187e-06, "loss": 0.5717, "step": 4883 }, { "epoch": 0.31, "grad_norm": 1.0169485807418823, "learning_rate": 8.088717188323489e-06, "loss": 0.5974, "step": 4884 }, { "epoch": 0.31, "grad_norm": 1.1654067039489746, "learning_rate": 8.087910302689656e-06, "loss": 0.5394, "step": 4885 }, { "epoch": 0.31, "grad_norm": 0.9405874013900757, "learning_rate": 8.087103287034664e-06, "loss": 0.552, "step": 4886 }, { "epoch": 0.31, "grad_norm": 0.9728289246559143, "learning_rate": 8.086296141392489e-06, "loss": 0.5, "step": 4887 }, { "epoch": 0.31, "grad_norm": 0.9715489745140076, "learning_rate": 8.08548886579712e-06, "loss": 0.5554, "step": 4888 }, { "epoch": 0.31, "grad_norm": 1.0222160816192627, "learning_rate": 8.08468146028255e-06, "loss": 0.5288, "step": 4889 }, { "epoch": 0.31, "grad_norm": 0.9580404758453369, "learning_rate": 8.083873924882775e-06, "loss": 0.5233, "step": 4890 }, { "epoch": 0.31, "grad_norm": 1.1099965572357178, "learning_rate": 8.083066259631796e-06, "loss": 0.5475, "step": 4891 }, { "epoch": 0.31, "grad_norm": 0.9971556067466736, "learning_rate": 8.082258464563621e-06, "loss": 0.5567, "step": 4892 }, { "epoch": 0.31, "grad_norm": 0.9796202182769775, "learning_rate": 8.081450539712266e-06, "loss": 0.5169, "step": 4893 }, { "epoch": 0.31, "grad_norm": 1.042241096496582, "learning_rate": 8.080642485111747e-06, "loss": 0.5239, "step": 4894 }, { "epoch": 0.31, "grad_norm": 0.9608373641967773, "learning_rate": 8.07983430079609e-06, "loss": 0.5181, "step": 4895 }, { "epoch": 0.31, "grad_norm": 0.9804826974868774, "learning_rate": 8.079025986799326e-06, "loss": 0.5064, "step": 4896 }, { "epoch": 0.31, "grad_norm": 1.0063300132751465, "learning_rate": 8.078217543155488e-06, "loss": 0.4989, "step": 4897 }, { "epoch": 0.31, "grad_norm": 1.039695382118225, "learning_rate": 8.077408969898619e-06, "loss": 0.609, "step": 4898 }, { "epoch": 0.31, "grad_norm": 0.9815044403076172, "learning_rate": 8.076600267062761e-06, "loss": 0.5646, "step": 4899 }, { "epoch": 0.31, "grad_norm": 1.0451741218566895, "learning_rate": 8.07579143468197e-06, "loss": 0.5568, "step": 4900 }, { "epoch": 0.31, "grad_norm": 0.9567393660545349, "learning_rate": 8.074982472790302e-06, "loss": 0.4964, "step": 4901 }, { "epoch": 0.31, "grad_norm": 1.0217117071151733, "learning_rate": 8.074173381421819e-06, "loss": 0.5487, "step": 4902 }, { "epoch": 0.31, "grad_norm": 1.0442255735397339, "learning_rate": 8.073364160610589e-06, "loss": 0.5254, "step": 4903 }, { "epoch": 0.31, "grad_norm": 0.9374282956123352, "learning_rate": 8.072554810390685e-06, "loss": 0.5208, "step": 4904 }, { "epoch": 0.31, "grad_norm": 1.1306034326553345, "learning_rate": 8.071745330796187e-06, "loss": 0.6253, "step": 4905 }, { "epoch": 0.31, "grad_norm": 0.9648675918579102, "learning_rate": 8.070935721861178e-06, "loss": 0.5291, "step": 4906 }, { "epoch": 0.31, "grad_norm": 0.9852427840232849, "learning_rate": 8.07012598361975e-06, "loss": 0.5502, "step": 4907 }, { "epoch": 0.31, "grad_norm": 0.9443341493606567, "learning_rate": 8.069316116105996e-06, "loss": 0.6045, "step": 4908 }, { "epoch": 0.31, "grad_norm": 0.9153281450271606, "learning_rate": 8.068506119354019e-06, "loss": 0.505, "step": 4909 }, { "epoch": 0.31, "grad_norm": 1.0826537609100342, "learning_rate": 8.067695993397923e-06, "loss": 0.5711, "step": 4910 }, { "epoch": 0.31, "grad_norm": 0.9605743885040283, "learning_rate": 8.066885738271821e-06, "loss": 0.4974, "step": 4911 }, { "epoch": 0.31, "grad_norm": 0.985244870185852, "learning_rate": 8.06607535400983e-06, "loss": 0.5372, "step": 4912 }, { "epoch": 0.31, "grad_norm": 1.0411839485168457, "learning_rate": 8.06526484064607e-06, "loss": 0.5248, "step": 4913 }, { "epoch": 0.31, "grad_norm": 1.0325897932052612, "learning_rate": 8.064454198214673e-06, "loss": 0.5599, "step": 4914 }, { "epoch": 0.31, "grad_norm": 1.0130306482315063, "learning_rate": 8.063643426749769e-06, "loss": 0.5652, "step": 4915 }, { "epoch": 0.31, "grad_norm": 1.0241799354553223, "learning_rate": 8.062832526285498e-06, "loss": 0.5673, "step": 4916 }, { "epoch": 0.31, "grad_norm": 1.090224027633667, "learning_rate": 8.062021496856004e-06, "loss": 0.5921, "step": 4917 }, { "epoch": 0.31, "grad_norm": 1.0551925897598267, "learning_rate": 8.061210338495437e-06, "loss": 0.5828, "step": 4918 }, { "epoch": 0.31, "grad_norm": 0.9556765556335449, "learning_rate": 8.060399051237952e-06, "loss": 0.5779, "step": 4919 }, { "epoch": 0.31, "grad_norm": 1.074124813079834, "learning_rate": 8.059587635117709e-06, "loss": 0.5351, "step": 4920 }, { "epoch": 0.31, "grad_norm": 0.9773049354553223, "learning_rate": 8.058776090168874e-06, "loss": 0.5298, "step": 4921 }, { "epoch": 0.31, "grad_norm": 0.9283863306045532, "learning_rate": 8.057964416425618e-06, "loss": 0.5124, "step": 4922 }, { "epoch": 0.31, "grad_norm": 1.0056229829788208, "learning_rate": 8.05715261392212e-06, "loss": 0.4826, "step": 4923 }, { "epoch": 0.31, "grad_norm": 0.9548004269599915, "learning_rate": 8.05634068269256e-06, "loss": 0.5044, "step": 4924 }, { "epoch": 0.31, "grad_norm": 0.9597302675247192, "learning_rate": 8.055528622771124e-06, "loss": 0.5362, "step": 4925 }, { "epoch": 0.31, "grad_norm": 0.9519512057304382, "learning_rate": 8.05471643419201e-06, "loss": 0.5208, "step": 4926 }, { "epoch": 0.31, "grad_norm": 0.9922435283660889, "learning_rate": 8.053904116989413e-06, "loss": 0.5491, "step": 4927 }, { "epoch": 0.31, "grad_norm": 0.922353208065033, "learning_rate": 8.053091671197537e-06, "loss": 0.4871, "step": 4928 }, { "epoch": 0.31, "grad_norm": 1.0409049987792969, "learning_rate": 8.052279096850591e-06, "loss": 0.5317, "step": 4929 }, { "epoch": 0.31, "grad_norm": 0.9409239292144775, "learning_rate": 8.051466393982792e-06, "loss": 0.5379, "step": 4930 }, { "epoch": 0.31, "grad_norm": 1.0207635164260864, "learning_rate": 8.050653562628356e-06, "loss": 0.5513, "step": 4931 }, { "epoch": 0.31, "grad_norm": 1.0265536308288574, "learning_rate": 8.049840602821512e-06, "loss": 0.5972, "step": 4932 }, { "epoch": 0.31, "grad_norm": 1.1316540241241455, "learning_rate": 8.04902751459649e-06, "loss": 0.5525, "step": 4933 }, { "epoch": 0.31, "grad_norm": 0.9593460559844971, "learning_rate": 8.048214297987526e-06, "loss": 0.4956, "step": 4934 }, { "epoch": 0.31, "grad_norm": 0.9455051422119141, "learning_rate": 8.047400953028863e-06, "loss": 0.5173, "step": 4935 }, { "epoch": 0.31, "grad_norm": 0.9488646388053894, "learning_rate": 8.046587479754746e-06, "loss": 0.5095, "step": 4936 }, { "epoch": 0.31, "grad_norm": 1.0066953897476196, "learning_rate": 8.04577387819943e-06, "loss": 0.5887, "step": 4937 }, { "epoch": 0.31, "grad_norm": 1.0051743984222412, "learning_rate": 8.044960148397168e-06, "loss": 0.5565, "step": 4938 }, { "epoch": 0.31, "grad_norm": 1.0139719247817993, "learning_rate": 8.04414629038223e-06, "loss": 0.5635, "step": 4939 }, { "epoch": 0.31, "grad_norm": 0.9318371415138245, "learning_rate": 8.04333230418888e-06, "loss": 0.5089, "step": 4940 }, { "epoch": 0.31, "grad_norm": 1.042931318283081, "learning_rate": 8.042518189851394e-06, "loss": 0.5411, "step": 4941 }, { "epoch": 0.31, "grad_norm": 0.9732179641723633, "learning_rate": 8.04170394740405e-06, "loss": 0.562, "step": 4942 }, { "epoch": 0.31, "grad_norm": 0.9993977546691895, "learning_rate": 8.040889576881136e-06, "loss": 0.5448, "step": 4943 }, { "epoch": 0.31, "grad_norm": 1.030236005783081, "learning_rate": 8.04007507831694e-06, "loss": 0.5764, "step": 4944 }, { "epoch": 0.31, "grad_norm": 1.0133357048034668, "learning_rate": 8.039260451745758e-06, "loss": 0.5193, "step": 4945 }, { "epoch": 0.31, "grad_norm": 0.9909392595291138, "learning_rate": 8.03844569720189e-06, "loss": 0.5408, "step": 4946 }, { "epoch": 0.31, "grad_norm": 1.0355006456375122, "learning_rate": 8.037630814719644e-06, "loss": 0.5258, "step": 4947 }, { "epoch": 0.31, "grad_norm": 0.9290564060211182, "learning_rate": 8.036815804333334e-06, "loss": 0.5275, "step": 4948 }, { "epoch": 0.31, "grad_norm": 0.990656316280365, "learning_rate": 8.036000666077273e-06, "loss": 0.549, "step": 4949 }, { "epoch": 0.31, "grad_norm": 0.9807307720184326, "learning_rate": 8.035185399985784e-06, "loss": 0.522, "step": 4950 }, { "epoch": 0.31, "grad_norm": 1.0541224479675293, "learning_rate": 8.034370006093198e-06, "loss": 0.6065, "step": 4951 }, { "epoch": 0.31, "grad_norm": 1.0626296997070312, "learning_rate": 8.033554484433848e-06, "loss": 0.5326, "step": 4952 }, { "epoch": 0.31, "grad_norm": 1.0675690174102783, "learning_rate": 8.032738835042068e-06, "loss": 0.5979, "step": 4953 }, { "epoch": 0.31, "grad_norm": 1.0939754247665405, "learning_rate": 8.031923057952208e-06, "loss": 0.5368, "step": 4954 }, { "epoch": 0.31, "grad_norm": 0.9942882061004639, "learning_rate": 8.031107153198617e-06, "loss": 0.5531, "step": 4955 }, { "epoch": 0.31, "grad_norm": 1.094529151916504, "learning_rate": 8.030291120815647e-06, "loss": 0.5452, "step": 4956 }, { "epoch": 0.31, "grad_norm": 1.1173778772354126, "learning_rate": 8.029474960837657e-06, "loss": 0.5411, "step": 4957 }, { "epoch": 0.31, "grad_norm": 1.025722622871399, "learning_rate": 8.028658673299019e-06, "loss": 0.5644, "step": 4958 }, { "epoch": 0.31, "grad_norm": 0.9596103429794312, "learning_rate": 8.027842258234097e-06, "loss": 0.514, "step": 4959 }, { "epoch": 0.31, "grad_norm": 1.069469690322876, "learning_rate": 8.027025715677273e-06, "loss": 0.6062, "step": 4960 }, { "epoch": 0.31, "grad_norm": 1.0660532712936401, "learning_rate": 8.026209045662925e-06, "loss": 0.5804, "step": 4961 }, { "epoch": 0.31, "grad_norm": 1.0204991102218628, "learning_rate": 8.025392248225444e-06, "loss": 0.5192, "step": 4962 }, { "epoch": 0.31, "grad_norm": 1.0006287097930908, "learning_rate": 8.024575323399217e-06, "loss": 0.5342, "step": 4963 }, { "epoch": 0.31, "grad_norm": 0.9939424991607666, "learning_rate": 8.023758271218646e-06, "loss": 0.5538, "step": 4964 }, { "epoch": 0.31, "grad_norm": 0.973581075668335, "learning_rate": 8.022941091718133e-06, "loss": 0.5441, "step": 4965 }, { "epoch": 0.31, "grad_norm": 1.0039817094802856, "learning_rate": 8.022123784932085e-06, "loss": 0.5714, "step": 4966 }, { "epoch": 0.31, "grad_norm": 1.0360323190689087, "learning_rate": 8.02130635089492e-06, "loss": 0.5494, "step": 4967 }, { "epoch": 0.31, "grad_norm": 1.0309659242630005, "learning_rate": 8.020488789641054e-06, "loss": 0.5939, "step": 4968 }, { "epoch": 0.31, "grad_norm": 0.9577841758728027, "learning_rate": 8.019671101204914e-06, "loss": 0.5239, "step": 4969 }, { "epoch": 0.31, "grad_norm": 0.9899508357048035, "learning_rate": 8.018853285620926e-06, "loss": 0.5017, "step": 4970 }, { "epoch": 0.31, "grad_norm": 1.0647516250610352, "learning_rate": 8.018035342923529e-06, "loss": 0.5435, "step": 4971 }, { "epoch": 0.31, "grad_norm": 0.9604344964027405, "learning_rate": 8.017217273147165e-06, "loss": 0.4843, "step": 4972 }, { "epoch": 0.32, "grad_norm": 0.9627401232719421, "learning_rate": 8.016399076326275e-06, "loss": 0.611, "step": 4973 }, { "epoch": 0.32, "grad_norm": 1.0629135370254517, "learning_rate": 8.015580752495314e-06, "loss": 0.5744, "step": 4974 }, { "epoch": 0.32, "grad_norm": 1.0638678073883057, "learning_rate": 8.014762301688737e-06, "loss": 0.5838, "step": 4975 }, { "epoch": 0.32, "grad_norm": 0.9654185175895691, "learning_rate": 8.013943723941009e-06, "loss": 0.444, "step": 4976 }, { "epoch": 0.32, "grad_norm": 1.0113495588302612, "learning_rate": 8.013125019286594e-06, "loss": 0.5789, "step": 4977 }, { "epoch": 0.32, "grad_norm": 1.0057951211929321, "learning_rate": 8.012306187759966e-06, "loss": 0.5557, "step": 4978 }, { "epoch": 0.32, "grad_norm": 1.0672248601913452, "learning_rate": 8.011487229395605e-06, "loss": 0.579, "step": 4979 }, { "epoch": 0.32, "grad_norm": 0.9103686213493347, "learning_rate": 8.010668144227991e-06, "loss": 0.5334, "step": 4980 }, { "epoch": 0.32, "grad_norm": 1.0784097909927368, "learning_rate": 8.009848932291617e-06, "loss": 0.505, "step": 4981 }, { "epoch": 0.32, "grad_norm": 1.0375616550445557, "learning_rate": 8.009029593620974e-06, "loss": 0.5159, "step": 4982 }, { "epoch": 0.32, "grad_norm": 1.013041615486145, "learning_rate": 8.008210128250563e-06, "loss": 0.56, "step": 4983 }, { "epoch": 0.32, "grad_norm": 1.0100730657577515, "learning_rate": 8.007390536214888e-06, "loss": 0.551, "step": 4984 }, { "epoch": 0.32, "grad_norm": 0.9632449746131897, "learning_rate": 8.006570817548457e-06, "loss": 0.517, "step": 4985 }, { "epoch": 0.32, "grad_norm": 1.0761468410491943, "learning_rate": 8.005750972285793e-06, "loss": 0.5718, "step": 4986 }, { "epoch": 0.32, "grad_norm": 0.9765868782997131, "learning_rate": 8.004931000461408e-06, "loss": 0.5141, "step": 4987 }, { "epoch": 0.32, "grad_norm": 1.0333558320999146, "learning_rate": 8.004110902109832e-06, "loss": 0.5547, "step": 4988 }, { "epoch": 0.32, "grad_norm": 1.029192328453064, "learning_rate": 8.003290677265599e-06, "loss": 0.5921, "step": 4989 }, { "epoch": 0.32, "grad_norm": 1.0383979082107544, "learning_rate": 8.002470325963241e-06, "loss": 0.5385, "step": 4990 }, { "epoch": 0.32, "grad_norm": 0.9121312499046326, "learning_rate": 8.001649848237303e-06, "loss": 0.4893, "step": 4991 }, { "epoch": 0.32, "grad_norm": 0.9828303456306458, "learning_rate": 8.000829244122333e-06, "loss": 0.5365, "step": 4992 }, { "epoch": 0.32, "grad_norm": 1.0785502195358276, "learning_rate": 8.00000851365288e-06, "loss": 0.589, "step": 4993 }, { "epoch": 0.32, "grad_norm": 0.953129768371582, "learning_rate": 7.999187656863507e-06, "loss": 0.5109, "step": 4994 }, { "epoch": 0.32, "grad_norm": 0.9617950320243835, "learning_rate": 7.998366673788775e-06, "loss": 0.5067, "step": 4995 }, { "epoch": 0.32, "grad_norm": 1.0531142950057983, "learning_rate": 7.997545564463251e-06, "loss": 0.5743, "step": 4996 }, { "epoch": 0.32, "grad_norm": 1.0768109560012817, "learning_rate": 7.996724328921514e-06, "loss": 0.4972, "step": 4997 }, { "epoch": 0.32, "grad_norm": 1.1927998065948486, "learning_rate": 7.99590296719814e-06, "loss": 0.5797, "step": 4998 }, { "epoch": 0.32, "grad_norm": 1.0408989191055298, "learning_rate": 7.995081479327712e-06, "loss": 0.5515, "step": 4999 }, { "epoch": 0.32, "grad_norm": 1.0196020603179932, "learning_rate": 7.994259865344822e-06, "loss": 0.5542, "step": 5000 }, { "epoch": 0.32, "grad_norm": 1.0699565410614014, "learning_rate": 7.993438125284068e-06, "loss": 0.5788, "step": 5001 }, { "epoch": 0.32, "grad_norm": 1.0551797151565552, "learning_rate": 7.992616259180045e-06, "loss": 0.5452, "step": 5002 }, { "epoch": 0.32, "grad_norm": 1.0628966093063354, "learning_rate": 7.991794267067363e-06, "loss": 0.5319, "step": 5003 }, { "epoch": 0.32, "grad_norm": 1.0177741050720215, "learning_rate": 7.99097214898063e-06, "loss": 0.5379, "step": 5004 }, { "epoch": 0.32, "grad_norm": 0.9868799448013306, "learning_rate": 7.99014990495447e-06, "loss": 0.5373, "step": 5005 }, { "epoch": 0.32, "grad_norm": 1.0062427520751953, "learning_rate": 7.989327535023495e-06, "loss": 0.5484, "step": 5006 }, { "epoch": 0.32, "grad_norm": 1.0482970476150513, "learning_rate": 7.988505039222339e-06, "loss": 0.5368, "step": 5007 }, { "epoch": 0.32, "grad_norm": 1.0803232192993164, "learning_rate": 7.987682417585629e-06, "loss": 0.5596, "step": 5008 }, { "epoch": 0.32, "grad_norm": 1.0112433433532715, "learning_rate": 7.98685967014801e-06, "loss": 0.5734, "step": 5009 }, { "epoch": 0.32, "grad_norm": 1.0411220788955688, "learning_rate": 7.986036796944116e-06, "loss": 0.5594, "step": 5010 }, { "epoch": 0.32, "grad_norm": 1.0272407531738281, "learning_rate": 7.985213798008605e-06, "loss": 0.5552, "step": 5011 }, { "epoch": 0.32, "grad_norm": 1.0483700037002563, "learning_rate": 7.984390673376123e-06, "loss": 0.5545, "step": 5012 }, { "epoch": 0.32, "grad_norm": 0.9998469948768616, "learning_rate": 7.983567423081331e-06, "loss": 0.5405, "step": 5013 }, { "epoch": 0.32, "grad_norm": 0.9811612963676453, "learning_rate": 7.982744047158897e-06, "loss": 0.5598, "step": 5014 }, { "epoch": 0.32, "grad_norm": 1.082160472869873, "learning_rate": 7.981920545643485e-06, "loss": 0.5253, "step": 5015 }, { "epoch": 0.32, "grad_norm": 0.977547287940979, "learning_rate": 7.981096918569773e-06, "loss": 0.5051, "step": 5016 }, { "epoch": 0.32, "grad_norm": 1.0095993280410767, "learning_rate": 7.980273165972438e-06, "loss": 0.5184, "step": 5017 }, { "epoch": 0.32, "grad_norm": 0.9310224652290344, "learning_rate": 7.979449287886171e-06, "loss": 0.4903, "step": 5018 }, { "epoch": 0.32, "grad_norm": 1.0223238468170166, "learning_rate": 7.978625284345657e-06, "loss": 0.5627, "step": 5019 }, { "epoch": 0.32, "grad_norm": 0.9764466881752014, "learning_rate": 7.977801155385595e-06, "loss": 0.536, "step": 5020 }, { "epoch": 0.32, "grad_norm": 1.0332834720611572, "learning_rate": 7.976976901040686e-06, "loss": 0.577, "step": 5021 }, { "epoch": 0.32, "grad_norm": 1.0004802942276, "learning_rate": 7.976152521345635e-06, "loss": 0.6022, "step": 5022 }, { "epoch": 0.32, "grad_norm": 1.1783488988876343, "learning_rate": 7.975328016335154e-06, "loss": 0.5923, "step": 5023 }, { "epoch": 0.32, "grad_norm": 0.9814012050628662, "learning_rate": 7.974503386043961e-06, "loss": 0.5552, "step": 5024 }, { "epoch": 0.32, "grad_norm": 1.096435546875, "learning_rate": 7.973678630506778e-06, "loss": 0.6054, "step": 5025 }, { "epoch": 0.32, "grad_norm": 0.9659005403518677, "learning_rate": 7.972853749758334e-06, "loss": 0.5512, "step": 5026 }, { "epoch": 0.32, "grad_norm": 1.0144753456115723, "learning_rate": 7.972028743833357e-06, "loss": 0.5553, "step": 5027 }, { "epoch": 0.32, "grad_norm": 1.0260987281799316, "learning_rate": 7.971203612766591e-06, "loss": 0.526, "step": 5028 }, { "epoch": 0.32, "grad_norm": 0.9555620551109314, "learning_rate": 7.970378356592779e-06, "loss": 0.582, "step": 5029 }, { "epoch": 0.32, "grad_norm": 0.9632571935653687, "learning_rate": 7.969552975346664e-06, "loss": 0.5346, "step": 5030 }, { "epoch": 0.32, "grad_norm": 1.0778241157531738, "learning_rate": 7.968727469063005e-06, "loss": 0.5058, "step": 5031 }, { "epoch": 0.32, "grad_norm": 1.007095456123352, "learning_rate": 7.967901837776559e-06, "loss": 0.5496, "step": 5032 }, { "epoch": 0.32, "grad_norm": 1.007356882095337, "learning_rate": 7.967076081522091e-06, "loss": 0.5745, "step": 5033 }, { "epoch": 0.32, "grad_norm": 1.2195940017700195, "learning_rate": 7.966250200334373e-06, "loss": 0.6314, "step": 5034 }, { "epoch": 0.32, "grad_norm": 1.0311719179153442, "learning_rate": 7.965424194248176e-06, "loss": 0.5251, "step": 5035 }, { "epoch": 0.32, "grad_norm": 0.9696710109710693, "learning_rate": 7.964598063298282e-06, "loss": 0.5846, "step": 5036 }, { "epoch": 0.32, "grad_norm": 0.9161157011985779, "learning_rate": 7.963771807519477e-06, "loss": 0.4878, "step": 5037 }, { "epoch": 0.32, "grad_norm": 0.9984802603721619, "learning_rate": 7.962945426946552e-06, "loss": 0.5166, "step": 5038 }, { "epoch": 0.32, "grad_norm": 0.9821481704711914, "learning_rate": 7.962118921614302e-06, "loss": 0.5727, "step": 5039 }, { "epoch": 0.32, "grad_norm": 1.0021934509277344, "learning_rate": 7.961292291557529e-06, "loss": 0.5063, "step": 5040 }, { "epoch": 0.32, "grad_norm": 1.0840123891830444, "learning_rate": 7.960465536811039e-06, "loss": 0.5433, "step": 5041 }, { "epoch": 0.32, "grad_norm": 0.9790361523628235, "learning_rate": 7.959638657409643e-06, "loss": 0.5349, "step": 5042 }, { "epoch": 0.32, "grad_norm": 0.9270578026771545, "learning_rate": 7.95881165338816e-06, "loss": 0.5016, "step": 5043 }, { "epoch": 0.32, "grad_norm": 1.036125898361206, "learning_rate": 7.957984524781413e-06, "loss": 0.5588, "step": 5044 }, { "epoch": 0.32, "grad_norm": 1.0060368776321411, "learning_rate": 7.957157271624225e-06, "loss": 0.5297, "step": 5045 }, { "epoch": 0.32, "grad_norm": 1.0325381755828857, "learning_rate": 7.956329893951432e-06, "loss": 0.5324, "step": 5046 }, { "epoch": 0.32, "grad_norm": 1.1573082208633423, "learning_rate": 7.95550239179787e-06, "loss": 0.5842, "step": 5047 }, { "epoch": 0.32, "grad_norm": 0.8682379126548767, "learning_rate": 7.954674765198386e-06, "loss": 0.4966, "step": 5048 }, { "epoch": 0.32, "grad_norm": 1.066186785697937, "learning_rate": 7.953847014187826e-06, "loss": 0.5796, "step": 5049 }, { "epoch": 0.32, "grad_norm": 1.0737568140029907, "learning_rate": 7.953019138801045e-06, "loss": 0.5641, "step": 5050 }, { "epoch": 0.32, "grad_norm": 1.0351073741912842, "learning_rate": 7.952191139072898e-06, "loss": 0.574, "step": 5051 }, { "epoch": 0.32, "grad_norm": 0.9687965512275696, "learning_rate": 7.951363015038254e-06, "loss": 0.5135, "step": 5052 }, { "epoch": 0.32, "grad_norm": 1.0956878662109375, "learning_rate": 7.950534766731982e-06, "loss": 0.5986, "step": 5053 }, { "epoch": 0.32, "grad_norm": 0.957947850227356, "learning_rate": 7.949706394188951e-06, "loss": 0.5402, "step": 5054 }, { "epoch": 0.32, "grad_norm": 1.0185531377792358, "learning_rate": 7.948877897444047e-06, "loss": 0.5415, "step": 5055 }, { "epoch": 0.32, "grad_norm": 1.0777101516723633, "learning_rate": 7.948049276532156e-06, "loss": 0.5738, "step": 5056 }, { "epoch": 0.32, "grad_norm": 0.9509685039520264, "learning_rate": 7.94722053148816e-06, "loss": 0.5157, "step": 5057 }, { "epoch": 0.32, "grad_norm": 1.0248844623565674, "learning_rate": 7.946391662346964e-06, "loss": 0.5821, "step": 5058 }, { "epoch": 0.32, "grad_norm": 1.0210704803466797, "learning_rate": 7.945562669143463e-06, "loss": 0.5644, "step": 5059 }, { "epoch": 0.32, "grad_norm": 0.9601178765296936, "learning_rate": 7.944733551912566e-06, "loss": 0.5246, "step": 5060 }, { "epoch": 0.32, "grad_norm": 0.9830024242401123, "learning_rate": 7.943904310689184e-06, "loss": 0.5272, "step": 5061 }, { "epoch": 0.32, "grad_norm": 1.0547600984573364, "learning_rate": 7.94307494550823e-06, "loss": 0.5822, "step": 5062 }, { "epoch": 0.32, "grad_norm": 1.100956678390503, "learning_rate": 7.94224545640463e-06, "loss": 0.5451, "step": 5063 }, { "epoch": 0.32, "grad_norm": 0.9732983708381653, "learning_rate": 7.941415843413309e-06, "loss": 0.5272, "step": 5064 }, { "epoch": 0.32, "grad_norm": 0.9543006420135498, "learning_rate": 7.940586106569198e-06, "loss": 0.5295, "step": 5065 }, { "epoch": 0.32, "grad_norm": 1.0225476026535034, "learning_rate": 7.939756245907237e-06, "loss": 0.549, "step": 5066 }, { "epoch": 0.32, "grad_norm": 1.0729137659072876, "learning_rate": 7.938926261462366e-06, "loss": 0.5776, "step": 5067 }, { "epoch": 0.32, "grad_norm": 1.0118247270584106, "learning_rate": 7.938096153269535e-06, "loss": 0.5463, "step": 5068 }, { "epoch": 0.32, "grad_norm": 0.978184163570404, "learning_rate": 7.937265921363695e-06, "loss": 0.4906, "step": 5069 }, { "epoch": 0.32, "grad_norm": 1.0645402669906616, "learning_rate": 7.936435565779806e-06, "loss": 0.5251, "step": 5070 }, { "epoch": 0.32, "grad_norm": 1.0021170377731323, "learning_rate": 7.93560508655283e-06, "loss": 0.5221, "step": 5071 }, { "epoch": 0.32, "grad_norm": 1.007643222808838, "learning_rate": 7.934774483717736e-06, "loss": 0.5161, "step": 5072 }, { "epoch": 0.32, "grad_norm": 1.0118756294250488, "learning_rate": 7.933943757309498e-06, "loss": 0.5745, "step": 5073 }, { "epoch": 0.32, "grad_norm": 1.0875252485275269, "learning_rate": 7.933112907363096e-06, "loss": 0.519, "step": 5074 }, { "epoch": 0.32, "grad_norm": 1.0094273090362549, "learning_rate": 7.93228193391351e-06, "loss": 0.5552, "step": 5075 }, { "epoch": 0.32, "grad_norm": 1.0161619186401367, "learning_rate": 7.931450836995736e-06, "loss": 0.5124, "step": 5076 }, { "epoch": 0.32, "grad_norm": 0.9365780353546143, "learning_rate": 7.930619616644761e-06, "loss": 0.5455, "step": 5077 }, { "epoch": 0.32, "grad_norm": 1.0313904285430908, "learning_rate": 7.929788272895591e-06, "loss": 0.5447, "step": 5078 }, { "epoch": 0.32, "grad_norm": 1.0682475566864014, "learning_rate": 7.928956805783228e-06, "loss": 0.5592, "step": 5079 }, { "epoch": 0.32, "grad_norm": 1.0047205686569214, "learning_rate": 7.928125215342685e-06, "loss": 0.5656, "step": 5080 }, { "epoch": 0.32, "grad_norm": 1.002293586730957, "learning_rate": 7.927293501608975e-06, "loss": 0.5491, "step": 5081 }, { "epoch": 0.32, "grad_norm": 0.9850450158119202, "learning_rate": 7.926461664617117e-06, "loss": 0.5376, "step": 5082 }, { "epoch": 0.32, "grad_norm": 0.9095563888549805, "learning_rate": 7.92562970440214e-06, "loss": 0.5299, "step": 5083 }, { "epoch": 0.32, "grad_norm": 0.984972357749939, "learning_rate": 7.924797620999074e-06, "loss": 0.5106, "step": 5084 }, { "epoch": 0.32, "grad_norm": 1.1025408506393433, "learning_rate": 7.923965414442953e-06, "loss": 0.5869, "step": 5085 }, { "epoch": 0.32, "grad_norm": 1.0418413877487183, "learning_rate": 7.923133084768822e-06, "loss": 0.5485, "step": 5086 }, { "epoch": 0.32, "grad_norm": 1.098783016204834, "learning_rate": 7.922300632011726e-06, "loss": 0.5581, "step": 5087 }, { "epoch": 0.32, "grad_norm": 0.9901795387268066, "learning_rate": 7.921468056206715e-06, "loss": 0.5622, "step": 5088 }, { "epoch": 0.32, "grad_norm": 1.031957983970642, "learning_rate": 7.920635357388848e-06, "loss": 0.4864, "step": 5089 }, { "epoch": 0.32, "grad_norm": 1.0441521406173706, "learning_rate": 7.919802535593185e-06, "loss": 0.5574, "step": 5090 }, { "epoch": 0.32, "grad_norm": 0.9432700872421265, "learning_rate": 7.918969590854797e-06, "loss": 0.5624, "step": 5091 }, { "epoch": 0.32, "grad_norm": 0.9714290499687195, "learning_rate": 7.91813652320875e-06, "loss": 0.5236, "step": 5092 }, { "epoch": 0.32, "grad_norm": 0.9984144568443298, "learning_rate": 7.91730333269013e-06, "loss": 0.5561, "step": 5093 }, { "epoch": 0.32, "grad_norm": 1.0645394325256348, "learning_rate": 7.916470019334012e-06, "loss": 0.5818, "step": 5094 }, { "epoch": 0.32, "grad_norm": 1.0106472969055176, "learning_rate": 7.915636583175489e-06, "loss": 0.5075, "step": 5095 }, { "epoch": 0.32, "grad_norm": 1.1064532995224, "learning_rate": 7.91480302424965e-06, "loss": 0.5447, "step": 5096 }, { "epoch": 0.32, "grad_norm": 1.0424033403396606, "learning_rate": 7.913969342591597e-06, "loss": 0.5357, "step": 5097 }, { "epoch": 0.32, "grad_norm": 1.0222606658935547, "learning_rate": 7.913135538236432e-06, "loss": 0.6008, "step": 5098 }, { "epoch": 0.32, "grad_norm": 1.1493009328842163, "learning_rate": 7.912301611219264e-06, "loss": 0.5439, "step": 5099 }, { "epoch": 0.32, "grad_norm": 0.9946584105491638, "learning_rate": 7.911467561575204e-06, "loss": 0.5631, "step": 5100 }, { "epoch": 0.32, "grad_norm": 1.0782363414764404, "learning_rate": 7.910633389339376e-06, "loss": 0.5791, "step": 5101 }, { "epoch": 0.32, "grad_norm": 0.9767991304397583, "learning_rate": 7.909799094546899e-06, "loss": 0.4918, "step": 5102 }, { "epoch": 0.32, "grad_norm": 0.9834350943565369, "learning_rate": 7.908964677232906e-06, "loss": 0.5096, "step": 5103 }, { "epoch": 0.32, "grad_norm": 1.052124261856079, "learning_rate": 7.90813013743253e-06, "loss": 0.5607, "step": 5104 }, { "epoch": 0.32, "grad_norm": 1.1817008256912231, "learning_rate": 7.90729547518091e-06, "loss": 0.5512, "step": 5105 }, { "epoch": 0.32, "grad_norm": 1.0303164720535278, "learning_rate": 7.906460690513192e-06, "loss": 0.532, "step": 5106 }, { "epoch": 0.32, "grad_norm": 1.050991177558899, "learning_rate": 7.905625783464525e-06, "loss": 0.552, "step": 5107 }, { "epoch": 0.32, "grad_norm": 0.9917927384376526, "learning_rate": 7.904790754070063e-06, "loss": 0.5332, "step": 5108 }, { "epoch": 0.32, "grad_norm": 0.9641286134719849, "learning_rate": 7.90395560236497e-06, "loss": 0.4993, "step": 5109 }, { "epoch": 0.32, "grad_norm": 0.9856176376342773, "learning_rate": 7.903120328384406e-06, "loss": 0.5131, "step": 5110 }, { "epoch": 0.32, "grad_norm": 0.9945058822631836, "learning_rate": 7.902284932163545e-06, "loss": 0.5193, "step": 5111 }, { "epoch": 0.32, "grad_norm": 1.0896492004394531, "learning_rate": 7.901449413737562e-06, "loss": 0.5403, "step": 5112 }, { "epoch": 0.32, "grad_norm": 0.9672039151191711, "learning_rate": 7.90061377314164e-06, "loss": 0.5629, "step": 5113 }, { "epoch": 0.32, "grad_norm": 0.9481355547904968, "learning_rate": 7.899778010410958e-06, "loss": 0.5466, "step": 5114 }, { "epoch": 0.32, "grad_norm": 0.9383245706558228, "learning_rate": 7.898942125580715e-06, "loss": 0.518, "step": 5115 }, { "epoch": 0.32, "grad_norm": 0.9123684167861938, "learning_rate": 7.898106118686102e-06, "loss": 0.5126, "step": 5116 }, { "epoch": 0.32, "grad_norm": 1.0579379796981812, "learning_rate": 7.897269989762322e-06, "loss": 0.5693, "step": 5117 }, { "epoch": 0.32, "grad_norm": 1.0428487062454224, "learning_rate": 7.896433738844583e-06, "loss": 0.5411, "step": 5118 }, { "epoch": 0.32, "grad_norm": 1.0016083717346191, "learning_rate": 7.895597365968093e-06, "loss": 0.5569, "step": 5119 }, { "epoch": 0.32, "grad_norm": 0.9868251085281372, "learning_rate": 7.894760871168074e-06, "loss": 0.5551, "step": 5120 }, { "epoch": 0.32, "grad_norm": 1.116563320159912, "learning_rate": 7.893924254479744e-06, "loss": 0.5593, "step": 5121 }, { "epoch": 0.32, "grad_norm": 1.0450432300567627, "learning_rate": 7.893087515938329e-06, "loss": 0.5684, "step": 5122 }, { "epoch": 0.32, "grad_norm": 1.0664788484573364, "learning_rate": 7.892250655579063e-06, "loss": 0.551, "step": 5123 }, { "epoch": 0.32, "grad_norm": 1.0176259279251099, "learning_rate": 7.891413673437185e-06, "loss": 0.5314, "step": 5124 }, { "epoch": 0.32, "grad_norm": 1.0659630298614502, "learning_rate": 7.890576569547937e-06, "loss": 0.5712, "step": 5125 }, { "epoch": 0.32, "grad_norm": 1.021335482597351, "learning_rate": 7.889739343946561e-06, "loss": 0.5733, "step": 5126 }, { "epoch": 0.32, "grad_norm": 0.977240800857544, "learning_rate": 7.888901996668317e-06, "loss": 0.525, "step": 5127 }, { "epoch": 0.32, "grad_norm": 0.9783990979194641, "learning_rate": 7.888064527748458e-06, "loss": 0.5608, "step": 5128 }, { "epoch": 0.32, "grad_norm": 0.9815640449523926, "learning_rate": 7.887226937222252e-06, "loss": 0.5808, "step": 5129 }, { "epoch": 0.33, "grad_norm": 0.9848017692565918, "learning_rate": 7.88638922512496e-06, "loss": 0.5739, "step": 5130 }, { "epoch": 0.33, "grad_norm": 1.0390576124191284, "learning_rate": 7.88555139149186e-06, "loss": 0.5748, "step": 5131 }, { "epoch": 0.33, "grad_norm": 0.9983201026916504, "learning_rate": 7.884713436358228e-06, "loss": 0.5368, "step": 5132 }, { "epoch": 0.33, "grad_norm": 1.0555384159088135, "learning_rate": 7.883875359759349e-06, "loss": 0.5677, "step": 5133 }, { "epoch": 0.33, "grad_norm": 1.0259679555892944, "learning_rate": 7.883037161730511e-06, "loss": 0.5416, "step": 5134 }, { "epoch": 0.33, "grad_norm": 0.9975308179855347, "learning_rate": 7.882198842307008e-06, "loss": 0.5452, "step": 5135 }, { "epoch": 0.33, "grad_norm": 1.0763533115386963, "learning_rate": 7.881360401524138e-06, "loss": 0.5529, "step": 5136 }, { "epoch": 0.33, "grad_norm": 1.104356288909912, "learning_rate": 7.880521839417206e-06, "loss": 0.5224, "step": 5137 }, { "epoch": 0.33, "grad_norm": 0.9988602995872498, "learning_rate": 7.879683156021518e-06, "loss": 0.5517, "step": 5138 }, { "epoch": 0.33, "grad_norm": 1.1201856136322021, "learning_rate": 7.87884435137239e-06, "loss": 0.5465, "step": 5139 }, { "epoch": 0.33, "grad_norm": 0.9202961325645447, "learning_rate": 7.878005425505143e-06, "loss": 0.5252, "step": 5140 }, { "epoch": 0.33, "grad_norm": 0.9737326502799988, "learning_rate": 7.877166378455098e-06, "loss": 0.5632, "step": 5141 }, { "epoch": 0.33, "grad_norm": 1.0532732009887695, "learning_rate": 7.876327210257586e-06, "loss": 0.5459, "step": 5142 }, { "epoch": 0.33, "grad_norm": 0.9994428157806396, "learning_rate": 7.875487920947941e-06, "loss": 0.5726, "step": 5143 }, { "epoch": 0.33, "grad_norm": 1.0257031917572021, "learning_rate": 7.874648510561503e-06, "loss": 0.5696, "step": 5144 }, { "epoch": 0.33, "grad_norm": 1.0415347814559937, "learning_rate": 7.873808979133616e-06, "loss": 0.5748, "step": 5145 }, { "epoch": 0.33, "grad_norm": 0.9889886975288391, "learning_rate": 7.872969326699631e-06, "loss": 0.494, "step": 5146 }, { "epoch": 0.33, "grad_norm": 1.0992611646652222, "learning_rate": 7.8721295532949e-06, "loss": 0.5662, "step": 5147 }, { "epoch": 0.33, "grad_norm": 1.073646068572998, "learning_rate": 7.871289658954789e-06, "loss": 0.5554, "step": 5148 }, { "epoch": 0.33, "grad_norm": 1.016093373298645, "learning_rate": 7.870449643714654e-06, "loss": 0.5469, "step": 5149 }, { "epoch": 0.33, "grad_norm": 0.9302458763122559, "learning_rate": 7.869609507609874e-06, "loss": 0.498, "step": 5150 }, { "epoch": 0.33, "grad_norm": 0.9844838380813599, "learning_rate": 7.868769250675818e-06, "loss": 0.5399, "step": 5151 }, { "epoch": 0.33, "grad_norm": 0.8882426619529724, "learning_rate": 7.867928872947869e-06, "loss": 0.4986, "step": 5152 }, { "epoch": 0.33, "grad_norm": 1.0245869159698486, "learning_rate": 7.867088374461413e-06, "loss": 0.5663, "step": 5153 }, { "epoch": 0.33, "grad_norm": 0.9834217429161072, "learning_rate": 7.866247755251838e-06, "loss": 0.5518, "step": 5154 }, { "epoch": 0.33, "grad_norm": 0.9708665609359741, "learning_rate": 7.865407015354542e-06, "loss": 0.514, "step": 5155 }, { "epoch": 0.33, "grad_norm": 1.0061088800430298, "learning_rate": 7.864566154804925e-06, "loss": 0.5584, "step": 5156 }, { "epoch": 0.33, "grad_norm": 1.0202516317367554, "learning_rate": 7.86372517363839e-06, "loss": 0.504, "step": 5157 }, { "epoch": 0.33, "grad_norm": 1.0482490062713623, "learning_rate": 7.862884071890353e-06, "loss": 0.5764, "step": 5158 }, { "epoch": 0.33, "grad_norm": 0.9607723355293274, "learning_rate": 7.862042849596225e-06, "loss": 0.5018, "step": 5159 }, { "epoch": 0.33, "grad_norm": 0.9499772787094116, "learning_rate": 7.86120150679143e-06, "loss": 0.563, "step": 5160 }, { "epoch": 0.33, "grad_norm": 0.9344480633735657, "learning_rate": 7.860360043511392e-06, "loss": 0.5223, "step": 5161 }, { "epoch": 0.33, "grad_norm": 0.9974209070205688, "learning_rate": 7.859518459791543e-06, "loss": 0.5606, "step": 5162 }, { "epoch": 0.33, "grad_norm": 1.0160338878631592, "learning_rate": 7.85867675566732e-06, "loss": 0.5219, "step": 5163 }, { "epoch": 0.33, "grad_norm": 0.9889779090881348, "learning_rate": 7.857834931174164e-06, "loss": 0.5416, "step": 5164 }, { "epoch": 0.33, "grad_norm": 0.9936977028846741, "learning_rate": 7.85699298634752e-06, "loss": 0.5473, "step": 5165 }, { "epoch": 0.33, "grad_norm": 1.0194740295410156, "learning_rate": 7.856150921222838e-06, "loss": 0.5352, "step": 5166 }, { "epoch": 0.33, "grad_norm": 0.9760123491287231, "learning_rate": 7.85530873583558e-06, "loss": 0.5736, "step": 5167 }, { "epoch": 0.33, "grad_norm": 0.9568659067153931, "learning_rate": 7.854466430221203e-06, "loss": 0.5255, "step": 5168 }, { "epoch": 0.33, "grad_norm": 0.9879942536354065, "learning_rate": 7.853624004415172e-06, "loss": 0.5914, "step": 5169 }, { "epoch": 0.33, "grad_norm": 0.9788195490837097, "learning_rate": 7.852781458452964e-06, "loss": 0.5409, "step": 5170 }, { "epoch": 0.33, "grad_norm": 1.0303471088409424, "learning_rate": 7.851938792370053e-06, "loss": 0.5852, "step": 5171 }, { "epoch": 0.33, "grad_norm": 0.8927707076072693, "learning_rate": 7.85109600620192e-06, "loss": 0.4724, "step": 5172 }, { "epoch": 0.33, "grad_norm": 1.1031830310821533, "learning_rate": 7.85025309998405e-06, "loss": 0.5288, "step": 5173 }, { "epoch": 0.33, "grad_norm": 0.9610477089881897, "learning_rate": 7.849410073751942e-06, "loss": 0.4959, "step": 5174 }, { "epoch": 0.33, "grad_norm": 1.0511902570724487, "learning_rate": 7.848566927541084e-06, "loss": 0.533, "step": 5175 }, { "epoch": 0.33, "grad_norm": 0.9335895776748657, "learning_rate": 7.847723661386985e-06, "loss": 0.5203, "step": 5176 }, { "epoch": 0.33, "grad_norm": 1.0692973136901855, "learning_rate": 7.846880275325149e-06, "loss": 0.5507, "step": 5177 }, { "epoch": 0.33, "grad_norm": 0.9702898859977722, "learning_rate": 7.846036769391086e-06, "loss": 0.5529, "step": 5178 }, { "epoch": 0.33, "grad_norm": 1.0902308225631714, "learning_rate": 7.845193143620316e-06, "loss": 0.5497, "step": 5179 }, { "epoch": 0.33, "grad_norm": 1.0889067649841309, "learning_rate": 7.84434939804836e-06, "loss": 0.5724, "step": 5180 }, { "epoch": 0.33, "grad_norm": 1.1026750802993774, "learning_rate": 7.843505532710748e-06, "loss": 0.5676, "step": 5181 }, { "epoch": 0.33, "grad_norm": 1.019676923751831, "learning_rate": 7.84266154764301e-06, "loss": 0.5744, "step": 5182 }, { "epoch": 0.33, "grad_norm": 0.9964596629142761, "learning_rate": 7.84181744288068e-06, "loss": 0.5693, "step": 5183 }, { "epoch": 0.33, "grad_norm": 0.9298571348190308, "learning_rate": 7.840973218459305e-06, "loss": 0.4892, "step": 5184 }, { "epoch": 0.33, "grad_norm": 1.0911149978637695, "learning_rate": 7.84012887441443e-06, "loss": 0.5437, "step": 5185 }, { "epoch": 0.33, "grad_norm": 1.1494719982147217, "learning_rate": 7.839284410781609e-06, "loss": 0.5176, "step": 5186 }, { "epoch": 0.33, "grad_norm": 1.035965085029602, "learning_rate": 7.838439827596398e-06, "loss": 0.5471, "step": 5187 }, { "epoch": 0.33, "grad_norm": 1.102126121520996, "learning_rate": 7.83759512489436e-06, "loss": 0.5752, "step": 5188 }, { "epoch": 0.33, "grad_norm": 0.9802284240722656, "learning_rate": 7.836750302711065e-06, "loss": 0.5237, "step": 5189 }, { "epoch": 0.33, "grad_norm": 0.9825965762138367, "learning_rate": 7.83590536108208e-06, "loss": 0.522, "step": 5190 }, { "epoch": 0.33, "grad_norm": 1.017299771308899, "learning_rate": 7.835060300042986e-06, "loss": 0.5584, "step": 5191 }, { "epoch": 0.33, "grad_norm": 0.996579110622406, "learning_rate": 7.834215119629366e-06, "loss": 0.5229, "step": 5192 }, { "epoch": 0.33, "grad_norm": 1.006910800933838, "learning_rate": 7.833369819876809e-06, "loss": 0.5416, "step": 5193 }, { "epoch": 0.33, "grad_norm": 1.0680913925170898, "learning_rate": 7.832524400820902e-06, "loss": 0.5333, "step": 5194 }, { "epoch": 0.33, "grad_norm": 1.0967390537261963, "learning_rate": 7.831678862497248e-06, "loss": 0.5562, "step": 5195 }, { "epoch": 0.33, "grad_norm": 1.077828288078308, "learning_rate": 7.830833204941446e-06, "loss": 0.5854, "step": 5196 }, { "epoch": 0.33, "grad_norm": 0.9674387574195862, "learning_rate": 7.829987428189108e-06, "loss": 0.5844, "step": 5197 }, { "epoch": 0.33, "grad_norm": 0.9557288289070129, "learning_rate": 7.829141532275843e-06, "loss": 0.5209, "step": 5198 }, { "epoch": 0.33, "grad_norm": 0.9728726744651794, "learning_rate": 7.82829551723727e-06, "loss": 0.5654, "step": 5199 }, { "epoch": 0.33, "grad_norm": 0.9683870077133179, "learning_rate": 7.827449383109012e-06, "loss": 0.5174, "step": 5200 }, { "epoch": 0.33, "grad_norm": 1.0592831373214722, "learning_rate": 7.826603129926696e-06, "loss": 0.5505, "step": 5201 }, { "epoch": 0.33, "grad_norm": 0.9644933342933655, "learning_rate": 7.825756757725956e-06, "loss": 0.5594, "step": 5202 }, { "epoch": 0.33, "grad_norm": 1.012763261795044, "learning_rate": 7.824910266542426e-06, "loss": 0.5953, "step": 5203 }, { "epoch": 0.33, "grad_norm": 0.9749206304550171, "learning_rate": 7.824063656411756e-06, "loss": 0.4719, "step": 5204 }, { "epoch": 0.33, "grad_norm": 0.92559415102005, "learning_rate": 7.823216927369588e-06, "loss": 0.5293, "step": 5205 }, { "epoch": 0.33, "grad_norm": 1.0100061893463135, "learning_rate": 7.822370079451576e-06, "loss": 0.5859, "step": 5206 }, { "epoch": 0.33, "grad_norm": 0.9328200221061707, "learning_rate": 7.821523112693377e-06, "loss": 0.509, "step": 5207 }, { "epoch": 0.33, "grad_norm": 0.9542256593704224, "learning_rate": 7.820676027130657e-06, "loss": 0.4838, "step": 5208 }, { "epoch": 0.33, "grad_norm": 0.9602751135826111, "learning_rate": 7.81982882279908e-06, "loss": 0.5223, "step": 5209 }, { "epoch": 0.33, "grad_norm": 1.0051568746566772, "learning_rate": 7.818981499734323e-06, "loss": 0.5252, "step": 5210 }, { "epoch": 0.33, "grad_norm": 0.9804307222366333, "learning_rate": 7.818134057972062e-06, "loss": 0.516, "step": 5211 }, { "epoch": 0.33, "grad_norm": 1.0683549642562866, "learning_rate": 7.817286497547977e-06, "loss": 0.5317, "step": 5212 }, { "epoch": 0.33, "grad_norm": 1.01795494556427, "learning_rate": 7.81643881849776e-06, "loss": 0.562, "step": 5213 }, { "epoch": 0.33, "grad_norm": 1.0241423845291138, "learning_rate": 7.815591020857101e-06, "loss": 0.5633, "step": 5214 }, { "epoch": 0.33, "grad_norm": 1.044502854347229, "learning_rate": 7.8147431046617e-06, "loss": 0.5379, "step": 5215 }, { "epoch": 0.33, "grad_norm": 0.967847466468811, "learning_rate": 7.813895069947257e-06, "loss": 0.5734, "step": 5216 }, { "epoch": 0.33, "grad_norm": 0.9767869710922241, "learning_rate": 7.813046916749483e-06, "loss": 0.5801, "step": 5217 }, { "epoch": 0.33, "grad_norm": 1.1450245380401611, "learning_rate": 7.812198645104088e-06, "loss": 0.5427, "step": 5218 }, { "epoch": 0.33, "grad_norm": 1.0699288845062256, "learning_rate": 7.811350255046792e-06, "loss": 0.5262, "step": 5219 }, { "epoch": 0.33, "grad_norm": 1.0704792737960815, "learning_rate": 7.810501746613316e-06, "loss": 0.514, "step": 5220 }, { "epoch": 0.33, "grad_norm": 1.0542374849319458, "learning_rate": 7.809653119839389e-06, "loss": 0.5324, "step": 5221 }, { "epoch": 0.33, "grad_norm": 1.0917741060256958, "learning_rate": 7.808804374760742e-06, "loss": 0.5393, "step": 5222 }, { "epoch": 0.33, "grad_norm": 1.0410189628601074, "learning_rate": 7.807955511413114e-06, "loss": 0.5527, "step": 5223 }, { "epoch": 0.33, "grad_norm": 0.987148106098175, "learning_rate": 7.80710652983225e-06, "loss": 0.4854, "step": 5224 }, { "epoch": 0.33, "grad_norm": 1.000052809715271, "learning_rate": 7.806257430053893e-06, "loss": 0.5382, "step": 5225 }, { "epoch": 0.33, "grad_norm": 1.1064789295196533, "learning_rate": 7.8054082121138e-06, "loss": 0.5361, "step": 5226 }, { "epoch": 0.33, "grad_norm": 0.9538825750350952, "learning_rate": 7.804558876047724e-06, "loss": 0.536, "step": 5227 }, { "epoch": 0.33, "grad_norm": 1.048471450805664, "learning_rate": 7.80370942189143e-06, "loss": 0.5302, "step": 5228 }, { "epoch": 0.33, "grad_norm": 1.125544548034668, "learning_rate": 7.802859849680686e-06, "loss": 0.5418, "step": 5229 }, { "epoch": 0.33, "grad_norm": 0.9804906845092773, "learning_rate": 7.802010159451267e-06, "loss": 0.5573, "step": 5230 }, { "epoch": 0.33, "grad_norm": 1.0753605365753174, "learning_rate": 7.801160351238945e-06, "loss": 0.5406, "step": 5231 }, { "epoch": 0.33, "grad_norm": 0.953654944896698, "learning_rate": 7.800310425079505e-06, "loss": 0.4982, "step": 5232 }, { "epoch": 0.33, "grad_norm": 0.9982483983039856, "learning_rate": 7.799460381008736e-06, "loss": 0.5012, "step": 5233 }, { "epoch": 0.33, "grad_norm": 1.003063678741455, "learning_rate": 7.798610219062428e-06, "loss": 0.5596, "step": 5234 }, { "epoch": 0.33, "grad_norm": 1.0453439950942993, "learning_rate": 7.79775993927638e-06, "loss": 0.5208, "step": 5235 }, { "epoch": 0.33, "grad_norm": 1.057268500328064, "learning_rate": 7.796909541686392e-06, "loss": 0.4797, "step": 5236 }, { "epoch": 0.33, "grad_norm": 0.9837533831596375, "learning_rate": 7.796059026328274e-06, "loss": 0.5483, "step": 5237 }, { "epoch": 0.33, "grad_norm": 0.974611222743988, "learning_rate": 7.795208393237839e-06, "loss": 0.511, "step": 5238 }, { "epoch": 0.33, "grad_norm": 1.0048149824142456, "learning_rate": 7.794357642450899e-06, "loss": 0.5638, "step": 5239 }, { "epoch": 0.33, "grad_norm": 1.0645167827606201, "learning_rate": 7.793506774003282e-06, "loss": 0.5545, "step": 5240 }, { "epoch": 0.33, "grad_norm": 0.9813336730003357, "learning_rate": 7.792655787930811e-06, "loss": 0.5464, "step": 5241 }, { "epoch": 0.33, "grad_norm": 1.0172266960144043, "learning_rate": 7.791804684269322e-06, "loss": 0.5456, "step": 5242 }, { "epoch": 0.33, "grad_norm": 1.032725214958191, "learning_rate": 7.790953463054647e-06, "loss": 0.5566, "step": 5243 }, { "epoch": 0.33, "grad_norm": 0.9283890128135681, "learning_rate": 7.790102124322633e-06, "loss": 0.5315, "step": 5244 }, { "epoch": 0.33, "grad_norm": 1.0746551752090454, "learning_rate": 7.789250668109124e-06, "loss": 0.5924, "step": 5245 }, { "epoch": 0.33, "grad_norm": 1.0125327110290527, "learning_rate": 7.788399094449971e-06, "loss": 0.5477, "step": 5246 }, { "epoch": 0.33, "grad_norm": 0.9725445508956909, "learning_rate": 7.787547403381033e-06, "loss": 0.512, "step": 5247 }, { "epoch": 0.33, "grad_norm": 1.0623197555541992, "learning_rate": 7.786695594938172e-06, "loss": 0.5346, "step": 5248 }, { "epoch": 0.33, "grad_norm": 1.1318409442901611, "learning_rate": 7.785843669157253e-06, "loss": 0.588, "step": 5249 }, { "epoch": 0.33, "grad_norm": 1.0636366605758667, "learning_rate": 7.784991626074148e-06, "loss": 0.6046, "step": 5250 }, { "epoch": 0.33, "grad_norm": 1.0380812883377075, "learning_rate": 7.784139465724734e-06, "loss": 0.5747, "step": 5251 }, { "epoch": 0.33, "grad_norm": 0.9197916388511658, "learning_rate": 7.783287188144893e-06, "loss": 0.5228, "step": 5252 }, { "epoch": 0.33, "grad_norm": 0.9440740942955017, "learning_rate": 7.78243479337051e-06, "loss": 0.5606, "step": 5253 }, { "epoch": 0.33, "grad_norm": 0.9165782332420349, "learning_rate": 7.781582281437479e-06, "loss": 0.5276, "step": 5254 }, { "epoch": 0.33, "grad_norm": 1.1359775066375732, "learning_rate": 7.780729652381694e-06, "loss": 0.5725, "step": 5255 }, { "epoch": 0.33, "grad_norm": 0.9660843014717102, "learning_rate": 7.779876906239055e-06, "loss": 0.5514, "step": 5256 }, { "epoch": 0.33, "grad_norm": 1.0322363376617432, "learning_rate": 7.779024043045471e-06, "loss": 0.5686, "step": 5257 }, { "epoch": 0.33, "grad_norm": 1.0766687393188477, "learning_rate": 7.778171062836853e-06, "loss": 0.5995, "step": 5258 }, { "epoch": 0.33, "grad_norm": 1.1299717426300049, "learning_rate": 7.777317965649114e-06, "loss": 0.5667, "step": 5259 }, { "epoch": 0.33, "grad_norm": 0.9850207567214966, "learning_rate": 7.776464751518177e-06, "loss": 0.5553, "step": 5260 }, { "epoch": 0.33, "grad_norm": 1.0228867530822754, "learning_rate": 7.775611420479971e-06, "loss": 0.4989, "step": 5261 }, { "epoch": 0.33, "grad_norm": 0.973818838596344, "learning_rate": 7.774757972570423e-06, "loss": 0.5209, "step": 5262 }, { "epoch": 0.33, "grad_norm": 0.9740568995475769, "learning_rate": 7.773904407825467e-06, "loss": 0.5751, "step": 5263 }, { "epoch": 0.33, "grad_norm": 1.050536870956421, "learning_rate": 7.773050726281048e-06, "loss": 0.5365, "step": 5264 }, { "epoch": 0.33, "grad_norm": 1.1395479440689087, "learning_rate": 7.772196927973109e-06, "loss": 0.5459, "step": 5265 }, { "epoch": 0.33, "grad_norm": 1.0125073194503784, "learning_rate": 7.771343012937602e-06, "loss": 0.5437, "step": 5266 }, { "epoch": 0.33, "grad_norm": 1.010521650314331, "learning_rate": 7.77048898121048e-06, "loss": 0.5383, "step": 5267 }, { "epoch": 0.33, "grad_norm": 1.0275487899780273, "learning_rate": 7.769634832827706e-06, "loss": 0.5211, "step": 5268 }, { "epoch": 0.33, "grad_norm": 1.042787790298462, "learning_rate": 7.768780567825243e-06, "loss": 0.53, "step": 5269 }, { "epoch": 0.33, "grad_norm": 1.0025073289871216, "learning_rate": 7.767926186239064e-06, "loss": 0.5722, "step": 5270 }, { "epoch": 0.33, "grad_norm": 1.065557599067688, "learning_rate": 7.76707168810514e-06, "loss": 0.567, "step": 5271 }, { "epoch": 0.33, "grad_norm": 0.9811010956764221, "learning_rate": 7.766217073459454e-06, "loss": 0.5216, "step": 5272 }, { "epoch": 0.33, "grad_norm": 0.9990840554237366, "learning_rate": 7.765362342337991e-06, "loss": 0.5442, "step": 5273 }, { "epoch": 0.33, "grad_norm": 0.9401424527168274, "learning_rate": 7.76450749477674e-06, "loss": 0.5128, "step": 5274 }, { "epoch": 0.33, "grad_norm": 0.9551236629486084, "learning_rate": 7.763652530811692e-06, "loss": 0.5355, "step": 5275 }, { "epoch": 0.33, "grad_norm": 0.9799797534942627, "learning_rate": 7.762797450478853e-06, "loss": 0.5377, "step": 5276 }, { "epoch": 0.33, "grad_norm": 0.9672342538833618, "learning_rate": 7.761942253814225e-06, "loss": 0.5489, "step": 5277 }, { "epoch": 0.33, "grad_norm": 0.9345821738243103, "learning_rate": 7.761086940853814e-06, "loss": 0.4499, "step": 5278 }, { "epoch": 0.33, "grad_norm": 1.0644389390945435, "learning_rate": 7.76023151163364e-06, "loss": 0.5205, "step": 5279 }, { "epoch": 0.33, "grad_norm": 1.0195364952087402, "learning_rate": 7.759375966189718e-06, "loss": 0.5371, "step": 5280 }, { "epoch": 0.33, "grad_norm": 1.0258504152297974, "learning_rate": 7.758520304558072e-06, "loss": 0.5222, "step": 5281 }, { "epoch": 0.33, "grad_norm": 1.0528091192245483, "learning_rate": 7.757664526774733e-06, "loss": 0.5424, "step": 5282 }, { "epoch": 0.33, "grad_norm": 1.0659440755844116, "learning_rate": 7.756808632875737e-06, "loss": 0.5677, "step": 5283 }, { "epoch": 0.33, "grad_norm": 1.067395806312561, "learning_rate": 7.755952622897117e-06, "loss": 0.5145, "step": 5284 }, { "epoch": 0.33, "grad_norm": 1.0888980627059937, "learning_rate": 7.755096496874918e-06, "loss": 0.537, "step": 5285 }, { "epoch": 0.33, "grad_norm": 0.9735004305839539, "learning_rate": 7.75424025484519e-06, "loss": 0.5465, "step": 5286 }, { "epoch": 0.33, "grad_norm": 1.0772993564605713, "learning_rate": 7.753383896843988e-06, "loss": 0.5832, "step": 5287 }, { "epoch": 0.34, "grad_norm": 0.9806780219078064, "learning_rate": 7.752527422907368e-06, "loss": 0.5101, "step": 5288 }, { "epoch": 0.34, "grad_norm": 0.9502233862876892, "learning_rate": 7.751670833071393e-06, "loss": 0.5332, "step": 5289 }, { "epoch": 0.34, "grad_norm": 1.0269991159439087, "learning_rate": 7.750814127372131e-06, "loss": 0.4955, "step": 5290 }, { "epoch": 0.34, "grad_norm": 0.9721434712409973, "learning_rate": 7.749957305845656e-06, "loss": 0.5073, "step": 5291 }, { "epoch": 0.34, "grad_norm": 1.0321543216705322, "learning_rate": 7.749100368528047e-06, "loss": 0.5809, "step": 5292 }, { "epoch": 0.34, "grad_norm": 1.013351321220398, "learning_rate": 7.748243315455382e-06, "loss": 0.552, "step": 5293 }, { "epoch": 0.34, "grad_norm": 0.9572513103485107, "learning_rate": 7.747386146663753e-06, "loss": 0.5345, "step": 5294 }, { "epoch": 0.34, "grad_norm": 0.9991185665130615, "learning_rate": 7.746528862189251e-06, "loss": 0.5737, "step": 5295 }, { "epoch": 0.34, "grad_norm": 1.0627458095550537, "learning_rate": 7.745671462067974e-06, "loss": 0.5519, "step": 5296 }, { "epoch": 0.34, "grad_norm": 1.0270791053771973, "learning_rate": 7.74481394633602e-06, "loss": 0.5641, "step": 5297 }, { "epoch": 0.34, "grad_norm": 1.138097882270813, "learning_rate": 7.743956315029502e-06, "loss": 0.5608, "step": 5298 }, { "epoch": 0.34, "grad_norm": 1.0272469520568848, "learning_rate": 7.743098568184529e-06, "loss": 0.5283, "step": 5299 }, { "epoch": 0.34, "grad_norm": 1.0091753005981445, "learning_rate": 7.742240705837217e-06, "loss": 0.5103, "step": 5300 }, { "epoch": 0.34, "grad_norm": 1.0320264101028442, "learning_rate": 7.741382728023687e-06, "loss": 0.5482, "step": 5301 }, { "epoch": 0.34, "grad_norm": 0.9640737175941467, "learning_rate": 7.74052463478007e-06, "loss": 0.5287, "step": 5302 }, { "epoch": 0.34, "grad_norm": 0.966127872467041, "learning_rate": 7.739666426142493e-06, "loss": 0.5348, "step": 5303 }, { "epoch": 0.34, "grad_norm": 1.0670241117477417, "learning_rate": 7.738808102147093e-06, "loss": 0.5128, "step": 5304 }, { "epoch": 0.34, "grad_norm": 1.044314980506897, "learning_rate": 7.737949662830012e-06, "loss": 0.5914, "step": 5305 }, { "epoch": 0.34, "grad_norm": 1.0198527574539185, "learning_rate": 7.737091108227395e-06, "loss": 0.5561, "step": 5306 }, { "epoch": 0.34, "grad_norm": 1.060970664024353, "learning_rate": 7.736232438375391e-06, "loss": 0.5686, "step": 5307 }, { "epoch": 0.34, "grad_norm": 0.951311469078064, "learning_rate": 7.735373653310161e-06, "loss": 0.5144, "step": 5308 }, { "epoch": 0.34, "grad_norm": 1.0283550024032593, "learning_rate": 7.73451475306786e-06, "loss": 0.5794, "step": 5309 }, { "epoch": 0.34, "grad_norm": 0.9955611824989319, "learning_rate": 7.733655737684657e-06, "loss": 0.5263, "step": 5310 }, { "epoch": 0.34, "grad_norm": 1.0055062770843506, "learning_rate": 7.732796607196719e-06, "loss": 0.5571, "step": 5311 }, { "epoch": 0.34, "grad_norm": 1.0637763738632202, "learning_rate": 7.731937361640223e-06, "loss": 0.5657, "step": 5312 }, { "epoch": 0.34, "grad_norm": 0.9737985730171204, "learning_rate": 7.73107800105135e-06, "loss": 0.543, "step": 5313 }, { "epoch": 0.34, "grad_norm": 1.0452426671981812, "learning_rate": 7.730218525466283e-06, "loss": 0.5642, "step": 5314 }, { "epoch": 0.34, "grad_norm": 0.9778900742530823, "learning_rate": 7.729358934921209e-06, "loss": 0.5404, "step": 5315 }, { "epoch": 0.34, "grad_norm": 1.0338850021362305, "learning_rate": 7.728499229452326e-06, "loss": 0.514, "step": 5316 }, { "epoch": 0.34, "grad_norm": 1.054598331451416, "learning_rate": 7.727639409095833e-06, "loss": 0.5899, "step": 5317 }, { "epoch": 0.34, "grad_norm": 1.118851661682129, "learning_rate": 7.726779473887933e-06, "loss": 0.5827, "step": 5318 }, { "epoch": 0.34, "grad_norm": 1.0014888048171997, "learning_rate": 7.725919423864837e-06, "loss": 0.5633, "step": 5319 }, { "epoch": 0.34, "grad_norm": 1.0067678689956665, "learning_rate": 7.725059259062753e-06, "loss": 0.5687, "step": 5320 }, { "epoch": 0.34, "grad_norm": 1.0196219682693481, "learning_rate": 7.724198979517905e-06, "loss": 0.5275, "step": 5321 }, { "epoch": 0.34, "grad_norm": 1.0631487369537354, "learning_rate": 7.723338585266515e-06, "loss": 0.5602, "step": 5322 }, { "epoch": 0.34, "grad_norm": 1.1154228448867798, "learning_rate": 7.722478076344812e-06, "loss": 0.5358, "step": 5323 }, { "epoch": 0.34, "grad_norm": 1.059723138809204, "learning_rate": 7.721617452789028e-06, "loss": 0.5893, "step": 5324 }, { "epoch": 0.34, "grad_norm": 1.0555039644241333, "learning_rate": 7.7207567146354e-06, "loss": 0.5798, "step": 5325 }, { "epoch": 0.34, "grad_norm": 1.053282618522644, "learning_rate": 7.71989586192017e-06, "loss": 0.5244, "step": 5326 }, { "epoch": 0.34, "grad_norm": 1.051269769668579, "learning_rate": 7.719034894679589e-06, "loss": 0.5534, "step": 5327 }, { "epoch": 0.34, "grad_norm": 1.103200078010559, "learning_rate": 7.718173812949908e-06, "loss": 0.53, "step": 5328 }, { "epoch": 0.34, "grad_norm": 1.0847004652023315, "learning_rate": 7.717312616767382e-06, "loss": 0.5591, "step": 5329 }, { "epoch": 0.34, "grad_norm": 0.9675300717353821, "learning_rate": 7.716451306168276e-06, "loss": 0.5453, "step": 5330 }, { "epoch": 0.34, "grad_norm": 0.9376102685928345, "learning_rate": 7.715589881188852e-06, "loss": 0.5355, "step": 5331 }, { "epoch": 0.34, "grad_norm": 1.0276098251342773, "learning_rate": 7.71472834186539e-06, "loss": 0.5211, "step": 5332 }, { "epoch": 0.34, "grad_norm": 1.0249872207641602, "learning_rate": 7.713866688234157e-06, "loss": 0.5386, "step": 5333 }, { "epoch": 0.34, "grad_norm": 0.946427583694458, "learning_rate": 7.713004920331441e-06, "loss": 0.5545, "step": 5334 }, { "epoch": 0.34, "grad_norm": 0.9513344168663025, "learning_rate": 7.712143038193525e-06, "loss": 0.508, "step": 5335 }, { "epoch": 0.34, "grad_norm": 1.002827763557434, "learning_rate": 7.7112810418567e-06, "loss": 0.528, "step": 5336 }, { "epoch": 0.34, "grad_norm": 1.0068122148513794, "learning_rate": 7.710418931357263e-06, "loss": 0.553, "step": 5337 }, { "epoch": 0.34, "grad_norm": 1.025156855583191, "learning_rate": 7.709556706731514e-06, "loss": 0.5589, "step": 5338 }, { "epoch": 0.34, "grad_norm": 1.054255723953247, "learning_rate": 7.708694368015758e-06, "loss": 0.5058, "step": 5339 }, { "epoch": 0.34, "grad_norm": 0.8967742919921875, "learning_rate": 7.707831915246304e-06, "loss": 0.5334, "step": 5340 }, { "epoch": 0.34, "grad_norm": 1.0459626913070679, "learning_rate": 7.706969348459469e-06, "loss": 0.5869, "step": 5341 }, { "epoch": 0.34, "grad_norm": 1.0625965595245361, "learning_rate": 7.70610666769157e-06, "loss": 0.6205, "step": 5342 }, { "epoch": 0.34, "grad_norm": 0.9735672473907471, "learning_rate": 7.705243872978935e-06, "loss": 0.5298, "step": 5343 }, { "epoch": 0.34, "grad_norm": 0.9772914052009583, "learning_rate": 7.704380964357889e-06, "loss": 0.5148, "step": 5344 }, { "epoch": 0.34, "grad_norm": 1.0570646524429321, "learning_rate": 7.70351794186477e-06, "loss": 0.5635, "step": 5345 }, { "epoch": 0.34, "grad_norm": 1.028130054473877, "learning_rate": 7.702654805535915e-06, "loss": 0.5314, "step": 5346 }, { "epoch": 0.34, "grad_norm": 0.987157940864563, "learning_rate": 7.701791555407669e-06, "loss": 0.5216, "step": 5347 }, { "epoch": 0.34, "grad_norm": 1.0099393129348755, "learning_rate": 7.700928191516378e-06, "loss": 0.5943, "step": 5348 }, { "epoch": 0.34, "grad_norm": 0.9394066333770752, "learning_rate": 7.700064713898398e-06, "loss": 0.5601, "step": 5349 }, { "epoch": 0.34, "grad_norm": 1.0110291242599487, "learning_rate": 7.699201122590086e-06, "loss": 0.5385, "step": 5350 }, { "epoch": 0.34, "grad_norm": 1.0360554456710815, "learning_rate": 7.6983374176278e-06, "loss": 0.5548, "step": 5351 }, { "epoch": 0.34, "grad_norm": 0.9967292547225952, "learning_rate": 7.697473599047918e-06, "loss": 0.5413, "step": 5352 }, { "epoch": 0.34, "grad_norm": 0.9504690766334534, "learning_rate": 7.696609666886805e-06, "loss": 0.5463, "step": 5353 }, { "epoch": 0.34, "grad_norm": 0.9553558826446533, "learning_rate": 7.695745621180839e-06, "loss": 0.4722, "step": 5354 }, { "epoch": 0.34, "grad_norm": 0.9541303515434265, "learning_rate": 7.694881461966402e-06, "loss": 0.5693, "step": 5355 }, { "epoch": 0.34, "grad_norm": 1.013753056526184, "learning_rate": 7.694017189279882e-06, "loss": 0.5445, "step": 5356 }, { "epoch": 0.34, "grad_norm": 0.9317124485969543, "learning_rate": 7.69315280315767e-06, "loss": 0.4885, "step": 5357 }, { "epoch": 0.34, "grad_norm": 1.0442945957183838, "learning_rate": 7.692288303636163e-06, "loss": 0.5415, "step": 5358 }, { "epoch": 0.34, "grad_norm": 0.9768821597099304, "learning_rate": 7.69142369075176e-06, "loss": 0.5353, "step": 5359 }, { "epoch": 0.34, "grad_norm": 0.9622286558151245, "learning_rate": 7.690558964540872e-06, "loss": 0.5459, "step": 5360 }, { "epoch": 0.34, "grad_norm": 1.0875238180160522, "learning_rate": 7.6896941250399e-06, "loss": 0.533, "step": 5361 }, { "epoch": 0.34, "grad_norm": 1.0347795486450195, "learning_rate": 7.688829172285267e-06, "loss": 0.5402, "step": 5362 }, { "epoch": 0.34, "grad_norm": 0.9879282712936401, "learning_rate": 7.687964106313392e-06, "loss": 0.5114, "step": 5363 }, { "epoch": 0.34, "grad_norm": 1.0509141683578491, "learning_rate": 7.687098927160701e-06, "loss": 0.5641, "step": 5364 }, { "epoch": 0.34, "grad_norm": 1.0029528141021729, "learning_rate": 7.68623363486362e-06, "loss": 0.569, "step": 5365 }, { "epoch": 0.34, "grad_norm": 1.0377953052520752, "learning_rate": 7.685368229458584e-06, "loss": 0.5288, "step": 5366 }, { "epoch": 0.34, "grad_norm": 1.0632033348083496, "learning_rate": 7.684502710982035e-06, "loss": 0.5371, "step": 5367 }, { "epoch": 0.34, "grad_norm": 0.9152517914772034, "learning_rate": 7.683637079470418e-06, "loss": 0.5469, "step": 5368 }, { "epoch": 0.34, "grad_norm": 0.9898026585578918, "learning_rate": 7.682771334960178e-06, "loss": 0.5199, "step": 5369 }, { "epoch": 0.34, "grad_norm": 0.9831115007400513, "learning_rate": 7.681905477487769e-06, "loss": 0.5911, "step": 5370 }, { "epoch": 0.34, "grad_norm": 0.8863547444343567, "learning_rate": 7.68103950708965e-06, "loss": 0.4814, "step": 5371 }, { "epoch": 0.34, "grad_norm": 0.9888505935668945, "learning_rate": 7.680173423802282e-06, "loss": 0.5537, "step": 5372 }, { "epoch": 0.34, "grad_norm": 1.0175485610961914, "learning_rate": 7.679307227662136e-06, "loss": 0.5367, "step": 5373 }, { "epoch": 0.34, "grad_norm": 1.0653303861618042, "learning_rate": 7.678440918705686e-06, "loss": 0.5694, "step": 5374 }, { "epoch": 0.34, "grad_norm": 1.096785545349121, "learning_rate": 7.677574496969404e-06, "loss": 0.5849, "step": 5375 }, { "epoch": 0.34, "grad_norm": 1.0689427852630615, "learning_rate": 7.676707962489775e-06, "loss": 0.5733, "step": 5376 }, { "epoch": 0.34, "grad_norm": 1.0037180185317993, "learning_rate": 7.675841315303284e-06, "loss": 0.6059, "step": 5377 }, { "epoch": 0.34, "grad_norm": 0.9984425902366638, "learning_rate": 7.674974555446425e-06, "loss": 0.5509, "step": 5378 }, { "epoch": 0.34, "grad_norm": 0.9228800535202026, "learning_rate": 7.674107682955693e-06, "loss": 0.5234, "step": 5379 }, { "epoch": 0.34, "grad_norm": 1.0302813053131104, "learning_rate": 7.67324069786759e-06, "loss": 0.5437, "step": 5380 }, { "epoch": 0.34, "grad_norm": 1.0414074659347534, "learning_rate": 7.67237360021862e-06, "loss": 0.5869, "step": 5381 }, { "epoch": 0.34, "grad_norm": 1.0754034519195557, "learning_rate": 7.671506390045293e-06, "loss": 0.5862, "step": 5382 }, { "epoch": 0.34, "grad_norm": 1.090884804725647, "learning_rate": 7.670639067384126e-06, "loss": 0.5666, "step": 5383 }, { "epoch": 0.34, "grad_norm": 0.976880669593811, "learning_rate": 7.66977163227164e-06, "loss": 0.5274, "step": 5384 }, { "epoch": 0.34, "grad_norm": 0.9574406147003174, "learning_rate": 7.668904084744357e-06, "loss": 0.5016, "step": 5385 }, { "epoch": 0.34, "grad_norm": 0.9235224723815918, "learning_rate": 7.668036424838808e-06, "loss": 0.5219, "step": 5386 }, { "epoch": 0.34, "grad_norm": 0.9787632822990417, "learning_rate": 7.667168652591524e-06, "loss": 0.5527, "step": 5387 }, { "epoch": 0.34, "grad_norm": 1.0407617092132568, "learning_rate": 7.66630076803905e-06, "loss": 0.5838, "step": 5388 }, { "epoch": 0.34, "grad_norm": 0.9891582727432251, "learning_rate": 7.665432771217922e-06, "loss": 0.5295, "step": 5389 }, { "epoch": 0.34, "grad_norm": 1.0179615020751953, "learning_rate": 7.664564662164696e-06, "loss": 0.5429, "step": 5390 }, { "epoch": 0.34, "grad_norm": 1.1185835599899292, "learning_rate": 7.66369644091592e-06, "loss": 0.5839, "step": 5391 }, { "epoch": 0.34, "grad_norm": 0.9839662313461304, "learning_rate": 7.662828107508153e-06, "loss": 0.537, "step": 5392 }, { "epoch": 0.34, "grad_norm": 0.948878824710846, "learning_rate": 7.661959661977958e-06, "loss": 0.5315, "step": 5393 }, { "epoch": 0.34, "grad_norm": 1.1196187734603882, "learning_rate": 7.661091104361902e-06, "loss": 0.566, "step": 5394 }, { "epoch": 0.34, "grad_norm": 0.9894713163375854, "learning_rate": 7.660222434696556e-06, "loss": 0.5725, "step": 5395 }, { "epoch": 0.34, "grad_norm": 0.982667088508606, "learning_rate": 7.6593536530185e-06, "loss": 0.5424, "step": 5396 }, { "epoch": 0.34, "grad_norm": 1.060299038887024, "learning_rate": 7.658484759364308e-06, "loss": 0.5497, "step": 5397 }, { "epoch": 0.34, "grad_norm": 1.089518666267395, "learning_rate": 7.657615753770575e-06, "loss": 0.573, "step": 5398 }, { "epoch": 0.34, "grad_norm": 0.8669412136077881, "learning_rate": 7.656746636273889e-06, "loss": 0.4693, "step": 5399 }, { "epoch": 0.34, "grad_norm": 0.986979603767395, "learning_rate": 7.655877406910841e-06, "loss": 0.5226, "step": 5400 }, { "epoch": 0.34, "grad_norm": 1.1012288331985474, "learning_rate": 7.655008065718036e-06, "loss": 0.5856, "step": 5401 }, { "epoch": 0.34, "grad_norm": 0.949129045009613, "learning_rate": 7.654138612732078e-06, "loss": 0.5477, "step": 5402 }, { "epoch": 0.34, "grad_norm": 0.9540390372276306, "learning_rate": 7.653269047989575e-06, "loss": 0.529, "step": 5403 }, { "epoch": 0.34, "grad_norm": 0.9496141076087952, "learning_rate": 7.652399371527142e-06, "loss": 0.5673, "step": 5404 }, { "epoch": 0.34, "grad_norm": 1.135327696800232, "learning_rate": 7.651529583381398e-06, "loss": 0.5832, "step": 5405 }, { "epoch": 0.34, "grad_norm": 1.0860304832458496, "learning_rate": 7.65065968358897e-06, "loss": 0.6126, "step": 5406 }, { "epoch": 0.34, "grad_norm": 1.0657199621200562, "learning_rate": 7.649789672186483e-06, "loss": 0.5898, "step": 5407 }, { "epoch": 0.34, "grad_norm": 1.0875470638275146, "learning_rate": 7.648919549210567e-06, "loss": 0.5724, "step": 5408 }, { "epoch": 0.34, "grad_norm": 0.9945139288902283, "learning_rate": 7.648049314697869e-06, "loss": 0.495, "step": 5409 }, { "epoch": 0.34, "grad_norm": 1.04204523563385, "learning_rate": 7.647178968685024e-06, "loss": 0.5271, "step": 5410 }, { "epoch": 0.34, "grad_norm": 1.11293625831604, "learning_rate": 7.646308511208682e-06, "loss": 0.6148, "step": 5411 }, { "epoch": 0.34, "grad_norm": 0.973831057548523, "learning_rate": 7.645437942305491e-06, "loss": 0.5045, "step": 5412 }, { "epoch": 0.34, "grad_norm": 1.0557979345321655, "learning_rate": 7.644567262012115e-06, "loss": 0.5711, "step": 5413 }, { "epoch": 0.34, "grad_norm": 1.0014163255691528, "learning_rate": 7.643696470365209e-06, "loss": 0.562, "step": 5414 }, { "epoch": 0.34, "grad_norm": 1.0061209201812744, "learning_rate": 7.642825567401444e-06, "loss": 0.5078, "step": 5415 }, { "epoch": 0.34, "grad_norm": 1.074394702911377, "learning_rate": 7.641954553157487e-06, "loss": 0.5228, "step": 5416 }, { "epoch": 0.34, "grad_norm": 0.9882181286811829, "learning_rate": 7.641083427670014e-06, "loss": 0.5472, "step": 5417 }, { "epoch": 0.34, "grad_norm": 1.0100023746490479, "learning_rate": 7.640212190975707e-06, "loss": 0.5354, "step": 5418 }, { "epoch": 0.34, "grad_norm": 1.0182669162750244, "learning_rate": 7.639340843111247e-06, "loss": 0.5168, "step": 5419 }, { "epoch": 0.34, "grad_norm": 0.8914739489555359, "learning_rate": 7.638469384113328e-06, "loss": 0.4661, "step": 5420 }, { "epoch": 0.34, "grad_norm": 0.998095691204071, "learning_rate": 7.637597814018638e-06, "loss": 0.5238, "step": 5421 }, { "epoch": 0.34, "grad_norm": 1.0295097827911377, "learning_rate": 7.636726132863883e-06, "loss": 0.5437, "step": 5422 }, { "epoch": 0.34, "grad_norm": 1.030522346496582, "learning_rate": 7.635854340685762e-06, "loss": 0.541, "step": 5423 }, { "epoch": 0.34, "grad_norm": 1.0026613473892212, "learning_rate": 7.634982437520984e-06, "loss": 0.5135, "step": 5424 }, { "epoch": 0.34, "grad_norm": 0.9932113289833069, "learning_rate": 7.634110423406262e-06, "loss": 0.5158, "step": 5425 }, { "epoch": 0.34, "grad_norm": 1.0206046104431152, "learning_rate": 7.633238298378315e-06, "loss": 0.5665, "step": 5426 }, { "epoch": 0.34, "grad_norm": 1.0064122676849365, "learning_rate": 7.632366062473862e-06, "loss": 0.5221, "step": 5427 }, { "epoch": 0.34, "grad_norm": 1.0268290042877197, "learning_rate": 7.631493715729632e-06, "loss": 0.5089, "step": 5428 }, { "epoch": 0.34, "grad_norm": 1.0285868644714355, "learning_rate": 7.630621258182354e-06, "loss": 0.5365, "step": 5429 }, { "epoch": 0.34, "grad_norm": 0.9036509990692139, "learning_rate": 7.62974868986877e-06, "loss": 0.5043, "step": 5430 }, { "epoch": 0.34, "grad_norm": 1.057509183883667, "learning_rate": 7.628876010825614e-06, "loss": 0.5889, "step": 5431 }, { "epoch": 0.34, "grad_norm": 0.924913227558136, "learning_rate": 7.628003221089635e-06, "loss": 0.4883, "step": 5432 }, { "epoch": 0.34, "grad_norm": 1.1624161005020142, "learning_rate": 7.6271303206975825e-06, "loss": 0.5788, "step": 5433 }, { "epoch": 0.34, "grad_norm": 1.0000855922698975, "learning_rate": 7.626257309686211e-06, "loss": 0.531, "step": 5434 }, { "epoch": 0.34, "grad_norm": 0.98502117395401, "learning_rate": 7.6253841880922805e-06, "loss": 0.533, "step": 5435 }, { "epoch": 0.34, "grad_norm": 1.1036839485168457, "learning_rate": 7.624510955952555e-06, "loss": 0.515, "step": 5436 }, { "epoch": 0.34, "grad_norm": 1.0275412797927856, "learning_rate": 7.623637613303805e-06, "loss": 0.5339, "step": 5437 }, { "epoch": 0.34, "grad_norm": 0.9513323307037354, "learning_rate": 7.6227641601827996e-06, "loss": 0.4962, "step": 5438 }, { "epoch": 0.34, "grad_norm": 0.9934118986129761, "learning_rate": 7.62189059662632e-06, "loss": 0.5453, "step": 5439 }, { "epoch": 0.34, "grad_norm": 1.0119845867156982, "learning_rate": 7.621016922671147e-06, "loss": 0.5397, "step": 5440 }, { "epoch": 0.34, "grad_norm": 0.9635437726974487, "learning_rate": 7.620143138354072e-06, "loss": 0.5361, "step": 5441 }, { "epoch": 0.34, "grad_norm": 0.966569185256958, "learning_rate": 7.6192692437118825e-06, "loss": 0.5614, "step": 5442 }, { "epoch": 0.34, "grad_norm": 1.117817759513855, "learning_rate": 7.618395238781377e-06, "loss": 0.6088, "step": 5443 }, { "epoch": 0.34, "grad_norm": 1.0499943494796753, "learning_rate": 7.617521123599356e-06, "loss": 0.5452, "step": 5444 }, { "epoch": 0.34, "grad_norm": 1.0381006002426147, "learning_rate": 7.616646898202629e-06, "loss": 0.565, "step": 5445 }, { "epoch": 0.35, "grad_norm": 1.005886435508728, "learning_rate": 7.6157725626280014e-06, "loss": 0.4925, "step": 5446 }, { "epoch": 0.35, "grad_norm": 0.9612359404563904, "learning_rate": 7.61489811691229e-06, "loss": 0.5114, "step": 5447 }, { "epoch": 0.35, "grad_norm": 1.0364564657211304, "learning_rate": 7.614023561092319e-06, "loss": 0.5505, "step": 5448 }, { "epoch": 0.35, "grad_norm": 1.0441381931304932, "learning_rate": 7.613148895204906e-06, "loss": 0.5538, "step": 5449 }, { "epoch": 0.35, "grad_norm": 1.001044750213623, "learning_rate": 7.612274119286884e-06, "loss": 0.5626, "step": 5450 }, { "epoch": 0.35, "grad_norm": 1.1037360429763794, "learning_rate": 7.611399233375087e-06, "loss": 0.562, "step": 5451 }, { "epoch": 0.35, "grad_norm": 1.037367820739746, "learning_rate": 7.610524237506354e-06, "loss": 0.5773, "step": 5452 }, { "epoch": 0.35, "grad_norm": 1.0317175388336182, "learning_rate": 7.6096491317175246e-06, "loss": 0.5643, "step": 5453 }, { "epoch": 0.35, "grad_norm": 0.9355440139770508, "learning_rate": 7.608773916045449e-06, "loss": 0.5216, "step": 5454 }, { "epoch": 0.35, "grad_norm": 0.9212518334388733, "learning_rate": 7.607898590526979e-06, "loss": 0.529, "step": 5455 }, { "epoch": 0.35, "grad_norm": 0.9198362231254578, "learning_rate": 7.607023155198973e-06, "loss": 0.494, "step": 5456 }, { "epoch": 0.35, "grad_norm": 1.0639890432357788, "learning_rate": 7.606147610098289e-06, "loss": 0.5544, "step": 5457 }, { "epoch": 0.35, "grad_norm": 0.9888911247253418, "learning_rate": 7.605271955261796e-06, "loss": 0.5496, "step": 5458 }, { "epoch": 0.35, "grad_norm": 1.0444262027740479, "learning_rate": 7.604396190726364e-06, "loss": 0.5366, "step": 5459 }, { "epoch": 0.35, "grad_norm": 1.2127799987792969, "learning_rate": 7.603520316528869e-06, "loss": 0.575, "step": 5460 }, { "epoch": 0.35, "grad_norm": 1.0242631435394287, "learning_rate": 7.60264433270619e-06, "loss": 0.545, "step": 5461 }, { "epoch": 0.35, "grad_norm": 0.918563961982727, "learning_rate": 7.601768239295213e-06, "loss": 0.4738, "step": 5462 }, { "epoch": 0.35, "grad_norm": 1.0891157388687134, "learning_rate": 7.600892036332825e-06, "loss": 0.5843, "step": 5463 }, { "epoch": 0.35, "grad_norm": 1.0414520502090454, "learning_rate": 7.600015723855922e-06, "loss": 0.5781, "step": 5464 }, { "epoch": 0.35, "grad_norm": 1.1219522953033447, "learning_rate": 7.599139301901401e-06, "loss": 0.5522, "step": 5465 }, { "epoch": 0.35, "grad_norm": 1.0756316184997559, "learning_rate": 7.5982627705061666e-06, "loss": 0.5975, "step": 5466 }, { "epoch": 0.35, "grad_norm": 0.9472103118896484, "learning_rate": 7.597386129707126e-06, "loss": 0.5374, "step": 5467 }, { "epoch": 0.35, "grad_norm": 1.0254567861557007, "learning_rate": 7.596509379541191e-06, "loss": 0.5118, "step": 5468 }, { "epoch": 0.35, "grad_norm": 0.9630821943283081, "learning_rate": 7.595632520045277e-06, "loss": 0.5424, "step": 5469 }, { "epoch": 0.35, "grad_norm": 1.0659990310668945, "learning_rate": 7.594755551256308e-06, "loss": 0.5271, "step": 5470 }, { "epoch": 0.35, "grad_norm": 1.037732720375061, "learning_rate": 7.593878473211209e-06, "loss": 0.5695, "step": 5471 }, { "epoch": 0.35, "grad_norm": 0.9575313329696655, "learning_rate": 7.593001285946913e-06, "loss": 0.4812, "step": 5472 }, { "epoch": 0.35, "grad_norm": 0.9590383172035217, "learning_rate": 7.592123989500351e-06, "loss": 0.5403, "step": 5473 }, { "epoch": 0.35, "grad_norm": 1.141347885131836, "learning_rate": 7.591246583908465e-06, "loss": 0.5727, "step": 5474 }, { "epoch": 0.35, "grad_norm": 1.0539191961288452, "learning_rate": 7.590369069208201e-06, "loss": 0.5665, "step": 5475 }, { "epoch": 0.35, "grad_norm": 0.944985032081604, "learning_rate": 7.589491445436505e-06, "loss": 0.551, "step": 5476 }, { "epoch": 0.35, "grad_norm": 0.9608230590820312, "learning_rate": 7.588613712630334e-06, "loss": 0.5212, "step": 5477 }, { "epoch": 0.35, "grad_norm": 1.004446029663086, "learning_rate": 7.587735870826643e-06, "loss": 0.5286, "step": 5478 }, { "epoch": 0.35, "grad_norm": 0.9937500953674316, "learning_rate": 7.586857920062399e-06, "loss": 0.5697, "step": 5479 }, { "epoch": 0.35, "grad_norm": 1.0046453475952148, "learning_rate": 7.585979860374566e-06, "loss": 0.5417, "step": 5480 }, { "epoch": 0.35, "grad_norm": 0.925399899482727, "learning_rate": 7.5851016918001165e-06, "loss": 0.5237, "step": 5481 }, { "epoch": 0.35, "grad_norm": 0.9018027782440186, "learning_rate": 7.584223414376028e-06, "loss": 0.5335, "step": 5482 }, { "epoch": 0.35, "grad_norm": 1.0428508520126343, "learning_rate": 7.583345028139282e-06, "loss": 0.5563, "step": 5483 }, { "epoch": 0.35, "grad_norm": 1.0010719299316406, "learning_rate": 7.582466533126863e-06, "loss": 0.5427, "step": 5484 }, { "epoch": 0.35, "grad_norm": 1.0468254089355469, "learning_rate": 7.581587929375761e-06, "loss": 0.5808, "step": 5485 }, { "epoch": 0.35, "grad_norm": 0.953605055809021, "learning_rate": 7.580709216922973e-06, "loss": 0.54, "step": 5486 }, { "epoch": 0.35, "grad_norm": 1.0275905132293701, "learning_rate": 7.579830395805499e-06, "loss": 0.5562, "step": 5487 }, { "epoch": 0.35, "grad_norm": 1.1463297605514526, "learning_rate": 7.578951466060341e-06, "loss": 0.5444, "step": 5488 }, { "epoch": 0.35, "grad_norm": 0.9410524368286133, "learning_rate": 7.578072427724506e-06, "loss": 0.5091, "step": 5489 }, { "epoch": 0.35, "grad_norm": 1.0085787773132324, "learning_rate": 7.577193280835011e-06, "loss": 0.5477, "step": 5490 }, { "epoch": 0.35, "grad_norm": 0.9225336313247681, "learning_rate": 7.5763140254288716e-06, "loss": 0.5188, "step": 5491 }, { "epoch": 0.35, "grad_norm": 1.0280065536499023, "learning_rate": 7.575434661543113e-06, "loss": 0.5693, "step": 5492 }, { "epoch": 0.35, "grad_norm": 1.0376582145690918, "learning_rate": 7.574555189214756e-06, "loss": 0.5671, "step": 5493 }, { "epoch": 0.35, "grad_norm": 1.0159308910369873, "learning_rate": 7.573675608480841e-06, "loss": 0.5355, "step": 5494 }, { "epoch": 0.35, "grad_norm": 1.0317509174346924, "learning_rate": 7.5727959193783974e-06, "loss": 0.5673, "step": 5495 }, { "epoch": 0.35, "grad_norm": 1.031361699104309, "learning_rate": 7.571916121944467e-06, "loss": 0.4955, "step": 5496 }, { "epoch": 0.35, "grad_norm": 0.9690631031990051, "learning_rate": 7.571036216216097e-06, "loss": 0.5287, "step": 5497 }, { "epoch": 0.35, "grad_norm": 1.0832093954086304, "learning_rate": 7.570156202230335e-06, "loss": 0.5871, "step": 5498 }, { "epoch": 0.35, "grad_norm": 1.0991196632385254, "learning_rate": 7.569276080024237e-06, "loss": 0.5868, "step": 5499 }, { "epoch": 0.35, "grad_norm": 1.053709626197815, "learning_rate": 7.5683958496348596e-06, "loss": 0.5653, "step": 5500 }, { "epoch": 0.35, "grad_norm": 0.9709888100624084, "learning_rate": 7.567515511099268e-06, "loss": 0.5257, "step": 5501 }, { "epoch": 0.35, "grad_norm": 0.9813860654830933, "learning_rate": 7.56663506445453e-06, "loss": 0.5658, "step": 5502 }, { "epoch": 0.35, "grad_norm": 1.0247528553009033, "learning_rate": 7.5657545097377205e-06, "loss": 0.5182, "step": 5503 }, { "epoch": 0.35, "grad_norm": 0.8857021331787109, "learning_rate": 7.564873846985912e-06, "loss": 0.5192, "step": 5504 }, { "epoch": 0.35, "grad_norm": 1.0899372100830078, "learning_rate": 7.563993076236189e-06, "loss": 0.5456, "step": 5505 }, { "epoch": 0.35, "grad_norm": 1.18221914768219, "learning_rate": 7.563112197525637e-06, "loss": 0.5913, "step": 5506 }, { "epoch": 0.35, "grad_norm": 1.0839096307754517, "learning_rate": 7.562231210891347e-06, "loss": 0.5285, "step": 5507 }, { "epoch": 0.35, "grad_norm": 1.0166727304458618, "learning_rate": 7.561350116370413e-06, "loss": 0.5327, "step": 5508 }, { "epoch": 0.35, "grad_norm": 1.0474815368652344, "learning_rate": 7.560468913999937e-06, "loss": 0.5666, "step": 5509 }, { "epoch": 0.35, "grad_norm": 1.0153292417526245, "learning_rate": 7.559587603817022e-06, "loss": 0.5532, "step": 5510 }, { "epoch": 0.35, "grad_norm": 1.0052329301834106, "learning_rate": 7.558706185858777e-06, "loss": 0.5549, "step": 5511 }, { "epoch": 0.35, "grad_norm": 0.9917075634002686, "learning_rate": 7.557824660162316e-06, "loss": 0.5391, "step": 5512 }, { "epoch": 0.35, "grad_norm": 1.1074151992797852, "learning_rate": 7.556943026764756e-06, "loss": 0.5031, "step": 5513 }, { "epoch": 0.35, "grad_norm": 1.0473936796188354, "learning_rate": 7.55606128570322e-06, "loss": 0.5169, "step": 5514 }, { "epoch": 0.35, "grad_norm": 1.0295766592025757, "learning_rate": 7.5551794370148366e-06, "loss": 0.5779, "step": 5515 }, { "epoch": 0.35, "grad_norm": 0.9668201804161072, "learning_rate": 7.554297480736734e-06, "loss": 0.5236, "step": 5516 }, { "epoch": 0.35, "grad_norm": 0.9963002800941467, "learning_rate": 7.553415416906051e-06, "loss": 0.5361, "step": 5517 }, { "epoch": 0.35, "grad_norm": 1.1146910190582275, "learning_rate": 7.552533245559927e-06, "loss": 0.5512, "step": 5518 }, { "epoch": 0.35, "grad_norm": 1.0025383234024048, "learning_rate": 7.551650966735509e-06, "loss": 0.5026, "step": 5519 }, { "epoch": 0.35, "grad_norm": 1.053562879562378, "learning_rate": 7.550768580469945e-06, "loss": 0.5612, "step": 5520 }, { "epoch": 0.35, "grad_norm": 1.030248761177063, "learning_rate": 7.549886086800389e-06, "loss": 0.5254, "step": 5521 }, { "epoch": 0.35, "grad_norm": 0.8703546524047852, "learning_rate": 7.549003485763999e-06, "loss": 0.4456, "step": 5522 }, { "epoch": 0.35, "grad_norm": 1.0638717412948608, "learning_rate": 7.548120777397941e-06, "loss": 0.5633, "step": 5523 }, { "epoch": 0.35, "grad_norm": 1.140637755393982, "learning_rate": 7.547237961739382e-06, "loss": 0.5914, "step": 5524 }, { "epoch": 0.35, "grad_norm": 1.0732502937316895, "learning_rate": 7.546355038825492e-06, "loss": 0.5189, "step": 5525 }, { "epoch": 0.35, "grad_norm": 0.9874221086502075, "learning_rate": 7.545472008693451e-06, "loss": 0.5542, "step": 5526 }, { "epoch": 0.35, "grad_norm": 1.110288143157959, "learning_rate": 7.544588871380439e-06, "loss": 0.5781, "step": 5527 }, { "epoch": 0.35, "grad_norm": 0.9584753513336182, "learning_rate": 7.54370562692364e-06, "loss": 0.5693, "step": 5528 }, { "epoch": 0.35, "grad_norm": 0.9943515658378601, "learning_rate": 7.542822275360246e-06, "loss": 0.5642, "step": 5529 }, { "epoch": 0.35, "grad_norm": 1.0656532049179077, "learning_rate": 7.541938816727453e-06, "loss": 0.5973, "step": 5530 }, { "epoch": 0.35, "grad_norm": 1.0154662132263184, "learning_rate": 7.5410552510624594e-06, "loss": 0.5365, "step": 5531 }, { "epoch": 0.35, "grad_norm": 1.0014674663543701, "learning_rate": 7.540171578402466e-06, "loss": 0.5718, "step": 5532 }, { "epoch": 0.35, "grad_norm": 1.0146523714065552, "learning_rate": 7.539287798784688e-06, "loss": 0.5304, "step": 5533 }, { "epoch": 0.35, "grad_norm": 1.035952091217041, "learning_rate": 7.538403912246333e-06, "loss": 0.5214, "step": 5534 }, { "epoch": 0.35, "grad_norm": 0.9672898054122925, "learning_rate": 7.537519918824619e-06, "loss": 0.5037, "step": 5535 }, { "epoch": 0.35, "grad_norm": 0.9362606406211853, "learning_rate": 7.5366358185567676e-06, "loss": 0.4639, "step": 5536 }, { "epoch": 0.35, "grad_norm": 0.9829898476600647, "learning_rate": 7.5357516114800075e-06, "loss": 0.5478, "step": 5537 }, { "epoch": 0.35, "grad_norm": 0.9308422207832336, "learning_rate": 7.534867297631569e-06, "loss": 0.5041, "step": 5538 }, { "epoch": 0.35, "grad_norm": 1.0422790050506592, "learning_rate": 7.533982877048685e-06, "loss": 0.5872, "step": 5539 }, { "epoch": 0.35, "grad_norm": 1.0372891426086426, "learning_rate": 7.5330983497685975e-06, "loss": 0.5368, "step": 5540 }, { "epoch": 0.35, "grad_norm": 1.0850752592086792, "learning_rate": 7.532213715828551e-06, "loss": 0.5311, "step": 5541 }, { "epoch": 0.35, "grad_norm": 0.9258276224136353, "learning_rate": 7.531328975265795e-06, "loss": 0.5529, "step": 5542 }, { "epoch": 0.35, "grad_norm": 0.9780939817428589, "learning_rate": 7.53044412811758e-06, "loss": 0.503, "step": 5543 }, { "epoch": 0.35, "grad_norm": 1.0371925830841064, "learning_rate": 7.529559174421167e-06, "loss": 0.5487, "step": 5544 }, { "epoch": 0.35, "grad_norm": 0.9781390428543091, "learning_rate": 7.528674114213816e-06, "loss": 0.5579, "step": 5545 }, { "epoch": 0.35, "grad_norm": 1.094603419303894, "learning_rate": 7.527788947532795e-06, "loss": 0.5784, "step": 5546 }, { "epoch": 0.35, "grad_norm": 1.128503441810608, "learning_rate": 7.526903674415373e-06, "loss": 0.5358, "step": 5547 }, { "epoch": 0.35, "grad_norm": 1.0087530612945557, "learning_rate": 7.526018294898832e-06, "loss": 0.5644, "step": 5548 }, { "epoch": 0.35, "grad_norm": 1.00899076461792, "learning_rate": 7.525132809020443e-06, "loss": 0.5114, "step": 5549 }, { "epoch": 0.35, "grad_norm": 0.9247457981109619, "learning_rate": 7.524247216817499e-06, "loss": 0.5141, "step": 5550 }, { "epoch": 0.35, "grad_norm": 1.0578961372375488, "learning_rate": 7.5233615183272836e-06, "loss": 0.5232, "step": 5551 }, { "epoch": 0.35, "grad_norm": 1.1226731538772583, "learning_rate": 7.522475713587095e-06, "loss": 0.5662, "step": 5552 }, { "epoch": 0.35, "grad_norm": 1.0937976837158203, "learning_rate": 7.521589802634228e-06, "loss": 0.5649, "step": 5553 }, { "epoch": 0.35, "grad_norm": 0.984603226184845, "learning_rate": 7.520703785505987e-06, "loss": 0.5423, "step": 5554 }, { "epoch": 0.35, "grad_norm": 0.9459813237190247, "learning_rate": 7.519817662239678e-06, "loss": 0.5214, "step": 5555 }, { "epoch": 0.35, "grad_norm": 1.017005443572998, "learning_rate": 7.518931432872614e-06, "loss": 0.5657, "step": 5556 }, { "epoch": 0.35, "grad_norm": 1.0255146026611328, "learning_rate": 7.518045097442111e-06, "loss": 0.5667, "step": 5557 }, { "epoch": 0.35, "grad_norm": 1.0680776834487915, "learning_rate": 7.517158655985483e-06, "loss": 0.5562, "step": 5558 }, { "epoch": 0.35, "grad_norm": 1.114147424697876, "learning_rate": 7.516272108540066e-06, "loss": 0.5831, "step": 5559 }, { "epoch": 0.35, "grad_norm": 1.0147895812988281, "learning_rate": 7.515385455143183e-06, "loss": 0.5874, "step": 5560 }, { "epoch": 0.35, "grad_norm": 1.1398597955703735, "learning_rate": 7.514498695832169e-06, "loss": 0.5688, "step": 5561 }, { "epoch": 0.35, "grad_norm": 0.9944059252738953, "learning_rate": 7.51361183064436e-06, "loss": 0.5354, "step": 5562 }, { "epoch": 0.35, "grad_norm": 1.0544904470443726, "learning_rate": 7.512724859617103e-06, "loss": 0.5276, "step": 5563 }, { "epoch": 0.35, "grad_norm": 1.1566762924194336, "learning_rate": 7.511837782787743e-06, "loss": 0.5288, "step": 5564 }, { "epoch": 0.35, "grad_norm": 1.0607764720916748, "learning_rate": 7.510950600193632e-06, "loss": 0.5206, "step": 5565 }, { "epoch": 0.35, "grad_norm": 0.9686064124107361, "learning_rate": 7.510063311872125e-06, "loss": 0.5209, "step": 5566 }, { "epoch": 0.35, "grad_norm": 1.0985875129699707, "learning_rate": 7.509175917860586e-06, "loss": 0.5759, "step": 5567 }, { "epoch": 0.35, "grad_norm": 1.0419986248016357, "learning_rate": 7.508288418196377e-06, "loss": 0.557, "step": 5568 }, { "epoch": 0.35, "grad_norm": 1.0015439987182617, "learning_rate": 7.507400812916868e-06, "loss": 0.5079, "step": 5569 }, { "epoch": 0.35, "grad_norm": 1.097322940826416, "learning_rate": 7.5065131020594316e-06, "loss": 0.5759, "step": 5570 }, { "epoch": 0.35, "grad_norm": 1.0685824155807495, "learning_rate": 7.5056252856614505e-06, "loss": 0.526, "step": 5571 }, { "epoch": 0.35, "grad_norm": 1.0418754816055298, "learning_rate": 7.504737363760306e-06, "loss": 0.6284, "step": 5572 }, { "epoch": 0.35, "grad_norm": 0.9443859457969666, "learning_rate": 7.503849336393382e-06, "loss": 0.5359, "step": 5573 }, { "epoch": 0.35, "grad_norm": 1.0443962812423706, "learning_rate": 7.502961203598074e-06, "loss": 0.5534, "step": 5574 }, { "epoch": 0.35, "grad_norm": 0.9699767231941223, "learning_rate": 7.502072965411776e-06, "loss": 0.5226, "step": 5575 }, { "epoch": 0.35, "grad_norm": 1.1486808061599731, "learning_rate": 7.501184621871891e-06, "loss": 0.5182, "step": 5576 }, { "epoch": 0.35, "grad_norm": 0.9507920145988464, "learning_rate": 7.5002961730158204e-06, "loss": 0.5199, "step": 5577 }, { "epoch": 0.35, "grad_norm": 1.052960991859436, "learning_rate": 7.499407618880979e-06, "loss": 0.5421, "step": 5578 }, { "epoch": 0.35, "grad_norm": 0.9882809519767761, "learning_rate": 7.498518959504775e-06, "loss": 0.5601, "step": 5579 }, { "epoch": 0.35, "grad_norm": 1.05231511592865, "learning_rate": 7.49763019492463e-06, "loss": 0.5815, "step": 5580 }, { "epoch": 0.35, "grad_norm": 1.0310633182525635, "learning_rate": 7.4967413251779655e-06, "loss": 0.5782, "step": 5581 }, { "epoch": 0.35, "grad_norm": 1.0598621368408203, "learning_rate": 7.495852350302209e-06, "loss": 0.5534, "step": 5582 }, { "epoch": 0.35, "grad_norm": 0.952500581741333, "learning_rate": 7.494963270334794e-06, "loss": 0.5795, "step": 5583 }, { "epoch": 0.35, "grad_norm": 1.0291993618011475, "learning_rate": 7.494074085313155e-06, "loss": 0.4821, "step": 5584 }, { "epoch": 0.35, "grad_norm": 1.014594316482544, "learning_rate": 7.493184795274731e-06, "loss": 0.5457, "step": 5585 }, { "epoch": 0.35, "grad_norm": 1.0594139099121094, "learning_rate": 7.49229540025697e-06, "loss": 0.6289, "step": 5586 }, { "epoch": 0.35, "grad_norm": 1.008696436882019, "learning_rate": 7.4914059002973185e-06, "loss": 0.5707, "step": 5587 }, { "epoch": 0.35, "grad_norm": 0.9493990540504456, "learning_rate": 7.490516295433232e-06, "loss": 0.4909, "step": 5588 }, { "epoch": 0.35, "grad_norm": 1.0605522394180298, "learning_rate": 7.489626585702169e-06, "loss": 0.5402, "step": 5589 }, { "epoch": 0.35, "grad_norm": 1.025477647781372, "learning_rate": 7.4887367711415905e-06, "loss": 0.5494, "step": 5590 }, { "epoch": 0.35, "grad_norm": 0.9939894676208496, "learning_rate": 7.487846851788965e-06, "loss": 0.5431, "step": 5591 }, { "epoch": 0.35, "grad_norm": 0.996059238910675, "learning_rate": 7.486956827681761e-06, "loss": 0.5496, "step": 5592 }, { "epoch": 0.35, "grad_norm": 1.0278451442718506, "learning_rate": 7.4860666988574585e-06, "loss": 0.5322, "step": 5593 }, { "epoch": 0.35, "grad_norm": 1.0295774936676025, "learning_rate": 7.485176465353534e-06, "loss": 0.5237, "step": 5594 }, { "epoch": 0.35, "grad_norm": 0.8795758485794067, "learning_rate": 7.484286127207476e-06, "loss": 0.4798, "step": 5595 }, { "epoch": 0.35, "grad_norm": 0.974780797958374, "learning_rate": 7.48339568445677e-06, "loss": 0.5342, "step": 5596 }, { "epoch": 0.35, "grad_norm": 0.9459009170532227, "learning_rate": 7.482505137138911e-06, "loss": 0.5407, "step": 5597 }, { "epoch": 0.35, "grad_norm": 1.0662119388580322, "learning_rate": 7.4816144852913975e-06, "loss": 0.5727, "step": 5598 }, { "epoch": 0.35, "grad_norm": 1.0604021549224854, "learning_rate": 7.480723728951731e-06, "loss": 0.5644, "step": 5599 }, { "epoch": 0.35, "grad_norm": 0.9835381507873535, "learning_rate": 7.479832868157416e-06, "loss": 0.5444, "step": 5600 }, { "epoch": 0.35, "grad_norm": 0.9299740195274353, "learning_rate": 7.4789419029459675e-06, "loss": 0.482, "step": 5601 }, { "epoch": 0.35, "grad_norm": 0.9987683892250061, "learning_rate": 7.478050833354897e-06, "loss": 0.5687, "step": 5602 }, { "epoch": 0.35, "grad_norm": 1.0130796432495117, "learning_rate": 7.47715965942173e-06, "loss": 0.5344, "step": 5603 }, { "epoch": 0.36, "grad_norm": 0.9855684041976929, "learning_rate": 7.476268381183984e-06, "loss": 0.4738, "step": 5604 }, { "epoch": 0.36, "grad_norm": 1.0145164728164673, "learning_rate": 7.475376998679193e-06, "loss": 0.5809, "step": 5605 }, { "epoch": 0.36, "grad_norm": 1.0176178216934204, "learning_rate": 7.474485511944887e-06, "loss": 0.554, "step": 5606 }, { "epoch": 0.36, "grad_norm": 0.9874436259269714, "learning_rate": 7.4735939210186036e-06, "loss": 0.5148, "step": 5607 }, { "epoch": 0.36, "grad_norm": 1.0104726552963257, "learning_rate": 7.472702225937884e-06, "loss": 0.5412, "step": 5608 }, { "epoch": 0.36, "grad_norm": 1.001220464706421, "learning_rate": 7.471810426740278e-06, "loss": 0.538, "step": 5609 }, { "epoch": 0.36, "grad_norm": 0.9531809091567993, "learning_rate": 7.470918523463333e-06, "loss": 0.521, "step": 5610 }, { "epoch": 0.36, "grad_norm": 1.0025501251220703, "learning_rate": 7.470026516144604e-06, "loss": 0.575, "step": 5611 }, { "epoch": 0.36, "grad_norm": 0.9598075151443481, "learning_rate": 7.469134404821652e-06, "loss": 0.5063, "step": 5612 }, { "epoch": 0.36, "grad_norm": 1.0706355571746826, "learning_rate": 7.468242189532039e-06, "loss": 0.5339, "step": 5613 }, { "epoch": 0.36, "grad_norm": 1.079030156135559, "learning_rate": 7.467349870313334e-06, "loss": 0.4836, "step": 5614 }, { "epoch": 0.36, "grad_norm": 1.0258371829986572, "learning_rate": 7.466457447203109e-06, "loss": 0.5309, "step": 5615 }, { "epoch": 0.36, "grad_norm": 0.9211562871932983, "learning_rate": 7.465564920238941e-06, "loss": 0.5266, "step": 5616 }, { "epoch": 0.36, "grad_norm": 0.933488667011261, "learning_rate": 7.464672289458411e-06, "loss": 0.5295, "step": 5617 }, { "epoch": 0.36, "grad_norm": 1.12042236328125, "learning_rate": 7.463779554899107e-06, "loss": 0.5473, "step": 5618 }, { "epoch": 0.36, "grad_norm": 1.0543317794799805, "learning_rate": 7.462886716598614e-06, "loss": 0.5447, "step": 5619 }, { "epoch": 0.36, "grad_norm": 0.9691358804702759, "learning_rate": 7.46199377459453e-06, "loss": 0.5441, "step": 5620 }, { "epoch": 0.36, "grad_norm": 1.1141588687896729, "learning_rate": 7.461100728924455e-06, "loss": 0.501, "step": 5621 }, { "epoch": 0.36, "grad_norm": 0.9861559867858887, "learning_rate": 7.460207579625988e-06, "loss": 0.5273, "step": 5622 }, { "epoch": 0.36, "grad_norm": 0.97755366563797, "learning_rate": 7.459314326736738e-06, "loss": 0.5061, "step": 5623 }, { "epoch": 0.36, "grad_norm": 1.093091368675232, "learning_rate": 7.458420970294317e-06, "loss": 0.5569, "step": 5624 }, { "epoch": 0.36, "grad_norm": 0.9785102605819702, "learning_rate": 7.457527510336342e-06, "loss": 0.4819, "step": 5625 }, { "epoch": 0.36, "grad_norm": 1.0222108364105225, "learning_rate": 7.456633946900432e-06, "loss": 0.5498, "step": 5626 }, { "epoch": 0.36, "grad_norm": 0.9800930023193359, "learning_rate": 7.455740280024212e-06, "loss": 0.5178, "step": 5627 }, { "epoch": 0.36, "grad_norm": 0.966901421546936, "learning_rate": 7.454846509745311e-06, "loss": 0.5308, "step": 5628 }, { "epoch": 0.36, "grad_norm": 1.0559923648834229, "learning_rate": 7.453952636101366e-06, "loss": 0.5995, "step": 5629 }, { "epoch": 0.36, "grad_norm": 1.0091379880905151, "learning_rate": 7.45305865913001e-06, "loss": 0.5898, "step": 5630 }, { "epoch": 0.36, "grad_norm": 1.1073163747787476, "learning_rate": 7.452164578868889e-06, "loss": 0.5816, "step": 5631 }, { "epoch": 0.36, "grad_norm": 0.9831271171569824, "learning_rate": 7.451270395355647e-06, "loss": 0.5804, "step": 5632 }, { "epoch": 0.36, "grad_norm": 0.9418684244155884, "learning_rate": 7.450376108627937e-06, "loss": 0.48, "step": 5633 }, { "epoch": 0.36, "grad_norm": 0.993733286857605, "learning_rate": 7.449481718723411e-06, "loss": 0.5548, "step": 5634 }, { "epoch": 0.36, "grad_norm": 0.9845336675643921, "learning_rate": 7.448587225679733e-06, "loss": 0.5687, "step": 5635 }, { "epoch": 0.36, "grad_norm": 0.9980434775352478, "learning_rate": 7.447692629534565e-06, "loss": 0.5142, "step": 5636 }, { "epoch": 0.36, "grad_norm": 1.0565739870071411, "learning_rate": 7.446797930325574e-06, "loss": 0.5431, "step": 5637 }, { "epoch": 0.36, "grad_norm": 0.9953488707542419, "learning_rate": 7.445903128090435e-06, "loss": 0.5196, "step": 5638 }, { "epoch": 0.36, "grad_norm": 1.039448857307434, "learning_rate": 7.445008222866823e-06, "loss": 0.5051, "step": 5639 }, { "epoch": 0.36, "grad_norm": 0.963239848613739, "learning_rate": 7.444113214692422e-06, "loss": 0.5137, "step": 5640 }, { "epoch": 0.36, "grad_norm": 1.0066325664520264, "learning_rate": 7.443218103604915e-06, "loss": 0.5287, "step": 5641 }, { "epoch": 0.36, "grad_norm": 1.0743483304977417, "learning_rate": 7.442322889641992e-06, "loss": 0.5878, "step": 5642 }, { "epoch": 0.36, "grad_norm": 0.9948568940162659, "learning_rate": 7.441427572841349e-06, "loss": 0.6129, "step": 5643 }, { "epoch": 0.36, "grad_norm": 0.9837034344673157, "learning_rate": 7.440532153240685e-06, "loss": 0.5177, "step": 5644 }, { "epoch": 0.36, "grad_norm": 0.9905748963356018, "learning_rate": 7.4396366308777015e-06, "loss": 0.5653, "step": 5645 }, { "epoch": 0.36, "grad_norm": 1.0937864780426025, "learning_rate": 7.4387410057901056e-06, "loss": 0.5016, "step": 5646 }, { "epoch": 0.36, "grad_norm": 0.9721953272819519, "learning_rate": 7.4378452780156094e-06, "loss": 0.4845, "step": 5647 }, { "epoch": 0.36, "grad_norm": 1.0243889093399048, "learning_rate": 7.436949447591931e-06, "loss": 0.5188, "step": 5648 }, { "epoch": 0.36, "grad_norm": 0.9671458601951599, "learning_rate": 7.4360535145567865e-06, "loss": 0.5298, "step": 5649 }, { "epoch": 0.36, "grad_norm": 0.9595158696174622, "learning_rate": 7.435157478947905e-06, "loss": 0.4837, "step": 5650 }, { "epoch": 0.36, "grad_norm": 0.9490304589271545, "learning_rate": 7.434261340803013e-06, "loss": 0.5266, "step": 5651 }, { "epoch": 0.36, "grad_norm": 1.0103788375854492, "learning_rate": 7.433365100159844e-06, "loss": 0.5178, "step": 5652 }, { "epoch": 0.36, "grad_norm": 1.060921311378479, "learning_rate": 7.432468757056136e-06, "loss": 0.5291, "step": 5653 }, { "epoch": 0.36, "grad_norm": 0.9731550812721252, "learning_rate": 7.431572311529629e-06, "loss": 0.5375, "step": 5654 }, { "epoch": 0.36, "grad_norm": 1.0851389169692993, "learning_rate": 7.4306757636180725e-06, "loss": 0.4945, "step": 5655 }, { "epoch": 0.36, "grad_norm": 1.0156821012496948, "learning_rate": 7.429779113359214e-06, "loss": 0.5008, "step": 5656 }, { "epoch": 0.36, "grad_norm": 0.9673095941543579, "learning_rate": 7.428882360790811e-06, "loss": 0.5113, "step": 5657 }, { "epoch": 0.36, "grad_norm": 1.134093165397644, "learning_rate": 7.427985505950619e-06, "loss": 0.5406, "step": 5658 }, { "epoch": 0.36, "grad_norm": 1.0507636070251465, "learning_rate": 7.427088548876406e-06, "loss": 0.5929, "step": 5659 }, { "epoch": 0.36, "grad_norm": 1.0085718631744385, "learning_rate": 7.426191489605936e-06, "loss": 0.5333, "step": 5660 }, { "epoch": 0.36, "grad_norm": 0.981497049331665, "learning_rate": 7.425294328176984e-06, "loss": 0.5076, "step": 5661 }, { "epoch": 0.36, "grad_norm": 0.9927743673324585, "learning_rate": 7.4243970646273236e-06, "loss": 0.5324, "step": 5662 }, { "epoch": 0.36, "grad_norm": 1.0711027383804321, "learning_rate": 7.423499698994737e-06, "loss": 0.5218, "step": 5663 }, { "epoch": 0.36, "grad_norm": 0.946141242980957, "learning_rate": 7.422602231317009e-06, "loss": 0.5257, "step": 5664 }, { "epoch": 0.36, "grad_norm": 1.0562503337860107, "learning_rate": 7.421704661631929e-06, "loss": 0.5366, "step": 5665 }, { "epoch": 0.36, "grad_norm": 0.9718359708786011, "learning_rate": 7.42080698997729e-06, "loss": 0.5756, "step": 5666 }, { "epoch": 0.36, "grad_norm": 0.9516698122024536, "learning_rate": 7.419909216390889e-06, "loss": 0.536, "step": 5667 }, { "epoch": 0.36, "grad_norm": 0.954897940158844, "learning_rate": 7.4190113409105304e-06, "loss": 0.5016, "step": 5668 }, { "epoch": 0.36, "grad_norm": 1.0027780532836914, "learning_rate": 7.418113363574018e-06, "loss": 0.5256, "step": 5669 }, { "epoch": 0.36, "grad_norm": 1.044217586517334, "learning_rate": 7.417215284419165e-06, "loss": 0.5591, "step": 5670 }, { "epoch": 0.36, "grad_norm": 1.0571179389953613, "learning_rate": 7.416317103483784e-06, "loss": 0.5826, "step": 5671 }, { "epoch": 0.36, "grad_norm": 1.1507805585861206, "learning_rate": 7.415418820805698e-06, "loss": 0.5302, "step": 5672 }, { "epoch": 0.36, "grad_norm": 1.052793025970459, "learning_rate": 7.414520436422725e-06, "loss": 0.6083, "step": 5673 }, { "epoch": 0.36, "grad_norm": 0.9399493932723999, "learning_rate": 7.413621950372698e-06, "loss": 0.5089, "step": 5674 }, { "epoch": 0.36, "grad_norm": 0.9684324860572815, "learning_rate": 7.4127233626934456e-06, "loss": 0.5471, "step": 5675 }, { "epoch": 0.36, "grad_norm": 1.084831714630127, "learning_rate": 7.411824673422808e-06, "loss": 0.5573, "step": 5676 }, { "epoch": 0.36, "grad_norm": 1.0572097301483154, "learning_rate": 7.410925882598621e-06, "loss": 0.5921, "step": 5677 }, { "epoch": 0.36, "grad_norm": 0.9332970380783081, "learning_rate": 7.410026990258734e-06, "loss": 0.5345, "step": 5678 }, { "epoch": 0.36, "grad_norm": 1.0227848291397095, "learning_rate": 7.409127996440993e-06, "loss": 0.5114, "step": 5679 }, { "epoch": 0.36, "grad_norm": 1.0537148714065552, "learning_rate": 7.408228901183254e-06, "loss": 0.5592, "step": 5680 }, { "epoch": 0.36, "grad_norm": 1.004742980003357, "learning_rate": 7.407329704523372e-06, "loss": 0.536, "step": 5681 }, { "epoch": 0.36, "grad_norm": 1.0189913511276245, "learning_rate": 7.406430406499212e-06, "loss": 0.5426, "step": 5682 }, { "epoch": 0.36, "grad_norm": 1.0214310884475708, "learning_rate": 7.405531007148638e-06, "loss": 0.5429, "step": 5683 }, { "epoch": 0.36, "grad_norm": 0.9545047283172607, "learning_rate": 7.404631506509523e-06, "loss": 0.5079, "step": 5684 }, { "epoch": 0.36, "grad_norm": 1.060755729675293, "learning_rate": 7.403731904619739e-06, "loss": 0.586, "step": 5685 }, { "epoch": 0.36, "grad_norm": 0.942334771156311, "learning_rate": 7.402832201517166e-06, "loss": 0.5332, "step": 5686 }, { "epoch": 0.36, "grad_norm": 1.0206327438354492, "learning_rate": 7.40193239723969e-06, "loss": 0.5492, "step": 5687 }, { "epoch": 0.36, "grad_norm": 1.026884913444519, "learning_rate": 7.401032491825194e-06, "loss": 0.5491, "step": 5688 }, { "epoch": 0.36, "grad_norm": 1.0408568382263184, "learning_rate": 7.400132485311573e-06, "loss": 0.548, "step": 5689 }, { "epoch": 0.36, "grad_norm": 1.1180294752120972, "learning_rate": 7.399232377736722e-06, "loss": 0.5797, "step": 5690 }, { "epoch": 0.36, "grad_norm": 1.064048409461975, "learning_rate": 7.398332169138544e-06, "loss": 0.5514, "step": 5691 }, { "epoch": 0.36, "grad_norm": 1.0275312662124634, "learning_rate": 7.397431859554941e-06, "loss": 0.5385, "step": 5692 }, { "epoch": 0.36, "grad_norm": 0.9816993474960327, "learning_rate": 7.396531449023821e-06, "loss": 0.5183, "step": 5693 }, { "epoch": 0.36, "grad_norm": 1.0414061546325684, "learning_rate": 7.395630937583099e-06, "loss": 0.5726, "step": 5694 }, { "epoch": 0.36, "grad_norm": 0.9948675036430359, "learning_rate": 7.394730325270693e-06, "loss": 0.5384, "step": 5695 }, { "epoch": 0.36, "grad_norm": 1.0395992994308472, "learning_rate": 7.393829612124524e-06, "loss": 0.5407, "step": 5696 }, { "epoch": 0.36, "grad_norm": 1.0142568349838257, "learning_rate": 7.392928798182516e-06, "loss": 0.5272, "step": 5697 }, { "epoch": 0.36, "grad_norm": 0.9534479975700378, "learning_rate": 7.392027883482602e-06, "loss": 0.5845, "step": 5698 }, { "epoch": 0.36, "grad_norm": 0.9710732102394104, "learning_rate": 7.391126868062714e-06, "loss": 0.5356, "step": 5699 }, { "epoch": 0.36, "grad_norm": 0.9691248536109924, "learning_rate": 7.390225751960792e-06, "loss": 0.5359, "step": 5700 }, { "epoch": 0.36, "grad_norm": 0.9411775469779968, "learning_rate": 7.389324535214779e-06, "loss": 0.5567, "step": 5701 }, { "epoch": 0.36, "grad_norm": 1.0573960542678833, "learning_rate": 7.388423217862621e-06, "loss": 0.5633, "step": 5702 }, { "epoch": 0.36, "grad_norm": 1.0169092416763306, "learning_rate": 7.387521799942271e-06, "loss": 0.5405, "step": 5703 }, { "epoch": 0.36, "grad_norm": 0.9815866351127625, "learning_rate": 7.386620281491683e-06, "loss": 0.5868, "step": 5704 }, { "epoch": 0.36, "grad_norm": 0.961209237575531, "learning_rate": 7.385718662548817e-06, "loss": 0.5813, "step": 5705 }, { "epoch": 0.36, "grad_norm": 1.036128282546997, "learning_rate": 7.384816943151638e-06, "loss": 0.5422, "step": 5706 }, { "epoch": 0.36, "grad_norm": 0.9688419103622437, "learning_rate": 7.383915123338113e-06, "loss": 0.5279, "step": 5707 }, { "epoch": 0.36, "grad_norm": 0.8959729671478271, "learning_rate": 7.3830132031462165e-06, "loss": 0.5514, "step": 5708 }, { "epoch": 0.36, "grad_norm": 1.007442593574524, "learning_rate": 7.382111182613923e-06, "loss": 0.4924, "step": 5709 }, { "epoch": 0.36, "grad_norm": 0.9503515958786011, "learning_rate": 7.381209061779214e-06, "loss": 0.5705, "step": 5710 }, { "epoch": 0.36, "grad_norm": 0.9991022944450378, "learning_rate": 7.380306840680076e-06, "loss": 0.5538, "step": 5711 }, { "epoch": 0.36, "grad_norm": 1.0210731029510498, "learning_rate": 7.379404519354496e-06, "loss": 0.5804, "step": 5712 }, { "epoch": 0.36, "grad_norm": 0.9408265948295593, "learning_rate": 7.378502097840471e-06, "loss": 0.5335, "step": 5713 }, { "epoch": 0.36, "grad_norm": 1.086181879043579, "learning_rate": 7.377599576175995e-06, "loss": 0.594, "step": 5714 }, { "epoch": 0.36, "grad_norm": 1.025308609008789, "learning_rate": 7.376696954399073e-06, "loss": 0.5439, "step": 5715 }, { "epoch": 0.36, "grad_norm": 0.9805166721343994, "learning_rate": 7.37579423254771e-06, "loss": 0.5418, "step": 5716 }, { "epoch": 0.36, "grad_norm": 1.048662781715393, "learning_rate": 7.374891410659917e-06, "loss": 0.5599, "step": 5717 }, { "epoch": 0.36, "grad_norm": 1.037967324256897, "learning_rate": 7.373988488773708e-06, "loss": 0.5504, "step": 5718 }, { "epoch": 0.36, "grad_norm": 1.1014705896377563, "learning_rate": 7.3730854669271015e-06, "loss": 0.5071, "step": 5719 }, { "epoch": 0.36, "grad_norm": 1.019339680671692, "learning_rate": 7.372182345158122e-06, "loss": 0.5773, "step": 5720 }, { "epoch": 0.36, "grad_norm": 1.0042972564697266, "learning_rate": 7.3712791235047976e-06, "loss": 0.5491, "step": 5721 }, { "epoch": 0.36, "grad_norm": 1.1469221115112305, "learning_rate": 7.370375802005157e-06, "loss": 0.5723, "step": 5722 }, { "epoch": 0.36, "grad_norm": 1.1442276239395142, "learning_rate": 7.369472380697236e-06, "loss": 0.4971, "step": 5723 }, { "epoch": 0.36, "grad_norm": 0.9209834933280945, "learning_rate": 7.368568859619078e-06, "loss": 0.5027, "step": 5724 }, { "epoch": 0.36, "grad_norm": 1.0353718996047974, "learning_rate": 7.3676652388087234e-06, "loss": 0.5424, "step": 5725 }, { "epoch": 0.36, "grad_norm": 1.0237194299697876, "learning_rate": 7.366761518304223e-06, "loss": 0.535, "step": 5726 }, { "epoch": 0.36, "grad_norm": 1.0912165641784668, "learning_rate": 7.365857698143628e-06, "loss": 0.5355, "step": 5727 }, { "epoch": 0.36, "grad_norm": 1.13982355594635, "learning_rate": 7.364953778364996e-06, "loss": 0.5697, "step": 5728 }, { "epoch": 0.36, "grad_norm": 0.951877772808075, "learning_rate": 7.364049759006387e-06, "loss": 0.5509, "step": 5729 }, { "epoch": 0.36, "grad_norm": 1.0230592489242554, "learning_rate": 7.363145640105867e-06, "loss": 0.5146, "step": 5730 }, { "epoch": 0.36, "grad_norm": 0.9922885894775391, "learning_rate": 7.362241421701505e-06, "loss": 0.533, "step": 5731 }, { "epoch": 0.36, "grad_norm": 1.080177664756775, "learning_rate": 7.3613371038313744e-06, "loss": 0.5416, "step": 5732 }, { "epoch": 0.36, "grad_norm": 0.9975622892379761, "learning_rate": 7.360432686533552e-06, "loss": 0.5887, "step": 5733 }, { "epoch": 0.36, "grad_norm": 1.0143722295761108, "learning_rate": 7.359528169846121e-06, "loss": 0.557, "step": 5734 }, { "epoch": 0.36, "grad_norm": 0.9244665503501892, "learning_rate": 7.358623553807167e-06, "loss": 0.477, "step": 5735 }, { "epoch": 0.36, "grad_norm": 0.9375993013381958, "learning_rate": 7.35771883845478e-06, "loss": 0.5025, "step": 5736 }, { "epoch": 0.36, "grad_norm": 0.9574780464172363, "learning_rate": 7.356814023827055e-06, "loss": 0.4916, "step": 5737 }, { "epoch": 0.36, "grad_norm": 1.0635297298431396, "learning_rate": 7.35590910996209e-06, "loss": 0.5399, "step": 5738 }, { "epoch": 0.36, "grad_norm": 0.9631228446960449, "learning_rate": 7.355004096897987e-06, "loss": 0.5071, "step": 5739 }, { "epoch": 0.36, "grad_norm": 0.9968248009681702, "learning_rate": 7.354098984672856e-06, "loss": 0.4824, "step": 5740 }, { "epoch": 0.36, "grad_norm": 1.0746512413024902, "learning_rate": 7.353193773324805e-06, "loss": 0.5767, "step": 5741 }, { "epoch": 0.36, "grad_norm": 1.0765202045440674, "learning_rate": 7.35228846289195e-06, "loss": 0.508, "step": 5742 }, { "epoch": 0.36, "grad_norm": 0.9962648749351501, "learning_rate": 7.351383053412411e-06, "loss": 0.5793, "step": 5743 }, { "epoch": 0.36, "grad_norm": 0.9487490057945251, "learning_rate": 7.350477544924313e-06, "loss": 0.5729, "step": 5744 }, { "epoch": 0.36, "grad_norm": 1.005916953086853, "learning_rate": 7.349571937465782e-06, "loss": 0.5754, "step": 5745 }, { "epoch": 0.36, "grad_norm": 0.9979771375656128, "learning_rate": 7.348666231074948e-06, "loss": 0.5288, "step": 5746 }, { "epoch": 0.36, "grad_norm": 1.0600558519363403, "learning_rate": 7.3477604257899515e-06, "loss": 0.545, "step": 5747 }, { "epoch": 0.36, "grad_norm": 1.0493375062942505, "learning_rate": 7.346854521648929e-06, "loss": 0.577, "step": 5748 }, { "epoch": 0.36, "grad_norm": 0.9606602787971497, "learning_rate": 7.345948518690029e-06, "loss": 0.4939, "step": 5749 }, { "epoch": 0.36, "grad_norm": 1.0327515602111816, "learning_rate": 7.345042416951395e-06, "loss": 0.5413, "step": 5750 }, { "epoch": 0.36, "grad_norm": 0.9119663834571838, "learning_rate": 7.344136216471185e-06, "loss": 0.5097, "step": 5751 }, { "epoch": 0.36, "grad_norm": 1.002037525177002, "learning_rate": 7.343229917287552e-06, "loss": 0.5294, "step": 5752 }, { "epoch": 0.36, "grad_norm": 1.1066184043884277, "learning_rate": 7.34232351943866e-06, "loss": 0.5014, "step": 5753 }, { "epoch": 0.36, "grad_norm": 1.0165209770202637, "learning_rate": 7.341417022962671e-06, "loss": 0.4836, "step": 5754 }, { "epoch": 0.36, "grad_norm": 1.0941275358200073, "learning_rate": 7.340510427897759e-06, "loss": 0.4963, "step": 5755 }, { "epoch": 0.36, "grad_norm": 1.1376608610153198, "learning_rate": 7.339603734282093e-06, "loss": 0.5642, "step": 5756 }, { "epoch": 0.36, "grad_norm": 1.0889432430267334, "learning_rate": 7.338696942153855e-06, "loss": 0.5388, "step": 5757 }, { "epoch": 0.36, "grad_norm": 0.9839189648628235, "learning_rate": 7.337790051551221e-06, "loss": 0.5082, "step": 5758 }, { "epoch": 0.36, "grad_norm": 1.0140478610992432, "learning_rate": 7.3368830625123835e-06, "loss": 0.5426, "step": 5759 }, { "epoch": 0.36, "grad_norm": 1.1105388402938843, "learning_rate": 7.335975975075529e-06, "loss": 0.6198, "step": 5760 }, { "epoch": 0.36, "grad_norm": 1.046632170677185, "learning_rate": 7.3350687892788505e-06, "loss": 0.4986, "step": 5761 }, { "epoch": 0.37, "grad_norm": 1.060750961303711, "learning_rate": 7.33416150516055e-06, "loss": 0.518, "step": 5762 }, { "epoch": 0.37, "grad_norm": 0.9920255541801453, "learning_rate": 7.333254122758828e-06, "loss": 0.5693, "step": 5763 }, { "epoch": 0.37, "grad_norm": 0.9959121942520142, "learning_rate": 7.332346642111893e-06, "loss": 0.5546, "step": 5764 }, { "epoch": 0.37, "grad_norm": 1.037704586982727, "learning_rate": 7.331439063257953e-06, "loss": 0.5586, "step": 5765 }, { "epoch": 0.37, "grad_norm": 0.9203774333000183, "learning_rate": 7.330531386235225e-06, "loss": 0.5295, "step": 5766 }, { "epoch": 0.37, "grad_norm": 1.0745431184768677, "learning_rate": 7.329623611081927e-06, "loss": 0.5458, "step": 5767 }, { "epoch": 0.37, "grad_norm": 1.0159822702407837, "learning_rate": 7.3287157378362846e-06, "loss": 0.5106, "step": 5768 }, { "epoch": 0.37, "grad_norm": 1.0377622842788696, "learning_rate": 7.327807766536521e-06, "loss": 0.5749, "step": 5769 }, { "epoch": 0.37, "grad_norm": 1.03034245967865, "learning_rate": 7.3268996972208725e-06, "loss": 0.5834, "step": 5770 }, { "epoch": 0.37, "grad_norm": 1.0114792585372925, "learning_rate": 7.325991529927572e-06, "loss": 0.5514, "step": 5771 }, { "epoch": 0.37, "grad_norm": 1.0172260999679565, "learning_rate": 7.325083264694859e-06, "loss": 0.4866, "step": 5772 }, { "epoch": 0.37, "grad_norm": 1.0338290929794312, "learning_rate": 7.324174901560978e-06, "loss": 0.5613, "step": 5773 }, { "epoch": 0.37, "grad_norm": 0.9678194522857666, "learning_rate": 7.323266440564177e-06, "loss": 0.5218, "step": 5774 }, { "epoch": 0.37, "grad_norm": 1.0081979036331177, "learning_rate": 7.32235788174271e-06, "loss": 0.4992, "step": 5775 }, { "epoch": 0.37, "grad_norm": 0.9918741583824158, "learning_rate": 7.32144922513483e-06, "loss": 0.5158, "step": 5776 }, { "epoch": 0.37, "grad_norm": 1.0231093168258667, "learning_rate": 7.320540470778799e-06, "loss": 0.5438, "step": 5777 }, { "epoch": 0.37, "grad_norm": 1.0513994693756104, "learning_rate": 7.319631618712881e-06, "loss": 0.5442, "step": 5778 }, { "epoch": 0.37, "grad_norm": 0.9481300115585327, "learning_rate": 7.318722668975347e-06, "loss": 0.4835, "step": 5779 }, { "epoch": 0.37, "grad_norm": 0.9858001470565796, "learning_rate": 7.317813621604466e-06, "loss": 0.536, "step": 5780 }, { "epoch": 0.37, "grad_norm": 0.9564962983131409, "learning_rate": 7.316904476638515e-06, "loss": 0.5025, "step": 5781 }, { "epoch": 0.37, "grad_norm": 0.9979397654533386, "learning_rate": 7.315995234115778e-06, "loss": 0.5011, "step": 5782 }, { "epoch": 0.37, "grad_norm": 0.912788987159729, "learning_rate": 7.315085894074539e-06, "loss": 0.5239, "step": 5783 }, { "epoch": 0.37, "grad_norm": 1.0001533031463623, "learning_rate": 7.314176456553086e-06, "loss": 0.5199, "step": 5784 }, { "epoch": 0.37, "grad_norm": 0.9417988061904907, "learning_rate": 7.3132669215897125e-06, "loss": 0.5474, "step": 5785 }, { "epoch": 0.37, "grad_norm": 1.0640056133270264, "learning_rate": 7.312357289222717e-06, "loss": 0.5855, "step": 5786 }, { "epoch": 0.37, "grad_norm": 0.9787964224815369, "learning_rate": 7.3114475594904e-06, "loss": 0.5291, "step": 5787 }, { "epoch": 0.37, "grad_norm": 0.9471306204795837, "learning_rate": 7.310537732431067e-06, "loss": 0.5816, "step": 5788 }, { "epoch": 0.37, "grad_norm": 0.9862099289894104, "learning_rate": 7.309627808083027e-06, "loss": 0.5669, "step": 5789 }, { "epoch": 0.37, "grad_norm": 1.0701364278793335, "learning_rate": 7.308717786484596e-06, "loss": 0.5144, "step": 5790 }, { "epoch": 0.37, "grad_norm": 1.0688151121139526, "learning_rate": 7.30780766767409e-06, "loss": 0.6784, "step": 5791 }, { "epoch": 0.37, "grad_norm": 0.8825664520263672, "learning_rate": 7.306897451689832e-06, "loss": 0.4798, "step": 5792 }, { "epoch": 0.37, "grad_norm": 0.9757002592086792, "learning_rate": 7.305987138570145e-06, "loss": 0.5104, "step": 5793 }, { "epoch": 0.37, "grad_norm": 1.0232665538787842, "learning_rate": 7.305076728353364e-06, "loss": 0.5211, "step": 5794 }, { "epoch": 0.37, "grad_norm": 1.0065867900848389, "learning_rate": 7.30416622107782e-06, "loss": 0.5139, "step": 5795 }, { "epoch": 0.37, "grad_norm": 1.0104619264602661, "learning_rate": 7.303255616781853e-06, "loss": 0.5353, "step": 5796 }, { "epoch": 0.37, "grad_norm": 1.0506689548492432, "learning_rate": 7.3023449155038016e-06, "loss": 0.5751, "step": 5797 }, { "epoch": 0.37, "grad_norm": 0.9864729046821594, "learning_rate": 7.301434117282018e-06, "loss": 0.5732, "step": 5798 }, { "epoch": 0.37, "grad_norm": 0.9898861050605774, "learning_rate": 7.300523222154848e-06, "loss": 0.5242, "step": 5799 }, { "epoch": 0.37, "grad_norm": 1.04075026512146, "learning_rate": 7.299612230160648e-06, "loss": 0.5514, "step": 5800 }, { "epoch": 0.37, "grad_norm": 1.0983926057815552, "learning_rate": 7.298701141337778e-06, "loss": 0.5203, "step": 5801 }, { "epoch": 0.37, "grad_norm": 0.9918738007545471, "learning_rate": 7.2977899557246e-06, "loss": 0.5609, "step": 5802 }, { "epoch": 0.37, "grad_norm": 0.9489316940307617, "learning_rate": 7.2968786733594795e-06, "loss": 0.5345, "step": 5803 }, { "epoch": 0.37, "grad_norm": 0.984935998916626, "learning_rate": 7.295967294280788e-06, "loss": 0.5493, "step": 5804 }, { "epoch": 0.37, "grad_norm": 1.01011061668396, "learning_rate": 7.2950558185269005e-06, "loss": 0.5288, "step": 5805 }, { "epoch": 0.37, "grad_norm": 1.1023284196853638, "learning_rate": 7.294144246136198e-06, "loss": 0.5096, "step": 5806 }, { "epoch": 0.37, "grad_norm": 1.0046350955963135, "learning_rate": 7.29323257714706e-06, "loss": 0.58, "step": 5807 }, { "epoch": 0.37, "grad_norm": 1.0193142890930176, "learning_rate": 7.292320811597877e-06, "loss": 0.5758, "step": 5808 }, { "epoch": 0.37, "grad_norm": 0.923527717590332, "learning_rate": 7.291408949527039e-06, "loss": 0.4869, "step": 5809 }, { "epoch": 0.37, "grad_norm": 1.0279090404510498, "learning_rate": 7.290496990972942e-06, "loss": 0.523, "step": 5810 }, { "epoch": 0.37, "grad_norm": 0.9912611842155457, "learning_rate": 7.2895849359739834e-06, "loss": 0.502, "step": 5811 }, { "epoch": 0.37, "grad_norm": 1.0363621711730957, "learning_rate": 7.288672784568568e-06, "loss": 0.5422, "step": 5812 }, { "epoch": 0.37, "grad_norm": 0.9785564541816711, "learning_rate": 7.2877605367951055e-06, "loss": 0.5127, "step": 5813 }, { "epoch": 0.37, "grad_norm": 0.9607601761817932, "learning_rate": 7.286848192692003e-06, "loss": 0.5197, "step": 5814 }, { "epoch": 0.37, "grad_norm": 0.9498841166496277, "learning_rate": 7.28593575229768e-06, "loss": 0.547, "step": 5815 }, { "epoch": 0.37, "grad_norm": 0.9618740081787109, "learning_rate": 7.285023215650553e-06, "loss": 0.5023, "step": 5816 }, { "epoch": 0.37, "grad_norm": 0.9866049289703369, "learning_rate": 7.2841105827890475e-06, "loss": 0.5586, "step": 5817 }, { "epoch": 0.37, "grad_norm": 1.0533307790756226, "learning_rate": 7.283197853751593e-06, "loss": 0.5882, "step": 5818 }, { "epoch": 0.37, "grad_norm": 1.0719332695007324, "learning_rate": 7.282285028576618e-06, "loss": 0.5941, "step": 5819 }, { "epoch": 0.37, "grad_norm": 1.0505437850952148, "learning_rate": 7.28137210730256e-06, "loss": 0.5756, "step": 5820 }, { "epoch": 0.37, "grad_norm": 0.9779177308082581, "learning_rate": 7.280459089967861e-06, "loss": 0.4963, "step": 5821 }, { "epoch": 0.37, "grad_norm": 0.9846765398979187, "learning_rate": 7.279545976610961e-06, "loss": 0.5175, "step": 5822 }, { "epoch": 0.37, "grad_norm": 1.042790174484253, "learning_rate": 7.278632767270309e-06, "loss": 0.5478, "step": 5823 }, { "epoch": 0.37, "grad_norm": 0.9730501770973206, "learning_rate": 7.277719461984361e-06, "loss": 0.5141, "step": 5824 }, { "epoch": 0.37, "grad_norm": 0.9226754903793335, "learning_rate": 7.276806060791567e-06, "loss": 0.5409, "step": 5825 }, { "epoch": 0.37, "grad_norm": 1.015047311782837, "learning_rate": 7.275892563730393e-06, "loss": 0.5437, "step": 5826 }, { "epoch": 0.37, "grad_norm": 0.9851754307746887, "learning_rate": 7.274978970839297e-06, "loss": 0.5081, "step": 5827 }, { "epoch": 0.37, "grad_norm": 0.9413428902626038, "learning_rate": 7.274065282156752e-06, "loss": 0.4789, "step": 5828 }, { "epoch": 0.37, "grad_norm": 1.178230881690979, "learning_rate": 7.273151497721229e-06, "loss": 0.5777, "step": 5829 }, { "epoch": 0.37, "grad_norm": 0.9771174192428589, "learning_rate": 7.272237617571205e-06, "loss": 0.5226, "step": 5830 }, { "epoch": 0.37, "grad_norm": 1.155280351638794, "learning_rate": 7.2713236417451584e-06, "loss": 0.5712, "step": 5831 }, { "epoch": 0.37, "grad_norm": 0.9939926266670227, "learning_rate": 7.2704095702815754e-06, "loss": 0.5274, "step": 5832 }, { "epoch": 0.37, "grad_norm": 0.9951380491256714, "learning_rate": 7.269495403218943e-06, "loss": 0.5247, "step": 5833 }, { "epoch": 0.37, "grad_norm": 0.97422856092453, "learning_rate": 7.268581140595754e-06, "loss": 0.4867, "step": 5834 }, { "epoch": 0.37, "grad_norm": 1.0003308057785034, "learning_rate": 7.267666782450505e-06, "loss": 0.4925, "step": 5835 }, { "epoch": 0.37, "grad_norm": 1.0902355909347534, "learning_rate": 7.266752328821698e-06, "loss": 0.6134, "step": 5836 }, { "epoch": 0.37, "grad_norm": 0.9617888927459717, "learning_rate": 7.265837779747834e-06, "loss": 0.4861, "step": 5837 }, { "epoch": 0.37, "grad_norm": 1.0846583843231201, "learning_rate": 7.264923135267425e-06, "loss": 0.5432, "step": 5838 }, { "epoch": 0.37, "grad_norm": 1.1127210855484009, "learning_rate": 7.264008395418981e-06, "loss": 0.5714, "step": 5839 }, { "epoch": 0.37, "grad_norm": 0.9533615112304688, "learning_rate": 7.263093560241019e-06, "loss": 0.5281, "step": 5840 }, { "epoch": 0.37, "grad_norm": 0.9426731467247009, "learning_rate": 7.262178629772061e-06, "loss": 0.502, "step": 5841 }, { "epoch": 0.37, "grad_norm": 1.0847283601760864, "learning_rate": 7.261263604050628e-06, "loss": 0.5175, "step": 5842 }, { "epoch": 0.37, "grad_norm": 0.9640687108039856, "learning_rate": 7.260348483115254e-06, "loss": 0.4872, "step": 5843 }, { "epoch": 0.37, "grad_norm": 0.9335427284240723, "learning_rate": 7.259433267004466e-06, "loss": 0.5078, "step": 5844 }, { "epoch": 0.37, "grad_norm": 0.9635369181632996, "learning_rate": 7.258517955756805e-06, "loss": 0.5363, "step": 5845 }, { "epoch": 0.37, "grad_norm": 1.0456881523132324, "learning_rate": 7.257602549410808e-06, "loss": 0.5283, "step": 5846 }, { "epoch": 0.37, "grad_norm": 1.0658409595489502, "learning_rate": 7.256687048005024e-06, "loss": 0.5749, "step": 5847 }, { "epoch": 0.37, "grad_norm": 1.2358348369598389, "learning_rate": 7.255771451577996e-06, "loss": 0.5693, "step": 5848 }, { "epoch": 0.37, "grad_norm": 1.0727468729019165, "learning_rate": 7.254855760168281e-06, "loss": 0.5051, "step": 5849 }, { "epoch": 0.37, "grad_norm": 0.9722975492477417, "learning_rate": 7.2539399738144325e-06, "loss": 0.4984, "step": 5850 }, { "epoch": 0.37, "grad_norm": 1.1179519891738892, "learning_rate": 7.2530240925550145e-06, "loss": 0.5229, "step": 5851 }, { "epoch": 0.37, "grad_norm": 1.0390774011611938, "learning_rate": 7.252108116428589e-06, "loss": 0.52, "step": 5852 }, { "epoch": 0.37, "grad_norm": 1.1116324663162231, "learning_rate": 7.251192045473725e-06, "loss": 0.5284, "step": 5853 }, { "epoch": 0.37, "grad_norm": 1.048357605934143, "learning_rate": 7.250275879728995e-06, "loss": 0.5232, "step": 5854 }, { "epoch": 0.37, "grad_norm": 0.9675121307373047, "learning_rate": 7.249359619232976e-06, "loss": 0.5146, "step": 5855 }, { "epoch": 0.37, "grad_norm": 1.0100194215774536, "learning_rate": 7.24844326402425e-06, "loss": 0.5491, "step": 5856 }, { "epoch": 0.37, "grad_norm": 0.979152500629425, "learning_rate": 7.247526814141398e-06, "loss": 0.5108, "step": 5857 }, { "epoch": 0.37, "grad_norm": 0.9853137731552124, "learning_rate": 7.2466102696230115e-06, "loss": 0.5398, "step": 5858 }, { "epoch": 0.37, "grad_norm": 0.9618313312530518, "learning_rate": 7.24569363050768e-06, "loss": 0.5451, "step": 5859 }, { "epoch": 0.37, "grad_norm": 0.9729511141777039, "learning_rate": 7.244776896834004e-06, "loss": 0.5304, "step": 5860 }, { "epoch": 0.37, "grad_norm": 0.9838532209396362, "learning_rate": 7.243860068640581e-06, "loss": 0.5555, "step": 5861 }, { "epoch": 0.37, "grad_norm": 1.0647404193878174, "learning_rate": 7.242943145966016e-06, "loss": 0.5021, "step": 5862 }, { "epoch": 0.37, "grad_norm": 0.9827612638473511, "learning_rate": 7.242026128848918e-06, "loss": 0.5017, "step": 5863 }, { "epoch": 0.37, "grad_norm": 0.9512253999710083, "learning_rate": 7.241109017327901e-06, "loss": 0.5175, "step": 5864 }, { "epoch": 0.37, "grad_norm": 0.9890321493148804, "learning_rate": 7.240191811441577e-06, "loss": 0.5438, "step": 5865 }, { "epoch": 0.37, "grad_norm": 1.0261216163635254, "learning_rate": 7.239274511228569e-06, "loss": 0.5495, "step": 5866 }, { "epoch": 0.37, "grad_norm": 0.9640551209449768, "learning_rate": 7.238357116727502e-06, "loss": 0.5663, "step": 5867 }, { "epoch": 0.37, "grad_norm": 1.1027157306671143, "learning_rate": 7.2374396279770044e-06, "loss": 0.5347, "step": 5868 }, { "epoch": 0.37, "grad_norm": 0.9906113743782043, "learning_rate": 7.236522045015706e-06, "loss": 0.5146, "step": 5869 }, { "epoch": 0.37, "grad_norm": 1.0150017738342285, "learning_rate": 7.235604367882245e-06, "loss": 0.5514, "step": 5870 }, { "epoch": 0.37, "grad_norm": 1.0121136903762817, "learning_rate": 7.234686596615262e-06, "loss": 0.5075, "step": 5871 }, { "epoch": 0.37, "grad_norm": 1.036228895187378, "learning_rate": 7.2337687312534e-06, "loss": 0.5592, "step": 5872 }, { "epoch": 0.37, "grad_norm": 1.0446488857269287, "learning_rate": 7.232850771835307e-06, "loss": 0.5495, "step": 5873 }, { "epoch": 0.37, "grad_norm": 1.035109043121338, "learning_rate": 7.231932718399635e-06, "loss": 0.5271, "step": 5874 }, { "epoch": 0.37, "grad_norm": 1.0854578018188477, "learning_rate": 7.231014570985042e-06, "loss": 0.5426, "step": 5875 }, { "epoch": 0.37, "grad_norm": 1.036075234413147, "learning_rate": 7.230096329630185e-06, "loss": 0.5134, "step": 5876 }, { "epoch": 0.37, "grad_norm": 1.2155336141586304, "learning_rate": 7.22917799437373e-06, "loss": 0.508, "step": 5877 }, { "epoch": 0.37, "grad_norm": 1.0050281286239624, "learning_rate": 7.228259565254345e-06, "loss": 0.5759, "step": 5878 }, { "epoch": 0.37, "grad_norm": 1.069089651107788, "learning_rate": 7.227341042310702e-06, "loss": 0.5352, "step": 5879 }, { "epoch": 0.37, "grad_norm": 1.133564829826355, "learning_rate": 7.226422425581474e-06, "loss": 0.5747, "step": 5880 }, { "epoch": 0.37, "grad_norm": 1.0305187702178955, "learning_rate": 7.225503715105344e-06, "loss": 0.5182, "step": 5881 }, { "epoch": 0.37, "grad_norm": 0.9435314536094666, "learning_rate": 7.224584910920994e-06, "loss": 0.5672, "step": 5882 }, { "epoch": 0.37, "grad_norm": 1.0364540815353394, "learning_rate": 7.223666013067113e-06, "loss": 0.5042, "step": 5883 }, { "epoch": 0.37, "grad_norm": 1.054391860961914, "learning_rate": 7.222747021582392e-06, "loss": 0.5352, "step": 5884 }, { "epoch": 0.37, "grad_norm": 1.0646604299545288, "learning_rate": 7.221827936505524e-06, "loss": 0.5673, "step": 5885 }, { "epoch": 0.37, "grad_norm": 0.9911362528800964, "learning_rate": 7.220908757875214e-06, "loss": 0.5121, "step": 5886 }, { "epoch": 0.37, "grad_norm": 0.9722857475280762, "learning_rate": 7.21998948573016e-06, "loss": 0.5494, "step": 5887 }, { "epoch": 0.37, "grad_norm": 0.931648313999176, "learning_rate": 7.219070120109072e-06, "loss": 0.4944, "step": 5888 }, { "epoch": 0.37, "grad_norm": 1.0476207733154297, "learning_rate": 7.2181506610506605e-06, "loss": 0.5319, "step": 5889 }, { "epoch": 0.37, "grad_norm": 1.0259501934051514, "learning_rate": 7.217231108593642e-06, "loss": 0.5619, "step": 5890 }, { "epoch": 0.37, "grad_norm": 0.985495388507843, "learning_rate": 7.2163114627767336e-06, "loss": 0.5224, "step": 5891 }, { "epoch": 0.37, "grad_norm": 1.0021553039550781, "learning_rate": 7.21539172363866e-06, "loss": 0.5759, "step": 5892 }, { "epoch": 0.37, "grad_norm": 1.0142312049865723, "learning_rate": 7.214471891218147e-06, "loss": 0.5011, "step": 5893 }, { "epoch": 0.37, "grad_norm": 0.9844617247581482, "learning_rate": 7.213551965553927e-06, "loss": 0.5086, "step": 5894 }, { "epoch": 0.37, "grad_norm": 1.048954725265503, "learning_rate": 7.212631946684735e-06, "loss": 0.5307, "step": 5895 }, { "epoch": 0.37, "grad_norm": 0.9628361463546753, "learning_rate": 7.211711834649308e-06, "loss": 0.5715, "step": 5896 }, { "epoch": 0.37, "grad_norm": 1.0532171726226807, "learning_rate": 7.210791629486389e-06, "loss": 0.5546, "step": 5897 }, { "epoch": 0.37, "grad_norm": 1.0313563346862793, "learning_rate": 7.209871331234727e-06, "loss": 0.5123, "step": 5898 }, { "epoch": 0.37, "grad_norm": 1.0224592685699463, "learning_rate": 7.208950939933069e-06, "loss": 0.5466, "step": 5899 }, { "epoch": 0.37, "grad_norm": 0.9858354330062866, "learning_rate": 7.208030455620172e-06, "loss": 0.5493, "step": 5900 }, { "epoch": 0.37, "grad_norm": 1.0029754638671875, "learning_rate": 7.207109878334794e-06, "loss": 0.4909, "step": 5901 }, { "epoch": 0.37, "grad_norm": 0.9922599196434021, "learning_rate": 7.206189208115697e-06, "loss": 0.5534, "step": 5902 }, { "epoch": 0.37, "grad_norm": 0.9229944944381714, "learning_rate": 7.205268445001647e-06, "loss": 0.5001, "step": 5903 }, { "epoch": 0.37, "grad_norm": 1.019700050354004, "learning_rate": 7.204347589031413e-06, "loss": 0.5436, "step": 5904 }, { "epoch": 0.37, "grad_norm": 0.9846832752227783, "learning_rate": 7.203426640243772e-06, "loss": 0.5058, "step": 5905 }, { "epoch": 0.37, "grad_norm": 0.9663556814193726, "learning_rate": 7.2025055986775e-06, "loss": 0.5284, "step": 5906 }, { "epoch": 0.37, "grad_norm": 0.962640643119812, "learning_rate": 7.201584464371378e-06, "loss": 0.5636, "step": 5907 }, { "epoch": 0.37, "grad_norm": 1.0475239753723145, "learning_rate": 7.200663237364195e-06, "loss": 0.5509, "step": 5908 }, { "epoch": 0.37, "grad_norm": 1.0490944385528564, "learning_rate": 7.199741917694738e-06, "loss": 0.5452, "step": 5909 }, { "epoch": 0.37, "grad_norm": 1.0195828676223755, "learning_rate": 7.198820505401801e-06, "loss": 0.5843, "step": 5910 }, { "epoch": 0.37, "grad_norm": 1.0507577657699585, "learning_rate": 7.197899000524181e-06, "loss": 0.5732, "step": 5911 }, { "epoch": 0.37, "grad_norm": 1.0144188404083252, "learning_rate": 7.196977403100681e-06, "loss": 0.5377, "step": 5912 }, { "epoch": 0.37, "grad_norm": 1.0264872312545776, "learning_rate": 7.196055713170105e-06, "loss": 0.5162, "step": 5913 }, { "epoch": 0.37, "grad_norm": 0.9802569150924683, "learning_rate": 7.195133930771263e-06, "loss": 0.5609, "step": 5914 }, { "epoch": 0.37, "grad_norm": 1.0005505084991455, "learning_rate": 7.194212055942966e-06, "loss": 0.5513, "step": 5915 }, { "epoch": 0.37, "grad_norm": 0.9948568940162659, "learning_rate": 7.193290088724034e-06, "loss": 0.5248, "step": 5916 }, { "epoch": 0.37, "grad_norm": 1.050577998161316, "learning_rate": 7.192368029153285e-06, "loss": 0.5972, "step": 5917 }, { "epoch": 0.37, "grad_norm": 1.0255308151245117, "learning_rate": 7.191445877269548e-06, "loss": 0.4951, "step": 5918 }, { "epoch": 0.37, "grad_norm": 0.9985212683677673, "learning_rate": 7.190523633111644e-06, "loss": 0.5641, "step": 5919 }, { "epoch": 0.38, "grad_norm": 1.0271326303482056, "learning_rate": 7.189601296718413e-06, "loss": 0.5382, "step": 5920 }, { "epoch": 0.38, "grad_norm": 1.0106226205825806, "learning_rate": 7.188678868128687e-06, "loss": 0.5551, "step": 5921 }, { "epoch": 0.38, "grad_norm": 1.0215424299240112, "learning_rate": 7.18775634738131e-06, "loss": 0.5541, "step": 5922 }, { "epoch": 0.38, "grad_norm": 1.0225163698196411, "learning_rate": 7.18683373451512e-06, "loss": 0.5559, "step": 5923 }, { "epoch": 0.38, "grad_norm": 1.012471318244934, "learning_rate": 7.185911029568972e-06, "loss": 0.5423, "step": 5924 }, { "epoch": 0.38, "grad_norm": 1.021701455116272, "learning_rate": 7.184988232581713e-06, "loss": 0.5427, "step": 5925 }, { "epoch": 0.38, "grad_norm": 0.9949175715446472, "learning_rate": 7.184065343592203e-06, "loss": 0.5773, "step": 5926 }, { "epoch": 0.38, "grad_norm": 1.0874463319778442, "learning_rate": 7.183142362639296e-06, "loss": 0.5638, "step": 5927 }, { "epoch": 0.38, "grad_norm": 1.064543604850769, "learning_rate": 7.18221928976186e-06, "loss": 0.5246, "step": 5928 }, { "epoch": 0.38, "grad_norm": 0.9457411766052246, "learning_rate": 7.181296124998762e-06, "loss": 0.543, "step": 5929 }, { "epoch": 0.38, "grad_norm": 1.0419443845748901, "learning_rate": 7.180372868388873e-06, "loss": 0.5445, "step": 5930 }, { "epoch": 0.38, "grad_norm": 1.0131248235702515, "learning_rate": 7.179449519971066e-06, "loss": 0.5167, "step": 5931 }, { "epoch": 0.38, "grad_norm": 1.0083361864089966, "learning_rate": 7.178526079784221e-06, "loss": 0.5367, "step": 5932 }, { "epoch": 0.38, "grad_norm": 1.010665774345398, "learning_rate": 7.1776025478672225e-06, "loss": 0.5431, "step": 5933 }, { "epoch": 0.38, "grad_norm": 1.1985095739364624, "learning_rate": 7.176678924258955e-06, "loss": 0.5673, "step": 5934 }, { "epoch": 0.38, "grad_norm": 0.9250592589378357, "learning_rate": 7.175755208998311e-06, "loss": 0.4978, "step": 5935 }, { "epoch": 0.38, "grad_norm": 0.9751607775688171, "learning_rate": 7.174831402124184e-06, "loss": 0.5452, "step": 5936 }, { "epoch": 0.38, "grad_norm": 1.0652177333831787, "learning_rate": 7.173907503675472e-06, "loss": 0.5584, "step": 5937 }, { "epoch": 0.38, "grad_norm": 1.144456148147583, "learning_rate": 7.172983513691076e-06, "loss": 0.5653, "step": 5938 }, { "epoch": 0.38, "grad_norm": 1.030582070350647, "learning_rate": 7.172059432209907e-06, "loss": 0.5257, "step": 5939 }, { "epoch": 0.38, "grad_norm": 1.0467146635055542, "learning_rate": 7.171135259270868e-06, "loss": 0.5582, "step": 5940 }, { "epoch": 0.38, "grad_norm": 1.044650673866272, "learning_rate": 7.170210994912878e-06, "loss": 0.5191, "step": 5941 }, { "epoch": 0.38, "grad_norm": 1.049615740776062, "learning_rate": 7.169286639174852e-06, "loss": 0.5332, "step": 5942 }, { "epoch": 0.38, "grad_norm": 1.1635546684265137, "learning_rate": 7.168362192095712e-06, "loss": 0.562, "step": 5943 }, { "epoch": 0.38, "grad_norm": 1.193900227546692, "learning_rate": 7.1674376537143845e-06, "loss": 0.5554, "step": 5944 }, { "epoch": 0.38, "grad_norm": 1.0751802921295166, "learning_rate": 7.166513024069797e-06, "loss": 0.5491, "step": 5945 }, { "epoch": 0.38, "grad_norm": 0.9560760259628296, "learning_rate": 7.16558830320088e-06, "loss": 0.5585, "step": 5946 }, { "epoch": 0.38, "grad_norm": 1.0157760381698608, "learning_rate": 7.1646634911465765e-06, "loss": 0.4894, "step": 5947 }, { "epoch": 0.38, "grad_norm": 1.0313069820404053, "learning_rate": 7.163738587945822e-06, "loss": 0.5303, "step": 5948 }, { "epoch": 0.38, "grad_norm": 1.0127816200256348, "learning_rate": 7.162813593637563e-06, "loss": 0.5285, "step": 5949 }, { "epoch": 0.38, "grad_norm": 1.0272399187088013, "learning_rate": 7.161888508260748e-06, "loss": 0.5537, "step": 5950 }, { "epoch": 0.38, "grad_norm": 1.013628602027893, "learning_rate": 7.160963331854327e-06, "loss": 0.4851, "step": 5951 }, { "epoch": 0.38, "grad_norm": 1.007154107093811, "learning_rate": 7.16003806445726e-06, "loss": 0.5287, "step": 5952 }, { "epoch": 0.38, "grad_norm": 1.0067106485366821, "learning_rate": 7.159112706108502e-06, "loss": 0.4884, "step": 5953 }, { "epoch": 0.38, "grad_norm": 0.994032621383667, "learning_rate": 7.15818725684702e-06, "loss": 0.5397, "step": 5954 }, { "epoch": 0.38, "grad_norm": 1.0723838806152344, "learning_rate": 7.15726171671178e-06, "loss": 0.5378, "step": 5955 }, { "epoch": 0.38, "grad_norm": 0.9964460730552673, "learning_rate": 7.156336085741755e-06, "loss": 0.5543, "step": 5956 }, { "epoch": 0.38, "grad_norm": 1.007655382156372, "learning_rate": 7.155410363975916e-06, "loss": 0.5154, "step": 5957 }, { "epoch": 0.38, "grad_norm": 1.0438203811645508, "learning_rate": 7.154484551453247e-06, "loss": 0.5611, "step": 5958 }, { "epoch": 0.38, "grad_norm": 1.007995367050171, "learning_rate": 7.1535586482127284e-06, "loss": 0.5762, "step": 5959 }, { "epoch": 0.38, "grad_norm": 1.0440713167190552, "learning_rate": 7.152632654293347e-06, "loss": 0.5378, "step": 5960 }, { "epoch": 0.38, "grad_norm": 1.0187562704086304, "learning_rate": 7.151706569734091e-06, "loss": 0.5286, "step": 5961 }, { "epoch": 0.38, "grad_norm": 0.8911324143409729, "learning_rate": 7.150780394573957e-06, "loss": 0.5182, "step": 5962 }, { "epoch": 0.38, "grad_norm": 1.0653436183929443, "learning_rate": 7.149854128851945e-06, "loss": 0.5198, "step": 5963 }, { "epoch": 0.38, "grad_norm": 1.015015959739685, "learning_rate": 7.148927772607053e-06, "loss": 0.5289, "step": 5964 }, { "epoch": 0.38, "grad_norm": 1.0303547382354736, "learning_rate": 7.148001325878287e-06, "loss": 0.4941, "step": 5965 }, { "epoch": 0.38, "grad_norm": 1.0739134550094604, "learning_rate": 7.147074788704659e-06, "loss": 0.552, "step": 5966 }, { "epoch": 0.38, "grad_norm": 1.0836094617843628, "learning_rate": 7.14614816112518e-06, "loss": 0.5439, "step": 5967 }, { "epoch": 0.38, "grad_norm": 0.9984629154205322, "learning_rate": 7.145221443178868e-06, "loss": 0.5378, "step": 5968 }, { "epoch": 0.38, "grad_norm": 1.0398366451263428, "learning_rate": 7.144294634904744e-06, "loss": 0.5474, "step": 5969 }, { "epoch": 0.38, "grad_norm": 1.1328619718551636, "learning_rate": 7.143367736341832e-06, "loss": 0.6143, "step": 5970 }, { "epoch": 0.38, "grad_norm": 1.1427115201950073, "learning_rate": 7.142440747529161e-06, "loss": 0.5713, "step": 5971 }, { "epoch": 0.38, "grad_norm": 1.063494324684143, "learning_rate": 7.141513668505764e-06, "loss": 0.5325, "step": 5972 }, { "epoch": 0.38, "grad_norm": 1.0437992811203003, "learning_rate": 7.140586499310674e-06, "loss": 0.5313, "step": 5973 }, { "epoch": 0.38, "grad_norm": 0.9050129055976868, "learning_rate": 7.139659239982935e-06, "loss": 0.4815, "step": 5974 }, { "epoch": 0.38, "grad_norm": 0.9722835421562195, "learning_rate": 7.138731890561589e-06, "loss": 0.5278, "step": 5975 }, { "epoch": 0.38, "grad_norm": 1.006160855293274, "learning_rate": 7.1378044510856814e-06, "loss": 0.5462, "step": 5976 }, { "epoch": 0.38, "grad_norm": 1.033493995666504, "learning_rate": 7.136876921594267e-06, "loss": 0.5524, "step": 5977 }, { "epoch": 0.38, "grad_norm": 1.030398964881897, "learning_rate": 7.1359493021263986e-06, "loss": 0.5793, "step": 5978 }, { "epoch": 0.38, "grad_norm": 1.004825472831726, "learning_rate": 7.135021592721134e-06, "loss": 0.5153, "step": 5979 }, { "epoch": 0.38, "grad_norm": 0.9759687185287476, "learning_rate": 7.134093793417539e-06, "loss": 0.5439, "step": 5980 }, { "epoch": 0.38, "grad_norm": 1.0009828805923462, "learning_rate": 7.133165904254677e-06, "loss": 0.5331, "step": 5981 }, { "epoch": 0.38, "grad_norm": 1.038694977760315, "learning_rate": 7.132237925271621e-06, "loss": 0.5276, "step": 5982 }, { "epoch": 0.38, "grad_norm": 0.9898457527160645, "learning_rate": 7.131309856507444e-06, "loss": 0.5934, "step": 5983 }, { "epoch": 0.38, "grad_norm": 1.0886754989624023, "learning_rate": 7.13038169800122e-06, "loss": 0.5706, "step": 5984 }, { "epoch": 0.38, "grad_norm": 1.0914766788482666, "learning_rate": 7.129453449792036e-06, "loss": 0.564, "step": 5985 }, { "epoch": 0.38, "grad_norm": 1.0608938932418823, "learning_rate": 7.1285251119189754e-06, "loss": 0.52, "step": 5986 }, { "epoch": 0.38, "grad_norm": 1.1529028415679932, "learning_rate": 7.127596684421127e-06, "loss": 0.5301, "step": 5987 }, { "epoch": 0.38, "grad_norm": 0.9991157650947571, "learning_rate": 7.126668167337583e-06, "loss": 0.5239, "step": 5988 }, { "epoch": 0.38, "grad_norm": 0.9912459850311279, "learning_rate": 7.12573956070744e-06, "loss": 0.573, "step": 5989 }, { "epoch": 0.38, "grad_norm": 1.0392462015151978, "learning_rate": 7.1248108645698e-06, "loss": 0.5405, "step": 5990 }, { "epoch": 0.38, "grad_norm": 1.0258725881576538, "learning_rate": 7.123882078963766e-06, "loss": 0.5184, "step": 5991 }, { "epoch": 0.38, "grad_norm": 1.0548800230026245, "learning_rate": 7.1229532039284455e-06, "loss": 0.5775, "step": 5992 }, { "epoch": 0.38, "grad_norm": 1.0427476167678833, "learning_rate": 7.122024239502951e-06, "loss": 0.5358, "step": 5993 }, { "epoch": 0.38, "grad_norm": 1.0223079919815063, "learning_rate": 7.121095185726399e-06, "loss": 0.5779, "step": 5994 }, { "epoch": 0.38, "grad_norm": 1.0239362716674805, "learning_rate": 7.120166042637906e-06, "loss": 0.5594, "step": 5995 }, { "epoch": 0.38, "grad_norm": 0.9494137167930603, "learning_rate": 7.119236810276598e-06, "loss": 0.4868, "step": 5996 }, { "epoch": 0.38, "grad_norm": 1.037915825843811, "learning_rate": 7.118307488681598e-06, "loss": 0.5636, "step": 5997 }, { "epoch": 0.38, "grad_norm": 0.9847166538238525, "learning_rate": 7.11737807789204e-06, "loss": 0.5158, "step": 5998 }, { "epoch": 0.38, "grad_norm": 1.1142280101776123, "learning_rate": 7.116448577947057e-06, "loss": 0.5092, "step": 5999 }, { "epoch": 0.38, "grad_norm": 0.9346677660942078, "learning_rate": 7.115518988885785e-06, "loss": 0.5324, "step": 6000 }, { "epoch": 0.38, "grad_norm": 1.0473169088363647, "learning_rate": 7.114589310747371e-06, "loss": 0.5687, "step": 6001 }, { "epoch": 0.38, "grad_norm": 1.0970706939697266, "learning_rate": 7.113659543570956e-06, "loss": 0.5676, "step": 6002 }, { "epoch": 0.38, "grad_norm": 0.9799151420593262, "learning_rate": 7.11272968739569e-06, "loss": 0.4992, "step": 6003 }, { "epoch": 0.38, "grad_norm": 0.9896637201309204, "learning_rate": 7.1117997422607264e-06, "loss": 0.5302, "step": 6004 }, { "epoch": 0.38, "grad_norm": 0.9624154567718506, "learning_rate": 7.110869708205224e-06, "loss": 0.5075, "step": 6005 }, { "epoch": 0.38, "grad_norm": 0.9685401320457458, "learning_rate": 7.109939585268339e-06, "loss": 0.5583, "step": 6006 }, { "epoch": 0.38, "grad_norm": 1.057396650314331, "learning_rate": 7.109009373489239e-06, "loss": 0.5373, "step": 6007 }, { "epoch": 0.38, "grad_norm": 1.065391182899475, "learning_rate": 7.10807907290709e-06, "loss": 0.5856, "step": 6008 }, { "epoch": 0.38, "grad_norm": 1.111362099647522, "learning_rate": 7.107148683561066e-06, "loss": 0.5181, "step": 6009 }, { "epoch": 0.38, "grad_norm": 0.9908615350723267, "learning_rate": 7.106218205490342e-06, "loss": 0.5563, "step": 6010 }, { "epoch": 0.38, "grad_norm": 1.030978798866272, "learning_rate": 7.105287638734093e-06, "loss": 0.5814, "step": 6011 }, { "epoch": 0.38, "grad_norm": 1.0351821184158325, "learning_rate": 7.104356983331509e-06, "loss": 0.543, "step": 6012 }, { "epoch": 0.38, "grad_norm": 1.0720235109329224, "learning_rate": 7.1034262393217705e-06, "loss": 0.512, "step": 6013 }, { "epoch": 0.38, "grad_norm": 1.1268174648284912, "learning_rate": 7.1024954067440725e-06, "loss": 0.5858, "step": 6014 }, { "epoch": 0.38, "grad_norm": 1.09067964553833, "learning_rate": 7.101564485637603e-06, "loss": 0.5348, "step": 6015 }, { "epoch": 0.38, "grad_norm": 0.9996499419212341, "learning_rate": 7.1006334760415674e-06, "loss": 0.5254, "step": 6016 }, { "epoch": 0.38, "grad_norm": 1.0276721715927124, "learning_rate": 7.0997023779951625e-06, "loss": 0.5662, "step": 6017 }, { "epoch": 0.38, "grad_norm": 1.125401496887207, "learning_rate": 7.098771191537596e-06, "loss": 0.6017, "step": 6018 }, { "epoch": 0.38, "grad_norm": 1.0361688137054443, "learning_rate": 7.097839916708073e-06, "loss": 0.5537, "step": 6019 }, { "epoch": 0.38, "grad_norm": 1.1818232536315918, "learning_rate": 7.096908553545812e-06, "loss": 0.5483, "step": 6020 }, { "epoch": 0.38, "grad_norm": 0.9446892142295837, "learning_rate": 7.095977102090025e-06, "loss": 0.5471, "step": 6021 }, { "epoch": 0.38, "grad_norm": 1.089293360710144, "learning_rate": 7.095045562379934e-06, "loss": 0.5183, "step": 6022 }, { "epoch": 0.38, "grad_norm": 0.9710480570793152, "learning_rate": 7.0941139344547605e-06, "loss": 0.5095, "step": 6023 }, { "epoch": 0.38, "grad_norm": 1.0657448768615723, "learning_rate": 7.093182218353737e-06, "loss": 0.5713, "step": 6024 }, { "epoch": 0.38, "grad_norm": 1.0107380151748657, "learning_rate": 7.092250414116091e-06, "loss": 0.4987, "step": 6025 }, { "epoch": 0.38, "grad_norm": 0.9729856848716736, "learning_rate": 7.091318521781058e-06, "loss": 0.5294, "step": 6026 }, { "epoch": 0.38, "grad_norm": 1.0331482887268066, "learning_rate": 7.090386541387878e-06, "loss": 0.5804, "step": 6027 }, { "epoch": 0.38, "grad_norm": 0.917664110660553, "learning_rate": 7.089454472975792e-06, "loss": 0.5312, "step": 6028 }, { "epoch": 0.38, "grad_norm": 0.9795371294021606, "learning_rate": 7.088522316584048e-06, "loss": 0.5235, "step": 6029 }, { "epoch": 0.38, "grad_norm": 0.9877316355705261, "learning_rate": 7.087590072251893e-06, "loss": 0.5196, "step": 6030 }, { "epoch": 0.38, "grad_norm": 0.9603331089019775, "learning_rate": 7.086657740018582e-06, "loss": 0.5331, "step": 6031 }, { "epoch": 0.38, "grad_norm": 0.926292896270752, "learning_rate": 7.085725319923373e-06, "loss": 0.5002, "step": 6032 }, { "epoch": 0.38, "grad_norm": 1.0340707302093506, "learning_rate": 7.084792812005528e-06, "loss": 0.5793, "step": 6033 }, { "epoch": 0.38, "grad_norm": 1.0735664367675781, "learning_rate": 7.083860216304309e-06, "loss": 0.5494, "step": 6034 }, { "epoch": 0.38, "grad_norm": 1.0449724197387695, "learning_rate": 7.082927532858985e-06, "loss": 0.5082, "step": 6035 }, { "epoch": 0.38, "grad_norm": 1.1143243312835693, "learning_rate": 7.0819947617088294e-06, "loss": 0.5323, "step": 6036 }, { "epoch": 0.38, "grad_norm": 1.0565412044525146, "learning_rate": 7.081061902893117e-06, "loss": 0.5603, "step": 6037 }, { "epoch": 0.38, "grad_norm": 1.1616407632827759, "learning_rate": 7.080128956451125e-06, "loss": 0.5288, "step": 6038 }, { "epoch": 0.38, "grad_norm": 1.060279369354248, "learning_rate": 7.079195922422143e-06, "loss": 0.534, "step": 6039 }, { "epoch": 0.38, "grad_norm": 1.040551781654358, "learning_rate": 7.078262800845453e-06, "loss": 0.5331, "step": 6040 }, { "epoch": 0.38, "grad_norm": 1.0278587341308594, "learning_rate": 7.0773295917603445e-06, "loss": 0.5345, "step": 6041 }, { "epoch": 0.38, "grad_norm": 0.9772768020629883, "learning_rate": 7.076396295206113e-06, "loss": 0.5115, "step": 6042 }, { "epoch": 0.38, "grad_norm": 1.0806363821029663, "learning_rate": 7.075462911222057e-06, "loss": 0.5101, "step": 6043 }, { "epoch": 0.38, "grad_norm": 1.0037871599197388, "learning_rate": 7.07452943984748e-06, "loss": 0.5258, "step": 6044 }, { "epoch": 0.38, "grad_norm": 0.9234239459037781, "learning_rate": 7.073595881121683e-06, "loss": 0.5169, "step": 6045 }, { "epoch": 0.38, "grad_norm": 1.0970360040664673, "learning_rate": 7.072662235083977e-06, "loss": 0.5895, "step": 6046 }, { "epoch": 0.38, "grad_norm": 1.041857123374939, "learning_rate": 7.071728501773675e-06, "loss": 0.555, "step": 6047 }, { "epoch": 0.38, "grad_norm": 1.0813242197036743, "learning_rate": 7.070794681230093e-06, "loss": 0.5576, "step": 6048 }, { "epoch": 0.38, "grad_norm": 1.0041872262954712, "learning_rate": 7.06986077349255e-06, "loss": 0.5532, "step": 6049 }, { "epoch": 0.38, "grad_norm": 0.91943359375, "learning_rate": 7.068926778600372e-06, "loss": 0.5303, "step": 6050 }, { "epoch": 0.38, "grad_norm": 0.9743295311927795, "learning_rate": 7.067992696592882e-06, "loss": 0.548, "step": 6051 }, { "epoch": 0.38, "grad_norm": 1.0182067155838013, "learning_rate": 7.067058527509416e-06, "loss": 0.5124, "step": 6052 }, { "epoch": 0.38, "grad_norm": 1.0509822368621826, "learning_rate": 7.066124271389305e-06, "loss": 0.483, "step": 6053 }, { "epoch": 0.38, "grad_norm": 0.9800784587860107, "learning_rate": 7.0651899282718896e-06, "loss": 0.578, "step": 6054 }, { "epoch": 0.38, "grad_norm": 1.1136858463287354, "learning_rate": 7.064255498196509e-06, "loss": 0.5482, "step": 6055 }, { "epoch": 0.38, "grad_norm": 1.0547521114349365, "learning_rate": 7.0633209812025116e-06, "loss": 0.5467, "step": 6056 }, { "epoch": 0.38, "grad_norm": 0.9685391187667847, "learning_rate": 7.062386377329245e-06, "loss": 0.5177, "step": 6057 }, { "epoch": 0.38, "grad_norm": 1.0092555284500122, "learning_rate": 7.061451686616062e-06, "loss": 0.5246, "step": 6058 }, { "epoch": 0.38, "grad_norm": 1.109516978263855, "learning_rate": 7.0605169091023205e-06, "loss": 0.5814, "step": 6059 }, { "epoch": 0.38, "grad_norm": 1.1080363988876343, "learning_rate": 7.05958204482738e-06, "loss": 0.5113, "step": 6060 }, { "epoch": 0.38, "grad_norm": 1.1008321046829224, "learning_rate": 7.058647093830604e-06, "loss": 0.5606, "step": 6061 }, { "epoch": 0.38, "grad_norm": 1.029272437095642, "learning_rate": 7.0577120561513604e-06, "loss": 0.5364, "step": 6062 }, { "epoch": 0.38, "grad_norm": 1.0312248468399048, "learning_rate": 7.056776931829021e-06, "loss": 0.488, "step": 6063 }, { "epoch": 0.38, "grad_norm": 1.0757166147232056, "learning_rate": 7.055841720902959e-06, "loss": 0.5122, "step": 6064 }, { "epoch": 0.38, "grad_norm": 1.0667598247528076, "learning_rate": 7.054906423412554e-06, "loss": 0.5817, "step": 6065 }, { "epoch": 0.38, "grad_norm": 1.0422972440719604, "learning_rate": 7.053971039397188e-06, "loss": 0.5785, "step": 6066 }, { "epoch": 0.38, "grad_norm": 1.0605396032333374, "learning_rate": 7.0530355688962484e-06, "loss": 0.5636, "step": 6067 }, { "epoch": 0.38, "grad_norm": 1.014585018157959, "learning_rate": 7.0521000119491215e-06, "loss": 0.5179, "step": 6068 }, { "epoch": 0.38, "grad_norm": 1.1167665719985962, "learning_rate": 7.0511643685952014e-06, "loss": 0.5565, "step": 6069 }, { "epoch": 0.38, "grad_norm": 1.0698355436325073, "learning_rate": 7.050228638873886e-06, "loss": 0.5141, "step": 6070 }, { "epoch": 0.38, "grad_norm": 1.0831199884414673, "learning_rate": 7.049292822824575e-06, "loss": 0.5896, "step": 6071 }, { "epoch": 0.38, "grad_norm": 1.045108437538147, "learning_rate": 7.048356920486672e-06, "loss": 0.558, "step": 6072 }, { "epoch": 0.38, "grad_norm": 1.0129737854003906, "learning_rate": 7.047420931899585e-06, "loss": 0.5345, "step": 6073 }, { "epoch": 0.38, "grad_norm": 1.0114727020263672, "learning_rate": 7.0464848571027246e-06, "loss": 0.5566, "step": 6074 }, { "epoch": 0.38, "grad_norm": 1.0448603630065918, "learning_rate": 7.045548696135506e-06, "loss": 0.5243, "step": 6075 }, { "epoch": 0.38, "grad_norm": 1.061661958694458, "learning_rate": 7.044612449037348e-06, "loss": 0.5884, "step": 6076 }, { "epoch": 0.39, "grad_norm": 1.001895546913147, "learning_rate": 7.0436761158476715e-06, "loss": 0.5432, "step": 6077 }, { "epoch": 0.39, "grad_norm": 1.007230281829834, "learning_rate": 7.042739696605905e-06, "loss": 0.5211, "step": 6078 }, { "epoch": 0.39, "grad_norm": 0.9304594993591309, "learning_rate": 7.041803191351475e-06, "loss": 0.5272, "step": 6079 }, { "epoch": 0.39, "grad_norm": 1.011073350906372, "learning_rate": 7.040866600123816e-06, "loss": 0.5678, "step": 6080 }, { "epoch": 0.39, "grad_norm": 1.0891047716140747, "learning_rate": 7.039929922962363e-06, "loss": 0.5529, "step": 6081 }, { "epoch": 0.39, "grad_norm": 1.0254418849945068, "learning_rate": 7.038993159906558e-06, "loss": 0.5572, "step": 6082 }, { "epoch": 0.39, "grad_norm": 0.9843958020210266, "learning_rate": 7.0380563109958445e-06, "loss": 0.5261, "step": 6083 }, { "epoch": 0.39, "grad_norm": 1.045296549797058, "learning_rate": 7.03711937626967e-06, "loss": 0.547, "step": 6084 }, { "epoch": 0.39, "grad_norm": 0.9561773538589478, "learning_rate": 7.036182355767485e-06, "loss": 0.515, "step": 6085 }, { "epoch": 0.39, "grad_norm": 0.9849733114242554, "learning_rate": 7.0352452495287435e-06, "loss": 0.5254, "step": 6086 }, { "epoch": 0.39, "grad_norm": 1.0817680358886719, "learning_rate": 7.034308057592907e-06, "loss": 0.4934, "step": 6087 }, { "epoch": 0.39, "grad_norm": 1.0080195665359497, "learning_rate": 7.033370779999431e-06, "loss": 0.5738, "step": 6088 }, { "epoch": 0.39, "grad_norm": 0.9855676293373108, "learning_rate": 7.032433416787788e-06, "loss": 0.5446, "step": 6089 }, { "epoch": 0.39, "grad_norm": 0.9518822431564331, "learning_rate": 7.031495967997444e-06, "loss": 0.4807, "step": 6090 }, { "epoch": 0.39, "grad_norm": 0.9908321499824524, "learning_rate": 7.0305584336678715e-06, "loss": 0.5243, "step": 6091 }, { "epoch": 0.39, "grad_norm": 0.9574987888336182, "learning_rate": 7.029620813838544e-06, "loss": 0.502, "step": 6092 }, { "epoch": 0.39, "grad_norm": 1.018323302268982, "learning_rate": 7.02868310854895e-06, "loss": 0.516, "step": 6093 }, { "epoch": 0.39, "grad_norm": 1.0103620290756226, "learning_rate": 7.027745317838564e-06, "loss": 0.5337, "step": 6094 }, { "epoch": 0.39, "grad_norm": 1.0037866830825806, "learning_rate": 7.026807441746879e-06, "loss": 0.5451, "step": 6095 }, { "epoch": 0.39, "grad_norm": 1.0321918725967407, "learning_rate": 7.025869480313381e-06, "loss": 0.5442, "step": 6096 }, { "epoch": 0.39, "grad_norm": 1.0174425840377808, "learning_rate": 7.0249314335775675e-06, "loss": 0.5187, "step": 6097 }, { "epoch": 0.39, "grad_norm": 0.9343465566635132, "learning_rate": 7.023993301578935e-06, "loss": 0.4886, "step": 6098 }, { "epoch": 0.39, "grad_norm": 1.0583616495132446, "learning_rate": 7.023055084356987e-06, "loss": 0.4964, "step": 6099 }, { "epoch": 0.39, "grad_norm": 1.017966866493225, "learning_rate": 7.022116781951226e-06, "loss": 0.5475, "step": 6100 }, { "epoch": 0.39, "grad_norm": 0.9800785779953003, "learning_rate": 7.021178394401162e-06, "loss": 0.5518, "step": 6101 }, { "epoch": 0.39, "grad_norm": 0.9927045702934265, "learning_rate": 7.020239921746308e-06, "loss": 0.5416, "step": 6102 }, { "epoch": 0.39, "grad_norm": 1.0396851301193237, "learning_rate": 7.019301364026178e-06, "loss": 0.5618, "step": 6103 }, { "epoch": 0.39, "grad_norm": 1.045152187347412, "learning_rate": 7.018362721280292e-06, "loss": 0.5206, "step": 6104 }, { "epoch": 0.39, "grad_norm": 1.0131950378417969, "learning_rate": 7.0174239935481735e-06, "loss": 0.5591, "step": 6105 }, { "epoch": 0.39, "grad_norm": 1.0114343166351318, "learning_rate": 7.016485180869349e-06, "loss": 0.5126, "step": 6106 }, { "epoch": 0.39, "grad_norm": 1.071434736251831, "learning_rate": 7.015546283283346e-06, "loss": 0.4994, "step": 6107 }, { "epoch": 0.39, "grad_norm": 1.11801278591156, "learning_rate": 7.014607300829703e-06, "loss": 0.5724, "step": 6108 }, { "epoch": 0.39, "grad_norm": 1.0889805555343628, "learning_rate": 7.013668233547955e-06, "loss": 0.5321, "step": 6109 }, { "epoch": 0.39, "grad_norm": 1.009221076965332, "learning_rate": 7.0127290814776424e-06, "loss": 0.5145, "step": 6110 }, { "epoch": 0.39, "grad_norm": 1.04290771484375, "learning_rate": 7.0117898446583084e-06, "loss": 0.5851, "step": 6111 }, { "epoch": 0.39, "grad_norm": 1.0475056171417236, "learning_rate": 7.010850523129504e-06, "loss": 0.5237, "step": 6112 }, { "epoch": 0.39, "grad_norm": 1.01203453540802, "learning_rate": 7.009911116930779e-06, "loss": 0.5497, "step": 6113 }, { "epoch": 0.39, "grad_norm": 1.1558852195739746, "learning_rate": 7.00897162610169e-06, "loss": 0.5615, "step": 6114 }, { "epoch": 0.39, "grad_norm": 1.0910730361938477, "learning_rate": 7.0080320506817926e-06, "loss": 0.5076, "step": 6115 }, { "epoch": 0.39, "grad_norm": 0.9866907000541687, "learning_rate": 7.007092390710652e-06, "loss": 0.525, "step": 6116 }, { "epoch": 0.39, "grad_norm": 0.9702544808387756, "learning_rate": 7.006152646227833e-06, "loss": 0.4603, "step": 6117 }, { "epoch": 0.39, "grad_norm": 0.9692726731300354, "learning_rate": 7.005212817272905e-06, "loss": 0.5746, "step": 6118 }, { "epoch": 0.39, "grad_norm": 1.0775811672210693, "learning_rate": 7.0042729038854405e-06, "loss": 0.5711, "step": 6119 }, { "epoch": 0.39, "grad_norm": 0.9793890118598938, "learning_rate": 7.003332906105016e-06, "loss": 0.5655, "step": 6120 }, { "epoch": 0.39, "grad_norm": 1.0329787731170654, "learning_rate": 7.002392823971214e-06, "loss": 0.5754, "step": 6121 }, { "epoch": 0.39, "grad_norm": 0.9337785840034485, "learning_rate": 7.001452657523614e-06, "loss": 0.4502, "step": 6122 }, { "epoch": 0.39, "grad_norm": 1.0856430530548096, "learning_rate": 7.000512406801805e-06, "loss": 0.5455, "step": 6123 }, { "epoch": 0.39, "grad_norm": 0.9802913069725037, "learning_rate": 6.9995720718453786e-06, "loss": 0.518, "step": 6124 }, { "epoch": 0.39, "grad_norm": 1.1211453676223755, "learning_rate": 6.998631652693928e-06, "loss": 0.5777, "step": 6125 }, { "epoch": 0.39, "grad_norm": 0.989659309387207, "learning_rate": 6.997691149387052e-06, "loss": 0.5355, "step": 6126 }, { "epoch": 0.39, "grad_norm": 0.9833707809448242, "learning_rate": 6.99675056196435e-06, "loss": 0.528, "step": 6127 }, { "epoch": 0.39, "grad_norm": 0.9955346584320068, "learning_rate": 6.995809890465428e-06, "loss": 0.5341, "step": 6128 }, { "epoch": 0.39, "grad_norm": 0.9848581552505493, "learning_rate": 6.994869134929895e-06, "loss": 0.56, "step": 6129 }, { "epoch": 0.39, "grad_norm": 1.0137369632720947, "learning_rate": 6.993928295397363e-06, "loss": 0.5487, "step": 6130 }, { "epoch": 0.39, "grad_norm": 1.0635900497436523, "learning_rate": 6.992987371907446e-06, "loss": 0.5587, "step": 6131 }, { "epoch": 0.39, "grad_norm": 0.9602803587913513, "learning_rate": 6.992046364499764e-06, "loss": 0.5159, "step": 6132 }, { "epoch": 0.39, "grad_norm": 0.9842841029167175, "learning_rate": 6.991105273213939e-06, "loss": 0.491, "step": 6133 }, { "epoch": 0.39, "grad_norm": 1.0112354755401611, "learning_rate": 6.990164098089598e-06, "loss": 0.539, "step": 6134 }, { "epoch": 0.39, "grad_norm": 0.9850631356239319, "learning_rate": 6.9892228391663694e-06, "loss": 0.5184, "step": 6135 }, { "epoch": 0.39, "grad_norm": 0.9878668189048767, "learning_rate": 6.988281496483888e-06, "loss": 0.5472, "step": 6136 }, { "epoch": 0.39, "grad_norm": 1.0139793157577515, "learning_rate": 6.987340070081789e-06, "loss": 0.5217, "step": 6137 }, { "epoch": 0.39, "grad_norm": 1.041130781173706, "learning_rate": 6.986398559999714e-06, "loss": 0.5372, "step": 6138 }, { "epoch": 0.39, "grad_norm": 0.9763038158416748, "learning_rate": 6.9854569662773044e-06, "loss": 0.4826, "step": 6139 }, { "epoch": 0.39, "grad_norm": 1.0625923871994019, "learning_rate": 6.984515288954211e-06, "loss": 0.5403, "step": 6140 }, { "epoch": 0.39, "grad_norm": 1.0266550779342651, "learning_rate": 6.98357352807008e-06, "loss": 0.4992, "step": 6141 }, { "epoch": 0.39, "grad_norm": 0.9099301099777222, "learning_rate": 6.982631683664569e-06, "loss": 0.4971, "step": 6142 }, { "epoch": 0.39, "grad_norm": 0.9947066903114319, "learning_rate": 6.981689755777335e-06, "loss": 0.48, "step": 6143 }, { "epoch": 0.39, "grad_norm": 0.9713681936264038, "learning_rate": 6.98074774444804e-06, "loss": 0.4948, "step": 6144 }, { "epoch": 0.39, "grad_norm": 0.9177002310752869, "learning_rate": 6.979805649716347e-06, "loss": 0.4484, "step": 6145 }, { "epoch": 0.39, "grad_norm": 1.0028992891311646, "learning_rate": 6.978863471621925e-06, "loss": 0.6121, "step": 6146 }, { "epoch": 0.39, "grad_norm": 1.065828561782837, "learning_rate": 6.977921210204446e-06, "loss": 0.5806, "step": 6147 }, { "epoch": 0.39, "grad_norm": 0.9567848443984985, "learning_rate": 6.9769788655035875e-06, "loss": 0.5258, "step": 6148 }, { "epoch": 0.39, "grad_norm": 1.0322932004928589, "learning_rate": 6.976036437559024e-06, "loss": 0.6068, "step": 6149 }, { "epoch": 0.39, "grad_norm": 0.9327954053878784, "learning_rate": 6.975093926410441e-06, "loss": 0.5294, "step": 6150 }, { "epoch": 0.39, "grad_norm": 1.0788025856018066, "learning_rate": 6.974151332097525e-06, "loss": 0.5618, "step": 6151 }, { "epoch": 0.39, "grad_norm": 1.0738791227340698, "learning_rate": 6.973208654659962e-06, "loss": 0.5739, "step": 6152 }, { "epoch": 0.39, "grad_norm": 1.1076157093048096, "learning_rate": 6.9722658941374475e-06, "loss": 0.5123, "step": 6153 }, { "epoch": 0.39, "grad_norm": 0.8980148434638977, "learning_rate": 6.971323050569677e-06, "loss": 0.5308, "step": 6154 }, { "epoch": 0.39, "grad_norm": 0.9834483861923218, "learning_rate": 6.970380123996352e-06, "loss": 0.5193, "step": 6155 }, { "epoch": 0.39, "grad_norm": 1.1332507133483887, "learning_rate": 6.969437114457174e-06, "loss": 0.5675, "step": 6156 }, { "epoch": 0.39, "grad_norm": 0.9124565124511719, "learning_rate": 6.968494021991848e-06, "loss": 0.5158, "step": 6157 }, { "epoch": 0.39, "grad_norm": 1.0040991306304932, "learning_rate": 6.967550846640089e-06, "loss": 0.4864, "step": 6158 }, { "epoch": 0.39, "grad_norm": 1.040360689163208, "learning_rate": 6.966607588441609e-06, "loss": 0.5617, "step": 6159 }, { "epoch": 0.39, "grad_norm": 0.9898555278778076, "learning_rate": 6.9656642474361225e-06, "loss": 0.4797, "step": 6160 }, { "epoch": 0.39, "grad_norm": 0.9709599614143372, "learning_rate": 6.964720823663353e-06, "loss": 0.5402, "step": 6161 }, { "epoch": 0.39, "grad_norm": 1.0226396322250366, "learning_rate": 6.963777317163025e-06, "loss": 0.5439, "step": 6162 }, { "epoch": 0.39, "grad_norm": 0.963808000087738, "learning_rate": 6.962833727974867e-06, "loss": 0.5229, "step": 6163 }, { "epoch": 0.39, "grad_norm": 1.0142179727554321, "learning_rate": 6.961890056138607e-06, "loss": 0.5408, "step": 6164 }, { "epoch": 0.39, "grad_norm": 1.0173602104187012, "learning_rate": 6.9609463016939816e-06, "loss": 0.5459, "step": 6165 }, { "epoch": 0.39, "grad_norm": 0.948390781879425, "learning_rate": 6.960002464680731e-06, "loss": 0.4954, "step": 6166 }, { "epoch": 0.39, "grad_norm": 0.9785526990890503, "learning_rate": 6.959058545138593e-06, "loss": 0.512, "step": 6167 }, { "epoch": 0.39, "grad_norm": 1.1291639804840088, "learning_rate": 6.958114543107315e-06, "loss": 0.5671, "step": 6168 }, { "epoch": 0.39, "grad_norm": 0.9646851420402527, "learning_rate": 6.957170458626645e-06, "loss": 0.4931, "step": 6169 }, { "epoch": 0.39, "grad_norm": 1.0027590990066528, "learning_rate": 6.956226291736338e-06, "loss": 0.5538, "step": 6170 }, { "epoch": 0.39, "grad_norm": 1.0033183097839355, "learning_rate": 6.955282042476144e-06, "loss": 0.5133, "step": 6171 }, { "epoch": 0.39, "grad_norm": 1.014385461807251, "learning_rate": 6.9543377108858265e-06, "loss": 0.5415, "step": 6172 }, { "epoch": 0.39, "grad_norm": 1.0565842390060425, "learning_rate": 6.9533932970051465e-06, "loss": 0.5239, "step": 6173 }, { "epoch": 0.39, "grad_norm": 1.0331782102584839, "learning_rate": 6.952448800873871e-06, "loss": 0.5959, "step": 6174 }, { "epoch": 0.39, "grad_norm": 1.0689690113067627, "learning_rate": 6.951504222531768e-06, "loss": 0.5552, "step": 6175 }, { "epoch": 0.39, "grad_norm": 0.9959222078323364, "learning_rate": 6.950559562018611e-06, "loss": 0.5421, "step": 6176 }, { "epoch": 0.39, "grad_norm": 0.9821951985359192, "learning_rate": 6.949614819374175e-06, "loss": 0.5326, "step": 6177 }, { "epoch": 0.39, "grad_norm": 1.0054478645324707, "learning_rate": 6.948669994638243e-06, "loss": 0.543, "step": 6178 }, { "epoch": 0.39, "grad_norm": 0.9514632821083069, "learning_rate": 6.947725087850595e-06, "loss": 0.4761, "step": 6179 }, { "epoch": 0.39, "grad_norm": 1.0094581842422485, "learning_rate": 6.94678009905102e-06, "loss": 0.5214, "step": 6180 }, { "epoch": 0.39, "grad_norm": 1.0191506147384644, "learning_rate": 6.945835028279308e-06, "loss": 0.5829, "step": 6181 }, { "epoch": 0.39, "grad_norm": 1.0834375619888306, "learning_rate": 6.944889875575251e-06, "loss": 0.5345, "step": 6182 }, { "epoch": 0.39, "grad_norm": 0.9999750852584839, "learning_rate": 6.943944640978648e-06, "loss": 0.5565, "step": 6183 }, { "epoch": 0.39, "grad_norm": 1.0202056169509888, "learning_rate": 6.942999324529297e-06, "loss": 0.5142, "step": 6184 }, { "epoch": 0.39, "grad_norm": 1.000248908996582, "learning_rate": 6.942053926267005e-06, "loss": 0.5268, "step": 6185 }, { "epoch": 0.39, "grad_norm": 1.0070136785507202, "learning_rate": 6.941108446231578e-06, "loss": 0.533, "step": 6186 }, { "epoch": 0.39, "grad_norm": 0.9712101221084595, "learning_rate": 6.940162884462828e-06, "loss": 0.5313, "step": 6187 }, { "epoch": 0.39, "grad_norm": 0.9556286334991455, "learning_rate": 6.9392172410005656e-06, "loss": 0.5484, "step": 6188 }, { "epoch": 0.39, "grad_norm": 1.0260111093521118, "learning_rate": 6.9382715158846135e-06, "loss": 0.5298, "step": 6189 }, { "epoch": 0.39, "grad_norm": 1.06009840965271, "learning_rate": 6.93732570915479e-06, "loss": 0.5274, "step": 6190 }, { "epoch": 0.39, "grad_norm": 1.088383674621582, "learning_rate": 6.93637982085092e-06, "loss": 0.5408, "step": 6191 }, { "epoch": 0.39, "grad_norm": 1.039387583732605, "learning_rate": 6.9354338510128315e-06, "loss": 0.5236, "step": 6192 }, { "epoch": 0.39, "grad_norm": 1.0569809675216675, "learning_rate": 6.934487799680357e-06, "loss": 0.5541, "step": 6193 }, { "epoch": 0.39, "grad_norm": 0.9675583243370056, "learning_rate": 6.933541666893331e-06, "loss": 0.5471, "step": 6194 }, { "epoch": 0.39, "grad_norm": 1.0061856508255005, "learning_rate": 6.932595452691592e-06, "loss": 0.5677, "step": 6195 }, { "epoch": 0.39, "grad_norm": 0.9990823268890381, "learning_rate": 6.9316491571149815e-06, "loss": 0.5449, "step": 6196 }, { "epoch": 0.39, "grad_norm": 1.0470032691955566, "learning_rate": 6.930702780203344e-06, "loss": 0.5731, "step": 6197 }, { "epoch": 0.39, "grad_norm": 1.2246379852294922, "learning_rate": 6.929756321996529e-06, "loss": 0.5755, "step": 6198 }, { "epoch": 0.39, "grad_norm": 1.0149028301239014, "learning_rate": 6.928809782534388e-06, "loss": 0.545, "step": 6199 }, { "epoch": 0.39, "grad_norm": 1.0562412738800049, "learning_rate": 6.927863161856778e-06, "loss": 0.5309, "step": 6200 }, { "epoch": 0.39, "grad_norm": 1.009595274925232, "learning_rate": 6.9269164600035555e-06, "loss": 0.5182, "step": 6201 }, { "epoch": 0.39, "grad_norm": 1.0458571910858154, "learning_rate": 6.925969677014585e-06, "loss": 0.5708, "step": 6202 }, { "epoch": 0.39, "grad_norm": 1.0821754932403564, "learning_rate": 6.92502281292973e-06, "loss": 0.5318, "step": 6203 }, { "epoch": 0.39, "grad_norm": 0.9944037795066833, "learning_rate": 6.924075867788863e-06, "loss": 0.5421, "step": 6204 }, { "epoch": 0.39, "grad_norm": 1.0907917022705078, "learning_rate": 6.923128841631854e-06, "loss": 0.4908, "step": 6205 }, { "epoch": 0.39, "grad_norm": 1.0934239625930786, "learning_rate": 6.92218173449858e-06, "loss": 0.5125, "step": 6206 }, { "epoch": 0.39, "grad_norm": 1.0188615322113037, "learning_rate": 6.921234546428918e-06, "loss": 0.5283, "step": 6207 }, { "epoch": 0.39, "grad_norm": 0.9371731281280518, "learning_rate": 6.920287277462755e-06, "loss": 0.5093, "step": 6208 }, { "epoch": 0.39, "grad_norm": 0.9355340003967285, "learning_rate": 6.9193399276399745e-06, "loss": 0.4871, "step": 6209 }, { "epoch": 0.39, "grad_norm": 1.0003212690353394, "learning_rate": 6.918392497000466e-06, "loss": 0.554, "step": 6210 }, { "epoch": 0.39, "grad_norm": 0.91237872838974, "learning_rate": 6.917444985584122e-06, "loss": 0.4917, "step": 6211 }, { "epoch": 0.39, "grad_norm": 1.011143445968628, "learning_rate": 6.916497393430841e-06, "loss": 0.5489, "step": 6212 }, { "epoch": 0.39, "grad_norm": 0.9575095772743225, "learning_rate": 6.915549720580523e-06, "loss": 0.5222, "step": 6213 }, { "epoch": 0.39, "grad_norm": 1.0624295473098755, "learning_rate": 6.914601967073068e-06, "loss": 0.4989, "step": 6214 }, { "epoch": 0.39, "grad_norm": 1.0963515043258667, "learning_rate": 6.913654132948385e-06, "loss": 0.5305, "step": 6215 }, { "epoch": 0.39, "grad_norm": 0.9703376293182373, "learning_rate": 6.912706218246384e-06, "loss": 0.481, "step": 6216 }, { "epoch": 0.39, "grad_norm": 0.9639180898666382, "learning_rate": 6.911758223006979e-06, "loss": 0.5314, "step": 6217 }, { "epoch": 0.39, "grad_norm": 1.0230478048324585, "learning_rate": 6.910810147270084e-06, "loss": 0.4934, "step": 6218 }, { "epoch": 0.39, "grad_norm": 1.0991566181182861, "learning_rate": 6.909861991075622e-06, "loss": 0.5647, "step": 6219 }, { "epoch": 0.39, "grad_norm": 1.0460660457611084, "learning_rate": 6.908913754463514e-06, "loss": 0.5524, "step": 6220 }, { "epoch": 0.39, "grad_norm": 1.0755608081817627, "learning_rate": 6.90796543747369e-06, "loss": 0.5864, "step": 6221 }, { "epoch": 0.39, "grad_norm": 1.017244815826416, "learning_rate": 6.907017040146078e-06, "loss": 0.5585, "step": 6222 }, { "epoch": 0.39, "grad_norm": 1.0522984266281128, "learning_rate": 6.906068562520613e-06, "loss": 0.5338, "step": 6223 }, { "epoch": 0.39, "grad_norm": 0.9456668496131897, "learning_rate": 6.905120004637232e-06, "loss": 0.4864, "step": 6224 }, { "epoch": 0.39, "grad_norm": 1.0045950412750244, "learning_rate": 6.904171366535873e-06, "loss": 0.5382, "step": 6225 }, { "epoch": 0.39, "grad_norm": 0.9706365466117859, "learning_rate": 6.9032226482564835e-06, "loss": 0.5119, "step": 6226 }, { "epoch": 0.39, "grad_norm": 1.0931531190872192, "learning_rate": 6.9022738498390084e-06, "loss": 0.5717, "step": 6227 }, { "epoch": 0.39, "grad_norm": 1.0188103914260864, "learning_rate": 6.9013249713234e-06, "loss": 0.5502, "step": 6228 }, { "epoch": 0.39, "grad_norm": 0.9862955212593079, "learning_rate": 6.900376012749611e-06, "loss": 0.5273, "step": 6229 }, { "epoch": 0.39, "grad_norm": 1.095741629600525, "learning_rate": 6.899426974157598e-06, "loss": 0.545, "step": 6230 }, { "epoch": 0.39, "grad_norm": 0.9772562384605408, "learning_rate": 6.898477855587323e-06, "loss": 0.5392, "step": 6231 }, { "epoch": 0.39, "grad_norm": 1.002846360206604, "learning_rate": 6.897528657078752e-06, "loss": 0.5283, "step": 6232 }, { "epoch": 0.39, "grad_norm": 1.0799317359924316, "learning_rate": 6.8965793786718484e-06, "loss": 0.5303, "step": 6233 }, { "epoch": 0.39, "grad_norm": 0.9295328259468079, "learning_rate": 6.895630020406584e-06, "loss": 0.5266, "step": 6234 }, { "epoch": 0.4, "grad_norm": 1.0229262113571167, "learning_rate": 6.894680582322934e-06, "loss": 0.5467, "step": 6235 }, { "epoch": 0.4, "grad_norm": 0.9982120990753174, "learning_rate": 6.893731064460878e-06, "loss": 0.5524, "step": 6236 }, { "epoch": 0.4, "grad_norm": 1.0514452457427979, "learning_rate": 6.892781466860393e-06, "loss": 0.5273, "step": 6237 }, { "epoch": 0.4, "grad_norm": 0.9818599820137024, "learning_rate": 6.891831789561465e-06, "loss": 0.545, "step": 6238 }, { "epoch": 0.4, "grad_norm": 1.0682156085968018, "learning_rate": 6.8908820326040815e-06, "loss": 0.5374, "step": 6239 }, { "epoch": 0.4, "grad_norm": 0.98082035779953, "learning_rate": 6.889932196028235e-06, "loss": 0.5561, "step": 6240 }, { "epoch": 0.4, "grad_norm": 1.0529831647872925, "learning_rate": 6.888982279873917e-06, "loss": 0.5152, "step": 6241 }, { "epoch": 0.4, "grad_norm": 1.096386432647705, "learning_rate": 6.888032284181127e-06, "loss": 0.514, "step": 6242 }, { "epoch": 0.4, "grad_norm": 1.1044929027557373, "learning_rate": 6.887082208989865e-06, "loss": 0.5502, "step": 6243 }, { "epoch": 0.4, "grad_norm": 1.0671653747558594, "learning_rate": 6.886132054340136e-06, "loss": 0.5492, "step": 6244 }, { "epoch": 0.4, "grad_norm": 0.9476322531700134, "learning_rate": 6.885181820271947e-06, "loss": 0.5347, "step": 6245 }, { "epoch": 0.4, "grad_norm": 1.03954017162323, "learning_rate": 6.88423150682531e-06, "loss": 0.5653, "step": 6246 }, { "epoch": 0.4, "grad_norm": 1.0172086954116821, "learning_rate": 6.88328111404024e-06, "loss": 0.502, "step": 6247 }, { "epoch": 0.4, "grad_norm": 1.0906752347946167, "learning_rate": 6.882330641956752e-06, "loss": 0.5648, "step": 6248 }, { "epoch": 0.4, "grad_norm": 0.9495219588279724, "learning_rate": 6.881380090614871e-06, "loss": 0.5056, "step": 6249 }, { "epoch": 0.4, "grad_norm": 1.050155520439148, "learning_rate": 6.8804294600546175e-06, "loss": 0.5382, "step": 6250 }, { "epoch": 0.4, "grad_norm": 1.0673195123672485, "learning_rate": 6.879478750316022e-06, "loss": 0.5611, "step": 6251 }, { "epoch": 0.4, "grad_norm": 1.0704740285873413, "learning_rate": 6.878527961439113e-06, "loss": 0.5519, "step": 6252 }, { "epoch": 0.4, "grad_norm": 1.04393470287323, "learning_rate": 6.877577093463927e-06, "loss": 0.5399, "step": 6253 }, { "epoch": 0.4, "grad_norm": 1.0113558769226074, "learning_rate": 6.876626146430502e-06, "loss": 0.5227, "step": 6254 }, { "epoch": 0.4, "grad_norm": 1.0777956247329712, "learning_rate": 6.875675120378878e-06, "loss": 0.5607, "step": 6255 }, { "epoch": 0.4, "grad_norm": 0.9694634675979614, "learning_rate": 6.8747240153491e-06, "loss": 0.5241, "step": 6256 }, { "epoch": 0.4, "grad_norm": 0.9827180504798889, "learning_rate": 6.873772831381214e-06, "loss": 0.5364, "step": 6257 }, { "epoch": 0.4, "grad_norm": 1.1433156728744507, "learning_rate": 6.872821568515275e-06, "loss": 0.592, "step": 6258 }, { "epoch": 0.4, "grad_norm": 1.0650519132614136, "learning_rate": 6.8718702267913325e-06, "loss": 0.5413, "step": 6259 }, { "epoch": 0.4, "grad_norm": 1.0455180406570435, "learning_rate": 6.870918806249449e-06, "loss": 0.546, "step": 6260 }, { "epoch": 0.4, "grad_norm": 0.9944682717323303, "learning_rate": 6.8699673069296806e-06, "loss": 0.5242, "step": 6261 }, { "epoch": 0.4, "grad_norm": 0.9604764580726624, "learning_rate": 6.869015728872095e-06, "loss": 0.5627, "step": 6262 }, { "epoch": 0.4, "grad_norm": 1.064626932144165, "learning_rate": 6.868064072116758e-06, "loss": 0.5855, "step": 6263 }, { "epoch": 0.4, "grad_norm": 0.9918391704559326, "learning_rate": 6.867112336703743e-06, "loss": 0.5058, "step": 6264 }, { "epoch": 0.4, "grad_norm": 1.035409927368164, "learning_rate": 6.866160522673121e-06, "loss": 0.5655, "step": 6265 }, { "epoch": 0.4, "grad_norm": 0.93015056848526, "learning_rate": 6.865208630064973e-06, "loss": 0.5595, "step": 6266 }, { "epoch": 0.4, "grad_norm": 1.0266493558883667, "learning_rate": 6.864256658919377e-06, "loss": 0.4955, "step": 6267 }, { "epoch": 0.4, "grad_norm": 1.025432825088501, "learning_rate": 6.8633046092764174e-06, "loss": 0.5654, "step": 6268 }, { "epoch": 0.4, "grad_norm": 1.0468451976776123, "learning_rate": 6.862352481176184e-06, "loss": 0.5787, "step": 6269 }, { "epoch": 0.4, "grad_norm": 1.1604524850845337, "learning_rate": 6.861400274658767e-06, "loss": 0.5261, "step": 6270 }, { "epoch": 0.4, "grad_norm": 1.0462700128555298, "learning_rate": 6.860447989764259e-06, "loss": 0.5194, "step": 6271 }, { "epoch": 0.4, "grad_norm": 1.015401840209961, "learning_rate": 6.8594956265327585e-06, "loss": 0.5136, "step": 6272 }, { "epoch": 0.4, "grad_norm": 1.0995614528656006, "learning_rate": 6.858543185004365e-06, "loss": 0.5547, "step": 6273 }, { "epoch": 0.4, "grad_norm": 1.0032482147216797, "learning_rate": 6.857590665219185e-06, "loss": 0.5629, "step": 6274 }, { "epoch": 0.4, "grad_norm": 0.9372909665107727, "learning_rate": 6.856638067217324e-06, "loss": 0.4823, "step": 6275 }, { "epoch": 0.4, "grad_norm": 1.068374752998352, "learning_rate": 6.85568539103889e-06, "loss": 0.5269, "step": 6276 }, { "epoch": 0.4, "grad_norm": 1.093066692352295, "learning_rate": 6.854732636724002e-06, "loss": 0.5849, "step": 6277 }, { "epoch": 0.4, "grad_norm": 0.9396721720695496, "learning_rate": 6.853779804312775e-06, "loss": 0.4918, "step": 6278 }, { "epoch": 0.4, "grad_norm": 1.1168854236602783, "learning_rate": 6.8528268938453295e-06, "loss": 0.4996, "step": 6279 }, { "epoch": 0.4, "grad_norm": 1.0356525182724, "learning_rate": 6.851873905361786e-06, "loss": 0.5363, "step": 6280 }, { "epoch": 0.4, "grad_norm": 1.0906376838684082, "learning_rate": 6.850920838902278e-06, "loss": 0.5743, "step": 6281 }, { "epoch": 0.4, "grad_norm": 1.061261773109436, "learning_rate": 6.84996769450693e-06, "loss": 0.4891, "step": 6282 }, { "epoch": 0.4, "grad_norm": 1.131471872329712, "learning_rate": 6.84901447221588e-06, "loss": 0.5819, "step": 6283 }, { "epoch": 0.4, "grad_norm": 1.0375972986221313, "learning_rate": 6.84806117206926e-06, "loss": 0.5819, "step": 6284 }, { "epoch": 0.4, "grad_norm": 1.0283234119415283, "learning_rate": 6.847107794107216e-06, "loss": 0.4991, "step": 6285 }, { "epoch": 0.4, "grad_norm": 1.0950448513031006, "learning_rate": 6.846154338369887e-06, "loss": 0.5785, "step": 6286 }, { "epoch": 0.4, "grad_norm": 0.9550922513008118, "learning_rate": 6.845200804897421e-06, "loss": 0.529, "step": 6287 }, { "epoch": 0.4, "grad_norm": 1.0447999238967896, "learning_rate": 6.844247193729968e-06, "loss": 0.5736, "step": 6288 }, { "epoch": 0.4, "grad_norm": 0.9329219460487366, "learning_rate": 6.843293504907682e-06, "loss": 0.482, "step": 6289 }, { "epoch": 0.4, "grad_norm": 1.013639211654663, "learning_rate": 6.84233973847072e-06, "loss": 0.5383, "step": 6290 }, { "epoch": 0.4, "grad_norm": 1.048012375831604, "learning_rate": 6.8413858944592385e-06, "loss": 0.5864, "step": 6291 }, { "epoch": 0.4, "grad_norm": 1.0305635929107666, "learning_rate": 6.840431972913404e-06, "loss": 0.5534, "step": 6292 }, { "epoch": 0.4, "grad_norm": 1.0479077100753784, "learning_rate": 6.83947797387338e-06, "loss": 0.5109, "step": 6293 }, { "epoch": 0.4, "grad_norm": 0.9554098844528198, "learning_rate": 6.838523897379339e-06, "loss": 0.5521, "step": 6294 }, { "epoch": 0.4, "grad_norm": 0.9382658004760742, "learning_rate": 6.837569743471451e-06, "loss": 0.5413, "step": 6295 }, { "epoch": 0.4, "grad_norm": 1.0406569242477417, "learning_rate": 6.836615512189895e-06, "loss": 0.4994, "step": 6296 }, { "epoch": 0.4, "grad_norm": 1.0103338956832886, "learning_rate": 6.835661203574848e-06, "loss": 0.5333, "step": 6297 }, { "epoch": 0.4, "grad_norm": 1.0147366523742676, "learning_rate": 6.834706817666495e-06, "loss": 0.5447, "step": 6298 }, { "epoch": 0.4, "grad_norm": 0.9934282302856445, "learning_rate": 6.833752354505019e-06, "loss": 0.5232, "step": 6299 }, { "epoch": 0.4, "grad_norm": 1.0535008907318115, "learning_rate": 6.832797814130611e-06, "loss": 0.5237, "step": 6300 }, { "epoch": 0.4, "grad_norm": 0.9556391835212708, "learning_rate": 6.831843196583462e-06, "loss": 0.5341, "step": 6301 }, { "epoch": 0.4, "grad_norm": 0.9664854407310486, "learning_rate": 6.8308885019037695e-06, "loss": 0.524, "step": 6302 }, { "epoch": 0.4, "grad_norm": 1.0064958333969116, "learning_rate": 6.82993373013173e-06, "loss": 0.5478, "step": 6303 }, { "epoch": 0.4, "grad_norm": 1.0353161096572876, "learning_rate": 6.8289788813075485e-06, "loss": 0.5263, "step": 6304 }, { "epoch": 0.4, "grad_norm": 1.0607702732086182, "learning_rate": 6.82802395547143e-06, "loss": 0.5079, "step": 6305 }, { "epoch": 0.4, "grad_norm": 0.9825518131256104, "learning_rate": 6.82706895266358e-06, "loss": 0.4533, "step": 6306 }, { "epoch": 0.4, "grad_norm": 1.044676423072815, "learning_rate": 6.826113872924213e-06, "loss": 0.5691, "step": 6307 }, { "epoch": 0.4, "grad_norm": 1.0349632501602173, "learning_rate": 6.825158716293543e-06, "loss": 0.5567, "step": 6308 }, { "epoch": 0.4, "grad_norm": 0.9872351884841919, "learning_rate": 6.824203482811788e-06, "loss": 0.5289, "step": 6309 }, { "epoch": 0.4, "grad_norm": 0.9518697261810303, "learning_rate": 6.823248172519173e-06, "loss": 0.5069, "step": 6310 }, { "epoch": 0.4, "grad_norm": 1.042443871498108, "learning_rate": 6.8222927854559175e-06, "loss": 0.5468, "step": 6311 }, { "epoch": 0.4, "grad_norm": 0.993451714515686, "learning_rate": 6.8213373216622514e-06, "loss": 0.5224, "step": 6312 }, { "epoch": 0.4, "grad_norm": 1.061415672302246, "learning_rate": 6.820381781178409e-06, "loss": 0.5395, "step": 6313 }, { "epoch": 0.4, "grad_norm": 1.051388144493103, "learning_rate": 6.819426164044622e-06, "loss": 0.5393, "step": 6314 }, { "epoch": 0.4, "grad_norm": 1.0344637632369995, "learning_rate": 6.818470470301128e-06, "loss": 0.5154, "step": 6315 }, { "epoch": 0.4, "grad_norm": 1.0458735227584839, "learning_rate": 6.817514699988168e-06, "loss": 0.5319, "step": 6316 }, { "epoch": 0.4, "grad_norm": 1.0880186557769775, "learning_rate": 6.8165588531459885e-06, "loss": 0.5491, "step": 6317 }, { "epoch": 0.4, "grad_norm": 1.0239330530166626, "learning_rate": 6.815602929814833e-06, "loss": 0.5305, "step": 6318 }, { "epoch": 0.4, "grad_norm": 1.0305886268615723, "learning_rate": 6.814646930034954e-06, "loss": 0.58, "step": 6319 }, { "epoch": 0.4, "grad_norm": 0.9396848082542419, "learning_rate": 6.813690853846606e-06, "loss": 0.5338, "step": 6320 }, { "epoch": 0.4, "grad_norm": 1.0301604270935059, "learning_rate": 6.8127347012900465e-06, "loss": 0.5256, "step": 6321 }, { "epoch": 0.4, "grad_norm": 1.0537376403808594, "learning_rate": 6.811778472405534e-06, "loss": 0.5524, "step": 6322 }, { "epoch": 0.4, "grad_norm": 0.9582347273826599, "learning_rate": 6.810822167233333e-06, "loss": 0.5207, "step": 6323 }, { "epoch": 0.4, "grad_norm": 1.0056788921356201, "learning_rate": 6.80986578581371e-06, "loss": 0.5508, "step": 6324 }, { "epoch": 0.4, "grad_norm": 1.061269760131836, "learning_rate": 6.808909328186934e-06, "loss": 0.5671, "step": 6325 }, { "epoch": 0.4, "grad_norm": 0.9477720260620117, "learning_rate": 6.80795279439328e-06, "loss": 0.4916, "step": 6326 }, { "epoch": 0.4, "grad_norm": 1.0195330381393433, "learning_rate": 6.806996184473023e-06, "loss": 0.5624, "step": 6327 }, { "epoch": 0.4, "grad_norm": 1.0175155401229858, "learning_rate": 6.806039498466444e-06, "loss": 0.5314, "step": 6328 }, { "epoch": 0.4, "grad_norm": 0.9943006038665771, "learning_rate": 6.805082736413822e-06, "loss": 0.562, "step": 6329 }, { "epoch": 0.4, "grad_norm": 1.1058787107467651, "learning_rate": 6.804125898355447e-06, "loss": 0.5336, "step": 6330 }, { "epoch": 0.4, "grad_norm": 0.9576951265335083, "learning_rate": 6.8031689843316054e-06, "loss": 0.4966, "step": 6331 }, { "epoch": 0.4, "grad_norm": 0.948913037776947, "learning_rate": 6.802211994382591e-06, "loss": 0.5148, "step": 6332 }, { "epoch": 0.4, "grad_norm": 0.9891049265861511, "learning_rate": 6.8012549285487e-06, "loss": 0.5436, "step": 6333 }, { "epoch": 0.4, "grad_norm": 1.0424014329910278, "learning_rate": 6.800297786870228e-06, "loss": 0.5378, "step": 6334 }, { "epoch": 0.4, "grad_norm": 0.9966731071472168, "learning_rate": 6.799340569387481e-06, "loss": 0.4889, "step": 6335 }, { "epoch": 0.4, "grad_norm": 1.0427660942077637, "learning_rate": 6.798383276140761e-06, "loss": 0.5412, "step": 6336 }, { "epoch": 0.4, "grad_norm": 1.0151562690734863, "learning_rate": 6.797425907170378e-06, "loss": 0.5129, "step": 6337 }, { "epoch": 0.4, "grad_norm": 0.9394248723983765, "learning_rate": 6.796468462516642e-06, "loss": 0.5167, "step": 6338 }, { "epoch": 0.4, "grad_norm": 1.140164852142334, "learning_rate": 6.79551094221987e-06, "loss": 0.5403, "step": 6339 }, { "epoch": 0.4, "grad_norm": 1.0196154117584229, "learning_rate": 6.794553346320376e-06, "loss": 0.4968, "step": 6340 }, { "epoch": 0.4, "grad_norm": 0.9896392226219177, "learning_rate": 6.7935956748584855e-06, "loss": 0.4752, "step": 6341 }, { "epoch": 0.4, "grad_norm": 0.9792375564575195, "learning_rate": 6.792637927874519e-06, "loss": 0.5354, "step": 6342 }, { "epoch": 0.4, "grad_norm": 0.9903654456138611, "learning_rate": 6.791680105408807e-06, "loss": 0.5889, "step": 6343 }, { "epoch": 0.4, "grad_norm": 1.0461080074310303, "learning_rate": 6.790722207501678e-06, "loss": 0.5864, "step": 6344 }, { "epoch": 0.4, "grad_norm": 1.0412373542785645, "learning_rate": 6.789764234193465e-06, "loss": 0.5023, "step": 6345 }, { "epoch": 0.4, "grad_norm": 1.0502227544784546, "learning_rate": 6.788806185524508e-06, "loss": 0.566, "step": 6346 }, { "epoch": 0.4, "grad_norm": 1.0583373308181763, "learning_rate": 6.787848061535145e-06, "loss": 0.5976, "step": 6347 }, { "epoch": 0.4, "grad_norm": 1.0030248165130615, "learning_rate": 6.786889862265719e-06, "loss": 0.5051, "step": 6348 }, { "epoch": 0.4, "grad_norm": 1.01585853099823, "learning_rate": 6.7859315877565775e-06, "loss": 0.4992, "step": 6349 }, { "epoch": 0.4, "grad_norm": 1.0228488445281982, "learning_rate": 6.784973238048069e-06, "loss": 0.5788, "step": 6350 }, { "epoch": 0.4, "grad_norm": 0.9880842566490173, "learning_rate": 6.7840148131805485e-06, "loss": 0.5526, "step": 6351 }, { "epoch": 0.4, "grad_norm": 0.9535727500915527, "learning_rate": 6.783056313194369e-06, "loss": 0.5104, "step": 6352 }, { "epoch": 0.4, "grad_norm": 0.8931145668029785, "learning_rate": 6.7820977381298915e-06, "loss": 0.5571, "step": 6353 }, { "epoch": 0.4, "grad_norm": 0.989362895488739, "learning_rate": 6.781139088027477e-06, "loss": 0.5485, "step": 6354 }, { "epoch": 0.4, "grad_norm": 1.040856122970581, "learning_rate": 6.780180362927492e-06, "loss": 0.5125, "step": 6355 }, { "epoch": 0.4, "grad_norm": 0.996364414691925, "learning_rate": 6.779221562870306e-06, "loss": 0.514, "step": 6356 }, { "epoch": 0.4, "grad_norm": 1.1184418201446533, "learning_rate": 6.778262687896287e-06, "loss": 0.5479, "step": 6357 }, { "epoch": 0.4, "grad_norm": 1.1050505638122559, "learning_rate": 6.777303738045814e-06, "loss": 0.5934, "step": 6358 }, { "epoch": 0.4, "grad_norm": 0.9124274253845215, "learning_rate": 6.776344713359263e-06, "loss": 0.5294, "step": 6359 }, { "epoch": 0.4, "grad_norm": 1.0172024965286255, "learning_rate": 6.775385613877016e-06, "loss": 0.5622, "step": 6360 }, { "epoch": 0.4, "grad_norm": 1.023863673210144, "learning_rate": 6.774426439639455e-06, "loss": 0.5271, "step": 6361 }, { "epoch": 0.4, "grad_norm": 1.0649785995483398, "learning_rate": 6.773467190686972e-06, "loss": 0.5539, "step": 6362 }, { "epoch": 0.4, "grad_norm": 1.064691424369812, "learning_rate": 6.772507867059953e-06, "loss": 0.5705, "step": 6363 }, { "epoch": 0.4, "grad_norm": 1.0454376935958862, "learning_rate": 6.771548468798796e-06, "loss": 0.4621, "step": 6364 }, { "epoch": 0.4, "grad_norm": 1.0180542469024658, "learning_rate": 6.770588995943893e-06, "loss": 0.496, "step": 6365 }, { "epoch": 0.4, "grad_norm": 1.0939666032791138, "learning_rate": 6.769629448535648e-06, "loss": 0.5496, "step": 6366 }, { "epoch": 0.4, "grad_norm": 1.0308752059936523, "learning_rate": 6.768669826614464e-06, "loss": 0.5346, "step": 6367 }, { "epoch": 0.4, "grad_norm": 1.0306459665298462, "learning_rate": 6.767710130220745e-06, "loss": 0.5722, "step": 6368 }, { "epoch": 0.4, "grad_norm": 0.9302669167518616, "learning_rate": 6.766750359394904e-06, "loss": 0.5109, "step": 6369 }, { "epoch": 0.4, "grad_norm": 1.0190123319625854, "learning_rate": 6.76579051417735e-06, "loss": 0.6004, "step": 6370 }, { "epoch": 0.4, "grad_norm": 1.0275017023086548, "learning_rate": 6.7648305946085e-06, "loss": 0.5317, "step": 6371 }, { "epoch": 0.4, "grad_norm": 1.1311765909194946, "learning_rate": 6.763870600728772e-06, "loss": 0.5979, "step": 6372 }, { "epoch": 0.4, "grad_norm": 1.0098353624343872, "learning_rate": 6.76291053257859e-06, "loss": 0.5055, "step": 6373 }, { "epoch": 0.4, "grad_norm": 1.1389307975769043, "learning_rate": 6.761950390198378e-06, "loss": 0.5548, "step": 6374 }, { "epoch": 0.4, "grad_norm": 1.0219448804855347, "learning_rate": 6.760990173628566e-06, "loss": 0.5356, "step": 6375 }, { "epoch": 0.4, "grad_norm": 1.0062955617904663, "learning_rate": 6.760029882909582e-06, "loss": 0.5336, "step": 6376 }, { "epoch": 0.4, "grad_norm": 0.9788837432861328, "learning_rate": 6.759069518081863e-06, "loss": 0.5296, "step": 6377 }, { "epoch": 0.4, "grad_norm": 1.0798200368881226, "learning_rate": 6.758109079185846e-06, "loss": 0.526, "step": 6378 }, { "epoch": 0.4, "grad_norm": 1.0861631631851196, "learning_rate": 6.757148566261973e-06, "loss": 0.5039, "step": 6379 }, { "epoch": 0.4, "grad_norm": 1.0507569313049316, "learning_rate": 6.756187979350684e-06, "loss": 0.5441, "step": 6380 }, { "epoch": 0.4, "grad_norm": 1.0213924646377563, "learning_rate": 6.75522731849243e-06, "loss": 0.5038, "step": 6381 }, { "epoch": 0.4, "grad_norm": 0.9637886881828308, "learning_rate": 6.754266583727659e-06, "loss": 0.5594, "step": 6382 }, { "epoch": 0.4, "grad_norm": 0.9614035487174988, "learning_rate": 6.753305775096826e-06, "loss": 0.5076, "step": 6383 }, { "epoch": 0.4, "grad_norm": 1.0359253883361816, "learning_rate": 6.752344892640384e-06, "loss": 0.49, "step": 6384 }, { "epoch": 0.4, "grad_norm": 0.9942283034324646, "learning_rate": 6.751383936398796e-06, "loss": 0.5262, "step": 6385 }, { "epoch": 0.4, "grad_norm": 1.0773903131484985, "learning_rate": 6.750422906412523e-06, "loss": 0.5882, "step": 6386 }, { "epoch": 0.4, "grad_norm": 1.0718711614608765, "learning_rate": 6.749461802722032e-06, "loss": 0.5418, "step": 6387 }, { "epoch": 0.4, "grad_norm": 1.0906800031661987, "learning_rate": 6.7485006253677875e-06, "loss": 0.5531, "step": 6388 }, { "epoch": 0.4, "grad_norm": 1.1717183589935303, "learning_rate": 6.747539374390266e-06, "loss": 0.6119, "step": 6389 }, { "epoch": 0.4, "grad_norm": 1.0094630718231201, "learning_rate": 6.746578049829942e-06, "loss": 0.5017, "step": 6390 }, { "epoch": 0.4, "grad_norm": 0.9954185485839844, "learning_rate": 6.745616651727289e-06, "loss": 0.5181, "step": 6391 }, { "epoch": 0.4, "grad_norm": 1.052478551864624, "learning_rate": 6.744655180122793e-06, "loss": 0.5333, "step": 6392 }, { "epoch": 0.41, "grad_norm": 1.080421805381775, "learning_rate": 6.743693635056936e-06, "loss": 0.572, "step": 6393 }, { "epoch": 0.41, "grad_norm": 1.0238027572631836, "learning_rate": 6.742732016570207e-06, "loss": 0.5601, "step": 6394 }, { "epoch": 0.41, "grad_norm": 1.0315519571304321, "learning_rate": 6.741770324703095e-06, "loss": 0.5492, "step": 6395 }, { "epoch": 0.41, "grad_norm": 0.9174897074699402, "learning_rate": 6.740808559496093e-06, "loss": 0.5001, "step": 6396 }, { "epoch": 0.41, "grad_norm": 1.0448737144470215, "learning_rate": 6.739846720989699e-06, "loss": 0.5869, "step": 6397 }, { "epoch": 0.41, "grad_norm": 0.9965710639953613, "learning_rate": 6.738884809224413e-06, "loss": 0.5069, "step": 6398 }, { "epoch": 0.41, "grad_norm": 1.11588454246521, "learning_rate": 6.7379228242407345e-06, "loss": 0.5498, "step": 6399 }, { "epoch": 0.41, "grad_norm": 1.1629881858825684, "learning_rate": 6.736960766079173e-06, "loss": 0.5415, "step": 6400 }, { "epoch": 0.41, "grad_norm": 1.061481237411499, "learning_rate": 6.735998634780238e-06, "loss": 0.5179, "step": 6401 }, { "epoch": 0.41, "grad_norm": 1.0699317455291748, "learning_rate": 6.735036430384436e-06, "loss": 0.5091, "step": 6402 }, { "epoch": 0.41, "grad_norm": 1.0474531650543213, "learning_rate": 6.7340741529322875e-06, "loss": 0.5528, "step": 6403 }, { "epoch": 0.41, "grad_norm": 1.034765601158142, "learning_rate": 6.733111802464308e-06, "loss": 0.5094, "step": 6404 }, { "epoch": 0.41, "grad_norm": 0.9787458777427673, "learning_rate": 6.732149379021022e-06, "loss": 0.5054, "step": 6405 }, { "epoch": 0.41, "grad_norm": 0.9702959656715393, "learning_rate": 6.7311868826429485e-06, "loss": 0.5422, "step": 6406 }, { "epoch": 0.41, "grad_norm": 1.0975489616394043, "learning_rate": 6.730224313370619e-06, "loss": 0.546, "step": 6407 }, { "epoch": 0.41, "grad_norm": 1.0134038925170898, "learning_rate": 6.729261671244563e-06, "loss": 0.5146, "step": 6408 }, { "epoch": 0.41, "grad_norm": 1.0336501598358154, "learning_rate": 6.728298956305313e-06, "loss": 0.4965, "step": 6409 }, { "epoch": 0.41, "grad_norm": 1.0559171438217163, "learning_rate": 6.727336168593406e-06, "loss": 0.5324, "step": 6410 }, { "epoch": 0.41, "grad_norm": 1.0390537977218628, "learning_rate": 6.726373308149382e-06, "loss": 0.5387, "step": 6411 }, { "epoch": 0.41, "grad_norm": 0.9847078323364258, "learning_rate": 6.725410375013783e-06, "loss": 0.4994, "step": 6412 }, { "epoch": 0.41, "grad_norm": 0.972402036190033, "learning_rate": 6.724447369227159e-06, "loss": 0.5271, "step": 6413 }, { "epoch": 0.41, "grad_norm": 1.0684046745300293, "learning_rate": 6.723484290830051e-06, "loss": 0.5295, "step": 6414 }, { "epoch": 0.41, "grad_norm": 1.1181683540344238, "learning_rate": 6.722521139863017e-06, "loss": 0.556, "step": 6415 }, { "epoch": 0.41, "grad_norm": 1.022675633430481, "learning_rate": 6.72155791636661e-06, "loss": 0.5411, "step": 6416 }, { "epoch": 0.41, "grad_norm": 0.9986287355422974, "learning_rate": 6.720594620381387e-06, "loss": 0.4959, "step": 6417 }, { "epoch": 0.41, "grad_norm": 1.113417148590088, "learning_rate": 6.71963125194791e-06, "loss": 0.5561, "step": 6418 }, { "epoch": 0.41, "grad_norm": 1.1068001985549927, "learning_rate": 6.718667811106744e-06, "loss": 0.5329, "step": 6419 }, { "epoch": 0.41, "grad_norm": 1.092809796333313, "learning_rate": 6.717704297898455e-06, "loss": 0.5233, "step": 6420 }, { "epoch": 0.41, "grad_norm": 1.0115886926651, "learning_rate": 6.716740712363614e-06, "loss": 0.5103, "step": 6421 }, { "epoch": 0.41, "grad_norm": 1.1021331548690796, "learning_rate": 6.715777054542793e-06, "loss": 0.5401, "step": 6422 }, { "epoch": 0.41, "grad_norm": 1.0838817358016968, "learning_rate": 6.714813324476569e-06, "loss": 0.5178, "step": 6423 }, { "epoch": 0.41, "grad_norm": 1.07334566116333, "learning_rate": 6.713849522205522e-06, "loss": 0.5002, "step": 6424 }, { "epoch": 0.41, "grad_norm": 1.0014665126800537, "learning_rate": 6.712885647770233e-06, "loss": 0.5331, "step": 6425 }, { "epoch": 0.41, "grad_norm": 1.0466426610946655, "learning_rate": 6.711921701211288e-06, "loss": 0.5419, "step": 6426 }, { "epoch": 0.41, "grad_norm": 1.0850886106491089, "learning_rate": 6.710957682569276e-06, "loss": 0.585, "step": 6427 }, { "epoch": 0.41, "grad_norm": 0.9901554584503174, "learning_rate": 6.709993591884788e-06, "loss": 0.5246, "step": 6428 }, { "epoch": 0.41, "grad_norm": 0.9305109977722168, "learning_rate": 6.709029429198418e-06, "loss": 0.5141, "step": 6429 }, { "epoch": 0.41, "grad_norm": 0.9888066649436951, "learning_rate": 6.7080651945507645e-06, "loss": 0.518, "step": 6430 }, { "epoch": 0.41, "grad_norm": 0.9896424412727356, "learning_rate": 6.707100887982427e-06, "loss": 0.5147, "step": 6431 }, { "epoch": 0.41, "grad_norm": 1.0180577039718628, "learning_rate": 6.7061365095340105e-06, "loss": 0.5327, "step": 6432 }, { "epoch": 0.41, "grad_norm": 1.014597773551941, "learning_rate": 6.70517205924612e-06, "loss": 0.5363, "step": 6433 }, { "epoch": 0.41, "grad_norm": 0.9559026956558228, "learning_rate": 6.7042075371593665e-06, "loss": 0.5206, "step": 6434 }, { "epoch": 0.41, "grad_norm": 1.031485676765442, "learning_rate": 6.703242943314362e-06, "loss": 0.5737, "step": 6435 }, { "epoch": 0.41, "grad_norm": 1.142580509185791, "learning_rate": 6.702278277751722e-06, "loss": 0.4991, "step": 6436 }, { "epoch": 0.41, "grad_norm": 1.1274057626724243, "learning_rate": 6.701313540512065e-06, "loss": 0.5713, "step": 6437 }, { "epoch": 0.41, "grad_norm": 1.0787616968154907, "learning_rate": 6.700348731636014e-06, "loss": 0.5213, "step": 6438 }, { "epoch": 0.41, "grad_norm": 1.0522346496582031, "learning_rate": 6.699383851164194e-06, "loss": 0.5902, "step": 6439 }, { "epoch": 0.41, "grad_norm": 1.0166659355163574, "learning_rate": 6.6984188991372305e-06, "loss": 0.5382, "step": 6440 }, { "epoch": 0.41, "grad_norm": 0.933194100856781, "learning_rate": 6.697453875595755e-06, "loss": 0.5033, "step": 6441 }, { "epoch": 0.41, "grad_norm": 0.8746803998947144, "learning_rate": 6.696488780580403e-06, "loss": 0.49, "step": 6442 }, { "epoch": 0.41, "grad_norm": 0.9358901381492615, "learning_rate": 6.69552361413181e-06, "loss": 0.5203, "step": 6443 }, { "epoch": 0.41, "grad_norm": 0.9617918729782104, "learning_rate": 6.694558376290615e-06, "loss": 0.5361, "step": 6444 }, { "epoch": 0.41, "grad_norm": 0.9649971127510071, "learning_rate": 6.693593067097462e-06, "loss": 0.5358, "step": 6445 }, { "epoch": 0.41, "grad_norm": 1.0535650253295898, "learning_rate": 6.692627686592998e-06, "loss": 0.5792, "step": 6446 }, { "epoch": 0.41, "grad_norm": 1.090075969696045, "learning_rate": 6.691662234817869e-06, "loss": 0.542, "step": 6447 }, { "epoch": 0.41, "grad_norm": 0.9936316609382629, "learning_rate": 6.690696711812729e-06, "loss": 0.4804, "step": 6448 }, { "epoch": 0.41, "grad_norm": 0.9206106662750244, "learning_rate": 6.68973111761823e-06, "loss": 0.5067, "step": 6449 }, { "epoch": 0.41, "grad_norm": 1.0001904964447021, "learning_rate": 6.688765452275033e-06, "loss": 0.5969, "step": 6450 }, { "epoch": 0.41, "grad_norm": 0.9626813530921936, "learning_rate": 6.687799715823798e-06, "loss": 0.486, "step": 6451 }, { "epoch": 0.41, "grad_norm": 0.9665706753730774, "learning_rate": 6.686833908305188e-06, "loss": 0.548, "step": 6452 }, { "epoch": 0.41, "grad_norm": 0.9423041343688965, "learning_rate": 6.68586802975987e-06, "loss": 0.5228, "step": 6453 }, { "epoch": 0.41, "grad_norm": 1.0378499031066895, "learning_rate": 6.684902080228514e-06, "loss": 0.5171, "step": 6454 }, { "epoch": 0.41, "grad_norm": 0.9767656922340393, "learning_rate": 6.6839360597517935e-06, "loss": 0.4995, "step": 6455 }, { "epoch": 0.41, "grad_norm": 1.0101171731948853, "learning_rate": 6.682969968370383e-06, "loss": 0.5397, "step": 6456 }, { "epoch": 0.41, "grad_norm": 1.02167546749115, "learning_rate": 6.68200380612496e-06, "loss": 0.5212, "step": 6457 }, { "epoch": 0.41, "grad_norm": 1.0230549573898315, "learning_rate": 6.681037573056211e-06, "loss": 0.5128, "step": 6458 }, { "epoch": 0.41, "grad_norm": 1.034674882888794, "learning_rate": 6.6800712692048164e-06, "loss": 0.5311, "step": 6459 }, { "epoch": 0.41, "grad_norm": 1.0055681467056274, "learning_rate": 6.679104894611466e-06, "loss": 0.559, "step": 6460 }, { "epoch": 0.41, "grad_norm": 1.0654057264328003, "learning_rate": 6.678138449316848e-06, "loss": 0.5788, "step": 6461 }, { "epoch": 0.41, "grad_norm": 1.0350961685180664, "learning_rate": 6.6771719333616584e-06, "loss": 0.5117, "step": 6462 }, { "epoch": 0.41, "grad_norm": 0.9289602041244507, "learning_rate": 6.676205346786594e-06, "loss": 0.527, "step": 6463 }, { "epoch": 0.41, "grad_norm": 0.9366391897201538, "learning_rate": 6.6752386896323526e-06, "loss": 0.5414, "step": 6464 }, { "epoch": 0.41, "grad_norm": 1.154829502105713, "learning_rate": 6.674271961939638e-06, "loss": 0.5883, "step": 6465 }, { "epoch": 0.41, "grad_norm": 0.9293048977851868, "learning_rate": 6.673305163749155e-06, "loss": 0.4986, "step": 6466 }, { "epoch": 0.41, "grad_norm": 0.9967445731163025, "learning_rate": 6.672338295101614e-06, "loss": 0.5039, "step": 6467 }, { "epoch": 0.41, "grad_norm": 0.9883416295051575, "learning_rate": 6.671371356037723e-06, "loss": 0.5462, "step": 6468 }, { "epoch": 0.41, "grad_norm": 1.0531284809112549, "learning_rate": 6.670404346598199e-06, "loss": 0.5444, "step": 6469 }, { "epoch": 0.41, "grad_norm": 1.0140522718429565, "learning_rate": 6.669437266823759e-06, "loss": 0.5388, "step": 6470 }, { "epoch": 0.41, "grad_norm": 1.0097755193710327, "learning_rate": 6.668470116755125e-06, "loss": 0.5297, "step": 6471 }, { "epoch": 0.41, "grad_norm": 0.9846504330635071, "learning_rate": 6.6675028964330156e-06, "loss": 0.5286, "step": 6472 }, { "epoch": 0.41, "grad_norm": 1.0103787183761597, "learning_rate": 6.666535605898162e-06, "loss": 0.5903, "step": 6473 }, { "epoch": 0.41, "grad_norm": 1.0471034049987793, "learning_rate": 6.6655682451912915e-06, "loss": 0.5307, "step": 6474 }, { "epoch": 0.41, "grad_norm": 1.0226188898086548, "learning_rate": 6.664600814353137e-06, "loss": 0.5333, "step": 6475 }, { "epoch": 0.41, "grad_norm": 1.0311198234558105, "learning_rate": 6.6636333134244305e-06, "loss": 0.5282, "step": 6476 }, { "epoch": 0.41, "grad_norm": 1.0073610544204712, "learning_rate": 6.662665742445914e-06, "loss": 0.5265, "step": 6477 }, { "epoch": 0.41, "grad_norm": 1.0142841339111328, "learning_rate": 6.661698101458327e-06, "loss": 0.5301, "step": 6478 }, { "epoch": 0.41, "grad_norm": 0.9951909184455872, "learning_rate": 6.660730390502414e-06, "loss": 0.5186, "step": 6479 }, { "epoch": 0.41, "grad_norm": 0.9450570344924927, "learning_rate": 6.6597626096189206e-06, "loss": 0.5458, "step": 6480 }, { "epoch": 0.41, "grad_norm": 0.9907074570655823, "learning_rate": 6.658794758848598e-06, "loss": 0.5399, "step": 6481 }, { "epoch": 0.41, "grad_norm": 1.0250133275985718, "learning_rate": 6.6578268382322e-06, "loss": 0.5148, "step": 6482 }, { "epoch": 0.41, "grad_norm": 0.9800702929496765, "learning_rate": 6.656858847810479e-06, "loss": 0.5195, "step": 6483 }, { "epoch": 0.41, "grad_norm": 0.9772721529006958, "learning_rate": 6.655890787624195e-06, "loss": 0.5012, "step": 6484 }, { "epoch": 0.41, "grad_norm": 0.9839169383049011, "learning_rate": 6.654922657714112e-06, "loss": 0.4937, "step": 6485 }, { "epoch": 0.41, "grad_norm": 1.044286847114563, "learning_rate": 6.6539544581209935e-06, "loss": 0.566, "step": 6486 }, { "epoch": 0.41, "grad_norm": 0.9918443560600281, "learning_rate": 6.652986188885605e-06, "loss": 0.5345, "step": 6487 }, { "epoch": 0.41, "grad_norm": 1.0191620588302612, "learning_rate": 6.652017850048719e-06, "loss": 0.5222, "step": 6488 }, { "epoch": 0.41, "grad_norm": 1.0628665685653687, "learning_rate": 6.651049441651107e-06, "loss": 0.5547, "step": 6489 }, { "epoch": 0.41, "grad_norm": 0.9881481528282166, "learning_rate": 6.65008096373355e-06, "loss": 0.5246, "step": 6490 }, { "epoch": 0.41, "grad_norm": 0.997631847858429, "learning_rate": 6.6491124163368215e-06, "loss": 0.533, "step": 6491 }, { "epoch": 0.41, "grad_norm": 0.9483758211135864, "learning_rate": 6.648143799501705e-06, "loss": 0.5201, "step": 6492 }, { "epoch": 0.41, "grad_norm": 0.9615177512168884, "learning_rate": 6.647175113268989e-06, "loss": 0.534, "step": 6493 }, { "epoch": 0.41, "grad_norm": 1.0250009298324585, "learning_rate": 6.646206357679458e-06, "loss": 0.552, "step": 6494 }, { "epoch": 0.41, "grad_norm": 1.0113991498947144, "learning_rate": 6.645237532773902e-06, "loss": 0.573, "step": 6495 }, { "epoch": 0.41, "grad_norm": 1.03082275390625, "learning_rate": 6.64426863859312e-06, "loss": 0.5649, "step": 6496 }, { "epoch": 0.41, "grad_norm": 1.0717030763626099, "learning_rate": 6.643299675177906e-06, "loss": 0.5586, "step": 6497 }, { "epoch": 0.41, "grad_norm": 1.072690725326538, "learning_rate": 6.642330642569056e-06, "loss": 0.5275, "step": 6498 }, { "epoch": 0.41, "grad_norm": 0.9866908192634583, "learning_rate": 6.641361540807377e-06, "loss": 0.5133, "step": 6499 }, { "epoch": 0.41, "grad_norm": 1.0255568027496338, "learning_rate": 6.640392369933675e-06, "loss": 0.5299, "step": 6500 }, { "epoch": 0.41, "grad_norm": 1.0390933752059937, "learning_rate": 6.639423129988756e-06, "loss": 0.5288, "step": 6501 }, { "epoch": 0.41, "grad_norm": 1.038448691368103, "learning_rate": 6.638453821013431e-06, "loss": 0.6081, "step": 6502 }, { "epoch": 0.41, "grad_norm": 0.9224210381507874, "learning_rate": 6.637484443048516e-06, "loss": 0.5248, "step": 6503 }, { "epoch": 0.41, "grad_norm": 0.966993510723114, "learning_rate": 6.636514996134828e-06, "loss": 0.4875, "step": 6504 }, { "epoch": 0.41, "grad_norm": 1.0495386123657227, "learning_rate": 6.635545480313187e-06, "loss": 0.5672, "step": 6505 }, { "epoch": 0.41, "grad_norm": 1.0295976400375366, "learning_rate": 6.634575895624414e-06, "loss": 0.5866, "step": 6506 }, { "epoch": 0.41, "grad_norm": 1.0618761777877808, "learning_rate": 6.6336062421093374e-06, "loss": 0.562, "step": 6507 }, { "epoch": 0.41, "grad_norm": 1.0179448127746582, "learning_rate": 6.632636519808785e-06, "loss": 0.5375, "step": 6508 }, { "epoch": 0.41, "grad_norm": 1.019084095954895, "learning_rate": 6.6316667287635875e-06, "loss": 0.5497, "step": 6509 }, { "epoch": 0.41, "grad_norm": 1.040083885192871, "learning_rate": 6.63069686901458e-06, "loss": 0.5661, "step": 6510 }, { "epoch": 0.41, "grad_norm": 1.0258654356002808, "learning_rate": 6.629726940602601e-06, "loss": 0.5391, "step": 6511 }, { "epoch": 0.41, "grad_norm": 0.982742965221405, "learning_rate": 6.62875694356849e-06, "loss": 0.5586, "step": 6512 }, { "epoch": 0.41, "grad_norm": 1.039351224899292, "learning_rate": 6.62778687795309e-06, "loss": 0.5062, "step": 6513 }, { "epoch": 0.41, "grad_norm": 1.0537571907043457, "learning_rate": 6.626816743797246e-06, "loss": 0.4956, "step": 6514 }, { "epoch": 0.41, "grad_norm": 0.928530216217041, "learning_rate": 6.62584654114181e-06, "loss": 0.4869, "step": 6515 }, { "epoch": 0.41, "grad_norm": 1.0642585754394531, "learning_rate": 6.6248762700276315e-06, "loss": 0.5522, "step": 6516 }, { "epoch": 0.41, "grad_norm": 0.9538542628288269, "learning_rate": 6.623905930495565e-06, "loss": 0.5304, "step": 6517 }, { "epoch": 0.41, "grad_norm": 1.0592854022979736, "learning_rate": 6.622935522586469e-06, "loss": 0.4979, "step": 6518 }, { "epoch": 0.41, "grad_norm": 0.9506630897521973, "learning_rate": 6.6219650463412034e-06, "loss": 0.5171, "step": 6519 }, { "epoch": 0.41, "grad_norm": 1.0356582403182983, "learning_rate": 6.620994501800634e-06, "loss": 0.5231, "step": 6520 }, { "epoch": 0.41, "grad_norm": 1.0392316579818726, "learning_rate": 6.620023889005624e-06, "loss": 0.5735, "step": 6521 }, { "epoch": 0.41, "grad_norm": 1.0876318216323853, "learning_rate": 6.619053207997043e-06, "loss": 0.5476, "step": 6522 }, { "epoch": 0.41, "grad_norm": 1.0757724046707153, "learning_rate": 6.618082458815765e-06, "loss": 0.52, "step": 6523 }, { "epoch": 0.41, "grad_norm": 1.0079342126846313, "learning_rate": 6.617111641502664e-06, "loss": 0.5142, "step": 6524 }, { "epoch": 0.41, "grad_norm": 0.9872135519981384, "learning_rate": 6.616140756098617e-06, "loss": 0.4967, "step": 6525 }, { "epoch": 0.41, "grad_norm": 1.1018986701965332, "learning_rate": 6.615169802644503e-06, "loss": 0.5744, "step": 6526 }, { "epoch": 0.41, "grad_norm": 1.1604558229446411, "learning_rate": 6.614198781181209e-06, "loss": 0.5501, "step": 6527 }, { "epoch": 0.41, "grad_norm": 1.044912576675415, "learning_rate": 6.613227691749619e-06, "loss": 0.5811, "step": 6528 }, { "epoch": 0.41, "grad_norm": 1.0152037143707275, "learning_rate": 6.612256534390624e-06, "loss": 0.5524, "step": 6529 }, { "epoch": 0.41, "grad_norm": 1.0281046628952026, "learning_rate": 6.611285309145113e-06, "loss": 0.5104, "step": 6530 }, { "epoch": 0.41, "grad_norm": 0.9736826419830322, "learning_rate": 6.610314016053986e-06, "loss": 0.5092, "step": 6531 }, { "epoch": 0.41, "grad_norm": 0.9732024669647217, "learning_rate": 6.609342655158135e-06, "loss": 0.5532, "step": 6532 }, { "epoch": 0.41, "grad_norm": 1.0911118984222412, "learning_rate": 6.608371226498464e-06, "loss": 0.5026, "step": 6533 }, { "epoch": 0.41, "grad_norm": 1.047197937965393, "learning_rate": 6.607399730115875e-06, "loss": 0.5424, "step": 6534 }, { "epoch": 0.41, "grad_norm": 1.0095125436782837, "learning_rate": 6.6064281660512775e-06, "loss": 0.4706, "step": 6535 }, { "epoch": 0.41, "grad_norm": 1.0289307832717896, "learning_rate": 6.6054565343455765e-06, "loss": 0.5548, "step": 6536 }, { "epoch": 0.41, "grad_norm": 0.9875046610832214, "learning_rate": 6.604484835039686e-06, "loss": 0.5222, "step": 6537 }, { "epoch": 0.41, "grad_norm": 1.0515408515930176, "learning_rate": 6.603513068174521e-06, "loss": 0.4744, "step": 6538 }, { "epoch": 0.41, "grad_norm": 0.9239807724952698, "learning_rate": 6.602541233790999e-06, "loss": 0.5288, "step": 6539 }, { "epoch": 0.41, "grad_norm": 1.121701955795288, "learning_rate": 6.601569331930041e-06, "loss": 0.5923, "step": 6540 }, { "epoch": 0.41, "grad_norm": 0.9341360926628113, "learning_rate": 6.600597362632568e-06, "loss": 0.5167, "step": 6541 }, { "epoch": 0.41, "grad_norm": 1.015208125114441, "learning_rate": 6.599625325939509e-06, "loss": 0.5704, "step": 6542 }, { "epoch": 0.41, "grad_norm": 1.0524418354034424, "learning_rate": 6.598653221891793e-06, "loss": 0.558, "step": 6543 }, { "epoch": 0.41, "grad_norm": 0.9874768853187561, "learning_rate": 6.597681050530351e-06, "loss": 0.5702, "step": 6544 }, { "epoch": 0.41, "grad_norm": 1.0313066244125366, "learning_rate": 6.596708811896116e-06, "loss": 0.5523, "step": 6545 }, { "epoch": 0.41, "grad_norm": 1.067104458808899, "learning_rate": 6.595736506030029e-06, "loss": 0.5437, "step": 6546 }, { "epoch": 0.41, "grad_norm": 0.9491031765937805, "learning_rate": 6.59476413297303e-06, "loss": 0.5008, "step": 6547 }, { "epoch": 0.41, "grad_norm": 1.0489870309829712, "learning_rate": 6.59379169276606e-06, "loss": 0.5247, "step": 6548 }, { "epoch": 0.41, "grad_norm": 0.9969553351402283, "learning_rate": 6.5928191854500644e-06, "loss": 0.5564, "step": 6549 }, { "epoch": 0.41, "grad_norm": 1.0018967390060425, "learning_rate": 6.591846611065997e-06, "loss": 0.5337, "step": 6550 }, { "epoch": 0.42, "grad_norm": 0.9852742552757263, "learning_rate": 6.590873969654805e-06, "loss": 0.5623, "step": 6551 }, { "epoch": 0.42, "grad_norm": 1.039914846420288, "learning_rate": 6.589901261257445e-06, "loss": 0.5445, "step": 6552 }, { "epoch": 0.42, "grad_norm": 1.0345009565353394, "learning_rate": 6.588928485914871e-06, "loss": 0.5891, "step": 6553 }, { "epoch": 0.42, "grad_norm": 0.9844979643821716, "learning_rate": 6.587955643668049e-06, "loss": 0.5386, "step": 6554 }, { "epoch": 0.42, "grad_norm": 1.042885661125183, "learning_rate": 6.58698273455794e-06, "loss": 0.4922, "step": 6555 }, { "epoch": 0.42, "grad_norm": 1.0444130897521973, "learning_rate": 6.586009758625507e-06, "loss": 0.5699, "step": 6556 }, { "epoch": 0.42, "grad_norm": 1.0053129196166992, "learning_rate": 6.585036715911719e-06, "loss": 0.5309, "step": 6557 }, { "epoch": 0.42, "grad_norm": 1.0225436687469482, "learning_rate": 6.58406360645755e-06, "loss": 0.5656, "step": 6558 }, { "epoch": 0.42, "grad_norm": 0.930986225605011, "learning_rate": 6.583090430303975e-06, "loss": 0.5037, "step": 6559 }, { "epoch": 0.42, "grad_norm": 1.007002353668213, "learning_rate": 6.582117187491967e-06, "loss": 0.5977, "step": 6560 }, { "epoch": 0.42, "grad_norm": 1.0585647821426392, "learning_rate": 6.581143878062507e-06, "loss": 0.5395, "step": 6561 }, { "epoch": 0.42, "grad_norm": 1.0320481061935425, "learning_rate": 6.58017050205658e-06, "loss": 0.4973, "step": 6562 }, { "epoch": 0.42, "grad_norm": 1.0310266017913818, "learning_rate": 6.5791970595151714e-06, "loss": 0.5637, "step": 6563 }, { "epoch": 0.42, "grad_norm": 0.9598348140716553, "learning_rate": 6.578223550479266e-06, "loss": 0.5487, "step": 6564 }, { "epoch": 0.42, "grad_norm": 0.8893693685531616, "learning_rate": 6.5772499749898585e-06, "loss": 0.4807, "step": 6565 }, { "epoch": 0.42, "grad_norm": 1.0530903339385986, "learning_rate": 6.576276333087941e-06, "loss": 0.5382, "step": 6566 }, { "epoch": 0.42, "grad_norm": 1.0158112049102783, "learning_rate": 6.575302624814512e-06, "loss": 0.5204, "step": 6567 }, { "epoch": 0.42, "grad_norm": 1.030490756034851, "learning_rate": 6.5743288502105675e-06, "loss": 0.5139, "step": 6568 }, { "epoch": 0.42, "grad_norm": 0.9897223114967346, "learning_rate": 6.5733550093171115e-06, "loss": 0.5287, "step": 6569 }, { "epoch": 0.42, "grad_norm": 1.047285795211792, "learning_rate": 6.572381102175151e-06, "loss": 0.526, "step": 6570 }, { "epoch": 0.42, "grad_norm": 1.0202997922897339, "learning_rate": 6.571407128825692e-06, "loss": 0.5615, "step": 6571 }, { "epoch": 0.42, "grad_norm": 1.0218029022216797, "learning_rate": 6.570433089309745e-06, "loss": 0.4885, "step": 6572 }, { "epoch": 0.42, "grad_norm": 1.0741409063339233, "learning_rate": 6.569458983668323e-06, "loss": 0.4896, "step": 6573 }, { "epoch": 0.42, "grad_norm": 1.016112208366394, "learning_rate": 6.5684848119424435e-06, "loss": 0.567, "step": 6574 }, { "epoch": 0.42, "grad_norm": 1.040443778038025, "learning_rate": 6.567510574173126e-06, "loss": 0.5531, "step": 6575 }, { "epoch": 0.42, "grad_norm": 1.054648518562317, "learning_rate": 6.566536270401389e-06, "loss": 0.5775, "step": 6576 }, { "epoch": 0.42, "grad_norm": 1.0508983135223389, "learning_rate": 6.5655619006682604e-06, "loss": 0.5625, "step": 6577 }, { "epoch": 0.42, "grad_norm": 1.1195893287658691, "learning_rate": 6.5645874650147676e-06, "loss": 0.5335, "step": 6578 }, { "epoch": 0.42, "grad_norm": 1.0068633556365967, "learning_rate": 6.563612963481938e-06, "loss": 0.5265, "step": 6579 }, { "epoch": 0.42, "grad_norm": 1.0301257371902466, "learning_rate": 6.562638396110805e-06, "loss": 0.5061, "step": 6580 }, { "epoch": 0.42, "grad_norm": 1.1142112016677856, "learning_rate": 6.561663762942407e-06, "loss": 0.5522, "step": 6581 }, { "epoch": 0.42, "grad_norm": 1.1140917539596558, "learning_rate": 6.560689064017781e-06, "loss": 0.4971, "step": 6582 }, { "epoch": 0.42, "grad_norm": 1.0406494140625, "learning_rate": 6.559714299377966e-06, "loss": 0.5766, "step": 6583 }, { "epoch": 0.42, "grad_norm": 0.9920594096183777, "learning_rate": 6.558739469064008e-06, "loss": 0.6039, "step": 6584 }, { "epoch": 0.42, "grad_norm": 1.0681092739105225, "learning_rate": 6.5577645731169535e-06, "loss": 0.5815, "step": 6585 }, { "epoch": 0.42, "grad_norm": 1.2725310325622559, "learning_rate": 6.556789611577854e-06, "loss": 0.5283, "step": 6586 }, { "epoch": 0.42, "grad_norm": 1.0315043926239014, "learning_rate": 6.555814584487757e-06, "loss": 0.5367, "step": 6587 }, { "epoch": 0.42, "grad_norm": 1.012886881828308, "learning_rate": 6.5548394918877216e-06, "loss": 0.5401, "step": 6588 }, { "epoch": 0.42, "grad_norm": 0.9161368608474731, "learning_rate": 6.553864333818803e-06, "loss": 0.508, "step": 6589 }, { "epoch": 0.42, "grad_norm": 0.9886636137962341, "learning_rate": 6.552889110322062e-06, "loss": 0.5091, "step": 6590 }, { "epoch": 0.42, "grad_norm": 1.0155935287475586, "learning_rate": 6.551913821438565e-06, "loss": 0.4738, "step": 6591 }, { "epoch": 0.42, "grad_norm": 1.0011165142059326, "learning_rate": 6.550938467209375e-06, "loss": 0.5253, "step": 6592 }, { "epoch": 0.42, "grad_norm": 0.9297583103179932, "learning_rate": 6.5499630476755616e-06, "loss": 0.4809, "step": 6593 }, { "epoch": 0.42, "grad_norm": 1.0220762491226196, "learning_rate": 6.548987562878195e-06, "loss": 0.5204, "step": 6594 }, { "epoch": 0.42, "grad_norm": 1.0843340158462524, "learning_rate": 6.548012012858352e-06, "loss": 0.5195, "step": 6595 }, { "epoch": 0.42, "grad_norm": 1.0520986318588257, "learning_rate": 6.547036397657106e-06, "loss": 0.5212, "step": 6596 }, { "epoch": 0.42, "grad_norm": 1.0433666706085205, "learning_rate": 6.546060717315542e-06, "loss": 0.5474, "step": 6597 }, { "epoch": 0.42, "grad_norm": 1.0284672975540161, "learning_rate": 6.545084971874738e-06, "loss": 0.5432, "step": 6598 }, { "epoch": 0.42, "grad_norm": 1.0332967042922974, "learning_rate": 6.5441091613757805e-06, "loss": 0.5188, "step": 6599 }, { "epoch": 0.42, "grad_norm": 0.9910998344421387, "learning_rate": 6.543133285859758e-06, "loss": 0.5183, "step": 6600 }, { "epoch": 0.42, "grad_norm": 0.9519909620285034, "learning_rate": 6.542157345367763e-06, "loss": 0.4884, "step": 6601 }, { "epoch": 0.42, "grad_norm": 1.069887399673462, "learning_rate": 6.5411813399408845e-06, "loss": 0.5841, "step": 6602 }, { "epoch": 0.42, "grad_norm": 1.022975206375122, "learning_rate": 6.540205269620221e-06, "loss": 0.5542, "step": 6603 }, { "epoch": 0.42, "grad_norm": 0.9461351037025452, "learning_rate": 6.539229134446874e-06, "loss": 0.5331, "step": 6604 }, { "epoch": 0.42, "grad_norm": 1.0331028699874878, "learning_rate": 6.538252934461941e-06, "loss": 0.5288, "step": 6605 }, { "epoch": 0.42, "grad_norm": 1.0788092613220215, "learning_rate": 6.537276669706527e-06, "loss": 0.5742, "step": 6606 }, { "epoch": 0.42, "grad_norm": 1.0447759628295898, "learning_rate": 6.536300340221742e-06, "loss": 0.573, "step": 6607 }, { "epoch": 0.42, "grad_norm": 0.9823431968688965, "learning_rate": 6.535323946048695e-06, "loss": 0.5166, "step": 6608 }, { "epoch": 0.42, "grad_norm": 0.9577629566192627, "learning_rate": 6.534347487228495e-06, "loss": 0.4816, "step": 6609 }, { "epoch": 0.42, "grad_norm": 0.9827210307121277, "learning_rate": 6.533370963802261e-06, "loss": 0.497, "step": 6610 }, { "epoch": 0.42, "grad_norm": 1.1767102479934692, "learning_rate": 6.532394375811111e-06, "loss": 0.5633, "step": 6611 }, { "epoch": 0.42, "grad_norm": 1.1166398525238037, "learning_rate": 6.531417723296164e-06, "loss": 0.5354, "step": 6612 }, { "epoch": 0.42, "grad_norm": 1.1115363836288452, "learning_rate": 6.530441006298544e-06, "loss": 0.5402, "step": 6613 }, { "epoch": 0.42, "grad_norm": 1.0333431959152222, "learning_rate": 6.5294642248593765e-06, "loss": 0.5907, "step": 6614 }, { "epoch": 0.42, "grad_norm": 1.0875381231307983, "learning_rate": 6.528487379019791e-06, "loss": 0.529, "step": 6615 }, { "epoch": 0.42, "grad_norm": 0.9464609026908875, "learning_rate": 6.5275104688209215e-06, "loss": 0.4968, "step": 6616 }, { "epoch": 0.42, "grad_norm": 1.094282627105713, "learning_rate": 6.526533494303898e-06, "loss": 0.582, "step": 6617 }, { "epoch": 0.42, "grad_norm": 1.061200737953186, "learning_rate": 6.525556455509858e-06, "loss": 0.5653, "step": 6618 }, { "epoch": 0.42, "grad_norm": 0.9713898301124573, "learning_rate": 6.5245793524799465e-06, "loss": 0.5051, "step": 6619 }, { "epoch": 0.42, "grad_norm": 1.0049147605895996, "learning_rate": 6.5236021852553e-06, "loss": 0.5664, "step": 6620 }, { "epoch": 0.42, "grad_norm": 1.095810890197754, "learning_rate": 6.522624953877066e-06, "loss": 0.5445, "step": 6621 }, { "epoch": 0.42, "grad_norm": 0.9827590584754944, "learning_rate": 6.52164765838639e-06, "loss": 0.4835, "step": 6622 }, { "epoch": 0.42, "grad_norm": 1.028319239616394, "learning_rate": 6.520670298824428e-06, "loss": 0.5277, "step": 6623 }, { "epoch": 0.42, "grad_norm": 1.0088140964508057, "learning_rate": 6.519692875232328e-06, "loss": 0.4855, "step": 6624 }, { "epoch": 0.42, "grad_norm": 1.0765272378921509, "learning_rate": 6.518715387651249e-06, "loss": 0.5564, "step": 6625 }, { "epoch": 0.42, "grad_norm": 1.0901261568069458, "learning_rate": 6.517737836122345e-06, "loss": 0.5494, "step": 6626 }, { "epoch": 0.42, "grad_norm": 0.9907105565071106, "learning_rate": 6.516760220686783e-06, "loss": 0.5482, "step": 6627 }, { "epoch": 0.42, "grad_norm": 1.0758845806121826, "learning_rate": 6.515782541385725e-06, "loss": 0.595, "step": 6628 }, { "epoch": 0.42, "grad_norm": 1.0081164836883545, "learning_rate": 6.514804798260337e-06, "loss": 0.5667, "step": 6629 }, { "epoch": 0.42, "grad_norm": 0.9572669863700867, "learning_rate": 6.513826991351786e-06, "loss": 0.558, "step": 6630 }, { "epoch": 0.42, "grad_norm": 0.9977998733520508, "learning_rate": 6.512849120701249e-06, "loss": 0.5219, "step": 6631 }, { "epoch": 0.42, "grad_norm": 0.9644599556922913, "learning_rate": 6.511871186349897e-06, "loss": 0.5111, "step": 6632 }, { "epoch": 0.42, "grad_norm": 1.0376458168029785, "learning_rate": 6.510893188338911e-06, "loss": 0.5563, "step": 6633 }, { "epoch": 0.42, "grad_norm": 1.039550542831421, "learning_rate": 6.509915126709467e-06, "loss": 0.5022, "step": 6634 }, { "epoch": 0.42, "grad_norm": 1.0308173894882202, "learning_rate": 6.50893700150275e-06, "loss": 0.5156, "step": 6635 }, { "epoch": 0.42, "grad_norm": 1.015334963798523, "learning_rate": 6.5079588127599455e-06, "loss": 0.5134, "step": 6636 }, { "epoch": 0.42, "grad_norm": 0.9615833163261414, "learning_rate": 6.50698056052224e-06, "loss": 0.5199, "step": 6637 }, { "epoch": 0.42, "grad_norm": 0.9502135515213013, "learning_rate": 6.506002244830827e-06, "loss": 0.5224, "step": 6638 }, { "epoch": 0.42, "grad_norm": 0.999001681804657, "learning_rate": 6.505023865726898e-06, "loss": 0.5313, "step": 6639 }, { "epoch": 0.42, "grad_norm": 0.9371646046638489, "learning_rate": 6.50404542325165e-06, "loss": 0.5288, "step": 6640 }, { "epoch": 0.42, "grad_norm": 1.0646824836730957, "learning_rate": 6.503066917446279e-06, "loss": 0.5263, "step": 6641 }, { "epoch": 0.42, "grad_norm": 0.9830496311187744, "learning_rate": 6.502088348351992e-06, "loss": 0.5154, "step": 6642 }, { "epoch": 0.42, "grad_norm": 1.0101431608200073, "learning_rate": 6.501109716009988e-06, "loss": 0.4874, "step": 6643 }, { "epoch": 0.42, "grad_norm": 1.0016448497772217, "learning_rate": 6.500131020461477e-06, "loss": 0.5067, "step": 6644 }, { "epoch": 0.42, "grad_norm": 0.9707490801811218, "learning_rate": 6.4991522617476666e-06, "loss": 0.4988, "step": 6645 }, { "epoch": 0.42, "grad_norm": 1.03791081905365, "learning_rate": 6.498173439909771e-06, "loss": 0.5594, "step": 6646 }, { "epoch": 0.42, "grad_norm": 1.0289043188095093, "learning_rate": 6.497194554989001e-06, "loss": 0.5021, "step": 6647 }, { "epoch": 0.42, "grad_norm": 1.0819637775421143, "learning_rate": 6.496215607026579e-06, "loss": 0.4839, "step": 6648 }, { "epoch": 0.42, "grad_norm": 1.1179145574569702, "learning_rate": 6.495236596063722e-06, "loss": 0.5231, "step": 6649 }, { "epoch": 0.42, "grad_norm": 1.049654483795166, "learning_rate": 6.494257522141654e-06, "loss": 0.5223, "step": 6650 }, { "epoch": 0.42, "grad_norm": 1.221232533454895, "learning_rate": 6.4932783853016005e-06, "loss": 0.5759, "step": 6651 }, { "epoch": 0.42, "grad_norm": 0.9750707149505615, "learning_rate": 6.492299185584787e-06, "loss": 0.4607, "step": 6652 }, { "epoch": 0.42, "grad_norm": 1.0411572456359863, "learning_rate": 6.491319923032446e-06, "loss": 0.5288, "step": 6653 }, { "epoch": 0.42, "grad_norm": 0.9910756945610046, "learning_rate": 6.490340597685811e-06, "loss": 0.5358, "step": 6654 }, { "epoch": 0.42, "grad_norm": 0.9476546049118042, "learning_rate": 6.48936120958612e-06, "loss": 0.512, "step": 6655 }, { "epoch": 0.42, "grad_norm": 0.9512740969657898, "learning_rate": 6.488381758774609e-06, "loss": 0.5522, "step": 6656 }, { "epoch": 0.42, "grad_norm": 0.9980294108390808, "learning_rate": 6.487402245292518e-06, "loss": 0.5226, "step": 6657 }, { "epoch": 0.42, "grad_norm": 1.0905872583389282, "learning_rate": 6.486422669181094e-06, "loss": 0.5611, "step": 6658 }, { "epoch": 0.42, "grad_norm": 0.9635381698608398, "learning_rate": 6.485443030481583e-06, "loss": 0.5382, "step": 6659 }, { "epoch": 0.42, "grad_norm": 1.0294065475463867, "learning_rate": 6.4844633292352335e-06, "loss": 0.5563, "step": 6660 }, { "epoch": 0.42, "grad_norm": 0.9263328909873962, "learning_rate": 6.483483565483295e-06, "loss": 0.4946, "step": 6661 }, { "epoch": 0.42, "grad_norm": 0.9932078719139099, "learning_rate": 6.482503739267026e-06, "loss": 0.5044, "step": 6662 }, { "epoch": 0.42, "grad_norm": 1.065634846687317, "learning_rate": 6.481523850627682e-06, "loss": 0.5581, "step": 6663 }, { "epoch": 0.42, "grad_norm": 1.054053544998169, "learning_rate": 6.4805438996065215e-06, "loss": 0.6028, "step": 6664 }, { "epoch": 0.42, "grad_norm": 0.9770582318305969, "learning_rate": 6.479563886244809e-06, "loss": 0.5487, "step": 6665 }, { "epoch": 0.42, "grad_norm": 1.0783045291900635, "learning_rate": 6.478583810583807e-06, "loss": 0.5277, "step": 6666 }, { "epoch": 0.42, "grad_norm": 1.0439814329147339, "learning_rate": 6.477603672664785e-06, "loss": 0.5342, "step": 6667 }, { "epoch": 0.42, "grad_norm": 0.9825044274330139, "learning_rate": 6.476623472529012e-06, "loss": 0.5235, "step": 6668 }, { "epoch": 0.42, "grad_norm": 0.9862465858459473, "learning_rate": 6.475643210217762e-06, "loss": 0.5693, "step": 6669 }, { "epoch": 0.42, "grad_norm": 1.1262718439102173, "learning_rate": 6.47466288577231e-06, "loss": 0.5738, "step": 6670 }, { "epoch": 0.42, "grad_norm": 0.9301985502243042, "learning_rate": 6.473682499233934e-06, "loss": 0.4966, "step": 6671 }, { "epoch": 0.42, "grad_norm": 1.0108258724212646, "learning_rate": 6.472702050643913e-06, "loss": 0.5076, "step": 6672 }, { "epoch": 0.42, "grad_norm": 1.0300981998443604, "learning_rate": 6.471721540043533e-06, "loss": 0.5227, "step": 6673 }, { "epoch": 0.42, "grad_norm": 1.1556057929992676, "learning_rate": 6.47074096747408e-06, "loss": 0.5453, "step": 6674 }, { "epoch": 0.42, "grad_norm": 0.9517860412597656, "learning_rate": 6.469760332976839e-06, "loss": 0.4722, "step": 6675 }, { "epoch": 0.42, "grad_norm": 1.0921064615249634, "learning_rate": 6.4687796365931035e-06, "loss": 0.5293, "step": 6676 }, { "epoch": 0.42, "grad_norm": 1.0534489154815674, "learning_rate": 6.467798878364168e-06, "loss": 0.5218, "step": 6677 }, { "epoch": 0.42, "grad_norm": 0.9986158013343811, "learning_rate": 6.466818058331328e-06, "loss": 0.5382, "step": 6678 }, { "epoch": 0.42, "grad_norm": 1.045770287513733, "learning_rate": 6.465837176535881e-06, "loss": 0.5067, "step": 6679 }, { "epoch": 0.42, "grad_norm": 0.9914131164550781, "learning_rate": 6.46485623301913e-06, "loss": 0.504, "step": 6680 }, { "epoch": 0.42, "grad_norm": 0.9861920475959778, "learning_rate": 6.46387522782238e-06, "loss": 0.548, "step": 6681 }, { "epoch": 0.42, "grad_norm": 0.9626903533935547, "learning_rate": 6.462894160986937e-06, "loss": 0.532, "step": 6682 }, { "epoch": 0.42, "grad_norm": 1.0459203720092773, "learning_rate": 6.461913032554108e-06, "loss": 0.5218, "step": 6683 }, { "epoch": 0.42, "grad_norm": 0.9998902082443237, "learning_rate": 6.460931842565207e-06, "loss": 0.5367, "step": 6684 }, { "epoch": 0.42, "grad_norm": 0.9615074396133423, "learning_rate": 6.4599505910615505e-06, "loss": 0.5265, "step": 6685 }, { "epoch": 0.42, "grad_norm": 1.0468496084213257, "learning_rate": 6.45896927808445e-06, "loss": 0.5384, "step": 6686 }, { "epoch": 0.42, "grad_norm": 0.9850332140922546, "learning_rate": 6.4579879036752315e-06, "loss": 0.5649, "step": 6687 }, { "epoch": 0.42, "grad_norm": 1.007083535194397, "learning_rate": 6.457006467875213e-06, "loss": 0.5188, "step": 6688 }, { "epoch": 0.42, "grad_norm": 1.0500504970550537, "learning_rate": 6.456024970725722e-06, "loss": 0.502, "step": 6689 }, { "epoch": 0.42, "grad_norm": 1.0205175876617432, "learning_rate": 6.455043412268083e-06, "loss": 0.5195, "step": 6690 }, { "epoch": 0.42, "grad_norm": 1.0550191402435303, "learning_rate": 6.4540617925436275e-06, "loss": 0.4939, "step": 6691 }, { "epoch": 0.42, "grad_norm": 1.0376819372177124, "learning_rate": 6.453080111593689e-06, "loss": 0.5356, "step": 6692 }, { "epoch": 0.42, "grad_norm": 0.9219244718551636, "learning_rate": 6.4520983694596025e-06, "loss": 0.4873, "step": 6693 }, { "epoch": 0.42, "grad_norm": 1.0686750411987305, "learning_rate": 6.451116566182704e-06, "loss": 0.5296, "step": 6694 }, { "epoch": 0.42, "grad_norm": 1.0167797803878784, "learning_rate": 6.4501347018043356e-06, "loss": 0.5431, "step": 6695 }, { "epoch": 0.42, "grad_norm": 1.0186437368392944, "learning_rate": 6.44915277636584e-06, "loss": 0.5459, "step": 6696 }, { "epoch": 0.42, "grad_norm": 1.0310328006744385, "learning_rate": 6.4481707899085624e-06, "loss": 0.5266, "step": 6697 }, { "epoch": 0.42, "grad_norm": 1.0500410795211792, "learning_rate": 6.447188742473849e-06, "loss": 0.5378, "step": 6698 }, { "epoch": 0.42, "grad_norm": 1.064555048942566, "learning_rate": 6.446206634103053e-06, "loss": 0.5527, "step": 6699 }, { "epoch": 0.42, "grad_norm": 0.9763587713241577, "learning_rate": 6.445224464837527e-06, "loss": 0.564, "step": 6700 }, { "epoch": 0.42, "grad_norm": 0.9487929344177246, "learning_rate": 6.444242234718626e-06, "loss": 0.4782, "step": 6701 }, { "epoch": 0.42, "grad_norm": 1.0297126770019531, "learning_rate": 6.443259943787708e-06, "loss": 0.5551, "step": 6702 }, { "epoch": 0.42, "grad_norm": 1.0050936937332153, "learning_rate": 6.442277592086135e-06, "loss": 0.5399, "step": 6703 }, { "epoch": 0.42, "grad_norm": 1.0384033918380737, "learning_rate": 6.4412951796552715e-06, "loss": 0.5786, "step": 6704 }, { "epoch": 0.42, "grad_norm": 1.0282918214797974, "learning_rate": 6.44031270653648e-06, "loss": 0.544, "step": 6705 }, { "epoch": 0.42, "grad_norm": 1.083512306213379, "learning_rate": 6.4393301727711296e-06, "loss": 0.6097, "step": 6706 }, { "epoch": 0.42, "grad_norm": 1.0070874691009521, "learning_rate": 6.438347578400595e-06, "loss": 0.5758, "step": 6707 }, { "epoch": 0.42, "grad_norm": 1.0564383268356323, "learning_rate": 6.437364923466247e-06, "loss": 0.5719, "step": 6708 }, { "epoch": 0.43, "grad_norm": 1.0171891450881958, "learning_rate": 6.436382208009463e-06, "loss": 0.5656, "step": 6709 }, { "epoch": 0.43, "grad_norm": 1.1269818544387817, "learning_rate": 6.43539943207162e-06, "loss": 0.54, "step": 6710 }, { "epoch": 0.43, "grad_norm": 0.9396026134490967, "learning_rate": 6.434416595694102e-06, "loss": 0.4855, "step": 6711 }, { "epoch": 0.43, "grad_norm": 0.9992225170135498, "learning_rate": 6.433433698918289e-06, "loss": 0.5318, "step": 6712 }, { "epoch": 0.43, "grad_norm": 1.0550096035003662, "learning_rate": 6.432450741785571e-06, "loss": 0.5177, "step": 6713 }, { "epoch": 0.43, "grad_norm": 1.1212995052337646, "learning_rate": 6.431467724337332e-06, "loss": 0.5223, "step": 6714 }, { "epoch": 0.43, "grad_norm": 0.9880232810974121, "learning_rate": 6.430484646614971e-06, "loss": 0.6145, "step": 6715 }, { "epoch": 0.43, "grad_norm": 0.9042444229125977, "learning_rate": 6.429501508659877e-06, "loss": 0.4942, "step": 6716 }, { "epoch": 0.43, "grad_norm": 1.032116174697876, "learning_rate": 6.428518310513446e-06, "loss": 0.5615, "step": 6717 }, { "epoch": 0.43, "grad_norm": 1.0590288639068604, "learning_rate": 6.427535052217078e-06, "loss": 0.5699, "step": 6718 }, { "epoch": 0.43, "grad_norm": 0.9387075304985046, "learning_rate": 6.4265517338121764e-06, "loss": 0.5472, "step": 6719 }, { "epoch": 0.43, "grad_norm": 1.0132668018341064, "learning_rate": 6.4255683553401435e-06, "loss": 0.5519, "step": 6720 }, { "epoch": 0.43, "grad_norm": 0.9345173239707947, "learning_rate": 6.424584916842387e-06, "loss": 0.5051, "step": 6721 }, { "epoch": 0.43, "grad_norm": 0.9697826504707336, "learning_rate": 6.423601418360314e-06, "loss": 0.5309, "step": 6722 }, { "epoch": 0.43, "grad_norm": 1.0445016622543335, "learning_rate": 6.4226178599353385e-06, "loss": 0.5459, "step": 6723 }, { "epoch": 0.43, "grad_norm": 1.0668346881866455, "learning_rate": 6.421634241608874e-06, "loss": 0.5311, "step": 6724 }, { "epoch": 0.43, "grad_norm": 1.009395956993103, "learning_rate": 6.420650563422337e-06, "loss": 0.5064, "step": 6725 }, { "epoch": 0.43, "grad_norm": 1.0648528337478638, "learning_rate": 6.419666825417147e-06, "loss": 0.5421, "step": 6726 }, { "epoch": 0.43, "grad_norm": 1.070974588394165, "learning_rate": 6.4186830276347246e-06, "loss": 0.523, "step": 6727 }, { "epoch": 0.43, "grad_norm": 1.0476374626159668, "learning_rate": 6.417699170116497e-06, "loss": 0.5602, "step": 6728 }, { "epoch": 0.43, "grad_norm": 1.0144678354263306, "learning_rate": 6.416715252903888e-06, "loss": 0.5607, "step": 6729 }, { "epoch": 0.43, "grad_norm": 1.0093867778778076, "learning_rate": 6.415731276038327e-06, "loss": 0.5283, "step": 6730 }, { "epoch": 0.43, "grad_norm": 0.955757737159729, "learning_rate": 6.414747239561249e-06, "loss": 0.5502, "step": 6731 }, { "epoch": 0.43, "grad_norm": 1.0207679271697998, "learning_rate": 6.413763143514086e-06, "loss": 0.5735, "step": 6732 }, { "epoch": 0.43, "grad_norm": 1.0862566232681274, "learning_rate": 6.412778987938273e-06, "loss": 0.5535, "step": 6733 }, { "epoch": 0.43, "grad_norm": 1.0303869247436523, "learning_rate": 6.411794772875253e-06, "loss": 0.5531, "step": 6734 }, { "epoch": 0.43, "grad_norm": 0.9645281434059143, "learning_rate": 6.4108104983664665e-06, "loss": 0.4851, "step": 6735 }, { "epoch": 0.43, "grad_norm": 1.0301026105880737, "learning_rate": 6.409826164453359e-06, "loss": 0.4651, "step": 6736 }, { "epoch": 0.43, "grad_norm": 1.0346624851226807, "learning_rate": 6.408841771177373e-06, "loss": 0.582, "step": 6737 }, { "epoch": 0.43, "grad_norm": 0.9744212627410889, "learning_rate": 6.407857318579963e-06, "loss": 0.4727, "step": 6738 }, { "epoch": 0.43, "grad_norm": 0.9972878098487854, "learning_rate": 6.4068728067025785e-06, "loss": 0.5673, "step": 6739 }, { "epoch": 0.43, "grad_norm": 1.027344822883606, "learning_rate": 6.405888235586676e-06, "loss": 0.5011, "step": 6740 }, { "epoch": 0.43, "grad_norm": 1.0794671773910522, "learning_rate": 6.4049036052737065e-06, "loss": 0.5509, "step": 6741 }, { "epoch": 0.43, "grad_norm": 1.0829273462295532, "learning_rate": 6.403918915805138e-06, "loss": 0.5341, "step": 6742 }, { "epoch": 0.43, "grad_norm": 1.0934503078460693, "learning_rate": 6.402934167222427e-06, "loss": 0.5575, "step": 6743 }, { "epoch": 0.43, "grad_norm": 1.0320874452590942, "learning_rate": 6.4019493595670365e-06, "loss": 0.4917, "step": 6744 }, { "epoch": 0.43, "grad_norm": 0.9831816554069519, "learning_rate": 6.400964492880437e-06, "loss": 0.5485, "step": 6745 }, { "epoch": 0.43, "grad_norm": 1.0676835775375366, "learning_rate": 6.399979567204096e-06, "loss": 0.5663, "step": 6746 }, { "epoch": 0.43, "grad_norm": 1.0714167356491089, "learning_rate": 6.398994582579485e-06, "loss": 0.5501, "step": 6747 }, { "epoch": 0.43, "grad_norm": 0.889264702796936, "learning_rate": 6.39800953904808e-06, "loss": 0.5037, "step": 6748 }, { "epoch": 0.43, "grad_norm": 0.9774421453475952, "learning_rate": 6.397024436651356e-06, "loss": 0.4996, "step": 6749 }, { "epoch": 0.43, "grad_norm": 1.0261812210083008, "learning_rate": 6.396039275430792e-06, "loss": 0.5452, "step": 6750 }, { "epoch": 0.43, "grad_norm": 1.1411161422729492, "learning_rate": 6.395054055427872e-06, "loss": 0.5884, "step": 6751 }, { "epoch": 0.43, "grad_norm": 0.9426940679550171, "learning_rate": 6.394068776684078e-06, "loss": 0.5264, "step": 6752 }, { "epoch": 0.43, "grad_norm": 0.9334375262260437, "learning_rate": 6.393083439240897e-06, "loss": 0.4932, "step": 6753 }, { "epoch": 0.43, "grad_norm": 0.9701194763183594, "learning_rate": 6.39209804313982e-06, "loss": 0.5225, "step": 6754 }, { "epoch": 0.43, "grad_norm": 0.9619608521461487, "learning_rate": 6.391112588422337e-06, "loss": 0.496, "step": 6755 }, { "epoch": 0.43, "grad_norm": 1.0969892740249634, "learning_rate": 6.390127075129941e-06, "loss": 0.5008, "step": 6756 }, { "epoch": 0.43, "grad_norm": 1.0912171602249146, "learning_rate": 6.38914150330413e-06, "loss": 0.5472, "step": 6757 }, { "epoch": 0.43, "grad_norm": 1.0904109477996826, "learning_rate": 6.388155872986404e-06, "loss": 0.5307, "step": 6758 }, { "epoch": 0.43, "grad_norm": 0.9923735857009888, "learning_rate": 6.3871701842182625e-06, "loss": 0.5192, "step": 6759 }, { "epoch": 0.43, "grad_norm": 0.9856535196304321, "learning_rate": 6.386184437041208e-06, "loss": 0.5701, "step": 6760 }, { "epoch": 0.43, "grad_norm": 1.0405540466308594, "learning_rate": 6.385198631496752e-06, "loss": 0.5626, "step": 6761 }, { "epoch": 0.43, "grad_norm": 0.9822201728820801, "learning_rate": 6.3842127676263995e-06, "loss": 0.5156, "step": 6762 }, { "epoch": 0.43, "grad_norm": 1.0651607513427734, "learning_rate": 6.383226845471663e-06, "loss": 0.5882, "step": 6763 }, { "epoch": 0.43, "grad_norm": 0.9878702163696289, "learning_rate": 6.382240865074055e-06, "loss": 0.5313, "step": 6764 }, { "epoch": 0.43, "grad_norm": 1.0117194652557373, "learning_rate": 6.381254826475093e-06, "loss": 0.5126, "step": 6765 }, { "epoch": 0.43, "grad_norm": 0.964489758014679, "learning_rate": 6.380268729716296e-06, "loss": 0.5657, "step": 6766 }, { "epoch": 0.43, "grad_norm": 1.0411863327026367, "learning_rate": 6.379282574839184e-06, "loss": 0.5442, "step": 6767 }, { "epoch": 0.43, "grad_norm": 0.9609441161155701, "learning_rate": 6.37829636188528e-06, "loss": 0.5049, "step": 6768 }, { "epoch": 0.43, "grad_norm": 1.0357297658920288, "learning_rate": 6.377310090896112e-06, "loss": 0.5303, "step": 6769 }, { "epoch": 0.43, "grad_norm": 1.0132602453231812, "learning_rate": 6.376323761913208e-06, "loss": 0.5077, "step": 6770 }, { "epoch": 0.43, "grad_norm": 1.058708667755127, "learning_rate": 6.375337374978097e-06, "loss": 0.5762, "step": 6771 }, { "epoch": 0.43, "grad_norm": 1.0159242153167725, "learning_rate": 6.374350930132313e-06, "loss": 0.5666, "step": 6772 }, { "epoch": 0.43, "grad_norm": 1.0299150943756104, "learning_rate": 6.373364427417395e-06, "loss": 0.5648, "step": 6773 }, { "epoch": 0.43, "grad_norm": 0.998892605304718, "learning_rate": 6.372377866874876e-06, "loss": 0.483, "step": 6774 }, { "epoch": 0.43, "grad_norm": 0.9861887693405151, "learning_rate": 6.371391248546299e-06, "loss": 0.5156, "step": 6775 }, { "epoch": 0.43, "grad_norm": 0.9344152808189392, "learning_rate": 6.370404572473209e-06, "loss": 0.5683, "step": 6776 }, { "epoch": 0.43, "grad_norm": 1.0953096151351929, "learning_rate": 6.36941783869715e-06, "loss": 0.609, "step": 6777 }, { "epoch": 0.43, "grad_norm": 0.9250005483627319, "learning_rate": 6.368431047259668e-06, "loss": 0.4682, "step": 6778 }, { "epoch": 0.43, "grad_norm": 1.148626685142517, "learning_rate": 6.367444198202315e-06, "loss": 0.5514, "step": 6779 }, { "epoch": 0.43, "grad_norm": 1.086868166923523, "learning_rate": 6.366457291566645e-06, "loss": 0.575, "step": 6780 }, { "epoch": 0.43, "grad_norm": 1.0445231199264526, "learning_rate": 6.365470327394212e-06, "loss": 0.5424, "step": 6781 }, { "epoch": 0.43, "grad_norm": 0.9501314163208008, "learning_rate": 6.3644833057265735e-06, "loss": 0.5533, "step": 6782 }, { "epoch": 0.43, "grad_norm": 0.951246976852417, "learning_rate": 6.363496226605289e-06, "loss": 0.528, "step": 6783 }, { "epoch": 0.43, "grad_norm": 1.1135523319244385, "learning_rate": 6.362509090071922e-06, "loss": 0.5157, "step": 6784 }, { "epoch": 0.43, "grad_norm": 1.0375339984893799, "learning_rate": 6.361521896168037e-06, "loss": 0.5384, "step": 6785 }, { "epoch": 0.43, "grad_norm": 0.9648322463035583, "learning_rate": 6.360534644935201e-06, "loss": 0.5344, "step": 6786 }, { "epoch": 0.43, "grad_norm": 1.0026075839996338, "learning_rate": 6.359547336414985e-06, "loss": 0.5257, "step": 6787 }, { "epoch": 0.43, "grad_norm": 1.063840627670288, "learning_rate": 6.358559970648958e-06, "loss": 0.5589, "step": 6788 }, { "epoch": 0.43, "grad_norm": 0.9657617211341858, "learning_rate": 6.357572547678701e-06, "loss": 0.5318, "step": 6789 }, { "epoch": 0.43, "grad_norm": 1.0497090816497803, "learning_rate": 6.356585067545784e-06, "loss": 0.5196, "step": 6790 }, { "epoch": 0.43, "grad_norm": 0.9832949638366699, "learning_rate": 6.355597530291788e-06, "loss": 0.5398, "step": 6791 }, { "epoch": 0.43, "grad_norm": 1.04899001121521, "learning_rate": 6.354609935958298e-06, "loss": 0.5216, "step": 6792 }, { "epoch": 0.43, "grad_norm": 1.1187092065811157, "learning_rate": 6.3536222845868934e-06, "loss": 0.5698, "step": 6793 }, { "epoch": 0.43, "grad_norm": 1.0921283960342407, "learning_rate": 6.3526345762191656e-06, "loss": 0.5633, "step": 6794 }, { "epoch": 0.43, "grad_norm": 0.9514185190200806, "learning_rate": 6.351646810896699e-06, "loss": 0.5154, "step": 6795 }, { "epoch": 0.43, "grad_norm": 0.9936259984970093, "learning_rate": 6.350658988661089e-06, "loss": 0.519, "step": 6796 }, { "epoch": 0.43, "grad_norm": 0.969613254070282, "learning_rate": 6.349671109553928e-06, "loss": 0.4938, "step": 6797 }, { "epoch": 0.43, "grad_norm": 1.0424509048461914, "learning_rate": 6.348683173616811e-06, "loss": 0.5087, "step": 6798 }, { "epoch": 0.43, "grad_norm": 0.9708645939826965, "learning_rate": 6.347695180891337e-06, "loss": 0.5396, "step": 6799 }, { "epoch": 0.43, "grad_norm": 0.9806487560272217, "learning_rate": 6.346707131419108e-06, "loss": 0.5133, "step": 6800 }, { "epoch": 0.43, "grad_norm": 1.017426609992981, "learning_rate": 6.345719025241725e-06, "loss": 0.5211, "step": 6801 }, { "epoch": 0.43, "grad_norm": 1.030496597290039, "learning_rate": 6.3447308624007964e-06, "loss": 0.5496, "step": 6802 }, { "epoch": 0.43, "grad_norm": 1.0744706392288208, "learning_rate": 6.343742642937929e-06, "loss": 0.4844, "step": 6803 }, { "epoch": 0.43, "grad_norm": 1.0421864986419678, "learning_rate": 6.342754366894735e-06, "loss": 0.5664, "step": 6804 }, { "epoch": 0.43, "grad_norm": 1.0138890743255615, "learning_rate": 6.341766034312824e-06, "loss": 0.5302, "step": 6805 }, { "epoch": 0.43, "grad_norm": 1.0514960289001465, "learning_rate": 6.340777645233811e-06, "loss": 0.5616, "step": 6806 }, { "epoch": 0.43, "grad_norm": 1.040167212486267, "learning_rate": 6.339789199699319e-06, "loss": 0.5204, "step": 6807 }, { "epoch": 0.43, "grad_norm": 1.0476608276367188, "learning_rate": 6.338800697750963e-06, "loss": 0.5165, "step": 6808 }, { "epoch": 0.43, "grad_norm": 0.9987275004386902, "learning_rate": 6.337812139430368e-06, "loss": 0.4884, "step": 6809 }, { "epoch": 0.43, "grad_norm": 1.1082664728164673, "learning_rate": 6.336823524779155e-06, "loss": 0.5455, "step": 6810 }, { "epoch": 0.43, "grad_norm": 0.9753262996673584, "learning_rate": 6.335834853838957e-06, "loss": 0.4923, "step": 6811 }, { "epoch": 0.43, "grad_norm": 1.0573155879974365, "learning_rate": 6.334846126651399e-06, "loss": 0.5637, "step": 6812 }, { "epoch": 0.43, "grad_norm": 1.116104006767273, "learning_rate": 6.333857343258115e-06, "loss": 0.5546, "step": 6813 }, { "epoch": 0.43, "grad_norm": 1.0665115118026733, "learning_rate": 6.3328685037007365e-06, "loss": 0.5216, "step": 6814 }, { "epoch": 0.43, "grad_norm": 1.1459187269210815, "learning_rate": 6.331879608020905e-06, "loss": 0.536, "step": 6815 }, { "epoch": 0.43, "grad_norm": 1.086716651916504, "learning_rate": 6.330890656260253e-06, "loss": 0.5948, "step": 6816 }, { "epoch": 0.43, "grad_norm": 1.0453628301620483, "learning_rate": 6.329901648460428e-06, "loss": 0.5709, "step": 6817 }, { "epoch": 0.43, "grad_norm": 1.0266021490097046, "learning_rate": 6.32891258466307e-06, "loss": 0.5359, "step": 6818 }, { "epoch": 0.43, "grad_norm": 0.9942297339439392, "learning_rate": 6.3279234649098265e-06, "loss": 0.5165, "step": 6819 }, { "epoch": 0.43, "grad_norm": 1.034275770187378, "learning_rate": 6.326934289242346e-06, "loss": 0.4993, "step": 6820 }, { "epoch": 0.43, "grad_norm": 1.0777255296707153, "learning_rate": 6.325945057702276e-06, "loss": 0.5784, "step": 6821 }, { "epoch": 0.43, "grad_norm": 0.9982571601867676, "learning_rate": 6.324955770331274e-06, "loss": 0.5562, "step": 6822 }, { "epoch": 0.43, "grad_norm": 0.9832544922828674, "learning_rate": 6.323966427170993e-06, "loss": 0.5012, "step": 6823 }, { "epoch": 0.43, "grad_norm": 1.1062462329864502, "learning_rate": 6.322977028263093e-06, "loss": 0.5411, "step": 6824 }, { "epoch": 0.43, "grad_norm": 1.1453473567962646, "learning_rate": 6.321987573649232e-06, "loss": 0.5131, "step": 6825 }, { "epoch": 0.43, "grad_norm": 0.9943939447402954, "learning_rate": 6.320998063371072e-06, "loss": 0.5, "step": 6826 }, { "epoch": 0.43, "grad_norm": 0.993712842464447, "learning_rate": 6.320008497470281e-06, "loss": 0.5696, "step": 6827 }, { "epoch": 0.43, "grad_norm": 1.0750510692596436, "learning_rate": 6.319018875988523e-06, "loss": 0.5619, "step": 6828 }, { "epoch": 0.43, "grad_norm": 1.065926194190979, "learning_rate": 6.318029198967468e-06, "loss": 0.5357, "step": 6829 }, { "epoch": 0.43, "grad_norm": 1.0730857849121094, "learning_rate": 6.317039466448789e-06, "loss": 0.5626, "step": 6830 }, { "epoch": 0.43, "grad_norm": 0.9925942420959473, "learning_rate": 6.316049678474159e-06, "loss": 0.5374, "step": 6831 }, { "epoch": 0.43, "grad_norm": 1.1061160564422607, "learning_rate": 6.315059835085257e-06, "loss": 0.5149, "step": 6832 }, { "epoch": 0.43, "grad_norm": 1.0566545724868774, "learning_rate": 6.314069936323759e-06, "loss": 0.5216, "step": 6833 }, { "epoch": 0.43, "grad_norm": 0.9762769341468811, "learning_rate": 6.313079982231347e-06, "loss": 0.5119, "step": 6834 }, { "epoch": 0.43, "grad_norm": 1.0013434886932373, "learning_rate": 6.312089972849707e-06, "loss": 0.546, "step": 6835 }, { "epoch": 0.43, "grad_norm": 0.9725648760795593, "learning_rate": 6.31109990822052e-06, "loss": 0.5382, "step": 6836 }, { "epoch": 0.43, "grad_norm": 0.9421480894088745, "learning_rate": 6.3101097883854765e-06, "loss": 0.5517, "step": 6837 }, { "epoch": 0.43, "grad_norm": 1.1255269050598145, "learning_rate": 6.30911961338627e-06, "loss": 0.5163, "step": 6838 }, { "epoch": 0.43, "grad_norm": 1.0309721231460571, "learning_rate": 6.3081293832645896e-06, "loss": 0.5258, "step": 6839 }, { "epoch": 0.43, "grad_norm": 1.0831512212753296, "learning_rate": 6.30713909806213e-06, "loss": 0.486, "step": 6840 }, { "epoch": 0.43, "grad_norm": 0.9742302298545837, "learning_rate": 6.306148757820591e-06, "loss": 0.5228, "step": 6841 }, { "epoch": 0.43, "grad_norm": 0.971137523651123, "learning_rate": 6.3051583625816725e-06, "loss": 0.4978, "step": 6842 }, { "epoch": 0.43, "grad_norm": 0.9712395668029785, "learning_rate": 6.304167912387076e-06, "loss": 0.4797, "step": 6843 }, { "epoch": 0.43, "grad_norm": 1.0512776374816895, "learning_rate": 6.303177407278504e-06, "loss": 0.5436, "step": 6844 }, { "epoch": 0.43, "grad_norm": 0.9775992631912231, "learning_rate": 6.302186847297666e-06, "loss": 0.4938, "step": 6845 }, { "epoch": 0.43, "grad_norm": 1.0109450817108154, "learning_rate": 6.301196232486269e-06, "loss": 0.5052, "step": 6846 }, { "epoch": 0.43, "grad_norm": 1.024448275566101, "learning_rate": 6.300205562886026e-06, "loss": 0.4826, "step": 6847 }, { "epoch": 0.43, "grad_norm": 1.0678684711456299, "learning_rate": 6.29921483853865e-06, "loss": 0.5418, "step": 6848 }, { "epoch": 0.43, "grad_norm": 0.9749902486801147, "learning_rate": 6.298224059485856e-06, "loss": 0.5008, "step": 6849 }, { "epoch": 0.43, "grad_norm": 0.9713776111602783, "learning_rate": 6.297233225769363e-06, "loss": 0.5541, "step": 6850 }, { "epoch": 0.43, "grad_norm": 1.0189266204833984, "learning_rate": 6.296242337430892e-06, "loss": 0.4715, "step": 6851 }, { "epoch": 0.43, "grad_norm": 0.994621753692627, "learning_rate": 6.2952513945121654e-06, "loss": 0.5534, "step": 6852 }, { "epoch": 0.43, "grad_norm": 0.974395215511322, "learning_rate": 6.2942603970549075e-06, "loss": 0.514, "step": 6853 }, { "epoch": 0.43, "grad_norm": 0.9735104441642761, "learning_rate": 6.293269345100849e-06, "loss": 0.5116, "step": 6854 }, { "epoch": 0.43, "grad_norm": 0.9252398610115051, "learning_rate": 6.292278238691715e-06, "loss": 0.5395, "step": 6855 }, { "epoch": 0.43, "grad_norm": 0.9296538829803467, "learning_rate": 6.29128707786924e-06, "loss": 0.5244, "step": 6856 }, { "epoch": 0.43, "grad_norm": 1.0363876819610596, "learning_rate": 6.29029586267516e-06, "loss": 0.5, "step": 6857 }, { "epoch": 0.43, "grad_norm": 1.0339961051940918, "learning_rate": 6.289304593151209e-06, "loss": 0.5435, "step": 6858 }, { "epoch": 0.43, "grad_norm": 0.9722663760185242, "learning_rate": 6.288313269339126e-06, "loss": 0.5263, "step": 6859 }, { "epoch": 0.43, "grad_norm": 0.9953331351280212, "learning_rate": 6.287321891280653e-06, "loss": 0.5067, "step": 6860 }, { "epoch": 0.43, "grad_norm": 1.011223316192627, "learning_rate": 6.2863304590175335e-06, "loss": 0.522, "step": 6861 }, { "epoch": 0.43, "grad_norm": 0.9478493928909302, "learning_rate": 6.2853389725915146e-06, "loss": 0.5295, "step": 6862 }, { "epoch": 0.43, "grad_norm": 0.960415780544281, "learning_rate": 6.284347432044342e-06, "loss": 0.538, "step": 6863 }, { "epoch": 0.43, "grad_norm": 1.0387394428253174, "learning_rate": 6.2833558374177664e-06, "loss": 0.5375, "step": 6864 }, { "epoch": 0.43, "grad_norm": 0.9971448183059692, "learning_rate": 6.282364188753541e-06, "loss": 0.5221, "step": 6865 }, { "epoch": 0.43, "grad_norm": 0.9863330721855164, "learning_rate": 6.281372486093422e-06, "loss": 0.557, "step": 6866 }, { "epoch": 0.44, "grad_norm": 1.0580681562423706, "learning_rate": 6.280380729479164e-06, "loss": 0.5088, "step": 6867 }, { "epoch": 0.44, "grad_norm": 1.0422565937042236, "learning_rate": 6.279388918952527e-06, "loss": 0.544, "step": 6868 }, { "epoch": 0.44, "grad_norm": 1.0941506624221802, "learning_rate": 6.278397054555275e-06, "loss": 0.5481, "step": 6869 }, { "epoch": 0.44, "grad_norm": 1.0862221717834473, "learning_rate": 6.277405136329169e-06, "loss": 0.5898, "step": 6870 }, { "epoch": 0.44, "grad_norm": 0.996271550655365, "learning_rate": 6.276413164315978e-06, "loss": 0.4989, "step": 6871 }, { "epoch": 0.44, "grad_norm": 0.9512807726860046, "learning_rate": 6.2754211385574674e-06, "loss": 0.5268, "step": 6872 }, { "epoch": 0.44, "grad_norm": 1.0293751955032349, "learning_rate": 6.274429059095411e-06, "loss": 0.5562, "step": 6873 }, { "epoch": 0.44, "grad_norm": 1.046460509300232, "learning_rate": 6.273436925971578e-06, "loss": 0.537, "step": 6874 }, { "epoch": 0.44, "grad_norm": 0.9507560729980469, "learning_rate": 6.272444739227748e-06, "loss": 0.4754, "step": 6875 }, { "epoch": 0.44, "grad_norm": 1.004612684249878, "learning_rate": 6.2714524989056945e-06, "loss": 0.5451, "step": 6876 }, { "epoch": 0.44, "grad_norm": 1.1527137756347656, "learning_rate": 6.270460205047202e-06, "loss": 0.5796, "step": 6877 }, { "epoch": 0.44, "grad_norm": 0.9972601532936096, "learning_rate": 6.269467857694047e-06, "loss": 0.4971, "step": 6878 }, { "epoch": 0.44, "grad_norm": 1.0107241868972778, "learning_rate": 6.268475456888019e-06, "loss": 0.5368, "step": 6879 }, { "epoch": 0.44, "grad_norm": 0.9781653881072998, "learning_rate": 6.2674830026709014e-06, "loss": 0.5635, "step": 6880 }, { "epoch": 0.44, "grad_norm": 1.0893415212631226, "learning_rate": 6.266490495084484e-06, "loss": 0.5249, "step": 6881 }, { "epoch": 0.44, "grad_norm": 1.066577672958374, "learning_rate": 6.265497934170559e-06, "loss": 0.5358, "step": 6882 }, { "epoch": 0.44, "grad_norm": 1.0110509395599365, "learning_rate": 6.264505319970915e-06, "loss": 0.5135, "step": 6883 }, { "epoch": 0.44, "grad_norm": 0.8866680264472961, "learning_rate": 6.263512652527353e-06, "loss": 0.5053, "step": 6884 }, { "epoch": 0.44, "grad_norm": 1.0273590087890625, "learning_rate": 6.262519931881669e-06, "loss": 0.5519, "step": 6885 }, { "epoch": 0.44, "grad_norm": 1.0803920030593872, "learning_rate": 6.261527158075662e-06, "loss": 0.5404, "step": 6886 }, { "epoch": 0.44, "grad_norm": 0.9233646392822266, "learning_rate": 6.260534331151133e-06, "loss": 0.4459, "step": 6887 }, { "epoch": 0.44, "grad_norm": 1.0693546533584595, "learning_rate": 6.259541451149892e-06, "loss": 0.4995, "step": 6888 }, { "epoch": 0.44, "grad_norm": 1.0313082933425903, "learning_rate": 6.258548518113741e-06, "loss": 0.5383, "step": 6889 }, { "epoch": 0.44, "grad_norm": 0.9983394145965576, "learning_rate": 6.257555532084489e-06, "loss": 0.4719, "step": 6890 }, { "epoch": 0.44, "grad_norm": 1.0172432661056519, "learning_rate": 6.2565624931039485e-06, "loss": 0.5472, "step": 6891 }, { "epoch": 0.44, "grad_norm": 1.0359172821044922, "learning_rate": 6.255569401213933e-06, "loss": 0.5452, "step": 6892 }, { "epoch": 0.44, "grad_norm": 1.0232447385787964, "learning_rate": 6.254576256456257e-06, "loss": 0.5078, "step": 6893 }, { "epoch": 0.44, "grad_norm": 0.8931428790092468, "learning_rate": 6.253583058872741e-06, "loss": 0.5009, "step": 6894 }, { "epoch": 0.44, "grad_norm": 0.9559874534606934, "learning_rate": 6.2525898085052005e-06, "loss": 0.5176, "step": 6895 }, { "epoch": 0.44, "grad_norm": 1.0395197868347168, "learning_rate": 6.251596505395463e-06, "loss": 0.5279, "step": 6896 }, { "epoch": 0.44, "grad_norm": 1.0660779476165771, "learning_rate": 6.25060314958535e-06, "loss": 0.5436, "step": 6897 }, { "epoch": 0.44, "grad_norm": 1.0156817436218262, "learning_rate": 6.249609741116689e-06, "loss": 0.5115, "step": 6898 }, { "epoch": 0.44, "grad_norm": 1.0072088241577148, "learning_rate": 6.24861628003131e-06, "loss": 0.5693, "step": 6899 }, { "epoch": 0.44, "grad_norm": 1.0126852989196777, "learning_rate": 6.247622766371041e-06, "loss": 0.5165, "step": 6900 }, { "epoch": 0.44, "grad_norm": 1.0487877130508423, "learning_rate": 6.246629200177718e-06, "loss": 0.571, "step": 6901 }, { "epoch": 0.44, "grad_norm": 1.0167573690414429, "learning_rate": 6.245635581493176e-06, "loss": 0.5, "step": 6902 }, { "epoch": 0.44, "grad_norm": 1.094506025314331, "learning_rate": 6.244641910359254e-06, "loss": 0.6057, "step": 6903 }, { "epoch": 0.44, "grad_norm": 1.0023902654647827, "learning_rate": 6.24364818681779e-06, "loss": 0.5305, "step": 6904 }, { "epoch": 0.44, "grad_norm": 0.9998495578765869, "learning_rate": 6.242654410910628e-06, "loss": 0.5083, "step": 6905 }, { "epoch": 0.44, "grad_norm": 1.012750267982483, "learning_rate": 6.2416605826796095e-06, "loss": 0.5704, "step": 6906 }, { "epoch": 0.44, "grad_norm": 1.027174472808838, "learning_rate": 6.240666702166587e-06, "loss": 0.5447, "step": 6907 }, { "epoch": 0.44, "grad_norm": 0.9723641276359558, "learning_rate": 6.239672769413403e-06, "loss": 0.5385, "step": 6908 }, { "epoch": 0.44, "grad_norm": 0.960335910320282, "learning_rate": 6.238678784461913e-06, "loss": 0.4809, "step": 6909 }, { "epoch": 0.44, "grad_norm": 1.0493924617767334, "learning_rate": 6.237684747353965e-06, "loss": 0.5192, "step": 6910 }, { "epoch": 0.44, "grad_norm": 0.9411200881004333, "learning_rate": 6.23669065813142e-06, "loss": 0.5224, "step": 6911 }, { "epoch": 0.44, "grad_norm": 0.9179388284683228, "learning_rate": 6.235696516836134e-06, "loss": 0.5309, "step": 6912 }, { "epoch": 0.44, "grad_norm": 1.0288889408111572, "learning_rate": 6.234702323509967e-06, "loss": 0.5611, "step": 6913 }, { "epoch": 0.44, "grad_norm": 1.042685627937317, "learning_rate": 6.233708078194778e-06, "loss": 0.5618, "step": 6914 }, { "epoch": 0.44, "grad_norm": 1.0587626695632935, "learning_rate": 6.232713780932434e-06, "loss": 0.5725, "step": 6915 }, { "epoch": 0.44, "grad_norm": 1.0170725584030151, "learning_rate": 6.231719431764804e-06, "loss": 0.5366, "step": 6916 }, { "epoch": 0.44, "grad_norm": 1.0194566249847412, "learning_rate": 6.230725030733751e-06, "loss": 0.562, "step": 6917 }, { "epoch": 0.44, "grad_norm": 1.0699495077133179, "learning_rate": 6.229730577881148e-06, "loss": 0.5892, "step": 6918 }, { "epoch": 0.44, "grad_norm": 0.9561670422554016, "learning_rate": 6.2287360732488685e-06, "loss": 0.474, "step": 6919 }, { "epoch": 0.44, "grad_norm": 1.077793002128601, "learning_rate": 6.227741516878789e-06, "loss": 0.5547, "step": 6920 }, { "epoch": 0.44, "grad_norm": 1.0175807476043701, "learning_rate": 6.226746908812784e-06, "loss": 0.5531, "step": 6921 }, { "epoch": 0.44, "grad_norm": 1.2047051191329956, "learning_rate": 6.225752249092734e-06, "loss": 0.5856, "step": 6922 }, { "epoch": 0.44, "grad_norm": 1.1163352727890015, "learning_rate": 6.224757537760521e-06, "loss": 0.5526, "step": 6923 }, { "epoch": 0.44, "grad_norm": 1.0539308786392212, "learning_rate": 6.2237627748580294e-06, "loss": 0.5748, "step": 6924 }, { "epoch": 0.44, "grad_norm": 1.031957745552063, "learning_rate": 6.222767960427144e-06, "loss": 0.5798, "step": 6925 }, { "epoch": 0.44, "grad_norm": 1.0769531726837158, "learning_rate": 6.221773094509753e-06, "loss": 0.5501, "step": 6926 }, { "epoch": 0.44, "grad_norm": 0.992579996585846, "learning_rate": 6.220778177147747e-06, "loss": 0.5198, "step": 6927 }, { "epoch": 0.44, "grad_norm": 1.0876187086105347, "learning_rate": 6.219783208383021e-06, "loss": 0.5047, "step": 6928 }, { "epoch": 0.44, "grad_norm": 1.100014090538025, "learning_rate": 6.218788188257465e-06, "loss": 0.5257, "step": 6929 }, { "epoch": 0.44, "grad_norm": 0.9707440137863159, "learning_rate": 6.217793116812979e-06, "loss": 0.492, "step": 6930 }, { "epoch": 0.44, "grad_norm": 1.0497099161148071, "learning_rate": 6.216797994091462e-06, "loss": 0.5627, "step": 6931 }, { "epoch": 0.44, "grad_norm": 0.900424063205719, "learning_rate": 6.215802820134814e-06, "loss": 0.4742, "step": 6932 }, { "epoch": 0.44, "grad_norm": 0.9930451512336731, "learning_rate": 6.214807594984939e-06, "loss": 0.5533, "step": 6933 }, { "epoch": 0.44, "grad_norm": 1.0777264833450317, "learning_rate": 6.213812318683741e-06, "loss": 0.5664, "step": 6934 }, { "epoch": 0.44, "grad_norm": 1.008448839187622, "learning_rate": 6.2128169912731295e-06, "loss": 0.562, "step": 6935 }, { "epoch": 0.44, "grad_norm": 1.102477788925171, "learning_rate": 6.211821612795014e-06, "loss": 0.53, "step": 6936 }, { "epoch": 0.44, "grad_norm": 1.0119174718856812, "learning_rate": 6.210826183291305e-06, "loss": 0.5147, "step": 6937 }, { "epoch": 0.44, "grad_norm": 0.9260329008102417, "learning_rate": 6.209830702803918e-06, "loss": 0.4912, "step": 6938 }, { "epoch": 0.44, "grad_norm": 1.0289298295974731, "learning_rate": 6.208835171374769e-06, "loss": 0.5524, "step": 6939 }, { "epoch": 0.44, "grad_norm": 1.0635002851486206, "learning_rate": 6.207839589045777e-06, "loss": 0.5207, "step": 6940 }, { "epoch": 0.44, "grad_norm": 1.086357593536377, "learning_rate": 6.20684395585886e-06, "loss": 0.5053, "step": 6941 }, { "epoch": 0.44, "grad_norm": 1.067955493927002, "learning_rate": 6.205848271855943e-06, "loss": 0.5373, "step": 6942 }, { "epoch": 0.44, "grad_norm": 1.1029582023620605, "learning_rate": 6.204852537078952e-06, "loss": 0.5356, "step": 6943 }, { "epoch": 0.44, "grad_norm": 1.0627195835113525, "learning_rate": 6.203856751569809e-06, "loss": 0.6011, "step": 6944 }, { "epoch": 0.44, "grad_norm": 1.0238324403762817, "learning_rate": 6.202860915370447e-06, "loss": 0.5398, "step": 6945 }, { "epoch": 0.44, "grad_norm": 0.9644851684570312, "learning_rate": 6.201865028522798e-06, "loss": 0.4662, "step": 6946 }, { "epoch": 0.44, "grad_norm": 1.0515786409378052, "learning_rate": 6.200869091068791e-06, "loss": 0.5293, "step": 6947 }, { "epoch": 0.44, "grad_norm": 1.0306024551391602, "learning_rate": 6.1998731030503655e-06, "loss": 0.4727, "step": 6948 }, { "epoch": 0.44, "grad_norm": 1.0533422231674194, "learning_rate": 6.198877064509458e-06, "loss": 0.5211, "step": 6949 }, { "epoch": 0.44, "grad_norm": 1.1155869960784912, "learning_rate": 6.1978809754880076e-06, "loss": 0.565, "step": 6950 }, { "epoch": 0.44, "grad_norm": 1.0015124082565308, "learning_rate": 6.196884836027957e-06, "loss": 0.4974, "step": 6951 }, { "epoch": 0.44, "grad_norm": 0.9570860862731934, "learning_rate": 6.195888646171247e-06, "loss": 0.4626, "step": 6952 }, { "epoch": 0.44, "grad_norm": 0.9626646637916565, "learning_rate": 6.194892405959829e-06, "loss": 0.5341, "step": 6953 }, { "epoch": 0.44, "grad_norm": 1.0123460292816162, "learning_rate": 6.193896115435648e-06, "loss": 0.5306, "step": 6954 }, { "epoch": 0.44, "grad_norm": 0.9832209348678589, "learning_rate": 6.192899774640655e-06, "loss": 0.5549, "step": 6955 }, { "epoch": 0.44, "grad_norm": 1.0607764720916748, "learning_rate": 6.191903383616801e-06, "loss": 0.5334, "step": 6956 }, { "epoch": 0.44, "grad_norm": 0.986181378364563, "learning_rate": 6.190906942406043e-06, "loss": 0.4941, "step": 6957 }, { "epoch": 0.44, "grad_norm": 1.0732365846633911, "learning_rate": 6.189910451050336e-06, "loss": 0.5171, "step": 6958 }, { "epoch": 0.44, "grad_norm": 1.1563911437988281, "learning_rate": 6.1889139095916395e-06, "loss": 0.5898, "step": 6959 }, { "epoch": 0.44, "grad_norm": 1.0473549365997314, "learning_rate": 6.187917318071914e-06, "loss": 0.5564, "step": 6960 }, { "epoch": 0.44, "grad_norm": 0.9854289889335632, "learning_rate": 6.1869206765331234e-06, "loss": 0.5256, "step": 6961 }, { "epoch": 0.44, "grad_norm": 0.9489195942878723, "learning_rate": 6.1859239850172325e-06, "loss": 0.5141, "step": 6962 }, { "epoch": 0.44, "grad_norm": 0.9980160593986511, "learning_rate": 6.1849272435662065e-06, "loss": 0.5251, "step": 6963 }, { "epoch": 0.44, "grad_norm": 1.0156224966049194, "learning_rate": 6.183930452222017e-06, "loss": 0.5046, "step": 6964 }, { "epoch": 0.44, "grad_norm": 1.0200015306472778, "learning_rate": 6.1829336110266356e-06, "loss": 0.5237, "step": 6965 }, { "epoch": 0.44, "grad_norm": 0.9782587289810181, "learning_rate": 6.181936720022033e-06, "loss": 0.5378, "step": 6966 }, { "epoch": 0.44, "grad_norm": 1.067857027053833, "learning_rate": 6.180939779250188e-06, "loss": 0.5529, "step": 6967 }, { "epoch": 0.44, "grad_norm": 1.0193872451782227, "learning_rate": 6.179942788753077e-06, "loss": 0.5597, "step": 6968 }, { "epoch": 0.44, "grad_norm": 1.2080793380737305, "learning_rate": 6.178945748572681e-06, "loss": 0.5478, "step": 6969 }, { "epoch": 0.44, "grad_norm": 1.0767008066177368, "learning_rate": 6.177948658750979e-06, "loss": 0.465, "step": 6970 }, { "epoch": 0.44, "grad_norm": 1.0378178358078003, "learning_rate": 6.176951519329958e-06, "loss": 0.5356, "step": 6971 }, { "epoch": 0.44, "grad_norm": 1.094118595123291, "learning_rate": 6.1759543303516025e-06, "loss": 0.5179, "step": 6972 }, { "epoch": 0.44, "grad_norm": 1.1048043966293335, "learning_rate": 6.174957091857901e-06, "loss": 0.5683, "step": 6973 }, { "epoch": 0.44, "grad_norm": 0.9769123792648315, "learning_rate": 6.173959803890843e-06, "loss": 0.5302, "step": 6974 }, { "epoch": 0.44, "grad_norm": 1.1159851551055908, "learning_rate": 6.172962466492423e-06, "loss": 0.5453, "step": 6975 }, { "epoch": 0.44, "grad_norm": 0.9689269661903381, "learning_rate": 6.171965079704634e-06, "loss": 0.5562, "step": 6976 }, { "epoch": 0.44, "grad_norm": 1.0246952772140503, "learning_rate": 6.17096764356947e-06, "loss": 0.5288, "step": 6977 }, { "epoch": 0.44, "grad_norm": 0.9824046492576599, "learning_rate": 6.169970158128935e-06, "loss": 0.5636, "step": 6978 }, { "epoch": 0.44, "grad_norm": 1.055438756942749, "learning_rate": 6.168972623425023e-06, "loss": 0.5349, "step": 6979 }, { "epoch": 0.44, "grad_norm": 1.0025379657745361, "learning_rate": 6.167975039499744e-06, "loss": 0.5189, "step": 6980 }, { "epoch": 0.44, "grad_norm": 0.9753031134605408, "learning_rate": 6.1669774063950985e-06, "loss": 0.6042, "step": 6981 }, { "epoch": 0.44, "grad_norm": 1.338631510734558, "learning_rate": 6.165979724153094e-06, "loss": 0.5728, "step": 6982 }, { "epoch": 0.44, "grad_norm": 1.0687596797943115, "learning_rate": 6.164981992815737e-06, "loss": 0.5211, "step": 6983 }, { "epoch": 0.44, "grad_norm": 0.9963400363922119, "learning_rate": 6.163984212425043e-06, "loss": 0.5205, "step": 6984 }, { "epoch": 0.44, "grad_norm": 1.1149332523345947, "learning_rate": 6.162986383023023e-06, "loss": 0.5527, "step": 6985 }, { "epoch": 0.44, "grad_norm": 1.0832549333572388, "learning_rate": 6.161988504651692e-06, "loss": 0.5406, "step": 6986 }, { "epoch": 0.44, "grad_norm": 1.0374681949615479, "learning_rate": 6.160990577353066e-06, "loss": 0.5424, "step": 6987 }, { "epoch": 0.44, "grad_norm": 0.9844253659248352, "learning_rate": 6.1599926011691695e-06, "loss": 0.5301, "step": 6988 }, { "epoch": 0.44, "grad_norm": 1.1734834909439087, "learning_rate": 6.1589945761420166e-06, "loss": 0.5422, "step": 6989 }, { "epoch": 0.44, "grad_norm": 0.9257333278656006, "learning_rate": 6.157996502313635e-06, "loss": 0.5301, "step": 6990 }, { "epoch": 0.44, "grad_norm": 0.9583601355552673, "learning_rate": 6.156998379726048e-06, "loss": 0.4917, "step": 6991 }, { "epoch": 0.44, "grad_norm": 1.039122462272644, "learning_rate": 6.1560002084212845e-06, "loss": 0.5045, "step": 6992 }, { "epoch": 0.44, "grad_norm": 0.9951550364494324, "learning_rate": 6.155001988441375e-06, "loss": 0.513, "step": 6993 }, { "epoch": 0.44, "grad_norm": 1.099072813987732, "learning_rate": 6.154003719828349e-06, "loss": 0.5187, "step": 6994 }, { "epoch": 0.44, "grad_norm": 1.0065536499023438, "learning_rate": 6.1530054026242405e-06, "loss": 0.466, "step": 6995 }, { "epoch": 0.44, "grad_norm": 0.9675000905990601, "learning_rate": 6.152007036871085e-06, "loss": 0.4686, "step": 6996 }, { "epoch": 0.44, "grad_norm": 0.9593127369880676, "learning_rate": 6.151008622610921e-06, "loss": 0.5441, "step": 6997 }, { "epoch": 0.44, "grad_norm": 1.01827871799469, "learning_rate": 6.150010159885789e-06, "loss": 0.4729, "step": 6998 }, { "epoch": 0.44, "grad_norm": 1.112960934638977, "learning_rate": 6.149011648737728e-06, "loss": 0.5966, "step": 6999 }, { "epoch": 0.44, "grad_norm": 1.0497690439224243, "learning_rate": 6.148013089208784e-06, "loss": 0.5707, "step": 7000 }, { "epoch": 0.44, "grad_norm": 1.0520048141479492, "learning_rate": 6.1470144813410045e-06, "loss": 0.5515, "step": 7001 }, { "epoch": 0.44, "grad_norm": 1.0527669191360474, "learning_rate": 6.146015825176432e-06, "loss": 0.5006, "step": 7002 }, { "epoch": 0.44, "grad_norm": 1.03863525390625, "learning_rate": 6.145017120757123e-06, "loss": 0.5391, "step": 7003 }, { "epoch": 0.44, "grad_norm": 0.9999639987945557, "learning_rate": 6.144018368125124e-06, "loss": 0.5037, "step": 7004 }, { "epoch": 0.44, "grad_norm": 1.0270698070526123, "learning_rate": 6.143019567322493e-06, "loss": 0.4914, "step": 7005 }, { "epoch": 0.44, "grad_norm": 1.092484951019287, "learning_rate": 6.1420207183912824e-06, "loss": 0.5923, "step": 7006 }, { "epoch": 0.44, "grad_norm": 0.9754437804222107, "learning_rate": 6.141021821373555e-06, "loss": 0.5353, "step": 7007 }, { "epoch": 0.44, "grad_norm": 1.034129023551941, "learning_rate": 6.140022876311367e-06, "loss": 0.5276, "step": 7008 }, { "epoch": 0.44, "grad_norm": 0.9061996936798096, "learning_rate": 6.139023883246781e-06, "loss": 0.4921, "step": 7009 }, { "epoch": 0.44, "grad_norm": 1.0841065645217896, "learning_rate": 6.1380248422218604e-06, "loss": 0.465, "step": 7010 }, { "epoch": 0.44, "grad_norm": 1.0482650995254517, "learning_rate": 6.137025753278673e-06, "loss": 0.5501, "step": 7011 }, { "epoch": 0.44, "grad_norm": 1.004610538482666, "learning_rate": 6.1360266164592886e-06, "loss": 0.5599, "step": 7012 }, { "epoch": 0.44, "grad_norm": 1.0636749267578125, "learning_rate": 6.135027431805774e-06, "loss": 0.561, "step": 7013 }, { "epoch": 0.44, "grad_norm": 0.9683583378791809, "learning_rate": 6.134028199360203e-06, "loss": 0.4905, "step": 7014 }, { "epoch": 0.44, "grad_norm": 1.003348708152771, "learning_rate": 6.133028919164647e-06, "loss": 0.5758, "step": 7015 }, { "epoch": 0.44, "grad_norm": 0.9529774785041809, "learning_rate": 6.132029591261188e-06, "loss": 0.5214, "step": 7016 }, { "epoch": 0.44, "grad_norm": 1.1036036014556885, "learning_rate": 6.1310302156919e-06, "loss": 0.5496, "step": 7017 }, { "epoch": 0.44, "grad_norm": 1.0264195203781128, "learning_rate": 6.130030792498865e-06, "loss": 0.5335, "step": 7018 }, { "epoch": 0.44, "grad_norm": 1.0804702043533325, "learning_rate": 6.129031321724163e-06, "loss": 0.5226, "step": 7019 }, { "epoch": 0.44, "grad_norm": 1.05149245262146, "learning_rate": 6.128031803409881e-06, "loss": 0.5137, "step": 7020 }, { "epoch": 0.44, "grad_norm": 1.036487340927124, "learning_rate": 6.127032237598102e-06, "loss": 0.5587, "step": 7021 }, { "epoch": 0.44, "grad_norm": 0.9598145484924316, "learning_rate": 6.126032624330917e-06, "loss": 0.5237, "step": 7022 }, { "epoch": 0.44, "grad_norm": 1.0859782695770264, "learning_rate": 6.125032963650417e-06, "loss": 0.5699, "step": 7023 }, { "epoch": 0.45, "grad_norm": 1.0361154079437256, "learning_rate": 6.124033255598691e-06, "loss": 0.5027, "step": 7024 }, { "epoch": 0.45, "grad_norm": 1.0015485286712646, "learning_rate": 6.1230335002178345e-06, "loss": 0.5339, "step": 7025 }, { "epoch": 0.45, "grad_norm": 1.0793373584747314, "learning_rate": 6.1220336975499435e-06, "loss": 0.5159, "step": 7026 }, { "epoch": 0.45, "grad_norm": 1.102267861366272, "learning_rate": 6.121033847637119e-06, "loss": 0.5646, "step": 7027 }, { "epoch": 0.45, "grad_norm": 1.0097570419311523, "learning_rate": 6.120033950521458e-06, "loss": 0.5047, "step": 7028 }, { "epoch": 0.45, "grad_norm": 0.9865574836730957, "learning_rate": 6.119034006245063e-06, "loss": 0.5083, "step": 7029 }, { "epoch": 0.45, "grad_norm": 1.0643231868743896, "learning_rate": 6.118034014850039e-06, "loss": 0.5583, "step": 7030 }, { "epoch": 0.45, "grad_norm": 0.9546524882316589, "learning_rate": 6.117033976378493e-06, "loss": 0.4621, "step": 7031 }, { "epoch": 0.45, "grad_norm": 1.061911940574646, "learning_rate": 6.116033890872531e-06, "loss": 0.5029, "step": 7032 }, { "epoch": 0.45, "grad_norm": 1.0499768257141113, "learning_rate": 6.115033758374265e-06, "loss": 0.5618, "step": 7033 }, { "epoch": 0.45, "grad_norm": 0.9878824353218079, "learning_rate": 6.114033578925805e-06, "loss": 0.5288, "step": 7034 }, { "epoch": 0.45, "grad_norm": 1.0290721654891968, "learning_rate": 6.1130333525692684e-06, "loss": 0.5213, "step": 7035 }, { "epoch": 0.45, "grad_norm": 0.9989311099052429, "learning_rate": 6.112033079346767e-06, "loss": 0.5193, "step": 7036 }, { "epoch": 0.45, "grad_norm": 0.9303186535835266, "learning_rate": 6.111032759300423e-06, "loss": 0.5508, "step": 7037 }, { "epoch": 0.45, "grad_norm": 0.9461025595664978, "learning_rate": 6.110032392472354e-06, "loss": 0.5139, "step": 7038 }, { "epoch": 0.45, "grad_norm": 0.9654031991958618, "learning_rate": 6.109031978904683e-06, "loss": 0.5021, "step": 7039 }, { "epoch": 0.45, "grad_norm": 0.9956388473510742, "learning_rate": 6.108031518639532e-06, "loss": 0.5524, "step": 7040 }, { "epoch": 0.45, "grad_norm": 1.084328532218933, "learning_rate": 6.107031011719029e-06, "loss": 0.6076, "step": 7041 }, { "epoch": 0.45, "grad_norm": 0.9746370911598206, "learning_rate": 6.106030458185303e-06, "loss": 0.5185, "step": 7042 }, { "epoch": 0.45, "grad_norm": 1.0461184978485107, "learning_rate": 6.105029858080479e-06, "loss": 0.5268, "step": 7043 }, { "epoch": 0.45, "grad_norm": 0.9482165575027466, "learning_rate": 6.1040292114466935e-06, "loss": 0.5076, "step": 7044 }, { "epoch": 0.45, "grad_norm": 1.044372797012329, "learning_rate": 6.103028518326077e-06, "loss": 0.5106, "step": 7045 }, { "epoch": 0.45, "grad_norm": 0.982097864151001, "learning_rate": 6.102027778760769e-06, "loss": 0.5403, "step": 7046 }, { "epoch": 0.45, "grad_norm": 1.0360002517700195, "learning_rate": 6.101026992792904e-06, "loss": 0.5853, "step": 7047 }, { "epoch": 0.45, "grad_norm": 0.9550833702087402, "learning_rate": 6.100026160464621e-06, "loss": 0.511, "step": 7048 }, { "epoch": 0.45, "grad_norm": 0.9916879534721375, "learning_rate": 6.099025281818065e-06, "loss": 0.5023, "step": 7049 }, { "epoch": 0.45, "grad_norm": 1.0016727447509766, "learning_rate": 6.098024356895378e-06, "loss": 0.514, "step": 7050 }, { "epoch": 0.45, "grad_norm": 0.995428740978241, "learning_rate": 6.097023385738704e-06, "loss": 0.4829, "step": 7051 }, { "epoch": 0.45, "grad_norm": 1.0760793685913086, "learning_rate": 6.096022368390191e-06, "loss": 0.5491, "step": 7052 }, { "epoch": 0.45, "grad_norm": 1.0174927711486816, "learning_rate": 6.0950213048919895e-06, "loss": 0.5249, "step": 7053 }, { "epoch": 0.45, "grad_norm": 0.9960131049156189, "learning_rate": 6.094020195286251e-06, "loss": 0.5026, "step": 7054 }, { "epoch": 0.45, "grad_norm": 1.0475268363952637, "learning_rate": 6.093019039615128e-06, "loss": 0.5887, "step": 7055 }, { "epoch": 0.45, "grad_norm": 0.9496270418167114, "learning_rate": 6.092017837920773e-06, "loss": 0.5053, "step": 7056 }, { "epoch": 0.45, "grad_norm": 0.999090313911438, "learning_rate": 6.091016590245347e-06, "loss": 0.51, "step": 7057 }, { "epoch": 0.45, "grad_norm": 1.0329222679138184, "learning_rate": 6.090015296631009e-06, "loss": 0.4875, "step": 7058 }, { "epoch": 0.45, "grad_norm": 1.0270658731460571, "learning_rate": 6.089013957119918e-06, "loss": 0.5859, "step": 7059 }, { "epoch": 0.45, "grad_norm": 0.9140265583992004, "learning_rate": 6.088012571754236e-06, "loss": 0.4633, "step": 7060 }, { "epoch": 0.45, "grad_norm": 1.1130242347717285, "learning_rate": 6.087011140576132e-06, "loss": 0.5435, "step": 7061 }, { "epoch": 0.45, "grad_norm": 0.9865022897720337, "learning_rate": 6.086009663627769e-06, "loss": 0.5155, "step": 7062 }, { "epoch": 0.45, "grad_norm": 1.0083192586898804, "learning_rate": 6.085008140951318e-06, "loss": 0.5141, "step": 7063 }, { "epoch": 0.45, "grad_norm": 1.1197551488876343, "learning_rate": 6.084006572588947e-06, "loss": 0.5497, "step": 7064 }, { "epoch": 0.45, "grad_norm": 1.0250638723373413, "learning_rate": 6.083004958582832e-06, "loss": 0.4772, "step": 7065 }, { "epoch": 0.45, "grad_norm": 1.0011177062988281, "learning_rate": 6.082003298975144e-06, "loss": 0.5462, "step": 7066 }, { "epoch": 0.45, "grad_norm": 1.0027698278427124, "learning_rate": 6.081001593808063e-06, "loss": 0.5069, "step": 7067 }, { "epoch": 0.45, "grad_norm": 1.0362638235092163, "learning_rate": 6.079999843123763e-06, "loss": 0.5839, "step": 7068 }, { "epoch": 0.45, "grad_norm": 0.9713705778121948, "learning_rate": 6.07899804696443e-06, "loss": 0.5555, "step": 7069 }, { "epoch": 0.45, "grad_norm": 1.039381980895996, "learning_rate": 6.077996205372241e-06, "loss": 0.4965, "step": 7070 }, { "epoch": 0.45, "grad_norm": 0.9748886823654175, "learning_rate": 6.07699431838938e-06, "loss": 0.5145, "step": 7071 }, { "epoch": 0.45, "grad_norm": 1.0257819890975952, "learning_rate": 6.075992386058037e-06, "loss": 0.5321, "step": 7072 }, { "epoch": 0.45, "grad_norm": 1.067957878112793, "learning_rate": 6.074990408420397e-06, "loss": 0.5104, "step": 7073 }, { "epoch": 0.45, "grad_norm": 0.9645175337791443, "learning_rate": 6.073988385518652e-06, "loss": 0.4861, "step": 7074 }, { "epoch": 0.45, "grad_norm": 0.9836180210113525, "learning_rate": 6.07298631739499e-06, "loss": 0.5501, "step": 7075 }, { "epoch": 0.45, "grad_norm": 0.9671414494514465, "learning_rate": 6.071984204091608e-06, "loss": 0.5687, "step": 7076 }, { "epoch": 0.45, "grad_norm": 1.02313232421875, "learning_rate": 6.0709820456507e-06, "loss": 0.5469, "step": 7077 }, { "epoch": 0.45, "grad_norm": 1.0085055828094482, "learning_rate": 6.069979842114465e-06, "loss": 0.5109, "step": 7078 }, { "epoch": 0.45, "grad_norm": 1.1000394821166992, "learning_rate": 6.068977593525098e-06, "loss": 0.5624, "step": 7079 }, { "epoch": 0.45, "grad_norm": 0.9950181245803833, "learning_rate": 6.067975299924806e-06, "loss": 0.5516, "step": 7080 }, { "epoch": 0.45, "grad_norm": 1.0332417488098145, "learning_rate": 6.066972961355788e-06, "loss": 0.5637, "step": 7081 }, { "epoch": 0.45, "grad_norm": 0.9457080364227295, "learning_rate": 6.065970577860252e-06, "loss": 0.5312, "step": 7082 }, { "epoch": 0.45, "grad_norm": 1.035883903503418, "learning_rate": 6.0649681494804014e-06, "loss": 0.5549, "step": 7083 }, { "epoch": 0.45, "grad_norm": 0.9677972197532654, "learning_rate": 6.063965676258448e-06, "loss": 0.5325, "step": 7084 }, { "epoch": 0.45, "grad_norm": 0.9882851839065552, "learning_rate": 6.0629631582366015e-06, "loss": 0.549, "step": 7085 }, { "epoch": 0.45, "grad_norm": 0.9976891875267029, "learning_rate": 6.0619605954570726e-06, "loss": 0.5047, "step": 7086 }, { "epoch": 0.45, "grad_norm": 0.9844744801521301, "learning_rate": 6.060957987962077e-06, "loss": 0.5322, "step": 7087 }, { "epoch": 0.45, "grad_norm": 1.0253148078918457, "learning_rate": 6.059955335793832e-06, "loss": 0.5551, "step": 7088 }, { "epoch": 0.45, "grad_norm": 1.0289077758789062, "learning_rate": 6.0589526389945576e-06, "loss": 0.5105, "step": 7089 }, { "epoch": 0.45, "grad_norm": 0.9292216897010803, "learning_rate": 6.057949897606469e-06, "loss": 0.5282, "step": 7090 }, { "epoch": 0.45, "grad_norm": 1.0359158515930176, "learning_rate": 6.05694711167179e-06, "loss": 0.5276, "step": 7091 }, { "epoch": 0.45, "grad_norm": 1.0398133993148804, "learning_rate": 6.055944281232746e-06, "loss": 0.5545, "step": 7092 }, { "epoch": 0.45, "grad_norm": 1.0795741081237793, "learning_rate": 6.0549414063315625e-06, "loss": 0.5894, "step": 7093 }, { "epoch": 0.45, "grad_norm": 1.0921592712402344, "learning_rate": 6.053938487010464e-06, "loss": 0.5194, "step": 7094 }, { "epoch": 0.45, "grad_norm": 0.942608654499054, "learning_rate": 6.052935523311684e-06, "loss": 0.516, "step": 7095 }, { "epoch": 0.45, "grad_norm": 0.9816745519638062, "learning_rate": 6.0519325152774515e-06, "loss": 0.5125, "step": 7096 }, { "epoch": 0.45, "grad_norm": 0.950702428817749, "learning_rate": 6.05092946295e-06, "loss": 0.5427, "step": 7097 }, { "epoch": 0.45, "grad_norm": 0.9388405084609985, "learning_rate": 6.049926366371565e-06, "loss": 0.5388, "step": 7098 }, { "epoch": 0.45, "grad_norm": 0.9361859560012817, "learning_rate": 6.048923225584383e-06, "loss": 0.5181, "step": 7099 }, { "epoch": 0.45, "grad_norm": 1.0264475345611572, "learning_rate": 6.047920040630692e-06, "loss": 0.5171, "step": 7100 }, { "epoch": 0.45, "grad_norm": 0.9536789059638977, "learning_rate": 6.046916811552735e-06, "loss": 0.5023, "step": 7101 }, { "epoch": 0.45, "grad_norm": 0.930137574672699, "learning_rate": 6.045913538392754e-06, "loss": 0.5208, "step": 7102 }, { "epoch": 0.45, "grad_norm": 0.9892581701278687, "learning_rate": 6.04491022119299e-06, "loss": 0.5434, "step": 7103 }, { "epoch": 0.45, "grad_norm": 0.9390099048614502, "learning_rate": 6.043906859995693e-06, "loss": 0.4808, "step": 7104 }, { "epoch": 0.45, "grad_norm": 1.090390920639038, "learning_rate": 6.042903454843109e-06, "loss": 0.5112, "step": 7105 }, { "epoch": 0.45, "grad_norm": 1.1024433374404907, "learning_rate": 6.041900005777488e-06, "loss": 0.6206, "step": 7106 }, { "epoch": 0.45, "grad_norm": 1.0256569385528564, "learning_rate": 6.040896512841083e-06, "loss": 0.5461, "step": 7107 }, { "epoch": 0.45, "grad_norm": 1.032071828842163, "learning_rate": 6.039892976076147e-06, "loss": 0.5021, "step": 7108 }, { "epoch": 0.45, "grad_norm": 1.000108003616333, "learning_rate": 6.038889395524935e-06, "loss": 0.5312, "step": 7109 }, { "epoch": 0.45, "grad_norm": 1.0488094091415405, "learning_rate": 6.037885771229703e-06, "loss": 0.5545, "step": 7110 }, { "epoch": 0.45, "grad_norm": 0.9266911745071411, "learning_rate": 6.036882103232714e-06, "loss": 0.5378, "step": 7111 }, { "epoch": 0.45, "grad_norm": 1.0361651182174683, "learning_rate": 6.0358783915762265e-06, "loss": 0.5222, "step": 7112 }, { "epoch": 0.45, "grad_norm": 1.0582817792892456, "learning_rate": 6.034874636302502e-06, "loss": 0.5234, "step": 7113 }, { "epoch": 0.45, "grad_norm": 1.1131682395935059, "learning_rate": 6.033870837453808e-06, "loss": 0.5866, "step": 7114 }, { "epoch": 0.45, "grad_norm": 1.0257360935211182, "learning_rate": 6.0328669950724096e-06, "loss": 0.5094, "step": 7115 }, { "epoch": 0.45, "grad_norm": 1.0267966985702515, "learning_rate": 6.031863109200575e-06, "loss": 0.5952, "step": 7116 }, { "epoch": 0.45, "grad_norm": 1.0467355251312256, "learning_rate": 6.030859179880574e-06, "loss": 0.5453, "step": 7117 }, { "epoch": 0.45, "grad_norm": 1.02724289894104, "learning_rate": 6.029855207154679e-06, "loss": 0.5244, "step": 7118 }, { "epoch": 0.45, "grad_norm": 1.0115382671356201, "learning_rate": 6.0288511910651644e-06, "loss": 0.5273, "step": 7119 }, { "epoch": 0.45, "grad_norm": 0.9595180749893188, "learning_rate": 6.027847131654305e-06, "loss": 0.518, "step": 7120 }, { "epoch": 0.45, "grad_norm": 1.017401933670044, "learning_rate": 6.026843028964378e-06, "loss": 0.6056, "step": 7121 }, { "epoch": 0.45, "grad_norm": 1.0124104022979736, "learning_rate": 6.025838883037664e-06, "loss": 0.5109, "step": 7122 }, { "epoch": 0.45, "grad_norm": 1.096415400505066, "learning_rate": 6.024834693916443e-06, "loss": 0.5399, "step": 7123 }, { "epoch": 0.45, "grad_norm": 1.0635879039764404, "learning_rate": 6.023830461642998e-06, "loss": 0.5239, "step": 7124 }, { "epoch": 0.45, "grad_norm": 0.9495733380317688, "learning_rate": 6.022826186259614e-06, "loss": 0.5079, "step": 7125 }, { "epoch": 0.45, "grad_norm": 1.0496259927749634, "learning_rate": 6.021821867808576e-06, "loss": 0.5161, "step": 7126 }, { "epoch": 0.45, "grad_norm": 1.0454121828079224, "learning_rate": 6.0208175063321765e-06, "loss": 0.5236, "step": 7127 }, { "epoch": 0.45, "grad_norm": 0.9280383586883545, "learning_rate": 6.019813101872701e-06, "loss": 0.4974, "step": 7128 }, { "epoch": 0.45, "grad_norm": 1.033594012260437, "learning_rate": 6.018808654472445e-06, "loss": 0.5289, "step": 7129 }, { "epoch": 0.45, "grad_norm": 1.0081610679626465, "learning_rate": 6.017804164173698e-06, "loss": 0.5561, "step": 7130 }, { "epoch": 0.45, "grad_norm": 1.1945390701293945, "learning_rate": 6.0167996310187615e-06, "loss": 0.5433, "step": 7131 }, { "epoch": 0.45, "grad_norm": 0.9437111616134644, "learning_rate": 6.015795055049929e-06, "loss": 0.483, "step": 7132 }, { "epoch": 0.45, "grad_norm": 1.036550521850586, "learning_rate": 6.014790436309499e-06, "loss": 0.5601, "step": 7133 }, { "epoch": 0.45, "grad_norm": 0.9895676970481873, "learning_rate": 6.013785774839776e-06, "loss": 0.4948, "step": 7134 }, { "epoch": 0.45, "grad_norm": 0.9519324898719788, "learning_rate": 6.012781070683058e-06, "loss": 0.559, "step": 7135 }, { "epoch": 0.45, "grad_norm": 1.0611015558242798, "learning_rate": 6.011776323881654e-06, "loss": 0.5345, "step": 7136 }, { "epoch": 0.45, "grad_norm": 0.933640718460083, "learning_rate": 6.0107715344778684e-06, "loss": 0.4938, "step": 7137 }, { "epoch": 0.45, "grad_norm": 1.0362071990966797, "learning_rate": 6.00976670251401e-06, "loss": 0.5398, "step": 7138 }, { "epoch": 0.45, "grad_norm": 1.0097980499267578, "learning_rate": 6.008761828032389e-06, "loss": 0.5134, "step": 7139 }, { "epoch": 0.45, "grad_norm": 0.9700465798377991, "learning_rate": 6.007756911075315e-06, "loss": 0.4799, "step": 7140 }, { "epoch": 0.45, "grad_norm": 0.9842071533203125, "learning_rate": 6.006751951685104e-06, "loss": 0.5089, "step": 7141 }, { "epoch": 0.45, "grad_norm": 1.0734883546829224, "learning_rate": 6.005746949904072e-06, "loss": 0.5458, "step": 7142 }, { "epoch": 0.45, "grad_norm": 1.0110918283462524, "learning_rate": 6.004741905774533e-06, "loss": 0.5478, "step": 7143 }, { "epoch": 0.45, "grad_norm": 1.040485143661499, "learning_rate": 6.003736819338808e-06, "loss": 0.5477, "step": 7144 }, { "epoch": 0.45, "grad_norm": 1.105627417564392, "learning_rate": 6.0027316906392165e-06, "loss": 0.4995, "step": 7145 }, { "epoch": 0.45, "grad_norm": 1.0592191219329834, "learning_rate": 6.001726519718083e-06, "loss": 0.5105, "step": 7146 }, { "epoch": 0.45, "grad_norm": 1.0756666660308838, "learning_rate": 6.000721306617731e-06, "loss": 0.5461, "step": 7147 }, { "epoch": 0.45, "grad_norm": 1.0701828002929688, "learning_rate": 5.999716051380484e-06, "loss": 0.523, "step": 7148 }, { "epoch": 0.45, "grad_norm": 0.9982286691665649, "learning_rate": 5.998710754048674e-06, "loss": 0.5411, "step": 7149 }, { "epoch": 0.45, "grad_norm": 1.1478774547576904, "learning_rate": 5.997705414664627e-06, "loss": 0.5197, "step": 7150 }, { "epoch": 0.45, "grad_norm": 1.0219048261642456, "learning_rate": 5.996700033270676e-06, "loss": 0.474, "step": 7151 }, { "epoch": 0.45, "grad_norm": 1.0214338302612305, "learning_rate": 5.995694609909153e-06, "loss": 0.5173, "step": 7152 }, { "epoch": 0.45, "grad_norm": 0.961108386516571, "learning_rate": 5.9946891446223955e-06, "loss": 0.513, "step": 7153 }, { "epoch": 0.45, "grad_norm": 1.0029499530792236, "learning_rate": 5.993683637452736e-06, "loss": 0.5055, "step": 7154 }, { "epoch": 0.45, "grad_norm": 1.0898581743240356, "learning_rate": 5.992678088442518e-06, "loss": 0.5773, "step": 7155 }, { "epoch": 0.45, "grad_norm": 0.9855949282646179, "learning_rate": 5.991672497634076e-06, "loss": 0.5103, "step": 7156 }, { "epoch": 0.45, "grad_norm": 0.9675732851028442, "learning_rate": 5.990666865069759e-06, "loss": 0.5209, "step": 7157 }, { "epoch": 0.45, "grad_norm": 0.9956784844398499, "learning_rate": 5.9896611907919034e-06, "loss": 0.5619, "step": 7158 }, { "epoch": 0.45, "grad_norm": 0.9192211031913757, "learning_rate": 5.98865547484286e-06, "loss": 0.4706, "step": 7159 }, { "epoch": 0.45, "grad_norm": 0.973389744758606, "learning_rate": 5.9876497172649704e-06, "loss": 0.5267, "step": 7160 }, { "epoch": 0.45, "grad_norm": 1.0324655771255493, "learning_rate": 5.986643918100591e-06, "loss": 0.5635, "step": 7161 }, { "epoch": 0.45, "grad_norm": 1.014541745185852, "learning_rate": 5.985638077392066e-06, "loss": 0.5196, "step": 7162 }, { "epoch": 0.45, "grad_norm": 0.9843694567680359, "learning_rate": 5.984632195181752e-06, "loss": 0.528, "step": 7163 }, { "epoch": 0.45, "grad_norm": 1.0023690462112427, "learning_rate": 5.983626271512e-06, "loss": 0.5346, "step": 7164 }, { "epoch": 0.45, "grad_norm": 1.1338468790054321, "learning_rate": 5.982620306425167e-06, "loss": 0.5567, "step": 7165 }, { "epoch": 0.45, "grad_norm": 1.0319536924362183, "learning_rate": 5.981614299963614e-06, "loss": 0.5341, "step": 7166 }, { "epoch": 0.45, "grad_norm": 1.0526453256607056, "learning_rate": 5.9806082521696936e-06, "loss": 0.5511, "step": 7167 }, { "epoch": 0.45, "grad_norm": 1.0752928256988525, "learning_rate": 5.979602163085775e-06, "loss": 0.5323, "step": 7168 }, { "epoch": 0.45, "grad_norm": 1.059898018836975, "learning_rate": 5.978596032754215e-06, "loss": 0.4771, "step": 7169 }, { "epoch": 0.45, "grad_norm": 0.9587810635566711, "learning_rate": 5.977589861217381e-06, "loss": 0.5201, "step": 7170 }, { "epoch": 0.45, "grad_norm": 1.0029340982437134, "learning_rate": 5.9765836485176376e-06, "loss": 0.5188, "step": 7171 }, { "epoch": 0.45, "grad_norm": 0.9671736359596252, "learning_rate": 5.9755773946973546e-06, "loss": 0.5466, "step": 7172 }, { "epoch": 0.45, "grad_norm": 1.058545470237732, "learning_rate": 5.974571099798902e-06, "loss": 0.5128, "step": 7173 }, { "epoch": 0.45, "grad_norm": 0.9857040047645569, "learning_rate": 5.973564763864651e-06, "loss": 0.5702, "step": 7174 }, { "epoch": 0.45, "grad_norm": 0.9996533989906311, "learning_rate": 5.972558386936973e-06, "loss": 0.5469, "step": 7175 }, { "epoch": 0.45, "grad_norm": 1.0640591382980347, "learning_rate": 5.971551969058246e-06, "loss": 0.5474, "step": 7176 }, { "epoch": 0.45, "grad_norm": 1.0704646110534668, "learning_rate": 5.970545510270845e-06, "loss": 0.518, "step": 7177 }, { "epoch": 0.45, "grad_norm": 1.013759732246399, "learning_rate": 5.969539010617149e-06, "loss": 0.4987, "step": 7178 }, { "epoch": 0.45, "grad_norm": 1.059359073638916, "learning_rate": 5.968532470139537e-06, "loss": 0.5226, "step": 7179 }, { "epoch": 0.45, "grad_norm": 0.9865264892578125, "learning_rate": 5.967525888880392e-06, "loss": 0.5028, "step": 7180 }, { "epoch": 0.45, "grad_norm": 0.984788179397583, "learning_rate": 5.966519266882099e-06, "loss": 0.5146, "step": 7181 }, { "epoch": 0.46, "grad_norm": 1.011838436126709, "learning_rate": 5.965512604187041e-06, "loss": 0.4846, "step": 7182 }, { "epoch": 0.46, "grad_norm": 1.053436279296875, "learning_rate": 5.964505900837606e-06, "loss": 0.5144, "step": 7183 }, { "epoch": 0.46, "grad_norm": 1.0706701278686523, "learning_rate": 5.963499156876182e-06, "loss": 0.5005, "step": 7184 }, { "epoch": 0.46, "grad_norm": 1.0562870502471924, "learning_rate": 5.962492372345163e-06, "loss": 0.5236, "step": 7185 }, { "epoch": 0.46, "grad_norm": 1.06119704246521, "learning_rate": 5.961485547286936e-06, "loss": 0.5663, "step": 7186 }, { "epoch": 0.46, "grad_norm": 0.9982765913009644, "learning_rate": 5.960478681743897e-06, "loss": 0.4916, "step": 7187 }, { "epoch": 0.46, "grad_norm": 1.040130615234375, "learning_rate": 5.959471775758444e-06, "loss": 0.5832, "step": 7188 }, { "epoch": 0.46, "grad_norm": 1.0240367650985718, "learning_rate": 5.9584648293729715e-06, "loss": 0.5293, "step": 7189 }, { "epoch": 0.46, "grad_norm": 0.9615424275398254, "learning_rate": 5.957457842629879e-06, "loss": 0.5175, "step": 7190 }, { "epoch": 0.46, "grad_norm": 0.9359433650970459, "learning_rate": 5.956450815571567e-06, "loss": 0.484, "step": 7191 }, { "epoch": 0.46, "grad_norm": 1.009809970855713, "learning_rate": 5.955443748240439e-06, "loss": 0.4816, "step": 7192 }, { "epoch": 0.46, "grad_norm": 0.9725993275642395, "learning_rate": 5.9544366406789e-06, "loss": 0.5377, "step": 7193 }, { "epoch": 0.46, "grad_norm": 1.033837914466858, "learning_rate": 5.953429492929352e-06, "loss": 0.5023, "step": 7194 }, { "epoch": 0.46, "grad_norm": 1.032569169998169, "learning_rate": 5.952422305034206e-06, "loss": 0.4974, "step": 7195 }, { "epoch": 0.46, "grad_norm": 1.3495159149169922, "learning_rate": 5.95141507703587e-06, "loss": 0.5209, "step": 7196 }, { "epoch": 0.46, "grad_norm": 1.002882480621338, "learning_rate": 5.9504078089767545e-06, "loss": 0.5489, "step": 7197 }, { "epoch": 0.46, "grad_norm": 1.0131049156188965, "learning_rate": 5.949400500899272e-06, "loss": 0.5326, "step": 7198 }, { "epoch": 0.46, "grad_norm": 0.9826775193214417, "learning_rate": 5.948393152845837e-06, "loss": 0.4392, "step": 7199 }, { "epoch": 0.46, "grad_norm": 1.0567611455917358, "learning_rate": 5.9473857648588665e-06, "loss": 0.5934, "step": 7200 }, { "epoch": 0.46, "grad_norm": 0.9829743504524231, "learning_rate": 5.9463783369807775e-06, "loss": 0.5364, "step": 7201 }, { "epoch": 0.46, "grad_norm": 0.9536810517311096, "learning_rate": 5.945370869253987e-06, "loss": 0.5174, "step": 7202 }, { "epoch": 0.46, "grad_norm": 0.981810986995697, "learning_rate": 5.944363361720919e-06, "loss": 0.5436, "step": 7203 }, { "epoch": 0.46, "grad_norm": 0.965626060962677, "learning_rate": 5.943355814423996e-06, "loss": 0.4884, "step": 7204 }, { "epoch": 0.46, "grad_norm": 1.0625500679016113, "learning_rate": 5.94234822740564e-06, "loss": 0.549, "step": 7205 }, { "epoch": 0.46, "grad_norm": 1.0152928829193115, "learning_rate": 5.941340600708279e-06, "loss": 0.5234, "step": 7206 }, { "epoch": 0.46, "grad_norm": 1.0407637357711792, "learning_rate": 5.9403329343743385e-06, "loss": 0.5577, "step": 7207 }, { "epoch": 0.46, "grad_norm": 1.0825738906860352, "learning_rate": 5.939325228446251e-06, "loss": 0.6029, "step": 7208 }, { "epoch": 0.46, "grad_norm": 0.9441272616386414, "learning_rate": 5.938317482966446e-06, "loss": 0.5153, "step": 7209 }, { "epoch": 0.46, "grad_norm": 0.9823698997497559, "learning_rate": 5.937309697977355e-06, "loss": 0.4753, "step": 7210 }, { "epoch": 0.46, "grad_norm": 0.9696682691574097, "learning_rate": 5.936301873521414e-06, "loss": 0.5201, "step": 7211 }, { "epoch": 0.46, "grad_norm": 1.059671401977539, "learning_rate": 5.935294009641057e-06, "loss": 0.5362, "step": 7212 }, { "epoch": 0.46, "grad_norm": 1.0727226734161377, "learning_rate": 5.934286106378724e-06, "loss": 0.5466, "step": 7213 }, { "epoch": 0.46, "grad_norm": 1.0253998041152954, "learning_rate": 5.933278163776852e-06, "loss": 0.5549, "step": 7214 }, { "epoch": 0.46, "grad_norm": 1.10127592086792, "learning_rate": 5.932270181877886e-06, "loss": 0.5668, "step": 7215 }, { "epoch": 0.46, "grad_norm": 0.9771760106086731, "learning_rate": 5.9312621607242625e-06, "loss": 0.5037, "step": 7216 }, { "epoch": 0.46, "grad_norm": 0.9859318733215332, "learning_rate": 5.93025410035843e-06, "loss": 0.5106, "step": 7217 }, { "epoch": 0.46, "grad_norm": 0.9642918109893799, "learning_rate": 5.929246000822835e-06, "loss": 0.5447, "step": 7218 }, { "epoch": 0.46, "grad_norm": 1.0542140007019043, "learning_rate": 5.928237862159922e-06, "loss": 0.537, "step": 7219 }, { "epoch": 0.46, "grad_norm": 1.0037953853607178, "learning_rate": 5.927229684412143e-06, "loss": 0.5388, "step": 7220 }, { "epoch": 0.46, "grad_norm": 0.9469515681266785, "learning_rate": 5.926221467621945e-06, "loss": 0.4964, "step": 7221 }, { "epoch": 0.46, "grad_norm": 0.9464107751846313, "learning_rate": 5.925213211831785e-06, "loss": 0.4892, "step": 7222 }, { "epoch": 0.46, "grad_norm": 1.0472491979599, "learning_rate": 5.924204917084116e-06, "loss": 0.5375, "step": 7223 }, { "epoch": 0.46, "grad_norm": 1.16463303565979, "learning_rate": 5.923196583421392e-06, "loss": 0.5273, "step": 7224 }, { "epoch": 0.46, "grad_norm": 0.9915179014205933, "learning_rate": 5.922188210886071e-06, "loss": 0.5008, "step": 7225 }, { "epoch": 0.46, "grad_norm": 0.9429498314857483, "learning_rate": 5.921179799520613e-06, "loss": 0.5507, "step": 7226 }, { "epoch": 0.46, "grad_norm": 1.0695574283599854, "learning_rate": 5.920171349367478e-06, "loss": 0.54, "step": 7227 }, { "epoch": 0.46, "grad_norm": 1.0468008518218994, "learning_rate": 5.919162860469129e-06, "loss": 0.5241, "step": 7228 }, { "epoch": 0.46, "grad_norm": 0.993760883808136, "learning_rate": 5.9181543328680295e-06, "loss": 0.5781, "step": 7229 }, { "epoch": 0.46, "grad_norm": 1.0547833442687988, "learning_rate": 5.917145766606645e-06, "loss": 0.5714, "step": 7230 }, { "epoch": 0.46, "grad_norm": 1.0336589813232422, "learning_rate": 5.9161371617274425e-06, "loss": 0.5129, "step": 7231 }, { "epoch": 0.46, "grad_norm": 1.0273765325546265, "learning_rate": 5.91512851827289e-06, "loss": 0.5227, "step": 7232 }, { "epoch": 0.46, "grad_norm": 0.9871826767921448, "learning_rate": 5.914119836285461e-06, "loss": 0.5027, "step": 7233 }, { "epoch": 0.46, "grad_norm": 0.9362433552742004, "learning_rate": 5.913111115807626e-06, "loss": 0.5302, "step": 7234 }, { "epoch": 0.46, "grad_norm": 1.0491071939468384, "learning_rate": 5.912102356881857e-06, "loss": 0.5021, "step": 7235 }, { "epoch": 0.46, "grad_norm": 1.0819073915481567, "learning_rate": 5.91109355955063e-06, "loss": 0.5088, "step": 7236 }, { "epoch": 0.46, "grad_norm": 0.9606463313102722, "learning_rate": 5.910084723856424e-06, "loss": 0.5218, "step": 7237 }, { "epoch": 0.46, "grad_norm": 0.9783573150634766, "learning_rate": 5.909075849841717e-06, "loss": 0.5265, "step": 7238 }, { "epoch": 0.46, "grad_norm": 0.9417604207992554, "learning_rate": 5.908066937548987e-06, "loss": 0.4648, "step": 7239 }, { "epoch": 0.46, "grad_norm": 0.942327618598938, "learning_rate": 5.907057987020717e-06, "loss": 0.5158, "step": 7240 }, { "epoch": 0.46, "grad_norm": 0.9482901692390442, "learning_rate": 5.906048998299392e-06, "loss": 0.5049, "step": 7241 }, { "epoch": 0.46, "grad_norm": 0.9977984428405762, "learning_rate": 5.905039971427494e-06, "loss": 0.5542, "step": 7242 }, { "epoch": 0.46, "grad_norm": 0.9763760566711426, "learning_rate": 5.9040309064475136e-06, "loss": 0.5426, "step": 7243 }, { "epoch": 0.46, "grad_norm": 0.97144615650177, "learning_rate": 5.903021803401933e-06, "loss": 0.5248, "step": 7244 }, { "epoch": 0.46, "grad_norm": 0.9501352310180664, "learning_rate": 5.902012662333248e-06, "loss": 0.4832, "step": 7245 }, { "epoch": 0.46, "grad_norm": 1.0225166082382202, "learning_rate": 5.9010034832839466e-06, "loss": 0.5706, "step": 7246 }, { "epoch": 0.46, "grad_norm": 1.0085699558258057, "learning_rate": 5.899994266296525e-06, "loss": 0.5263, "step": 7247 }, { "epoch": 0.46, "grad_norm": 1.0352619886398315, "learning_rate": 5.898985011413473e-06, "loss": 0.4869, "step": 7248 }, { "epoch": 0.46, "grad_norm": 0.9713915586471558, "learning_rate": 5.897975718677291e-06, "loss": 0.4974, "step": 7249 }, { "epoch": 0.46, "grad_norm": 1.03853178024292, "learning_rate": 5.896966388130475e-06, "loss": 0.5162, "step": 7250 }, { "epoch": 0.46, "grad_norm": 0.9080546498298645, "learning_rate": 5.895957019815526e-06, "loss": 0.4928, "step": 7251 }, { "epoch": 0.46, "grad_norm": 1.152125597000122, "learning_rate": 5.894947613774942e-06, "loss": 0.5041, "step": 7252 }, { "epoch": 0.46, "grad_norm": 0.8981519341468811, "learning_rate": 5.8939381700512275e-06, "loss": 0.4771, "step": 7253 }, { "epoch": 0.46, "grad_norm": 0.9719518423080444, "learning_rate": 5.892928688686887e-06, "loss": 0.5585, "step": 7254 }, { "epoch": 0.46, "grad_norm": 0.9432595372200012, "learning_rate": 5.891919169724426e-06, "loss": 0.5286, "step": 7255 }, { "epoch": 0.46, "grad_norm": 0.9516439437866211, "learning_rate": 5.890909613206351e-06, "loss": 0.5506, "step": 7256 }, { "epoch": 0.46, "grad_norm": 0.9617924094200134, "learning_rate": 5.889900019175171e-06, "loss": 0.4861, "step": 7257 }, { "epoch": 0.46, "grad_norm": 1.0408291816711426, "learning_rate": 5.888890387673398e-06, "loss": 0.5119, "step": 7258 }, { "epoch": 0.46, "grad_norm": 0.9240466952323914, "learning_rate": 5.887880718743541e-06, "loss": 0.4916, "step": 7259 }, { "epoch": 0.46, "grad_norm": 0.952886164188385, "learning_rate": 5.886871012428117e-06, "loss": 0.5074, "step": 7260 }, { "epoch": 0.46, "grad_norm": 1.010315179824829, "learning_rate": 5.885861268769641e-06, "loss": 0.5255, "step": 7261 }, { "epoch": 0.46, "grad_norm": 1.0000734329223633, "learning_rate": 5.8848514878106275e-06, "loss": 0.4968, "step": 7262 }, { "epoch": 0.46, "grad_norm": 1.0513924360275269, "learning_rate": 5.883841669593595e-06, "loss": 0.5587, "step": 7263 }, { "epoch": 0.46, "grad_norm": 0.9953838586807251, "learning_rate": 5.882831814161065e-06, "loss": 0.5463, "step": 7264 }, { "epoch": 0.46, "grad_norm": 0.9913728833198547, "learning_rate": 5.881821921555559e-06, "loss": 0.4985, "step": 7265 }, { "epoch": 0.46, "grad_norm": 1.0422443151474, "learning_rate": 5.880811991819601e-06, "loss": 0.5383, "step": 7266 }, { "epoch": 0.46, "grad_norm": 1.0038896799087524, "learning_rate": 5.879802024995712e-06, "loss": 0.5339, "step": 7267 }, { "epoch": 0.46, "grad_norm": 1.005843162536621, "learning_rate": 5.878792021126421e-06, "loss": 0.5532, "step": 7268 }, { "epoch": 0.46, "grad_norm": 0.9732350707054138, "learning_rate": 5.877781980254255e-06, "loss": 0.5583, "step": 7269 }, { "epoch": 0.46, "grad_norm": 0.9197762608528137, "learning_rate": 5.876771902421743e-06, "loss": 0.4758, "step": 7270 }, { "epoch": 0.46, "grad_norm": 1.0640990734100342, "learning_rate": 5.875761787671416e-06, "loss": 0.539, "step": 7271 }, { "epoch": 0.46, "grad_norm": 1.0230790376663208, "learning_rate": 5.874751636045808e-06, "loss": 0.4872, "step": 7272 }, { "epoch": 0.46, "grad_norm": 0.9654695391654968, "learning_rate": 5.873741447587451e-06, "loss": 0.5178, "step": 7273 }, { "epoch": 0.46, "grad_norm": 1.006279468536377, "learning_rate": 5.8727312223388814e-06, "loss": 0.5642, "step": 7274 }, { "epoch": 0.46, "grad_norm": 0.96772301197052, "learning_rate": 5.871720960342635e-06, "loss": 0.527, "step": 7275 }, { "epoch": 0.46, "grad_norm": 1.0234496593475342, "learning_rate": 5.870710661641252e-06, "loss": 0.4944, "step": 7276 }, { "epoch": 0.46, "grad_norm": 0.9897413849830627, "learning_rate": 5.869700326277273e-06, "loss": 0.5446, "step": 7277 }, { "epoch": 0.46, "grad_norm": 0.9591410756111145, "learning_rate": 5.868689954293239e-06, "loss": 0.522, "step": 7278 }, { "epoch": 0.46, "grad_norm": 0.9390556812286377, "learning_rate": 5.86767954573169e-06, "loss": 0.4907, "step": 7279 }, { "epoch": 0.46, "grad_norm": 1.0197323560714722, "learning_rate": 5.866669100635176e-06, "loss": 0.5545, "step": 7280 }, { "epoch": 0.46, "grad_norm": 1.019065260887146, "learning_rate": 5.865658619046242e-06, "loss": 0.5217, "step": 7281 }, { "epoch": 0.46, "grad_norm": 0.9386879801750183, "learning_rate": 5.864648101007433e-06, "loss": 0.4943, "step": 7282 }, { "epoch": 0.46, "grad_norm": 1.0120941400527954, "learning_rate": 5.863637546561301e-06, "loss": 0.4953, "step": 7283 }, { "epoch": 0.46, "grad_norm": 1.0714585781097412, "learning_rate": 5.862626955750397e-06, "loss": 0.5644, "step": 7284 }, { "epoch": 0.46, "grad_norm": 0.9985573887825012, "learning_rate": 5.8616163286172726e-06, "loss": 0.5713, "step": 7285 }, { "epoch": 0.46, "grad_norm": 1.022886037826538, "learning_rate": 5.8606056652044805e-06, "loss": 0.5039, "step": 7286 }, { "epoch": 0.46, "grad_norm": 1.0849807262420654, "learning_rate": 5.859594965554579e-06, "loss": 0.5748, "step": 7287 }, { "epoch": 0.46, "grad_norm": 1.0356742143630981, "learning_rate": 5.858584229710124e-06, "loss": 0.5417, "step": 7288 }, { "epoch": 0.46, "grad_norm": 0.9849649667739868, "learning_rate": 5.857573457713674e-06, "loss": 0.508, "step": 7289 }, { "epoch": 0.46, "grad_norm": 0.9887516498565674, "learning_rate": 5.856562649607788e-06, "loss": 0.5373, "step": 7290 }, { "epoch": 0.46, "grad_norm": 1.0872023105621338, "learning_rate": 5.855551805435028e-06, "loss": 0.5253, "step": 7291 }, { "epoch": 0.46, "grad_norm": 1.0221582651138306, "learning_rate": 5.854540925237959e-06, "loss": 0.5486, "step": 7292 }, { "epoch": 0.46, "grad_norm": 1.1867270469665527, "learning_rate": 5.853530009059144e-06, "loss": 0.5338, "step": 7293 }, { "epoch": 0.46, "grad_norm": 1.0727027654647827, "learning_rate": 5.852519056941149e-06, "loss": 0.5335, "step": 7294 }, { "epoch": 0.46, "grad_norm": 0.959506094455719, "learning_rate": 5.851508068926542e-06, "loss": 0.5079, "step": 7295 }, { "epoch": 0.46, "grad_norm": 1.063923716545105, "learning_rate": 5.850497045057895e-06, "loss": 0.5529, "step": 7296 }, { "epoch": 0.46, "grad_norm": 1.0106937885284424, "learning_rate": 5.849485985377774e-06, "loss": 0.5366, "step": 7297 }, { "epoch": 0.46, "grad_norm": 1.0616973638534546, "learning_rate": 5.848474889928753e-06, "loss": 0.5117, "step": 7298 }, { "epoch": 0.46, "grad_norm": 1.140915036201477, "learning_rate": 5.8474637587534065e-06, "loss": 0.5163, "step": 7299 }, { "epoch": 0.46, "grad_norm": 1.064170479774475, "learning_rate": 5.84645259189431e-06, "loss": 0.5552, "step": 7300 }, { "epoch": 0.46, "grad_norm": 1.0307254791259766, "learning_rate": 5.845441389394039e-06, "loss": 0.5654, "step": 7301 }, { "epoch": 0.46, "grad_norm": 0.9613641500473022, "learning_rate": 5.844430151295171e-06, "loss": 0.5264, "step": 7302 }, { "epoch": 0.46, "grad_norm": 1.0288296937942505, "learning_rate": 5.843418877640289e-06, "loss": 0.5845, "step": 7303 }, { "epoch": 0.46, "grad_norm": 1.0588910579681396, "learning_rate": 5.842407568471971e-06, "loss": 0.534, "step": 7304 }, { "epoch": 0.46, "grad_norm": 1.153749942779541, "learning_rate": 5.8413962238328e-06, "loss": 0.5142, "step": 7305 }, { "epoch": 0.46, "grad_norm": 1.014904260635376, "learning_rate": 5.840384843765361e-06, "loss": 0.5243, "step": 7306 }, { "epoch": 0.46, "grad_norm": 1.179085612297058, "learning_rate": 5.839373428312242e-06, "loss": 0.5021, "step": 7307 }, { "epoch": 0.46, "grad_norm": 1.0122723579406738, "learning_rate": 5.838361977516026e-06, "loss": 0.5477, "step": 7308 }, { "epoch": 0.46, "grad_norm": 1.081276535987854, "learning_rate": 5.837350491419304e-06, "loss": 0.6057, "step": 7309 }, { "epoch": 0.46, "grad_norm": 1.0447216033935547, "learning_rate": 5.836338970064664e-06, "loss": 0.5004, "step": 7310 }, { "epoch": 0.46, "grad_norm": 1.0738158226013184, "learning_rate": 5.835327413494702e-06, "loss": 0.5689, "step": 7311 }, { "epoch": 0.46, "grad_norm": 0.9571483731269836, "learning_rate": 5.834315821752008e-06, "loss": 0.5389, "step": 7312 }, { "epoch": 0.46, "grad_norm": 1.1208528280258179, "learning_rate": 5.833304194879176e-06, "loss": 0.5823, "step": 7313 }, { "epoch": 0.46, "grad_norm": 1.013647437095642, "learning_rate": 5.832292532918804e-06, "loss": 0.5109, "step": 7314 }, { "epoch": 0.46, "grad_norm": 0.9659610986709595, "learning_rate": 5.831280835913489e-06, "loss": 0.5205, "step": 7315 }, { "epoch": 0.46, "grad_norm": 0.9982751607894897, "learning_rate": 5.83026910390583e-06, "loss": 0.5645, "step": 7316 }, { "epoch": 0.46, "grad_norm": 0.9619269967079163, "learning_rate": 5.829257336938427e-06, "loss": 0.5456, "step": 7317 }, { "epoch": 0.46, "grad_norm": 0.9727155566215515, "learning_rate": 5.8282455350538815e-06, "loss": 0.5522, "step": 7318 }, { "epoch": 0.46, "grad_norm": 0.9817243814468384, "learning_rate": 5.827233698294799e-06, "loss": 0.5309, "step": 7319 }, { "epoch": 0.46, "grad_norm": 1.0565499067306519, "learning_rate": 5.826221826703783e-06, "loss": 0.6129, "step": 7320 }, { "epoch": 0.46, "grad_norm": 0.9987887740135193, "learning_rate": 5.825209920323438e-06, "loss": 0.5099, "step": 7321 }, { "epoch": 0.46, "grad_norm": 1.0380562543869019, "learning_rate": 5.824197979196377e-06, "loss": 0.5281, "step": 7322 }, { "epoch": 0.46, "grad_norm": 1.0254791975021362, "learning_rate": 5.823186003365205e-06, "loss": 0.5485, "step": 7323 }, { "epoch": 0.46, "grad_norm": 1.097720980644226, "learning_rate": 5.822173992872534e-06, "loss": 0.4919, "step": 7324 }, { "epoch": 0.46, "grad_norm": 1.0166734457015991, "learning_rate": 5.821161947760975e-06, "loss": 0.4991, "step": 7325 }, { "epoch": 0.46, "grad_norm": 1.1378370523452759, "learning_rate": 5.820149868073145e-06, "loss": 0.633, "step": 7326 }, { "epoch": 0.46, "grad_norm": 1.127633810043335, "learning_rate": 5.819137753851656e-06, "loss": 0.5529, "step": 7327 }, { "epoch": 0.46, "grad_norm": 1.0835357904434204, "learning_rate": 5.8181256051391276e-06, "loss": 0.5081, "step": 7328 }, { "epoch": 0.46, "grad_norm": 1.1418222188949585, "learning_rate": 5.817113421978173e-06, "loss": 0.5587, "step": 7329 }, { "epoch": 0.46, "grad_norm": 1.1440740823745728, "learning_rate": 5.816101204411417e-06, "loss": 0.5128, "step": 7330 }, { "epoch": 0.46, "grad_norm": 0.9324057102203369, "learning_rate": 5.815088952481478e-06, "loss": 0.4532, "step": 7331 }, { "epoch": 0.46, "grad_norm": 0.9553789496421814, "learning_rate": 5.814076666230978e-06, "loss": 0.5435, "step": 7332 }, { "epoch": 0.46, "grad_norm": 0.9789353609085083, "learning_rate": 5.813064345702542e-06, "loss": 0.5286, "step": 7333 }, { "epoch": 0.46, "grad_norm": 1.0638878345489502, "learning_rate": 5.812051990938794e-06, "loss": 0.5014, "step": 7334 }, { "epoch": 0.46, "grad_norm": 1.0748344659805298, "learning_rate": 5.811039601982363e-06, "loss": 0.5408, "step": 7335 }, { "epoch": 0.46, "grad_norm": 1.0366030931472778, "learning_rate": 5.810027178875875e-06, "loss": 0.5314, "step": 7336 }, { "epoch": 0.46, "grad_norm": 1.0968186855316162, "learning_rate": 5.809014721661961e-06, "loss": 0.5518, "step": 7337 }, { "epoch": 0.46, "grad_norm": 1.0389659404754639, "learning_rate": 5.808002230383249e-06, "loss": 0.5479, "step": 7338 }, { "epoch": 0.46, "grad_norm": 1.0004786252975464, "learning_rate": 5.806989705082377e-06, "loss": 0.4869, "step": 7339 }, { "epoch": 0.47, "grad_norm": 1.078975796699524, "learning_rate": 5.805977145801975e-06, "loss": 0.5622, "step": 7340 }, { "epoch": 0.47, "grad_norm": 0.9257678985595703, "learning_rate": 5.8049645525846785e-06, "loss": 0.4662, "step": 7341 }, { "epoch": 0.47, "grad_norm": 0.9679080247879028, "learning_rate": 5.8039519254731245e-06, "loss": 0.505, "step": 7342 }, { "epoch": 0.47, "grad_norm": 0.9533823132514954, "learning_rate": 5.802939264509954e-06, "loss": 0.5402, "step": 7343 }, { "epoch": 0.47, "grad_norm": 0.9467368125915527, "learning_rate": 5.801926569737802e-06, "loss": 0.5128, "step": 7344 }, { "epoch": 0.47, "grad_norm": 0.991810142993927, "learning_rate": 5.800913841199312e-06, "loss": 0.5508, "step": 7345 }, { "epoch": 0.47, "grad_norm": 1.0695257186889648, "learning_rate": 5.799901078937127e-06, "loss": 0.521, "step": 7346 }, { "epoch": 0.47, "grad_norm": 0.9737617373466492, "learning_rate": 5.798888282993891e-06, "loss": 0.4757, "step": 7347 }, { "epoch": 0.47, "grad_norm": 1.0166969299316406, "learning_rate": 5.7978754534122465e-06, "loss": 0.5163, "step": 7348 }, { "epoch": 0.47, "grad_norm": 0.9834796786308289, "learning_rate": 5.7968625902348445e-06, "loss": 0.5261, "step": 7349 }, { "epoch": 0.47, "grad_norm": 1.0013189315795898, "learning_rate": 5.7958496935043296e-06, "loss": 0.5152, "step": 7350 }, { "epoch": 0.47, "grad_norm": 0.9282286763191223, "learning_rate": 5.794836763263353e-06, "loss": 0.5351, "step": 7351 }, { "epoch": 0.47, "grad_norm": 1.0409108400344849, "learning_rate": 5.793823799554564e-06, "loss": 0.5005, "step": 7352 }, { "epoch": 0.47, "grad_norm": 0.9965145587921143, "learning_rate": 5.792810802420618e-06, "loss": 0.5648, "step": 7353 }, { "epoch": 0.47, "grad_norm": 0.9893680214881897, "learning_rate": 5.791797771904168e-06, "loss": 0.5249, "step": 7354 }, { "epoch": 0.47, "grad_norm": 1.1029082536697388, "learning_rate": 5.790784708047866e-06, "loss": 0.5759, "step": 7355 }, { "epoch": 0.47, "grad_norm": 1.0168418884277344, "learning_rate": 5.789771610894371e-06, "loss": 0.5042, "step": 7356 }, { "epoch": 0.47, "grad_norm": 1.0601954460144043, "learning_rate": 5.7887584804863414e-06, "loss": 0.5, "step": 7357 }, { "epoch": 0.47, "grad_norm": 1.008843183517456, "learning_rate": 5.787745316866438e-06, "loss": 0.5029, "step": 7358 }, { "epoch": 0.47, "grad_norm": 0.9807408452033997, "learning_rate": 5.786732120077318e-06, "loss": 0.4781, "step": 7359 }, { "epoch": 0.47, "grad_norm": 1.0779192447662354, "learning_rate": 5.7857188901616444e-06, "loss": 0.5461, "step": 7360 }, { "epoch": 0.47, "grad_norm": 1.0375462770462036, "learning_rate": 5.7847056271620815e-06, "loss": 0.5284, "step": 7361 }, { "epoch": 0.47, "grad_norm": 1.0602962970733643, "learning_rate": 5.783692331121296e-06, "loss": 0.5502, "step": 7362 }, { "epoch": 0.47, "grad_norm": 1.1337302923202515, "learning_rate": 5.7826790020819525e-06, "loss": 0.5508, "step": 7363 }, { "epoch": 0.47, "grad_norm": 1.0568877458572388, "learning_rate": 5.781665640086719e-06, "loss": 0.4774, "step": 7364 }, { "epoch": 0.47, "grad_norm": 1.1909563541412354, "learning_rate": 5.780652245178263e-06, "loss": 0.5323, "step": 7365 }, { "epoch": 0.47, "grad_norm": 1.220807433128357, "learning_rate": 5.779638817399259e-06, "loss": 0.4864, "step": 7366 }, { "epoch": 0.47, "grad_norm": 1.078238606452942, "learning_rate": 5.778625356792376e-06, "loss": 0.5182, "step": 7367 }, { "epoch": 0.47, "grad_norm": 0.9133523106575012, "learning_rate": 5.7776118634002865e-06, "loss": 0.4301, "step": 7368 }, { "epoch": 0.47, "grad_norm": 1.0682060718536377, "learning_rate": 5.776598337265668e-06, "loss": 0.5082, "step": 7369 }, { "epoch": 0.47, "grad_norm": 1.0951640605926514, "learning_rate": 5.775584778431194e-06, "loss": 0.5636, "step": 7370 }, { "epoch": 0.47, "grad_norm": 1.0886175632476807, "learning_rate": 5.774571186939543e-06, "loss": 0.524, "step": 7371 }, { "epoch": 0.47, "grad_norm": 1.1518522500991821, "learning_rate": 5.773557562833394e-06, "loss": 0.5627, "step": 7372 }, { "epoch": 0.47, "grad_norm": 1.1411592960357666, "learning_rate": 5.772543906155429e-06, "loss": 0.5511, "step": 7373 }, { "epoch": 0.47, "grad_norm": 0.9977938532829285, "learning_rate": 5.7715302169483254e-06, "loss": 0.5045, "step": 7374 }, { "epoch": 0.47, "grad_norm": 1.0202271938323975, "learning_rate": 5.770516495254769e-06, "loss": 0.597, "step": 7375 }, { "epoch": 0.47, "grad_norm": 1.0154244899749756, "learning_rate": 5.769502741117443e-06, "loss": 0.564, "step": 7376 }, { "epoch": 0.47, "grad_norm": 1.016966700553894, "learning_rate": 5.7684889545790346e-06, "loss": 0.5074, "step": 7377 }, { "epoch": 0.47, "grad_norm": 1.0249959230422974, "learning_rate": 5.767475135682228e-06, "loss": 0.5406, "step": 7378 }, { "epoch": 0.47, "grad_norm": 1.005169153213501, "learning_rate": 5.766461284469714e-06, "loss": 0.5431, "step": 7379 }, { "epoch": 0.47, "grad_norm": 1.0918866395950317, "learning_rate": 5.765447400984182e-06, "loss": 0.5594, "step": 7380 }, { "epoch": 0.47, "grad_norm": 1.0976300239562988, "learning_rate": 5.7644334852683236e-06, "loss": 0.5582, "step": 7381 }, { "epoch": 0.47, "grad_norm": 1.0252267122268677, "learning_rate": 5.763419537364828e-06, "loss": 0.5008, "step": 7382 }, { "epoch": 0.47, "grad_norm": 1.0654020309448242, "learning_rate": 5.762405557316393e-06, "loss": 0.5281, "step": 7383 }, { "epoch": 0.47, "grad_norm": 0.9876992702484131, "learning_rate": 5.761391545165713e-06, "loss": 0.514, "step": 7384 }, { "epoch": 0.47, "grad_norm": 1.0419373512268066, "learning_rate": 5.760377500955483e-06, "loss": 0.5574, "step": 7385 }, { "epoch": 0.47, "grad_norm": 1.0095206499099731, "learning_rate": 5.759363424728401e-06, "loss": 0.5356, "step": 7386 }, { "epoch": 0.47, "grad_norm": 1.118194580078125, "learning_rate": 5.758349316527166e-06, "loss": 0.5572, "step": 7387 }, { "epoch": 0.47, "grad_norm": 1.035229206085205, "learning_rate": 5.7573351763944815e-06, "loss": 0.5283, "step": 7388 }, { "epoch": 0.47, "grad_norm": 0.990778923034668, "learning_rate": 5.756321004373047e-06, "loss": 0.5052, "step": 7389 }, { "epoch": 0.47, "grad_norm": 1.0303928852081299, "learning_rate": 5.755306800505564e-06, "loss": 0.5484, "step": 7390 }, { "epoch": 0.47, "grad_norm": 1.0018138885498047, "learning_rate": 5.754292564834741e-06, "loss": 0.5501, "step": 7391 }, { "epoch": 0.47, "grad_norm": 1.1125024557113647, "learning_rate": 5.753278297403282e-06, "loss": 0.5509, "step": 7392 }, { "epoch": 0.47, "grad_norm": 0.9921433329582214, "learning_rate": 5.752263998253893e-06, "loss": 0.496, "step": 7393 }, { "epoch": 0.47, "grad_norm": 1.1274008750915527, "learning_rate": 5.751249667429285e-06, "loss": 0.5595, "step": 7394 }, { "epoch": 0.47, "grad_norm": 1.0307135581970215, "learning_rate": 5.7502353049721674e-06, "loss": 0.5318, "step": 7395 }, { "epoch": 0.47, "grad_norm": 0.9848186373710632, "learning_rate": 5.74922091092525e-06, "loss": 0.5003, "step": 7396 }, { "epoch": 0.47, "grad_norm": 1.0523111820220947, "learning_rate": 5.748206485331247e-06, "loss": 0.5423, "step": 7397 }, { "epoch": 0.47, "grad_norm": 0.9812511205673218, "learning_rate": 5.747192028232872e-06, "loss": 0.5282, "step": 7398 }, { "epoch": 0.47, "grad_norm": 1.0850028991699219, "learning_rate": 5.746177539672841e-06, "loss": 0.5266, "step": 7399 }, { "epoch": 0.47, "grad_norm": 1.083706021308899, "learning_rate": 5.745163019693867e-06, "loss": 0.5036, "step": 7400 }, { "epoch": 0.47, "grad_norm": 0.9930241703987122, "learning_rate": 5.744148468338671e-06, "loss": 0.5442, "step": 7401 }, { "epoch": 0.47, "grad_norm": 1.1262766122817993, "learning_rate": 5.743133885649972e-06, "loss": 0.5537, "step": 7402 }, { "epoch": 0.47, "grad_norm": 1.022183895111084, "learning_rate": 5.742119271670491e-06, "loss": 0.5127, "step": 7403 }, { "epoch": 0.47, "grad_norm": 1.06358003616333, "learning_rate": 5.741104626442948e-06, "loss": 0.5542, "step": 7404 }, { "epoch": 0.47, "grad_norm": 0.9712068438529968, "learning_rate": 5.740089950010068e-06, "loss": 0.4641, "step": 7405 }, { "epoch": 0.47, "grad_norm": 0.9932650327682495, "learning_rate": 5.739075242414575e-06, "loss": 0.5319, "step": 7406 }, { "epoch": 0.47, "grad_norm": 0.936883270740509, "learning_rate": 5.738060503699194e-06, "loss": 0.4861, "step": 7407 }, { "epoch": 0.47, "grad_norm": 1.0825799703598022, "learning_rate": 5.737045733906653e-06, "loss": 0.5237, "step": 7408 }, { "epoch": 0.47, "grad_norm": 1.1196141242980957, "learning_rate": 5.7360309330796805e-06, "loss": 0.5853, "step": 7409 }, { "epoch": 0.47, "grad_norm": 1.0770564079284668, "learning_rate": 5.735016101261005e-06, "loss": 0.5376, "step": 7410 }, { "epoch": 0.47, "grad_norm": 0.9987449049949646, "learning_rate": 5.7340012384933595e-06, "loss": 0.5465, "step": 7411 }, { "epoch": 0.47, "grad_norm": 1.1566431522369385, "learning_rate": 5.732986344819475e-06, "loss": 0.5475, "step": 7412 }, { "epoch": 0.47, "grad_norm": 0.9625303149223328, "learning_rate": 5.731971420282085e-06, "loss": 0.4451, "step": 7413 }, { "epoch": 0.47, "grad_norm": 1.0463659763336182, "learning_rate": 5.730956464923926e-06, "loss": 0.5262, "step": 7414 }, { "epoch": 0.47, "grad_norm": 1.0219457149505615, "learning_rate": 5.729941478787732e-06, "loss": 0.5347, "step": 7415 }, { "epoch": 0.47, "grad_norm": 1.053633451461792, "learning_rate": 5.728926461916242e-06, "loss": 0.4918, "step": 7416 }, { "epoch": 0.47, "grad_norm": 1.0385435819625854, "learning_rate": 5.727911414352192e-06, "loss": 0.5032, "step": 7417 }, { "epoch": 0.47, "grad_norm": 1.042497158050537, "learning_rate": 5.726896336138328e-06, "loss": 0.5376, "step": 7418 }, { "epoch": 0.47, "grad_norm": 0.910614013671875, "learning_rate": 5.725881227317386e-06, "loss": 0.4965, "step": 7419 }, { "epoch": 0.47, "grad_norm": 0.9734965562820435, "learning_rate": 5.724866087932113e-06, "loss": 0.5052, "step": 7420 }, { "epoch": 0.47, "grad_norm": 1.014836311340332, "learning_rate": 5.723850918025246e-06, "loss": 0.5158, "step": 7421 }, { "epoch": 0.47, "grad_norm": 1.0883235931396484, "learning_rate": 5.722835717639539e-06, "loss": 0.5289, "step": 7422 }, { "epoch": 0.47, "grad_norm": 1.045098900794983, "learning_rate": 5.721820486817733e-06, "loss": 0.5294, "step": 7423 }, { "epoch": 0.47, "grad_norm": 0.9732691645622253, "learning_rate": 5.720805225602579e-06, "loss": 0.508, "step": 7424 }, { "epoch": 0.47, "grad_norm": 0.999125063419342, "learning_rate": 5.719789934036821e-06, "loss": 0.5315, "step": 7425 }, { "epoch": 0.47, "grad_norm": 1.0546282529830933, "learning_rate": 5.718774612163216e-06, "loss": 0.5232, "step": 7426 }, { "epoch": 0.47, "grad_norm": 1.0071479082107544, "learning_rate": 5.717759260024511e-06, "loss": 0.5603, "step": 7427 }, { "epoch": 0.47, "grad_norm": 1.0235660076141357, "learning_rate": 5.716743877663462e-06, "loss": 0.5142, "step": 7428 }, { "epoch": 0.47, "grad_norm": 1.014546275138855, "learning_rate": 5.715728465122821e-06, "loss": 0.5312, "step": 7429 }, { "epoch": 0.47, "grad_norm": 1.0218431949615479, "learning_rate": 5.714713022445344e-06, "loss": 0.5317, "step": 7430 }, { "epoch": 0.47, "grad_norm": 1.017401099205017, "learning_rate": 5.713697549673788e-06, "loss": 0.5115, "step": 7431 }, { "epoch": 0.47, "grad_norm": 1.0051920413970947, "learning_rate": 5.712682046850909e-06, "loss": 0.5265, "step": 7432 }, { "epoch": 0.47, "grad_norm": 1.0701223611831665, "learning_rate": 5.711666514019472e-06, "loss": 0.5939, "step": 7433 }, { "epoch": 0.47, "grad_norm": 0.9435673356056213, "learning_rate": 5.710650951222231e-06, "loss": 0.5134, "step": 7434 }, { "epoch": 0.47, "grad_norm": 1.01740562915802, "learning_rate": 5.709635358501952e-06, "loss": 0.52, "step": 7435 }, { "epoch": 0.47, "grad_norm": 1.0538522005081177, "learning_rate": 5.708619735901394e-06, "loss": 0.5062, "step": 7436 }, { "epoch": 0.47, "grad_norm": 1.0109902620315552, "learning_rate": 5.707604083463327e-06, "loss": 0.5181, "step": 7437 }, { "epoch": 0.47, "grad_norm": 1.0090477466583252, "learning_rate": 5.706588401230513e-06, "loss": 0.5117, "step": 7438 }, { "epoch": 0.47, "grad_norm": 1.0345077514648438, "learning_rate": 5.70557268924572e-06, "loss": 0.5434, "step": 7439 }, { "epoch": 0.47, "grad_norm": 0.9655662775039673, "learning_rate": 5.7045569475517126e-06, "loss": 0.4792, "step": 7440 }, { "epoch": 0.47, "grad_norm": 1.113183617591858, "learning_rate": 5.703541176191266e-06, "loss": 0.5593, "step": 7441 }, { "epoch": 0.47, "grad_norm": 0.9703465104103088, "learning_rate": 5.702525375207147e-06, "loss": 0.4756, "step": 7442 }, { "epoch": 0.47, "grad_norm": 1.0538132190704346, "learning_rate": 5.70150954464213e-06, "loss": 0.4799, "step": 7443 }, { "epoch": 0.47, "grad_norm": 1.0759013891220093, "learning_rate": 5.700493684538984e-06, "loss": 0.531, "step": 7444 }, { "epoch": 0.47, "grad_norm": 1.0236819982528687, "learning_rate": 5.699477794940487e-06, "loss": 0.5299, "step": 7445 }, { "epoch": 0.47, "grad_norm": 1.0337623357772827, "learning_rate": 5.698461875889414e-06, "loss": 0.5307, "step": 7446 }, { "epoch": 0.47, "grad_norm": 0.9565263986587524, "learning_rate": 5.6974459274285395e-06, "loss": 0.5615, "step": 7447 }, { "epoch": 0.47, "grad_norm": 0.9748880863189697, "learning_rate": 5.696429949600643e-06, "loss": 0.5106, "step": 7448 }, { "epoch": 0.47, "grad_norm": 1.0070738792419434, "learning_rate": 5.695413942448505e-06, "loss": 0.5267, "step": 7449 }, { "epoch": 0.47, "grad_norm": 1.0051288604736328, "learning_rate": 5.694397906014907e-06, "loss": 0.516, "step": 7450 }, { "epoch": 0.47, "grad_norm": 1.027597427368164, "learning_rate": 5.693381840342626e-06, "loss": 0.5177, "step": 7451 }, { "epoch": 0.47, "grad_norm": 1.013757586479187, "learning_rate": 5.692365745474448e-06, "loss": 0.5277, "step": 7452 }, { "epoch": 0.47, "grad_norm": 0.9197251796722412, "learning_rate": 5.691349621453158e-06, "loss": 0.4893, "step": 7453 }, { "epoch": 0.47, "grad_norm": 0.9335641860961914, "learning_rate": 5.6903334683215416e-06, "loss": 0.4918, "step": 7454 }, { "epoch": 0.47, "grad_norm": 1.0981416702270508, "learning_rate": 5.689317286122382e-06, "loss": 0.5927, "step": 7455 }, { "epoch": 0.47, "grad_norm": 1.0974715948104858, "learning_rate": 5.68830107489847e-06, "loss": 0.5241, "step": 7456 }, { "epoch": 0.47, "grad_norm": 1.1248267889022827, "learning_rate": 5.687284834692595e-06, "loss": 0.5348, "step": 7457 }, { "epoch": 0.47, "grad_norm": 1.0581552982330322, "learning_rate": 5.686268565547547e-06, "loss": 0.5987, "step": 7458 }, { "epoch": 0.47, "grad_norm": 0.9802426695823669, "learning_rate": 5.685252267506116e-06, "loss": 0.5243, "step": 7459 }, { "epoch": 0.47, "grad_norm": 1.1204898357391357, "learning_rate": 5.6842359406110945e-06, "loss": 0.5413, "step": 7460 }, { "epoch": 0.47, "grad_norm": 0.9641440510749817, "learning_rate": 5.683219584905281e-06, "loss": 0.5212, "step": 7461 }, { "epoch": 0.47, "grad_norm": 1.077943205833435, "learning_rate": 5.682203200431465e-06, "loss": 0.5271, "step": 7462 }, { "epoch": 0.47, "grad_norm": 1.0182640552520752, "learning_rate": 5.6811867872324465e-06, "loss": 0.5443, "step": 7463 }, { "epoch": 0.47, "grad_norm": 1.078507661819458, "learning_rate": 5.680170345351021e-06, "loss": 0.5489, "step": 7464 }, { "epoch": 0.47, "grad_norm": 1.0855196714401245, "learning_rate": 5.67915387482999e-06, "loss": 0.5549, "step": 7465 }, { "epoch": 0.47, "grad_norm": 1.047361135482788, "learning_rate": 5.678137375712152e-06, "loss": 0.5683, "step": 7466 }, { "epoch": 0.47, "grad_norm": 0.9580419659614563, "learning_rate": 5.6771208480403065e-06, "loss": 0.5284, "step": 7467 }, { "epoch": 0.47, "grad_norm": 0.9908188581466675, "learning_rate": 5.6761042918572585e-06, "loss": 0.5679, "step": 7468 }, { "epoch": 0.47, "grad_norm": 1.076659083366394, "learning_rate": 5.675087707205811e-06, "loss": 0.5352, "step": 7469 }, { "epoch": 0.47, "grad_norm": 0.9653880000114441, "learning_rate": 5.674071094128768e-06, "loss": 0.4909, "step": 7470 }, { "epoch": 0.47, "grad_norm": 1.0040349960327148, "learning_rate": 5.673054452668936e-06, "loss": 0.4924, "step": 7471 }, { "epoch": 0.47, "grad_norm": 0.9688589572906494, "learning_rate": 5.672037782869123e-06, "loss": 0.5142, "step": 7472 }, { "epoch": 0.47, "grad_norm": 1.058455228805542, "learning_rate": 5.671021084772137e-06, "loss": 0.5664, "step": 7473 }, { "epoch": 0.47, "grad_norm": 0.992531418800354, "learning_rate": 5.670004358420786e-06, "loss": 0.5211, "step": 7474 }, { "epoch": 0.47, "grad_norm": 1.0130715370178223, "learning_rate": 5.668987603857884e-06, "loss": 0.5056, "step": 7475 }, { "epoch": 0.47, "grad_norm": 0.9969527125358582, "learning_rate": 5.6679708211262415e-06, "loss": 0.5038, "step": 7476 }, { "epoch": 0.47, "grad_norm": 0.9759190082550049, "learning_rate": 5.66695401026867e-06, "loss": 0.527, "step": 7477 }, { "epoch": 0.47, "grad_norm": 1.0427945852279663, "learning_rate": 5.665937171327985e-06, "loss": 0.5542, "step": 7478 }, { "epoch": 0.47, "grad_norm": 0.9948601126670837, "learning_rate": 5.664920304347004e-06, "loss": 0.5351, "step": 7479 }, { "epoch": 0.47, "grad_norm": 1.0073400735855103, "learning_rate": 5.6639034093685416e-06, "loss": 0.5366, "step": 7480 }, { "epoch": 0.47, "grad_norm": 1.0551327466964722, "learning_rate": 5.662886486435415e-06, "loss": 0.5293, "step": 7481 }, { "epoch": 0.47, "grad_norm": 0.9984437823295593, "learning_rate": 5.6618695355904456e-06, "loss": 0.521, "step": 7482 }, { "epoch": 0.47, "grad_norm": 0.9802277088165283, "learning_rate": 5.660852556876452e-06, "loss": 0.5206, "step": 7483 }, { "epoch": 0.47, "grad_norm": 0.9552007913589478, "learning_rate": 5.659835550336257e-06, "loss": 0.4878, "step": 7484 }, { "epoch": 0.47, "grad_norm": 0.9671801924705505, "learning_rate": 5.658818516012681e-06, "loss": 0.5358, "step": 7485 }, { "epoch": 0.47, "grad_norm": 1.019771695137024, "learning_rate": 5.65780145394855e-06, "loss": 0.5134, "step": 7486 }, { "epoch": 0.47, "grad_norm": 1.0270360708236694, "learning_rate": 5.656784364186687e-06, "loss": 0.5547, "step": 7487 }, { "epoch": 0.47, "grad_norm": 0.9477678537368774, "learning_rate": 5.655767246769921e-06, "loss": 0.5307, "step": 7488 }, { "epoch": 0.47, "grad_norm": 0.9901880621910095, "learning_rate": 5.6547501017410765e-06, "loss": 0.4953, "step": 7489 }, { "epoch": 0.47, "grad_norm": 1.0108751058578491, "learning_rate": 5.6537329291429835e-06, "loss": 0.5092, "step": 7490 }, { "epoch": 0.47, "grad_norm": 1.1157495975494385, "learning_rate": 5.652715729018471e-06, "loss": 0.4955, "step": 7491 }, { "epoch": 0.47, "grad_norm": 1.0335301160812378, "learning_rate": 5.65169850141037e-06, "loss": 0.5353, "step": 7492 }, { "epoch": 0.47, "grad_norm": 0.9454318881034851, "learning_rate": 5.650681246361511e-06, "loss": 0.5018, "step": 7493 }, { "epoch": 0.47, "grad_norm": 0.953421950340271, "learning_rate": 5.649663963914729e-06, "loss": 0.5033, "step": 7494 }, { "epoch": 0.47, "grad_norm": 1.0138174295425415, "learning_rate": 5.6486466541128575e-06, "loss": 0.5348, "step": 7495 }, { "epoch": 0.47, "grad_norm": 1.002591848373413, "learning_rate": 5.6476293169987314e-06, "loss": 0.5153, "step": 7496 }, { "epoch": 0.47, "grad_norm": 1.0464341640472412, "learning_rate": 5.646611952615188e-06, "loss": 0.5146, "step": 7497 }, { "epoch": 0.48, "grad_norm": 1.0779297351837158, "learning_rate": 5.645594561005064e-06, "loss": 0.5705, "step": 7498 }, { "epoch": 0.48, "grad_norm": 1.0479854345321655, "learning_rate": 5.6445771422112005e-06, "loss": 0.4992, "step": 7499 }, { "epoch": 0.48, "grad_norm": 1.1008824110031128, "learning_rate": 5.643559696276435e-06, "loss": 0.5279, "step": 7500 }, { "epoch": 0.48, "grad_norm": 0.9838743805885315, "learning_rate": 5.6425422232436085e-06, "loss": 0.4645, "step": 7501 }, { "epoch": 0.48, "grad_norm": 1.0333632230758667, "learning_rate": 5.641524723155566e-06, "loss": 0.5329, "step": 7502 }, { "epoch": 0.48, "grad_norm": 1.0836952924728394, "learning_rate": 5.6405071960551485e-06, "loss": 0.4969, "step": 7503 }, { "epoch": 0.48, "grad_norm": 1.0006016492843628, "learning_rate": 5.639489641985201e-06, "loss": 0.5532, "step": 7504 }, { "epoch": 0.48, "grad_norm": 1.1209015846252441, "learning_rate": 5.638472060988569e-06, "loss": 0.5947, "step": 7505 }, { "epoch": 0.48, "grad_norm": 1.001816749572754, "learning_rate": 5.6374544531081e-06, "loss": 0.4769, "step": 7506 }, { "epoch": 0.48, "grad_norm": 1.0426093339920044, "learning_rate": 5.636436818386641e-06, "loss": 0.5528, "step": 7507 }, { "epoch": 0.48, "grad_norm": 0.9544576406478882, "learning_rate": 5.635419156867043e-06, "loss": 0.4907, "step": 7508 }, { "epoch": 0.48, "grad_norm": 0.9945775866508484, "learning_rate": 5.634401468592152e-06, "loss": 0.5223, "step": 7509 }, { "epoch": 0.48, "grad_norm": 1.0476137399673462, "learning_rate": 5.6333837536048255e-06, "loss": 0.4948, "step": 7510 }, { "epoch": 0.48, "grad_norm": 0.964775800704956, "learning_rate": 5.63236601194791e-06, "loss": 0.4961, "step": 7511 }, { "epoch": 0.48, "grad_norm": 0.9926450848579407, "learning_rate": 5.631348243664263e-06, "loss": 0.5498, "step": 7512 }, { "epoch": 0.48, "grad_norm": 1.049108862876892, "learning_rate": 5.630330448796736e-06, "loss": 0.5514, "step": 7513 }, { "epoch": 0.48, "grad_norm": 1.0942082405090332, "learning_rate": 5.629312627388188e-06, "loss": 0.5991, "step": 7514 }, { "epoch": 0.48, "grad_norm": 1.0420613288879395, "learning_rate": 5.628294779481474e-06, "loss": 0.5274, "step": 7515 }, { "epoch": 0.48, "grad_norm": 1.0404502153396606, "learning_rate": 5.6272769051194535e-06, "loss": 0.6015, "step": 7516 }, { "epoch": 0.48, "grad_norm": 1.0156911611557007, "learning_rate": 5.626259004344983e-06, "loss": 0.5138, "step": 7517 }, { "epoch": 0.48, "grad_norm": 1.0107665061950684, "learning_rate": 5.625241077200926e-06, "loss": 0.5352, "step": 7518 }, { "epoch": 0.48, "grad_norm": 1.0115772485733032, "learning_rate": 5.624223123730141e-06, "loss": 0.5144, "step": 7519 }, { "epoch": 0.48, "grad_norm": 0.98555588722229, "learning_rate": 5.6232051439754935e-06, "loss": 0.524, "step": 7520 }, { "epoch": 0.48, "grad_norm": 1.0191307067871094, "learning_rate": 5.622187137979843e-06, "loss": 0.4849, "step": 7521 }, { "epoch": 0.48, "grad_norm": 1.0475789308547974, "learning_rate": 5.621169105786057e-06, "loss": 0.5079, "step": 7522 }, { "epoch": 0.48, "grad_norm": 1.0014190673828125, "learning_rate": 5.620151047437004e-06, "loss": 0.5712, "step": 7523 }, { "epoch": 0.48, "grad_norm": 0.9675139784812927, "learning_rate": 5.619132962975544e-06, "loss": 0.5339, "step": 7524 }, { "epoch": 0.48, "grad_norm": 1.063788890838623, "learning_rate": 5.6181148524445506e-06, "loss": 0.5195, "step": 7525 }, { "epoch": 0.48, "grad_norm": 1.0479037761688232, "learning_rate": 5.617096715886889e-06, "loss": 0.532, "step": 7526 }, { "epoch": 0.48, "grad_norm": 1.13522469997406, "learning_rate": 5.616078553345434e-06, "loss": 0.5687, "step": 7527 }, { "epoch": 0.48, "grad_norm": 1.0124516487121582, "learning_rate": 5.615060364863053e-06, "loss": 0.521, "step": 7528 }, { "epoch": 0.48, "grad_norm": 1.0758651494979858, "learning_rate": 5.6140421504826205e-06, "loss": 0.5213, "step": 7529 }, { "epoch": 0.48, "grad_norm": 1.0261516571044922, "learning_rate": 5.6130239102470075e-06, "loss": 0.5589, "step": 7530 }, { "epoch": 0.48, "grad_norm": 1.0139498710632324, "learning_rate": 5.612005644199092e-06, "loss": 0.5123, "step": 7531 }, { "epoch": 0.48, "grad_norm": 0.9417591691017151, "learning_rate": 5.610987352381747e-06, "loss": 0.5109, "step": 7532 }, { "epoch": 0.48, "grad_norm": 1.1065438985824585, "learning_rate": 5.60996903483785e-06, "loss": 0.5689, "step": 7533 }, { "epoch": 0.48, "grad_norm": 1.055302381515503, "learning_rate": 5.608950691610279e-06, "loss": 0.526, "step": 7534 }, { "epoch": 0.48, "grad_norm": 1.0546671152114868, "learning_rate": 5.607932322741912e-06, "loss": 0.5658, "step": 7535 }, { "epoch": 0.48, "grad_norm": 1.0092540979385376, "learning_rate": 5.60691392827563e-06, "loss": 0.5359, "step": 7536 }, { "epoch": 0.48, "grad_norm": 0.9884355068206787, "learning_rate": 5.605895508254315e-06, "loss": 0.4899, "step": 7537 }, { "epoch": 0.48, "grad_norm": 1.0973706245422363, "learning_rate": 5.604877062720848e-06, "loss": 0.5866, "step": 7538 }, { "epoch": 0.48, "grad_norm": 0.958016037940979, "learning_rate": 5.603858591718111e-06, "loss": 0.5111, "step": 7539 }, { "epoch": 0.48, "grad_norm": 1.0794581174850464, "learning_rate": 5.602840095288989e-06, "loss": 0.5572, "step": 7540 }, { "epoch": 0.48, "grad_norm": 1.0130765438079834, "learning_rate": 5.6018215734763685e-06, "loss": 0.5632, "step": 7541 }, { "epoch": 0.48, "grad_norm": 1.0051946640014648, "learning_rate": 5.600803026323136e-06, "loss": 0.4771, "step": 7542 }, { "epoch": 0.48, "grad_norm": 0.996223509311676, "learning_rate": 5.599784453872177e-06, "loss": 0.523, "step": 7543 }, { "epoch": 0.48, "grad_norm": 1.0174285173416138, "learning_rate": 5.5987658561663805e-06, "loss": 0.5544, "step": 7544 }, { "epoch": 0.48, "grad_norm": 0.9483619332313538, "learning_rate": 5.597747233248637e-06, "loss": 0.5308, "step": 7545 }, { "epoch": 0.48, "grad_norm": 1.006107211112976, "learning_rate": 5.596728585161838e-06, "loss": 0.5132, "step": 7546 }, { "epoch": 0.48, "grad_norm": 0.967338502407074, "learning_rate": 5.595709911948873e-06, "loss": 0.5632, "step": 7547 }, { "epoch": 0.48, "grad_norm": 1.0335711240768433, "learning_rate": 5.5946912136526365e-06, "loss": 0.5433, "step": 7548 }, { "epoch": 0.48, "grad_norm": 1.052869439125061, "learning_rate": 5.593672490316022e-06, "loss": 0.5595, "step": 7549 }, { "epoch": 0.48, "grad_norm": 1.0248395204544067, "learning_rate": 5.5926537419819234e-06, "loss": 0.5632, "step": 7550 }, { "epoch": 0.48, "grad_norm": 1.0395309925079346, "learning_rate": 5.591634968693238e-06, "loss": 0.5384, "step": 7551 }, { "epoch": 0.48, "grad_norm": 1.1257261037826538, "learning_rate": 5.590616170492862e-06, "loss": 0.5595, "step": 7552 }, { "epoch": 0.48, "grad_norm": 1.0620990991592407, "learning_rate": 5.589597347423696e-06, "loss": 0.5428, "step": 7553 }, { "epoch": 0.48, "grad_norm": 1.0337821245193481, "learning_rate": 5.588578499528633e-06, "loss": 0.5271, "step": 7554 }, { "epoch": 0.48, "grad_norm": 0.9734302163124084, "learning_rate": 5.587559626850578e-06, "loss": 0.5032, "step": 7555 }, { "epoch": 0.48, "grad_norm": 1.0158849954605103, "learning_rate": 5.586540729432431e-06, "loss": 0.5196, "step": 7556 }, { "epoch": 0.48, "grad_norm": 1.018912434577942, "learning_rate": 5.585521807317097e-06, "loss": 0.589, "step": 7557 }, { "epoch": 0.48, "grad_norm": 1.00540030002594, "learning_rate": 5.584502860547474e-06, "loss": 0.5473, "step": 7558 }, { "epoch": 0.48, "grad_norm": 1.018280029296875, "learning_rate": 5.5834838891664685e-06, "loss": 0.5522, "step": 7559 }, { "epoch": 0.48, "grad_norm": 1.0685259103775024, "learning_rate": 5.582464893216987e-06, "loss": 0.5125, "step": 7560 }, { "epoch": 0.48, "grad_norm": 0.96529620885849, "learning_rate": 5.5814458727419365e-06, "loss": 0.5196, "step": 7561 }, { "epoch": 0.48, "grad_norm": 0.99607253074646, "learning_rate": 5.580426827784221e-06, "loss": 0.5406, "step": 7562 }, { "epoch": 0.48, "grad_norm": 1.0173125267028809, "learning_rate": 5.579407758386751e-06, "loss": 0.5668, "step": 7563 }, { "epoch": 0.48, "grad_norm": 1.13144850730896, "learning_rate": 5.578388664592435e-06, "loss": 0.5369, "step": 7564 }, { "epoch": 0.48, "grad_norm": 0.9925999045372009, "learning_rate": 5.577369546444188e-06, "loss": 0.549, "step": 7565 }, { "epoch": 0.48, "grad_norm": 1.0043359994888306, "learning_rate": 5.576350403984915e-06, "loss": 0.5658, "step": 7566 }, { "epoch": 0.48, "grad_norm": 0.9701277613639832, "learning_rate": 5.575331237257532e-06, "loss": 0.5118, "step": 7567 }, { "epoch": 0.48, "grad_norm": 1.1391804218292236, "learning_rate": 5.574312046304954e-06, "loss": 0.5759, "step": 7568 }, { "epoch": 0.48, "grad_norm": 1.0357731580734253, "learning_rate": 5.5732928311700906e-06, "loss": 0.5158, "step": 7569 }, { "epoch": 0.48, "grad_norm": 0.991754412651062, "learning_rate": 5.5722735918958614e-06, "loss": 0.5159, "step": 7570 }, { "epoch": 0.48, "grad_norm": 1.1032490730285645, "learning_rate": 5.571254328525183e-06, "loss": 0.5228, "step": 7571 }, { "epoch": 0.48, "grad_norm": 1.0968703031539917, "learning_rate": 5.570235041100972e-06, "loss": 0.5555, "step": 7572 }, { "epoch": 0.48, "grad_norm": 1.003477692604065, "learning_rate": 5.569215729666146e-06, "loss": 0.529, "step": 7573 }, { "epoch": 0.48, "grad_norm": 1.0698657035827637, "learning_rate": 5.568196394263626e-06, "loss": 0.5357, "step": 7574 }, { "epoch": 0.48, "grad_norm": 0.9531198143959045, "learning_rate": 5.567177034936333e-06, "loss": 0.4848, "step": 7575 }, { "epoch": 0.48, "grad_norm": 1.00896418094635, "learning_rate": 5.566157651727189e-06, "loss": 0.5829, "step": 7576 }, { "epoch": 0.48, "grad_norm": 0.9287537932395935, "learning_rate": 5.5651382446791134e-06, "loss": 0.5138, "step": 7577 }, { "epoch": 0.48, "grad_norm": 1.0205477476119995, "learning_rate": 5.564118813835033e-06, "loss": 0.517, "step": 7578 }, { "epoch": 0.48, "grad_norm": 0.9876137375831604, "learning_rate": 5.563099359237872e-06, "loss": 0.522, "step": 7579 }, { "epoch": 0.48, "grad_norm": 1.0472246408462524, "learning_rate": 5.5620798809305575e-06, "loss": 0.5319, "step": 7580 }, { "epoch": 0.48, "grad_norm": 1.028066873550415, "learning_rate": 5.561060378956014e-06, "loss": 0.5435, "step": 7581 }, { "epoch": 0.48, "grad_norm": 0.9206072092056274, "learning_rate": 5.560040853357168e-06, "loss": 0.5235, "step": 7582 }, { "epoch": 0.48, "grad_norm": 1.1040226221084595, "learning_rate": 5.55902130417695e-06, "loss": 0.552, "step": 7583 }, { "epoch": 0.48, "grad_norm": 1.0034788846969604, "learning_rate": 5.558001731458293e-06, "loss": 0.5498, "step": 7584 }, { "epoch": 0.48, "grad_norm": 1.090221643447876, "learning_rate": 5.556982135244121e-06, "loss": 0.5496, "step": 7585 }, { "epoch": 0.48, "grad_norm": 1.0335744619369507, "learning_rate": 5.5559625155773685e-06, "loss": 0.5127, "step": 7586 }, { "epoch": 0.48, "grad_norm": 0.9486215710639954, "learning_rate": 5.554942872500971e-06, "loss": 0.506, "step": 7587 }, { "epoch": 0.48, "grad_norm": 0.9887097477912903, "learning_rate": 5.5539232060578574e-06, "loss": 0.5326, "step": 7588 }, { "epoch": 0.48, "grad_norm": 0.9414849281311035, "learning_rate": 5.552903516290966e-06, "loss": 0.5164, "step": 7589 }, { "epoch": 0.48, "grad_norm": 0.9639526605606079, "learning_rate": 5.55188380324323e-06, "loss": 0.5163, "step": 7590 }, { "epoch": 0.48, "grad_norm": 1.0122463703155518, "learning_rate": 5.550864066957587e-06, "loss": 0.5385, "step": 7591 }, { "epoch": 0.48, "grad_norm": 1.0007060766220093, "learning_rate": 5.549844307476975e-06, "loss": 0.5596, "step": 7592 }, { "epoch": 0.48, "grad_norm": 1.0017606019973755, "learning_rate": 5.548824524844333e-06, "loss": 0.5603, "step": 7593 }, { "epoch": 0.48, "grad_norm": 1.0292510986328125, "learning_rate": 5.547804719102596e-06, "loss": 0.4978, "step": 7594 }, { "epoch": 0.48, "grad_norm": 1.049460530281067, "learning_rate": 5.546784890294712e-06, "loss": 0.5703, "step": 7595 }, { "epoch": 0.48, "grad_norm": 1.0576456785202026, "learning_rate": 5.545765038463615e-06, "loss": 0.5687, "step": 7596 }, { "epoch": 0.48, "grad_norm": 1.138731598854065, "learning_rate": 5.544745163652253e-06, "loss": 0.5332, "step": 7597 }, { "epoch": 0.48, "grad_norm": 1.0252635478973389, "learning_rate": 5.543725265903565e-06, "loss": 0.5096, "step": 7598 }, { "epoch": 0.48, "grad_norm": 1.0496039390563965, "learning_rate": 5.5427053452605004e-06, "loss": 0.5393, "step": 7599 }, { "epoch": 0.48, "grad_norm": 0.97575443983078, "learning_rate": 5.541685401766001e-06, "loss": 0.5464, "step": 7600 }, { "epoch": 0.48, "grad_norm": 1.069856882095337, "learning_rate": 5.540665435463013e-06, "loss": 0.4764, "step": 7601 }, { "epoch": 0.48, "grad_norm": 0.9672538042068481, "learning_rate": 5.539645446394485e-06, "loss": 0.5317, "step": 7602 }, { "epoch": 0.48, "grad_norm": 1.077881932258606, "learning_rate": 5.538625434603363e-06, "loss": 0.5585, "step": 7603 }, { "epoch": 0.48, "grad_norm": 1.1011637449264526, "learning_rate": 5.5376054001326e-06, "loss": 0.5458, "step": 7604 }, { "epoch": 0.48, "grad_norm": 0.9810386896133423, "learning_rate": 5.53658534302514e-06, "loss": 0.5768, "step": 7605 }, { "epoch": 0.48, "grad_norm": 0.9890307784080505, "learning_rate": 5.535565263323942e-06, "loss": 0.4688, "step": 7606 }, { "epoch": 0.48, "grad_norm": 1.0282771587371826, "learning_rate": 5.534545161071951e-06, "loss": 0.5566, "step": 7607 }, { "epoch": 0.48, "grad_norm": 0.9736272692680359, "learning_rate": 5.533525036312125e-06, "loss": 0.5212, "step": 7608 }, { "epoch": 0.48, "grad_norm": 1.0575153827667236, "learning_rate": 5.532504889087413e-06, "loss": 0.5358, "step": 7609 }, { "epoch": 0.48, "grad_norm": 1.133524775505066, "learning_rate": 5.531484719440776e-06, "loss": 0.5454, "step": 7610 }, { "epoch": 0.48, "grad_norm": 1.0144935846328735, "learning_rate": 5.530464527415164e-06, "loss": 0.4708, "step": 7611 }, { "epoch": 0.48, "grad_norm": 1.1847485303878784, "learning_rate": 5.529444313053538e-06, "loss": 0.5269, "step": 7612 }, { "epoch": 0.48, "grad_norm": 1.0995430946350098, "learning_rate": 5.528424076398851e-06, "loss": 0.5347, "step": 7613 }, { "epoch": 0.48, "grad_norm": 0.9704165458679199, "learning_rate": 5.527403817494067e-06, "loss": 0.5345, "step": 7614 }, { "epoch": 0.48, "grad_norm": 1.0518720149993896, "learning_rate": 5.526383536382142e-06, "loss": 0.4638, "step": 7615 }, { "epoch": 0.48, "grad_norm": 1.0203839540481567, "learning_rate": 5.525363233106037e-06, "loss": 0.5295, "step": 7616 }, { "epoch": 0.48, "grad_norm": 0.931056022644043, "learning_rate": 5.524342907708714e-06, "loss": 0.5434, "step": 7617 }, { "epoch": 0.48, "grad_norm": 0.9502934217453003, "learning_rate": 5.5233225602331355e-06, "loss": 0.4904, "step": 7618 }, { "epoch": 0.48, "grad_norm": 1.056097149848938, "learning_rate": 5.522302190722264e-06, "loss": 0.5118, "step": 7619 }, { "epoch": 0.48, "grad_norm": 0.9836977124214172, "learning_rate": 5.5212817992190644e-06, "loss": 0.5366, "step": 7620 }, { "epoch": 0.48, "grad_norm": 1.0775401592254639, "learning_rate": 5.5202613857665025e-06, "loss": 0.5572, "step": 7621 }, { "epoch": 0.48, "grad_norm": 0.9739736914634705, "learning_rate": 5.5192409504075416e-06, "loss": 0.5008, "step": 7622 }, { "epoch": 0.48, "grad_norm": 1.0487024784088135, "learning_rate": 5.518220493185153e-06, "loss": 0.5212, "step": 7623 }, { "epoch": 0.48, "grad_norm": 1.0653893947601318, "learning_rate": 5.517200014142301e-06, "loss": 0.5076, "step": 7624 }, { "epoch": 0.48, "grad_norm": 1.0490169525146484, "learning_rate": 5.516179513321955e-06, "loss": 0.5149, "step": 7625 }, { "epoch": 0.48, "grad_norm": 1.0116205215454102, "learning_rate": 5.5151589907670856e-06, "loss": 0.5025, "step": 7626 }, { "epoch": 0.48, "grad_norm": 1.0689759254455566, "learning_rate": 5.514138446520664e-06, "loss": 0.5473, "step": 7627 }, { "epoch": 0.48, "grad_norm": 0.9866931438446045, "learning_rate": 5.51311788062566e-06, "loss": 0.5215, "step": 7628 }, { "epoch": 0.48, "grad_norm": 0.9698858261108398, "learning_rate": 5.512097293125047e-06, "loss": 0.4779, "step": 7629 }, { "epoch": 0.48, "grad_norm": 1.0759952068328857, "learning_rate": 5.511076684061799e-06, "loss": 0.5025, "step": 7630 }, { "epoch": 0.48, "grad_norm": 1.093837022781372, "learning_rate": 5.51005605347889e-06, "loss": 0.5604, "step": 7631 }, { "epoch": 0.48, "grad_norm": 1.0192079544067383, "learning_rate": 5.509035401419296e-06, "loss": 0.4878, "step": 7632 }, { "epoch": 0.48, "grad_norm": 1.0973546504974365, "learning_rate": 5.50801472792599e-06, "loss": 0.5226, "step": 7633 }, { "epoch": 0.48, "grad_norm": 0.9534756541252136, "learning_rate": 5.5069940330419525e-06, "loss": 0.4877, "step": 7634 }, { "epoch": 0.48, "grad_norm": 1.0228980779647827, "learning_rate": 5.5059733168101596e-06, "loss": 0.5558, "step": 7635 }, { "epoch": 0.48, "grad_norm": 1.0513986349105835, "learning_rate": 5.504952579273589e-06, "loss": 0.497, "step": 7636 }, { "epoch": 0.48, "grad_norm": 1.0697723627090454, "learning_rate": 5.503931820475223e-06, "loss": 0.5738, "step": 7637 }, { "epoch": 0.48, "grad_norm": 0.9767886996269226, "learning_rate": 5.502911040458042e-06, "loss": 0.5528, "step": 7638 }, { "epoch": 0.48, "grad_norm": 0.9490932822227478, "learning_rate": 5.501890239265025e-06, "loss": 0.5065, "step": 7639 }, { "epoch": 0.48, "grad_norm": 1.006232500076294, "learning_rate": 5.500869416939156e-06, "loss": 0.5653, "step": 7640 }, { "epoch": 0.48, "grad_norm": 1.022708773612976, "learning_rate": 5.49984857352342e-06, "loss": 0.5219, "step": 7641 }, { "epoch": 0.48, "grad_norm": 1.036411166191101, "learning_rate": 5.4988277090607986e-06, "loss": 0.4857, "step": 7642 }, { "epoch": 0.48, "grad_norm": 1.144761562347412, "learning_rate": 5.4978068235942775e-06, "loss": 0.5774, "step": 7643 }, { "epoch": 0.48, "grad_norm": 1.1377125978469849, "learning_rate": 5.496785917166843e-06, "loss": 0.537, "step": 7644 }, { "epoch": 0.48, "grad_norm": 1.0362329483032227, "learning_rate": 5.49576498982148e-06, "loss": 0.5017, "step": 7645 }, { "epoch": 0.48, "grad_norm": 0.9684587121009827, "learning_rate": 5.49474404160118e-06, "loss": 0.4949, "step": 7646 }, { "epoch": 0.48, "grad_norm": 1.0577255487442017, "learning_rate": 5.4937230725489285e-06, "loss": 0.5589, "step": 7647 }, { "epoch": 0.48, "grad_norm": 0.99142986536026, "learning_rate": 5.492702082707716e-06, "loss": 0.5267, "step": 7648 }, { "epoch": 0.48, "grad_norm": 1.0030139684677124, "learning_rate": 5.491681072120534e-06, "loss": 0.5461, "step": 7649 }, { "epoch": 0.48, "grad_norm": 1.0612186193466187, "learning_rate": 5.4906600408303715e-06, "loss": 0.5408, "step": 7650 }, { "epoch": 0.48, "grad_norm": 1.0308854579925537, "learning_rate": 5.489638988880222e-06, "loss": 0.5, "step": 7651 }, { "epoch": 0.48, "grad_norm": 0.9938170909881592, "learning_rate": 5.488617916313077e-06, "loss": 0.4949, "step": 7652 }, { "epoch": 0.48, "grad_norm": 0.9927681684494019, "learning_rate": 5.487596823171932e-06, "loss": 0.4685, "step": 7653 }, { "epoch": 0.48, "grad_norm": 0.9878483414649963, "learning_rate": 5.486575709499782e-06, "loss": 0.5507, "step": 7654 }, { "epoch": 0.48, "grad_norm": 1.0441032648086548, "learning_rate": 5.48555457533962e-06, "loss": 0.5432, "step": 7655 }, { "epoch": 0.49, "grad_norm": 0.9428449869155884, "learning_rate": 5.484533420734444e-06, "loss": 0.494, "step": 7656 }, { "epoch": 0.49, "grad_norm": 1.008828043937683, "learning_rate": 5.483512245727252e-06, "loss": 0.5541, "step": 7657 }, { "epoch": 0.49, "grad_norm": 0.9554852247238159, "learning_rate": 5.482491050361041e-06, "loss": 0.5062, "step": 7658 }, { "epoch": 0.49, "grad_norm": 0.9811081886291504, "learning_rate": 5.48146983467881e-06, "loss": 0.5395, "step": 7659 }, { "epoch": 0.49, "grad_norm": 1.1356273889541626, "learning_rate": 5.480448598723559e-06, "loss": 0.5582, "step": 7660 }, { "epoch": 0.49, "grad_norm": 1.0313886404037476, "learning_rate": 5.47942734253829e-06, "loss": 0.5369, "step": 7661 }, { "epoch": 0.49, "grad_norm": 1.0309735536575317, "learning_rate": 5.478406066166003e-06, "loss": 0.4946, "step": 7662 }, { "epoch": 0.49, "grad_norm": 1.0279486179351807, "learning_rate": 5.477384769649701e-06, "loss": 0.5469, "step": 7663 }, { "epoch": 0.49, "grad_norm": 1.0054008960723877, "learning_rate": 5.476363453032387e-06, "loss": 0.5179, "step": 7664 }, { "epoch": 0.49, "grad_norm": 0.8933504223823547, "learning_rate": 5.475342116357064e-06, "loss": 0.547, "step": 7665 }, { "epoch": 0.49, "grad_norm": 1.058522343635559, "learning_rate": 5.474320759666739e-06, "loss": 0.5159, "step": 7666 }, { "epoch": 0.49, "grad_norm": 1.122342824935913, "learning_rate": 5.473299383004417e-06, "loss": 0.5809, "step": 7667 }, { "epoch": 0.49, "grad_norm": 1.0336556434631348, "learning_rate": 5.472277986413104e-06, "loss": 0.4835, "step": 7668 }, { "epoch": 0.49, "grad_norm": 0.9952534437179565, "learning_rate": 5.471256569935809e-06, "loss": 0.5157, "step": 7669 }, { "epoch": 0.49, "grad_norm": 1.0328069925308228, "learning_rate": 5.470235133615538e-06, "loss": 0.5165, "step": 7670 }, { "epoch": 0.49, "grad_norm": 1.0147979259490967, "learning_rate": 5.4692136774953004e-06, "loss": 0.5189, "step": 7671 }, { "epoch": 0.49, "grad_norm": 1.0270156860351562, "learning_rate": 5.46819220161811e-06, "loss": 0.5101, "step": 7672 }, { "epoch": 0.49, "grad_norm": 0.9940213561058044, "learning_rate": 5.467170706026973e-06, "loss": 0.4904, "step": 7673 }, { "epoch": 0.49, "grad_norm": 1.0557513236999512, "learning_rate": 5.466149190764902e-06, "loss": 0.5072, "step": 7674 }, { "epoch": 0.49, "grad_norm": 1.0868325233459473, "learning_rate": 5.465127655874911e-06, "loss": 0.5929, "step": 7675 }, { "epoch": 0.49, "grad_norm": 0.991359293460846, "learning_rate": 5.464106101400013e-06, "loss": 0.5201, "step": 7676 }, { "epoch": 0.49, "grad_norm": 0.9942566752433777, "learning_rate": 5.463084527383222e-06, "loss": 0.5014, "step": 7677 }, { "epoch": 0.49, "grad_norm": 0.9597623944282532, "learning_rate": 5.4620629338675505e-06, "loss": 0.501, "step": 7678 }, { "epoch": 0.49, "grad_norm": 1.0973620414733887, "learning_rate": 5.461041320896019e-06, "loss": 0.5523, "step": 7679 }, { "epoch": 0.49, "grad_norm": 1.0960867404937744, "learning_rate": 5.460019688511639e-06, "loss": 0.5053, "step": 7680 }, { "epoch": 0.49, "grad_norm": 0.9990050196647644, "learning_rate": 5.458998036757431e-06, "loss": 0.5863, "step": 7681 }, { "epoch": 0.49, "grad_norm": 1.0510493516921997, "learning_rate": 5.4579763656764115e-06, "loss": 0.5487, "step": 7682 }, { "epoch": 0.49, "grad_norm": 1.0192859172821045, "learning_rate": 5.456954675311602e-06, "loss": 0.5118, "step": 7683 }, { "epoch": 0.49, "grad_norm": 1.112775444984436, "learning_rate": 5.45593296570602e-06, "loss": 0.5487, "step": 7684 }, { "epoch": 0.49, "grad_norm": 1.047751784324646, "learning_rate": 5.454911236902687e-06, "loss": 0.561, "step": 7685 }, { "epoch": 0.49, "grad_norm": 1.1452687978744507, "learning_rate": 5.453889488944623e-06, "loss": 0.4948, "step": 7686 }, { "epoch": 0.49, "grad_norm": 1.0465176105499268, "learning_rate": 5.452867721874854e-06, "loss": 0.5825, "step": 7687 }, { "epoch": 0.49, "grad_norm": 1.027136206626892, "learning_rate": 5.4518459357364e-06, "loss": 0.5623, "step": 7688 }, { "epoch": 0.49, "grad_norm": 0.9339892864227295, "learning_rate": 5.4508241305722856e-06, "loss": 0.5346, "step": 7689 }, { "epoch": 0.49, "grad_norm": 1.0112874507904053, "learning_rate": 5.449802306425532e-06, "loss": 0.496, "step": 7690 }, { "epoch": 0.49, "grad_norm": 1.0279783010482788, "learning_rate": 5.448780463339172e-06, "loss": 0.5555, "step": 7691 }, { "epoch": 0.49, "grad_norm": 1.0531165599822998, "learning_rate": 5.447758601356226e-06, "loss": 0.5527, "step": 7692 }, { "epoch": 0.49, "grad_norm": 1.04063880443573, "learning_rate": 5.446736720519725e-06, "loss": 0.5314, "step": 7693 }, { "epoch": 0.49, "grad_norm": 1.0666983127593994, "learning_rate": 5.445714820872693e-06, "loss": 0.5058, "step": 7694 }, { "epoch": 0.49, "grad_norm": 0.9285444617271423, "learning_rate": 5.4446929024581606e-06, "loss": 0.4899, "step": 7695 }, { "epoch": 0.49, "grad_norm": 1.0237271785736084, "learning_rate": 5.4436709653191575e-06, "loss": 0.5197, "step": 7696 }, { "epoch": 0.49, "grad_norm": 0.9723763465881348, "learning_rate": 5.442649009498713e-06, "loss": 0.5023, "step": 7697 }, { "epoch": 0.49, "grad_norm": 0.9989533424377441, "learning_rate": 5.441627035039859e-06, "loss": 0.5363, "step": 7698 }, { "epoch": 0.49, "grad_norm": 1.0461578369140625, "learning_rate": 5.440605041985626e-06, "loss": 0.4827, "step": 7699 }, { "epoch": 0.49, "grad_norm": 0.984526515007019, "learning_rate": 5.439583030379049e-06, "loss": 0.4742, "step": 7700 }, { "epoch": 0.49, "grad_norm": 1.0510518550872803, "learning_rate": 5.438561000263157e-06, "loss": 0.4966, "step": 7701 }, { "epoch": 0.49, "grad_norm": 1.0577447414398193, "learning_rate": 5.4375389516809895e-06, "loss": 0.5713, "step": 7702 }, { "epoch": 0.49, "grad_norm": 1.0560086965560913, "learning_rate": 5.436516884675579e-06, "loss": 0.5584, "step": 7703 }, { "epoch": 0.49, "grad_norm": 1.076753854751587, "learning_rate": 5.43549479928996e-06, "loss": 0.5296, "step": 7704 }, { "epoch": 0.49, "grad_norm": 0.9767786860466003, "learning_rate": 5.434472695567169e-06, "loss": 0.4962, "step": 7705 }, { "epoch": 0.49, "grad_norm": 0.9865174889564514, "learning_rate": 5.433450573550246e-06, "loss": 0.5034, "step": 7706 }, { "epoch": 0.49, "grad_norm": 1.0956785678863525, "learning_rate": 5.432428433282226e-06, "loss": 0.5249, "step": 7707 }, { "epoch": 0.49, "grad_norm": 1.0960456132888794, "learning_rate": 5.43140627480615e-06, "loss": 0.5717, "step": 7708 }, { "epoch": 0.49, "grad_norm": 0.9936690926551819, "learning_rate": 5.4303840981650565e-06, "loss": 0.4751, "step": 7709 }, { "epoch": 0.49, "grad_norm": 1.0348507165908813, "learning_rate": 5.429361903401985e-06, "loss": 0.5432, "step": 7710 }, { "epoch": 0.49, "grad_norm": 1.0245975255966187, "learning_rate": 5.4283396905599785e-06, "loss": 0.5356, "step": 7711 }, { "epoch": 0.49, "grad_norm": 1.0867342948913574, "learning_rate": 5.427317459682076e-06, "loss": 0.5326, "step": 7712 }, { "epoch": 0.49, "grad_norm": 0.9968048334121704, "learning_rate": 5.426295210811323e-06, "loss": 0.5311, "step": 7713 }, { "epoch": 0.49, "grad_norm": 0.9450915455818176, "learning_rate": 5.425272943990761e-06, "loss": 0.503, "step": 7714 }, { "epoch": 0.49, "grad_norm": 1.1199814081192017, "learning_rate": 5.4242506592634354e-06, "loss": 0.5206, "step": 7715 }, { "epoch": 0.49, "grad_norm": 1.0570131540298462, "learning_rate": 5.423228356672391e-06, "loss": 0.4955, "step": 7716 }, { "epoch": 0.49, "grad_norm": 0.9853617548942566, "learning_rate": 5.422206036260671e-06, "loss": 0.5114, "step": 7717 }, { "epoch": 0.49, "grad_norm": 0.9816334247589111, "learning_rate": 5.421183698071325e-06, "loss": 0.5552, "step": 7718 }, { "epoch": 0.49, "grad_norm": 1.049434781074524, "learning_rate": 5.420161342147399e-06, "loss": 0.5132, "step": 7719 }, { "epoch": 0.49, "grad_norm": 1.0932750701904297, "learning_rate": 5.4191389685319395e-06, "loss": 0.5444, "step": 7720 }, { "epoch": 0.49, "grad_norm": 0.9652199149131775, "learning_rate": 5.4181165772679955e-06, "loss": 0.5003, "step": 7721 }, { "epoch": 0.49, "grad_norm": 0.9985790252685547, "learning_rate": 5.417094168398618e-06, "loss": 0.5291, "step": 7722 }, { "epoch": 0.49, "grad_norm": 1.0097248554229736, "learning_rate": 5.416071741966856e-06, "loss": 0.5368, "step": 7723 }, { "epoch": 0.49, "grad_norm": 1.0083731412887573, "learning_rate": 5.41504929801576e-06, "loss": 0.5365, "step": 7724 }, { "epoch": 0.49, "grad_norm": 0.9989120364189148, "learning_rate": 5.414026836588382e-06, "loss": 0.5189, "step": 7725 }, { "epoch": 0.49, "grad_norm": 0.9898789525032043, "learning_rate": 5.413004357727775e-06, "loss": 0.5192, "step": 7726 }, { "epoch": 0.49, "grad_norm": 1.0092414617538452, "learning_rate": 5.411981861476991e-06, "loss": 0.4877, "step": 7727 }, { "epoch": 0.49, "grad_norm": 0.991629958152771, "learning_rate": 5.4109593478790825e-06, "loss": 0.4838, "step": 7728 }, { "epoch": 0.49, "grad_norm": 1.0284823179244995, "learning_rate": 5.409936816977106e-06, "loss": 0.5843, "step": 7729 }, { "epoch": 0.49, "grad_norm": 0.9995983839035034, "learning_rate": 5.408914268814117e-06, "loss": 0.5284, "step": 7730 }, { "epoch": 0.49, "grad_norm": 1.0161820650100708, "learning_rate": 5.4078917034331705e-06, "loss": 0.4845, "step": 7731 }, { "epoch": 0.49, "grad_norm": 1.0265183448791504, "learning_rate": 5.4068691208773225e-06, "loss": 0.4619, "step": 7732 }, { "epoch": 0.49, "grad_norm": 1.013788104057312, "learning_rate": 5.405846521189632e-06, "loss": 0.5413, "step": 7733 }, { "epoch": 0.49, "grad_norm": 1.008409857749939, "learning_rate": 5.404823904413157e-06, "loss": 0.5253, "step": 7734 }, { "epoch": 0.49, "grad_norm": 1.005803108215332, "learning_rate": 5.403801270590955e-06, "loss": 0.5186, "step": 7735 }, { "epoch": 0.49, "grad_norm": 1.036096453666687, "learning_rate": 5.402778619766086e-06, "loss": 0.5444, "step": 7736 }, { "epoch": 0.49, "grad_norm": 1.055092453956604, "learning_rate": 5.40175595198161e-06, "loss": 0.5386, "step": 7737 }, { "epoch": 0.49, "grad_norm": 1.0218758583068848, "learning_rate": 5.400733267280589e-06, "loss": 0.5209, "step": 7738 }, { "epoch": 0.49, "grad_norm": 0.9987779259681702, "learning_rate": 5.399710565706084e-06, "loss": 0.5508, "step": 7739 }, { "epoch": 0.49, "grad_norm": 0.9801649451255798, "learning_rate": 5.3986878473011585e-06, "loss": 0.5269, "step": 7740 }, { "epoch": 0.49, "grad_norm": 0.9721705317497253, "learning_rate": 5.397665112108874e-06, "loss": 0.4668, "step": 7741 }, { "epoch": 0.49, "grad_norm": 0.9974575638771057, "learning_rate": 5.3966423601722955e-06, "loss": 0.5104, "step": 7742 }, { "epoch": 0.49, "grad_norm": 1.067785382270813, "learning_rate": 5.3956195915344855e-06, "loss": 0.5503, "step": 7743 }, { "epoch": 0.49, "grad_norm": 1.03829026222229, "learning_rate": 5.394596806238511e-06, "loss": 0.552, "step": 7744 }, { "epoch": 0.49, "grad_norm": 1.060089349746704, "learning_rate": 5.39357400432744e-06, "loss": 0.4677, "step": 7745 }, { "epoch": 0.49, "grad_norm": 1.0661898851394653, "learning_rate": 5.392551185844334e-06, "loss": 0.5309, "step": 7746 }, { "epoch": 0.49, "grad_norm": 1.0674138069152832, "learning_rate": 5.391528350832265e-06, "loss": 0.5272, "step": 7747 }, { "epoch": 0.49, "grad_norm": 1.078151822090149, "learning_rate": 5.3905054993342985e-06, "loss": 0.5373, "step": 7748 }, { "epoch": 0.49, "grad_norm": 1.0482679605484009, "learning_rate": 5.389482631393504e-06, "loss": 0.552, "step": 7749 }, { "epoch": 0.49, "grad_norm": 1.006722092628479, "learning_rate": 5.388459747052951e-06, "loss": 0.5065, "step": 7750 }, { "epoch": 0.49, "grad_norm": 1.036423921585083, "learning_rate": 5.387436846355709e-06, "loss": 0.5223, "step": 7751 }, { "epoch": 0.49, "grad_norm": 1.0063308477401733, "learning_rate": 5.386413929344849e-06, "loss": 0.5187, "step": 7752 }, { "epoch": 0.49, "grad_norm": 1.0499755144119263, "learning_rate": 5.3853909960634446e-06, "loss": 0.474, "step": 7753 }, { "epoch": 0.49, "grad_norm": 1.002184271812439, "learning_rate": 5.3843680465545635e-06, "loss": 0.5435, "step": 7754 }, { "epoch": 0.49, "grad_norm": 1.0605528354644775, "learning_rate": 5.3833450808612816e-06, "loss": 0.5474, "step": 7755 }, { "epoch": 0.49, "grad_norm": 0.9395731091499329, "learning_rate": 5.382322099026673e-06, "loss": 0.5307, "step": 7756 }, { "epoch": 0.49, "grad_norm": 0.9526917934417725, "learning_rate": 5.38129910109381e-06, "loss": 0.5048, "step": 7757 }, { "epoch": 0.49, "grad_norm": 1.0066355466842651, "learning_rate": 5.380276087105769e-06, "loss": 0.5045, "step": 7758 }, { "epoch": 0.49, "grad_norm": 0.9914907217025757, "learning_rate": 5.379253057105623e-06, "loss": 0.5049, "step": 7759 }, { "epoch": 0.49, "grad_norm": 1.0168482065200806, "learning_rate": 5.378230011136453e-06, "loss": 0.5707, "step": 7760 }, { "epoch": 0.49, "grad_norm": 1.1721969842910767, "learning_rate": 5.37720694924133e-06, "loss": 0.4713, "step": 7761 }, { "epoch": 0.49, "grad_norm": 0.9489725828170776, "learning_rate": 5.376183871463336e-06, "loss": 0.4936, "step": 7762 }, { "epoch": 0.49, "grad_norm": 1.0816004276275635, "learning_rate": 5.375160777845548e-06, "loss": 0.5248, "step": 7763 }, { "epoch": 0.49, "grad_norm": 1.0063161849975586, "learning_rate": 5.3741376684310455e-06, "loss": 0.5209, "step": 7764 }, { "epoch": 0.49, "grad_norm": 0.9822127223014832, "learning_rate": 5.3731145432629065e-06, "loss": 0.529, "step": 7765 }, { "epoch": 0.49, "grad_norm": 1.0101993083953857, "learning_rate": 5.3720914023842105e-06, "loss": 0.5136, "step": 7766 }, { "epoch": 0.49, "grad_norm": 1.0614701509475708, "learning_rate": 5.371068245838042e-06, "loss": 0.5653, "step": 7767 }, { "epoch": 0.49, "grad_norm": 0.9922329187393188, "learning_rate": 5.37004507366748e-06, "loss": 0.5353, "step": 7768 }, { "epoch": 0.49, "grad_norm": 1.0334720611572266, "learning_rate": 5.369021885915607e-06, "loss": 0.5286, "step": 7769 }, { "epoch": 0.49, "grad_norm": 1.0917425155639648, "learning_rate": 5.367998682625506e-06, "loss": 0.5455, "step": 7770 }, { "epoch": 0.49, "grad_norm": 0.9876338243484497, "learning_rate": 5.366975463840262e-06, "loss": 0.4676, "step": 7771 }, { "epoch": 0.49, "grad_norm": 1.0127947330474854, "learning_rate": 5.365952229602956e-06, "loss": 0.4777, "step": 7772 }, { "epoch": 0.49, "grad_norm": 1.0870912075042725, "learning_rate": 5.3649289799566766e-06, "loss": 0.5259, "step": 7773 }, { "epoch": 0.49, "grad_norm": 0.9871841669082642, "learning_rate": 5.363905714944505e-06, "loss": 0.5157, "step": 7774 }, { "epoch": 0.49, "grad_norm": 1.1193543672561646, "learning_rate": 5.362882434609531e-06, "loss": 0.5634, "step": 7775 }, { "epoch": 0.49, "grad_norm": 1.0339303016662598, "learning_rate": 5.36185913899484e-06, "loss": 0.5139, "step": 7776 }, { "epoch": 0.49, "grad_norm": 1.0583903789520264, "learning_rate": 5.36083582814352e-06, "loss": 0.5469, "step": 7777 }, { "epoch": 0.49, "grad_norm": 1.0183539390563965, "learning_rate": 5.359812502098657e-06, "loss": 0.5168, "step": 7778 }, { "epoch": 0.49, "grad_norm": 1.0020984411239624, "learning_rate": 5.358789160903343e-06, "loss": 0.5192, "step": 7779 }, { "epoch": 0.49, "grad_norm": 1.016532063484192, "learning_rate": 5.357765804600664e-06, "loss": 0.5113, "step": 7780 }, { "epoch": 0.49, "grad_norm": 1.0904549360275269, "learning_rate": 5.3567424332337125e-06, "loss": 0.5276, "step": 7781 }, { "epoch": 0.49, "grad_norm": 1.070608377456665, "learning_rate": 5.355719046845577e-06, "loss": 0.5316, "step": 7782 }, { "epoch": 0.49, "grad_norm": 1.0256794691085815, "learning_rate": 5.354695645479352e-06, "loss": 0.5148, "step": 7783 }, { "epoch": 0.49, "grad_norm": 1.0035301446914673, "learning_rate": 5.353672229178125e-06, "loss": 0.5005, "step": 7784 }, { "epoch": 0.49, "grad_norm": 1.0000191926956177, "learning_rate": 5.352648797984993e-06, "loss": 0.5198, "step": 7785 }, { "epoch": 0.49, "grad_norm": 1.0152604579925537, "learning_rate": 5.351625351943044e-06, "loss": 0.5477, "step": 7786 }, { "epoch": 0.49, "grad_norm": 0.9971880316734314, "learning_rate": 5.350601891095377e-06, "loss": 0.52, "step": 7787 }, { "epoch": 0.49, "grad_norm": 0.9151439070701599, "learning_rate": 5.349578415485085e-06, "loss": 0.4491, "step": 7788 }, { "epoch": 0.49, "grad_norm": 1.0387840270996094, "learning_rate": 5.34855492515526e-06, "loss": 0.5188, "step": 7789 }, { "epoch": 0.49, "grad_norm": 1.0091477632522583, "learning_rate": 5.347531420148999e-06, "loss": 0.5585, "step": 7790 }, { "epoch": 0.49, "grad_norm": 1.001717209815979, "learning_rate": 5.3465079005094e-06, "loss": 0.4615, "step": 7791 }, { "epoch": 0.49, "grad_norm": 1.0388224124908447, "learning_rate": 5.34548436627956e-06, "loss": 0.5465, "step": 7792 }, { "epoch": 0.49, "grad_norm": 1.0266194343566895, "learning_rate": 5.344460817502573e-06, "loss": 0.5628, "step": 7793 }, { "epoch": 0.49, "grad_norm": 0.9543954730033875, "learning_rate": 5.34343725422154e-06, "loss": 0.5228, "step": 7794 }, { "epoch": 0.49, "grad_norm": 0.9868411421775818, "learning_rate": 5.342413676479559e-06, "loss": 0.5379, "step": 7795 }, { "epoch": 0.49, "grad_norm": 1.0662660598754883, "learning_rate": 5.34139008431973e-06, "loss": 0.5012, "step": 7796 }, { "epoch": 0.49, "grad_norm": 0.9981074333190918, "learning_rate": 5.34036647778515e-06, "loss": 0.5613, "step": 7797 }, { "epoch": 0.49, "grad_norm": 0.9298437833786011, "learning_rate": 5.3393428569189235e-06, "loss": 0.489, "step": 7798 }, { "epoch": 0.49, "grad_norm": 0.973460853099823, "learning_rate": 5.338319221764149e-06, "loss": 0.509, "step": 7799 }, { "epoch": 0.49, "grad_norm": 0.9743896126747131, "learning_rate": 5.33729557236393e-06, "loss": 0.5396, "step": 7800 }, { "epoch": 0.49, "grad_norm": 0.9878454804420471, "learning_rate": 5.336271908761367e-06, "loss": 0.5333, "step": 7801 }, { "epoch": 0.49, "grad_norm": 0.89174884557724, "learning_rate": 5.335248230999565e-06, "loss": 0.4602, "step": 7802 }, { "epoch": 0.49, "grad_norm": 1.1391502618789673, "learning_rate": 5.334224539121625e-06, "loss": 0.5299, "step": 7803 }, { "epoch": 0.49, "grad_norm": 0.9654562473297119, "learning_rate": 5.333200833170652e-06, "loss": 0.489, "step": 7804 }, { "epoch": 0.49, "grad_norm": 1.0083357095718384, "learning_rate": 5.332177113189751e-06, "loss": 0.5592, "step": 7805 }, { "epoch": 0.49, "grad_norm": 1.0468686819076538, "learning_rate": 5.331153379222028e-06, "loss": 0.55, "step": 7806 }, { "epoch": 0.49, "grad_norm": 1.0573049783706665, "learning_rate": 5.330129631310589e-06, "loss": 0.5602, "step": 7807 }, { "epoch": 0.49, "grad_norm": 1.0374951362609863, "learning_rate": 5.3291058694985385e-06, "loss": 0.5454, "step": 7808 }, { "epoch": 0.49, "grad_norm": 0.983371376991272, "learning_rate": 5.328082093828984e-06, "loss": 0.5491, "step": 7809 }, { "epoch": 0.49, "grad_norm": 1.0157434940338135, "learning_rate": 5.327058304345035e-06, "loss": 0.5071, "step": 7810 }, { "epoch": 0.49, "grad_norm": 1.0032756328582764, "learning_rate": 5.3260345010898e-06, "loss": 0.5273, "step": 7811 }, { "epoch": 0.49, "grad_norm": 1.0313307046890259, "learning_rate": 5.325010684106384e-06, "loss": 0.514, "step": 7812 }, { "epoch": 0.49, "grad_norm": 1.046082854270935, "learning_rate": 5.323986853437899e-06, "loss": 0.4815, "step": 7813 }, { "epoch": 0.5, "grad_norm": 1.1349530220031738, "learning_rate": 5.322963009127454e-06, "loss": 0.5488, "step": 7814 }, { "epoch": 0.5, "grad_norm": 1.0096465349197388, "learning_rate": 5.321939151218163e-06, "loss": 0.5026, "step": 7815 }, { "epoch": 0.5, "grad_norm": 1.0394556522369385, "learning_rate": 5.320915279753132e-06, "loss": 0.5339, "step": 7816 }, { "epoch": 0.5, "grad_norm": 0.9634128212928772, "learning_rate": 5.319891394775475e-06, "loss": 0.4668, "step": 7817 }, { "epoch": 0.5, "grad_norm": 1.2029508352279663, "learning_rate": 5.3188674963283064e-06, "loss": 0.5302, "step": 7818 }, { "epoch": 0.5, "grad_norm": 1.0494053363800049, "learning_rate": 5.317843584454734e-06, "loss": 0.4982, "step": 7819 }, { "epoch": 0.5, "grad_norm": 1.0254759788513184, "learning_rate": 5.316819659197875e-06, "loss": 0.5218, "step": 7820 }, { "epoch": 0.5, "grad_norm": 1.0763325691223145, "learning_rate": 5.315795720600842e-06, "loss": 0.5358, "step": 7821 }, { "epoch": 0.5, "grad_norm": 0.9637579917907715, "learning_rate": 5.314771768706751e-06, "loss": 0.5269, "step": 7822 }, { "epoch": 0.5, "grad_norm": 1.058788776397705, "learning_rate": 5.313747803558714e-06, "loss": 0.5326, "step": 7823 }, { "epoch": 0.5, "grad_norm": 0.9709265828132629, "learning_rate": 5.312723825199849e-06, "loss": 0.5564, "step": 7824 }, { "epoch": 0.5, "grad_norm": 1.089979648590088, "learning_rate": 5.311699833673273e-06, "loss": 0.5756, "step": 7825 }, { "epoch": 0.5, "grad_norm": 1.0571144819259644, "learning_rate": 5.310675829022101e-06, "loss": 0.499, "step": 7826 }, { "epoch": 0.5, "grad_norm": 1.0553808212280273, "learning_rate": 5.309651811289449e-06, "loss": 0.5218, "step": 7827 }, { "epoch": 0.5, "grad_norm": 1.0515599250793457, "learning_rate": 5.308627780518437e-06, "loss": 0.5052, "step": 7828 }, { "epoch": 0.5, "grad_norm": 1.0557936429977417, "learning_rate": 5.307603736752183e-06, "loss": 0.4942, "step": 7829 }, { "epoch": 0.5, "grad_norm": 0.9929682612419128, "learning_rate": 5.306579680033807e-06, "loss": 0.5244, "step": 7830 }, { "epoch": 0.5, "grad_norm": 1.0507532358169556, "learning_rate": 5.305555610406425e-06, "loss": 0.5383, "step": 7831 }, { "epoch": 0.5, "grad_norm": 1.044619083404541, "learning_rate": 5.30453152791316e-06, "loss": 0.6029, "step": 7832 }, { "epoch": 0.5, "grad_norm": 0.9375192523002625, "learning_rate": 5.303507432597134e-06, "loss": 0.5224, "step": 7833 }, { "epoch": 0.5, "grad_norm": 0.9598952531814575, "learning_rate": 5.302483324501463e-06, "loss": 0.5442, "step": 7834 }, { "epoch": 0.5, "grad_norm": 0.9932873845100403, "learning_rate": 5.3014592036692715e-06, "loss": 0.5299, "step": 7835 }, { "epoch": 0.5, "grad_norm": 1.100043773651123, "learning_rate": 5.300435070143683e-06, "loss": 0.5309, "step": 7836 }, { "epoch": 0.5, "grad_norm": 1.0243855714797974, "learning_rate": 5.2994109239678185e-06, "loss": 0.5571, "step": 7837 }, { "epoch": 0.5, "grad_norm": 0.9657698273658752, "learning_rate": 5.298386765184801e-06, "loss": 0.5374, "step": 7838 }, { "epoch": 0.5, "grad_norm": 0.9283653497695923, "learning_rate": 5.297362593837755e-06, "loss": 0.4895, "step": 7839 }, { "epoch": 0.5, "grad_norm": 1.0488038063049316, "learning_rate": 5.296338409969805e-06, "loss": 0.4973, "step": 7840 }, { "epoch": 0.5, "grad_norm": 0.9657244682312012, "learning_rate": 5.295314213624076e-06, "loss": 0.5156, "step": 7841 }, { "epoch": 0.5, "grad_norm": 0.9581754207611084, "learning_rate": 5.2942900048436914e-06, "loss": 0.4893, "step": 7842 }, { "epoch": 0.5, "grad_norm": 1.078657865524292, "learning_rate": 5.293265783671778e-06, "loss": 0.5152, "step": 7843 }, { "epoch": 0.5, "grad_norm": 0.9514079689979553, "learning_rate": 5.292241550151465e-06, "loss": 0.5408, "step": 7844 }, { "epoch": 0.5, "grad_norm": 1.1053167581558228, "learning_rate": 5.291217304325875e-06, "loss": 0.5604, "step": 7845 }, { "epoch": 0.5, "grad_norm": 1.0533970594406128, "learning_rate": 5.290193046238139e-06, "loss": 0.5402, "step": 7846 }, { "epoch": 0.5, "grad_norm": 1.0341068506240845, "learning_rate": 5.289168775931381e-06, "loss": 0.5277, "step": 7847 }, { "epoch": 0.5, "grad_norm": 0.9752747416496277, "learning_rate": 5.288144493448733e-06, "loss": 0.5015, "step": 7848 }, { "epoch": 0.5, "grad_norm": 1.034314513206482, "learning_rate": 5.287120198833324e-06, "loss": 0.5076, "step": 7849 }, { "epoch": 0.5, "grad_norm": 1.0576122999191284, "learning_rate": 5.286095892128282e-06, "loss": 0.5438, "step": 7850 }, { "epoch": 0.5, "grad_norm": 1.103843331336975, "learning_rate": 5.285071573376735e-06, "loss": 0.5414, "step": 7851 }, { "epoch": 0.5, "grad_norm": 1.0938315391540527, "learning_rate": 5.2840472426218185e-06, "loss": 0.5247, "step": 7852 }, { "epoch": 0.5, "grad_norm": 1.0951156616210938, "learning_rate": 5.283022899906659e-06, "loss": 0.5565, "step": 7853 }, { "epoch": 0.5, "grad_norm": 1.049530267715454, "learning_rate": 5.28199854527439e-06, "loss": 0.5209, "step": 7854 }, { "epoch": 0.5, "grad_norm": 1.0040522813796997, "learning_rate": 5.280974178768144e-06, "loss": 0.4782, "step": 7855 }, { "epoch": 0.5, "grad_norm": 1.3000999689102173, "learning_rate": 5.279949800431052e-06, "loss": 0.5432, "step": 7856 }, { "epoch": 0.5, "grad_norm": 1.0034246444702148, "learning_rate": 5.278925410306248e-06, "loss": 0.5224, "step": 7857 }, { "epoch": 0.5, "grad_norm": 0.9741222858428955, "learning_rate": 5.277901008436865e-06, "loss": 0.5138, "step": 7858 }, { "epoch": 0.5, "grad_norm": 1.0248442888259888, "learning_rate": 5.276876594866037e-06, "loss": 0.5259, "step": 7859 }, { "epoch": 0.5, "grad_norm": 1.0938154458999634, "learning_rate": 5.2758521696369e-06, "loss": 0.5056, "step": 7860 }, { "epoch": 0.5, "grad_norm": 1.0054714679718018, "learning_rate": 5.274827732792587e-06, "loss": 0.4986, "step": 7861 }, { "epoch": 0.5, "grad_norm": 1.0986206531524658, "learning_rate": 5.273803284376234e-06, "loss": 0.5365, "step": 7862 }, { "epoch": 0.5, "grad_norm": 1.1783478260040283, "learning_rate": 5.272778824430977e-06, "loss": 0.5317, "step": 7863 }, { "epoch": 0.5, "grad_norm": 1.0371789932250977, "learning_rate": 5.271754352999953e-06, "loss": 0.5394, "step": 7864 }, { "epoch": 0.5, "grad_norm": 0.9669798612594604, "learning_rate": 5.2707298701263e-06, "loss": 0.5093, "step": 7865 }, { "epoch": 0.5, "grad_norm": 1.0162286758422852, "learning_rate": 5.269705375853151e-06, "loss": 0.5385, "step": 7866 }, { "epoch": 0.5, "grad_norm": 1.0095473527908325, "learning_rate": 5.26868087022365e-06, "loss": 0.5413, "step": 7867 }, { "epoch": 0.5, "grad_norm": 1.0174813270568848, "learning_rate": 5.26765635328093e-06, "loss": 0.5052, "step": 7868 }, { "epoch": 0.5, "grad_norm": 1.1494885683059692, "learning_rate": 5.266631825068134e-06, "loss": 0.5739, "step": 7869 }, { "epoch": 0.5, "grad_norm": 1.041465163230896, "learning_rate": 5.265607285628397e-06, "loss": 0.5501, "step": 7870 }, { "epoch": 0.5, "grad_norm": 0.9956937432289124, "learning_rate": 5.264582735004863e-06, "loss": 0.5237, "step": 7871 }, { "epoch": 0.5, "grad_norm": 0.9891913533210754, "learning_rate": 5.26355817324067e-06, "loss": 0.5737, "step": 7872 }, { "epoch": 0.5, "grad_norm": 0.93519127368927, "learning_rate": 5.26253360037896e-06, "loss": 0.547, "step": 7873 }, { "epoch": 0.5, "grad_norm": 1.0138989686965942, "learning_rate": 5.2615090164628705e-06, "loss": 0.5397, "step": 7874 }, { "epoch": 0.5, "grad_norm": 1.075402021408081, "learning_rate": 5.2604844215355484e-06, "loss": 0.4961, "step": 7875 }, { "epoch": 0.5, "grad_norm": 1.0560067892074585, "learning_rate": 5.259459815640133e-06, "loss": 0.5353, "step": 7876 }, { "epoch": 0.5, "grad_norm": 1.066972255706787, "learning_rate": 5.258435198819768e-06, "loss": 0.5427, "step": 7877 }, { "epoch": 0.5, "grad_norm": 1.0539740324020386, "learning_rate": 5.257410571117594e-06, "loss": 0.5777, "step": 7878 }, { "epoch": 0.5, "grad_norm": 0.9533175230026245, "learning_rate": 5.256385932576759e-06, "loss": 0.5012, "step": 7879 }, { "epoch": 0.5, "grad_norm": 1.1580781936645508, "learning_rate": 5.255361283240402e-06, "loss": 0.4792, "step": 7880 }, { "epoch": 0.5, "grad_norm": 1.0315744876861572, "learning_rate": 5.254336623151672e-06, "loss": 0.5604, "step": 7881 }, { "epoch": 0.5, "grad_norm": 0.9291620850563049, "learning_rate": 5.253311952353708e-06, "loss": 0.5167, "step": 7882 }, { "epoch": 0.5, "grad_norm": 1.1018723249435425, "learning_rate": 5.252287270889661e-06, "loss": 0.5823, "step": 7883 }, { "epoch": 0.5, "grad_norm": 0.9712012410163879, "learning_rate": 5.251262578802675e-06, "loss": 0.5012, "step": 7884 }, { "epoch": 0.5, "grad_norm": 1.0360691547393799, "learning_rate": 5.250237876135895e-06, "loss": 0.5469, "step": 7885 }, { "epoch": 0.5, "grad_norm": 1.002305507659912, "learning_rate": 5.2492131629324695e-06, "loss": 0.4992, "step": 7886 }, { "epoch": 0.5, "grad_norm": 1.0135037899017334, "learning_rate": 5.248188439235544e-06, "loss": 0.5125, "step": 7887 }, { "epoch": 0.5, "grad_norm": 1.004456877708435, "learning_rate": 5.247163705088267e-06, "loss": 0.509, "step": 7888 }, { "epoch": 0.5, "grad_norm": 1.0056803226470947, "learning_rate": 5.246138960533786e-06, "loss": 0.5611, "step": 7889 }, { "epoch": 0.5, "grad_norm": 1.0572956800460815, "learning_rate": 5.245114205615249e-06, "loss": 0.4943, "step": 7890 }, { "epoch": 0.5, "grad_norm": 1.105042576789856, "learning_rate": 5.244089440375807e-06, "loss": 0.5174, "step": 7891 }, { "epoch": 0.5, "grad_norm": 0.9719240665435791, "learning_rate": 5.243064664858607e-06, "loss": 0.4899, "step": 7892 }, { "epoch": 0.5, "grad_norm": 1.0321283340454102, "learning_rate": 5.242039879106799e-06, "loss": 0.5553, "step": 7893 }, { "epoch": 0.5, "grad_norm": 1.0477603673934937, "learning_rate": 5.241015083163534e-06, "loss": 0.6115, "step": 7894 }, { "epoch": 0.5, "grad_norm": 0.9469429850578308, "learning_rate": 5.239990277071962e-06, "loss": 0.551, "step": 7895 }, { "epoch": 0.5, "grad_norm": 1.0114669799804688, "learning_rate": 5.238965460875236e-06, "loss": 0.5004, "step": 7896 }, { "epoch": 0.5, "grad_norm": 0.9960516691207886, "learning_rate": 5.237940634616504e-06, "loss": 0.5269, "step": 7897 }, { "epoch": 0.5, "grad_norm": 0.985478937625885, "learning_rate": 5.2369157983389205e-06, "loss": 0.5471, "step": 7898 }, { "epoch": 0.5, "grad_norm": 0.9954918026924133, "learning_rate": 5.235890952085637e-06, "loss": 0.503, "step": 7899 }, { "epoch": 0.5, "grad_norm": 1.015634536743164, "learning_rate": 5.234866095899806e-06, "loss": 0.5127, "step": 7900 }, { "epoch": 0.5, "grad_norm": 1.0375806093215942, "learning_rate": 5.23384122982458e-06, "loss": 0.5291, "step": 7901 }, { "epoch": 0.5, "grad_norm": 1.1058557033538818, "learning_rate": 5.232816353903113e-06, "loss": 0.5343, "step": 7902 }, { "epoch": 0.5, "grad_norm": 0.9224012494087219, "learning_rate": 5.231791468178561e-06, "loss": 0.4541, "step": 7903 }, { "epoch": 0.5, "grad_norm": 1.0257867574691772, "learning_rate": 5.230766572694075e-06, "loss": 0.5085, "step": 7904 }, { "epoch": 0.5, "grad_norm": 1.092187523841858, "learning_rate": 5.229741667492811e-06, "loss": 0.5596, "step": 7905 }, { "epoch": 0.5, "grad_norm": 1.0415695905685425, "learning_rate": 5.228716752617926e-06, "loss": 0.524, "step": 7906 }, { "epoch": 0.5, "grad_norm": 1.0576292276382446, "learning_rate": 5.2276918281125744e-06, "loss": 0.5467, "step": 7907 }, { "epoch": 0.5, "grad_norm": 0.9662138223648071, "learning_rate": 5.22666689401991e-06, "loss": 0.5185, "step": 7908 }, { "epoch": 0.5, "grad_norm": 1.0126597881317139, "learning_rate": 5.225641950383094e-06, "loss": 0.5587, "step": 7909 }, { "epoch": 0.5, "grad_norm": 0.9800564646720886, "learning_rate": 5.2246169972452775e-06, "loss": 0.5487, "step": 7910 }, { "epoch": 0.5, "grad_norm": 1.0493261814117432, "learning_rate": 5.223592034649624e-06, "loss": 0.4969, "step": 7911 }, { "epoch": 0.5, "grad_norm": 1.0116685628890991, "learning_rate": 5.2225670626392845e-06, "loss": 0.4894, "step": 7912 }, { "epoch": 0.5, "grad_norm": 1.0772850513458252, "learning_rate": 5.221542081257421e-06, "loss": 0.5611, "step": 7913 }, { "epoch": 0.5, "grad_norm": 1.0064325332641602, "learning_rate": 5.220517090547194e-06, "loss": 0.499, "step": 7914 }, { "epoch": 0.5, "grad_norm": 1.0418468713760376, "learning_rate": 5.219492090551757e-06, "loss": 0.5412, "step": 7915 }, { "epoch": 0.5, "grad_norm": 1.0826489925384521, "learning_rate": 5.21846708131427e-06, "loss": 0.5556, "step": 7916 }, { "epoch": 0.5, "grad_norm": 1.1069979667663574, "learning_rate": 5.217442062877897e-06, "loss": 0.5309, "step": 7917 }, { "epoch": 0.5, "grad_norm": 0.9450262188911438, "learning_rate": 5.216417035285795e-06, "loss": 0.5483, "step": 7918 }, { "epoch": 0.5, "grad_norm": 0.9797537922859192, "learning_rate": 5.215391998581123e-06, "loss": 0.525, "step": 7919 }, { "epoch": 0.5, "grad_norm": 0.9857979416847229, "learning_rate": 5.214366952807043e-06, "loss": 0.5083, "step": 7920 }, { "epoch": 0.5, "grad_norm": 0.9631537795066833, "learning_rate": 5.213341898006718e-06, "loss": 0.4687, "step": 7921 }, { "epoch": 0.5, "grad_norm": 1.0123926401138306, "learning_rate": 5.212316834223307e-06, "loss": 0.5513, "step": 7922 }, { "epoch": 0.5, "grad_norm": 0.9671790599822998, "learning_rate": 5.211291761499973e-06, "loss": 0.4902, "step": 7923 }, { "epoch": 0.5, "grad_norm": 1.0273030996322632, "learning_rate": 5.210266679879877e-06, "loss": 0.4917, "step": 7924 }, { "epoch": 0.5, "grad_norm": 1.070820689201355, "learning_rate": 5.209241589406183e-06, "loss": 0.5306, "step": 7925 }, { "epoch": 0.5, "grad_norm": 0.9976992607116699, "learning_rate": 5.208216490122055e-06, "loss": 0.5001, "step": 7926 }, { "epoch": 0.5, "grad_norm": 1.0579338073730469, "learning_rate": 5.207191382070653e-06, "loss": 0.518, "step": 7927 }, { "epoch": 0.5, "grad_norm": 1.0654932260513306, "learning_rate": 5.206166265295143e-06, "loss": 0.5334, "step": 7928 }, { "epoch": 0.5, "grad_norm": 1.046481728553772, "learning_rate": 5.205141139838691e-06, "loss": 0.4989, "step": 7929 }, { "epoch": 0.5, "grad_norm": 0.9963456988334656, "learning_rate": 5.204116005744456e-06, "loss": 0.5171, "step": 7930 }, { "epoch": 0.5, "grad_norm": 1.0503910779953003, "learning_rate": 5.2030908630556075e-06, "loss": 0.5327, "step": 7931 }, { "epoch": 0.5, "grad_norm": 1.0344712734222412, "learning_rate": 5.202065711815309e-06, "loss": 0.4963, "step": 7932 }, { "epoch": 0.5, "grad_norm": 0.9059045314788818, "learning_rate": 5.201040552066727e-06, "loss": 0.4543, "step": 7933 }, { "epoch": 0.5, "grad_norm": 1.0495176315307617, "learning_rate": 5.200015383853026e-06, "loss": 0.5298, "step": 7934 }, { "epoch": 0.5, "grad_norm": 0.9795430898666382, "learning_rate": 5.1989902072173735e-06, "loss": 0.5515, "step": 7935 }, { "epoch": 0.5, "grad_norm": 1.0523420572280884, "learning_rate": 5.197965022202935e-06, "loss": 0.5265, "step": 7936 }, { "epoch": 0.5, "grad_norm": 0.996130645275116, "learning_rate": 5.196939828852879e-06, "loss": 0.5299, "step": 7937 }, { "epoch": 0.5, "grad_norm": 0.9796398878097534, "learning_rate": 5.195914627210372e-06, "loss": 0.5059, "step": 7938 }, { "epoch": 0.5, "grad_norm": 0.9897952675819397, "learning_rate": 5.19488941731858e-06, "loss": 0.541, "step": 7939 }, { "epoch": 0.5, "grad_norm": 0.9556361436843872, "learning_rate": 5.193864199220674e-06, "loss": 0.4893, "step": 7940 }, { "epoch": 0.5, "grad_norm": 1.047682523727417, "learning_rate": 5.192838972959821e-06, "loss": 0.5366, "step": 7941 }, { "epoch": 0.5, "grad_norm": 1.0672297477722168, "learning_rate": 5.19181373857919e-06, "loss": 0.5293, "step": 7942 }, { "epoch": 0.5, "grad_norm": 0.9972975850105286, "learning_rate": 5.190788496121948e-06, "loss": 0.522, "step": 7943 }, { "epoch": 0.5, "grad_norm": 0.9845316410064697, "learning_rate": 5.189763245631268e-06, "loss": 0.4624, "step": 7944 }, { "epoch": 0.5, "grad_norm": 1.0090800523757935, "learning_rate": 5.188737987150316e-06, "loss": 0.548, "step": 7945 }, { "epoch": 0.5, "grad_norm": 0.9909870028495789, "learning_rate": 5.1877127207222666e-06, "loss": 0.502, "step": 7946 }, { "epoch": 0.5, "grad_norm": 1.0190807580947876, "learning_rate": 5.186687446390284e-06, "loss": 0.5031, "step": 7947 }, { "epoch": 0.5, "grad_norm": 1.0893406867980957, "learning_rate": 5.185662164197546e-06, "loss": 0.5275, "step": 7948 }, { "epoch": 0.5, "grad_norm": 1.0079681873321533, "learning_rate": 5.184636874187218e-06, "loss": 0.5121, "step": 7949 }, { "epoch": 0.5, "grad_norm": 1.0926753282546997, "learning_rate": 5.183611576402474e-06, "loss": 0.5142, "step": 7950 }, { "epoch": 0.5, "grad_norm": 0.982645571231842, "learning_rate": 5.182586270886485e-06, "loss": 0.5325, "step": 7951 }, { "epoch": 0.5, "grad_norm": 1.001665711402893, "learning_rate": 5.181560957682423e-06, "loss": 0.5317, "step": 7952 }, { "epoch": 0.5, "grad_norm": 0.9564476609230042, "learning_rate": 5.180535636833462e-06, "loss": 0.4977, "step": 7953 }, { "epoch": 0.5, "grad_norm": 0.9797195792198181, "learning_rate": 5.179510308382773e-06, "loss": 0.5138, "step": 7954 }, { "epoch": 0.5, "grad_norm": 1.0832334756851196, "learning_rate": 5.178484972373528e-06, "loss": 0.555, "step": 7955 }, { "epoch": 0.5, "grad_norm": 1.011929988861084, "learning_rate": 5.177459628848903e-06, "loss": 0.5478, "step": 7956 }, { "epoch": 0.5, "grad_norm": 1.05528724193573, "learning_rate": 5.17643427785207e-06, "loss": 0.5586, "step": 7957 }, { "epoch": 0.5, "grad_norm": 0.9753866791725159, "learning_rate": 5.175408919426204e-06, "loss": 0.5098, "step": 7958 }, { "epoch": 0.5, "grad_norm": 1.102591633796692, "learning_rate": 5.174383553614478e-06, "loss": 0.485, "step": 7959 }, { "epoch": 0.5, "grad_norm": 0.9618451595306396, "learning_rate": 5.1733581804600674e-06, "loss": 0.528, "step": 7960 }, { "epoch": 0.5, "grad_norm": 1.053686499595642, "learning_rate": 5.172332800006147e-06, "loss": 0.5683, "step": 7961 }, { "epoch": 0.5, "grad_norm": 1.02312433719635, "learning_rate": 5.171307412295892e-06, "loss": 0.5234, "step": 7962 }, { "epoch": 0.5, "grad_norm": 0.9336697459220886, "learning_rate": 5.1702820173724766e-06, "loss": 0.4938, "step": 7963 }, { "epoch": 0.5, "grad_norm": 1.1132444143295288, "learning_rate": 5.169256615279078e-06, "loss": 0.5379, "step": 7964 }, { "epoch": 0.5, "grad_norm": 1.061083436012268, "learning_rate": 5.168231206058874e-06, "loss": 0.497, "step": 7965 }, { "epoch": 0.5, "grad_norm": 0.9309837818145752, "learning_rate": 5.167205789755037e-06, "loss": 0.483, "step": 7966 }, { "epoch": 0.5, "grad_norm": 1.0379348993301392, "learning_rate": 5.1661803664107465e-06, "loss": 0.5018, "step": 7967 }, { "epoch": 0.5, "grad_norm": 0.9954214096069336, "learning_rate": 5.16515493606918e-06, "loss": 0.5241, "step": 7968 }, { "epoch": 0.5, "grad_norm": 0.9716700315475464, "learning_rate": 5.164129498773513e-06, "loss": 0.5286, "step": 7969 }, { "epoch": 0.5, "grad_norm": 1.0255597829818726, "learning_rate": 5.163104054566922e-06, "loss": 0.5416, "step": 7970 }, { "epoch": 0.5, "grad_norm": 1.0864416360855103, "learning_rate": 5.16207860349259e-06, "loss": 0.5285, "step": 7971 }, { "epoch": 0.51, "grad_norm": 0.9192246794700623, "learning_rate": 5.16105314559369e-06, "loss": 0.4966, "step": 7972 }, { "epoch": 0.51, "grad_norm": 1.0215632915496826, "learning_rate": 5.160027680913402e-06, "loss": 0.5171, "step": 7973 }, { "epoch": 0.51, "grad_norm": 0.9981522560119629, "learning_rate": 5.159002209494905e-06, "loss": 0.5831, "step": 7974 }, { "epoch": 0.51, "grad_norm": 1.0376025438308716, "learning_rate": 5.157976731381379e-06, "loss": 0.5321, "step": 7975 }, { "epoch": 0.51, "grad_norm": 1.1233857870101929, "learning_rate": 5.1569512466160025e-06, "loss": 0.5601, "step": 7976 }, { "epoch": 0.51, "grad_norm": 1.0849857330322266, "learning_rate": 5.155925755241954e-06, "loss": 0.5672, "step": 7977 }, { "epoch": 0.51, "grad_norm": 0.9773584008216858, "learning_rate": 5.1549002573024144e-06, "loss": 0.4966, "step": 7978 }, { "epoch": 0.51, "grad_norm": 1.0027586221694946, "learning_rate": 5.153874752840564e-06, "loss": 0.5014, "step": 7979 }, { "epoch": 0.51, "grad_norm": 1.0651005506515503, "learning_rate": 5.152849241899585e-06, "loss": 0.5096, "step": 7980 }, { "epoch": 0.51, "grad_norm": 1.1254360675811768, "learning_rate": 5.151823724522653e-06, "loss": 0.5215, "step": 7981 }, { "epoch": 0.51, "grad_norm": 0.9797729849815369, "learning_rate": 5.150798200752953e-06, "loss": 0.5305, "step": 7982 }, { "epoch": 0.51, "grad_norm": 1.045276165008545, "learning_rate": 5.149772670633666e-06, "loss": 0.5164, "step": 7983 }, { "epoch": 0.51, "grad_norm": 1.019736409187317, "learning_rate": 5.148747134207974e-06, "loss": 0.4887, "step": 7984 }, { "epoch": 0.51, "grad_norm": 1.034010410308838, "learning_rate": 5.147721591519056e-06, "loss": 0.5115, "step": 7985 }, { "epoch": 0.51, "grad_norm": 1.0028609037399292, "learning_rate": 5.146696042610095e-06, "loss": 0.4889, "step": 7986 }, { "epoch": 0.51, "grad_norm": 1.123826503753662, "learning_rate": 5.145670487524276e-06, "loss": 0.5456, "step": 7987 }, { "epoch": 0.51, "grad_norm": 1.024096965789795, "learning_rate": 5.144644926304778e-06, "loss": 0.5716, "step": 7988 }, { "epoch": 0.51, "grad_norm": 1.121075987815857, "learning_rate": 5.1436193589947855e-06, "loss": 0.516, "step": 7989 }, { "epoch": 0.51, "grad_norm": 1.1426475048065186, "learning_rate": 5.1425937856374816e-06, "loss": 0.5672, "step": 7990 }, { "epoch": 0.51, "grad_norm": 1.0405372381210327, "learning_rate": 5.141568206276051e-06, "loss": 0.5529, "step": 7991 }, { "epoch": 0.51, "grad_norm": 1.0194613933563232, "learning_rate": 5.140542620953675e-06, "loss": 0.4764, "step": 7992 }, { "epoch": 0.51, "grad_norm": 1.1488856077194214, "learning_rate": 5.139517029713537e-06, "loss": 0.5922, "step": 7993 }, { "epoch": 0.51, "grad_norm": 1.020204782485962, "learning_rate": 5.138491432598822e-06, "loss": 0.5008, "step": 7994 }, { "epoch": 0.51, "grad_norm": 1.0580018758773804, "learning_rate": 5.137465829652716e-06, "loss": 0.5173, "step": 7995 }, { "epoch": 0.51, "grad_norm": 1.0638741254806519, "learning_rate": 5.136440220918401e-06, "loss": 0.5124, "step": 7996 }, { "epoch": 0.51, "grad_norm": 1.088953971862793, "learning_rate": 5.135414606439063e-06, "loss": 0.5547, "step": 7997 }, { "epoch": 0.51, "grad_norm": 1.0534359216690063, "learning_rate": 5.134388986257887e-06, "loss": 0.5194, "step": 7998 }, { "epoch": 0.51, "grad_norm": 0.9983291625976562, "learning_rate": 5.133363360418059e-06, "loss": 0.5332, "step": 7999 }, { "epoch": 0.51, "grad_norm": 1.041108250617981, "learning_rate": 5.132337728962763e-06, "loss": 0.5423, "step": 8000 }, { "epoch": 0.51, "grad_norm": 1.0383859872817993, "learning_rate": 5.131312091935186e-06, "loss": 0.5261, "step": 8001 }, { "epoch": 0.51, "grad_norm": 1.0309644937515259, "learning_rate": 5.130286449378513e-06, "loss": 0.5788, "step": 8002 }, { "epoch": 0.51, "grad_norm": 1.0815945863723755, "learning_rate": 5.129260801335932e-06, "loss": 0.5365, "step": 8003 }, { "epoch": 0.51, "grad_norm": 1.018567442893982, "learning_rate": 5.128235147850629e-06, "loss": 0.5332, "step": 8004 }, { "epoch": 0.51, "grad_norm": 0.9869685769081116, "learning_rate": 5.127209488965787e-06, "loss": 0.5147, "step": 8005 }, { "epoch": 0.51, "grad_norm": 0.973550021648407, "learning_rate": 5.1261838247246e-06, "loss": 0.4759, "step": 8006 }, { "epoch": 0.51, "grad_norm": 1.0792498588562012, "learning_rate": 5.125158155170248e-06, "loss": 0.514, "step": 8007 }, { "epoch": 0.51, "grad_norm": 1.0632809400558472, "learning_rate": 5.124132480345922e-06, "loss": 0.4947, "step": 8008 }, { "epoch": 0.51, "grad_norm": 1.0588916540145874, "learning_rate": 5.123106800294809e-06, "loss": 0.5306, "step": 8009 }, { "epoch": 0.51, "grad_norm": 1.0726609230041504, "learning_rate": 5.122081115060098e-06, "loss": 0.5241, "step": 8010 }, { "epoch": 0.51, "grad_norm": 1.0798453092575073, "learning_rate": 5.121055424684975e-06, "loss": 0.4875, "step": 8011 }, { "epoch": 0.51, "grad_norm": 1.0017881393432617, "learning_rate": 5.12002972921263e-06, "loss": 0.5088, "step": 8012 }, { "epoch": 0.51, "grad_norm": 1.0158437490463257, "learning_rate": 5.119004028686249e-06, "loss": 0.5124, "step": 8013 }, { "epoch": 0.51, "grad_norm": 0.9737855195999146, "learning_rate": 5.117978323149025e-06, "loss": 0.5514, "step": 8014 }, { "epoch": 0.51, "grad_norm": 1.107811450958252, "learning_rate": 5.116952612644141e-06, "loss": 0.4671, "step": 8015 }, { "epoch": 0.51, "grad_norm": 1.1360200643539429, "learning_rate": 5.1159268972147915e-06, "loss": 0.5371, "step": 8016 }, { "epoch": 0.51, "grad_norm": 1.0516135692596436, "learning_rate": 5.114901176904164e-06, "loss": 0.5106, "step": 8017 }, { "epoch": 0.51, "grad_norm": 1.0169031620025635, "learning_rate": 5.113875451755447e-06, "loss": 0.5439, "step": 8018 }, { "epoch": 0.51, "grad_norm": 0.9697759747505188, "learning_rate": 5.11284972181183e-06, "loss": 0.4929, "step": 8019 }, { "epoch": 0.51, "grad_norm": 1.0336737632751465, "learning_rate": 5.111823987116504e-06, "loss": 0.514, "step": 8020 }, { "epoch": 0.51, "grad_norm": 1.02176034450531, "learning_rate": 5.110798247712661e-06, "loss": 0.5333, "step": 8021 }, { "epoch": 0.51, "grad_norm": 1.036531925201416, "learning_rate": 5.109772503643486e-06, "loss": 0.5656, "step": 8022 }, { "epoch": 0.51, "grad_norm": 0.9949049949645996, "learning_rate": 5.108746754952177e-06, "loss": 0.4844, "step": 8023 }, { "epoch": 0.51, "grad_norm": 1.0902539491653442, "learning_rate": 5.107721001681915e-06, "loss": 0.5508, "step": 8024 }, { "epoch": 0.51, "grad_norm": 1.0021989345550537, "learning_rate": 5.1066952438759e-06, "loss": 0.4943, "step": 8025 }, { "epoch": 0.51, "grad_norm": 1.0186973810195923, "learning_rate": 5.105669481577319e-06, "loss": 0.541, "step": 8026 }, { "epoch": 0.51, "grad_norm": 1.0388914346694946, "learning_rate": 5.104643714829362e-06, "loss": 0.5638, "step": 8027 }, { "epoch": 0.51, "grad_norm": 0.9377147555351257, "learning_rate": 5.103617943675224e-06, "loss": 0.5213, "step": 8028 }, { "epoch": 0.51, "grad_norm": 1.0112721920013428, "learning_rate": 5.102592168158095e-06, "loss": 0.531, "step": 8029 }, { "epoch": 0.51, "grad_norm": 1.0095853805541992, "learning_rate": 5.101566388321165e-06, "loss": 0.5159, "step": 8030 }, { "epoch": 0.51, "grad_norm": 0.9937832951545715, "learning_rate": 5.100540604207629e-06, "loss": 0.51, "step": 8031 }, { "epoch": 0.51, "grad_norm": 1.125806450843811, "learning_rate": 5.099514815860678e-06, "loss": 0.5244, "step": 8032 }, { "epoch": 0.51, "grad_norm": 0.9696172475814819, "learning_rate": 5.098489023323504e-06, "loss": 0.4601, "step": 8033 }, { "epoch": 0.51, "grad_norm": 1.0181834697723389, "learning_rate": 5.0974632266393e-06, "loss": 0.5248, "step": 8034 }, { "epoch": 0.51, "grad_norm": 1.0194844007492065, "learning_rate": 5.0964374258512585e-06, "loss": 0.5156, "step": 8035 }, { "epoch": 0.51, "grad_norm": 1.1436933279037476, "learning_rate": 5.0954116210025725e-06, "loss": 0.5529, "step": 8036 }, { "epoch": 0.51, "grad_norm": 1.0950855016708374, "learning_rate": 5.094385812136435e-06, "loss": 0.5164, "step": 8037 }, { "epoch": 0.51, "grad_norm": 1.067351222038269, "learning_rate": 5.09335999929604e-06, "loss": 0.5117, "step": 8038 }, { "epoch": 0.51, "grad_norm": 1.0079240798950195, "learning_rate": 5.092334182524578e-06, "loss": 0.5118, "step": 8039 }, { "epoch": 0.51, "grad_norm": 1.1458498239517212, "learning_rate": 5.091308361865247e-06, "loss": 0.5854, "step": 8040 }, { "epoch": 0.51, "grad_norm": 0.9869314432144165, "learning_rate": 5.090282537361237e-06, "loss": 0.4858, "step": 8041 }, { "epoch": 0.51, "grad_norm": 1.0945738554000854, "learning_rate": 5.089256709055745e-06, "loss": 0.5682, "step": 8042 }, { "epoch": 0.51, "grad_norm": 1.0837138891220093, "learning_rate": 5.088230876991962e-06, "loss": 0.522, "step": 8043 }, { "epoch": 0.51, "grad_norm": 1.0137262344360352, "learning_rate": 5.087205041213085e-06, "loss": 0.5351, "step": 8044 }, { "epoch": 0.51, "grad_norm": 1.1037157773971558, "learning_rate": 5.086179201762306e-06, "loss": 0.5433, "step": 8045 }, { "epoch": 0.51, "grad_norm": 1.0440733432769775, "learning_rate": 5.085153358682822e-06, "loss": 0.5122, "step": 8046 }, { "epoch": 0.51, "grad_norm": 1.0042705535888672, "learning_rate": 5.084127512017823e-06, "loss": 0.5224, "step": 8047 }, { "epoch": 0.51, "grad_norm": 0.9671124219894409, "learning_rate": 5.083101661810511e-06, "loss": 0.5237, "step": 8048 }, { "epoch": 0.51, "grad_norm": 1.04704749584198, "learning_rate": 5.082075808104075e-06, "loss": 0.5165, "step": 8049 }, { "epoch": 0.51, "grad_norm": 1.0171769857406616, "learning_rate": 5.081049950941713e-06, "loss": 0.5362, "step": 8050 }, { "epoch": 0.51, "grad_norm": 1.0862504243850708, "learning_rate": 5.080024090366618e-06, "loss": 0.528, "step": 8051 }, { "epoch": 0.51, "grad_norm": 1.0223110914230347, "learning_rate": 5.078998226421989e-06, "loss": 0.5447, "step": 8052 }, { "epoch": 0.51, "grad_norm": 1.0615509748458862, "learning_rate": 5.07797235915102e-06, "loss": 0.5355, "step": 8053 }, { "epoch": 0.51, "grad_norm": 0.9888608455657959, "learning_rate": 5.076946488596905e-06, "loss": 0.5384, "step": 8054 }, { "epoch": 0.51, "grad_norm": 1.1075698137283325, "learning_rate": 5.07592061480284e-06, "loss": 0.5596, "step": 8055 }, { "epoch": 0.51, "grad_norm": 1.0511996746063232, "learning_rate": 5.074894737812023e-06, "loss": 0.5158, "step": 8056 }, { "epoch": 0.51, "grad_norm": 1.0970938205718994, "learning_rate": 5.07386885766765e-06, "loss": 0.5757, "step": 8057 }, { "epoch": 0.51, "grad_norm": 0.9284539818763733, "learning_rate": 5.072842974412916e-06, "loss": 0.4619, "step": 8058 }, { "epoch": 0.51, "grad_norm": 0.9632412791252136, "learning_rate": 5.071817088091017e-06, "loss": 0.4778, "step": 8059 }, { "epoch": 0.51, "grad_norm": 1.0104366540908813, "learning_rate": 5.0707911987451496e-06, "loss": 0.5728, "step": 8060 }, { "epoch": 0.51, "grad_norm": 1.1155264377593994, "learning_rate": 5.0697653064185125e-06, "loss": 0.5494, "step": 8061 }, { "epoch": 0.51, "grad_norm": 1.0364484786987305, "learning_rate": 5.068739411154301e-06, "loss": 0.5255, "step": 8062 }, { "epoch": 0.51, "grad_norm": 1.0144158601760864, "learning_rate": 5.0677135129957115e-06, "loss": 0.5346, "step": 8063 }, { "epoch": 0.51, "grad_norm": 0.9359563589096069, "learning_rate": 5.066687611985941e-06, "loss": 0.5088, "step": 8064 }, { "epoch": 0.51, "grad_norm": 1.0331517457962036, "learning_rate": 5.065661708168188e-06, "loss": 0.4937, "step": 8065 }, { "epoch": 0.51, "grad_norm": 1.1041889190673828, "learning_rate": 5.064635801585649e-06, "loss": 0.4994, "step": 8066 }, { "epoch": 0.51, "grad_norm": 0.930534839630127, "learning_rate": 5.06360989228152e-06, "loss": 0.4854, "step": 8067 }, { "epoch": 0.51, "grad_norm": 1.0794141292572021, "learning_rate": 5.062583980299002e-06, "loss": 0.5193, "step": 8068 }, { "epoch": 0.51, "grad_norm": 0.9313056468963623, "learning_rate": 5.061558065681288e-06, "loss": 0.5059, "step": 8069 }, { "epoch": 0.51, "grad_norm": 1.0706911087036133, "learning_rate": 5.060532148471578e-06, "loss": 0.5567, "step": 8070 }, { "epoch": 0.51, "grad_norm": 1.0160199403762817, "learning_rate": 5.059506228713071e-06, "loss": 0.4841, "step": 8071 }, { "epoch": 0.51, "grad_norm": 0.9667502641677856, "learning_rate": 5.058480306448965e-06, "loss": 0.5376, "step": 8072 }, { "epoch": 0.51, "grad_norm": 1.044235110282898, "learning_rate": 5.057454381722455e-06, "loss": 0.5092, "step": 8073 }, { "epoch": 0.51, "grad_norm": 1.0147597789764404, "learning_rate": 5.056428454576741e-06, "loss": 0.5273, "step": 8074 }, { "epoch": 0.51, "grad_norm": 0.9804866909980774, "learning_rate": 5.0554025250550195e-06, "loss": 0.5348, "step": 8075 }, { "epoch": 0.51, "grad_norm": 1.0162832736968994, "learning_rate": 5.054376593200493e-06, "loss": 0.4554, "step": 8076 }, { "epoch": 0.51, "grad_norm": 1.035589575767517, "learning_rate": 5.053350659056356e-06, "loss": 0.5056, "step": 8077 }, { "epoch": 0.51, "grad_norm": 1.0157426595687866, "learning_rate": 5.052324722665809e-06, "loss": 0.5324, "step": 8078 }, { "epoch": 0.51, "grad_norm": 1.0081170797348022, "learning_rate": 5.0512987840720495e-06, "loss": 0.5206, "step": 8079 }, { "epoch": 0.51, "grad_norm": 1.0904947519302368, "learning_rate": 5.0502728433182765e-06, "loss": 0.5142, "step": 8080 }, { "epoch": 0.51, "grad_norm": 1.0651867389678955, "learning_rate": 5.049246900447689e-06, "loss": 0.5411, "step": 8081 }, { "epoch": 0.51, "grad_norm": 0.9559816718101501, "learning_rate": 5.048220955503487e-06, "loss": 0.5081, "step": 8082 }, { "epoch": 0.51, "grad_norm": 0.8969957232475281, "learning_rate": 5.047195008528868e-06, "loss": 0.4831, "step": 8083 }, { "epoch": 0.51, "grad_norm": 0.9508232474327087, "learning_rate": 5.04616905956703e-06, "loss": 0.49, "step": 8084 }, { "epoch": 0.51, "grad_norm": 0.9438045620918274, "learning_rate": 5.045143108661174e-06, "loss": 0.5268, "step": 8085 }, { "epoch": 0.51, "grad_norm": 0.9639183878898621, "learning_rate": 5.044117155854499e-06, "loss": 0.5587, "step": 8086 }, { "epoch": 0.51, "grad_norm": 0.8572230935096741, "learning_rate": 5.043091201190204e-06, "loss": 0.5037, "step": 8087 }, { "epoch": 0.51, "grad_norm": 0.997568666934967, "learning_rate": 5.042065244711488e-06, "loss": 0.4654, "step": 8088 }, { "epoch": 0.51, "grad_norm": 1.0512139797210693, "learning_rate": 5.041039286461552e-06, "loss": 0.5253, "step": 8089 }, { "epoch": 0.51, "grad_norm": 1.0104326009750366, "learning_rate": 5.040013326483593e-06, "loss": 0.5375, "step": 8090 }, { "epoch": 0.51, "grad_norm": 1.1923975944519043, "learning_rate": 5.038987364820813e-06, "loss": 0.5782, "step": 8091 }, { "epoch": 0.51, "grad_norm": 1.0443609952926636, "learning_rate": 5.037961401516411e-06, "loss": 0.4763, "step": 8092 }, { "epoch": 0.51, "grad_norm": 0.9891601800918579, "learning_rate": 5.036935436613586e-06, "loss": 0.5175, "step": 8093 }, { "epoch": 0.51, "grad_norm": 1.0420844554901123, "learning_rate": 5.0359094701555375e-06, "loss": 0.512, "step": 8094 }, { "epoch": 0.51, "grad_norm": 0.922382652759552, "learning_rate": 5.034883502185467e-06, "loss": 0.5143, "step": 8095 }, { "epoch": 0.51, "grad_norm": 1.006429672241211, "learning_rate": 5.033857532746573e-06, "loss": 0.5191, "step": 8096 }, { "epoch": 0.51, "grad_norm": 0.9326863884925842, "learning_rate": 5.032831561882057e-06, "loss": 0.4857, "step": 8097 }, { "epoch": 0.51, "grad_norm": 0.9927276372909546, "learning_rate": 5.0318055896351185e-06, "loss": 0.5147, "step": 8098 }, { "epoch": 0.51, "grad_norm": 0.885697066783905, "learning_rate": 5.030779616048955e-06, "loss": 0.4553, "step": 8099 }, { "epoch": 0.51, "grad_norm": 1.1350202560424805, "learning_rate": 5.02975364116677e-06, "loss": 0.5308, "step": 8100 }, { "epoch": 0.51, "grad_norm": 1.0157482624053955, "learning_rate": 5.0287276650317626e-06, "loss": 0.5095, "step": 8101 }, { "epoch": 0.51, "grad_norm": 1.0490323305130005, "learning_rate": 5.027701687687135e-06, "loss": 0.5334, "step": 8102 }, { "epoch": 0.51, "grad_norm": 0.9174306392669678, "learning_rate": 5.026675709176084e-06, "loss": 0.456, "step": 8103 }, { "epoch": 0.51, "grad_norm": 0.9931322336196899, "learning_rate": 5.0256497295418115e-06, "loss": 0.5302, "step": 8104 }, { "epoch": 0.51, "grad_norm": 1.018126130104065, "learning_rate": 5.0246237488275185e-06, "loss": 0.4929, "step": 8105 }, { "epoch": 0.51, "grad_norm": 0.996679961681366, "learning_rate": 5.0235977670764055e-06, "loss": 0.4864, "step": 8106 }, { "epoch": 0.51, "grad_norm": 1.0220783948898315, "learning_rate": 5.022571784331672e-06, "loss": 0.5348, "step": 8107 }, { "epoch": 0.51, "grad_norm": 0.9700262546539307, "learning_rate": 5.021545800636519e-06, "loss": 0.511, "step": 8108 }, { "epoch": 0.51, "grad_norm": 1.045074701309204, "learning_rate": 5.020519816034148e-06, "loss": 0.5139, "step": 8109 }, { "epoch": 0.51, "grad_norm": 1.0336368083953857, "learning_rate": 5.019493830567758e-06, "loss": 0.5456, "step": 8110 }, { "epoch": 0.51, "grad_norm": 1.0149891376495361, "learning_rate": 5.018467844280553e-06, "loss": 0.5, "step": 8111 }, { "epoch": 0.51, "grad_norm": 1.019803524017334, "learning_rate": 5.0174418572157276e-06, "loss": 0.4974, "step": 8112 }, { "epoch": 0.51, "grad_norm": 0.9631032943725586, "learning_rate": 5.0164158694164884e-06, "loss": 0.4931, "step": 8113 }, { "epoch": 0.51, "grad_norm": 1.086776614189148, "learning_rate": 5.015389880926035e-06, "loss": 0.5513, "step": 8114 }, { "epoch": 0.51, "grad_norm": 0.9517618417739868, "learning_rate": 5.014363891787567e-06, "loss": 0.5024, "step": 8115 }, { "epoch": 0.51, "grad_norm": 1.0779685974121094, "learning_rate": 5.013337902044283e-06, "loss": 0.5292, "step": 8116 }, { "epoch": 0.51, "grad_norm": 1.1412652730941772, "learning_rate": 5.0123119117393894e-06, "loss": 0.5368, "step": 8117 }, { "epoch": 0.51, "grad_norm": 1.0637733936309814, "learning_rate": 5.011285920916082e-06, "loss": 0.5402, "step": 8118 }, { "epoch": 0.51, "grad_norm": 1.0142605304718018, "learning_rate": 5.010259929617565e-06, "loss": 0.5356, "step": 8119 }, { "epoch": 0.51, "grad_norm": 1.202275276184082, "learning_rate": 5.009233937887036e-06, "loss": 0.5115, "step": 8120 }, { "epoch": 0.51, "grad_norm": 1.0249601602554321, "learning_rate": 5.0082079457677e-06, "loss": 0.49, "step": 8121 }, { "epoch": 0.51, "grad_norm": 1.1110284328460693, "learning_rate": 5.007181953302755e-06, "loss": 0.5404, "step": 8122 }, { "epoch": 0.51, "grad_norm": 1.0863715410232544, "learning_rate": 5.006155960535405e-06, "loss": 0.5385, "step": 8123 }, { "epoch": 0.51, "grad_norm": 1.015622854232788, "learning_rate": 5.005129967508845e-06, "loss": 0.5406, "step": 8124 }, { "epoch": 0.51, "grad_norm": 1.0340197086334229, "learning_rate": 5.004103974266284e-06, "loss": 0.5186, "step": 8125 }, { "epoch": 0.51, "grad_norm": 1.0327247381210327, "learning_rate": 5.0030779808509155e-06, "loss": 0.5257, "step": 8126 }, { "epoch": 0.51, "grad_norm": 0.9580963253974915, "learning_rate": 5.002051987305947e-06, "loss": 0.5182, "step": 8127 }, { "epoch": 0.51, "grad_norm": 0.9721316695213318, "learning_rate": 5.0010259936745735e-06, "loss": 0.4748, "step": 8128 }, { "epoch": 0.52, "grad_norm": 1.0096826553344727, "learning_rate": 5e-06, "loss": 0.548, "step": 8129 }, { "epoch": 0.52, "grad_norm": 0.9705838561058044, "learning_rate": 4.998974006325428e-06, "loss": 0.5073, "step": 8130 }, { "epoch": 0.52, "grad_norm": 1.019299864768982, "learning_rate": 4.997948012694056e-06, "loss": 0.5197, "step": 8131 }, { "epoch": 0.52, "grad_norm": 1.1114896535873413, "learning_rate": 4.9969220191490845e-06, "loss": 0.5462, "step": 8132 }, { "epoch": 0.52, "grad_norm": 1.0755159854888916, "learning_rate": 4.995896025733719e-06, "loss": 0.5358, "step": 8133 }, { "epoch": 0.52, "grad_norm": 1.1081016063690186, "learning_rate": 4.994870032491156e-06, "loss": 0.5108, "step": 8134 }, { "epoch": 0.52, "grad_norm": 1.0805017948150635, "learning_rate": 4.993844039464598e-06, "loss": 0.6051, "step": 8135 }, { "epoch": 0.52, "grad_norm": 1.0222865343093872, "learning_rate": 4.992818046697245e-06, "loss": 0.5212, "step": 8136 }, { "epoch": 0.52, "grad_norm": 1.035110592842102, "learning_rate": 4.991792054232301e-06, "loss": 0.4815, "step": 8137 }, { "epoch": 0.52, "grad_norm": 0.9688609838485718, "learning_rate": 4.990766062112966e-06, "loss": 0.5028, "step": 8138 }, { "epoch": 0.52, "grad_norm": 1.0254881381988525, "learning_rate": 4.989740070382438e-06, "loss": 0.5143, "step": 8139 }, { "epoch": 0.52, "grad_norm": 0.9833453893661499, "learning_rate": 4.988714079083918e-06, "loss": 0.5275, "step": 8140 }, { "epoch": 0.52, "grad_norm": 1.152333378791809, "learning_rate": 4.987688088260613e-06, "loss": 0.5085, "step": 8141 }, { "epoch": 0.52, "grad_norm": 1.0296305418014526, "learning_rate": 4.986662097955718e-06, "loss": 0.518, "step": 8142 }, { "epoch": 0.52, "grad_norm": 0.9811147451400757, "learning_rate": 4.985636108212435e-06, "loss": 0.5338, "step": 8143 }, { "epoch": 0.52, "grad_norm": 0.9948538541793823, "learning_rate": 4.984610119073965e-06, "loss": 0.4752, "step": 8144 }, { "epoch": 0.52, "grad_norm": 1.0104701519012451, "learning_rate": 4.9835841305835115e-06, "loss": 0.5064, "step": 8145 }, { "epoch": 0.52, "grad_norm": 0.9781093597412109, "learning_rate": 4.982558142784273e-06, "loss": 0.5434, "step": 8146 }, { "epoch": 0.52, "grad_norm": 1.0168230533599854, "learning_rate": 4.98153215571945e-06, "loss": 0.5495, "step": 8147 }, { "epoch": 0.52, "grad_norm": 1.0309807062149048, "learning_rate": 4.980506169432243e-06, "loss": 0.525, "step": 8148 }, { "epoch": 0.52, "grad_norm": 1.058572769165039, "learning_rate": 4.979480183965852e-06, "loss": 0.5429, "step": 8149 }, { "epoch": 0.52, "grad_norm": 0.9933605194091797, "learning_rate": 4.9784541993634824e-06, "loss": 0.5254, "step": 8150 }, { "epoch": 0.52, "grad_norm": 1.0911980867385864, "learning_rate": 4.977428215668329e-06, "loss": 0.5847, "step": 8151 }, { "epoch": 0.52, "grad_norm": 1.0300828218460083, "learning_rate": 4.976402232923597e-06, "loss": 0.5364, "step": 8152 }, { "epoch": 0.52, "grad_norm": 1.0467183589935303, "learning_rate": 4.9753762511724815e-06, "loss": 0.4921, "step": 8153 }, { "epoch": 0.52, "grad_norm": 0.990119218826294, "learning_rate": 4.974350270458189e-06, "loss": 0.5429, "step": 8154 }, { "epoch": 0.52, "grad_norm": 1.0183179378509521, "learning_rate": 4.9733242908239175e-06, "loss": 0.5546, "step": 8155 }, { "epoch": 0.52, "grad_norm": 1.0725822448730469, "learning_rate": 4.972298312312867e-06, "loss": 0.539, "step": 8156 }, { "epoch": 0.52, "grad_norm": 1.1482937335968018, "learning_rate": 4.9712723349682365e-06, "loss": 0.5243, "step": 8157 }, { "epoch": 0.52, "grad_norm": 1.0681301355361938, "learning_rate": 4.970246358833231e-06, "loss": 0.4775, "step": 8158 }, { "epoch": 0.52, "grad_norm": 1.0565322637557983, "learning_rate": 4.969220383951046e-06, "loss": 0.4802, "step": 8159 }, { "epoch": 0.52, "grad_norm": 0.9825900197029114, "learning_rate": 4.968194410364884e-06, "loss": 0.5165, "step": 8160 }, { "epoch": 0.52, "grad_norm": 0.9784873723983765, "learning_rate": 4.967168438117945e-06, "loss": 0.5198, "step": 8161 }, { "epoch": 0.52, "grad_norm": 0.9857720732688904, "learning_rate": 4.966142467253428e-06, "loss": 0.4751, "step": 8162 }, { "epoch": 0.52, "grad_norm": 1.069770336151123, "learning_rate": 4.965116497814534e-06, "loss": 0.5385, "step": 8163 }, { "epoch": 0.52, "grad_norm": 1.05392587184906, "learning_rate": 4.964090529844464e-06, "loss": 0.5194, "step": 8164 }, { "epoch": 0.52, "grad_norm": 0.9713079333305359, "learning_rate": 4.963064563386416e-06, "loss": 0.5253, "step": 8165 }, { "epoch": 0.52, "grad_norm": 0.9919078350067139, "learning_rate": 4.96203859848359e-06, "loss": 0.5185, "step": 8166 }, { "epoch": 0.52, "grad_norm": 0.9249306917190552, "learning_rate": 4.961012635179188e-06, "loss": 0.4828, "step": 8167 }, { "epoch": 0.52, "grad_norm": 1.0226560831069946, "learning_rate": 4.959986673516408e-06, "loss": 0.513, "step": 8168 }, { "epoch": 0.52, "grad_norm": 0.9974196553230286, "learning_rate": 4.95896071353845e-06, "loss": 0.5031, "step": 8169 }, { "epoch": 0.52, "grad_norm": 1.070699691772461, "learning_rate": 4.9579347552885125e-06, "loss": 0.5519, "step": 8170 }, { "epoch": 0.52, "grad_norm": 1.057245135307312, "learning_rate": 4.956908798809797e-06, "loss": 0.5376, "step": 8171 }, { "epoch": 0.52, "grad_norm": 1.1630752086639404, "learning_rate": 4.955882844145503e-06, "loss": 0.5544, "step": 8172 }, { "epoch": 0.52, "grad_norm": 1.0155560970306396, "learning_rate": 4.954856891338827e-06, "loss": 0.5533, "step": 8173 }, { "epoch": 0.52, "grad_norm": 1.047834873199463, "learning_rate": 4.95383094043297e-06, "loss": 0.5008, "step": 8174 }, { "epoch": 0.52, "grad_norm": 0.9876782298088074, "learning_rate": 4.952804991471134e-06, "loss": 0.4999, "step": 8175 }, { "epoch": 0.52, "grad_norm": 0.9726967215538025, "learning_rate": 4.951779044496515e-06, "loss": 0.4991, "step": 8176 }, { "epoch": 0.52, "grad_norm": 1.105119228363037, "learning_rate": 4.9507530995523115e-06, "loss": 0.516, "step": 8177 }, { "epoch": 0.52, "grad_norm": 0.9466031789779663, "learning_rate": 4.949727156681726e-06, "loss": 0.5342, "step": 8178 }, { "epoch": 0.52, "grad_norm": 1.0259852409362793, "learning_rate": 4.948701215927951e-06, "loss": 0.4601, "step": 8179 }, { "epoch": 0.52, "grad_norm": 0.952790379524231, "learning_rate": 4.947675277334193e-06, "loss": 0.5096, "step": 8180 }, { "epoch": 0.52, "grad_norm": 1.0431325435638428, "learning_rate": 4.946649340943645e-06, "loss": 0.5657, "step": 8181 }, { "epoch": 0.52, "grad_norm": 1.0588798522949219, "learning_rate": 4.9456234067995094e-06, "loss": 0.5562, "step": 8182 }, { "epoch": 0.52, "grad_norm": 1.000832200050354, "learning_rate": 4.9445974749449805e-06, "loss": 0.4955, "step": 8183 }, { "epoch": 0.52, "grad_norm": 0.9583312273025513, "learning_rate": 4.9435715454232615e-06, "loss": 0.5197, "step": 8184 }, { "epoch": 0.52, "grad_norm": 1.0641454458236694, "learning_rate": 4.942545618277547e-06, "loss": 0.5462, "step": 8185 }, { "epoch": 0.52, "grad_norm": 1.0573210716247559, "learning_rate": 4.9415196935510375e-06, "loss": 0.5063, "step": 8186 }, { "epoch": 0.52, "grad_norm": 1.0898346900939941, "learning_rate": 4.940493771286929e-06, "loss": 0.5157, "step": 8187 }, { "epoch": 0.52, "grad_norm": 1.0173532962799072, "learning_rate": 4.939467851528423e-06, "loss": 0.5257, "step": 8188 }, { "epoch": 0.52, "grad_norm": 0.9410024881362915, "learning_rate": 4.938441934318713e-06, "loss": 0.4716, "step": 8189 }, { "epoch": 0.52, "grad_norm": 0.9916033148765564, "learning_rate": 4.937416019701e-06, "loss": 0.5225, "step": 8190 }, { "epoch": 0.52, "grad_norm": 1.0031166076660156, "learning_rate": 4.93639010771848e-06, "loss": 0.4652, "step": 8191 }, { "epoch": 0.52, "grad_norm": 1.0323004722595215, "learning_rate": 4.9353641984143526e-06, "loss": 0.528, "step": 8192 }, { "epoch": 0.52, "grad_norm": 0.9885910153388977, "learning_rate": 4.934338291831813e-06, "loss": 0.5162, "step": 8193 }, { "epoch": 0.52, "grad_norm": 1.0446704626083374, "learning_rate": 4.93331238801406e-06, "loss": 0.5019, "step": 8194 }, { "epoch": 0.52, "grad_norm": 1.1711422204971313, "learning_rate": 4.932286487004291e-06, "loss": 0.596, "step": 8195 }, { "epoch": 0.52, "grad_norm": 1.0227354764938354, "learning_rate": 4.931260588845701e-06, "loss": 0.517, "step": 8196 }, { "epoch": 0.52, "grad_norm": 0.9722004532814026, "learning_rate": 4.930234693581489e-06, "loss": 0.531, "step": 8197 }, { "epoch": 0.52, "grad_norm": 1.0352272987365723, "learning_rate": 4.929208801254851e-06, "loss": 0.5281, "step": 8198 }, { "epoch": 0.52, "grad_norm": 1.0903185606002808, "learning_rate": 4.928182911908987e-06, "loss": 0.581, "step": 8199 }, { "epoch": 0.52, "grad_norm": 0.969157874584198, "learning_rate": 4.927157025587086e-06, "loss": 0.4981, "step": 8200 }, { "epoch": 0.52, "grad_norm": 0.968953549861908, "learning_rate": 4.926131142332351e-06, "loss": 0.5285, "step": 8201 }, { "epoch": 0.52, "grad_norm": 1.0791070461273193, "learning_rate": 4.925105262187978e-06, "loss": 0.5594, "step": 8202 }, { "epoch": 0.52, "grad_norm": 1.0036054849624634, "learning_rate": 4.924079385197162e-06, "loss": 0.5002, "step": 8203 }, { "epoch": 0.52, "grad_norm": 1.017087697982788, "learning_rate": 4.923053511403096e-06, "loss": 0.5861, "step": 8204 }, { "epoch": 0.52, "grad_norm": 0.9648655652999878, "learning_rate": 4.922027640848981e-06, "loss": 0.5136, "step": 8205 }, { "epoch": 0.52, "grad_norm": 1.1127065420150757, "learning_rate": 4.921001773578012e-06, "loss": 0.5753, "step": 8206 }, { "epoch": 0.52, "grad_norm": 1.1435563564300537, "learning_rate": 4.9199759096333825e-06, "loss": 0.5837, "step": 8207 }, { "epoch": 0.52, "grad_norm": 1.0285307168960571, "learning_rate": 4.918950049058289e-06, "loss": 0.5234, "step": 8208 }, { "epoch": 0.52, "grad_norm": 0.9845409989356995, "learning_rate": 4.9179241918959255e-06, "loss": 0.5261, "step": 8209 }, { "epoch": 0.52, "grad_norm": 0.9043030142784119, "learning_rate": 4.916898338189491e-06, "loss": 0.4721, "step": 8210 }, { "epoch": 0.52, "grad_norm": 0.9924689531326294, "learning_rate": 4.9158724879821775e-06, "loss": 0.5078, "step": 8211 }, { "epoch": 0.52, "grad_norm": 0.9461621046066284, "learning_rate": 4.914846641317181e-06, "loss": 0.4674, "step": 8212 }, { "epoch": 0.52, "grad_norm": 1.0354009866714478, "learning_rate": 4.913820798237695e-06, "loss": 0.5153, "step": 8213 }, { "epoch": 0.52, "grad_norm": 1.0802775621414185, "learning_rate": 4.912794958786917e-06, "loss": 0.5283, "step": 8214 }, { "epoch": 0.52, "grad_norm": 1.0849580764770508, "learning_rate": 4.91176912300804e-06, "loss": 0.5152, "step": 8215 }, { "epoch": 0.52, "grad_norm": 1.101136326789856, "learning_rate": 4.9107432909442575e-06, "loss": 0.5241, "step": 8216 }, { "epoch": 0.52, "grad_norm": 1.0249545574188232, "learning_rate": 4.909717462638763e-06, "loss": 0.5575, "step": 8217 }, { "epoch": 0.52, "grad_norm": 1.073858380317688, "learning_rate": 4.908691638134754e-06, "loss": 0.5598, "step": 8218 }, { "epoch": 0.52, "grad_norm": 1.037700891494751, "learning_rate": 4.907665817475424e-06, "loss": 0.5018, "step": 8219 }, { "epoch": 0.52, "grad_norm": 1.0014963150024414, "learning_rate": 4.906640000703963e-06, "loss": 0.5335, "step": 8220 }, { "epoch": 0.52, "grad_norm": 1.0454741716384888, "learning_rate": 4.905614187863565e-06, "loss": 0.5591, "step": 8221 }, { "epoch": 0.52, "grad_norm": 1.1414268016815186, "learning_rate": 4.904588378997428e-06, "loss": 0.5646, "step": 8222 }, { "epoch": 0.52, "grad_norm": 1.002533197402954, "learning_rate": 4.903562574148744e-06, "loss": 0.5633, "step": 8223 }, { "epoch": 0.52, "grad_norm": 1.0724406242370605, "learning_rate": 4.902536773360702e-06, "loss": 0.4934, "step": 8224 }, { "epoch": 0.52, "grad_norm": 0.9824497699737549, "learning_rate": 4.9015109766764985e-06, "loss": 0.5355, "step": 8225 }, { "epoch": 0.52, "grad_norm": 1.024309515953064, "learning_rate": 4.900485184139323e-06, "loss": 0.5124, "step": 8226 }, { "epoch": 0.52, "grad_norm": 1.008635401725769, "learning_rate": 4.899459395792373e-06, "loss": 0.533, "step": 8227 }, { "epoch": 0.52, "grad_norm": 0.9910851120948792, "learning_rate": 4.8984336116788355e-06, "loss": 0.4683, "step": 8228 }, { "epoch": 0.52, "grad_norm": 0.9705127477645874, "learning_rate": 4.897407831841908e-06, "loss": 0.4987, "step": 8229 }, { "epoch": 0.52, "grad_norm": 1.0432102680206299, "learning_rate": 4.8963820563247765e-06, "loss": 0.5263, "step": 8230 }, { "epoch": 0.52, "grad_norm": 1.0487322807312012, "learning_rate": 4.8953562851706385e-06, "loss": 0.4836, "step": 8231 }, { "epoch": 0.52, "grad_norm": 1.0196106433868408, "learning_rate": 4.894330518422683e-06, "loss": 0.5178, "step": 8232 }, { "epoch": 0.52, "grad_norm": 1.0171598196029663, "learning_rate": 4.893304756124102e-06, "loss": 0.5162, "step": 8233 }, { "epoch": 0.52, "grad_norm": 1.055601716041565, "learning_rate": 4.8922789983180854e-06, "loss": 0.5432, "step": 8234 }, { "epoch": 0.52, "grad_norm": 1.013988733291626, "learning_rate": 4.891253245047826e-06, "loss": 0.4949, "step": 8235 }, { "epoch": 0.52, "grad_norm": 0.9848381876945496, "learning_rate": 4.890227496356515e-06, "loss": 0.5142, "step": 8236 }, { "epoch": 0.52, "grad_norm": 0.9790887832641602, "learning_rate": 4.889201752287342e-06, "loss": 0.5156, "step": 8237 }, { "epoch": 0.52, "grad_norm": 1.032889723777771, "learning_rate": 4.888176012883496e-06, "loss": 0.537, "step": 8238 }, { "epoch": 0.52, "grad_norm": 0.9301667213439941, "learning_rate": 4.88715027818817e-06, "loss": 0.5155, "step": 8239 }, { "epoch": 0.52, "grad_norm": 1.0242931842803955, "learning_rate": 4.886124548244555e-06, "loss": 0.5752, "step": 8240 }, { "epoch": 0.52, "grad_norm": 1.0368810892105103, "learning_rate": 4.885098823095838e-06, "loss": 0.5318, "step": 8241 }, { "epoch": 0.52, "grad_norm": 1.0037161111831665, "learning_rate": 4.884073102785209e-06, "loss": 0.5325, "step": 8242 }, { "epoch": 0.52, "grad_norm": 1.0490317344665527, "learning_rate": 4.883047387355858e-06, "loss": 0.5131, "step": 8243 }, { "epoch": 0.52, "grad_norm": 1.0288115739822388, "learning_rate": 4.882021676850977e-06, "loss": 0.5095, "step": 8244 }, { "epoch": 0.52, "grad_norm": 1.0677239894866943, "learning_rate": 4.880995971313752e-06, "loss": 0.4767, "step": 8245 }, { "epoch": 0.52, "grad_norm": 1.0304758548736572, "learning_rate": 4.879970270787372e-06, "loss": 0.567, "step": 8246 }, { "epoch": 0.52, "grad_norm": 1.1207501888275146, "learning_rate": 4.878944575315025e-06, "loss": 0.5218, "step": 8247 }, { "epoch": 0.52, "grad_norm": 0.9568483829498291, "learning_rate": 4.877918884939903e-06, "loss": 0.5296, "step": 8248 }, { "epoch": 0.52, "grad_norm": 1.0585397481918335, "learning_rate": 4.8768931997051925e-06, "loss": 0.5318, "step": 8249 }, { "epoch": 0.52, "grad_norm": 0.9968030452728271, "learning_rate": 4.8758675196540795e-06, "loss": 0.5101, "step": 8250 }, { "epoch": 0.52, "grad_norm": 1.0286331176757812, "learning_rate": 4.874841844829753e-06, "loss": 0.5026, "step": 8251 }, { "epoch": 0.52, "grad_norm": 1.0161633491516113, "learning_rate": 4.873816175275402e-06, "loss": 0.5088, "step": 8252 }, { "epoch": 0.52, "grad_norm": 1.0071805715560913, "learning_rate": 4.8727905110342135e-06, "loss": 0.5457, "step": 8253 }, { "epoch": 0.52, "grad_norm": 0.9614889025688171, "learning_rate": 4.871764852149373e-06, "loss": 0.5366, "step": 8254 }, { "epoch": 0.52, "grad_norm": 1.009331226348877, "learning_rate": 4.87073919866407e-06, "loss": 0.5317, "step": 8255 }, { "epoch": 0.52, "grad_norm": 0.9906996488571167, "learning_rate": 4.869713550621487e-06, "loss": 0.5224, "step": 8256 }, { "epoch": 0.52, "grad_norm": 0.9802281856536865, "learning_rate": 4.868687908064815e-06, "loss": 0.5189, "step": 8257 }, { "epoch": 0.52, "grad_norm": 0.9882171750068665, "learning_rate": 4.867662271037238e-06, "loss": 0.4723, "step": 8258 }, { "epoch": 0.52, "grad_norm": 1.1115453243255615, "learning_rate": 4.866636639581943e-06, "loss": 0.5251, "step": 8259 }, { "epoch": 0.52, "grad_norm": 0.9748610258102417, "learning_rate": 4.865611013742114e-06, "loss": 0.5112, "step": 8260 }, { "epoch": 0.52, "grad_norm": 1.0606300830841064, "learning_rate": 4.864585393560939e-06, "loss": 0.5332, "step": 8261 }, { "epoch": 0.52, "grad_norm": 1.117438793182373, "learning_rate": 4.863559779081601e-06, "loss": 0.5163, "step": 8262 }, { "epoch": 0.52, "grad_norm": 0.9546915888786316, "learning_rate": 4.862534170347287e-06, "loss": 0.5315, "step": 8263 }, { "epoch": 0.52, "grad_norm": 0.9329437613487244, "learning_rate": 4.861508567401179e-06, "loss": 0.4849, "step": 8264 }, { "epoch": 0.52, "grad_norm": 1.0045757293701172, "learning_rate": 4.860482970286465e-06, "loss": 0.4874, "step": 8265 }, { "epoch": 0.52, "grad_norm": 0.9793338775634766, "learning_rate": 4.859457379046327e-06, "loss": 0.5172, "step": 8266 }, { "epoch": 0.52, "grad_norm": 1.05069899559021, "learning_rate": 4.858431793723952e-06, "loss": 0.4986, "step": 8267 }, { "epoch": 0.52, "grad_norm": 0.9594676494598389, "learning_rate": 4.857406214362518e-06, "loss": 0.4993, "step": 8268 }, { "epoch": 0.52, "grad_norm": 1.0364305973052979, "learning_rate": 4.856380641005215e-06, "loss": 0.5177, "step": 8269 }, { "epoch": 0.52, "grad_norm": 1.0088708400726318, "learning_rate": 4.855355073695223e-06, "loss": 0.587, "step": 8270 }, { "epoch": 0.52, "grad_norm": 1.0791456699371338, "learning_rate": 4.8543295124757265e-06, "loss": 0.5208, "step": 8271 }, { "epoch": 0.52, "grad_norm": 1.078839659690857, "learning_rate": 4.8533039573899075e-06, "loss": 0.537, "step": 8272 }, { "epoch": 0.52, "grad_norm": 1.02333402633667, "learning_rate": 4.852278408480946e-06, "loss": 0.5355, "step": 8273 }, { "epoch": 0.52, "grad_norm": 1.1506671905517578, "learning_rate": 4.8512528657920275e-06, "loss": 0.5161, "step": 8274 }, { "epoch": 0.52, "grad_norm": 1.0825648307800293, "learning_rate": 4.850227329366335e-06, "loss": 0.5492, "step": 8275 }, { "epoch": 0.52, "grad_norm": 1.0078729391098022, "learning_rate": 4.849201799247049e-06, "loss": 0.4773, "step": 8276 }, { "epoch": 0.52, "grad_norm": 0.9716581702232361, "learning_rate": 4.848176275477348e-06, "loss": 0.4887, "step": 8277 }, { "epoch": 0.52, "grad_norm": 0.9962989091873169, "learning_rate": 4.847150758100418e-06, "loss": 0.5134, "step": 8278 }, { "epoch": 0.52, "grad_norm": 1.0206736326217651, "learning_rate": 4.846125247159437e-06, "loss": 0.513, "step": 8279 }, { "epoch": 0.52, "grad_norm": 0.9762862920761108, "learning_rate": 4.845099742697588e-06, "loss": 0.4922, "step": 8280 }, { "epoch": 0.52, "grad_norm": 0.9894128441810608, "learning_rate": 4.844074244758047e-06, "loss": 0.4491, "step": 8281 }, { "epoch": 0.52, "grad_norm": 1.0172109603881836, "learning_rate": 4.843048753383998e-06, "loss": 0.5184, "step": 8282 }, { "epoch": 0.52, "grad_norm": 1.0501362085342407, "learning_rate": 4.8420232686186226e-06, "loss": 0.5013, "step": 8283 }, { "epoch": 0.52, "grad_norm": 1.0115443468093872, "learning_rate": 4.840997790505097e-06, "loss": 0.4965, "step": 8284 }, { "epoch": 0.52, "grad_norm": 1.0318505764007568, "learning_rate": 4.8399723190866e-06, "loss": 0.4768, "step": 8285 }, { "epoch": 0.52, "grad_norm": 0.9210131764411926, "learning_rate": 4.838946854406311e-06, "loss": 0.5059, "step": 8286 }, { "epoch": 0.53, "grad_norm": 1.0285263061523438, "learning_rate": 4.8379213965074125e-06, "loss": 0.5317, "step": 8287 }, { "epoch": 0.53, "grad_norm": 1.0240325927734375, "learning_rate": 4.83689594543308e-06, "loss": 0.5145, "step": 8288 }, { "epoch": 0.53, "grad_norm": 1.0739846229553223, "learning_rate": 4.835870501226489e-06, "loss": 0.5382, "step": 8289 }, { "epoch": 0.53, "grad_norm": 1.0172021389007568, "learning_rate": 4.834845063930821e-06, "loss": 0.5106, "step": 8290 }, { "epoch": 0.53, "grad_norm": 0.904426634311676, "learning_rate": 4.833819633589254e-06, "loss": 0.5247, "step": 8291 }, { "epoch": 0.53, "grad_norm": 0.9170308113098145, "learning_rate": 4.832794210244965e-06, "loss": 0.4646, "step": 8292 }, { "epoch": 0.53, "grad_norm": 1.143886923789978, "learning_rate": 4.831768793941129e-06, "loss": 0.569, "step": 8293 }, { "epoch": 0.53, "grad_norm": 0.993350625038147, "learning_rate": 4.830743384720922e-06, "loss": 0.514, "step": 8294 }, { "epoch": 0.53, "grad_norm": 1.063253402709961, "learning_rate": 4.829717982627525e-06, "loss": 0.5275, "step": 8295 }, { "epoch": 0.53, "grad_norm": 1.0138853788375854, "learning_rate": 4.82869258770411e-06, "loss": 0.5617, "step": 8296 }, { "epoch": 0.53, "grad_norm": 0.8883842825889587, "learning_rate": 4.827667199993855e-06, "loss": 0.4943, "step": 8297 }, { "epoch": 0.53, "grad_norm": 1.048701524734497, "learning_rate": 4.826641819539933e-06, "loss": 0.493, "step": 8298 }, { "epoch": 0.53, "grad_norm": 0.979813277721405, "learning_rate": 4.825616446385523e-06, "loss": 0.5189, "step": 8299 }, { "epoch": 0.53, "grad_norm": 0.9900126457214355, "learning_rate": 4.824591080573797e-06, "loss": 0.5252, "step": 8300 }, { "epoch": 0.53, "grad_norm": 1.0179758071899414, "learning_rate": 4.823565722147932e-06, "loss": 0.544, "step": 8301 }, { "epoch": 0.53, "grad_norm": 1.0025790929794312, "learning_rate": 4.8225403711511e-06, "loss": 0.4974, "step": 8302 }, { "epoch": 0.53, "grad_norm": 1.0100517272949219, "learning_rate": 4.821515027626473e-06, "loss": 0.5133, "step": 8303 }, { "epoch": 0.53, "grad_norm": 0.9746113419532776, "learning_rate": 4.8204896916172285e-06, "loss": 0.4728, "step": 8304 }, { "epoch": 0.53, "grad_norm": 0.9883295893669128, "learning_rate": 4.819464363166539e-06, "loss": 0.473, "step": 8305 }, { "epoch": 0.53, "grad_norm": 1.025826096534729, "learning_rate": 4.818439042317578e-06, "loss": 0.5227, "step": 8306 }, { "epoch": 0.53, "grad_norm": 0.9911962747573853, "learning_rate": 4.817413729113516e-06, "loss": 0.5128, "step": 8307 }, { "epoch": 0.53, "grad_norm": 1.089560627937317, "learning_rate": 4.816388423597527e-06, "loss": 0.5084, "step": 8308 }, { "epoch": 0.53, "grad_norm": 1.0557607412338257, "learning_rate": 4.815363125812784e-06, "loss": 0.5609, "step": 8309 }, { "epoch": 0.53, "grad_norm": 1.0592869520187378, "learning_rate": 4.814337835802457e-06, "loss": 0.5411, "step": 8310 }, { "epoch": 0.53, "grad_norm": 1.0457826852798462, "learning_rate": 4.813312553609716e-06, "loss": 0.4667, "step": 8311 }, { "epoch": 0.53, "grad_norm": 0.9835514426231384, "learning_rate": 4.812287279277735e-06, "loss": 0.5102, "step": 8312 }, { "epoch": 0.53, "grad_norm": 0.9978135228157043, "learning_rate": 4.811262012849685e-06, "loss": 0.5234, "step": 8313 }, { "epoch": 0.53, "grad_norm": 0.9925533533096313, "learning_rate": 4.810236754368735e-06, "loss": 0.5346, "step": 8314 }, { "epoch": 0.53, "grad_norm": 1.0429518222808838, "learning_rate": 4.8092115038780525e-06, "loss": 0.5445, "step": 8315 }, { "epoch": 0.53, "grad_norm": 1.012973427772522, "learning_rate": 4.808186261420811e-06, "loss": 0.5187, "step": 8316 }, { "epoch": 0.53, "grad_norm": 0.9603223204612732, "learning_rate": 4.80716102704018e-06, "loss": 0.5143, "step": 8317 }, { "epoch": 0.53, "grad_norm": 1.066994547843933, "learning_rate": 4.806135800779328e-06, "loss": 0.5151, "step": 8318 }, { "epoch": 0.53, "grad_norm": 0.9137266874313354, "learning_rate": 4.805110582681421e-06, "loss": 0.4991, "step": 8319 }, { "epoch": 0.53, "grad_norm": 1.0653810501098633, "learning_rate": 4.804085372789629e-06, "loss": 0.5267, "step": 8320 }, { "epoch": 0.53, "grad_norm": 1.0767114162445068, "learning_rate": 4.803060171147122e-06, "loss": 0.5417, "step": 8321 }, { "epoch": 0.53, "grad_norm": 1.0749000310897827, "learning_rate": 4.802034977797066e-06, "loss": 0.5645, "step": 8322 }, { "epoch": 0.53, "grad_norm": 1.0301631689071655, "learning_rate": 4.801009792782627e-06, "loss": 0.5231, "step": 8323 }, { "epoch": 0.53, "grad_norm": 0.8945549726486206, "learning_rate": 4.799984616146974e-06, "loss": 0.495, "step": 8324 }, { "epoch": 0.53, "grad_norm": 1.0703390836715698, "learning_rate": 4.798959447933274e-06, "loss": 0.5903, "step": 8325 }, { "epoch": 0.53, "grad_norm": 1.0251332521438599, "learning_rate": 4.797934288184692e-06, "loss": 0.51, "step": 8326 }, { "epoch": 0.53, "grad_norm": 1.1210784912109375, "learning_rate": 4.796909136944394e-06, "loss": 0.5686, "step": 8327 }, { "epoch": 0.53, "grad_norm": 1.000189185142517, "learning_rate": 4.795883994255544e-06, "loss": 0.4993, "step": 8328 }, { "epoch": 0.53, "grad_norm": 1.0635324716567993, "learning_rate": 4.794858860161311e-06, "loss": 0.6022, "step": 8329 }, { "epoch": 0.53, "grad_norm": 1.010852575302124, "learning_rate": 4.793833734704858e-06, "loss": 0.5292, "step": 8330 }, { "epoch": 0.53, "grad_norm": 1.0470019578933716, "learning_rate": 4.792808617929348e-06, "loss": 0.5249, "step": 8331 }, { "epoch": 0.53, "grad_norm": 0.9671838283538818, "learning_rate": 4.791783509877948e-06, "loss": 0.5258, "step": 8332 }, { "epoch": 0.53, "grad_norm": 1.0945289134979248, "learning_rate": 4.790758410593818e-06, "loss": 0.5389, "step": 8333 }, { "epoch": 0.53, "grad_norm": 1.033327341079712, "learning_rate": 4.789733320120124e-06, "loss": 0.5051, "step": 8334 }, { "epoch": 0.53, "grad_norm": 1.0071804523468018, "learning_rate": 4.788708238500029e-06, "loss": 0.5091, "step": 8335 }, { "epoch": 0.53, "grad_norm": 1.0532846450805664, "learning_rate": 4.787683165776695e-06, "loss": 0.5137, "step": 8336 }, { "epoch": 0.53, "grad_norm": 1.0724596977233887, "learning_rate": 4.786658101993283e-06, "loss": 0.5375, "step": 8337 }, { "epoch": 0.53, "grad_norm": 0.977753221988678, "learning_rate": 4.785633047192959e-06, "loss": 0.4983, "step": 8338 }, { "epoch": 0.53, "grad_norm": 1.0433781147003174, "learning_rate": 4.7846080014188786e-06, "loss": 0.5464, "step": 8339 }, { "epoch": 0.53, "grad_norm": 0.9462462663650513, "learning_rate": 4.783582964714209e-06, "loss": 0.5299, "step": 8340 }, { "epoch": 0.53, "grad_norm": 1.0582375526428223, "learning_rate": 4.782557937122104e-06, "loss": 0.5019, "step": 8341 }, { "epoch": 0.53, "grad_norm": 1.099599838256836, "learning_rate": 4.781532918685731e-06, "loss": 0.4984, "step": 8342 }, { "epoch": 0.53, "grad_norm": 1.1724430322647095, "learning_rate": 4.780507909448246e-06, "loss": 0.5364, "step": 8343 }, { "epoch": 0.53, "grad_norm": 1.1686534881591797, "learning_rate": 4.77948290945281e-06, "loss": 0.507, "step": 8344 }, { "epoch": 0.53, "grad_norm": 1.1176148653030396, "learning_rate": 4.778457918742579e-06, "loss": 0.5317, "step": 8345 }, { "epoch": 0.53, "grad_norm": 1.0374079942703247, "learning_rate": 4.777432937360716e-06, "loss": 0.5507, "step": 8346 }, { "epoch": 0.53, "grad_norm": 1.043051838874817, "learning_rate": 4.776407965350378e-06, "loss": 0.5252, "step": 8347 }, { "epoch": 0.53, "grad_norm": 0.9879152774810791, "learning_rate": 4.775383002754723e-06, "loss": 0.5514, "step": 8348 }, { "epoch": 0.53, "grad_norm": 0.9836734533309937, "learning_rate": 4.7743580496169095e-06, "loss": 0.5587, "step": 8349 }, { "epoch": 0.53, "grad_norm": 1.044381856918335, "learning_rate": 4.773333105980091e-06, "loss": 0.5032, "step": 8350 }, { "epoch": 0.53, "grad_norm": 0.9817684292793274, "learning_rate": 4.772308171887427e-06, "loss": 0.4559, "step": 8351 }, { "epoch": 0.53, "grad_norm": 1.1289509534835815, "learning_rate": 4.771283247382076e-06, "loss": 0.5294, "step": 8352 }, { "epoch": 0.53, "grad_norm": 0.9817908406257629, "learning_rate": 4.770258332507191e-06, "loss": 0.4814, "step": 8353 }, { "epoch": 0.53, "grad_norm": 1.103776454925537, "learning_rate": 4.7692334273059265e-06, "loss": 0.5398, "step": 8354 }, { "epoch": 0.53, "grad_norm": 1.0110007524490356, "learning_rate": 4.768208531821441e-06, "loss": 0.4905, "step": 8355 }, { "epoch": 0.53, "grad_norm": 0.983505368232727, "learning_rate": 4.767183646096889e-06, "loss": 0.5312, "step": 8356 }, { "epoch": 0.53, "grad_norm": 1.0207186937332153, "learning_rate": 4.766158770175422e-06, "loss": 0.5324, "step": 8357 }, { "epoch": 0.53, "grad_norm": 1.0847700834274292, "learning_rate": 4.765133904100196e-06, "loss": 0.4881, "step": 8358 }, { "epoch": 0.53, "grad_norm": 1.1356765031814575, "learning_rate": 4.764109047914365e-06, "loss": 0.5199, "step": 8359 }, { "epoch": 0.53, "grad_norm": 1.091707706451416, "learning_rate": 4.763084201661081e-06, "loss": 0.5283, "step": 8360 }, { "epoch": 0.53, "grad_norm": 1.0814317464828491, "learning_rate": 4.762059365383497e-06, "loss": 0.5562, "step": 8361 }, { "epoch": 0.53, "grad_norm": 0.9616392850875854, "learning_rate": 4.761034539124765e-06, "loss": 0.4963, "step": 8362 }, { "epoch": 0.53, "grad_norm": 1.1386058330535889, "learning_rate": 4.760009722928038e-06, "loss": 0.5513, "step": 8363 }, { "epoch": 0.53, "grad_norm": 0.9828891158103943, "learning_rate": 4.7589849168364675e-06, "loss": 0.5039, "step": 8364 }, { "epoch": 0.53, "grad_norm": 1.0613303184509277, "learning_rate": 4.7579601208932015e-06, "loss": 0.5441, "step": 8365 }, { "epoch": 0.53, "grad_norm": 1.0633760690689087, "learning_rate": 4.756935335141395e-06, "loss": 0.5289, "step": 8366 }, { "epoch": 0.53, "grad_norm": 1.004191279411316, "learning_rate": 4.755910559624194e-06, "loss": 0.5025, "step": 8367 }, { "epoch": 0.53, "grad_norm": 1.016272783279419, "learning_rate": 4.754885794384752e-06, "loss": 0.5273, "step": 8368 }, { "epoch": 0.53, "grad_norm": 1.0552794933319092, "learning_rate": 4.7538610394662156e-06, "loss": 0.47, "step": 8369 }, { "epoch": 0.53, "grad_norm": 0.9942074418067932, "learning_rate": 4.7528362949117355e-06, "loss": 0.4947, "step": 8370 }, { "epoch": 0.53, "grad_norm": 1.042699933052063, "learning_rate": 4.751811560764457e-06, "loss": 0.4983, "step": 8371 }, { "epoch": 0.53, "grad_norm": 1.0090547800064087, "learning_rate": 4.750786837067532e-06, "loss": 0.5255, "step": 8372 }, { "epoch": 0.53, "grad_norm": 1.2265372276306152, "learning_rate": 4.7497621238641055e-06, "loss": 0.5495, "step": 8373 }, { "epoch": 0.53, "grad_norm": 1.0453957319259644, "learning_rate": 4.7487374211973266e-06, "loss": 0.5459, "step": 8374 }, { "epoch": 0.53, "grad_norm": 0.9630346298217773, "learning_rate": 4.747712729110339e-06, "loss": 0.4726, "step": 8375 }, { "epoch": 0.53, "grad_norm": 1.0352476835250854, "learning_rate": 4.746688047646293e-06, "loss": 0.497, "step": 8376 }, { "epoch": 0.53, "grad_norm": 1.0061421394348145, "learning_rate": 4.745663376848331e-06, "loss": 0.4905, "step": 8377 }, { "epoch": 0.53, "grad_norm": 1.0827962160110474, "learning_rate": 4.744638716759599e-06, "loss": 0.5524, "step": 8378 }, { "epoch": 0.53, "grad_norm": 0.9328921437263489, "learning_rate": 4.743614067423245e-06, "loss": 0.5545, "step": 8379 }, { "epoch": 0.53, "grad_norm": 0.996181309223175, "learning_rate": 4.742589428882406e-06, "loss": 0.4888, "step": 8380 }, { "epoch": 0.53, "grad_norm": 0.9578323364257812, "learning_rate": 4.7415648011802335e-06, "loss": 0.5488, "step": 8381 }, { "epoch": 0.53, "grad_norm": 1.0353081226348877, "learning_rate": 4.7405401843598686e-06, "loss": 0.5308, "step": 8382 }, { "epoch": 0.53, "grad_norm": 0.9680172204971313, "learning_rate": 4.739515578464454e-06, "loss": 0.4932, "step": 8383 }, { "epoch": 0.53, "grad_norm": 1.0518368482589722, "learning_rate": 4.73849098353713e-06, "loss": 0.5648, "step": 8384 }, { "epoch": 0.53, "grad_norm": 0.9996711611747742, "learning_rate": 4.737466399621043e-06, "loss": 0.5298, "step": 8385 }, { "epoch": 0.53, "grad_norm": 1.0345032215118408, "learning_rate": 4.736441826759332e-06, "loss": 0.4943, "step": 8386 }, { "epoch": 0.53, "grad_norm": 1.020096778869629, "learning_rate": 4.73541726499514e-06, "loss": 0.5162, "step": 8387 }, { "epoch": 0.53, "grad_norm": 1.0417054891586304, "learning_rate": 4.734392714371603e-06, "loss": 0.5666, "step": 8388 }, { "epoch": 0.53, "grad_norm": 1.0406177043914795, "learning_rate": 4.733368174931867e-06, "loss": 0.5087, "step": 8389 }, { "epoch": 0.53, "grad_norm": 1.0037623643875122, "learning_rate": 4.7323436467190705e-06, "loss": 0.488, "step": 8390 }, { "epoch": 0.53, "grad_norm": 1.1015545129776, "learning_rate": 4.7313191297763524e-06, "loss": 0.5243, "step": 8391 }, { "epoch": 0.53, "grad_norm": 0.9793073534965515, "learning_rate": 4.730294624146849e-06, "loss": 0.5298, "step": 8392 }, { "epoch": 0.53, "grad_norm": 1.1413781642913818, "learning_rate": 4.729270129873701e-06, "loss": 0.5088, "step": 8393 }, { "epoch": 0.53, "grad_norm": 1.040779709815979, "learning_rate": 4.728245647000047e-06, "loss": 0.512, "step": 8394 }, { "epoch": 0.53, "grad_norm": 0.9371123313903809, "learning_rate": 4.7272211755690245e-06, "loss": 0.5446, "step": 8395 }, { "epoch": 0.53, "grad_norm": 0.961765706539154, "learning_rate": 4.7261967156237676e-06, "loss": 0.502, "step": 8396 }, { "epoch": 0.53, "grad_norm": 0.990039050579071, "learning_rate": 4.725172267207413e-06, "loss": 0.4656, "step": 8397 }, { "epoch": 0.53, "grad_norm": 0.9950096607208252, "learning_rate": 4.724147830363101e-06, "loss": 0.5172, "step": 8398 }, { "epoch": 0.53, "grad_norm": 0.9665403366088867, "learning_rate": 4.723123405133965e-06, "loss": 0.5397, "step": 8399 }, { "epoch": 0.53, "grad_norm": 1.0354504585266113, "learning_rate": 4.722098991563137e-06, "loss": 0.5202, "step": 8400 }, { "epoch": 0.53, "grad_norm": 1.0049880743026733, "learning_rate": 4.721074589693753e-06, "loss": 0.5133, "step": 8401 }, { "epoch": 0.53, "grad_norm": 1.0299439430236816, "learning_rate": 4.72005019956895e-06, "loss": 0.5117, "step": 8402 }, { "epoch": 0.53, "grad_norm": 1.0537679195404053, "learning_rate": 4.719025821231859e-06, "loss": 0.5176, "step": 8403 }, { "epoch": 0.53, "grad_norm": 1.0134973526000977, "learning_rate": 4.718001454725612e-06, "loss": 0.4598, "step": 8404 }, { "epoch": 0.53, "grad_norm": 1.0414093732833862, "learning_rate": 4.716977100093342e-06, "loss": 0.5476, "step": 8405 }, { "epoch": 0.53, "grad_norm": 1.0167829990386963, "learning_rate": 4.715952757378183e-06, "loss": 0.51, "step": 8406 }, { "epoch": 0.53, "grad_norm": 1.0440165996551514, "learning_rate": 4.714928426623266e-06, "loss": 0.5288, "step": 8407 }, { "epoch": 0.53, "grad_norm": 1.0283722877502441, "learning_rate": 4.71390410787172e-06, "loss": 0.5408, "step": 8408 }, { "epoch": 0.53, "grad_norm": 1.045358657836914, "learning_rate": 4.712879801166676e-06, "loss": 0.5227, "step": 8409 }, { "epoch": 0.53, "grad_norm": 1.03081476688385, "learning_rate": 4.711855506551267e-06, "loss": 0.5446, "step": 8410 }, { "epoch": 0.53, "grad_norm": 1.0450913906097412, "learning_rate": 4.71083122406862e-06, "loss": 0.5193, "step": 8411 }, { "epoch": 0.53, "grad_norm": 1.0645527839660645, "learning_rate": 4.709806953761863e-06, "loss": 0.5577, "step": 8412 }, { "epoch": 0.53, "grad_norm": 0.9882965683937073, "learning_rate": 4.7087826956741266e-06, "loss": 0.532, "step": 8413 }, { "epoch": 0.53, "grad_norm": 1.0072165727615356, "learning_rate": 4.707758449848536e-06, "loss": 0.4521, "step": 8414 }, { "epoch": 0.53, "grad_norm": 1.1032558679580688, "learning_rate": 4.7067342163282225e-06, "loss": 0.5005, "step": 8415 }, { "epoch": 0.53, "grad_norm": 0.9673135876655579, "learning_rate": 4.70570999515631e-06, "loss": 0.5064, "step": 8416 }, { "epoch": 0.53, "grad_norm": 1.0546165704727173, "learning_rate": 4.704685786375927e-06, "loss": 0.5241, "step": 8417 }, { "epoch": 0.53, "grad_norm": 1.070737361907959, "learning_rate": 4.703661590030196e-06, "loss": 0.519, "step": 8418 }, { "epoch": 0.53, "grad_norm": 1.0162523984909058, "learning_rate": 4.702637406162247e-06, "loss": 0.5676, "step": 8419 }, { "epoch": 0.53, "grad_norm": 0.9657307863235474, "learning_rate": 4.7016132348152e-06, "loss": 0.4602, "step": 8420 }, { "epoch": 0.53, "grad_norm": 1.0089730024337769, "learning_rate": 4.700589076032184e-06, "loss": 0.5214, "step": 8421 }, { "epoch": 0.53, "grad_norm": 1.0143710374832153, "learning_rate": 4.699564929856318e-06, "loss": 0.5651, "step": 8422 }, { "epoch": 0.53, "grad_norm": 0.9930520057678223, "learning_rate": 4.698540796330729e-06, "loss": 0.5328, "step": 8423 }, { "epoch": 0.53, "grad_norm": 0.998888373374939, "learning_rate": 4.697516675498538e-06, "loss": 0.5252, "step": 8424 }, { "epoch": 0.53, "grad_norm": 1.0171403884887695, "learning_rate": 4.69649256740287e-06, "loss": 0.5095, "step": 8425 }, { "epoch": 0.53, "grad_norm": 1.1262316703796387, "learning_rate": 4.695468472086841e-06, "loss": 0.5916, "step": 8426 }, { "epoch": 0.53, "grad_norm": 1.0274018049240112, "learning_rate": 4.694444389593576e-06, "loss": 0.5097, "step": 8427 }, { "epoch": 0.53, "grad_norm": 1.0473886728286743, "learning_rate": 4.693420319966195e-06, "loss": 0.5476, "step": 8428 }, { "epoch": 0.53, "grad_norm": 0.9588348269462585, "learning_rate": 4.692396263247818e-06, "loss": 0.5464, "step": 8429 }, { "epoch": 0.53, "grad_norm": 0.990731418132782, "learning_rate": 4.691372219481564e-06, "loss": 0.5093, "step": 8430 }, { "epoch": 0.53, "grad_norm": 0.9880147576332092, "learning_rate": 4.690348188710552e-06, "loss": 0.5624, "step": 8431 }, { "epoch": 0.53, "grad_norm": 1.1084983348846436, "learning_rate": 4.689324170977901e-06, "loss": 0.5151, "step": 8432 }, { "epoch": 0.53, "grad_norm": 0.9567615985870361, "learning_rate": 4.688300166326729e-06, "loss": 0.5076, "step": 8433 }, { "epoch": 0.53, "grad_norm": 1.052372932434082, "learning_rate": 4.6872761748001515e-06, "loss": 0.519, "step": 8434 }, { "epoch": 0.53, "grad_norm": 1.0967961549758911, "learning_rate": 4.6862521964412865e-06, "loss": 0.5539, "step": 8435 }, { "epoch": 0.53, "grad_norm": 1.1107224225997925, "learning_rate": 4.6852282312932505e-06, "loss": 0.5518, "step": 8436 }, { "epoch": 0.53, "grad_norm": 1.0529141426086426, "learning_rate": 4.684204279399159e-06, "loss": 0.5703, "step": 8437 }, { "epoch": 0.53, "grad_norm": 1.0516138076782227, "learning_rate": 4.683180340802126e-06, "loss": 0.5452, "step": 8438 }, { "epoch": 0.53, "grad_norm": 1.0087449550628662, "learning_rate": 4.682156415545266e-06, "loss": 0.4957, "step": 8439 }, { "epoch": 0.53, "grad_norm": 1.019419550895691, "learning_rate": 4.681132503671696e-06, "loss": 0.511, "step": 8440 }, { "epoch": 0.53, "grad_norm": 1.1684471368789673, "learning_rate": 4.680108605224526e-06, "loss": 0.5277, "step": 8441 }, { "epoch": 0.53, "grad_norm": 1.0348973274230957, "learning_rate": 4.679084720246869e-06, "loss": 0.5267, "step": 8442 }, { "epoch": 0.53, "grad_norm": 0.9964972138404846, "learning_rate": 4.67806084878184e-06, "loss": 0.5104, "step": 8443 }, { "epoch": 0.53, "grad_norm": 0.9671248197555542, "learning_rate": 4.677036990872546e-06, "loss": 0.4965, "step": 8444 }, { "epoch": 0.54, "grad_norm": 1.0187405347824097, "learning_rate": 4.676013146562103e-06, "loss": 0.5092, "step": 8445 }, { "epoch": 0.54, "grad_norm": 1.2161133289337158, "learning_rate": 4.674989315893618e-06, "loss": 0.555, "step": 8446 }, { "epoch": 0.54, "grad_norm": 0.9527446627616882, "learning_rate": 4.6739654989102034e-06, "loss": 0.5235, "step": 8447 }, { "epoch": 0.54, "grad_norm": 1.0290900468826294, "learning_rate": 4.672941695654965e-06, "loss": 0.5317, "step": 8448 }, { "epoch": 0.54, "grad_norm": 0.9144114851951599, "learning_rate": 4.6719179061710164e-06, "loss": 0.4646, "step": 8449 }, { "epoch": 0.54, "grad_norm": 1.0445489883422852, "learning_rate": 4.670894130501462e-06, "loss": 0.4481, "step": 8450 }, { "epoch": 0.54, "grad_norm": 1.0031083822250366, "learning_rate": 4.669870368689414e-06, "loss": 0.5274, "step": 8451 }, { "epoch": 0.54, "grad_norm": 1.1422038078308105, "learning_rate": 4.668846620777972e-06, "loss": 0.5328, "step": 8452 }, { "epoch": 0.54, "grad_norm": 0.969397783279419, "learning_rate": 4.6678228868102495e-06, "loss": 0.54, "step": 8453 }, { "epoch": 0.54, "grad_norm": 0.9985876679420471, "learning_rate": 4.666799166829349e-06, "loss": 0.531, "step": 8454 }, { "epoch": 0.54, "grad_norm": 1.0527873039245605, "learning_rate": 4.665775460878377e-06, "loss": 0.5221, "step": 8455 }, { "epoch": 0.54, "grad_norm": 0.9826925992965698, "learning_rate": 4.664751769000436e-06, "loss": 0.4628, "step": 8456 }, { "epoch": 0.54, "grad_norm": 1.0141879320144653, "learning_rate": 4.663728091238634e-06, "loss": 0.5114, "step": 8457 }, { "epoch": 0.54, "grad_norm": 0.9625977277755737, "learning_rate": 4.662704427636071e-06, "loss": 0.4867, "step": 8458 }, { "epoch": 0.54, "grad_norm": 1.0554953813552856, "learning_rate": 4.661680778235852e-06, "loss": 0.5338, "step": 8459 }, { "epoch": 0.54, "grad_norm": 1.0381543636322021, "learning_rate": 4.660657143081079e-06, "loss": 0.5401, "step": 8460 }, { "epoch": 0.54, "grad_norm": 1.0382134914398193, "learning_rate": 4.65963352221485e-06, "loss": 0.5198, "step": 8461 }, { "epoch": 0.54, "grad_norm": 0.9821277856826782, "learning_rate": 4.658609915680272e-06, "loss": 0.4602, "step": 8462 }, { "epoch": 0.54, "grad_norm": 0.9753869771957397, "learning_rate": 4.657586323520443e-06, "loss": 0.4994, "step": 8463 }, { "epoch": 0.54, "grad_norm": 1.028468132019043, "learning_rate": 4.6565627457784625e-06, "loss": 0.5271, "step": 8464 }, { "epoch": 0.54, "grad_norm": 0.9985352158546448, "learning_rate": 4.655539182497428e-06, "loss": 0.5155, "step": 8465 }, { "epoch": 0.54, "grad_norm": 0.9296327233314514, "learning_rate": 4.654515633720442e-06, "loss": 0.4305, "step": 8466 }, { "epoch": 0.54, "grad_norm": 0.9554487466812134, "learning_rate": 4.653492099490601e-06, "loss": 0.4575, "step": 8467 }, { "epoch": 0.54, "grad_norm": 1.037149429321289, "learning_rate": 4.6524685798510025e-06, "loss": 0.5524, "step": 8468 }, { "epoch": 0.54, "grad_norm": 0.9794292449951172, "learning_rate": 4.651445074844742e-06, "loss": 0.5105, "step": 8469 }, { "epoch": 0.54, "grad_norm": 1.0618610382080078, "learning_rate": 4.650421584514917e-06, "loss": 0.5193, "step": 8470 }, { "epoch": 0.54, "grad_norm": 0.9309068918228149, "learning_rate": 4.649398108904624e-06, "loss": 0.4767, "step": 8471 }, { "epoch": 0.54, "grad_norm": 1.0112494230270386, "learning_rate": 4.648374648056957e-06, "loss": 0.5232, "step": 8472 }, { "epoch": 0.54, "grad_norm": 1.0387423038482666, "learning_rate": 4.64735120201501e-06, "loss": 0.5237, "step": 8473 }, { "epoch": 0.54, "grad_norm": 1.0283302068710327, "learning_rate": 4.646327770821875e-06, "loss": 0.5583, "step": 8474 }, { "epoch": 0.54, "grad_norm": 1.013685941696167, "learning_rate": 4.64530435452065e-06, "loss": 0.5209, "step": 8475 }, { "epoch": 0.54, "grad_norm": 1.009364128112793, "learning_rate": 4.644280953154424e-06, "loss": 0.5592, "step": 8476 }, { "epoch": 0.54, "grad_norm": 1.0646246671676636, "learning_rate": 4.643257566766289e-06, "loss": 0.5199, "step": 8477 }, { "epoch": 0.54, "grad_norm": 0.9765976071357727, "learning_rate": 4.642234195399336e-06, "loss": 0.5128, "step": 8478 }, { "epoch": 0.54, "grad_norm": 0.9888172149658203, "learning_rate": 4.641210839096659e-06, "loss": 0.5092, "step": 8479 }, { "epoch": 0.54, "grad_norm": 1.0132274627685547, "learning_rate": 4.6401874979013455e-06, "loss": 0.5179, "step": 8480 }, { "epoch": 0.54, "grad_norm": 1.0327469110488892, "learning_rate": 4.639164171856483e-06, "loss": 0.5518, "step": 8481 }, { "epoch": 0.54, "grad_norm": 1.0490937232971191, "learning_rate": 4.6381408610051605e-06, "loss": 0.5321, "step": 8482 }, { "epoch": 0.54, "grad_norm": 1.0915193557739258, "learning_rate": 4.63711756539047e-06, "loss": 0.5386, "step": 8483 }, { "epoch": 0.54, "grad_norm": 0.9249290823936462, "learning_rate": 4.636094285055497e-06, "loss": 0.4814, "step": 8484 }, { "epoch": 0.54, "grad_norm": 0.9355958700180054, "learning_rate": 4.635071020043326e-06, "loss": 0.4708, "step": 8485 }, { "epoch": 0.54, "grad_norm": 1.037896752357483, "learning_rate": 4.634047770397044e-06, "loss": 0.5087, "step": 8486 }, { "epoch": 0.54, "grad_norm": 1.0038915872573853, "learning_rate": 4.633024536159739e-06, "loss": 0.4977, "step": 8487 }, { "epoch": 0.54, "grad_norm": 1.0237152576446533, "learning_rate": 4.632001317374495e-06, "loss": 0.5282, "step": 8488 }, { "epoch": 0.54, "grad_norm": 0.994515597820282, "learning_rate": 4.630978114084394e-06, "loss": 0.4939, "step": 8489 }, { "epoch": 0.54, "grad_norm": 1.0709589719772339, "learning_rate": 4.629954926332522e-06, "loss": 0.5543, "step": 8490 }, { "epoch": 0.54, "grad_norm": 1.0311912298202515, "learning_rate": 4.628931754161959e-06, "loss": 0.5189, "step": 8491 }, { "epoch": 0.54, "grad_norm": 0.9902054071426392, "learning_rate": 4.62790859761579e-06, "loss": 0.5191, "step": 8492 }, { "epoch": 0.54, "grad_norm": 1.0637824535369873, "learning_rate": 4.626885456737095e-06, "loss": 0.5301, "step": 8493 }, { "epoch": 0.54, "grad_norm": 1.0846922397613525, "learning_rate": 4.625862331568957e-06, "loss": 0.5169, "step": 8494 }, { "epoch": 0.54, "grad_norm": 1.1294918060302734, "learning_rate": 4.624839222154453e-06, "loss": 0.5251, "step": 8495 }, { "epoch": 0.54, "grad_norm": 1.0639559030532837, "learning_rate": 4.623816128536665e-06, "loss": 0.5209, "step": 8496 }, { "epoch": 0.54, "grad_norm": 1.0102713108062744, "learning_rate": 4.6227930507586705e-06, "loss": 0.4967, "step": 8497 }, { "epoch": 0.54, "grad_norm": 1.1638081073760986, "learning_rate": 4.62176998886355e-06, "loss": 0.5388, "step": 8498 }, { "epoch": 0.54, "grad_norm": 0.9671724438667297, "learning_rate": 4.620746942894377e-06, "loss": 0.5346, "step": 8499 }, { "epoch": 0.54, "grad_norm": 1.0830614566802979, "learning_rate": 4.619723912894232e-06, "loss": 0.521, "step": 8500 }, { "epoch": 0.54, "grad_norm": 1.0154008865356445, "learning_rate": 4.618700898906191e-06, "loss": 0.4904, "step": 8501 }, { "epoch": 0.54, "grad_norm": 0.967204213142395, "learning_rate": 4.6176779009733295e-06, "loss": 0.477, "step": 8502 }, { "epoch": 0.54, "grad_norm": 0.9524750709533691, "learning_rate": 4.616654919138719e-06, "loss": 0.4654, "step": 8503 }, { "epoch": 0.54, "grad_norm": 1.08669114112854, "learning_rate": 4.6156319534454365e-06, "loss": 0.5724, "step": 8504 }, { "epoch": 0.54, "grad_norm": 1.047778844833374, "learning_rate": 4.614609003936558e-06, "loss": 0.5093, "step": 8505 }, { "epoch": 0.54, "grad_norm": 0.9351442456245422, "learning_rate": 4.613586070655152e-06, "loss": 0.5105, "step": 8506 }, { "epoch": 0.54, "grad_norm": 1.0905542373657227, "learning_rate": 4.612563153644292e-06, "loss": 0.5206, "step": 8507 }, { "epoch": 0.54, "grad_norm": 0.9523585438728333, "learning_rate": 4.6115402529470495e-06, "loss": 0.5325, "step": 8508 }, { "epoch": 0.54, "grad_norm": 1.0420037508010864, "learning_rate": 4.610517368606497e-06, "loss": 0.5033, "step": 8509 }, { "epoch": 0.54, "grad_norm": 1.0466331243515015, "learning_rate": 4.609494500665703e-06, "loss": 0.5121, "step": 8510 }, { "epoch": 0.54, "grad_norm": 1.1029798984527588, "learning_rate": 4.608471649167737e-06, "loss": 0.5173, "step": 8511 }, { "epoch": 0.54, "grad_norm": 1.0008111000061035, "learning_rate": 4.6074488141556656e-06, "loss": 0.5627, "step": 8512 }, { "epoch": 0.54, "grad_norm": 0.9665666818618774, "learning_rate": 4.606425995672562e-06, "loss": 0.5089, "step": 8513 }, { "epoch": 0.54, "grad_norm": 0.9968435764312744, "learning_rate": 4.605403193761489e-06, "loss": 0.5333, "step": 8514 }, { "epoch": 0.54, "grad_norm": 0.9083412885665894, "learning_rate": 4.604380408465516e-06, "loss": 0.4867, "step": 8515 }, { "epoch": 0.54, "grad_norm": 1.011513352394104, "learning_rate": 4.603357639827705e-06, "loss": 0.545, "step": 8516 }, { "epoch": 0.54, "grad_norm": 1.0549994707107544, "learning_rate": 4.602334887891127e-06, "loss": 0.5561, "step": 8517 }, { "epoch": 0.54, "grad_norm": 1.101017951965332, "learning_rate": 4.601312152698843e-06, "loss": 0.5535, "step": 8518 }, { "epoch": 0.54, "grad_norm": 1.0652989149093628, "learning_rate": 4.600289434293917e-06, "loss": 0.5307, "step": 8519 }, { "epoch": 0.54, "grad_norm": 0.9657049179077148, "learning_rate": 4.599266732719413e-06, "loss": 0.5296, "step": 8520 }, { "epoch": 0.54, "grad_norm": 0.9676050543785095, "learning_rate": 4.598244048018391e-06, "loss": 0.5069, "step": 8521 }, { "epoch": 0.54, "grad_norm": 1.0080175399780273, "learning_rate": 4.5972213802339165e-06, "loss": 0.5375, "step": 8522 }, { "epoch": 0.54, "grad_norm": 1.0194203853607178, "learning_rate": 4.596198729409047e-06, "loss": 0.5634, "step": 8523 }, { "epoch": 0.54, "grad_norm": 1.2580246925354004, "learning_rate": 4.5951760955868455e-06, "loss": 0.535, "step": 8524 }, { "epoch": 0.54, "grad_norm": 1.0555814504623413, "learning_rate": 4.594153478810368e-06, "loss": 0.5286, "step": 8525 }, { "epoch": 0.54, "grad_norm": 1.0560455322265625, "learning_rate": 4.593130879122678e-06, "loss": 0.4989, "step": 8526 }, { "epoch": 0.54, "grad_norm": 1.077176809310913, "learning_rate": 4.59210829656683e-06, "loss": 0.5223, "step": 8527 }, { "epoch": 0.54, "grad_norm": 0.9897431135177612, "learning_rate": 4.591085731185885e-06, "loss": 0.487, "step": 8528 }, { "epoch": 0.54, "grad_norm": 1.020951271057129, "learning_rate": 4.590063183022894e-06, "loss": 0.5112, "step": 8529 }, { "epoch": 0.54, "grad_norm": 0.9736301302909851, "learning_rate": 4.589040652120919e-06, "loss": 0.521, "step": 8530 }, { "epoch": 0.54, "grad_norm": 1.1638489961624146, "learning_rate": 4.588018138523011e-06, "loss": 0.5338, "step": 8531 }, { "epoch": 0.54, "grad_norm": 1.0394287109375, "learning_rate": 4.5869956422722274e-06, "loss": 0.5025, "step": 8532 }, { "epoch": 0.54, "grad_norm": 1.0351699590682983, "learning_rate": 4.585973163411618e-06, "loss": 0.5207, "step": 8533 }, { "epoch": 0.54, "grad_norm": 0.9668200612068176, "learning_rate": 4.584950701984241e-06, "loss": 0.4933, "step": 8534 }, { "epoch": 0.54, "grad_norm": 0.9660054445266724, "learning_rate": 4.583928258033145e-06, "loss": 0.5076, "step": 8535 }, { "epoch": 0.54, "grad_norm": 1.0498343706130981, "learning_rate": 4.5829058316013835e-06, "loss": 0.4908, "step": 8536 }, { "epoch": 0.54, "grad_norm": 1.191595196723938, "learning_rate": 4.581883422732007e-06, "loss": 0.5428, "step": 8537 }, { "epoch": 0.54, "grad_norm": 1.024022102355957, "learning_rate": 4.580861031468062e-06, "loss": 0.4978, "step": 8538 }, { "epoch": 0.54, "grad_norm": 0.9271550178527832, "learning_rate": 4.579838657852603e-06, "loss": 0.5192, "step": 8539 }, { "epoch": 0.54, "grad_norm": 0.9976838827133179, "learning_rate": 4.578816301928677e-06, "loss": 0.5176, "step": 8540 }, { "epoch": 0.54, "grad_norm": 1.048948049545288, "learning_rate": 4.577793963739331e-06, "loss": 0.5165, "step": 8541 }, { "epoch": 0.54, "grad_norm": 0.9421103000640869, "learning_rate": 4.576771643327611e-06, "loss": 0.4898, "step": 8542 }, { "epoch": 0.54, "grad_norm": 0.9716400504112244, "learning_rate": 4.575749340736565e-06, "loss": 0.5073, "step": 8543 }, { "epoch": 0.54, "grad_norm": 1.0311450958251953, "learning_rate": 4.57472705600924e-06, "loss": 0.4947, "step": 8544 }, { "epoch": 0.54, "grad_norm": 1.0700623989105225, "learning_rate": 4.573704789188679e-06, "loss": 0.5679, "step": 8545 }, { "epoch": 0.54, "grad_norm": 0.9896937608718872, "learning_rate": 4.5726825403179245e-06, "loss": 0.5628, "step": 8546 }, { "epoch": 0.54, "grad_norm": 0.929032027721405, "learning_rate": 4.571660309440022e-06, "loss": 0.4865, "step": 8547 }, { "epoch": 0.54, "grad_norm": 1.082908034324646, "learning_rate": 4.570638096598016e-06, "loss": 0.5504, "step": 8548 }, { "epoch": 0.54, "grad_norm": 1.0592923164367676, "learning_rate": 4.569615901834946e-06, "loss": 0.5115, "step": 8549 }, { "epoch": 0.54, "grad_norm": 1.1382687091827393, "learning_rate": 4.568593725193852e-06, "loss": 0.5198, "step": 8550 }, { "epoch": 0.54, "grad_norm": 0.9509519338607788, "learning_rate": 4.567571566717774e-06, "loss": 0.5091, "step": 8551 }, { "epoch": 0.54, "grad_norm": 0.9528672099113464, "learning_rate": 4.566549426449755e-06, "loss": 0.477, "step": 8552 }, { "epoch": 0.54, "grad_norm": 1.0432177782058716, "learning_rate": 4.565527304432833e-06, "loss": 0.5301, "step": 8553 }, { "epoch": 0.54, "grad_norm": 0.9746152758598328, "learning_rate": 4.564505200710042e-06, "loss": 0.4672, "step": 8554 }, { "epoch": 0.54, "grad_norm": 0.9885959029197693, "learning_rate": 4.5634831153244215e-06, "loss": 0.5086, "step": 8555 }, { "epoch": 0.54, "grad_norm": 1.022283911705017, "learning_rate": 4.562461048319011e-06, "loss": 0.4954, "step": 8556 }, { "epoch": 0.54, "grad_norm": 1.1526809930801392, "learning_rate": 4.561438999736844e-06, "loss": 0.5885, "step": 8557 }, { "epoch": 0.54, "grad_norm": 1.0040638446807861, "learning_rate": 4.5604169696209535e-06, "loss": 0.5376, "step": 8558 }, { "epoch": 0.54, "grad_norm": 0.9949778318405151, "learning_rate": 4.559394958014375e-06, "loss": 0.5441, "step": 8559 }, { "epoch": 0.54, "grad_norm": 1.0233004093170166, "learning_rate": 4.558372964960142e-06, "loss": 0.5206, "step": 8560 }, { "epoch": 0.54, "grad_norm": 0.9786447882652283, "learning_rate": 4.557350990501288e-06, "loss": 0.4969, "step": 8561 }, { "epoch": 0.54, "grad_norm": 1.03334641456604, "learning_rate": 4.556329034680845e-06, "loss": 0.5064, "step": 8562 }, { "epoch": 0.54, "grad_norm": 1.0475358963012695, "learning_rate": 4.55530709754184e-06, "loss": 0.536, "step": 8563 }, { "epoch": 0.54, "grad_norm": 1.0991499423980713, "learning_rate": 4.5542851791273085e-06, "loss": 0.5446, "step": 8564 }, { "epoch": 0.54, "grad_norm": 0.9369568228721619, "learning_rate": 4.5532632794802766e-06, "loss": 0.4684, "step": 8565 }, { "epoch": 0.54, "grad_norm": 0.9981191754341125, "learning_rate": 4.5522413986437745e-06, "loss": 0.5202, "step": 8566 }, { "epoch": 0.54, "grad_norm": 1.1952184438705444, "learning_rate": 4.55121953666083e-06, "loss": 0.503, "step": 8567 }, { "epoch": 0.54, "grad_norm": 1.0004924535751343, "learning_rate": 4.550197693574468e-06, "loss": 0.5004, "step": 8568 }, { "epoch": 0.54, "grad_norm": 1.096377968788147, "learning_rate": 4.549175869427717e-06, "loss": 0.5176, "step": 8569 }, { "epoch": 0.54, "grad_norm": 0.9786026477813721, "learning_rate": 4.548154064263603e-06, "loss": 0.4952, "step": 8570 }, { "epoch": 0.54, "grad_norm": 1.1313389539718628, "learning_rate": 4.547132278125149e-06, "loss": 0.5221, "step": 8571 }, { "epoch": 0.54, "grad_norm": 1.0534061193466187, "learning_rate": 4.546110511055377e-06, "loss": 0.4931, "step": 8572 }, { "epoch": 0.54, "grad_norm": 1.0496286153793335, "learning_rate": 4.545088763097314e-06, "loss": 0.5005, "step": 8573 }, { "epoch": 0.54, "grad_norm": 1.0372889041900635, "learning_rate": 4.544067034293982e-06, "loss": 0.5127, "step": 8574 }, { "epoch": 0.54, "grad_norm": 1.0367099046707153, "learning_rate": 4.543045324688401e-06, "loss": 0.4881, "step": 8575 }, { "epoch": 0.54, "grad_norm": 1.008887529373169, "learning_rate": 4.542023634323589e-06, "loss": 0.5153, "step": 8576 }, { "epoch": 0.54, "grad_norm": 1.0606989860534668, "learning_rate": 4.54100196324257e-06, "loss": 0.5216, "step": 8577 }, { "epoch": 0.54, "grad_norm": 1.0111998319625854, "learning_rate": 4.539980311488363e-06, "loss": 0.4682, "step": 8578 }, { "epoch": 0.54, "grad_norm": 0.9441483616828918, "learning_rate": 4.538958679103984e-06, "loss": 0.4991, "step": 8579 }, { "epoch": 0.54, "grad_norm": 0.9652086496353149, "learning_rate": 4.5379370661324495e-06, "loss": 0.4923, "step": 8580 }, { "epoch": 0.54, "grad_norm": 1.0206347703933716, "learning_rate": 4.536915472616779e-06, "loss": 0.5068, "step": 8581 }, { "epoch": 0.54, "grad_norm": 1.0516248941421509, "learning_rate": 4.535893898599988e-06, "loss": 0.4905, "step": 8582 }, { "epoch": 0.54, "grad_norm": 1.0783042907714844, "learning_rate": 4.53487234412509e-06, "loss": 0.5042, "step": 8583 }, { "epoch": 0.54, "grad_norm": 1.0359718799591064, "learning_rate": 4.533850809235099e-06, "loss": 0.5514, "step": 8584 }, { "epoch": 0.54, "grad_norm": 1.06660795211792, "learning_rate": 4.532829293973028e-06, "loss": 0.5476, "step": 8585 }, { "epoch": 0.54, "grad_norm": 1.0299817323684692, "learning_rate": 4.531807798381892e-06, "loss": 0.5157, "step": 8586 }, { "epoch": 0.54, "grad_norm": 0.9901638627052307, "learning_rate": 4.5307863225047e-06, "loss": 0.4882, "step": 8587 }, { "epoch": 0.54, "grad_norm": 1.1186354160308838, "learning_rate": 4.529764866384464e-06, "loss": 0.5465, "step": 8588 }, { "epoch": 0.54, "grad_norm": 0.9637424349784851, "learning_rate": 4.528743430064192e-06, "loss": 0.4752, "step": 8589 }, { "epoch": 0.54, "grad_norm": 1.017280101776123, "learning_rate": 4.527722013586897e-06, "loss": 0.5137, "step": 8590 }, { "epoch": 0.54, "grad_norm": 1.065182089805603, "learning_rate": 4.5267006169955855e-06, "loss": 0.5726, "step": 8591 }, { "epoch": 0.54, "grad_norm": 1.0572706460952759, "learning_rate": 4.525679240333262e-06, "loss": 0.5353, "step": 8592 }, { "epoch": 0.54, "grad_norm": 1.0447081327438354, "learning_rate": 4.524657883642936e-06, "loss": 0.5573, "step": 8593 }, { "epoch": 0.54, "grad_norm": 0.9903708100318909, "learning_rate": 4.5236365469676144e-06, "loss": 0.4939, "step": 8594 }, { "epoch": 0.54, "grad_norm": 0.9923166632652283, "learning_rate": 4.522615230350302e-06, "loss": 0.5644, "step": 8595 }, { "epoch": 0.54, "grad_norm": 0.9883496761322021, "learning_rate": 4.521593933833998e-06, "loss": 0.4849, "step": 8596 }, { "epoch": 0.54, "grad_norm": 1.0408037900924683, "learning_rate": 4.520572657461712e-06, "loss": 0.488, "step": 8597 }, { "epoch": 0.54, "grad_norm": 1.059937596321106, "learning_rate": 4.519551401276441e-06, "loss": 0.5129, "step": 8598 }, { "epoch": 0.54, "grad_norm": 0.9665573239326477, "learning_rate": 4.518530165321192e-06, "loss": 0.5119, "step": 8599 }, { "epoch": 0.54, "grad_norm": 1.0387489795684814, "learning_rate": 4.517508949638961e-06, "loss": 0.525, "step": 8600 }, { "epoch": 0.54, "grad_norm": 0.9976455569267273, "learning_rate": 4.516487754272751e-06, "loss": 0.5312, "step": 8601 }, { "epoch": 0.54, "grad_norm": 1.044742226600647, "learning_rate": 4.515466579265557e-06, "loss": 0.5311, "step": 8602 }, { "epoch": 0.55, "grad_norm": 1.0177574157714844, "learning_rate": 4.5144454246603816e-06, "loss": 0.5094, "step": 8603 }, { "epoch": 0.55, "grad_norm": 1.0687826871871948, "learning_rate": 4.51342429050022e-06, "loss": 0.5238, "step": 8604 }, { "epoch": 0.55, "grad_norm": 1.0799012184143066, "learning_rate": 4.51240317682807e-06, "loss": 0.5797, "step": 8605 }, { "epoch": 0.55, "grad_norm": 0.9996159672737122, "learning_rate": 4.5113820836869234e-06, "loss": 0.5035, "step": 8606 }, { "epoch": 0.55, "grad_norm": 1.1689174175262451, "learning_rate": 4.51036101111978e-06, "loss": 0.5899, "step": 8607 }, { "epoch": 0.55, "grad_norm": 0.946363091468811, "learning_rate": 4.509339959169629e-06, "loss": 0.4894, "step": 8608 }, { "epoch": 0.55, "grad_norm": 1.1414538621902466, "learning_rate": 4.508318927879468e-06, "loss": 0.6091, "step": 8609 }, { "epoch": 0.55, "grad_norm": 1.0159684419631958, "learning_rate": 4.507297917292284e-06, "loss": 0.4701, "step": 8610 }, { "epoch": 0.55, "grad_norm": 0.9742702841758728, "learning_rate": 4.506276927451072e-06, "loss": 0.4889, "step": 8611 }, { "epoch": 0.55, "grad_norm": 1.0368071794509888, "learning_rate": 4.505255958398821e-06, "loss": 0.5436, "step": 8612 }, { "epoch": 0.55, "grad_norm": 1.145423173904419, "learning_rate": 4.504235010178521e-06, "loss": 0.518, "step": 8613 }, { "epoch": 0.55, "grad_norm": 1.131622314453125, "learning_rate": 4.503214082833161e-06, "loss": 0.5244, "step": 8614 }, { "epoch": 0.55, "grad_norm": 1.0988490581512451, "learning_rate": 4.502193176405724e-06, "loss": 0.5126, "step": 8615 }, { "epoch": 0.55, "grad_norm": 1.0306994915008545, "learning_rate": 4.501172290939203e-06, "loss": 0.4994, "step": 8616 }, { "epoch": 0.55, "grad_norm": 1.0991085767745972, "learning_rate": 4.5001514264765826e-06, "loss": 0.5116, "step": 8617 }, { "epoch": 0.55, "grad_norm": 1.0253667831420898, "learning_rate": 4.499130583060845e-06, "loss": 0.4981, "step": 8618 }, { "epoch": 0.55, "grad_norm": 0.9436437487602234, "learning_rate": 4.4981097607349764e-06, "loss": 0.5227, "step": 8619 }, { "epoch": 0.55, "grad_norm": 1.0218784809112549, "learning_rate": 4.49708895954196e-06, "loss": 0.4794, "step": 8620 }, { "epoch": 0.55, "grad_norm": 1.0793664455413818, "learning_rate": 4.496068179524778e-06, "loss": 0.5312, "step": 8621 }, { "epoch": 0.55, "grad_norm": 0.9964872598648071, "learning_rate": 4.495047420726412e-06, "loss": 0.5461, "step": 8622 }, { "epoch": 0.55, "grad_norm": 1.032078742980957, "learning_rate": 4.494026683189843e-06, "loss": 0.5549, "step": 8623 }, { "epoch": 0.55, "grad_norm": 1.1988216638565063, "learning_rate": 4.493005966958049e-06, "loss": 0.5169, "step": 8624 }, { "epoch": 0.55, "grad_norm": 0.9762097001075745, "learning_rate": 4.4919852720740115e-06, "loss": 0.4873, "step": 8625 }, { "epoch": 0.55, "grad_norm": 1.0649491548538208, "learning_rate": 4.490964598580706e-06, "loss": 0.5566, "step": 8626 }, { "epoch": 0.55, "grad_norm": 1.026347279548645, "learning_rate": 4.489943946521111e-06, "loss": 0.512, "step": 8627 }, { "epoch": 0.55, "grad_norm": 1.117616057395935, "learning_rate": 4.4889233159382e-06, "loss": 0.5275, "step": 8628 }, { "epoch": 0.55, "grad_norm": 1.0276168584823608, "learning_rate": 4.487902706874954e-06, "loss": 0.5575, "step": 8629 }, { "epoch": 0.55, "grad_norm": 1.2161848545074463, "learning_rate": 4.486882119374341e-06, "loss": 0.5479, "step": 8630 }, { "epoch": 0.55, "grad_norm": 1.1287305355072021, "learning_rate": 4.485861553479338e-06, "loss": 0.5908, "step": 8631 }, { "epoch": 0.55, "grad_norm": 1.1614528894424438, "learning_rate": 4.484841009232914e-06, "loss": 0.5418, "step": 8632 }, { "epoch": 0.55, "grad_norm": 1.0197196006774902, "learning_rate": 4.483820486678047e-06, "loss": 0.5131, "step": 8633 }, { "epoch": 0.55, "grad_norm": 1.054356575012207, "learning_rate": 4.482799985857701e-06, "loss": 0.5642, "step": 8634 }, { "epoch": 0.55, "grad_norm": 1.007958173751831, "learning_rate": 4.48177950681485e-06, "loss": 0.5021, "step": 8635 }, { "epoch": 0.55, "grad_norm": 1.0405101776123047, "learning_rate": 4.480759049592458e-06, "loss": 0.5466, "step": 8636 }, { "epoch": 0.55, "grad_norm": 1.0778744220733643, "learning_rate": 4.4797386142335e-06, "loss": 0.5221, "step": 8637 }, { "epoch": 0.55, "grad_norm": 1.1062657833099365, "learning_rate": 4.478718200780936e-06, "loss": 0.5462, "step": 8638 }, { "epoch": 0.55, "grad_norm": 0.9767703413963318, "learning_rate": 4.477697809277738e-06, "loss": 0.4649, "step": 8639 }, { "epoch": 0.55, "grad_norm": 1.03668212890625, "learning_rate": 4.476677439766865e-06, "loss": 0.5115, "step": 8640 }, { "epoch": 0.55, "grad_norm": 1.1296067237854004, "learning_rate": 4.475657092291287e-06, "loss": 0.5405, "step": 8641 }, { "epoch": 0.55, "grad_norm": 1.1303768157958984, "learning_rate": 4.4746367668939646e-06, "loss": 0.5346, "step": 8642 }, { "epoch": 0.55, "grad_norm": 0.9903587698936462, "learning_rate": 4.4736164636178605e-06, "loss": 0.4757, "step": 8643 }, { "epoch": 0.55, "grad_norm": 1.0310946702957153, "learning_rate": 4.472596182505936e-06, "loss": 0.5133, "step": 8644 }, { "epoch": 0.55, "grad_norm": 1.1160991191864014, "learning_rate": 4.47157592360115e-06, "loss": 0.5269, "step": 8645 }, { "epoch": 0.55, "grad_norm": 1.086572289466858, "learning_rate": 4.470555686946464e-06, "loss": 0.4952, "step": 8646 }, { "epoch": 0.55, "grad_norm": 1.0365424156188965, "learning_rate": 4.469535472584837e-06, "loss": 0.5276, "step": 8647 }, { "epoch": 0.55, "grad_norm": 1.0874872207641602, "learning_rate": 4.468515280559227e-06, "loss": 0.5314, "step": 8648 }, { "epoch": 0.55, "grad_norm": 1.0258994102478027, "learning_rate": 4.467495110912587e-06, "loss": 0.551, "step": 8649 }, { "epoch": 0.55, "grad_norm": 0.9841521382331848, "learning_rate": 4.466474963687876e-06, "loss": 0.5573, "step": 8650 }, { "epoch": 0.55, "grad_norm": 0.9820963144302368, "learning_rate": 4.46545483892805e-06, "loss": 0.5394, "step": 8651 }, { "epoch": 0.55, "grad_norm": 1.1955668926239014, "learning_rate": 4.464434736676061e-06, "loss": 0.5036, "step": 8652 }, { "epoch": 0.55, "grad_norm": 1.0836960077285767, "learning_rate": 4.46341465697486e-06, "loss": 0.5425, "step": 8653 }, { "epoch": 0.55, "grad_norm": 1.0003873109817505, "learning_rate": 4.462394599867402e-06, "loss": 0.4825, "step": 8654 }, { "epoch": 0.55, "grad_norm": 1.045983910560608, "learning_rate": 4.461374565396638e-06, "loss": 0.4935, "step": 8655 }, { "epoch": 0.55, "grad_norm": 1.0779129266738892, "learning_rate": 4.460354553605518e-06, "loss": 0.5392, "step": 8656 }, { "epoch": 0.55, "grad_norm": 1.1067196130752563, "learning_rate": 4.459334564536988e-06, "loss": 0.5776, "step": 8657 }, { "epoch": 0.55, "grad_norm": 1.0516122579574585, "learning_rate": 4.458314598234e-06, "loss": 0.515, "step": 8658 }, { "epoch": 0.55, "grad_norm": 0.964877188205719, "learning_rate": 4.4572946547395e-06, "loss": 0.4937, "step": 8659 }, { "epoch": 0.55, "grad_norm": 0.990656316280365, "learning_rate": 4.456274734096436e-06, "loss": 0.5204, "step": 8660 }, { "epoch": 0.55, "grad_norm": 1.071433424949646, "learning_rate": 4.455254836347749e-06, "loss": 0.5563, "step": 8661 }, { "epoch": 0.55, "grad_norm": 0.9665083289146423, "learning_rate": 4.454234961536384e-06, "loss": 0.525, "step": 8662 }, { "epoch": 0.55, "grad_norm": 0.9684353470802307, "learning_rate": 4.45321510970529e-06, "loss": 0.5188, "step": 8663 }, { "epoch": 0.55, "grad_norm": 1.012878656387329, "learning_rate": 4.452195280897405e-06, "loss": 0.5086, "step": 8664 }, { "epoch": 0.55, "grad_norm": 1.006637454032898, "learning_rate": 4.451175475155669e-06, "loss": 0.4902, "step": 8665 }, { "epoch": 0.55, "grad_norm": 1.0858285427093506, "learning_rate": 4.450155692523025e-06, "loss": 0.5124, "step": 8666 }, { "epoch": 0.55, "grad_norm": 1.0405857563018799, "learning_rate": 4.449135933042414e-06, "loss": 0.5258, "step": 8667 }, { "epoch": 0.55, "grad_norm": 1.1059043407440186, "learning_rate": 4.448116196756771e-06, "loss": 0.5433, "step": 8668 }, { "epoch": 0.55, "grad_norm": 0.9712663292884827, "learning_rate": 4.447096483709035e-06, "loss": 0.4664, "step": 8669 }, { "epoch": 0.55, "grad_norm": 1.0021990537643433, "learning_rate": 4.4460767939421425e-06, "loss": 0.525, "step": 8670 }, { "epoch": 0.55, "grad_norm": 1.0234445333480835, "learning_rate": 4.44505712749903e-06, "loss": 0.4958, "step": 8671 }, { "epoch": 0.55, "grad_norm": 1.0537341833114624, "learning_rate": 4.444037484422632e-06, "loss": 0.5475, "step": 8672 }, { "epoch": 0.55, "grad_norm": 0.962066650390625, "learning_rate": 4.44301786475588e-06, "loss": 0.4978, "step": 8673 }, { "epoch": 0.55, "grad_norm": 1.0533349514007568, "learning_rate": 4.441998268541708e-06, "loss": 0.502, "step": 8674 }, { "epoch": 0.55, "grad_norm": 1.0353072881698608, "learning_rate": 4.440978695823049e-06, "loss": 0.5081, "step": 8675 }, { "epoch": 0.55, "grad_norm": 1.0396310091018677, "learning_rate": 4.439959146642833e-06, "loss": 0.5245, "step": 8676 }, { "epoch": 0.55, "grad_norm": 1.0958211421966553, "learning_rate": 4.4389396210439886e-06, "loss": 0.5164, "step": 8677 }, { "epoch": 0.55, "grad_norm": 1.0075626373291016, "learning_rate": 4.437920119069445e-06, "loss": 0.5302, "step": 8678 }, { "epoch": 0.55, "grad_norm": 1.1665135622024536, "learning_rate": 4.436900640762128e-06, "loss": 0.5332, "step": 8679 }, { "epoch": 0.55, "grad_norm": 1.0166640281677246, "learning_rate": 4.435881186164968e-06, "loss": 0.5204, "step": 8680 }, { "epoch": 0.55, "grad_norm": 1.0299077033996582, "learning_rate": 4.434861755320888e-06, "loss": 0.5005, "step": 8681 }, { "epoch": 0.55, "grad_norm": 0.9902662038803101, "learning_rate": 4.433842348272815e-06, "loss": 0.5081, "step": 8682 }, { "epoch": 0.55, "grad_norm": 1.0685404539108276, "learning_rate": 4.4328229650636676e-06, "loss": 0.5507, "step": 8683 }, { "epoch": 0.55, "grad_norm": 1.0103635787963867, "learning_rate": 4.431803605736376e-06, "loss": 0.4898, "step": 8684 }, { "epoch": 0.55, "grad_norm": 1.0012294054031372, "learning_rate": 4.430784270333855e-06, "loss": 0.5491, "step": 8685 }, { "epoch": 0.55, "grad_norm": 0.9294708371162415, "learning_rate": 4.429764958899031e-06, "loss": 0.4647, "step": 8686 }, { "epoch": 0.55, "grad_norm": 0.9999809861183167, "learning_rate": 4.428745671474818e-06, "loss": 0.5089, "step": 8687 }, { "epoch": 0.55, "grad_norm": 1.1061701774597168, "learning_rate": 4.427726408104139e-06, "loss": 0.5441, "step": 8688 }, { "epoch": 0.55, "grad_norm": 1.0385311841964722, "learning_rate": 4.42670716882991e-06, "loss": 0.5386, "step": 8689 }, { "epoch": 0.55, "grad_norm": 1.0141210556030273, "learning_rate": 4.4256879536950495e-06, "loss": 0.511, "step": 8690 }, { "epoch": 0.55, "grad_norm": 0.968830406665802, "learning_rate": 4.4246687627424686e-06, "loss": 0.4983, "step": 8691 }, { "epoch": 0.55, "grad_norm": 1.0696613788604736, "learning_rate": 4.423649596015086e-06, "loss": 0.5288, "step": 8692 }, { "epoch": 0.55, "grad_norm": 0.9941192269325256, "learning_rate": 4.422630453555814e-06, "loss": 0.4934, "step": 8693 }, { "epoch": 0.55, "grad_norm": 1.0725018978118896, "learning_rate": 4.4216113354075654e-06, "loss": 0.5326, "step": 8694 }, { "epoch": 0.55, "grad_norm": 1.0443642139434814, "learning_rate": 4.420592241613251e-06, "loss": 0.5394, "step": 8695 }, { "epoch": 0.55, "grad_norm": 1.0298329591751099, "learning_rate": 4.4195731722157805e-06, "loss": 0.5062, "step": 8696 }, { "epoch": 0.55, "grad_norm": 1.0180301666259766, "learning_rate": 4.418554127258066e-06, "loss": 0.5403, "step": 8697 }, { "epoch": 0.55, "grad_norm": 1.0847681760787964, "learning_rate": 4.417535106783015e-06, "loss": 0.5775, "step": 8698 }, { "epoch": 0.55, "grad_norm": 0.9627909660339355, "learning_rate": 4.416516110833533e-06, "loss": 0.4586, "step": 8699 }, { "epoch": 0.55, "grad_norm": 1.01035737991333, "learning_rate": 4.415497139452528e-06, "loss": 0.5501, "step": 8700 }, { "epoch": 0.55, "grad_norm": 1.139830231666565, "learning_rate": 4.414478192682905e-06, "loss": 0.5466, "step": 8701 }, { "epoch": 0.55, "grad_norm": 1.0512480735778809, "learning_rate": 4.41345927056757e-06, "loss": 0.4649, "step": 8702 }, { "epoch": 0.55, "grad_norm": 1.0854779481887817, "learning_rate": 4.4124403731494235e-06, "loss": 0.5747, "step": 8703 }, { "epoch": 0.55, "grad_norm": 0.9707875847816467, "learning_rate": 4.4114215004713665e-06, "loss": 0.5162, "step": 8704 }, { "epoch": 0.55, "grad_norm": 1.0341905355453491, "learning_rate": 4.410402652576307e-06, "loss": 0.4722, "step": 8705 }, { "epoch": 0.55, "grad_norm": 1.0561015605926514, "learning_rate": 4.409383829507139e-06, "loss": 0.4789, "step": 8706 }, { "epoch": 0.55, "grad_norm": 1.0236762762069702, "learning_rate": 4.408365031306763e-06, "loss": 0.4962, "step": 8707 }, { "epoch": 0.55, "grad_norm": 1.0971812009811401, "learning_rate": 4.407346258018078e-06, "loss": 0.528, "step": 8708 }, { "epoch": 0.55, "grad_norm": 1.047594428062439, "learning_rate": 4.4063275096839785e-06, "loss": 0.4914, "step": 8709 }, { "epoch": 0.55, "grad_norm": 1.137151837348938, "learning_rate": 4.405308786347365e-06, "loss": 0.5227, "step": 8710 }, { "epoch": 0.55, "grad_norm": 1.0921539068222046, "learning_rate": 4.404290088051128e-06, "loss": 0.5255, "step": 8711 }, { "epoch": 0.55, "grad_norm": 0.9931677579879761, "learning_rate": 4.403271414838164e-06, "loss": 0.5069, "step": 8712 }, { "epoch": 0.55, "grad_norm": 1.0669801235198975, "learning_rate": 4.402252766751363e-06, "loss": 0.5159, "step": 8713 }, { "epoch": 0.55, "grad_norm": 1.0193895101547241, "learning_rate": 4.401234143833621e-06, "loss": 0.5175, "step": 8714 }, { "epoch": 0.55, "grad_norm": 0.9947845935821533, "learning_rate": 4.400215546127825e-06, "loss": 0.51, "step": 8715 }, { "epoch": 0.55, "grad_norm": 0.9431485533714294, "learning_rate": 4.399196973676867e-06, "loss": 0.4892, "step": 8716 }, { "epoch": 0.55, "grad_norm": 1.080181360244751, "learning_rate": 4.398178426523632e-06, "loss": 0.5802, "step": 8717 }, { "epoch": 0.55, "grad_norm": 1.0866167545318604, "learning_rate": 4.3971599047110116e-06, "loss": 0.514, "step": 8718 }, { "epoch": 0.55, "grad_norm": 1.0306841135025024, "learning_rate": 4.3961414082818904e-06, "loss": 0.5033, "step": 8719 }, { "epoch": 0.55, "grad_norm": 1.0695841312408447, "learning_rate": 4.395122937279154e-06, "loss": 0.5483, "step": 8720 }, { "epoch": 0.55, "grad_norm": 1.0567008256912231, "learning_rate": 4.394104491745686e-06, "loss": 0.5153, "step": 8721 }, { "epoch": 0.55, "grad_norm": 0.904281735420227, "learning_rate": 4.393086071724371e-06, "loss": 0.4623, "step": 8722 }, { "epoch": 0.55, "grad_norm": 1.081354022026062, "learning_rate": 4.392067677258089e-06, "loss": 0.5676, "step": 8723 }, { "epoch": 0.55, "grad_norm": 1.0314452648162842, "learning_rate": 4.391049308389723e-06, "loss": 0.4855, "step": 8724 }, { "epoch": 0.55, "grad_norm": 1.0365545749664307, "learning_rate": 4.390030965162153e-06, "loss": 0.5059, "step": 8725 }, { "epoch": 0.55, "grad_norm": 0.9358240962028503, "learning_rate": 4.389012647618255e-06, "loss": 0.4984, "step": 8726 }, { "epoch": 0.55, "grad_norm": 1.0348268747329712, "learning_rate": 4.387994355800909e-06, "loss": 0.5419, "step": 8727 }, { "epoch": 0.55, "grad_norm": 0.9691881537437439, "learning_rate": 4.386976089752994e-06, "loss": 0.4885, "step": 8728 }, { "epoch": 0.55, "grad_norm": 1.110189437866211, "learning_rate": 4.385957849517383e-06, "loss": 0.5961, "step": 8729 }, { "epoch": 0.55, "grad_norm": 0.9775313138961792, "learning_rate": 4.384939635136948e-06, "loss": 0.514, "step": 8730 }, { "epoch": 0.55, "grad_norm": 1.0049653053283691, "learning_rate": 4.383921446654567e-06, "loss": 0.4882, "step": 8731 }, { "epoch": 0.55, "grad_norm": 1.04422926902771, "learning_rate": 4.3829032841131116e-06, "loss": 0.53, "step": 8732 }, { "epoch": 0.55, "grad_norm": 1.1362097263336182, "learning_rate": 4.381885147555453e-06, "loss": 0.5841, "step": 8733 }, { "epoch": 0.55, "grad_norm": 1.1133431196212769, "learning_rate": 4.380867037024457e-06, "loss": 0.4776, "step": 8734 }, { "epoch": 0.55, "grad_norm": 0.994042158126831, "learning_rate": 4.379848952562999e-06, "loss": 0.5495, "step": 8735 }, { "epoch": 0.55, "grad_norm": 0.9828497767448425, "learning_rate": 4.3788308942139435e-06, "loss": 0.5037, "step": 8736 }, { "epoch": 0.55, "grad_norm": 0.9757148623466492, "learning_rate": 4.3778128620201595e-06, "loss": 0.4795, "step": 8737 }, { "epoch": 0.55, "grad_norm": 1.0285186767578125, "learning_rate": 4.376794856024509e-06, "loss": 0.5662, "step": 8738 }, { "epoch": 0.55, "grad_norm": 1.112226128578186, "learning_rate": 4.37577687626986e-06, "loss": 0.525, "step": 8739 }, { "epoch": 0.55, "grad_norm": 1.0480462312698364, "learning_rate": 4.374758922799076e-06, "loss": 0.5211, "step": 8740 }, { "epoch": 0.55, "grad_norm": 1.0427954196929932, "learning_rate": 4.373740995655019e-06, "loss": 0.5334, "step": 8741 }, { "epoch": 0.55, "grad_norm": 1.0622020959854126, "learning_rate": 4.372723094880549e-06, "loss": 0.5801, "step": 8742 }, { "epoch": 0.55, "grad_norm": 1.0892544984817505, "learning_rate": 4.371705220518526e-06, "loss": 0.5184, "step": 8743 }, { "epoch": 0.55, "grad_norm": 1.0577740669250488, "learning_rate": 4.3706873726118135e-06, "loss": 0.5136, "step": 8744 }, { "epoch": 0.55, "grad_norm": 1.012571096420288, "learning_rate": 4.369669551203266e-06, "loss": 0.5262, "step": 8745 }, { "epoch": 0.55, "grad_norm": 1.0907323360443115, "learning_rate": 4.368651756335739e-06, "loss": 0.5274, "step": 8746 }, { "epoch": 0.55, "grad_norm": 1.0260123014450073, "learning_rate": 4.36763398805209e-06, "loss": 0.507, "step": 8747 }, { "epoch": 0.55, "grad_norm": 1.0460829734802246, "learning_rate": 4.366616246395177e-06, "loss": 0.5179, "step": 8748 }, { "epoch": 0.55, "grad_norm": 1.047634482383728, "learning_rate": 4.365598531407849e-06, "loss": 0.542, "step": 8749 }, { "epoch": 0.55, "grad_norm": 1.1334526538848877, "learning_rate": 4.364580843132959e-06, "loss": 0.5015, "step": 8750 }, { "epoch": 0.55, "grad_norm": 0.9913628101348877, "learning_rate": 4.363563181613359e-06, "loss": 0.5075, "step": 8751 }, { "epoch": 0.55, "grad_norm": 1.002979040145874, "learning_rate": 4.362545546891901e-06, "loss": 0.5112, "step": 8752 }, { "epoch": 0.55, "grad_norm": 1.1702262163162231, "learning_rate": 4.361527939011433e-06, "loss": 0.4818, "step": 8753 }, { "epoch": 0.55, "grad_norm": 1.0306715965270996, "learning_rate": 4.360510358014801e-06, "loss": 0.5553, "step": 8754 }, { "epoch": 0.55, "grad_norm": 1.1320462226867676, "learning_rate": 4.359492803944854e-06, "loss": 0.5412, "step": 8755 }, { "epoch": 0.55, "grad_norm": 1.000876545906067, "learning_rate": 4.358475276844435e-06, "loss": 0.5132, "step": 8756 }, { "epoch": 0.55, "grad_norm": 1.0229440927505493, "learning_rate": 4.357457776756392e-06, "loss": 0.5563, "step": 8757 }, { "epoch": 0.55, "grad_norm": 1.0038920640945435, "learning_rate": 4.3564403037235666e-06, "loss": 0.4872, "step": 8758 }, { "epoch": 0.55, "grad_norm": 0.9732649922370911, "learning_rate": 4.355422857788802e-06, "loss": 0.4896, "step": 8759 }, { "epoch": 0.55, "grad_norm": 0.9638651609420776, "learning_rate": 4.3544054389949366e-06, "loss": 0.5102, "step": 8760 }, { "epoch": 0.56, "grad_norm": 1.0643752813339233, "learning_rate": 4.353388047384813e-06, "loss": 0.527, "step": 8761 }, { "epoch": 0.56, "grad_norm": 0.9771501421928406, "learning_rate": 4.35237068300127e-06, "loss": 0.5216, "step": 8762 }, { "epoch": 0.56, "grad_norm": 1.0805439949035645, "learning_rate": 4.351353345887145e-06, "loss": 0.5344, "step": 8763 }, { "epoch": 0.56, "grad_norm": 1.0408697128295898, "learning_rate": 4.350336036085272e-06, "loss": 0.5461, "step": 8764 }, { "epoch": 0.56, "grad_norm": 1.0978062152862549, "learning_rate": 4.349318753638491e-06, "loss": 0.5936, "step": 8765 }, { "epoch": 0.56, "grad_norm": 1.0012162923812866, "learning_rate": 4.348301498589632e-06, "loss": 0.4985, "step": 8766 }, { "epoch": 0.56, "grad_norm": 1.0011204481124878, "learning_rate": 4.347284270981531e-06, "loss": 0.5391, "step": 8767 }, { "epoch": 0.56, "grad_norm": 1.055498480796814, "learning_rate": 4.346267070857017e-06, "loss": 0.5287, "step": 8768 }, { "epoch": 0.56, "grad_norm": 0.9990624189376831, "learning_rate": 4.3452498982589234e-06, "loss": 0.5037, "step": 8769 }, { "epoch": 0.56, "grad_norm": 1.0288701057434082, "learning_rate": 4.34423275323008e-06, "loss": 0.5123, "step": 8770 }, { "epoch": 0.56, "grad_norm": 1.0007120370864868, "learning_rate": 4.343215635813314e-06, "loss": 0.4961, "step": 8771 }, { "epoch": 0.56, "grad_norm": 0.9640586376190186, "learning_rate": 4.3421985460514515e-06, "loss": 0.5214, "step": 8772 }, { "epoch": 0.56, "grad_norm": 1.0430772304534912, "learning_rate": 4.341181483987319e-06, "loss": 0.4957, "step": 8773 }, { "epoch": 0.56, "grad_norm": 0.9861213564872742, "learning_rate": 4.340164449663745e-06, "loss": 0.4997, "step": 8774 }, { "epoch": 0.56, "grad_norm": 1.0308138132095337, "learning_rate": 4.33914744312355e-06, "loss": 0.4976, "step": 8775 }, { "epoch": 0.56, "grad_norm": 1.0044180154800415, "learning_rate": 4.338130464409556e-06, "loss": 0.4805, "step": 8776 }, { "epoch": 0.56, "grad_norm": 1.0167678594589233, "learning_rate": 4.3371135135645845e-06, "loss": 0.5643, "step": 8777 }, { "epoch": 0.56, "grad_norm": 1.0586427450180054, "learning_rate": 4.33609659063146e-06, "loss": 0.5242, "step": 8778 }, { "epoch": 0.56, "grad_norm": 0.9682163596153259, "learning_rate": 4.335079695652998e-06, "loss": 0.5393, "step": 8779 }, { "epoch": 0.56, "grad_norm": 1.1850287914276123, "learning_rate": 4.334062828672016e-06, "loss": 0.551, "step": 8780 }, { "epoch": 0.56, "grad_norm": 1.0070314407348633, "learning_rate": 4.3330459897313305e-06, "loss": 0.5023, "step": 8781 }, { "epoch": 0.56, "grad_norm": 1.0112802982330322, "learning_rate": 4.33202917887376e-06, "loss": 0.5113, "step": 8782 }, { "epoch": 0.56, "grad_norm": 0.9870576858520508, "learning_rate": 4.331012396142117e-06, "loss": 0.4985, "step": 8783 }, { "epoch": 0.56, "grad_norm": 1.0064136981964111, "learning_rate": 4.3299956415792145e-06, "loss": 0.5616, "step": 8784 }, { "epoch": 0.56, "grad_norm": 0.9818264842033386, "learning_rate": 4.328978915227866e-06, "loss": 0.5323, "step": 8785 }, { "epoch": 0.56, "grad_norm": 1.033555269241333, "learning_rate": 4.327962217130878e-06, "loss": 0.5553, "step": 8786 }, { "epoch": 0.56, "grad_norm": 1.0492753982543945, "learning_rate": 4.326945547331065e-06, "loss": 0.4836, "step": 8787 }, { "epoch": 0.56, "grad_norm": 1.0379021167755127, "learning_rate": 4.325928905871233e-06, "loss": 0.5469, "step": 8788 }, { "epoch": 0.56, "grad_norm": 1.0607675313949585, "learning_rate": 4.324912292794192e-06, "loss": 0.5407, "step": 8789 }, { "epoch": 0.56, "grad_norm": 1.0625678300857544, "learning_rate": 4.323895708142742e-06, "loss": 0.4681, "step": 8790 }, { "epoch": 0.56, "grad_norm": 0.9727704524993896, "learning_rate": 4.322879151959695e-06, "loss": 0.5138, "step": 8791 }, { "epoch": 0.56, "grad_norm": 0.9812003374099731, "learning_rate": 4.321862624287851e-06, "loss": 0.5129, "step": 8792 }, { "epoch": 0.56, "grad_norm": 1.045271873474121, "learning_rate": 4.320846125170012e-06, "loss": 0.5434, "step": 8793 }, { "epoch": 0.56, "grad_norm": 0.9572071433067322, "learning_rate": 4.31982965464898e-06, "loss": 0.501, "step": 8794 }, { "epoch": 0.56, "grad_norm": 1.0174649953842163, "learning_rate": 4.318813212767555e-06, "loss": 0.5048, "step": 8795 }, { "epoch": 0.56, "grad_norm": 1.009214162826538, "learning_rate": 4.317796799568536e-06, "loss": 0.5297, "step": 8796 }, { "epoch": 0.56, "grad_norm": 1.0225062370300293, "learning_rate": 4.316780415094722e-06, "loss": 0.5042, "step": 8797 }, { "epoch": 0.56, "grad_norm": 1.0147311687469482, "learning_rate": 4.315764059388905e-06, "loss": 0.5224, "step": 8798 }, { "epoch": 0.56, "grad_norm": 1.0039790868759155, "learning_rate": 4.314747732493886e-06, "loss": 0.502, "step": 8799 }, { "epoch": 0.56, "grad_norm": 1.007888913154602, "learning_rate": 4.313731434452455e-06, "loss": 0.5, "step": 8800 }, { "epoch": 0.56, "grad_norm": 1.0893665552139282, "learning_rate": 4.312715165307407e-06, "loss": 0.5831, "step": 8801 }, { "epoch": 0.56, "grad_norm": 1.087306022644043, "learning_rate": 4.311698925101532e-06, "loss": 0.5141, "step": 8802 }, { "epoch": 0.56, "grad_norm": 1.0267753601074219, "learning_rate": 4.310682713877619e-06, "loss": 0.49, "step": 8803 }, { "epoch": 0.56, "grad_norm": 1.0899007320404053, "learning_rate": 4.30966653167846e-06, "loss": 0.5678, "step": 8804 }, { "epoch": 0.56, "grad_norm": 0.9892306923866272, "learning_rate": 4.308650378546843e-06, "loss": 0.5193, "step": 8805 }, { "epoch": 0.56, "grad_norm": 0.994459867477417, "learning_rate": 4.3076342545255535e-06, "loss": 0.5005, "step": 8806 }, { "epoch": 0.56, "grad_norm": 1.0411771535873413, "learning_rate": 4.306618159657375e-06, "loss": 0.5296, "step": 8807 }, { "epoch": 0.56, "grad_norm": 1.0516984462738037, "learning_rate": 4.305602093985095e-06, "loss": 0.5262, "step": 8808 }, { "epoch": 0.56, "grad_norm": 1.0397151708602905, "learning_rate": 4.3045860575514955e-06, "loss": 0.5153, "step": 8809 }, { "epoch": 0.56, "grad_norm": 1.0043975114822388, "learning_rate": 4.303570050399358e-06, "loss": 0.53, "step": 8810 }, { "epoch": 0.56, "grad_norm": 0.9643057584762573, "learning_rate": 4.302554072571461e-06, "loss": 0.4874, "step": 8811 }, { "epoch": 0.56, "grad_norm": 1.0050950050354004, "learning_rate": 4.301538124110588e-06, "loss": 0.5355, "step": 8812 }, { "epoch": 0.56, "grad_norm": 1.0209351778030396, "learning_rate": 4.300522205059515e-06, "loss": 0.5249, "step": 8813 }, { "epoch": 0.56, "grad_norm": 1.0641778707504272, "learning_rate": 4.299506315461018e-06, "loss": 0.5145, "step": 8814 }, { "epoch": 0.56, "grad_norm": 1.0395965576171875, "learning_rate": 4.2984904553578725e-06, "loss": 0.5339, "step": 8815 }, { "epoch": 0.56, "grad_norm": 0.9726712107658386, "learning_rate": 4.297474624792853e-06, "loss": 0.5004, "step": 8816 }, { "epoch": 0.56, "grad_norm": 1.0722999572753906, "learning_rate": 4.296458823808735e-06, "loss": 0.5294, "step": 8817 }, { "epoch": 0.56, "grad_norm": 1.1132960319519043, "learning_rate": 4.295443052448288e-06, "loss": 0.5647, "step": 8818 }, { "epoch": 0.56, "grad_norm": 1.040102481842041, "learning_rate": 4.294427310754283e-06, "loss": 0.4886, "step": 8819 }, { "epoch": 0.56, "grad_norm": 1.065936803817749, "learning_rate": 4.293411598769487e-06, "loss": 0.5641, "step": 8820 }, { "epoch": 0.56, "grad_norm": 0.9980764389038086, "learning_rate": 4.292395916536674e-06, "loss": 0.5349, "step": 8821 }, { "epoch": 0.56, "grad_norm": 1.1201890707015991, "learning_rate": 4.291380264098607e-06, "loss": 0.5481, "step": 8822 }, { "epoch": 0.56, "grad_norm": 1.1285412311553955, "learning_rate": 4.290364641498051e-06, "loss": 0.4842, "step": 8823 }, { "epoch": 0.56, "grad_norm": 1.0192679166793823, "learning_rate": 4.28934904877777e-06, "loss": 0.5005, "step": 8824 }, { "epoch": 0.56, "grad_norm": 1.0406442880630493, "learning_rate": 4.288333485980531e-06, "loss": 0.5226, "step": 8825 }, { "epoch": 0.56, "grad_norm": 1.028008222579956, "learning_rate": 4.287317953149092e-06, "loss": 0.5267, "step": 8826 }, { "epoch": 0.56, "grad_norm": 0.9059807062149048, "learning_rate": 4.2863024503262146e-06, "loss": 0.4318, "step": 8827 }, { "epoch": 0.56, "grad_norm": 1.1068507432937622, "learning_rate": 4.285286977554657e-06, "loss": 0.5515, "step": 8828 }, { "epoch": 0.56, "grad_norm": 1.0708606243133545, "learning_rate": 4.284271534877181e-06, "loss": 0.5372, "step": 8829 }, { "epoch": 0.56, "grad_norm": 1.1273646354675293, "learning_rate": 4.283256122336539e-06, "loss": 0.5746, "step": 8830 }, { "epoch": 0.56, "grad_norm": 1.0696847438812256, "learning_rate": 4.28224073997549e-06, "loss": 0.5372, "step": 8831 }, { "epoch": 0.56, "grad_norm": 1.0077567100524902, "learning_rate": 4.281225387836786e-06, "loss": 0.4877, "step": 8832 }, { "epoch": 0.56, "grad_norm": 0.9975242018699646, "learning_rate": 4.280210065963179e-06, "loss": 0.4981, "step": 8833 }, { "epoch": 0.56, "grad_norm": 1.0427783727645874, "learning_rate": 4.279194774397422e-06, "loss": 0.4912, "step": 8834 }, { "epoch": 0.56, "grad_norm": 1.032755732536316, "learning_rate": 4.278179513182268e-06, "loss": 0.5291, "step": 8835 }, { "epoch": 0.56, "grad_norm": 1.051928162574768, "learning_rate": 4.2771642823604635e-06, "loss": 0.5167, "step": 8836 }, { "epoch": 0.56, "grad_norm": 1.0293514728546143, "learning_rate": 4.276149081974754e-06, "loss": 0.5173, "step": 8837 }, { "epoch": 0.56, "grad_norm": 0.9974006414413452, "learning_rate": 4.275133912067889e-06, "loss": 0.459, "step": 8838 }, { "epoch": 0.56, "grad_norm": 1.059767723083496, "learning_rate": 4.274118772682615e-06, "loss": 0.4941, "step": 8839 }, { "epoch": 0.56, "grad_norm": 1.0430185794830322, "learning_rate": 4.273103663861675e-06, "loss": 0.5184, "step": 8840 }, { "epoch": 0.56, "grad_norm": 1.0201420783996582, "learning_rate": 4.272088585647808e-06, "loss": 0.4704, "step": 8841 }, { "epoch": 0.56, "grad_norm": 1.027017593383789, "learning_rate": 4.27107353808376e-06, "loss": 0.5296, "step": 8842 }, { "epoch": 0.56, "grad_norm": 1.0478624105453491, "learning_rate": 4.2700585212122705e-06, "loss": 0.5028, "step": 8843 }, { "epoch": 0.56, "grad_norm": 1.0227540731430054, "learning_rate": 4.269043535076077e-06, "loss": 0.4999, "step": 8844 }, { "epoch": 0.56, "grad_norm": 1.003603219985962, "learning_rate": 4.2680285797179155e-06, "loss": 0.5115, "step": 8845 }, { "epoch": 0.56, "grad_norm": 1.0649179220199585, "learning_rate": 4.267013655180526e-06, "loss": 0.5591, "step": 8846 }, { "epoch": 0.56, "grad_norm": 1.0227795839309692, "learning_rate": 4.265998761506641e-06, "loss": 0.5373, "step": 8847 }, { "epoch": 0.56, "grad_norm": 0.9785618185997009, "learning_rate": 4.264983898738996e-06, "loss": 0.4689, "step": 8848 }, { "epoch": 0.56, "grad_norm": 1.12110435962677, "learning_rate": 4.263969066920321e-06, "loss": 0.5042, "step": 8849 }, { "epoch": 0.56, "grad_norm": 1.0460258722305298, "learning_rate": 4.262954266093347e-06, "loss": 0.5292, "step": 8850 }, { "epoch": 0.56, "grad_norm": 0.9611269235610962, "learning_rate": 4.261939496300807e-06, "loss": 0.4752, "step": 8851 }, { "epoch": 0.56, "grad_norm": 0.9734213948249817, "learning_rate": 4.260924757585427e-06, "loss": 0.5075, "step": 8852 }, { "epoch": 0.56, "grad_norm": 1.006095290184021, "learning_rate": 4.259910049989933e-06, "loss": 0.4719, "step": 8853 }, { "epoch": 0.56, "grad_norm": 0.9881126284599304, "learning_rate": 4.258895373557051e-06, "loss": 0.4989, "step": 8854 }, { "epoch": 0.56, "grad_norm": 0.9929634928703308, "learning_rate": 4.25788072832951e-06, "loss": 0.5272, "step": 8855 }, { "epoch": 0.56, "grad_norm": 1.017573356628418, "learning_rate": 4.256866114350029e-06, "loss": 0.4803, "step": 8856 }, { "epoch": 0.56, "grad_norm": 1.0489202737808228, "learning_rate": 4.25585153166133e-06, "loss": 0.597, "step": 8857 }, { "epoch": 0.56, "grad_norm": 1.0458786487579346, "learning_rate": 4.254836980306134e-06, "loss": 0.5275, "step": 8858 }, { "epoch": 0.56, "grad_norm": 1.017008662223816, "learning_rate": 4.253822460327162e-06, "loss": 0.5081, "step": 8859 }, { "epoch": 0.56, "grad_norm": 1.1114718914031982, "learning_rate": 4.25280797176713e-06, "loss": 0.5291, "step": 8860 }, { "epoch": 0.56, "grad_norm": 1.1073929071426392, "learning_rate": 4.251793514668754e-06, "loss": 0.5235, "step": 8861 }, { "epoch": 0.56, "grad_norm": 0.9434214234352112, "learning_rate": 4.250779089074752e-06, "loss": 0.4956, "step": 8862 }, { "epoch": 0.56, "grad_norm": 1.001192331314087, "learning_rate": 4.249764695027833e-06, "loss": 0.5378, "step": 8863 }, { "epoch": 0.56, "grad_norm": 1.0642035007476807, "learning_rate": 4.248750332570716e-06, "loss": 0.5893, "step": 8864 }, { "epoch": 0.56, "grad_norm": 1.0944091081619263, "learning_rate": 4.247736001746108e-06, "loss": 0.5033, "step": 8865 }, { "epoch": 0.56, "grad_norm": 1.0930684804916382, "learning_rate": 4.246721702596721e-06, "loss": 0.5263, "step": 8866 }, { "epoch": 0.56, "grad_norm": 1.0492407083511353, "learning_rate": 4.24570743516526e-06, "loss": 0.5593, "step": 8867 }, { "epoch": 0.56, "grad_norm": 1.0513110160827637, "learning_rate": 4.2446931994944375e-06, "loss": 0.5257, "step": 8868 }, { "epoch": 0.56, "grad_norm": 1.0380091667175293, "learning_rate": 4.243678995626955e-06, "loss": 0.5087, "step": 8869 }, { "epoch": 0.56, "grad_norm": 0.9944601058959961, "learning_rate": 4.242664823605521e-06, "loss": 0.4907, "step": 8870 }, { "epoch": 0.56, "grad_norm": 1.045111894607544, "learning_rate": 4.241650683472834e-06, "loss": 0.4721, "step": 8871 }, { "epoch": 0.56, "grad_norm": 0.9547770023345947, "learning_rate": 4.240636575271601e-06, "loss": 0.4366, "step": 8872 }, { "epoch": 0.56, "grad_norm": 0.9458922147750854, "learning_rate": 4.239622499044519e-06, "loss": 0.4814, "step": 8873 }, { "epoch": 0.56, "grad_norm": 1.0829119682312012, "learning_rate": 4.23860845483429e-06, "loss": 0.5168, "step": 8874 }, { "epoch": 0.56, "grad_norm": 1.032349705696106, "learning_rate": 4.237594442683607e-06, "loss": 0.5417, "step": 8875 }, { "epoch": 0.56, "grad_norm": 0.9982881546020508, "learning_rate": 4.236580462635173e-06, "loss": 0.5178, "step": 8876 }, { "epoch": 0.56, "grad_norm": 1.0686936378479004, "learning_rate": 4.235566514731678e-06, "loss": 0.5355, "step": 8877 }, { "epoch": 0.56, "grad_norm": 1.0420191287994385, "learning_rate": 4.23455259901582e-06, "loss": 0.5431, "step": 8878 }, { "epoch": 0.56, "grad_norm": 0.9994642734527588, "learning_rate": 4.2335387155302885e-06, "loss": 0.5152, "step": 8879 }, { "epoch": 0.56, "grad_norm": 1.0132908821105957, "learning_rate": 4.232524864317773e-06, "loss": 0.4807, "step": 8880 }, { "epoch": 0.56, "grad_norm": 0.9887734055519104, "learning_rate": 4.231511045420967e-06, "loss": 0.5242, "step": 8881 }, { "epoch": 0.56, "grad_norm": 0.95689857006073, "learning_rate": 4.230497258882559e-06, "loss": 0.4774, "step": 8882 }, { "epoch": 0.56, "grad_norm": 1.124097466468811, "learning_rate": 4.229483504745233e-06, "loss": 0.5405, "step": 8883 }, { "epoch": 0.56, "grad_norm": 0.9804500937461853, "learning_rate": 4.228469783051676e-06, "loss": 0.5318, "step": 8884 }, { "epoch": 0.56, "grad_norm": 1.0371235609054565, "learning_rate": 4.227456093844573e-06, "loss": 0.5059, "step": 8885 }, { "epoch": 0.56, "grad_norm": 1.142540693283081, "learning_rate": 4.226442437166607e-06, "loss": 0.4845, "step": 8886 }, { "epoch": 0.56, "grad_norm": 1.0167458057403564, "learning_rate": 4.225428813060459e-06, "loss": 0.5167, "step": 8887 }, { "epoch": 0.56, "grad_norm": 1.0186748504638672, "learning_rate": 4.224415221568807e-06, "loss": 0.5142, "step": 8888 }, { "epoch": 0.56, "grad_norm": 1.0175448656082153, "learning_rate": 4.223401662734333e-06, "loss": 0.4768, "step": 8889 }, { "epoch": 0.56, "grad_norm": 0.9420079588890076, "learning_rate": 4.222388136599715e-06, "loss": 0.5242, "step": 8890 }, { "epoch": 0.56, "grad_norm": 1.0170434713363647, "learning_rate": 4.221374643207626e-06, "loss": 0.5128, "step": 8891 }, { "epoch": 0.56, "grad_norm": 0.9713902473449707, "learning_rate": 4.220361182600742e-06, "loss": 0.5407, "step": 8892 }, { "epoch": 0.56, "grad_norm": 1.0836924314498901, "learning_rate": 4.219347754821737e-06, "loss": 0.4982, "step": 8893 }, { "epoch": 0.56, "grad_norm": 1.0609179735183716, "learning_rate": 4.218334359913283e-06, "loss": 0.4988, "step": 8894 }, { "epoch": 0.56, "grad_norm": 0.9709429144859314, "learning_rate": 4.217320997918048e-06, "loss": 0.5252, "step": 8895 }, { "epoch": 0.56, "grad_norm": 1.0228431224822998, "learning_rate": 4.216307668878706e-06, "loss": 0.5595, "step": 8896 }, { "epoch": 0.56, "grad_norm": 1.0736753940582275, "learning_rate": 4.2152943728379185e-06, "loss": 0.5469, "step": 8897 }, { "epoch": 0.56, "grad_norm": 0.9818423986434937, "learning_rate": 4.214281109838357e-06, "loss": 0.4778, "step": 8898 }, { "epoch": 0.56, "grad_norm": 1.0688577890396118, "learning_rate": 4.213267879922685e-06, "loss": 0.5599, "step": 8899 }, { "epoch": 0.56, "grad_norm": 0.9578925967216492, "learning_rate": 4.212254683133565e-06, "loss": 0.4579, "step": 8900 }, { "epoch": 0.56, "grad_norm": 1.0140105485916138, "learning_rate": 4.2112415195136585e-06, "loss": 0.504, "step": 8901 }, { "epoch": 0.56, "grad_norm": 1.115033507347107, "learning_rate": 4.21022838910563e-06, "loss": 0.498, "step": 8902 }, { "epoch": 0.56, "grad_norm": 1.0247352123260498, "learning_rate": 4.209215291952135e-06, "loss": 0.5112, "step": 8903 }, { "epoch": 0.56, "grad_norm": 1.0588626861572266, "learning_rate": 4.208202228095835e-06, "loss": 0.5315, "step": 8904 }, { "epoch": 0.56, "grad_norm": 1.0949006080627441, "learning_rate": 4.207189197579382e-06, "loss": 0.487, "step": 8905 }, { "epoch": 0.56, "grad_norm": 0.9179094433784485, "learning_rate": 4.2061762004454365e-06, "loss": 0.4714, "step": 8906 }, { "epoch": 0.56, "grad_norm": 0.9899871349334717, "learning_rate": 4.2051632367366485e-06, "loss": 0.5705, "step": 8907 }, { "epoch": 0.56, "grad_norm": 1.0226097106933594, "learning_rate": 4.204150306495672e-06, "loss": 0.5763, "step": 8908 }, { "epoch": 0.56, "grad_norm": 0.9980384111404419, "learning_rate": 4.203137409765159e-06, "loss": 0.4652, "step": 8909 }, { "epoch": 0.56, "grad_norm": 0.9871333241462708, "learning_rate": 4.202124546587754e-06, "loss": 0.5151, "step": 8910 }, { "epoch": 0.56, "grad_norm": 0.9857281446456909, "learning_rate": 4.201111717006111e-06, "loss": 0.5353, "step": 8911 }, { "epoch": 0.56, "grad_norm": 1.0676463842391968, "learning_rate": 4.200098921062875e-06, "loss": 0.5313, "step": 8912 }, { "epoch": 0.56, "grad_norm": 1.0641769170761108, "learning_rate": 4.19908615880069e-06, "loss": 0.5544, "step": 8913 }, { "epoch": 0.56, "grad_norm": 1.0252957344055176, "learning_rate": 4.198073430262199e-06, "loss": 0.5368, "step": 8914 }, { "epoch": 0.56, "grad_norm": 0.9849558472633362, "learning_rate": 4.197060735490048e-06, "loss": 0.486, "step": 8915 }, { "epoch": 0.56, "grad_norm": 1.0962169170379639, "learning_rate": 4.196048074526876e-06, "loss": 0.5688, "step": 8916 }, { "epoch": 0.56, "grad_norm": 1.074508786201477, "learning_rate": 4.195035447415324e-06, "loss": 0.5516, "step": 8917 }, { "epoch": 0.56, "grad_norm": 1.011013388633728, "learning_rate": 4.194022854198026e-06, "loss": 0.5414, "step": 8918 }, { "epoch": 0.57, "grad_norm": 1.0027620792388916, "learning_rate": 4.193010294917624e-06, "loss": 0.4732, "step": 8919 }, { "epoch": 0.57, "grad_norm": 1.0046014785766602, "learning_rate": 4.1919977696167515e-06, "loss": 0.4781, "step": 8920 }, { "epoch": 0.57, "grad_norm": 0.9843215942382812, "learning_rate": 4.190985278338042e-06, "loss": 0.5117, "step": 8921 }, { "epoch": 0.57, "grad_norm": 1.0696995258331299, "learning_rate": 4.189972821124126e-06, "loss": 0.5469, "step": 8922 }, { "epoch": 0.57, "grad_norm": 0.970366358757019, "learning_rate": 4.188960398017638e-06, "loss": 0.5035, "step": 8923 }, { "epoch": 0.57, "grad_norm": 0.9937092661857605, "learning_rate": 4.187948009061207e-06, "loss": 0.4778, "step": 8924 }, { "epoch": 0.57, "grad_norm": 0.9816207885742188, "learning_rate": 4.186935654297461e-06, "loss": 0.5206, "step": 8925 }, { "epoch": 0.57, "grad_norm": 0.9615216851234436, "learning_rate": 4.1859233337690245e-06, "loss": 0.4804, "step": 8926 }, { "epoch": 0.57, "grad_norm": 1.017531394958496, "learning_rate": 4.1849110475185225e-06, "loss": 0.509, "step": 8927 }, { "epoch": 0.57, "grad_norm": 1.1661837100982666, "learning_rate": 4.183898795588584e-06, "loss": 0.5104, "step": 8928 }, { "epoch": 0.57, "grad_norm": 1.035853624343872, "learning_rate": 4.1828865780218285e-06, "loss": 0.4876, "step": 8929 }, { "epoch": 0.57, "grad_norm": 0.9957482218742371, "learning_rate": 4.181874394860875e-06, "loss": 0.5066, "step": 8930 }, { "epoch": 0.57, "grad_norm": 1.0787684917449951, "learning_rate": 4.180862246148344e-06, "loss": 0.5226, "step": 8931 }, { "epoch": 0.57, "grad_norm": 0.9769288301467896, "learning_rate": 4.1798501319268565e-06, "loss": 0.4636, "step": 8932 }, { "epoch": 0.57, "grad_norm": 1.1175696849822998, "learning_rate": 4.178838052239027e-06, "loss": 0.5323, "step": 8933 }, { "epoch": 0.57, "grad_norm": 1.0295634269714355, "learning_rate": 4.177826007127468e-06, "loss": 0.4897, "step": 8934 }, { "epoch": 0.57, "grad_norm": 0.9559885859489441, "learning_rate": 4.176813996634796e-06, "loss": 0.5013, "step": 8935 }, { "epoch": 0.57, "grad_norm": 1.1553454399108887, "learning_rate": 4.175802020803624e-06, "loss": 0.5116, "step": 8936 }, { "epoch": 0.57, "grad_norm": 1.1046700477600098, "learning_rate": 4.174790079676563e-06, "loss": 0.49, "step": 8937 }, { "epoch": 0.57, "grad_norm": 1.0114251375198364, "learning_rate": 4.173778173296219e-06, "loss": 0.5084, "step": 8938 }, { "epoch": 0.57, "grad_norm": 1.115433692932129, "learning_rate": 4.172766301705202e-06, "loss": 0.5343, "step": 8939 }, { "epoch": 0.57, "grad_norm": 1.0423344373703003, "learning_rate": 4.171754464946119e-06, "loss": 0.576, "step": 8940 }, { "epoch": 0.57, "grad_norm": 0.9994631409645081, "learning_rate": 4.170742663061575e-06, "loss": 0.5001, "step": 8941 }, { "epoch": 0.57, "grad_norm": 1.0563585758209229, "learning_rate": 4.169730896094172e-06, "loss": 0.486, "step": 8942 }, { "epoch": 0.57, "grad_norm": 1.0463634729385376, "learning_rate": 4.1687191640865135e-06, "loss": 0.4895, "step": 8943 }, { "epoch": 0.57, "grad_norm": 1.0102711915969849, "learning_rate": 4.167707467081197e-06, "loss": 0.5176, "step": 8944 }, { "epoch": 0.57, "grad_norm": 1.0874836444854736, "learning_rate": 4.166695805120825e-06, "loss": 0.6213, "step": 8945 }, { "epoch": 0.57, "grad_norm": 0.9626545906066895, "learning_rate": 4.165684178247993e-06, "loss": 0.4792, "step": 8946 }, { "epoch": 0.57, "grad_norm": 1.0832995176315308, "learning_rate": 4.1646725865053005e-06, "loss": 0.546, "step": 8947 }, { "epoch": 0.57, "grad_norm": 1.1358460187911987, "learning_rate": 4.163661029935336e-06, "loss": 0.4843, "step": 8948 }, { "epoch": 0.57, "grad_norm": 1.0058339834213257, "learning_rate": 4.162649508580698e-06, "loss": 0.5078, "step": 8949 }, { "epoch": 0.57, "grad_norm": 1.0836069583892822, "learning_rate": 4.161638022483976e-06, "loss": 0.493, "step": 8950 }, { "epoch": 0.57, "grad_norm": 1.0379528999328613, "learning_rate": 4.160626571687761e-06, "loss": 0.5035, "step": 8951 }, { "epoch": 0.57, "grad_norm": 0.9744682312011719, "learning_rate": 4.159615156234639e-06, "loss": 0.5002, "step": 8952 }, { "epoch": 0.57, "grad_norm": 1.0467766523361206, "learning_rate": 4.158603776167201e-06, "loss": 0.4753, "step": 8953 }, { "epoch": 0.57, "grad_norm": 0.9876313805580139, "learning_rate": 4.157592431528031e-06, "loss": 0.4733, "step": 8954 }, { "epoch": 0.57, "grad_norm": 0.9515472054481506, "learning_rate": 4.156581122359714e-06, "loss": 0.5397, "step": 8955 }, { "epoch": 0.57, "grad_norm": 0.9919760227203369, "learning_rate": 4.15556984870483e-06, "loss": 0.5128, "step": 8956 }, { "epoch": 0.57, "grad_norm": 0.9981586933135986, "learning_rate": 4.1545586106059636e-06, "loss": 0.517, "step": 8957 }, { "epoch": 0.57, "grad_norm": 1.0415711402893066, "learning_rate": 4.153547408105691e-06, "loss": 0.5341, "step": 8958 }, { "epoch": 0.57, "grad_norm": 1.0257682800292969, "learning_rate": 4.152536241246595e-06, "loss": 0.5003, "step": 8959 }, { "epoch": 0.57, "grad_norm": 1.0822460651397705, "learning_rate": 4.151525110071248e-06, "loss": 0.5218, "step": 8960 }, { "epoch": 0.57, "grad_norm": 1.0275789499282837, "learning_rate": 4.1505140146222276e-06, "loss": 0.5335, "step": 8961 }, { "epoch": 0.57, "grad_norm": 1.0042998790740967, "learning_rate": 4.149502954942107e-06, "loss": 0.5144, "step": 8962 }, { "epoch": 0.57, "grad_norm": 1.5982105731964111, "learning_rate": 4.148491931073459e-06, "loss": 0.53, "step": 8963 }, { "epoch": 0.57, "grad_norm": 0.9901741743087769, "learning_rate": 4.147480943058852e-06, "loss": 0.55, "step": 8964 }, { "epoch": 0.57, "grad_norm": 1.0572997331619263, "learning_rate": 4.146469990940858e-06, "loss": 0.5193, "step": 8965 }, { "epoch": 0.57, "grad_norm": 1.0043081045150757, "learning_rate": 4.1454590747620424e-06, "loss": 0.5609, "step": 8966 }, { "epoch": 0.57, "grad_norm": 1.0042357444763184, "learning_rate": 4.144448194564973e-06, "loss": 0.504, "step": 8967 }, { "epoch": 0.57, "grad_norm": 1.105394721031189, "learning_rate": 4.1434373503922145e-06, "loss": 0.5269, "step": 8968 }, { "epoch": 0.57, "grad_norm": 1.0081567764282227, "learning_rate": 4.142426542286329e-06, "loss": 0.551, "step": 8969 }, { "epoch": 0.57, "grad_norm": 1.062154769897461, "learning_rate": 4.141415770289877e-06, "loss": 0.5276, "step": 8970 }, { "epoch": 0.57, "grad_norm": 0.9704147577285767, "learning_rate": 4.140405034445423e-06, "loss": 0.5222, "step": 8971 }, { "epoch": 0.57, "grad_norm": 1.0527328252792358, "learning_rate": 4.13939433479552e-06, "loss": 0.5073, "step": 8972 }, { "epoch": 0.57, "grad_norm": 1.0498989820480347, "learning_rate": 4.13838367138273e-06, "loss": 0.5046, "step": 8973 }, { "epoch": 0.57, "grad_norm": 1.0412909984588623, "learning_rate": 4.137373044249604e-06, "loss": 0.5062, "step": 8974 }, { "epoch": 0.57, "grad_norm": 1.0586767196655273, "learning_rate": 4.1363624534387e-06, "loss": 0.5243, "step": 8975 }, { "epoch": 0.57, "grad_norm": 0.9737481474876404, "learning_rate": 4.135351898992568e-06, "loss": 0.4969, "step": 8976 }, { "epoch": 0.57, "grad_norm": 1.039725422859192, "learning_rate": 4.134341380953761e-06, "loss": 0.5123, "step": 8977 }, { "epoch": 0.57, "grad_norm": 0.9762406349182129, "learning_rate": 4.133330899364824e-06, "loss": 0.509, "step": 8978 }, { "epoch": 0.57, "grad_norm": 1.0494922399520874, "learning_rate": 4.1323204542683105e-06, "loss": 0.5729, "step": 8979 }, { "epoch": 0.57, "grad_norm": 1.1076762676239014, "learning_rate": 4.131310045706763e-06, "loss": 0.5347, "step": 8980 }, { "epoch": 0.57, "grad_norm": 1.0384715795516968, "learning_rate": 4.130299673722729e-06, "loss": 0.5733, "step": 8981 }, { "epoch": 0.57, "grad_norm": 0.998306930065155, "learning_rate": 4.129289338358748e-06, "loss": 0.498, "step": 8982 }, { "epoch": 0.57, "grad_norm": 1.0489163398742676, "learning_rate": 4.128279039657366e-06, "loss": 0.5565, "step": 8983 }, { "epoch": 0.57, "grad_norm": 1.0348968505859375, "learning_rate": 4.127268777661119e-06, "loss": 0.5388, "step": 8984 }, { "epoch": 0.57, "grad_norm": 0.9773428440093994, "learning_rate": 4.126258552412551e-06, "loss": 0.5214, "step": 8985 }, { "epoch": 0.57, "grad_norm": 1.0756745338439941, "learning_rate": 4.125248363954192e-06, "loss": 0.5214, "step": 8986 }, { "epoch": 0.57, "grad_norm": 0.950646698474884, "learning_rate": 4.124238212328585e-06, "loss": 0.5012, "step": 8987 }, { "epoch": 0.57, "grad_norm": 1.036226749420166, "learning_rate": 4.123228097578258e-06, "loss": 0.5613, "step": 8988 }, { "epoch": 0.57, "grad_norm": 1.103487253189087, "learning_rate": 4.122218019745748e-06, "loss": 0.5256, "step": 8989 }, { "epoch": 0.57, "grad_norm": 1.0424480438232422, "learning_rate": 4.121207978873582e-06, "loss": 0.4807, "step": 8990 }, { "epoch": 0.57, "grad_norm": 1.0146026611328125, "learning_rate": 4.12019797500429e-06, "loss": 0.4948, "step": 8991 }, { "epoch": 0.57, "grad_norm": 1.040250301361084, "learning_rate": 4.119188008180401e-06, "loss": 0.4975, "step": 8992 }, { "epoch": 0.57, "grad_norm": 1.0362321138381958, "learning_rate": 4.118178078444442e-06, "loss": 0.5254, "step": 8993 }, { "epoch": 0.57, "grad_norm": 1.110710859298706, "learning_rate": 4.117168185838936e-06, "loss": 0.5201, "step": 8994 }, { "epoch": 0.57, "grad_norm": 1.0704374313354492, "learning_rate": 4.1161583304064055e-06, "loss": 0.4986, "step": 8995 }, { "epoch": 0.57, "grad_norm": 1.2568418979644775, "learning_rate": 4.115148512189374e-06, "loss": 0.5502, "step": 8996 }, { "epoch": 0.57, "grad_norm": 1.0175095796585083, "learning_rate": 4.114138731230362e-06, "loss": 0.5464, "step": 8997 }, { "epoch": 0.57, "grad_norm": 1.0222601890563965, "learning_rate": 4.113128987571885e-06, "loss": 0.5629, "step": 8998 }, { "epoch": 0.57, "grad_norm": 1.0167863368988037, "learning_rate": 4.1121192812564595e-06, "loss": 0.4754, "step": 8999 }, { "epoch": 0.57, "grad_norm": 1.1251527070999146, "learning_rate": 4.111109612326603e-06, "loss": 0.5546, "step": 9000 }, { "epoch": 0.57, "grad_norm": 1.0255568027496338, "learning_rate": 4.110099980824831e-06, "loss": 0.5204, "step": 9001 }, { "epoch": 0.57, "grad_norm": 0.967444658279419, "learning_rate": 4.109090386793652e-06, "loss": 0.4901, "step": 9002 }, { "epoch": 0.57, "grad_norm": 0.9833146333694458, "learning_rate": 4.108080830275576e-06, "loss": 0.5335, "step": 9003 }, { "epoch": 0.57, "grad_norm": 1.0646792650222778, "learning_rate": 4.107071311313113e-06, "loss": 0.501, "step": 9004 }, { "epoch": 0.57, "grad_norm": 1.0886857509613037, "learning_rate": 4.106061829948773e-06, "loss": 0.5414, "step": 9005 }, { "epoch": 0.57, "grad_norm": 1.0678012371063232, "learning_rate": 4.10505238622506e-06, "loss": 0.5214, "step": 9006 }, { "epoch": 0.57, "grad_norm": 1.06692373752594, "learning_rate": 4.104042980184476e-06, "loss": 0.6, "step": 9007 }, { "epoch": 0.57, "grad_norm": 0.9910625219345093, "learning_rate": 4.103033611869525e-06, "loss": 0.4904, "step": 9008 }, { "epoch": 0.57, "grad_norm": 0.9979705214500427, "learning_rate": 4.1020242813227096e-06, "loss": 0.5246, "step": 9009 }, { "epoch": 0.57, "grad_norm": 1.0530836582183838, "learning_rate": 4.101014988586528e-06, "loss": 0.5236, "step": 9010 }, { "epoch": 0.57, "grad_norm": 0.9468037486076355, "learning_rate": 4.100005733703477e-06, "loss": 0.4779, "step": 9011 }, { "epoch": 0.57, "grad_norm": 1.0236084461212158, "learning_rate": 4.0989965167160526e-06, "loss": 0.5306, "step": 9012 }, { "epoch": 0.57, "grad_norm": 0.9787879586219788, "learning_rate": 4.097987337666753e-06, "loss": 0.5132, "step": 9013 }, { "epoch": 0.57, "grad_norm": 0.9772061109542847, "learning_rate": 4.096978196598068e-06, "loss": 0.5285, "step": 9014 }, { "epoch": 0.57, "grad_norm": 1.0517624616622925, "learning_rate": 4.09596909355249e-06, "loss": 0.5299, "step": 9015 }, { "epoch": 0.57, "grad_norm": 0.9787384867668152, "learning_rate": 4.094960028572506e-06, "loss": 0.5308, "step": 9016 }, { "epoch": 0.57, "grad_norm": 1.0707100629806519, "learning_rate": 4.0939510017006095e-06, "loss": 0.4623, "step": 9017 }, { "epoch": 0.57, "grad_norm": 0.9557934999465942, "learning_rate": 4.092942012979285e-06, "loss": 0.4952, "step": 9018 }, { "epoch": 0.57, "grad_norm": 1.0198768377304077, "learning_rate": 4.091933062451015e-06, "loss": 0.5118, "step": 9019 }, { "epoch": 0.57, "grad_norm": 1.0764273405075073, "learning_rate": 4.0909241501582865e-06, "loss": 0.5339, "step": 9020 }, { "epoch": 0.57, "grad_norm": 1.1788642406463623, "learning_rate": 4.089915276143577e-06, "loss": 0.5636, "step": 9021 }, { "epoch": 0.57, "grad_norm": 1.1062636375427246, "learning_rate": 4.088906440449371e-06, "loss": 0.4871, "step": 9022 }, { "epoch": 0.57, "grad_norm": 1.003823161125183, "learning_rate": 4.087897643118145e-06, "loss": 0.5081, "step": 9023 }, { "epoch": 0.57, "grad_norm": 1.0966594219207764, "learning_rate": 4.086888884192377e-06, "loss": 0.524, "step": 9024 }, { "epoch": 0.57, "grad_norm": 1.0141334533691406, "learning_rate": 4.0858801637145395e-06, "loss": 0.4818, "step": 9025 }, { "epoch": 0.57, "grad_norm": 1.045664668083191, "learning_rate": 4.084871481727111e-06, "loss": 0.4793, "step": 9026 }, { "epoch": 0.57, "grad_norm": 1.0357179641723633, "learning_rate": 4.083862838272559e-06, "loss": 0.5286, "step": 9027 }, { "epoch": 0.57, "grad_norm": 1.0230491161346436, "learning_rate": 4.082854233393358e-06, "loss": 0.4954, "step": 9028 }, { "epoch": 0.57, "grad_norm": 1.1332944631576538, "learning_rate": 4.081845667131971e-06, "loss": 0.5341, "step": 9029 }, { "epoch": 0.57, "grad_norm": 1.1222625970840454, "learning_rate": 4.080837139530872e-06, "loss": 0.52, "step": 9030 }, { "epoch": 0.57, "grad_norm": 1.0352935791015625, "learning_rate": 4.0798286506325225e-06, "loss": 0.4609, "step": 9031 }, { "epoch": 0.57, "grad_norm": 1.044788122177124, "learning_rate": 4.078820200479389e-06, "loss": 0.5113, "step": 9032 }, { "epoch": 0.57, "grad_norm": 0.9968512058258057, "learning_rate": 4.077811789113929e-06, "loss": 0.5507, "step": 9033 }, { "epoch": 0.57, "grad_norm": 1.1189789772033691, "learning_rate": 4.076803416578608e-06, "loss": 0.5566, "step": 9034 }, { "epoch": 0.57, "grad_norm": 0.9433977007865906, "learning_rate": 4.0757950829158855e-06, "loss": 0.4937, "step": 9035 }, { "epoch": 0.57, "grad_norm": 0.9398898482322693, "learning_rate": 4.074786788168216e-06, "loss": 0.5519, "step": 9036 }, { "epoch": 0.57, "grad_norm": 1.0074583292007446, "learning_rate": 4.073778532378056e-06, "loss": 0.5391, "step": 9037 }, { "epoch": 0.57, "grad_norm": 1.0626059770584106, "learning_rate": 4.072770315587858e-06, "loss": 0.5198, "step": 9038 }, { "epoch": 0.57, "grad_norm": 1.0814262628555298, "learning_rate": 4.071762137840079e-06, "loss": 0.4846, "step": 9039 }, { "epoch": 0.57, "grad_norm": 1.0414854288101196, "learning_rate": 4.070753999177167e-06, "loss": 0.5235, "step": 9040 }, { "epoch": 0.57, "grad_norm": 0.9925733804702759, "learning_rate": 4.069745899641571e-06, "loss": 0.4721, "step": 9041 }, { "epoch": 0.57, "grad_norm": 1.0932774543762207, "learning_rate": 4.0687378392757374e-06, "loss": 0.5368, "step": 9042 }, { "epoch": 0.57, "grad_norm": 1.0027060508728027, "learning_rate": 4.0677298181221155e-06, "loss": 0.5164, "step": 9043 }, { "epoch": 0.57, "grad_norm": 1.0120025873184204, "learning_rate": 4.066721836223149e-06, "loss": 0.5285, "step": 9044 }, { "epoch": 0.57, "grad_norm": 1.1019954681396484, "learning_rate": 4.065713893621278e-06, "loss": 0.5937, "step": 9045 }, { "epoch": 0.57, "grad_norm": 1.0674412250518799, "learning_rate": 4.064705990358943e-06, "loss": 0.4762, "step": 9046 }, { "epoch": 0.57, "grad_norm": 1.0491529703140259, "learning_rate": 4.063698126478587e-06, "loss": 0.5085, "step": 9047 }, { "epoch": 0.57, "grad_norm": 1.0456452369689941, "learning_rate": 4.062690302022647e-06, "loss": 0.5022, "step": 9048 }, { "epoch": 0.57, "grad_norm": 0.9888370037078857, "learning_rate": 4.0616825170335565e-06, "loss": 0.5098, "step": 9049 }, { "epoch": 0.57, "grad_norm": 0.9898523688316345, "learning_rate": 4.060674771553751e-06, "loss": 0.5218, "step": 9050 }, { "epoch": 0.57, "grad_norm": 0.9696733951568604, "learning_rate": 4.059667065625662e-06, "loss": 0.4681, "step": 9051 }, { "epoch": 0.57, "grad_norm": 0.947935938835144, "learning_rate": 4.058659399291724e-06, "loss": 0.4732, "step": 9052 }, { "epoch": 0.57, "grad_norm": 1.0508413314819336, "learning_rate": 4.057651772594362e-06, "loss": 0.5909, "step": 9053 }, { "epoch": 0.57, "grad_norm": 0.9845364689826965, "learning_rate": 4.056644185576007e-06, "loss": 0.5609, "step": 9054 }, { "epoch": 0.57, "grad_norm": 0.9722887873649597, "learning_rate": 4.055636638279082e-06, "loss": 0.4734, "step": 9055 }, { "epoch": 0.57, "grad_norm": 1.0830838680267334, "learning_rate": 4.054629130746015e-06, "loss": 0.5326, "step": 9056 }, { "epoch": 0.57, "grad_norm": 1.0439698696136475, "learning_rate": 4.053621663019225e-06, "loss": 0.4756, "step": 9057 }, { "epoch": 0.57, "grad_norm": 0.9679062962532043, "learning_rate": 4.052614235141136e-06, "loss": 0.4911, "step": 9058 }, { "epoch": 0.57, "grad_norm": 0.990229070186615, "learning_rate": 4.051606847154164e-06, "loss": 0.4859, "step": 9059 }, { "epoch": 0.57, "grad_norm": 1.0050839185714722, "learning_rate": 4.05059949910073e-06, "loss": 0.5275, "step": 9060 }, { "epoch": 0.57, "grad_norm": 0.9835556745529175, "learning_rate": 4.049592191023247e-06, "loss": 0.4554, "step": 9061 }, { "epoch": 0.57, "grad_norm": 1.0395163297653198, "learning_rate": 4.0485849229641325e-06, "loss": 0.5225, "step": 9062 }, { "epoch": 0.57, "grad_norm": 1.0173500776290894, "learning_rate": 4.047577694965794e-06, "loss": 0.5247, "step": 9063 }, { "epoch": 0.57, "grad_norm": 1.1180040836334229, "learning_rate": 4.046570507070649e-06, "loss": 0.5739, "step": 9064 }, { "epoch": 0.57, "grad_norm": 0.9771701097488403, "learning_rate": 4.045563359321102e-06, "loss": 0.4915, "step": 9065 }, { "epoch": 0.57, "grad_norm": 1.1348835229873657, "learning_rate": 4.044556251759562e-06, "loss": 0.5332, "step": 9066 }, { "epoch": 0.57, "grad_norm": 0.9895780086517334, "learning_rate": 4.043549184428434e-06, "loss": 0.5067, "step": 9067 }, { "epoch": 0.57, "grad_norm": 1.0886191129684448, "learning_rate": 4.042542157370122e-06, "loss": 0.5575, "step": 9068 }, { "epoch": 0.57, "grad_norm": 1.0821690559387207, "learning_rate": 4.041535170627029e-06, "loss": 0.5179, "step": 9069 }, { "epoch": 0.57, "grad_norm": 1.0421892404556274, "learning_rate": 4.040528224241558e-06, "loss": 0.4974, "step": 9070 }, { "epoch": 0.57, "grad_norm": 0.916330873966217, "learning_rate": 4.039521318256104e-06, "loss": 0.4191, "step": 9071 }, { "epoch": 0.57, "grad_norm": 1.0716793537139893, "learning_rate": 4.038514452713065e-06, "loss": 0.5112, "step": 9072 }, { "epoch": 0.57, "grad_norm": 0.9565438628196716, "learning_rate": 4.037507627654838e-06, "loss": 0.48, "step": 9073 }, { "epoch": 0.57, "grad_norm": 1.0748134851455688, "learning_rate": 4.0365008431238184e-06, "loss": 0.5484, "step": 9074 }, { "epoch": 0.57, "grad_norm": 1.108292579650879, "learning_rate": 4.035494099162396e-06, "loss": 0.5484, "step": 9075 }, { "epoch": 0.58, "grad_norm": 0.9552716612815857, "learning_rate": 4.03448739581296e-06, "loss": 0.4937, "step": 9076 }, { "epoch": 0.58, "grad_norm": 1.0210188627243042, "learning_rate": 4.033480733117902e-06, "loss": 0.5145, "step": 9077 }, { "epoch": 0.58, "grad_norm": 1.0513836145401, "learning_rate": 4.032474111119609e-06, "loss": 0.5311, "step": 9078 }, { "epoch": 0.58, "grad_norm": 1.0993913412094116, "learning_rate": 4.031467529860466e-06, "loss": 0.5559, "step": 9079 }, { "epoch": 0.58, "grad_norm": 1.02354097366333, "learning_rate": 4.030460989382853e-06, "loss": 0.5015, "step": 9080 }, { "epoch": 0.58, "grad_norm": 0.9933673739433289, "learning_rate": 4.029454489729156e-06, "loss": 0.4839, "step": 9081 }, { "epoch": 0.58, "grad_norm": 1.0586044788360596, "learning_rate": 4.028448030941756e-06, "loss": 0.5142, "step": 9082 }, { "epoch": 0.58, "grad_norm": 0.9417781233787537, "learning_rate": 4.027441613063029e-06, "loss": 0.514, "step": 9083 }, { "epoch": 0.58, "grad_norm": 0.9901105165481567, "learning_rate": 4.026435236135351e-06, "loss": 0.5514, "step": 9084 }, { "epoch": 0.58, "grad_norm": 1.0686466693878174, "learning_rate": 4.025428900201098e-06, "loss": 0.5488, "step": 9085 }, { "epoch": 0.58, "grad_norm": 1.0662195682525635, "learning_rate": 4.024422605302646e-06, "loss": 0.5545, "step": 9086 }, { "epoch": 0.58, "grad_norm": 0.995975911617279, "learning_rate": 4.023416351482364e-06, "loss": 0.4709, "step": 9087 }, { "epoch": 0.58, "grad_norm": 1.0386031866073608, "learning_rate": 4.022410138782621e-06, "loss": 0.5448, "step": 9088 }, { "epoch": 0.58, "grad_norm": 0.9993204474449158, "learning_rate": 4.021403967245786e-06, "loss": 0.4724, "step": 9089 }, { "epoch": 0.58, "grad_norm": 0.9913796186447144, "learning_rate": 4.020397836914227e-06, "loss": 0.4881, "step": 9090 }, { "epoch": 0.58, "grad_norm": 1.0130491256713867, "learning_rate": 4.019391747830307e-06, "loss": 0.5029, "step": 9091 }, { "epoch": 0.58, "grad_norm": 1.1187041997909546, "learning_rate": 4.018385700036389e-06, "loss": 0.5349, "step": 9092 }, { "epoch": 0.58, "grad_norm": 1.0950320959091187, "learning_rate": 4.017379693574833e-06, "loss": 0.5018, "step": 9093 }, { "epoch": 0.58, "grad_norm": 1.059096336364746, "learning_rate": 4.016373728488002e-06, "loss": 0.5471, "step": 9094 }, { "epoch": 0.58, "grad_norm": 0.9699594974517822, "learning_rate": 4.01536780481825e-06, "loss": 0.5111, "step": 9095 }, { "epoch": 0.58, "grad_norm": 0.9474306106567383, "learning_rate": 4.014361922607936e-06, "loss": 0.5221, "step": 9096 }, { "epoch": 0.58, "grad_norm": 1.0697035789489746, "learning_rate": 4.013356081899412e-06, "loss": 0.5109, "step": 9097 }, { "epoch": 0.58, "grad_norm": 1.0884960889816284, "learning_rate": 4.0123502827350295e-06, "loss": 0.5052, "step": 9098 }, { "epoch": 0.58, "grad_norm": 1.0792391300201416, "learning_rate": 4.011344525157141e-06, "loss": 0.5616, "step": 9099 }, { "epoch": 0.58, "grad_norm": 1.062630295753479, "learning_rate": 4.010338809208098e-06, "loss": 0.5512, "step": 9100 }, { "epoch": 0.58, "grad_norm": 1.1278208494186401, "learning_rate": 4.009333134930244e-06, "loss": 0.4883, "step": 9101 }, { "epoch": 0.58, "grad_norm": 1.066338062286377, "learning_rate": 4.0083275023659236e-06, "loss": 0.5633, "step": 9102 }, { "epoch": 0.58, "grad_norm": 1.0112189054489136, "learning_rate": 4.007321911557483e-06, "loss": 0.5312, "step": 9103 }, { "epoch": 0.58, "grad_norm": 1.0003081560134888, "learning_rate": 4.0063163625472645e-06, "loss": 0.4837, "step": 9104 }, { "epoch": 0.58, "grad_norm": 0.9883372783660889, "learning_rate": 4.005310855377608e-06, "loss": 0.5329, "step": 9105 }, { "epoch": 0.58, "grad_norm": 0.9752151370048523, "learning_rate": 4.004305390090848e-06, "loss": 0.5131, "step": 9106 }, { "epoch": 0.58, "grad_norm": 1.092448353767395, "learning_rate": 4.003299966729325e-06, "loss": 0.5343, "step": 9107 }, { "epoch": 0.58, "grad_norm": 1.0620566606521606, "learning_rate": 4.002294585335375e-06, "loss": 0.4834, "step": 9108 }, { "epoch": 0.58, "grad_norm": 1.0752540826797485, "learning_rate": 4.001289245951329e-06, "loss": 0.5103, "step": 9109 }, { "epoch": 0.58, "grad_norm": 1.0744627714157104, "learning_rate": 4.000283948619517e-06, "loss": 0.5233, "step": 9110 }, { "epoch": 0.58, "grad_norm": 1.0267804861068726, "learning_rate": 3.99927869338227e-06, "loss": 0.4902, "step": 9111 }, { "epoch": 0.58, "grad_norm": 1.1059128046035767, "learning_rate": 3.998273480281919e-06, "loss": 0.524, "step": 9112 }, { "epoch": 0.58, "grad_norm": 1.12089204788208, "learning_rate": 3.997268309360785e-06, "loss": 0.5238, "step": 9113 }, { "epoch": 0.58, "grad_norm": 1.0233542919158936, "learning_rate": 3.996263180661194e-06, "loss": 0.5276, "step": 9114 }, { "epoch": 0.58, "grad_norm": 1.0219800472259521, "learning_rate": 3.995258094225468e-06, "loss": 0.4793, "step": 9115 }, { "epoch": 0.58, "grad_norm": 0.9956018328666687, "learning_rate": 3.99425305009593e-06, "loss": 0.4779, "step": 9116 }, { "epoch": 0.58, "grad_norm": 1.0090950727462769, "learning_rate": 3.993248048314897e-06, "loss": 0.5031, "step": 9117 }, { "epoch": 0.58, "grad_norm": 1.0749940872192383, "learning_rate": 3.992243088924686e-06, "loss": 0.5009, "step": 9118 }, { "epoch": 0.58, "grad_norm": 1.0072776079177856, "learning_rate": 3.991238171967612e-06, "loss": 0.5036, "step": 9119 }, { "epoch": 0.58, "grad_norm": 1.0810630321502686, "learning_rate": 3.9902332974859906e-06, "loss": 0.5413, "step": 9120 }, { "epoch": 0.58, "grad_norm": 1.0139497518539429, "learning_rate": 3.989228465522133e-06, "loss": 0.5441, "step": 9121 }, { "epoch": 0.58, "grad_norm": 1.0974375009536743, "learning_rate": 3.9882236761183476e-06, "loss": 0.5376, "step": 9122 }, { "epoch": 0.58, "grad_norm": 1.0594953298568726, "learning_rate": 3.987218929316942e-06, "loss": 0.5285, "step": 9123 }, { "epoch": 0.58, "grad_norm": 0.9792594313621521, "learning_rate": 3.986214225160226e-06, "loss": 0.4978, "step": 9124 }, { "epoch": 0.58, "grad_norm": 1.0863070487976074, "learning_rate": 3.9852095636905026e-06, "loss": 0.551, "step": 9125 }, { "epoch": 0.58, "grad_norm": 1.0734864473342896, "learning_rate": 3.984204944950073e-06, "loss": 0.5533, "step": 9126 }, { "epoch": 0.58, "grad_norm": 1.0189743041992188, "learning_rate": 3.983200368981241e-06, "loss": 0.5499, "step": 9127 }, { "epoch": 0.58, "grad_norm": 1.0332967042922974, "learning_rate": 3.982195835826302e-06, "loss": 0.507, "step": 9128 }, { "epoch": 0.58, "grad_norm": 1.0456657409667969, "learning_rate": 3.981191345527558e-06, "loss": 0.5451, "step": 9129 }, { "epoch": 0.58, "grad_norm": 1.15138578414917, "learning_rate": 3.9801868981273e-06, "loss": 0.542, "step": 9130 }, { "epoch": 0.58, "grad_norm": 0.9716564416885376, "learning_rate": 3.979182493667826e-06, "loss": 0.5107, "step": 9131 }, { "epoch": 0.58, "grad_norm": 0.9666895866394043, "learning_rate": 3.978178132191424e-06, "loss": 0.5195, "step": 9132 }, { "epoch": 0.58, "grad_norm": 1.1179858446121216, "learning_rate": 3.9771738137403885e-06, "loss": 0.5324, "step": 9133 }, { "epoch": 0.58, "grad_norm": 1.0694400072097778, "learning_rate": 3.976169538357004e-06, "loss": 0.5392, "step": 9134 }, { "epoch": 0.58, "grad_norm": 1.1132067441940308, "learning_rate": 3.97516530608356e-06, "loss": 0.5465, "step": 9135 }, { "epoch": 0.58, "grad_norm": 1.0701768398284912, "learning_rate": 3.974161116962337e-06, "loss": 0.533, "step": 9136 }, { "epoch": 0.58, "grad_norm": 0.993928849697113, "learning_rate": 3.973156971035623e-06, "loss": 0.525, "step": 9137 }, { "epoch": 0.58, "grad_norm": 1.0189783573150635, "learning_rate": 3.9721528683456966e-06, "loss": 0.5099, "step": 9138 }, { "epoch": 0.58, "grad_norm": 1.042134404182434, "learning_rate": 3.971148808934838e-06, "loss": 0.5525, "step": 9139 }, { "epoch": 0.58, "grad_norm": 1.031539797782898, "learning_rate": 3.970144792845322e-06, "loss": 0.4708, "step": 9140 }, { "epoch": 0.58, "grad_norm": 0.9681317806243896, "learning_rate": 3.9691408201194275e-06, "loss": 0.5319, "step": 9141 }, { "epoch": 0.58, "grad_norm": 0.963118314743042, "learning_rate": 3.968136890799426e-06, "loss": 0.4821, "step": 9142 }, { "epoch": 0.58, "grad_norm": 0.9749932289123535, "learning_rate": 3.967133004927592e-06, "loss": 0.5076, "step": 9143 }, { "epoch": 0.58, "grad_norm": 1.0764023065567017, "learning_rate": 3.9661291625461945e-06, "loss": 0.5544, "step": 9144 }, { "epoch": 0.58, "grad_norm": 1.0290968418121338, "learning_rate": 3.965125363697499e-06, "loss": 0.5353, "step": 9145 }, { "epoch": 0.58, "grad_norm": 0.998972475528717, "learning_rate": 3.964121608423775e-06, "loss": 0.5439, "step": 9146 }, { "epoch": 0.58, "grad_norm": 0.948601245880127, "learning_rate": 3.963117896767288e-06, "loss": 0.5401, "step": 9147 }, { "epoch": 0.58, "grad_norm": 0.9426062107086182, "learning_rate": 3.962114228770299e-06, "loss": 0.5126, "step": 9148 }, { "epoch": 0.58, "grad_norm": 0.9606030583381653, "learning_rate": 3.961110604475067e-06, "loss": 0.4788, "step": 9149 }, { "epoch": 0.58, "grad_norm": 0.9690484404563904, "learning_rate": 3.960107023923855e-06, "loss": 0.5265, "step": 9150 }, { "epoch": 0.58, "grad_norm": 1.0060521364212036, "learning_rate": 3.959103487158919e-06, "loss": 0.5165, "step": 9151 }, { "epoch": 0.58, "grad_norm": 0.9228886961936951, "learning_rate": 3.958099994222515e-06, "loss": 0.504, "step": 9152 }, { "epoch": 0.58, "grad_norm": 1.0803656578063965, "learning_rate": 3.957096545156893e-06, "loss": 0.5398, "step": 9153 }, { "epoch": 0.58, "grad_norm": 1.093693733215332, "learning_rate": 3.956093140004308e-06, "loss": 0.5192, "step": 9154 }, { "epoch": 0.58, "grad_norm": 1.07072114944458, "learning_rate": 3.955089778807012e-06, "loss": 0.5252, "step": 9155 }, { "epoch": 0.58, "grad_norm": 1.0248234272003174, "learning_rate": 3.954086461607248e-06, "loss": 0.479, "step": 9156 }, { "epoch": 0.58, "grad_norm": 0.9825848937034607, "learning_rate": 3.9530831884472655e-06, "loss": 0.4851, "step": 9157 }, { "epoch": 0.58, "grad_norm": 1.0224632024765015, "learning_rate": 3.952079959369308e-06, "loss": 0.509, "step": 9158 }, { "epoch": 0.58, "grad_norm": 1.1817082166671753, "learning_rate": 3.951076774415619e-06, "loss": 0.5363, "step": 9159 }, { "epoch": 0.58, "grad_norm": 1.055174469947815, "learning_rate": 3.950073633628436e-06, "loss": 0.5546, "step": 9160 }, { "epoch": 0.58, "grad_norm": 1.0198811292648315, "learning_rate": 3.949070537050002e-06, "loss": 0.5562, "step": 9161 }, { "epoch": 0.58, "grad_norm": 0.990945041179657, "learning_rate": 3.948067484722549e-06, "loss": 0.5661, "step": 9162 }, { "epoch": 0.58, "grad_norm": 1.0743000507354736, "learning_rate": 3.947064476688318e-06, "loss": 0.5032, "step": 9163 }, { "epoch": 0.58, "grad_norm": 1.1395080089569092, "learning_rate": 3.946061512989537e-06, "loss": 0.5668, "step": 9164 }, { "epoch": 0.58, "grad_norm": 0.9844474196434021, "learning_rate": 3.94505859366844e-06, "loss": 0.5251, "step": 9165 }, { "epoch": 0.58, "grad_norm": 1.0164319276809692, "learning_rate": 3.944055718767255e-06, "loss": 0.5247, "step": 9166 }, { "epoch": 0.58, "grad_norm": 1.0355435609817505, "learning_rate": 3.943052888328211e-06, "loss": 0.4943, "step": 9167 }, { "epoch": 0.58, "grad_norm": 1.0106382369995117, "learning_rate": 3.942050102393533e-06, "loss": 0.5792, "step": 9168 }, { "epoch": 0.58, "grad_norm": 0.9311456680297852, "learning_rate": 3.941047361005445e-06, "loss": 0.4944, "step": 9169 }, { "epoch": 0.58, "grad_norm": 1.0806012153625488, "learning_rate": 3.940044664206168e-06, "loss": 0.5499, "step": 9170 }, { "epoch": 0.58, "grad_norm": 1.1101220846176147, "learning_rate": 3.939042012037924e-06, "loss": 0.5134, "step": 9171 }, { "epoch": 0.58, "grad_norm": 1.014001488685608, "learning_rate": 3.938039404542929e-06, "loss": 0.4938, "step": 9172 }, { "epoch": 0.58, "grad_norm": 1.0506998300552368, "learning_rate": 3.937036841763401e-06, "loss": 0.5107, "step": 9173 }, { "epoch": 0.58, "grad_norm": 0.9829044342041016, "learning_rate": 3.936034323741555e-06, "loss": 0.5064, "step": 9174 }, { "epoch": 0.58, "grad_norm": 1.094900131225586, "learning_rate": 3.935031850519599e-06, "loss": 0.5056, "step": 9175 }, { "epoch": 0.58, "grad_norm": 1.1232244968414307, "learning_rate": 3.934029422139749e-06, "loss": 0.5032, "step": 9176 }, { "epoch": 0.58, "grad_norm": 1.076108694076538, "learning_rate": 3.933027038644213e-06, "loss": 0.4857, "step": 9177 }, { "epoch": 0.58, "grad_norm": 1.0531495809555054, "learning_rate": 3.932024700075196e-06, "loss": 0.5043, "step": 9178 }, { "epoch": 0.58, "grad_norm": 0.9696038961410522, "learning_rate": 3.931022406474902e-06, "loss": 0.5041, "step": 9179 }, { "epoch": 0.58, "grad_norm": 1.0817495584487915, "learning_rate": 3.930020157885537e-06, "loss": 0.5031, "step": 9180 }, { "epoch": 0.58, "grad_norm": 1.058228850364685, "learning_rate": 3.929017954349301e-06, "loss": 0.5281, "step": 9181 }, { "epoch": 0.58, "grad_norm": 1.0713286399841309, "learning_rate": 3.928015795908394e-06, "loss": 0.5015, "step": 9182 }, { "epoch": 0.58, "grad_norm": 1.0175244808197021, "learning_rate": 3.927013682605011e-06, "loss": 0.4855, "step": 9183 }, { "epoch": 0.58, "grad_norm": 1.0039374828338623, "learning_rate": 3.9260116144813495e-06, "loss": 0.5395, "step": 9184 }, { "epoch": 0.58, "grad_norm": 0.9803718328475952, "learning_rate": 3.925009591579604e-06, "loss": 0.4945, "step": 9185 }, { "epoch": 0.58, "grad_norm": 1.0349849462509155, "learning_rate": 3.9240076139419655e-06, "loss": 0.4929, "step": 9186 }, { "epoch": 0.58, "grad_norm": 0.9854714274406433, "learning_rate": 3.92300568161062e-06, "loss": 0.529, "step": 9187 }, { "epoch": 0.58, "grad_norm": 1.0709526538848877, "learning_rate": 3.9220037946277606e-06, "loss": 0.515, "step": 9188 }, { "epoch": 0.58, "grad_norm": 0.9761338829994202, "learning_rate": 3.921001953035573e-06, "loss": 0.4962, "step": 9189 }, { "epoch": 0.58, "grad_norm": 1.0243232250213623, "learning_rate": 3.920000156876238e-06, "loss": 0.5189, "step": 9190 }, { "epoch": 0.58, "grad_norm": 1.0289701223373413, "learning_rate": 3.91899840619194e-06, "loss": 0.4971, "step": 9191 }, { "epoch": 0.58, "grad_norm": 1.1676281690597534, "learning_rate": 3.9179967010248556e-06, "loss": 0.5659, "step": 9192 }, { "epoch": 0.58, "grad_norm": 1.0041871070861816, "learning_rate": 3.91699504141717e-06, "loss": 0.4873, "step": 9193 }, { "epoch": 0.58, "grad_norm": 1.0443731546401978, "learning_rate": 3.915993427411054e-06, "loss": 0.5586, "step": 9194 }, { "epoch": 0.58, "grad_norm": 1.0109294652938843, "learning_rate": 3.914991859048684e-06, "loss": 0.5058, "step": 9195 }, { "epoch": 0.58, "grad_norm": 1.0021347999572754, "learning_rate": 3.913990336372231e-06, "loss": 0.4987, "step": 9196 }, { "epoch": 0.58, "grad_norm": 1.090026617050171, "learning_rate": 3.912988859423869e-06, "loss": 0.5192, "step": 9197 }, { "epoch": 0.58, "grad_norm": 1.079319715499878, "learning_rate": 3.911987428245765e-06, "loss": 0.5062, "step": 9198 }, { "epoch": 0.58, "grad_norm": 1.0323288440704346, "learning_rate": 3.9109860428800845e-06, "loss": 0.5075, "step": 9199 }, { "epoch": 0.58, "grad_norm": 0.9536668658256531, "learning_rate": 3.909984703368992e-06, "loss": 0.4905, "step": 9200 }, { "epoch": 0.58, "grad_norm": 1.0101124048233032, "learning_rate": 3.9089834097546534e-06, "loss": 0.5074, "step": 9201 }, { "epoch": 0.58, "grad_norm": 1.1430697441101074, "learning_rate": 3.907982162079229e-06, "loss": 0.5154, "step": 9202 }, { "epoch": 0.58, "grad_norm": 1.0381815433502197, "learning_rate": 3.906980960384875e-06, "loss": 0.5142, "step": 9203 }, { "epoch": 0.58, "grad_norm": 1.0313682556152344, "learning_rate": 3.90597980471375e-06, "loss": 0.49, "step": 9204 }, { "epoch": 0.58, "grad_norm": 1.1362711191177368, "learning_rate": 3.904978695108011e-06, "loss": 0.5485, "step": 9205 }, { "epoch": 0.58, "grad_norm": 1.0760146379470825, "learning_rate": 3.9039776316098104e-06, "loss": 0.5211, "step": 9206 }, { "epoch": 0.58, "grad_norm": 1.040243148803711, "learning_rate": 3.902976614261298e-06, "loss": 0.4711, "step": 9207 }, { "epoch": 0.58, "grad_norm": 1.0480488538742065, "learning_rate": 3.901975643104625e-06, "loss": 0.5126, "step": 9208 }, { "epoch": 0.58, "grad_norm": 0.9665186405181885, "learning_rate": 3.9009747181819355e-06, "loss": 0.5173, "step": 9209 }, { "epoch": 0.58, "grad_norm": 1.1100220680236816, "learning_rate": 3.8999738395353795e-06, "loss": 0.5552, "step": 9210 }, { "epoch": 0.58, "grad_norm": 1.0339200496673584, "learning_rate": 3.898973007207097e-06, "loss": 0.5203, "step": 9211 }, { "epoch": 0.58, "grad_norm": 1.0091273784637451, "learning_rate": 3.897972221239233e-06, "loss": 0.5636, "step": 9212 }, { "epoch": 0.58, "grad_norm": 0.9974884390830994, "learning_rate": 3.896971481673923e-06, "loss": 0.53, "step": 9213 }, { "epoch": 0.58, "grad_norm": 1.0038785934448242, "learning_rate": 3.895970788553308e-06, "loss": 0.4895, "step": 9214 }, { "epoch": 0.58, "grad_norm": 1.0148288011550903, "learning_rate": 3.894970141919522e-06, "loss": 0.5236, "step": 9215 }, { "epoch": 0.58, "grad_norm": 1.0612456798553467, "learning_rate": 3.8939695418147e-06, "loss": 0.5206, "step": 9216 }, { "epoch": 0.58, "grad_norm": 1.0391217470169067, "learning_rate": 3.892968988280971e-06, "loss": 0.5306, "step": 9217 }, { "epoch": 0.58, "grad_norm": 0.9972529411315918, "learning_rate": 3.891968481360469e-06, "loss": 0.5359, "step": 9218 }, { "epoch": 0.58, "grad_norm": 1.0199835300445557, "learning_rate": 3.890968021095318e-06, "loss": 0.5045, "step": 9219 }, { "epoch": 0.58, "grad_norm": 1.0816878080368042, "learning_rate": 3.889967607527648e-06, "loss": 0.5284, "step": 9220 }, { "epoch": 0.58, "grad_norm": 0.9663817286491394, "learning_rate": 3.888967240699578e-06, "loss": 0.5057, "step": 9221 }, { "epoch": 0.58, "grad_norm": 1.0129668712615967, "learning_rate": 3.887966920653234e-06, "loss": 0.5088, "step": 9222 }, { "epoch": 0.58, "grad_norm": 1.0308432579040527, "learning_rate": 3.886966647430733e-06, "loss": 0.4421, "step": 9223 }, { "epoch": 0.58, "grad_norm": 0.9944412112236023, "learning_rate": 3.8859664210741965e-06, "loss": 0.4675, "step": 9224 }, { "epoch": 0.58, "grad_norm": 1.1368075609207153, "learning_rate": 3.884966241625737e-06, "loss": 0.5579, "step": 9225 }, { "epoch": 0.58, "grad_norm": 1.0805929899215698, "learning_rate": 3.88396610912747e-06, "loss": 0.5132, "step": 9226 }, { "epoch": 0.58, "grad_norm": 0.9060555696487427, "learning_rate": 3.882966023621509e-06, "loss": 0.4862, "step": 9227 }, { "epoch": 0.58, "grad_norm": 0.9819164276123047, "learning_rate": 3.881965985149962e-06, "loss": 0.502, "step": 9228 }, { "epoch": 0.58, "grad_norm": 1.055558204650879, "learning_rate": 3.880965993754939e-06, "loss": 0.5513, "step": 9229 }, { "epoch": 0.58, "grad_norm": 1.0333936214447021, "learning_rate": 3.879966049478544e-06, "loss": 0.486, "step": 9230 }, { "epoch": 0.58, "grad_norm": 1.0895931720733643, "learning_rate": 3.878966152362882e-06, "loss": 0.5341, "step": 9231 }, { "epoch": 0.58, "grad_norm": 1.1182314157485962, "learning_rate": 3.877966302450057e-06, "loss": 0.5111, "step": 9232 }, { "epoch": 0.58, "grad_norm": 0.9638293981552124, "learning_rate": 3.876966499782168e-06, "loss": 0.4846, "step": 9233 }, { "epoch": 0.59, "grad_norm": 1.008067011833191, "learning_rate": 3.875966744401311e-06, "loss": 0.5494, "step": 9234 }, { "epoch": 0.59, "grad_norm": 0.9954712390899658, "learning_rate": 3.874967036349585e-06, "loss": 0.5261, "step": 9235 }, { "epoch": 0.59, "grad_norm": 0.9387092590332031, "learning_rate": 3.8739673756690845e-06, "loss": 0.4703, "step": 9236 }, { "epoch": 0.59, "grad_norm": 1.0514830350875854, "learning_rate": 3.872967762401899e-06, "loss": 0.4944, "step": 9237 }, { "epoch": 0.59, "grad_norm": 1.0541136264801025, "learning_rate": 3.8719681965901225e-06, "loss": 0.5415, "step": 9238 }, { "epoch": 0.59, "grad_norm": 1.0214393138885498, "learning_rate": 3.870968678275838e-06, "loss": 0.559, "step": 9239 }, { "epoch": 0.59, "grad_norm": 1.0299464464187622, "learning_rate": 3.869969207501138e-06, "loss": 0.547, "step": 9240 }, { "epoch": 0.59, "grad_norm": 0.9679032564163208, "learning_rate": 3.868969784308101e-06, "loss": 0.5373, "step": 9241 }, { "epoch": 0.59, "grad_norm": 1.0148398876190186, "learning_rate": 3.867970408738814e-06, "loss": 0.5005, "step": 9242 }, { "epoch": 0.59, "grad_norm": 1.0126168727874756, "learning_rate": 3.866971080835352e-06, "loss": 0.528, "step": 9243 }, { "epoch": 0.59, "grad_norm": 1.1298620700836182, "learning_rate": 3.8659718006398e-06, "loss": 0.5561, "step": 9244 }, { "epoch": 0.59, "grad_norm": 1.0348117351531982, "learning_rate": 3.864972568194227e-06, "loss": 0.5081, "step": 9245 }, { "epoch": 0.59, "grad_norm": 1.0190778970718384, "learning_rate": 3.863973383540714e-06, "loss": 0.5419, "step": 9246 }, { "epoch": 0.59, "grad_norm": 0.9507538080215454, "learning_rate": 3.8629742467213266e-06, "loss": 0.4728, "step": 9247 }, { "epoch": 0.59, "grad_norm": 1.019291639328003, "learning_rate": 3.86197515777814e-06, "loss": 0.4777, "step": 9248 }, { "epoch": 0.59, "grad_norm": 1.1635910272598267, "learning_rate": 3.860976116753221e-06, "loss": 0.5153, "step": 9249 }, { "epoch": 0.59, "grad_norm": 1.0270839929580688, "learning_rate": 3.859977123688636e-06, "loss": 0.5348, "step": 9250 }, { "epoch": 0.59, "grad_norm": 1.003929615020752, "learning_rate": 3.858978178626446e-06, "loss": 0.5224, "step": 9251 }, { "epoch": 0.59, "grad_norm": 1.0090539455413818, "learning_rate": 3.8579792816087175e-06, "loss": 0.4824, "step": 9252 }, { "epoch": 0.59, "grad_norm": 1.0115137100219727, "learning_rate": 3.856980432677508e-06, "loss": 0.5075, "step": 9253 }, { "epoch": 0.59, "grad_norm": 1.0627245903015137, "learning_rate": 3.855981631874877e-06, "loss": 0.5505, "step": 9254 }, { "epoch": 0.59, "grad_norm": 1.0712007284164429, "learning_rate": 3.85498287924288e-06, "loss": 0.5619, "step": 9255 }, { "epoch": 0.59, "grad_norm": 0.9710827469825745, "learning_rate": 3.853984174823568e-06, "loss": 0.5019, "step": 9256 }, { "epoch": 0.59, "grad_norm": 1.1037195920944214, "learning_rate": 3.852985518658997e-06, "loss": 0.5162, "step": 9257 }, { "epoch": 0.59, "grad_norm": 1.0953142642974854, "learning_rate": 3.851986910791217e-06, "loss": 0.5105, "step": 9258 }, { "epoch": 0.59, "grad_norm": 1.0179473161697388, "learning_rate": 3.850988351262274e-06, "loss": 0.4858, "step": 9259 }, { "epoch": 0.59, "grad_norm": 1.0427842140197754, "learning_rate": 3.849989840114213e-06, "loss": 0.5628, "step": 9260 }, { "epoch": 0.59, "grad_norm": 1.0496574640274048, "learning_rate": 3.84899137738908e-06, "loss": 0.5172, "step": 9261 }, { "epoch": 0.59, "grad_norm": 0.9854255318641663, "learning_rate": 3.847992963128917e-06, "loss": 0.5073, "step": 9262 }, { "epoch": 0.59, "grad_norm": 0.9890566468238831, "learning_rate": 3.846994597375763e-06, "loss": 0.4825, "step": 9263 }, { "epoch": 0.59, "grad_norm": 1.06586754322052, "learning_rate": 3.845996280171653e-06, "loss": 0.5554, "step": 9264 }, { "epoch": 0.59, "grad_norm": 1.0632151365280151, "learning_rate": 3.844998011558626e-06, "loss": 0.5553, "step": 9265 }, { "epoch": 0.59, "grad_norm": 1.0093785524368286, "learning_rate": 3.843999791578716e-06, "loss": 0.4919, "step": 9266 }, { "epoch": 0.59, "grad_norm": 1.0256174802780151, "learning_rate": 3.843001620273954e-06, "loss": 0.5176, "step": 9267 }, { "epoch": 0.59, "grad_norm": 1.087141990661621, "learning_rate": 3.842003497686367e-06, "loss": 0.5095, "step": 9268 }, { "epoch": 0.59, "grad_norm": 0.9762932062149048, "learning_rate": 3.841005423857984e-06, "loss": 0.4938, "step": 9269 }, { "epoch": 0.59, "grad_norm": 1.0480889081954956, "learning_rate": 3.840007398830833e-06, "loss": 0.5023, "step": 9270 }, { "epoch": 0.59, "grad_norm": 0.9900689125061035, "learning_rate": 3.839009422646935e-06, "loss": 0.5756, "step": 9271 }, { "epoch": 0.59, "grad_norm": 1.013576865196228, "learning_rate": 3.8380114953483095e-06, "loss": 0.5436, "step": 9272 }, { "epoch": 0.59, "grad_norm": 0.955690860748291, "learning_rate": 3.837013616976977e-06, "loss": 0.4558, "step": 9273 }, { "epoch": 0.59, "grad_norm": 1.0240601301193237, "learning_rate": 3.8360157875749575e-06, "loss": 0.5591, "step": 9274 }, { "epoch": 0.59, "grad_norm": 0.9818782806396484, "learning_rate": 3.835018007184265e-06, "loss": 0.5416, "step": 9275 }, { "epoch": 0.59, "grad_norm": 0.9765317440032959, "learning_rate": 3.834020275846909e-06, "loss": 0.4923, "step": 9276 }, { "epoch": 0.59, "grad_norm": 1.0090014934539795, "learning_rate": 3.833022593604902e-06, "loss": 0.5279, "step": 9277 }, { "epoch": 0.59, "grad_norm": 0.9756602048873901, "learning_rate": 3.832024960500257e-06, "loss": 0.4579, "step": 9278 }, { "epoch": 0.59, "grad_norm": 1.0843050479888916, "learning_rate": 3.8310273765749774e-06, "loss": 0.5439, "step": 9279 }, { "epoch": 0.59, "grad_norm": 1.0157897472381592, "learning_rate": 3.830029841871067e-06, "loss": 0.5275, "step": 9280 }, { "epoch": 0.59, "grad_norm": 1.0507923364639282, "learning_rate": 3.82903235643053e-06, "loss": 0.5352, "step": 9281 }, { "epoch": 0.59, "grad_norm": 1.057509183883667, "learning_rate": 3.828034920295368e-06, "loss": 0.5214, "step": 9282 }, { "epoch": 0.59, "grad_norm": 1.1005525588989258, "learning_rate": 3.827037533507579e-06, "loss": 0.5365, "step": 9283 }, { "epoch": 0.59, "grad_norm": 1.0654922723770142, "learning_rate": 3.826040196109158e-06, "loss": 0.5105, "step": 9284 }, { "epoch": 0.59, "grad_norm": 1.0815964937210083, "learning_rate": 3.825042908142102e-06, "loss": 0.5384, "step": 9285 }, { "epoch": 0.59, "grad_norm": 0.967993438243866, "learning_rate": 3.824045669648398e-06, "loss": 0.5078, "step": 9286 }, { "epoch": 0.59, "grad_norm": 1.002392292022705, "learning_rate": 3.823048480670044e-06, "loss": 0.5284, "step": 9287 }, { "epoch": 0.59, "grad_norm": 1.01754629611969, "learning_rate": 3.8220513412490215e-06, "loss": 0.5216, "step": 9288 }, { "epoch": 0.59, "grad_norm": 1.083465337753296, "learning_rate": 3.821054251427321e-06, "loss": 0.5552, "step": 9289 }, { "epoch": 0.59, "grad_norm": 0.9693893790245056, "learning_rate": 3.820057211246923e-06, "loss": 0.4693, "step": 9290 }, { "epoch": 0.59, "grad_norm": 1.2156462669372559, "learning_rate": 3.819060220749813e-06, "loss": 0.5517, "step": 9291 }, { "epoch": 0.59, "grad_norm": 1.1351375579833984, "learning_rate": 3.8180632799779675e-06, "loss": 0.4979, "step": 9292 }, { "epoch": 0.59, "grad_norm": 1.1142170429229736, "learning_rate": 3.817066388973367e-06, "loss": 0.4765, "step": 9293 }, { "epoch": 0.59, "grad_norm": 1.0561952590942383, "learning_rate": 3.816069547777983e-06, "loss": 0.5332, "step": 9294 }, { "epoch": 0.59, "grad_norm": 1.0577905178070068, "learning_rate": 3.815072756433794e-06, "loss": 0.5367, "step": 9295 }, { "epoch": 0.59, "grad_norm": 1.036191701889038, "learning_rate": 3.814076014982769e-06, "loss": 0.5153, "step": 9296 }, { "epoch": 0.59, "grad_norm": 1.082903504371643, "learning_rate": 3.8130793234668782e-06, "loss": 0.5365, "step": 9297 }, { "epoch": 0.59, "grad_norm": 1.0335683822631836, "learning_rate": 3.812082681928086e-06, "loss": 0.5667, "step": 9298 }, { "epoch": 0.59, "grad_norm": 0.9932186603546143, "learning_rate": 3.81108609040836e-06, "loss": 0.5059, "step": 9299 }, { "epoch": 0.59, "grad_norm": 1.030775547027588, "learning_rate": 3.810089548949665e-06, "loss": 0.5228, "step": 9300 }, { "epoch": 0.59, "grad_norm": 1.0719740390777588, "learning_rate": 3.8090930575939588e-06, "loss": 0.4898, "step": 9301 }, { "epoch": 0.59, "grad_norm": 1.2146013975143433, "learning_rate": 3.8080966163832e-06, "loss": 0.5341, "step": 9302 }, { "epoch": 0.59, "grad_norm": 1.1133536100387573, "learning_rate": 3.807100225359346e-06, "loss": 0.5073, "step": 9303 }, { "epoch": 0.59, "grad_norm": 1.131595253944397, "learning_rate": 3.8061038845643535e-06, "loss": 0.5494, "step": 9304 }, { "epoch": 0.59, "grad_norm": 1.0334343910217285, "learning_rate": 3.8051075940401727e-06, "loss": 0.5115, "step": 9305 }, { "epoch": 0.59, "grad_norm": 1.0557368993759155, "learning_rate": 3.8041113538287537e-06, "loss": 0.5398, "step": 9306 }, { "epoch": 0.59, "grad_norm": 1.090477705001831, "learning_rate": 3.803115163972044e-06, "loss": 0.5244, "step": 9307 }, { "epoch": 0.59, "grad_norm": 1.0973477363586426, "learning_rate": 3.8021190245119937e-06, "loss": 0.5909, "step": 9308 }, { "epoch": 0.59, "grad_norm": 1.1514519453048706, "learning_rate": 3.8011229354905445e-06, "loss": 0.5593, "step": 9309 }, { "epoch": 0.59, "grad_norm": 1.0729470252990723, "learning_rate": 3.8001268969496357e-06, "loss": 0.5221, "step": 9310 }, { "epoch": 0.59, "grad_norm": 1.132446527481079, "learning_rate": 3.799130908931209e-06, "loss": 0.5498, "step": 9311 }, { "epoch": 0.59, "grad_norm": 1.0245213508605957, "learning_rate": 3.7981349714772044e-06, "loss": 0.496, "step": 9312 }, { "epoch": 0.59, "grad_norm": 1.035501480102539, "learning_rate": 3.7971390846295546e-06, "loss": 0.568, "step": 9313 }, { "epoch": 0.59, "grad_norm": 1.1167900562286377, "learning_rate": 3.7961432484301925e-06, "loss": 0.5439, "step": 9314 }, { "epoch": 0.59, "grad_norm": 1.0059419870376587, "learning_rate": 3.7951474629210517e-06, "loss": 0.4986, "step": 9315 }, { "epoch": 0.59, "grad_norm": 1.0720117092132568, "learning_rate": 3.7941517281440577e-06, "loss": 0.4818, "step": 9316 }, { "epoch": 0.59, "grad_norm": 1.0264700651168823, "learning_rate": 3.7931560441411413e-06, "loss": 0.52, "step": 9317 }, { "epoch": 0.59, "grad_norm": 0.9719229340553284, "learning_rate": 3.792160410954225e-06, "loss": 0.5183, "step": 9318 }, { "epoch": 0.59, "grad_norm": 0.9971143007278442, "learning_rate": 3.791164828625233e-06, "loss": 0.5075, "step": 9319 }, { "epoch": 0.59, "grad_norm": 0.9525416493415833, "learning_rate": 3.7901692971960823e-06, "loss": 0.4485, "step": 9320 }, { "epoch": 0.59, "grad_norm": 1.0481828451156616, "learning_rate": 3.7891738167086968e-06, "loss": 0.4993, "step": 9321 }, { "epoch": 0.59, "grad_norm": 1.0023530721664429, "learning_rate": 3.7881783872049875e-06, "loss": 0.531, "step": 9322 }, { "epoch": 0.59, "grad_norm": 1.02839994430542, "learning_rate": 3.7871830087268726e-06, "loss": 0.4787, "step": 9323 }, { "epoch": 0.59, "grad_norm": 1.1028426885604858, "learning_rate": 3.7861876813162596e-06, "loss": 0.5187, "step": 9324 }, { "epoch": 0.59, "grad_norm": 1.1042375564575195, "learning_rate": 3.7851924050150633e-06, "loss": 0.5468, "step": 9325 }, { "epoch": 0.59, "grad_norm": 1.0218197107315063, "learning_rate": 3.7841971798651876e-06, "loss": 0.4854, "step": 9326 }, { "epoch": 0.59, "grad_norm": 1.0304937362670898, "learning_rate": 3.78320200590854e-06, "loss": 0.5644, "step": 9327 }, { "epoch": 0.59, "grad_norm": 0.9608798027038574, "learning_rate": 3.782206883187021e-06, "loss": 0.5526, "step": 9328 }, { "epoch": 0.59, "grad_norm": 1.1082509756088257, "learning_rate": 3.7812118117425363e-06, "loss": 0.4879, "step": 9329 }, { "epoch": 0.59, "grad_norm": 0.9273836612701416, "learning_rate": 3.7802167916169808e-06, "loss": 0.535, "step": 9330 }, { "epoch": 0.59, "grad_norm": 1.0372880697250366, "learning_rate": 3.7792218228522536e-06, "loss": 0.5635, "step": 9331 }, { "epoch": 0.59, "grad_norm": 1.0150055885314941, "learning_rate": 3.7782269054902493e-06, "loss": 0.5236, "step": 9332 }, { "epoch": 0.59, "grad_norm": 0.9948717355728149, "learning_rate": 3.777232039572858e-06, "loss": 0.5182, "step": 9333 }, { "epoch": 0.59, "grad_norm": 1.0627039670944214, "learning_rate": 3.7762372251419722e-06, "loss": 0.5691, "step": 9334 }, { "epoch": 0.59, "grad_norm": 0.9712206721305847, "learning_rate": 3.7752424622394807e-06, "loss": 0.4835, "step": 9335 }, { "epoch": 0.59, "grad_norm": 1.0412235260009766, "learning_rate": 3.7742477509072684e-06, "loss": 0.4699, "step": 9336 }, { "epoch": 0.59, "grad_norm": 1.0442676544189453, "learning_rate": 3.7732530911872177e-06, "loss": 0.5041, "step": 9337 }, { "epoch": 0.59, "grad_norm": 1.0916869640350342, "learning_rate": 3.7722584831212127e-06, "loss": 0.5264, "step": 9338 }, { "epoch": 0.59, "grad_norm": 1.066199541091919, "learning_rate": 3.771263926751133e-06, "loss": 0.5292, "step": 9339 }, { "epoch": 0.59, "grad_norm": 1.045325756072998, "learning_rate": 3.7702694221188548e-06, "loss": 0.5335, "step": 9340 }, { "epoch": 0.59, "grad_norm": 1.0770453214645386, "learning_rate": 3.769274969266251e-06, "loss": 0.5102, "step": 9341 }, { "epoch": 0.59, "grad_norm": 1.151835560798645, "learning_rate": 3.768280568235198e-06, "loss": 0.5352, "step": 9342 }, { "epoch": 0.59, "grad_norm": 0.9812878966331482, "learning_rate": 3.767286219067566e-06, "loss": 0.5264, "step": 9343 }, { "epoch": 0.59, "grad_norm": 1.0495941638946533, "learning_rate": 3.766291921805224e-06, "loss": 0.5127, "step": 9344 }, { "epoch": 0.59, "grad_norm": 1.004805088043213, "learning_rate": 3.765297676490035e-06, "loss": 0.4805, "step": 9345 }, { "epoch": 0.59, "grad_norm": 1.0435104370117188, "learning_rate": 3.764303483163867e-06, "loss": 0.5383, "step": 9346 }, { "epoch": 0.59, "grad_norm": 1.1722232103347778, "learning_rate": 3.7633093418685806e-06, "loss": 0.4849, "step": 9347 }, { "epoch": 0.59, "grad_norm": 1.0799705982208252, "learning_rate": 3.7623152526460365e-06, "loss": 0.5175, "step": 9348 }, { "epoch": 0.59, "grad_norm": 1.1457706689834595, "learning_rate": 3.7613212155380907e-06, "loss": 0.5338, "step": 9349 }, { "epoch": 0.59, "grad_norm": 1.0496329069137573, "learning_rate": 3.760327230586598e-06, "loss": 0.4957, "step": 9350 }, { "epoch": 0.59, "grad_norm": 1.058712124824524, "learning_rate": 3.7593332978334153e-06, "loss": 0.51, "step": 9351 }, { "epoch": 0.59, "grad_norm": 1.0165393352508545, "learning_rate": 3.7583394173203913e-06, "loss": 0.5161, "step": 9352 }, { "epoch": 0.59, "grad_norm": 1.0252450704574585, "learning_rate": 3.757345589089374e-06, "loss": 0.5228, "step": 9353 }, { "epoch": 0.59, "grad_norm": 1.0472545623779297, "learning_rate": 3.75635181318221e-06, "loss": 0.5508, "step": 9354 }, { "epoch": 0.59, "grad_norm": 1.1078425645828247, "learning_rate": 3.755358089640747e-06, "loss": 0.5382, "step": 9355 }, { "epoch": 0.59, "grad_norm": 1.1093820333480835, "learning_rate": 3.754364418506825e-06, "loss": 0.5056, "step": 9356 }, { "epoch": 0.59, "grad_norm": 0.99863600730896, "learning_rate": 3.7533707998222835e-06, "loss": 0.5549, "step": 9357 }, { "epoch": 0.59, "grad_norm": 1.0610816478729248, "learning_rate": 3.7523772336289594e-06, "loss": 0.5259, "step": 9358 }, { "epoch": 0.59, "grad_norm": 0.961898684501648, "learning_rate": 3.751383719968692e-06, "loss": 0.4806, "step": 9359 }, { "epoch": 0.59, "grad_norm": 1.0301170349121094, "learning_rate": 3.7503902588833124e-06, "loss": 0.5118, "step": 9360 }, { "epoch": 0.59, "grad_norm": 1.0162115097045898, "learning_rate": 3.7493968504146513e-06, "loss": 0.4647, "step": 9361 }, { "epoch": 0.59, "grad_norm": 1.0619779825210571, "learning_rate": 3.748403494604539e-06, "loss": 0.5069, "step": 9362 }, { "epoch": 0.59, "grad_norm": 1.0400292873382568, "learning_rate": 3.747410191494799e-06, "loss": 0.5548, "step": 9363 }, { "epoch": 0.59, "grad_norm": 1.1837493181228638, "learning_rate": 3.74641694112726e-06, "loss": 0.4983, "step": 9364 }, { "epoch": 0.59, "grad_norm": 1.074021816253662, "learning_rate": 3.745423743543744e-06, "loss": 0.4983, "step": 9365 }, { "epoch": 0.59, "grad_norm": 1.034995675086975, "learning_rate": 3.7444305987860698e-06, "loss": 0.5463, "step": 9366 }, { "epoch": 0.59, "grad_norm": 1.1181045770645142, "learning_rate": 3.7434375068960528e-06, "loss": 0.5069, "step": 9367 }, { "epoch": 0.59, "grad_norm": 1.0152443647384644, "learning_rate": 3.7424444679155126e-06, "loss": 0.5617, "step": 9368 }, { "epoch": 0.59, "grad_norm": 1.0449225902557373, "learning_rate": 3.7414514818862613e-06, "loss": 0.5603, "step": 9369 }, { "epoch": 0.59, "grad_norm": 1.0839518308639526, "learning_rate": 3.7404585488501106e-06, "loss": 0.5406, "step": 9370 }, { "epoch": 0.59, "grad_norm": 0.9754869937896729, "learning_rate": 3.7394656688488663e-06, "loss": 0.4874, "step": 9371 }, { "epoch": 0.59, "grad_norm": 1.0486022233963013, "learning_rate": 3.7384728419243386e-06, "loss": 0.5203, "step": 9372 }, { "epoch": 0.59, "grad_norm": 1.0572601556777954, "learning_rate": 3.7374800681183334e-06, "loss": 0.5349, "step": 9373 }, { "epoch": 0.59, "grad_norm": 1.0961726903915405, "learning_rate": 3.736487347472649e-06, "loss": 0.5418, "step": 9374 }, { "epoch": 0.59, "grad_norm": 1.1418040990829468, "learning_rate": 3.735494680029086e-06, "loss": 0.5269, "step": 9375 }, { "epoch": 0.59, "grad_norm": 1.0011223554611206, "learning_rate": 3.734502065829443e-06, "loss": 0.4951, "step": 9376 }, { "epoch": 0.59, "grad_norm": 1.0225920677185059, "learning_rate": 3.7335095049155173e-06, "loss": 0.4769, "step": 9377 }, { "epoch": 0.59, "grad_norm": 1.065794587135315, "learning_rate": 3.732516997329101e-06, "loss": 0.5224, "step": 9378 }, { "epoch": 0.59, "grad_norm": 0.9985071420669556, "learning_rate": 3.731524543111983e-06, "loss": 0.5744, "step": 9379 }, { "epoch": 0.59, "grad_norm": 1.0735899209976196, "learning_rate": 3.7305321423059526e-06, "loss": 0.5112, "step": 9380 }, { "epoch": 0.59, "grad_norm": 1.0018068552017212, "learning_rate": 3.7295397949528e-06, "loss": 0.4992, "step": 9381 }, { "epoch": 0.59, "grad_norm": 1.014588475227356, "learning_rate": 3.7285475010943067e-06, "loss": 0.5304, "step": 9382 }, { "epoch": 0.59, "grad_norm": 1.0748690366744995, "learning_rate": 3.7275552607722544e-06, "loss": 0.5171, "step": 9383 }, { "epoch": 0.59, "grad_norm": 1.0756276845932007, "learning_rate": 3.726563074028422e-06, "loss": 0.5285, "step": 9384 }, { "epoch": 0.59, "grad_norm": 1.0047024488449097, "learning_rate": 3.7255709409045914e-06, "loss": 0.4926, "step": 9385 }, { "epoch": 0.59, "grad_norm": 1.0440177917480469, "learning_rate": 3.724578861442535e-06, "loss": 0.4949, "step": 9386 }, { "epoch": 0.59, "grad_norm": 1.0596648454666138, "learning_rate": 3.7235868356840244e-06, "loss": 0.5292, "step": 9387 }, { "epoch": 0.59, "grad_norm": 0.9568275809288025, "learning_rate": 3.722594863670831e-06, "loss": 0.4839, "step": 9388 }, { "epoch": 0.59, "grad_norm": 1.017176628112793, "learning_rate": 3.7216029454447262e-06, "loss": 0.4838, "step": 9389 }, { "epoch": 0.59, "grad_norm": 1.0817347764968872, "learning_rate": 3.720611081047474e-06, "loss": 0.5194, "step": 9390 }, { "epoch": 0.59, "grad_norm": 0.9253323078155518, "learning_rate": 3.7196192705208378e-06, "loss": 0.4787, "step": 9391 }, { "epoch": 0.6, "grad_norm": 1.0608935356140137, "learning_rate": 3.7186275139065807e-06, "loss": 0.4919, "step": 9392 }, { "epoch": 0.6, "grad_norm": 1.075279951095581, "learning_rate": 3.7176358112464593e-06, "loss": 0.5452, "step": 9393 }, { "epoch": 0.6, "grad_norm": 0.9955409169197083, "learning_rate": 3.716644162582235e-06, "loss": 0.5298, "step": 9394 }, { "epoch": 0.6, "grad_norm": 1.0128940343856812, "learning_rate": 3.7156525679556597e-06, "loss": 0.5528, "step": 9395 }, { "epoch": 0.6, "grad_norm": 0.9894576072692871, "learning_rate": 3.7146610274084875e-06, "loss": 0.538, "step": 9396 }, { "epoch": 0.6, "grad_norm": 1.1473660469055176, "learning_rate": 3.7136695409824665e-06, "loss": 0.5343, "step": 9397 }, { "epoch": 0.6, "grad_norm": 1.062568187713623, "learning_rate": 3.712678108719348e-06, "loss": 0.5436, "step": 9398 }, { "epoch": 0.6, "grad_norm": 1.1151442527770996, "learning_rate": 3.711686730660875e-06, "loss": 0.5086, "step": 9399 }, { "epoch": 0.6, "grad_norm": 1.0051631927490234, "learning_rate": 3.710695406848794e-06, "loss": 0.5591, "step": 9400 }, { "epoch": 0.6, "grad_norm": 1.1091972589492798, "learning_rate": 3.709704137324841e-06, "loss": 0.5446, "step": 9401 }, { "epoch": 0.6, "grad_norm": 1.0302300453186035, "learning_rate": 3.7087129221307605e-06, "loss": 0.4906, "step": 9402 }, { "epoch": 0.6, "grad_norm": 1.0200457572937012, "learning_rate": 3.7077217613082863e-06, "loss": 0.5153, "step": 9403 }, { "epoch": 0.6, "grad_norm": 1.0734446048736572, "learning_rate": 3.7067306548991543e-06, "loss": 0.5324, "step": 9404 }, { "epoch": 0.6, "grad_norm": 1.0424174070358276, "learning_rate": 3.7057396029450925e-06, "loss": 0.5747, "step": 9405 }, { "epoch": 0.6, "grad_norm": 0.9913774132728577, "learning_rate": 3.7047486054878367e-06, "loss": 0.531, "step": 9406 }, { "epoch": 0.6, "grad_norm": 1.007533073425293, "learning_rate": 3.7037576625691095e-06, "loss": 0.5315, "step": 9407 }, { "epoch": 0.6, "grad_norm": 0.990408182144165, "learning_rate": 3.7027667742306393e-06, "loss": 0.5084, "step": 9408 }, { "epoch": 0.6, "grad_norm": 0.9975370764732361, "learning_rate": 3.7017759405141476e-06, "loss": 0.4906, "step": 9409 }, { "epoch": 0.6, "grad_norm": 1.0215646028518677, "learning_rate": 3.7007851614613522e-06, "loss": 0.5576, "step": 9410 }, { "epoch": 0.6, "grad_norm": 0.9978448152542114, "learning_rate": 3.699794437113975e-06, "loss": 0.5561, "step": 9411 }, { "epoch": 0.6, "grad_norm": 0.9627044796943665, "learning_rate": 3.698803767513732e-06, "loss": 0.5112, "step": 9412 }, { "epoch": 0.6, "grad_norm": 1.0477159023284912, "learning_rate": 3.6978131527023363e-06, "loss": 0.4439, "step": 9413 }, { "epoch": 0.6, "grad_norm": 1.0030741691589355, "learning_rate": 3.696822592721497e-06, "loss": 0.5298, "step": 9414 }, { "epoch": 0.6, "grad_norm": 0.9714114665985107, "learning_rate": 3.695832087612925e-06, "loss": 0.5228, "step": 9415 }, { "epoch": 0.6, "grad_norm": 1.1148886680603027, "learning_rate": 3.6948416374183287e-06, "loss": 0.5417, "step": 9416 }, { "epoch": 0.6, "grad_norm": 1.027069330215454, "learning_rate": 3.6938512421794103e-06, "loss": 0.5612, "step": 9417 }, { "epoch": 0.6, "grad_norm": 1.0608090162277222, "learning_rate": 3.6928609019378702e-06, "loss": 0.5397, "step": 9418 }, { "epoch": 0.6, "grad_norm": 1.0678914785385132, "learning_rate": 3.6918706167354125e-06, "loss": 0.5306, "step": 9419 }, { "epoch": 0.6, "grad_norm": 0.9682676792144775, "learning_rate": 3.690880386613732e-06, "loss": 0.4933, "step": 9420 }, { "epoch": 0.6, "grad_norm": 1.0711426734924316, "learning_rate": 3.689890211614525e-06, "loss": 0.5093, "step": 9421 }, { "epoch": 0.6, "grad_norm": 1.001579999923706, "learning_rate": 3.6889000917794816e-06, "loss": 0.5197, "step": 9422 }, { "epoch": 0.6, "grad_norm": 1.130233883857727, "learning_rate": 3.6879100271502953e-06, "loss": 0.5472, "step": 9423 }, { "epoch": 0.6, "grad_norm": 0.9985414147377014, "learning_rate": 3.6869200177686543e-06, "loss": 0.4826, "step": 9424 }, { "epoch": 0.6, "grad_norm": 0.9541159868240356, "learning_rate": 3.6859300636762423e-06, "loss": 0.4679, "step": 9425 }, { "epoch": 0.6, "grad_norm": 1.0594643354415894, "learning_rate": 3.6849401649147453e-06, "loss": 0.5476, "step": 9426 }, { "epoch": 0.6, "grad_norm": 1.0757240056991577, "learning_rate": 3.683950321525841e-06, "loss": 0.5004, "step": 9427 }, { "epoch": 0.6, "grad_norm": 0.9650991559028625, "learning_rate": 3.682960533551213e-06, "loss": 0.4863, "step": 9428 }, { "epoch": 0.6, "grad_norm": 0.9989383220672607, "learning_rate": 3.681970801032534e-06, "loss": 0.5131, "step": 9429 }, { "epoch": 0.6, "grad_norm": 1.018264651298523, "learning_rate": 3.68098112401148e-06, "loss": 0.4824, "step": 9430 }, { "epoch": 0.6, "grad_norm": 1.0563603639602661, "learning_rate": 3.6799915025297206e-06, "loss": 0.4889, "step": 9431 }, { "epoch": 0.6, "grad_norm": 1.0259501934051514, "learning_rate": 3.6790019366289293e-06, "loss": 0.5, "step": 9432 }, { "epoch": 0.6, "grad_norm": 1.0241059064865112, "learning_rate": 3.67801242635077e-06, "loss": 0.5454, "step": 9433 }, { "epoch": 0.6, "grad_norm": 1.0234817266464233, "learning_rate": 3.6770229717369086e-06, "loss": 0.5117, "step": 9434 }, { "epoch": 0.6, "grad_norm": 1.0400785207748413, "learning_rate": 3.6760335728290062e-06, "loss": 0.5439, "step": 9435 }, { "epoch": 0.6, "grad_norm": 1.0051709413528442, "learning_rate": 3.6750442296687272e-06, "loss": 0.5163, "step": 9436 }, { "epoch": 0.6, "grad_norm": 1.0035061836242676, "learning_rate": 3.6740549422977244e-06, "loss": 0.5235, "step": 9437 }, { "epoch": 0.6, "grad_norm": 1.043760061264038, "learning_rate": 3.6730657107576574e-06, "loss": 0.5468, "step": 9438 }, { "epoch": 0.6, "grad_norm": 1.0327634811401367, "learning_rate": 3.6720765350901765e-06, "loss": 0.5015, "step": 9439 }, { "epoch": 0.6, "grad_norm": 1.0611823797225952, "learning_rate": 3.671087415336931e-06, "loss": 0.5478, "step": 9440 }, { "epoch": 0.6, "grad_norm": 0.970547616481781, "learning_rate": 3.6700983515395726e-06, "loss": 0.5038, "step": 9441 }, { "epoch": 0.6, "grad_norm": 1.0590057373046875, "learning_rate": 3.669109343739747e-06, "loss": 0.4945, "step": 9442 }, { "epoch": 0.6, "grad_norm": 1.0782158374786377, "learning_rate": 3.668120391979098e-06, "loss": 0.4998, "step": 9443 }, { "epoch": 0.6, "grad_norm": 0.8958311676979065, "learning_rate": 3.6671314962992634e-06, "loss": 0.4783, "step": 9444 }, { "epoch": 0.6, "grad_norm": 1.0121147632598877, "learning_rate": 3.666142656741886e-06, "loss": 0.5356, "step": 9445 }, { "epoch": 0.6, "grad_norm": 1.0281257629394531, "learning_rate": 3.6651538733486027e-06, "loss": 0.5264, "step": 9446 }, { "epoch": 0.6, "grad_norm": 0.9911076426506042, "learning_rate": 3.664165146161045e-06, "loss": 0.5249, "step": 9447 }, { "epoch": 0.6, "grad_norm": 1.0183645486831665, "learning_rate": 3.663176475220844e-06, "loss": 0.532, "step": 9448 }, { "epoch": 0.6, "grad_norm": 0.9785016775131226, "learning_rate": 3.6621878605696338e-06, "loss": 0.4689, "step": 9449 }, { "epoch": 0.6, "grad_norm": 1.0997915267944336, "learning_rate": 3.6611993022490383e-06, "loss": 0.4984, "step": 9450 }, { "epoch": 0.6, "grad_norm": 1.024304986000061, "learning_rate": 3.660210800300683e-06, "loss": 0.5057, "step": 9451 }, { "epoch": 0.6, "grad_norm": 1.0579379796981812, "learning_rate": 3.6592223547661888e-06, "loss": 0.5391, "step": 9452 }, { "epoch": 0.6, "grad_norm": 1.0190274715423584, "learning_rate": 3.6582339656871778e-06, "loss": 0.499, "step": 9453 }, { "epoch": 0.6, "grad_norm": 1.0800539255142212, "learning_rate": 3.6572456331052673e-06, "loss": 0.5218, "step": 9454 }, { "epoch": 0.6, "grad_norm": 1.018898844718933, "learning_rate": 3.656257357062073e-06, "loss": 0.5727, "step": 9455 }, { "epoch": 0.6, "grad_norm": 1.1038990020751953, "learning_rate": 3.6552691375992056e-06, "loss": 0.5848, "step": 9456 }, { "epoch": 0.6, "grad_norm": 1.0068682432174683, "learning_rate": 3.6542809747582755e-06, "loss": 0.52, "step": 9457 }, { "epoch": 0.6, "grad_norm": 1.0212358236312866, "learning_rate": 3.6532928685808937e-06, "loss": 0.5399, "step": 9458 }, { "epoch": 0.6, "grad_norm": 1.0951766967773438, "learning_rate": 3.6523048191086654e-06, "loss": 0.5039, "step": 9459 }, { "epoch": 0.6, "grad_norm": 1.0452274084091187, "learning_rate": 3.6513168263831913e-06, "loss": 0.4942, "step": 9460 }, { "epoch": 0.6, "grad_norm": 1.118046760559082, "learning_rate": 3.6503288904460725e-06, "loss": 0.5067, "step": 9461 }, { "epoch": 0.6, "grad_norm": 1.0423998832702637, "learning_rate": 3.6493410113389116e-06, "loss": 0.5379, "step": 9462 }, { "epoch": 0.6, "grad_norm": 1.0723527669906616, "learning_rate": 3.648353189103302e-06, "loss": 0.5605, "step": 9463 }, { "epoch": 0.6, "grad_norm": 1.032225251197815, "learning_rate": 3.6473654237808365e-06, "loss": 0.4947, "step": 9464 }, { "epoch": 0.6, "grad_norm": 1.0548571348190308, "learning_rate": 3.6463777154131065e-06, "loss": 0.5521, "step": 9465 }, { "epoch": 0.6, "grad_norm": 0.9877645373344421, "learning_rate": 3.645390064041704e-06, "loss": 0.4899, "step": 9466 }, { "epoch": 0.6, "grad_norm": 0.9609139561653137, "learning_rate": 3.6444024697082137e-06, "loss": 0.4671, "step": 9467 }, { "epoch": 0.6, "grad_norm": 1.0752314329147339, "learning_rate": 3.6434149324542185e-06, "loss": 0.5313, "step": 9468 }, { "epoch": 0.6, "grad_norm": 0.9778896570205688, "learning_rate": 3.6424274523213e-06, "loss": 0.5053, "step": 9469 }, { "epoch": 0.6, "grad_norm": 1.0713123083114624, "learning_rate": 3.641440029351041e-06, "loss": 0.5318, "step": 9470 }, { "epoch": 0.6, "grad_norm": 1.1029250621795654, "learning_rate": 3.640452663585017e-06, "loss": 0.5373, "step": 9471 }, { "epoch": 0.6, "grad_norm": 1.0140851736068726, "learning_rate": 3.6394653550647996e-06, "loss": 0.4839, "step": 9472 }, { "epoch": 0.6, "grad_norm": 0.9579431414604187, "learning_rate": 3.638478103831965e-06, "loss": 0.48, "step": 9473 }, { "epoch": 0.6, "grad_norm": 0.975917398929596, "learning_rate": 3.6374909099280786e-06, "loss": 0.5076, "step": 9474 }, { "epoch": 0.6, "grad_norm": 1.0448685884475708, "learning_rate": 3.636503773394713e-06, "loss": 0.5353, "step": 9475 }, { "epoch": 0.6, "grad_norm": 1.0627449750900269, "learning_rate": 3.635516694273428e-06, "loss": 0.5691, "step": 9476 }, { "epoch": 0.6, "grad_norm": 1.1303012371063232, "learning_rate": 3.63452967260579e-06, "loss": 0.5148, "step": 9477 }, { "epoch": 0.6, "grad_norm": 0.9685790538787842, "learning_rate": 3.633542708433355e-06, "loss": 0.4675, "step": 9478 }, { "epoch": 0.6, "grad_norm": 1.0133579969406128, "learning_rate": 3.632555801797686e-06, "loss": 0.5217, "step": 9479 }, { "epoch": 0.6, "grad_norm": 1.0786575078964233, "learning_rate": 3.631568952740333e-06, "loss": 0.5198, "step": 9480 }, { "epoch": 0.6, "grad_norm": 1.087209701538086, "learning_rate": 3.6305821613028524e-06, "loss": 0.5274, "step": 9481 }, { "epoch": 0.6, "grad_norm": 1.0685940980911255, "learning_rate": 3.6295954275267914e-06, "loss": 0.5198, "step": 9482 }, { "epoch": 0.6, "grad_norm": 1.0028527975082397, "learning_rate": 3.6286087514537017e-06, "loss": 0.4928, "step": 9483 }, { "epoch": 0.6, "grad_norm": 1.0007100105285645, "learning_rate": 3.6276221331251253e-06, "loss": 0.4894, "step": 9484 }, { "epoch": 0.6, "grad_norm": 0.9105003476142883, "learning_rate": 3.626635572582608e-06, "loss": 0.4802, "step": 9485 }, { "epoch": 0.6, "grad_norm": 1.1068147420883179, "learning_rate": 3.6256490698676884e-06, "loss": 0.5292, "step": 9486 }, { "epoch": 0.6, "grad_norm": 1.0141433477401733, "learning_rate": 3.6246626250219047e-06, "loss": 0.4763, "step": 9487 }, { "epoch": 0.6, "grad_norm": 0.9817478060722351, "learning_rate": 3.623676238086794e-06, "loss": 0.4768, "step": 9488 }, { "epoch": 0.6, "grad_norm": 1.070478916168213, "learning_rate": 3.6226899091038896e-06, "loss": 0.5326, "step": 9489 }, { "epoch": 0.6, "grad_norm": 1.1199346780776978, "learning_rate": 3.6217036381147216e-06, "loss": 0.5614, "step": 9490 }, { "epoch": 0.6, "grad_norm": 1.1189179420471191, "learning_rate": 3.620717425160818e-06, "loss": 0.5233, "step": 9491 }, { "epoch": 0.6, "grad_norm": 1.1126006841659546, "learning_rate": 3.619731270283705e-06, "loss": 0.5454, "step": 9492 }, { "epoch": 0.6, "grad_norm": 1.058700442314148, "learning_rate": 3.6187451735249085e-06, "loss": 0.4868, "step": 9493 }, { "epoch": 0.6, "grad_norm": 1.1182832717895508, "learning_rate": 3.6177591349259465e-06, "loss": 0.5634, "step": 9494 }, { "epoch": 0.6, "grad_norm": 1.1220393180847168, "learning_rate": 3.616773154528339e-06, "loss": 0.5331, "step": 9495 }, { "epoch": 0.6, "grad_norm": 1.0078002214431763, "learning_rate": 3.6157872323736017e-06, "loss": 0.4684, "step": 9496 }, { "epoch": 0.6, "grad_norm": 1.0165269374847412, "learning_rate": 3.61480136850325e-06, "loss": 0.5044, "step": 9497 }, { "epoch": 0.6, "grad_norm": 1.0735347270965576, "learning_rate": 3.6138155629587925e-06, "loss": 0.5003, "step": 9498 }, { "epoch": 0.6, "grad_norm": 1.0364093780517578, "learning_rate": 3.61282981578174e-06, "loss": 0.5305, "step": 9499 }, { "epoch": 0.6, "grad_norm": 0.9629833698272705, "learning_rate": 3.611844127013598e-06, "loss": 0.5103, "step": 9500 }, { "epoch": 0.6, "grad_norm": 1.0671041011810303, "learning_rate": 3.6108584966958717e-06, "loss": 0.4761, "step": 9501 }, { "epoch": 0.6, "grad_norm": 1.0553507804870605, "learning_rate": 3.6098729248700604e-06, "loss": 0.5267, "step": 9502 }, { "epoch": 0.6, "grad_norm": 0.9224212765693665, "learning_rate": 3.6088874115776664e-06, "loss": 0.4905, "step": 9503 }, { "epoch": 0.6, "grad_norm": 0.9481444954872131, "learning_rate": 3.6079019568601816e-06, "loss": 0.5146, "step": 9504 }, { "epoch": 0.6, "grad_norm": 0.9659034609794617, "learning_rate": 3.606916560759104e-06, "loss": 0.484, "step": 9505 }, { "epoch": 0.6, "grad_norm": 1.0172665119171143, "learning_rate": 3.6059312233159237e-06, "loss": 0.5203, "step": 9506 }, { "epoch": 0.6, "grad_norm": 1.0298722982406616, "learning_rate": 3.6049459445721303e-06, "loss": 0.4974, "step": 9507 }, { "epoch": 0.6, "grad_norm": 1.0093005895614624, "learning_rate": 3.6039607245692086e-06, "loss": 0.4608, "step": 9508 }, { "epoch": 0.6, "grad_norm": 0.9928223490715027, "learning_rate": 3.6029755633486464e-06, "loss": 0.5323, "step": 9509 }, { "epoch": 0.6, "grad_norm": 0.9848858714103699, "learning_rate": 3.601990460951922e-06, "loss": 0.4473, "step": 9510 }, { "epoch": 0.6, "grad_norm": 0.9341360926628113, "learning_rate": 3.6010054174205167e-06, "loss": 0.4807, "step": 9511 }, { "epoch": 0.6, "grad_norm": 1.0773816108703613, "learning_rate": 3.6000204327959055e-06, "loss": 0.5336, "step": 9512 }, { "epoch": 0.6, "grad_norm": 0.973141074180603, "learning_rate": 3.599035507119565e-06, "loss": 0.4723, "step": 9513 }, { "epoch": 0.6, "grad_norm": 1.0100555419921875, "learning_rate": 3.5980506404329647e-06, "loss": 0.5553, "step": 9514 }, { "epoch": 0.6, "grad_norm": 1.0515456199645996, "learning_rate": 3.597065832777576e-06, "loss": 0.5559, "step": 9515 }, { "epoch": 0.6, "grad_norm": 0.9875101447105408, "learning_rate": 3.5960810841948622e-06, "loss": 0.532, "step": 9516 }, { "epoch": 0.6, "grad_norm": 0.9792640209197998, "learning_rate": 3.595096394726293e-06, "loss": 0.4745, "step": 9517 }, { "epoch": 0.6, "grad_norm": 1.0469679832458496, "learning_rate": 3.594111764413326e-06, "loss": 0.5014, "step": 9518 }, { "epoch": 0.6, "grad_norm": 1.092686414718628, "learning_rate": 3.5931271932974227e-06, "loss": 0.5121, "step": 9519 }, { "epoch": 0.6, "grad_norm": 0.9899653196334839, "learning_rate": 3.592142681420039e-06, "loss": 0.5247, "step": 9520 }, { "epoch": 0.6, "grad_norm": 1.0317049026489258, "learning_rate": 3.5911582288226275e-06, "loss": 0.5234, "step": 9521 }, { "epoch": 0.6, "grad_norm": 1.0484519004821777, "learning_rate": 3.5901738355466433e-06, "loss": 0.5742, "step": 9522 }, { "epoch": 0.6, "grad_norm": 1.0550673007965088, "learning_rate": 3.5891895016335347e-06, "loss": 0.5109, "step": 9523 }, { "epoch": 0.6, "grad_norm": 1.0651562213897705, "learning_rate": 3.588205227124749e-06, "loss": 0.529, "step": 9524 }, { "epoch": 0.6, "grad_norm": 1.0518194437026978, "learning_rate": 3.587221012061728e-06, "loss": 0.5127, "step": 9525 }, { "epoch": 0.6, "grad_norm": 0.9867557287216187, "learning_rate": 3.586236856485916e-06, "loss": 0.5191, "step": 9526 }, { "epoch": 0.6, "grad_norm": 1.00824773311615, "learning_rate": 3.5852527604387533e-06, "loss": 0.5048, "step": 9527 }, { "epoch": 0.6, "grad_norm": 1.0379291772842407, "learning_rate": 3.5842687239616745e-06, "loss": 0.4946, "step": 9528 }, { "epoch": 0.6, "grad_norm": 0.9897534847259521, "learning_rate": 3.583284747096114e-06, "loss": 0.5173, "step": 9529 }, { "epoch": 0.6, "grad_norm": 0.9886677265167236, "learning_rate": 3.5823008298835044e-06, "loss": 0.5686, "step": 9530 }, { "epoch": 0.6, "grad_norm": 1.1005386114120483, "learning_rate": 3.5813169723652763e-06, "loss": 0.4842, "step": 9531 }, { "epoch": 0.6, "grad_norm": 1.1401194334030151, "learning_rate": 3.5803331745828558e-06, "loss": 0.5507, "step": 9532 }, { "epoch": 0.6, "grad_norm": 0.9957314133644104, "learning_rate": 3.579349436577665e-06, "loss": 0.5301, "step": 9533 }, { "epoch": 0.6, "grad_norm": 1.05839204788208, "learning_rate": 3.5783657583911268e-06, "loss": 0.5346, "step": 9534 }, { "epoch": 0.6, "grad_norm": 0.9779189825057983, "learning_rate": 3.5773821400646623e-06, "loss": 0.4958, "step": 9535 }, { "epoch": 0.6, "grad_norm": 0.9635352492332458, "learning_rate": 3.5763985816396873e-06, "loss": 0.4826, "step": 9536 }, { "epoch": 0.6, "grad_norm": 1.0159794092178345, "learning_rate": 3.575415083157615e-06, "loss": 0.4449, "step": 9537 }, { "epoch": 0.6, "grad_norm": 0.9795805215835571, "learning_rate": 3.5744316446598565e-06, "loss": 0.4828, "step": 9538 }, { "epoch": 0.6, "grad_norm": 0.9881680607795715, "learning_rate": 3.5734482661878244e-06, "loss": 0.5146, "step": 9539 }, { "epoch": 0.6, "grad_norm": 1.1142942905426025, "learning_rate": 3.5724649477829232e-06, "loss": 0.5824, "step": 9540 }, { "epoch": 0.6, "grad_norm": 1.0041018724441528, "learning_rate": 3.5714816894865556e-06, "loss": 0.4668, "step": 9541 }, { "epoch": 0.6, "grad_norm": 1.0158956050872803, "learning_rate": 3.570498491340124e-06, "loss": 0.49, "step": 9542 }, { "epoch": 0.6, "grad_norm": 1.0179728269577026, "learning_rate": 3.5695153533850302e-06, "loss": 0.4837, "step": 9543 }, { "epoch": 0.6, "grad_norm": 1.0044492483139038, "learning_rate": 3.5685322756626683e-06, "loss": 0.5229, "step": 9544 }, { "epoch": 0.6, "grad_norm": 1.0525065660476685, "learning_rate": 3.5675492582144322e-06, "loss": 0.5419, "step": 9545 }, { "epoch": 0.6, "grad_norm": 1.0244168043136597, "learning_rate": 3.566566301081712e-06, "loss": 0.485, "step": 9546 }, { "epoch": 0.6, "grad_norm": 1.0063843727111816, "learning_rate": 3.5655834043059e-06, "loss": 0.4753, "step": 9547 }, { "epoch": 0.6, "grad_norm": 0.9746105074882507, "learning_rate": 3.5646005679283813e-06, "loss": 0.5192, "step": 9548 }, { "epoch": 0.6, "grad_norm": 0.9663261771202087, "learning_rate": 3.5636177919905385e-06, "loss": 0.4677, "step": 9549 }, { "epoch": 0.61, "grad_norm": 1.0138956308364868, "learning_rate": 3.5626350765337546e-06, "loss": 0.5469, "step": 9550 }, { "epoch": 0.61, "grad_norm": 1.0008865594863892, "learning_rate": 3.5616524215994052e-06, "loss": 0.5154, "step": 9551 }, { "epoch": 0.61, "grad_norm": 1.1205394268035889, "learning_rate": 3.560669827228871e-06, "loss": 0.552, "step": 9552 }, { "epoch": 0.61, "grad_norm": 1.0503113269805908, "learning_rate": 3.559687293463522e-06, "loss": 0.5198, "step": 9553 }, { "epoch": 0.61, "grad_norm": 0.9706525802612305, "learning_rate": 3.5587048203447314e-06, "loss": 0.5262, "step": 9554 }, { "epoch": 0.61, "grad_norm": 1.0099480152130127, "learning_rate": 3.557722407913865e-06, "loss": 0.518, "step": 9555 }, { "epoch": 0.61, "grad_norm": 1.021026611328125, "learning_rate": 3.5567400562122934e-06, "loss": 0.4408, "step": 9556 }, { "epoch": 0.61, "grad_norm": 1.036303997039795, "learning_rate": 3.5557577652813758e-06, "loss": 0.4817, "step": 9557 }, { "epoch": 0.61, "grad_norm": 1.0459154844284058, "learning_rate": 3.554775535162475e-06, "loss": 0.5314, "step": 9558 }, { "epoch": 0.61, "grad_norm": 1.0108460187911987, "learning_rate": 3.5537933658969475e-06, "loss": 0.471, "step": 9559 }, { "epoch": 0.61, "grad_norm": 1.0860623121261597, "learning_rate": 3.5528112575261525e-06, "loss": 0.5329, "step": 9560 }, { "epoch": 0.61, "grad_norm": 1.072257399559021, "learning_rate": 3.5518292100914396e-06, "loss": 0.5182, "step": 9561 }, { "epoch": 0.61, "grad_norm": 1.0338189601898193, "learning_rate": 3.550847223634162e-06, "loss": 0.5529, "step": 9562 }, { "epoch": 0.61, "grad_norm": 1.070862889289856, "learning_rate": 3.549865298195665e-06, "loss": 0.5077, "step": 9563 }, { "epoch": 0.61, "grad_norm": 0.977547287940979, "learning_rate": 3.5488834338172974e-06, "loss": 0.5186, "step": 9564 }, { "epoch": 0.61, "grad_norm": 1.0770364999771118, "learning_rate": 3.547901630540399e-06, "loss": 0.5116, "step": 9565 }, { "epoch": 0.61, "grad_norm": 1.0419204235076904, "learning_rate": 3.546919888406313e-06, "loss": 0.5073, "step": 9566 }, { "epoch": 0.61, "grad_norm": 1.1295720338821411, "learning_rate": 3.5459382074563737e-06, "loss": 0.5172, "step": 9567 }, { "epoch": 0.61, "grad_norm": 0.9190590381622314, "learning_rate": 3.5449565877319175e-06, "loss": 0.4753, "step": 9568 }, { "epoch": 0.61, "grad_norm": 1.0490338802337646, "learning_rate": 3.54397502927428e-06, "loss": 0.4919, "step": 9569 }, { "epoch": 0.61, "grad_norm": 1.0051147937774658, "learning_rate": 3.5429935321247887e-06, "loss": 0.5308, "step": 9570 }, { "epoch": 0.61, "grad_norm": 1.1259526014328003, "learning_rate": 3.5420120963247706e-06, "loss": 0.5069, "step": 9571 }, { "epoch": 0.61, "grad_norm": 1.0539841651916504, "learning_rate": 3.5410307219155495e-06, "loss": 0.52, "step": 9572 }, { "epoch": 0.61, "grad_norm": 1.0451796054840088, "learning_rate": 3.540049408938452e-06, "loss": 0.5172, "step": 9573 }, { "epoch": 0.61, "grad_norm": 1.0502591133117676, "learning_rate": 3.539068157434794e-06, "loss": 0.485, "step": 9574 }, { "epoch": 0.61, "grad_norm": 1.0056434869766235, "learning_rate": 3.538086967445894e-06, "loss": 0.4762, "step": 9575 }, { "epoch": 0.61, "grad_norm": 1.001711368560791, "learning_rate": 3.5371058390130643e-06, "loss": 0.5313, "step": 9576 }, { "epoch": 0.61, "grad_norm": 0.9541638493537903, "learning_rate": 3.536124772177621e-06, "loss": 0.4718, "step": 9577 }, { "epoch": 0.61, "grad_norm": 1.0295259952545166, "learning_rate": 3.535143766980871e-06, "loss": 0.493, "step": 9578 }, { "epoch": 0.61, "grad_norm": 1.0524946451187134, "learning_rate": 3.53416282346412e-06, "loss": 0.5176, "step": 9579 }, { "epoch": 0.61, "grad_norm": 1.1142582893371582, "learning_rate": 3.533181941668675e-06, "loss": 0.4635, "step": 9580 }, { "epoch": 0.61, "grad_norm": 0.9713674783706665, "learning_rate": 3.5322011216358325e-06, "loss": 0.4751, "step": 9581 }, { "epoch": 0.61, "grad_norm": 1.0555380582809448, "learning_rate": 3.5312203634068977e-06, "loss": 0.5359, "step": 9582 }, { "epoch": 0.61, "grad_norm": 1.0953872203826904, "learning_rate": 3.5302396670231622e-06, "loss": 0.5405, "step": 9583 }, { "epoch": 0.61, "grad_norm": 0.9998186230659485, "learning_rate": 3.529259032525923e-06, "loss": 0.5104, "step": 9584 }, { "epoch": 0.61, "grad_norm": 1.030078649520874, "learning_rate": 3.5282784599564667e-06, "loss": 0.4584, "step": 9585 }, { "epoch": 0.61, "grad_norm": 0.9956438541412354, "learning_rate": 3.5272979493560877e-06, "loss": 0.5021, "step": 9586 }, { "epoch": 0.61, "grad_norm": 1.0601505041122437, "learning_rate": 3.5263175007660676e-06, "loss": 0.5392, "step": 9587 }, { "epoch": 0.61, "grad_norm": 0.9951921701431274, "learning_rate": 3.5253371142276915e-06, "loss": 0.5092, "step": 9588 }, { "epoch": 0.61, "grad_norm": 1.18478262424469, "learning_rate": 3.5243567897822382e-06, "loss": 0.5594, "step": 9589 }, { "epoch": 0.61, "grad_norm": 1.0945976972579956, "learning_rate": 3.5233765274709885e-06, "loss": 0.5258, "step": 9590 }, { "epoch": 0.61, "grad_norm": 1.0256088972091675, "learning_rate": 3.5223963273352157e-06, "loss": 0.5315, "step": 9591 }, { "epoch": 0.61, "grad_norm": 1.0806553363800049, "learning_rate": 3.5214161894161948e-06, "loss": 0.5353, "step": 9592 }, { "epoch": 0.61, "grad_norm": 1.0250076055526733, "learning_rate": 3.5204361137551924e-06, "loss": 0.5281, "step": 9593 }, { "epoch": 0.61, "grad_norm": 1.0070712566375732, "learning_rate": 3.5194561003934798e-06, "loss": 0.4873, "step": 9594 }, { "epoch": 0.61, "grad_norm": 1.0493123531341553, "learning_rate": 3.5184761493723197e-06, "loss": 0.5123, "step": 9595 }, { "epoch": 0.61, "grad_norm": 0.9745450019836426, "learning_rate": 3.5174962607329755e-06, "loss": 0.5053, "step": 9596 }, { "epoch": 0.61, "grad_norm": 0.979505181312561, "learning_rate": 3.516516434516707e-06, "loss": 0.5271, "step": 9597 }, { "epoch": 0.61, "grad_norm": 1.0419960021972656, "learning_rate": 3.5155366707647686e-06, "loss": 0.4916, "step": 9598 }, { "epoch": 0.61, "grad_norm": 1.0558388233184814, "learning_rate": 3.514556969518418e-06, "loss": 0.5722, "step": 9599 }, { "epoch": 0.61, "grad_norm": 1.0423963069915771, "learning_rate": 3.513577330818907e-06, "loss": 0.5179, "step": 9600 }, { "epoch": 0.61, "grad_norm": 1.0253368616104126, "learning_rate": 3.512597754707484e-06, "loss": 0.5341, "step": 9601 }, { "epoch": 0.61, "grad_norm": 0.9791843295097351, "learning_rate": 3.511618241225393e-06, "loss": 0.5181, "step": 9602 }, { "epoch": 0.61, "grad_norm": 1.036952257156372, "learning_rate": 3.5106387904138804e-06, "loss": 0.4877, "step": 9603 }, { "epoch": 0.61, "grad_norm": 1.0005004405975342, "learning_rate": 3.5096594023141895e-06, "loss": 0.5288, "step": 9604 }, { "epoch": 0.61, "grad_norm": 1.0786027908325195, "learning_rate": 3.508680076967556e-06, "loss": 0.568, "step": 9605 }, { "epoch": 0.61, "grad_norm": 1.108238935470581, "learning_rate": 3.507700814415215e-06, "loss": 0.5047, "step": 9606 }, { "epoch": 0.61, "grad_norm": 1.0978314876556396, "learning_rate": 3.5067216146984016e-06, "loss": 0.5321, "step": 9607 }, { "epoch": 0.61, "grad_norm": 1.0798311233520508, "learning_rate": 3.505742477858348e-06, "loss": 0.4769, "step": 9608 }, { "epoch": 0.61, "grad_norm": 1.0491946935653687, "learning_rate": 3.50476340393628e-06, "loss": 0.4781, "step": 9609 }, { "epoch": 0.61, "grad_norm": 1.044782280921936, "learning_rate": 3.5037843929734216e-06, "loss": 0.5266, "step": 9610 }, { "epoch": 0.61, "grad_norm": 1.0892606973648071, "learning_rate": 3.502805445010998e-06, "loss": 0.4602, "step": 9611 }, { "epoch": 0.61, "grad_norm": 1.022353172302246, "learning_rate": 3.5018265600902313e-06, "loss": 0.5044, "step": 9612 }, { "epoch": 0.61, "grad_norm": 1.0843284130096436, "learning_rate": 3.5008477382523355e-06, "loss": 0.5184, "step": 9613 }, { "epoch": 0.61, "grad_norm": 1.0725637674331665, "learning_rate": 3.4998689795385245e-06, "loss": 0.5246, "step": 9614 }, { "epoch": 0.61, "grad_norm": 1.0283935070037842, "learning_rate": 3.4988902839900118e-06, "loss": 0.5522, "step": 9615 }, { "epoch": 0.61, "grad_norm": 1.0854668617248535, "learning_rate": 3.4979116516480094e-06, "loss": 0.5249, "step": 9616 }, { "epoch": 0.61, "grad_norm": 1.140599012374878, "learning_rate": 3.496933082553722e-06, "loss": 0.5659, "step": 9617 }, { "epoch": 0.61, "grad_norm": 1.0611871480941772, "learning_rate": 3.495954576748353e-06, "loss": 0.4834, "step": 9618 }, { "epoch": 0.61, "grad_norm": 1.074779748916626, "learning_rate": 3.4949761342731025e-06, "loss": 0.5413, "step": 9619 }, { "epoch": 0.61, "grad_norm": 1.0762356519699097, "learning_rate": 3.493997755169174e-06, "loss": 0.5502, "step": 9620 }, { "epoch": 0.61, "grad_norm": 1.0419806241989136, "learning_rate": 3.4930194394777615e-06, "loss": 0.5192, "step": 9621 }, { "epoch": 0.61, "grad_norm": 0.9293636679649353, "learning_rate": 3.492041187240056e-06, "loss": 0.4888, "step": 9622 }, { "epoch": 0.61, "grad_norm": 1.0450637340545654, "learning_rate": 3.49106299849725e-06, "loss": 0.5002, "step": 9623 }, { "epoch": 0.61, "grad_norm": 1.0150624513626099, "learning_rate": 3.4900848732905348e-06, "loss": 0.5786, "step": 9624 }, { "epoch": 0.61, "grad_norm": 1.049338698387146, "learning_rate": 3.4891068116610914e-06, "loss": 0.5621, "step": 9625 }, { "epoch": 0.61, "grad_norm": 1.0134360790252686, "learning_rate": 3.4881288136501036e-06, "loss": 0.5458, "step": 9626 }, { "epoch": 0.61, "grad_norm": 1.0989668369293213, "learning_rate": 3.487150879298753e-06, "loss": 0.5159, "step": 9627 }, { "epoch": 0.61, "grad_norm": 0.9743390083312988, "learning_rate": 3.486173008648215e-06, "loss": 0.5264, "step": 9628 }, { "epoch": 0.61, "grad_norm": 0.9863070249557495, "learning_rate": 3.485195201739665e-06, "loss": 0.499, "step": 9629 }, { "epoch": 0.61, "grad_norm": 0.9773442149162292, "learning_rate": 3.4842174586142772e-06, "loss": 0.4861, "step": 9630 }, { "epoch": 0.61, "grad_norm": 1.0338678359985352, "learning_rate": 3.4832397793132187e-06, "loss": 0.565, "step": 9631 }, { "epoch": 0.61, "grad_norm": 1.0463933944702148, "learning_rate": 3.4822621638776555e-06, "loss": 0.5112, "step": 9632 }, { "epoch": 0.61, "grad_norm": 0.9847338795661926, "learning_rate": 3.4812846123487532e-06, "loss": 0.4575, "step": 9633 }, { "epoch": 0.61, "grad_norm": 1.1015348434448242, "learning_rate": 3.4803071247676735e-06, "loss": 0.5258, "step": 9634 }, { "epoch": 0.61, "grad_norm": 0.9594420194625854, "learning_rate": 3.4793297011755746e-06, "loss": 0.4941, "step": 9635 }, { "epoch": 0.61, "grad_norm": 1.0367622375488281, "learning_rate": 3.4783523416136096e-06, "loss": 0.5378, "step": 9636 }, { "epoch": 0.61, "grad_norm": 0.9925249218940735, "learning_rate": 3.477375046122935e-06, "loss": 0.5082, "step": 9637 }, { "epoch": 0.61, "grad_norm": 1.014980673789978, "learning_rate": 3.476397814744702e-06, "loss": 0.5395, "step": 9638 }, { "epoch": 0.61, "grad_norm": 1.002394199371338, "learning_rate": 3.4754206475200556e-06, "loss": 0.5063, "step": 9639 }, { "epoch": 0.61, "grad_norm": 1.1005549430847168, "learning_rate": 3.4744435444901412e-06, "loss": 0.5004, "step": 9640 }, { "epoch": 0.61, "grad_norm": 1.0560725927352905, "learning_rate": 3.473466505696103e-06, "loss": 0.4656, "step": 9641 }, { "epoch": 0.61, "grad_norm": 1.0724444389343262, "learning_rate": 3.4724895311790806e-06, "loss": 0.4971, "step": 9642 }, { "epoch": 0.61, "grad_norm": 1.0155487060546875, "learning_rate": 3.4715126209802104e-06, "loss": 0.5048, "step": 9643 }, { "epoch": 0.61, "grad_norm": 1.043113350868225, "learning_rate": 3.4705357751406256e-06, "loss": 0.5237, "step": 9644 }, { "epoch": 0.61, "grad_norm": 0.9849514365196228, "learning_rate": 3.469558993701457e-06, "loss": 0.471, "step": 9645 }, { "epoch": 0.61, "grad_norm": 1.0826847553253174, "learning_rate": 3.468582276703838e-06, "loss": 0.5301, "step": 9646 }, { "epoch": 0.61, "grad_norm": 1.0258899927139282, "learning_rate": 3.467605624188891e-06, "loss": 0.4994, "step": 9647 }, { "epoch": 0.61, "grad_norm": 0.9913710355758667, "learning_rate": 3.46662903619774e-06, "loss": 0.4979, "step": 9648 }, { "epoch": 0.61, "grad_norm": 1.1202266216278076, "learning_rate": 3.4656525127715045e-06, "loss": 0.5012, "step": 9649 }, { "epoch": 0.61, "grad_norm": 1.0036853551864624, "learning_rate": 3.464676053951307e-06, "loss": 0.5242, "step": 9650 }, { "epoch": 0.61, "grad_norm": 1.0133076906204224, "learning_rate": 3.463699659778259e-06, "loss": 0.5275, "step": 9651 }, { "epoch": 0.61, "grad_norm": 1.055254578590393, "learning_rate": 3.4627233302934737e-06, "loss": 0.5477, "step": 9652 }, { "epoch": 0.61, "grad_norm": 1.0198588371276855, "learning_rate": 3.4617470655380597e-06, "loss": 0.5041, "step": 9653 }, { "epoch": 0.61, "grad_norm": 1.0016930103302002, "learning_rate": 3.460770865553128e-06, "loss": 0.4829, "step": 9654 }, { "epoch": 0.61, "grad_norm": 1.0819693803787231, "learning_rate": 3.4597947303797795e-06, "loss": 0.5676, "step": 9655 }, { "epoch": 0.61, "grad_norm": 1.0289030075073242, "learning_rate": 3.458818660059117e-06, "loss": 0.5057, "step": 9656 }, { "epoch": 0.61, "grad_norm": 1.0696585178375244, "learning_rate": 3.4578426546322403e-06, "loss": 0.5772, "step": 9657 }, { "epoch": 0.61, "grad_norm": 0.9678754806518555, "learning_rate": 3.4568667141402425e-06, "loss": 0.5178, "step": 9658 }, { "epoch": 0.61, "grad_norm": 1.0327085256576538, "learning_rate": 3.4558908386242208e-06, "loss": 0.4855, "step": 9659 }, { "epoch": 0.61, "grad_norm": 1.0984344482421875, "learning_rate": 3.4549150281252635e-06, "loss": 0.5505, "step": 9660 }, { "epoch": 0.61, "grad_norm": 1.1058754920959473, "learning_rate": 3.4539392826844607e-06, "loss": 0.5344, "step": 9661 }, { "epoch": 0.61, "grad_norm": 1.0699716806411743, "learning_rate": 3.4529636023428935e-06, "loss": 0.4925, "step": 9662 }, { "epoch": 0.61, "grad_norm": 1.0517855882644653, "learning_rate": 3.4519879871416505e-06, "loss": 0.4727, "step": 9663 }, { "epoch": 0.61, "grad_norm": 1.0027638673782349, "learning_rate": 3.451012437121806e-06, "loss": 0.4895, "step": 9664 }, { "epoch": 0.61, "grad_norm": 1.0605578422546387, "learning_rate": 3.4500369523244414e-06, "loss": 0.4929, "step": 9665 }, { "epoch": 0.61, "grad_norm": 0.9826477766036987, "learning_rate": 3.4490615327906264e-06, "loss": 0.5018, "step": 9666 }, { "epoch": 0.61, "grad_norm": 1.012223482131958, "learning_rate": 3.448086178561436e-06, "loss": 0.49, "step": 9667 }, { "epoch": 0.61, "grad_norm": 1.0503581762313843, "learning_rate": 3.447110889677938e-06, "loss": 0.5748, "step": 9668 }, { "epoch": 0.61, "grad_norm": 1.1112549304962158, "learning_rate": 3.4461356661811997e-06, "loss": 0.504, "step": 9669 }, { "epoch": 0.61, "grad_norm": 0.9363526701927185, "learning_rate": 3.4451605081122797e-06, "loss": 0.4459, "step": 9670 }, { "epoch": 0.61, "grad_norm": 1.1454665660858154, "learning_rate": 3.4441854155122446e-06, "loss": 0.5197, "step": 9671 }, { "epoch": 0.61, "grad_norm": 1.0244606733322144, "learning_rate": 3.443210388422148e-06, "loss": 0.5044, "step": 9672 }, { "epoch": 0.61, "grad_norm": 0.9871229529380798, "learning_rate": 3.4422354268830473e-06, "loss": 0.5346, "step": 9673 }, { "epoch": 0.61, "grad_norm": 1.0190272331237793, "learning_rate": 3.441260530935994e-06, "loss": 0.5098, "step": 9674 }, { "epoch": 0.61, "grad_norm": 1.0977753400802612, "learning_rate": 3.4402857006220353e-06, "loss": 0.5826, "step": 9675 }, { "epoch": 0.61, "grad_norm": 1.1146341562271118, "learning_rate": 3.439310935982221e-06, "loss": 0.5425, "step": 9676 }, { "epoch": 0.61, "grad_norm": 1.0122137069702148, "learning_rate": 3.4383362370575947e-06, "loss": 0.4889, "step": 9677 }, { "epoch": 0.61, "grad_norm": 1.0664081573486328, "learning_rate": 3.4373616038891966e-06, "loss": 0.5433, "step": 9678 }, { "epoch": 0.61, "grad_norm": 0.9652659893035889, "learning_rate": 3.4363870365180634e-06, "loss": 0.4762, "step": 9679 }, { "epoch": 0.61, "grad_norm": 1.0801855325698853, "learning_rate": 3.435412534985234e-06, "loss": 0.5382, "step": 9680 }, { "epoch": 0.61, "grad_norm": 1.0500061511993408, "learning_rate": 3.4344380993317404e-06, "loss": 0.5247, "step": 9681 }, { "epoch": 0.61, "grad_norm": 1.1069306135177612, "learning_rate": 3.433463729598613e-06, "loss": 0.5384, "step": 9682 }, { "epoch": 0.61, "grad_norm": 1.0150129795074463, "learning_rate": 3.432489425826876e-06, "loss": 0.5031, "step": 9683 }, { "epoch": 0.61, "grad_norm": 1.047875165939331, "learning_rate": 3.431515188057557e-06, "loss": 0.4805, "step": 9684 }, { "epoch": 0.61, "grad_norm": 0.9312357306480408, "learning_rate": 3.4305410163316788e-06, "loss": 0.4782, "step": 9685 }, { "epoch": 0.61, "grad_norm": 1.0885319709777832, "learning_rate": 3.429566910690258e-06, "loss": 0.5127, "step": 9686 }, { "epoch": 0.61, "grad_norm": 1.1217483282089233, "learning_rate": 3.42859287117431e-06, "loss": 0.5348, "step": 9687 }, { "epoch": 0.61, "grad_norm": 1.1635698080062866, "learning_rate": 3.42761889782485e-06, "loss": 0.5381, "step": 9688 }, { "epoch": 0.61, "grad_norm": 1.1344828605651855, "learning_rate": 3.4266449906828897e-06, "loss": 0.5175, "step": 9689 }, { "epoch": 0.61, "grad_norm": 1.1053919792175293, "learning_rate": 3.4256711497894346e-06, "loss": 0.53, "step": 9690 }, { "epoch": 0.61, "grad_norm": 0.9443258047103882, "learning_rate": 3.4246973751854917e-06, "loss": 0.4928, "step": 9691 }, { "epoch": 0.61, "grad_norm": 1.0636018514633179, "learning_rate": 3.42372366691206e-06, "loss": 0.5219, "step": 9692 }, { "epoch": 0.61, "grad_norm": 1.1260120868682861, "learning_rate": 3.422750025010143e-06, "loss": 0.4891, "step": 9693 }, { "epoch": 0.61, "grad_norm": 1.0381945371627808, "learning_rate": 3.421776449520735e-06, "loss": 0.5239, "step": 9694 }, { "epoch": 0.61, "grad_norm": 1.145067572593689, "learning_rate": 3.4208029404848315e-06, "loss": 0.524, "step": 9695 }, { "epoch": 0.61, "grad_norm": 1.0171116590499878, "learning_rate": 3.4198294979434207e-06, "loss": 0.5243, "step": 9696 }, { "epoch": 0.61, "grad_norm": 1.0491117238998413, "learning_rate": 3.418856121937494e-06, "loss": 0.4638, "step": 9697 }, { "epoch": 0.61, "grad_norm": 1.0508257150650024, "learning_rate": 3.4178828125080354e-06, "loss": 0.5367, "step": 9698 }, { "epoch": 0.61, "grad_norm": 1.080831527709961, "learning_rate": 3.4169095696960287e-06, "loss": 0.573, "step": 9699 }, { "epoch": 0.61, "grad_norm": 0.9865775108337402, "learning_rate": 3.4159363935424505e-06, "loss": 0.5196, "step": 9700 }, { "epoch": 0.61, "grad_norm": 1.059741497039795, "learning_rate": 3.4149632840882817e-06, "loss": 0.5221, "step": 9701 }, { "epoch": 0.61, "grad_norm": 0.9895997643470764, "learning_rate": 3.413990241374495e-06, "loss": 0.5056, "step": 9702 }, { "epoch": 0.61, "grad_norm": 1.0225650072097778, "learning_rate": 3.4130172654420623e-06, "loss": 0.4945, "step": 9703 }, { "epoch": 0.61, "grad_norm": 1.0705265998840332, "learning_rate": 3.4120443563319527e-06, "loss": 0.4667, "step": 9704 }, { "epoch": 0.61, "grad_norm": 1.0751066207885742, "learning_rate": 3.4110715140851286e-06, "loss": 0.4826, "step": 9705 }, { "epoch": 0.61, "grad_norm": 1.0618090629577637, "learning_rate": 3.4100987387425566e-06, "loss": 0.5378, "step": 9706 }, { "epoch": 0.61, "grad_norm": 1.0707107782363892, "learning_rate": 3.4091260303451967e-06, "loss": 0.5091, "step": 9707 }, { "epoch": 0.62, "grad_norm": 1.029998540878296, "learning_rate": 3.4081533889340056e-06, "loss": 0.4905, "step": 9708 }, { "epoch": 0.62, "grad_norm": 1.0424970388412476, "learning_rate": 3.407180814549935e-06, "loss": 0.5162, "step": 9709 }, { "epoch": 0.62, "grad_norm": 1.09152352809906, "learning_rate": 3.4062083072339415e-06, "loss": 0.5531, "step": 9710 }, { "epoch": 0.62, "grad_norm": 1.0747696161270142, "learning_rate": 3.4052358670269727e-06, "loss": 0.556, "step": 9711 }, { "epoch": 0.62, "grad_norm": 0.9668502807617188, "learning_rate": 3.4042634939699728e-06, "loss": 0.4954, "step": 9712 }, { "epoch": 0.62, "grad_norm": 1.0281851291656494, "learning_rate": 3.4032911881038842e-06, "loss": 0.5478, "step": 9713 }, { "epoch": 0.62, "grad_norm": 1.090576410293579, "learning_rate": 3.4023189494696506e-06, "loss": 0.5851, "step": 9714 }, { "epoch": 0.62, "grad_norm": 1.1164906024932861, "learning_rate": 3.401346778108209e-06, "loss": 0.5551, "step": 9715 }, { "epoch": 0.62, "grad_norm": 1.0226433277130127, "learning_rate": 3.4003746740604925e-06, "loss": 0.53, "step": 9716 }, { "epoch": 0.62, "grad_norm": 1.0139816999435425, "learning_rate": 3.399402637367433e-06, "loss": 0.5212, "step": 9717 }, { "epoch": 0.62, "grad_norm": 1.0398943424224854, "learning_rate": 3.398430668069961e-06, "loss": 0.5091, "step": 9718 }, { "epoch": 0.62, "grad_norm": 0.9936200380325317, "learning_rate": 3.3974587662090026e-06, "loss": 0.5545, "step": 9719 }, { "epoch": 0.62, "grad_norm": 1.1318458318710327, "learning_rate": 3.396486931825481e-06, "loss": 0.5478, "step": 9720 }, { "epoch": 0.62, "grad_norm": 0.9907205700874329, "learning_rate": 3.395515164960316e-06, "loss": 0.4836, "step": 9721 }, { "epoch": 0.62, "grad_norm": 1.0219179391860962, "learning_rate": 3.394543465654424e-06, "loss": 0.53, "step": 9722 }, { "epoch": 0.62, "grad_norm": 1.0675764083862305, "learning_rate": 3.393571833948724e-06, "loss": 0.5186, "step": 9723 }, { "epoch": 0.62, "grad_norm": 1.0911437273025513, "learning_rate": 3.3926002698841253e-06, "loss": 0.4721, "step": 9724 }, { "epoch": 0.62, "grad_norm": 1.0146348476409912, "learning_rate": 3.3916287735015375e-06, "loss": 0.517, "step": 9725 }, { "epoch": 0.62, "grad_norm": 1.0276731252670288, "learning_rate": 3.390657344841865e-06, "loss": 0.459, "step": 9726 }, { "epoch": 0.62, "grad_norm": 1.1111414432525635, "learning_rate": 3.3896859839460155e-06, "loss": 0.5204, "step": 9727 }, { "epoch": 0.62, "grad_norm": 1.0293622016906738, "learning_rate": 3.3887146908548875e-06, "loss": 0.558, "step": 9728 }, { "epoch": 0.62, "grad_norm": 1.0552021265029907, "learning_rate": 3.3877434656093777e-06, "loss": 0.4724, "step": 9729 }, { "epoch": 0.62, "grad_norm": 1.110007405281067, "learning_rate": 3.3867723082503807e-06, "loss": 0.5886, "step": 9730 }, { "epoch": 0.62, "grad_norm": 1.009547472000122, "learning_rate": 3.385801218818792e-06, "loss": 0.4899, "step": 9731 }, { "epoch": 0.62, "grad_norm": 1.1629246473312378, "learning_rate": 3.384830197355499e-06, "loss": 0.5559, "step": 9732 }, { "epoch": 0.62, "grad_norm": 1.0362046957015991, "learning_rate": 3.383859243901385e-06, "loss": 0.4975, "step": 9733 }, { "epoch": 0.62, "grad_norm": 0.9874945878982544, "learning_rate": 3.3828883584973364e-06, "loss": 0.5235, "step": 9734 }, { "epoch": 0.62, "grad_norm": 1.0950794219970703, "learning_rate": 3.3819175411842353e-06, "loss": 0.5257, "step": 9735 }, { "epoch": 0.62, "grad_norm": 1.1203292608261108, "learning_rate": 3.3809467920029574e-06, "loss": 0.5148, "step": 9736 }, { "epoch": 0.62, "grad_norm": 1.057608962059021, "learning_rate": 3.3799761109943775e-06, "loss": 0.4833, "step": 9737 }, { "epoch": 0.62, "grad_norm": 0.9889419078826904, "learning_rate": 3.3790054981993683e-06, "loss": 0.5243, "step": 9738 }, { "epoch": 0.62, "grad_norm": 1.0515849590301514, "learning_rate": 3.3780349536587965e-06, "loss": 0.4974, "step": 9739 }, { "epoch": 0.62, "grad_norm": 1.059637427330017, "learning_rate": 3.377064477413533e-06, "loss": 0.5346, "step": 9740 }, { "epoch": 0.62, "grad_norm": 0.9503380656242371, "learning_rate": 3.376094069504437e-06, "loss": 0.4479, "step": 9741 }, { "epoch": 0.62, "grad_norm": 1.1286183595657349, "learning_rate": 3.3751237299723715e-06, "loss": 0.5201, "step": 9742 }, { "epoch": 0.62, "grad_norm": 1.0307422876358032, "learning_rate": 3.3741534588581915e-06, "loss": 0.542, "step": 9743 }, { "epoch": 0.62, "grad_norm": 1.067309856414795, "learning_rate": 3.3731832562027555e-06, "loss": 0.5095, "step": 9744 }, { "epoch": 0.62, "grad_norm": 1.1034531593322754, "learning_rate": 3.372213122046912e-06, "loss": 0.504, "step": 9745 }, { "epoch": 0.62, "grad_norm": 1.020670771598816, "learning_rate": 3.3712430564315124e-06, "loss": 0.4971, "step": 9746 }, { "epoch": 0.62, "grad_norm": 1.0703192949295044, "learning_rate": 3.3702730593974e-06, "loss": 0.5267, "step": 9747 }, { "epoch": 0.62, "grad_norm": 1.0400869846343994, "learning_rate": 3.3693031309854214e-06, "loss": 0.5108, "step": 9748 }, { "epoch": 0.62, "grad_norm": 0.9968366622924805, "learning_rate": 3.3683332712364138e-06, "loss": 0.488, "step": 9749 }, { "epoch": 0.62, "grad_norm": 1.0771602392196655, "learning_rate": 3.3673634801912175e-06, "loss": 0.5276, "step": 9750 }, { "epoch": 0.62, "grad_norm": 0.9836703538894653, "learning_rate": 3.3663937578906642e-06, "loss": 0.4721, "step": 9751 }, { "epoch": 0.62, "grad_norm": 1.000105381011963, "learning_rate": 3.365424104375587e-06, "loss": 0.4941, "step": 9752 }, { "epoch": 0.62, "grad_norm": 0.9447736740112305, "learning_rate": 3.3644545196868146e-06, "loss": 0.5152, "step": 9753 }, { "epoch": 0.62, "grad_norm": 0.9786331057548523, "learning_rate": 3.3634850038651734e-06, "loss": 0.5285, "step": 9754 }, { "epoch": 0.62, "grad_norm": 1.0418307781219482, "learning_rate": 3.362515556951485e-06, "loss": 0.488, "step": 9755 }, { "epoch": 0.62, "grad_norm": 0.9895498752593994, "learning_rate": 3.36154617898657e-06, "loss": 0.4815, "step": 9756 }, { "epoch": 0.62, "grad_norm": 1.0580096244812012, "learning_rate": 3.360576870011246e-06, "loss": 0.4725, "step": 9757 }, { "epoch": 0.62, "grad_norm": 1.0222854614257812, "learning_rate": 3.3596076300663273e-06, "loss": 0.5171, "step": 9758 }, { "epoch": 0.62, "grad_norm": 1.0051594972610474, "learning_rate": 3.3586384591926235e-06, "loss": 0.5213, "step": 9759 }, { "epoch": 0.62, "grad_norm": 1.0701817274093628, "learning_rate": 3.3576693574309447e-06, "loss": 0.5393, "step": 9760 }, { "epoch": 0.62, "grad_norm": 1.0076082944869995, "learning_rate": 3.3567003248220966e-06, "loss": 0.5361, "step": 9761 }, { "epoch": 0.62, "grad_norm": 1.0037952661514282, "learning_rate": 3.355731361406882e-06, "loss": 0.4991, "step": 9762 }, { "epoch": 0.62, "grad_norm": 1.0817643404006958, "learning_rate": 3.354762467226098e-06, "loss": 0.5282, "step": 9763 }, { "epoch": 0.62, "grad_norm": 1.0783755779266357, "learning_rate": 3.3537936423205435e-06, "loss": 0.5244, "step": 9764 }, { "epoch": 0.62, "grad_norm": 1.0131877660751343, "learning_rate": 3.352824886731012e-06, "loss": 0.4696, "step": 9765 }, { "epoch": 0.62, "grad_norm": 1.0318191051483154, "learning_rate": 3.351856200498296e-06, "loss": 0.546, "step": 9766 }, { "epoch": 0.62, "grad_norm": 0.9506867527961731, "learning_rate": 3.3508875836631806e-06, "loss": 0.5074, "step": 9767 }, { "epoch": 0.62, "grad_norm": 0.9747241735458374, "learning_rate": 3.3499190362664523e-06, "loss": 0.4476, "step": 9768 }, { "epoch": 0.62, "grad_norm": 0.9470990896224976, "learning_rate": 3.3489505583488925e-06, "loss": 0.4749, "step": 9769 }, { "epoch": 0.62, "grad_norm": 1.0915932655334473, "learning_rate": 3.3479821499512823e-06, "loss": 0.5607, "step": 9770 }, { "epoch": 0.62, "grad_norm": 1.0824368000030518, "learning_rate": 3.347013811114396e-06, "loss": 0.5394, "step": 9771 }, { "epoch": 0.62, "grad_norm": 1.0586308240890503, "learning_rate": 3.346045541879009e-06, "loss": 0.4506, "step": 9772 }, { "epoch": 0.62, "grad_norm": 0.9798187017440796, "learning_rate": 3.3450773422858886e-06, "loss": 0.5374, "step": 9773 }, { "epoch": 0.62, "grad_norm": 0.9897297024726868, "learning_rate": 3.3441092123758055e-06, "loss": 0.4729, "step": 9774 }, { "epoch": 0.62, "grad_norm": 1.115580677986145, "learning_rate": 3.3431411521895228e-06, "loss": 0.4852, "step": 9775 }, { "epoch": 0.62, "grad_norm": 1.088579773902893, "learning_rate": 3.342173161767803e-06, "loss": 0.5668, "step": 9776 }, { "epoch": 0.62, "grad_norm": 0.986916184425354, "learning_rate": 3.341205241151403e-06, "loss": 0.4979, "step": 9777 }, { "epoch": 0.62, "grad_norm": 1.0729726552963257, "learning_rate": 3.3402373903810807e-06, "loss": 0.5086, "step": 9778 }, { "epoch": 0.62, "grad_norm": 0.9873821139335632, "learning_rate": 3.3392696094975875e-06, "loss": 0.4812, "step": 9779 }, { "epoch": 0.62, "grad_norm": 1.0625243186950684, "learning_rate": 3.338301898541675e-06, "loss": 0.5295, "step": 9780 }, { "epoch": 0.62, "grad_norm": 1.0633163452148438, "learning_rate": 3.337334257554086e-06, "loss": 0.5408, "step": 9781 }, { "epoch": 0.62, "grad_norm": 0.9733723402023315, "learning_rate": 3.3363666865755708e-06, "loss": 0.5166, "step": 9782 }, { "epoch": 0.62, "grad_norm": 1.1456708908081055, "learning_rate": 3.335399185646865e-06, "loss": 0.5323, "step": 9783 }, { "epoch": 0.62, "grad_norm": 0.9787518382072449, "learning_rate": 3.33443175480871e-06, "loss": 0.4762, "step": 9784 }, { "epoch": 0.62, "grad_norm": 1.0292288064956665, "learning_rate": 3.3334643941018398e-06, "loss": 0.4775, "step": 9785 }, { "epoch": 0.62, "grad_norm": 1.0584862232208252, "learning_rate": 3.3324971035669844e-06, "loss": 0.5477, "step": 9786 }, { "epoch": 0.62, "grad_norm": 1.0264787673950195, "learning_rate": 3.3315298832448762e-06, "loss": 0.4961, "step": 9787 }, { "epoch": 0.62, "grad_norm": 1.108717918395996, "learning_rate": 3.330562733176242e-06, "loss": 0.4933, "step": 9788 }, { "epoch": 0.62, "grad_norm": 1.1448596715927124, "learning_rate": 3.3295956534018033e-06, "loss": 0.5257, "step": 9789 }, { "epoch": 0.62, "grad_norm": 1.0974916219711304, "learning_rate": 3.328628643962278e-06, "loss": 0.5133, "step": 9790 }, { "epoch": 0.62, "grad_norm": 0.9968658685684204, "learning_rate": 3.3276617048983876e-06, "loss": 0.4774, "step": 9791 }, { "epoch": 0.62, "grad_norm": 1.061164140701294, "learning_rate": 3.326694836250847e-06, "loss": 0.5387, "step": 9792 }, { "epoch": 0.62, "grad_norm": 1.0636687278747559, "learning_rate": 3.325728038060365e-06, "loss": 0.5125, "step": 9793 }, { "epoch": 0.62, "grad_norm": 1.0595797300338745, "learning_rate": 3.324761310367649e-06, "loss": 0.5199, "step": 9794 }, { "epoch": 0.62, "grad_norm": 1.101511001586914, "learning_rate": 3.3237946532134077e-06, "loss": 0.5327, "step": 9795 }, { "epoch": 0.62, "grad_norm": 0.987757682800293, "learning_rate": 3.322828066638343e-06, "loss": 0.4707, "step": 9796 }, { "epoch": 0.62, "grad_norm": 1.0710266828536987, "learning_rate": 3.321861550683154e-06, "loss": 0.5579, "step": 9797 }, { "epoch": 0.62, "grad_norm": 1.121856927871704, "learning_rate": 3.3208951053885367e-06, "loss": 0.5582, "step": 9798 }, { "epoch": 0.62, "grad_norm": 1.0450007915496826, "learning_rate": 3.3199287307951844e-06, "loss": 0.528, "step": 9799 }, { "epoch": 0.62, "grad_norm": 1.0158655643463135, "learning_rate": 3.31896242694379e-06, "loss": 0.5187, "step": 9800 }, { "epoch": 0.62, "grad_norm": 1.2039495706558228, "learning_rate": 3.317996193875041e-06, "loss": 0.5636, "step": 9801 }, { "epoch": 0.62, "grad_norm": 1.0527689456939697, "learning_rate": 3.3170300316296194e-06, "loss": 0.5362, "step": 9802 }, { "epoch": 0.62, "grad_norm": 0.9969490170478821, "learning_rate": 3.3160639402482077e-06, "loss": 0.4939, "step": 9803 }, { "epoch": 0.62, "grad_norm": 1.1049686670303345, "learning_rate": 3.3150979197714874e-06, "loss": 0.5056, "step": 9804 }, { "epoch": 0.62, "grad_norm": 1.0586570501327515, "learning_rate": 3.314131970240132e-06, "loss": 0.5425, "step": 9805 }, { "epoch": 0.62, "grad_norm": 1.0372644662857056, "learning_rate": 3.3131660916948137e-06, "loss": 0.5566, "step": 9806 }, { "epoch": 0.62, "grad_norm": 0.983227550983429, "learning_rate": 3.3122002841762023e-06, "loss": 0.5616, "step": 9807 }, { "epoch": 0.62, "grad_norm": 1.0370455980300903, "learning_rate": 3.311234547724968e-06, "loss": 0.5537, "step": 9808 }, { "epoch": 0.62, "grad_norm": 1.0073283910751343, "learning_rate": 3.310268882381772e-06, "loss": 0.5028, "step": 9809 }, { "epoch": 0.62, "grad_norm": 1.0266960859298706, "learning_rate": 3.3093032881872738e-06, "loss": 0.4729, "step": 9810 }, { "epoch": 0.62, "grad_norm": 1.022322177886963, "learning_rate": 3.3083377651821314e-06, "loss": 0.4925, "step": 9811 }, { "epoch": 0.62, "grad_norm": 0.9509308934211731, "learning_rate": 3.3073723134070033e-06, "loss": 0.5048, "step": 9812 }, { "epoch": 0.62, "grad_norm": 1.0213093757629395, "learning_rate": 3.3064069329025394e-06, "loss": 0.531, "step": 9813 }, { "epoch": 0.62, "grad_norm": 1.0336143970489502, "learning_rate": 3.3054416237093863e-06, "loss": 0.5419, "step": 9814 }, { "epoch": 0.62, "grad_norm": 0.9689751863479614, "learning_rate": 3.3044763858681928e-06, "loss": 0.4507, "step": 9815 }, { "epoch": 0.62, "grad_norm": 1.0528861284255981, "learning_rate": 3.303511219419598e-06, "loss": 0.5247, "step": 9816 }, { "epoch": 0.62, "grad_norm": 1.0158718824386597, "learning_rate": 3.3025461244042462e-06, "loss": 0.4837, "step": 9817 }, { "epoch": 0.62, "grad_norm": 1.0771912336349487, "learning_rate": 3.3015811008627707e-06, "loss": 0.5347, "step": 9818 }, { "epoch": 0.62, "grad_norm": 1.1466127634048462, "learning_rate": 3.3006161488358084e-06, "loss": 0.5163, "step": 9819 }, { "epoch": 0.62, "grad_norm": 1.0220280885696411, "learning_rate": 3.299651268363986e-06, "loss": 0.5152, "step": 9820 }, { "epoch": 0.62, "grad_norm": 1.0260629653930664, "learning_rate": 3.298686459487936e-06, "loss": 0.4703, "step": 9821 }, { "epoch": 0.62, "grad_norm": 1.0086876153945923, "learning_rate": 3.2977217222482794e-06, "loss": 0.4937, "step": 9822 }, { "epoch": 0.62, "grad_norm": 1.048424482345581, "learning_rate": 3.29675705668564e-06, "loss": 0.5067, "step": 9823 }, { "epoch": 0.62, "grad_norm": 0.9417296648025513, "learning_rate": 3.2957924628406347e-06, "loss": 0.4311, "step": 9824 }, { "epoch": 0.62, "grad_norm": 1.0503716468811035, "learning_rate": 3.2948279407538813e-06, "loss": 0.474, "step": 9825 }, { "epoch": 0.62, "grad_norm": 1.0028961896896362, "learning_rate": 3.2938634904659903e-06, "loss": 0.4969, "step": 9826 }, { "epoch": 0.62, "grad_norm": 1.0006153583526611, "learning_rate": 3.2928991120175747e-06, "loss": 0.5085, "step": 9827 }, { "epoch": 0.62, "grad_norm": 1.036969542503357, "learning_rate": 3.2919348054492363e-06, "loss": 0.4478, "step": 9828 }, { "epoch": 0.62, "grad_norm": 1.0671666860580444, "learning_rate": 3.2909705708015834e-06, "loss": 0.5363, "step": 9829 }, { "epoch": 0.62, "grad_norm": 1.0576975345611572, "learning_rate": 3.290006408115213e-06, "loss": 0.5515, "step": 9830 }, { "epoch": 0.62, "grad_norm": 1.024174451828003, "learning_rate": 3.289042317430726e-06, "loss": 0.5003, "step": 9831 }, { "epoch": 0.62, "grad_norm": 0.9663321375846863, "learning_rate": 3.2880782987887128e-06, "loss": 0.489, "step": 9832 }, { "epoch": 0.62, "grad_norm": 0.9781193137168884, "learning_rate": 3.2871143522297672e-06, "loss": 0.5053, "step": 9833 }, { "epoch": 0.62, "grad_norm": 1.021514892578125, "learning_rate": 3.286150477794479e-06, "loss": 0.517, "step": 9834 }, { "epoch": 0.62, "grad_norm": 0.9754778742790222, "learning_rate": 3.2851866755234324e-06, "loss": 0.5128, "step": 9835 }, { "epoch": 0.62, "grad_norm": 0.9535737633705139, "learning_rate": 3.2842229454572084e-06, "loss": 0.4931, "step": 9836 }, { "epoch": 0.62, "grad_norm": 1.0680464506149292, "learning_rate": 3.2832592876363866e-06, "loss": 0.5357, "step": 9837 }, { "epoch": 0.62, "grad_norm": 1.0458132028579712, "learning_rate": 3.2822957021015455e-06, "loss": 0.4679, "step": 9838 }, { "epoch": 0.62, "grad_norm": 1.0636483430862427, "learning_rate": 3.2813321888932573e-06, "loss": 0.4727, "step": 9839 }, { "epoch": 0.62, "grad_norm": 0.9563536643981934, "learning_rate": 3.2803687480520905e-06, "loss": 0.4623, "step": 9840 }, { "epoch": 0.62, "grad_norm": 1.0053445100784302, "learning_rate": 3.279405379618613e-06, "loss": 0.4782, "step": 9841 }, { "epoch": 0.62, "grad_norm": 1.0068389177322388, "learning_rate": 3.278442083633392e-06, "loss": 0.4693, "step": 9842 }, { "epoch": 0.62, "grad_norm": 1.0501048564910889, "learning_rate": 3.277478860136985e-06, "loss": 0.5249, "step": 9843 }, { "epoch": 0.62, "grad_norm": 1.0450025796890259, "learning_rate": 3.276515709169951e-06, "loss": 0.5085, "step": 9844 }, { "epoch": 0.62, "grad_norm": 0.9735075831413269, "learning_rate": 3.2755526307728447e-06, "loss": 0.509, "step": 9845 }, { "epoch": 0.62, "grad_norm": 0.9778958559036255, "learning_rate": 3.2745896249862166e-06, "loss": 0.4734, "step": 9846 }, { "epoch": 0.62, "grad_norm": 1.1158515214920044, "learning_rate": 3.2736266918506195e-06, "loss": 0.5633, "step": 9847 }, { "epoch": 0.62, "grad_norm": 1.112127661705017, "learning_rate": 3.272663831406595e-06, "loss": 0.5784, "step": 9848 }, { "epoch": 0.62, "grad_norm": 1.0029457807540894, "learning_rate": 3.2717010436946894e-06, "loss": 0.5044, "step": 9849 }, { "epoch": 0.62, "grad_norm": 1.359940767288208, "learning_rate": 3.270738328755438e-06, "loss": 0.5601, "step": 9850 }, { "epoch": 0.62, "grad_norm": 1.055325984954834, "learning_rate": 3.269775686629383e-06, "loss": 0.5604, "step": 9851 }, { "epoch": 0.62, "grad_norm": 1.098065733909607, "learning_rate": 3.2688131173570523e-06, "loss": 0.548, "step": 9852 }, { "epoch": 0.62, "grad_norm": 1.0677322149276733, "learning_rate": 3.2678506209789805e-06, "loss": 0.5502, "step": 9853 }, { "epoch": 0.62, "grad_norm": 0.995072603225708, "learning_rate": 3.2668881975356915e-06, "loss": 0.4901, "step": 9854 }, { "epoch": 0.62, "grad_norm": 1.11382257938385, "learning_rate": 3.2659258470677137e-06, "loss": 0.5686, "step": 9855 }, { "epoch": 0.62, "grad_norm": 0.9235460758209229, "learning_rate": 3.2649635696155646e-06, "loss": 0.4439, "step": 9856 }, { "epoch": 0.62, "grad_norm": 1.0282461643218994, "learning_rate": 3.2640013652197654e-06, "loss": 0.5014, "step": 9857 }, { "epoch": 0.62, "grad_norm": 1.0538965463638306, "learning_rate": 3.263039233920827e-06, "loss": 0.5484, "step": 9858 }, { "epoch": 0.62, "grad_norm": 1.136786699295044, "learning_rate": 3.2620771757592663e-06, "loss": 0.4908, "step": 9859 }, { "epoch": 0.62, "grad_norm": 1.2098864316940308, "learning_rate": 3.261115190775589e-06, "loss": 0.4867, "step": 9860 }, { "epoch": 0.62, "grad_norm": 1.1039862632751465, "learning_rate": 3.2601532790103026e-06, "loss": 0.4787, "step": 9861 }, { "epoch": 0.62, "grad_norm": 1.1275825500488281, "learning_rate": 3.259191440503909e-06, "loss": 0.5362, "step": 9862 }, { "epoch": 0.62, "grad_norm": 1.0742462873458862, "learning_rate": 3.258229675296907e-06, "loss": 0.5357, "step": 9863 }, { "epoch": 0.62, "grad_norm": 1.030516266822815, "learning_rate": 3.257267983429794e-06, "loss": 0.5373, "step": 9864 }, { "epoch": 0.62, "grad_norm": 1.0194627046585083, "learning_rate": 3.2563063649430647e-06, "loss": 0.5076, "step": 9865 }, { "epoch": 0.63, "grad_norm": 1.0631455183029175, "learning_rate": 3.25534481987721e-06, "loss": 0.5389, "step": 9866 }, { "epoch": 0.63, "grad_norm": 1.0659403800964355, "learning_rate": 3.2543833482727123e-06, "loss": 0.5412, "step": 9867 }, { "epoch": 0.63, "grad_norm": 0.9998225569725037, "learning_rate": 3.2534219501700603e-06, "loss": 0.4862, "step": 9868 }, { "epoch": 0.63, "grad_norm": 1.070391058921814, "learning_rate": 3.252460625609736e-06, "loss": 0.4995, "step": 9869 }, { "epoch": 0.63, "grad_norm": 1.0833836793899536, "learning_rate": 3.251499374632214e-06, "loss": 0.5161, "step": 9870 }, { "epoch": 0.63, "grad_norm": 1.0146968364715576, "learning_rate": 3.2505381972779704e-06, "loss": 0.4949, "step": 9871 }, { "epoch": 0.63, "grad_norm": 1.0704102516174316, "learning_rate": 3.249577093587477e-06, "loss": 0.4979, "step": 9872 }, { "epoch": 0.63, "grad_norm": 1.0711511373519897, "learning_rate": 3.2486160636012054e-06, "loss": 0.5424, "step": 9873 }, { "epoch": 0.63, "grad_norm": 0.9882245659828186, "learning_rate": 3.2476551073596173e-06, "loss": 0.5129, "step": 9874 }, { "epoch": 0.63, "grad_norm": 1.091054081916809, "learning_rate": 3.246694224903175e-06, "loss": 0.4935, "step": 9875 }, { "epoch": 0.63, "grad_norm": 1.0546417236328125, "learning_rate": 3.245733416272341e-06, "loss": 0.5292, "step": 9876 }, { "epoch": 0.63, "grad_norm": 1.0956344604492188, "learning_rate": 3.2447726815075715e-06, "loss": 0.5185, "step": 9877 }, { "epoch": 0.63, "grad_norm": 1.0548691749572754, "learning_rate": 3.243812020649318e-06, "loss": 0.4968, "step": 9878 }, { "epoch": 0.63, "grad_norm": 1.1286178827285767, "learning_rate": 3.24285143373803e-06, "loss": 0.5511, "step": 9879 }, { "epoch": 0.63, "grad_norm": 1.0500680208206177, "learning_rate": 3.241890920814154e-06, "loss": 0.4643, "step": 9880 }, { "epoch": 0.63, "grad_norm": 1.0624537467956543, "learning_rate": 3.2409304819181377e-06, "loss": 0.5021, "step": 9881 }, { "epoch": 0.63, "grad_norm": 1.0358778238296509, "learning_rate": 3.2399701170904197e-06, "loss": 0.4912, "step": 9882 }, { "epoch": 0.63, "grad_norm": 0.9780733585357666, "learning_rate": 3.239009826371436e-06, "loss": 0.5024, "step": 9883 }, { "epoch": 0.63, "grad_norm": 1.0151331424713135, "learning_rate": 3.238049609801621e-06, "loss": 0.5204, "step": 9884 }, { "epoch": 0.63, "grad_norm": 1.0676971673965454, "learning_rate": 3.2370894674214102e-06, "loss": 0.5266, "step": 9885 }, { "epoch": 0.63, "grad_norm": 0.9793708920478821, "learning_rate": 3.2361293992712295e-06, "loss": 0.4243, "step": 9886 }, { "epoch": 0.63, "grad_norm": 0.9515348672866821, "learning_rate": 3.2351694053915027e-06, "loss": 0.502, "step": 9887 }, { "epoch": 0.63, "grad_norm": 1.0956379175186157, "learning_rate": 3.2342094858226514e-06, "loss": 0.5517, "step": 9888 }, { "epoch": 0.63, "grad_norm": 0.9976474642753601, "learning_rate": 3.233249640605098e-06, "loss": 0.4876, "step": 9889 }, { "epoch": 0.63, "grad_norm": 1.0082616806030273, "learning_rate": 3.232289869779256e-06, "loss": 0.48, "step": 9890 }, { "epoch": 0.63, "grad_norm": 1.0102382898330688, "learning_rate": 3.231330173385537e-06, "loss": 0.5028, "step": 9891 }, { "epoch": 0.63, "grad_norm": 0.9744532108306885, "learning_rate": 3.2303705514643537e-06, "loss": 0.5359, "step": 9892 }, { "epoch": 0.63, "grad_norm": 1.0318796634674072, "learning_rate": 3.229411004056108e-06, "loss": 0.5089, "step": 9893 }, { "epoch": 0.63, "grad_norm": 1.0769177675247192, "learning_rate": 3.2284515312012056e-06, "loss": 0.4983, "step": 9894 }, { "epoch": 0.63, "grad_norm": 1.0182427167892456, "learning_rate": 3.2274921329400484e-06, "loss": 0.5398, "step": 9895 }, { "epoch": 0.63, "grad_norm": 0.9979657530784607, "learning_rate": 3.226532809313031e-06, "loss": 0.4801, "step": 9896 }, { "epoch": 0.63, "grad_norm": 1.0415728092193604, "learning_rate": 3.2255735603605454e-06, "loss": 0.5321, "step": 9897 }, { "epoch": 0.63, "grad_norm": 1.053540825843811, "learning_rate": 3.2246143861229857e-06, "loss": 0.4922, "step": 9898 }, { "epoch": 0.63, "grad_norm": 1.0729095935821533, "learning_rate": 3.223655286640739e-06, "loss": 0.5117, "step": 9899 }, { "epoch": 0.63, "grad_norm": 1.0364995002746582, "learning_rate": 3.2226962619541885e-06, "loss": 0.5212, "step": 9900 }, { "epoch": 0.63, "grad_norm": 1.0050946474075317, "learning_rate": 3.221737312103714e-06, "loss": 0.4442, "step": 9901 }, { "epoch": 0.63, "grad_norm": 1.0963795185089111, "learning_rate": 3.2207784371296957e-06, "loss": 0.5423, "step": 9902 }, { "epoch": 0.63, "grad_norm": 1.034480333328247, "learning_rate": 3.2198196370725095e-06, "loss": 0.5377, "step": 9903 }, { "epoch": 0.63, "grad_norm": 1.0671687126159668, "learning_rate": 3.218860911972525e-06, "loss": 0.5646, "step": 9904 }, { "epoch": 0.63, "grad_norm": 1.066817283630371, "learning_rate": 3.2179022618701093e-06, "loss": 0.5549, "step": 9905 }, { "epoch": 0.63, "grad_norm": 1.04256272315979, "learning_rate": 3.2169436868056316e-06, "loss": 0.5206, "step": 9906 }, { "epoch": 0.63, "grad_norm": 1.0120028257369995, "learning_rate": 3.215985186819453e-06, "loss": 0.5276, "step": 9907 }, { "epoch": 0.63, "grad_norm": 0.9782642722129822, "learning_rate": 3.2150267619519326e-06, "loss": 0.5158, "step": 9908 }, { "epoch": 0.63, "grad_norm": 1.0254771709442139, "learning_rate": 3.214068412243424e-06, "loss": 0.4962, "step": 9909 }, { "epoch": 0.63, "grad_norm": 1.0787056684494019, "learning_rate": 3.213110137734281e-06, "loss": 0.5418, "step": 9910 }, { "epoch": 0.63, "grad_norm": 1.0449023246765137, "learning_rate": 3.2121519384648558e-06, "loss": 0.5456, "step": 9911 }, { "epoch": 0.63, "grad_norm": 0.9803979992866516, "learning_rate": 3.211193814475494e-06, "loss": 0.5253, "step": 9912 }, { "epoch": 0.63, "grad_norm": 1.007240653038025, "learning_rate": 3.2102357658065357e-06, "loss": 0.5351, "step": 9913 }, { "epoch": 0.63, "grad_norm": 1.0950753688812256, "learning_rate": 3.2092777924983224e-06, "loss": 0.56, "step": 9914 }, { "epoch": 0.63, "grad_norm": 1.057042121887207, "learning_rate": 3.208319894591194e-06, "loss": 0.534, "step": 9915 }, { "epoch": 0.63, "grad_norm": 1.131855845451355, "learning_rate": 3.207362072125482e-06, "loss": 0.5138, "step": 9916 }, { "epoch": 0.63, "grad_norm": 1.0062309503555298, "learning_rate": 3.2064043251415166e-06, "loss": 0.4759, "step": 9917 }, { "epoch": 0.63, "grad_norm": 1.0769048929214478, "learning_rate": 3.2054466536796236e-06, "loss": 0.5153, "step": 9918 }, { "epoch": 0.63, "grad_norm": 1.0601894855499268, "learning_rate": 3.2044890577801317e-06, "loss": 0.5159, "step": 9919 }, { "epoch": 0.63, "grad_norm": 1.018688440322876, "learning_rate": 3.2035315374833596e-06, "loss": 0.5008, "step": 9920 }, { "epoch": 0.63, "grad_norm": 1.0696890354156494, "learning_rate": 3.2025740928296235e-06, "loss": 0.5138, "step": 9921 }, { "epoch": 0.63, "grad_norm": 1.0606672763824463, "learning_rate": 3.201616723859241e-06, "loss": 0.5422, "step": 9922 }, { "epoch": 0.63, "grad_norm": 1.055463194847107, "learning_rate": 3.20065943061252e-06, "loss": 0.5375, "step": 9923 }, { "epoch": 0.63, "grad_norm": 0.9518321752548218, "learning_rate": 3.199702213129773e-06, "loss": 0.4758, "step": 9924 }, { "epoch": 0.63, "grad_norm": 1.0561015605926514, "learning_rate": 3.1987450714513018e-06, "loss": 0.5227, "step": 9925 }, { "epoch": 0.63, "grad_norm": 1.0529154539108276, "learning_rate": 3.1977880056174105e-06, "loss": 0.5044, "step": 9926 }, { "epoch": 0.63, "grad_norm": 0.9994805455207825, "learning_rate": 3.196831015668396e-06, "loss": 0.5001, "step": 9927 }, { "epoch": 0.63, "grad_norm": 1.1532816886901855, "learning_rate": 3.195874101644555e-06, "loss": 0.5132, "step": 9928 }, { "epoch": 0.63, "grad_norm": 0.9695237874984741, "learning_rate": 3.194917263586179e-06, "loss": 0.4726, "step": 9929 }, { "epoch": 0.63, "grad_norm": 0.9463463425636292, "learning_rate": 3.1939605015335588e-06, "loss": 0.4794, "step": 9930 }, { "epoch": 0.63, "grad_norm": 1.0079615116119385, "learning_rate": 3.193003815526977e-06, "loss": 0.512, "step": 9931 }, { "epoch": 0.63, "grad_norm": 1.112295150756836, "learning_rate": 3.192047205606721e-06, "loss": 0.5726, "step": 9932 }, { "epoch": 0.63, "grad_norm": 1.0682371854782104, "learning_rate": 3.1910906718130665e-06, "loss": 0.5121, "step": 9933 }, { "epoch": 0.63, "grad_norm": 1.0254143476486206, "learning_rate": 3.1901342141862917e-06, "loss": 0.5264, "step": 9934 }, { "epoch": 0.63, "grad_norm": 1.0990660190582275, "learning_rate": 3.1891778327666673e-06, "loss": 0.5597, "step": 9935 }, { "epoch": 0.63, "grad_norm": 1.0980420112609863, "learning_rate": 3.1882215275944673e-06, "loss": 0.5718, "step": 9936 }, { "epoch": 0.63, "grad_norm": 1.0230224132537842, "learning_rate": 3.187265298709954e-06, "loss": 0.502, "step": 9937 }, { "epoch": 0.63, "grad_norm": 1.0240975618362427, "learning_rate": 3.1863091461533945e-06, "loss": 0.4951, "step": 9938 }, { "epoch": 0.63, "grad_norm": 1.000030755996704, "learning_rate": 3.1853530699650483e-06, "loss": 0.5177, "step": 9939 }, { "epoch": 0.63, "grad_norm": 1.0125668048858643, "learning_rate": 3.184397070185169e-06, "loss": 0.5508, "step": 9940 }, { "epoch": 0.63, "grad_norm": 0.9873566627502441, "learning_rate": 3.183441146854014e-06, "loss": 0.4808, "step": 9941 }, { "epoch": 0.63, "grad_norm": 1.15785813331604, "learning_rate": 3.182485300011834e-06, "loss": 0.5752, "step": 9942 }, { "epoch": 0.63, "grad_norm": 1.069769024848938, "learning_rate": 3.181529529698875e-06, "loss": 0.4809, "step": 9943 }, { "epoch": 0.63, "grad_norm": 0.9738884568214417, "learning_rate": 3.1805738359553796e-06, "loss": 0.4969, "step": 9944 }, { "epoch": 0.63, "grad_norm": 0.9629955291748047, "learning_rate": 3.1796182188215917e-06, "loss": 0.4698, "step": 9945 }, { "epoch": 0.63, "grad_norm": 1.0184857845306396, "learning_rate": 3.1786626783377494e-06, "loss": 0.5016, "step": 9946 }, { "epoch": 0.63, "grad_norm": 1.0259008407592773, "learning_rate": 3.177707214544086e-06, "loss": 0.5089, "step": 9947 }, { "epoch": 0.63, "grad_norm": 0.9904293417930603, "learning_rate": 3.1767518274808298e-06, "loss": 0.477, "step": 9948 }, { "epoch": 0.63, "grad_norm": 1.0364230871200562, "learning_rate": 3.175796517188212e-06, "loss": 0.5233, "step": 9949 }, { "epoch": 0.63, "grad_norm": 0.961456298828125, "learning_rate": 3.174841283706459e-06, "loss": 0.483, "step": 9950 }, { "epoch": 0.63, "grad_norm": 0.9019849300384521, "learning_rate": 3.17388612707579e-06, "loss": 0.5053, "step": 9951 }, { "epoch": 0.63, "grad_norm": 1.0838817358016968, "learning_rate": 3.172931047336421e-06, "loss": 0.5164, "step": 9952 }, { "epoch": 0.63, "grad_norm": 1.0342077016830444, "learning_rate": 3.1719760445285712e-06, "loss": 0.4517, "step": 9953 }, { "epoch": 0.63, "grad_norm": 1.0971794128417969, "learning_rate": 3.1710211186924524e-06, "loss": 0.4854, "step": 9954 }, { "epoch": 0.63, "grad_norm": 1.061440348625183, "learning_rate": 3.170066269868271e-06, "loss": 0.5214, "step": 9955 }, { "epoch": 0.63, "grad_norm": 1.1033830642700195, "learning_rate": 3.169111498096232e-06, "loss": 0.5152, "step": 9956 }, { "epoch": 0.63, "grad_norm": 0.9737071394920349, "learning_rate": 3.1681568034165383e-06, "loss": 0.4773, "step": 9957 }, { "epoch": 0.63, "grad_norm": 0.9737476110458374, "learning_rate": 3.167202185869391e-06, "loss": 0.5052, "step": 9958 }, { "epoch": 0.63, "grad_norm": 1.07955002784729, "learning_rate": 3.166247645494982e-06, "loss": 0.5298, "step": 9959 }, { "epoch": 0.63, "grad_norm": 1.0375590324401855, "learning_rate": 3.1652931823335074e-06, "loss": 0.4933, "step": 9960 }, { "epoch": 0.63, "grad_norm": 1.0839290618896484, "learning_rate": 3.164338796425152e-06, "loss": 0.5463, "step": 9961 }, { "epoch": 0.63, "grad_norm": 1.0975672006607056, "learning_rate": 3.163384487810106e-06, "loss": 0.4767, "step": 9962 }, { "epoch": 0.63, "grad_norm": 1.0232704877853394, "learning_rate": 3.162430256528549e-06, "loss": 0.5154, "step": 9963 }, { "epoch": 0.63, "grad_norm": 1.0841917991638184, "learning_rate": 3.161476102620663e-06, "loss": 0.5422, "step": 9964 }, { "epoch": 0.63, "grad_norm": 1.0752722024917603, "learning_rate": 3.16052202612662e-06, "loss": 0.4876, "step": 9965 }, { "epoch": 0.63, "grad_norm": 1.0556458234786987, "learning_rate": 3.159568027086598e-06, "loss": 0.5505, "step": 9966 }, { "epoch": 0.63, "grad_norm": 1.0755393505096436, "learning_rate": 3.1586141055407627e-06, "loss": 0.5475, "step": 9967 }, { "epoch": 0.63, "grad_norm": 0.9437804818153381, "learning_rate": 3.157660261529283e-06, "loss": 0.4961, "step": 9968 }, { "epoch": 0.63, "grad_norm": 1.0762423276901245, "learning_rate": 3.15670649509232e-06, "loss": 0.4954, "step": 9969 }, { "epoch": 0.63, "grad_norm": 1.0257775783538818, "learning_rate": 3.155752806270033e-06, "loss": 0.5933, "step": 9970 }, { "epoch": 0.63, "grad_norm": 1.016419768333435, "learning_rate": 3.1547991951025795e-06, "loss": 0.4811, "step": 9971 }, { "epoch": 0.63, "grad_norm": 1.0622209310531616, "learning_rate": 3.153845661630115e-06, "loss": 0.5471, "step": 9972 }, { "epoch": 0.63, "grad_norm": 1.0601319074630737, "learning_rate": 3.152892205892787e-06, "loss": 0.5176, "step": 9973 }, { "epoch": 0.63, "grad_norm": 1.0306676626205444, "learning_rate": 3.15193882793074e-06, "loss": 0.5243, "step": 9974 }, { "epoch": 0.63, "grad_norm": 1.0149081945419312, "learning_rate": 3.150985527784122e-06, "loss": 0.4713, "step": 9975 }, { "epoch": 0.63, "grad_norm": 0.9692007303237915, "learning_rate": 3.1500323054930715e-06, "loss": 0.4944, "step": 9976 }, { "epoch": 0.63, "grad_norm": 1.1371957063674927, "learning_rate": 3.149079161097725e-06, "loss": 0.5151, "step": 9977 }, { "epoch": 0.63, "grad_norm": 1.0571235418319702, "learning_rate": 3.1481260946382143e-06, "loss": 0.476, "step": 9978 }, { "epoch": 0.63, "grad_norm": 1.071311116218567, "learning_rate": 3.147173106154673e-06, "loss": 0.5701, "step": 9979 }, { "epoch": 0.63, "grad_norm": 1.0023607015609741, "learning_rate": 3.146220195687227e-06, "loss": 0.5251, "step": 9980 }, { "epoch": 0.63, "grad_norm": 1.0234299898147583, "learning_rate": 3.145267363276e-06, "loss": 0.5122, "step": 9981 }, { "epoch": 0.63, "grad_norm": 1.0225727558135986, "learning_rate": 3.1443146089611102e-06, "loss": 0.4988, "step": 9982 }, { "epoch": 0.63, "grad_norm": 0.9956499338150024, "learning_rate": 3.143361932782678e-06, "loss": 0.5068, "step": 9983 }, { "epoch": 0.63, "grad_norm": 1.0327447652816772, "learning_rate": 3.142409334780817e-06, "loss": 0.4816, "step": 9984 }, { "epoch": 0.63, "grad_norm": 0.9761244058609009, "learning_rate": 3.1414568149956366e-06, "loss": 0.4235, "step": 9985 }, { "epoch": 0.63, "grad_norm": 1.0141386985778809, "learning_rate": 3.1405043734672436e-06, "loss": 0.511, "step": 9986 }, { "epoch": 0.63, "grad_norm": 0.9915059208869934, "learning_rate": 3.1395520102357413e-06, "loss": 0.5017, "step": 9987 }, { "epoch": 0.63, "grad_norm": 1.140318512916565, "learning_rate": 3.1385997253412336e-06, "loss": 0.4719, "step": 9988 }, { "epoch": 0.63, "grad_norm": 1.1061350107192993, "learning_rate": 3.137647518823817e-06, "loss": 0.5217, "step": 9989 }, { "epoch": 0.63, "grad_norm": 1.1086782217025757, "learning_rate": 3.136695390723583e-06, "loss": 0.5753, "step": 9990 }, { "epoch": 0.63, "grad_norm": 1.1140615940093994, "learning_rate": 3.135743341080624e-06, "loss": 0.5298, "step": 9991 }, { "epoch": 0.63, "grad_norm": 1.0715811252593994, "learning_rate": 3.1347913699350286e-06, "loss": 0.5055, "step": 9992 }, { "epoch": 0.63, "grad_norm": 0.9809359908103943, "learning_rate": 3.1338394773268805e-06, "loss": 0.4604, "step": 9993 }, { "epoch": 0.63, "grad_norm": 1.063254714012146, "learning_rate": 3.132887663296259e-06, "loss": 0.5345, "step": 9994 }, { "epoch": 0.63, "grad_norm": 1.029570460319519, "learning_rate": 3.131935927883242e-06, "loss": 0.4974, "step": 9995 }, { "epoch": 0.63, "grad_norm": 1.1471205949783325, "learning_rate": 3.1309842711279066e-06, "loss": 0.5022, "step": 9996 }, { "epoch": 0.63, "grad_norm": 1.062389612197876, "learning_rate": 3.130032693070322e-06, "loss": 0.4802, "step": 9997 }, { "epoch": 0.63, "grad_norm": 1.061064600944519, "learning_rate": 3.129081193750554e-06, "loss": 0.4954, "step": 9998 }, { "epoch": 0.63, "grad_norm": 1.0565736293792725, "learning_rate": 3.1281297732086666e-06, "loss": 0.5121, "step": 9999 }, { "epoch": 0.63, "grad_norm": 1.1841918230056763, "learning_rate": 3.1271784314847266e-06, "loss": 0.5414, "step": 10000 }, { "epoch": 0.63, "grad_norm": 0.9984389543533325, "learning_rate": 3.126227168618786e-06, "loss": 0.4735, "step": 10001 }, { "epoch": 0.63, "grad_norm": 1.0396091938018799, "learning_rate": 3.1252759846509013e-06, "loss": 0.5111, "step": 10002 }, { "epoch": 0.63, "grad_norm": 0.9318031072616577, "learning_rate": 3.1243248796211234e-06, "loss": 0.5002, "step": 10003 }, { "epoch": 0.63, "grad_norm": 1.0406665802001953, "learning_rate": 3.123373853569498e-06, "loss": 0.521, "step": 10004 }, { "epoch": 0.63, "grad_norm": 1.0957379341125488, "learning_rate": 3.1224229065360734e-06, "loss": 0.5221, "step": 10005 }, { "epoch": 0.63, "grad_norm": 1.0628998279571533, "learning_rate": 3.1214720385608875e-06, "loss": 0.5577, "step": 10006 }, { "epoch": 0.63, "grad_norm": 1.1218875646591187, "learning_rate": 3.120521249683981e-06, "loss": 0.5252, "step": 10007 }, { "epoch": 0.63, "grad_norm": 1.1864256858825684, "learning_rate": 3.1195705399453833e-06, "loss": 0.4871, "step": 10008 }, { "epoch": 0.63, "grad_norm": 1.0815975666046143, "learning_rate": 3.118619909385131e-06, "loss": 0.5513, "step": 10009 }, { "epoch": 0.63, "grad_norm": 1.0236462354660034, "learning_rate": 3.117669358043248e-06, "loss": 0.5042, "step": 10010 }, { "epoch": 0.63, "grad_norm": 1.1469367742538452, "learning_rate": 3.116718885959762e-06, "loss": 0.5255, "step": 10011 }, { "epoch": 0.63, "grad_norm": 1.0528572797775269, "learning_rate": 3.1157684931746902e-06, "loss": 0.4901, "step": 10012 }, { "epoch": 0.63, "grad_norm": 1.0513113737106323, "learning_rate": 3.1148181797280543e-06, "loss": 0.5343, "step": 10013 }, { "epoch": 0.63, "grad_norm": 0.9985837936401367, "learning_rate": 3.1138679456598654e-06, "loss": 0.5021, "step": 10014 }, { "epoch": 0.63, "grad_norm": 1.0434377193450928, "learning_rate": 3.112917791010137e-06, "loss": 0.5342, "step": 10015 }, { "epoch": 0.63, "grad_norm": 1.0435587167739868, "learning_rate": 3.111967715818876e-06, "loss": 0.5156, "step": 10016 }, { "epoch": 0.63, "grad_norm": 1.067971110343933, "learning_rate": 3.1110177201260845e-06, "loss": 0.5037, "step": 10017 }, { "epoch": 0.63, "grad_norm": 1.0365383625030518, "learning_rate": 3.1100678039717665e-06, "loss": 0.4851, "step": 10018 }, { "epoch": 0.63, "grad_norm": 1.078518033027649, "learning_rate": 3.1091179673959194e-06, "loss": 0.5094, "step": 10019 }, { "epoch": 0.63, "grad_norm": 1.158864974975586, "learning_rate": 3.108168210438536e-06, "loss": 0.528, "step": 10020 }, { "epoch": 0.63, "grad_norm": 1.0493173599243164, "learning_rate": 3.1072185331396083e-06, "loss": 0.5069, "step": 10021 }, { "epoch": 0.63, "grad_norm": 1.00165593624115, "learning_rate": 3.106268935539123e-06, "loss": 0.5226, "step": 10022 }, { "epoch": 0.64, "grad_norm": 1.1848684549331665, "learning_rate": 3.1053194176770662e-06, "loss": 0.5295, "step": 10023 }, { "epoch": 0.64, "grad_norm": 0.9629892706871033, "learning_rate": 3.1043699795934172e-06, "loss": 0.4802, "step": 10024 }, { "epoch": 0.64, "grad_norm": 1.0899156332015991, "learning_rate": 3.1034206213281536e-06, "loss": 0.5688, "step": 10025 }, { "epoch": 0.64, "grad_norm": 1.0079987049102783, "learning_rate": 3.10247134292125e-06, "loss": 0.4579, "step": 10026 }, { "epoch": 0.64, "grad_norm": 1.0822067260742188, "learning_rate": 3.1015221444126776e-06, "loss": 0.5051, "step": 10027 }, { "epoch": 0.64, "grad_norm": 0.9707460999488831, "learning_rate": 3.1005730258424025e-06, "loss": 0.5169, "step": 10028 }, { "epoch": 0.64, "grad_norm": 0.9045226573944092, "learning_rate": 3.099623987250391e-06, "loss": 0.4893, "step": 10029 }, { "epoch": 0.64, "grad_norm": 1.0418764352798462, "learning_rate": 3.098675028676601e-06, "loss": 0.5289, "step": 10030 }, { "epoch": 0.64, "grad_norm": 1.0181958675384521, "learning_rate": 3.0977261501609924e-06, "loss": 0.512, "step": 10031 }, { "epoch": 0.64, "grad_norm": 1.1507220268249512, "learning_rate": 3.0967773517435173e-06, "loss": 0.5234, "step": 10032 }, { "epoch": 0.64, "grad_norm": 1.0432597398757935, "learning_rate": 3.0958286334641284e-06, "loss": 0.4601, "step": 10033 }, { "epoch": 0.64, "grad_norm": 1.1117364168167114, "learning_rate": 3.0948799953627696e-06, "loss": 0.5066, "step": 10034 }, { "epoch": 0.64, "grad_norm": 1.0182552337646484, "learning_rate": 3.093931437479388e-06, "loss": 0.4733, "step": 10035 }, { "epoch": 0.64, "grad_norm": 0.9871261119842529, "learning_rate": 3.092982959853923e-06, "loss": 0.5096, "step": 10036 }, { "epoch": 0.64, "grad_norm": 1.086436152458191, "learning_rate": 3.092034562526312e-06, "loss": 0.5515, "step": 10037 }, { "epoch": 0.64, "grad_norm": 1.0634336471557617, "learning_rate": 3.0910862455364864e-06, "loss": 0.4862, "step": 10038 }, { "epoch": 0.64, "grad_norm": 1.1165075302124023, "learning_rate": 3.09013800892438e-06, "loss": 0.5504, "step": 10039 }, { "epoch": 0.64, "grad_norm": 0.9831963181495667, "learning_rate": 3.0891898527299167e-06, "loss": 0.477, "step": 10040 }, { "epoch": 0.64, "grad_norm": 1.017632246017456, "learning_rate": 3.088241776993024e-06, "loss": 0.527, "step": 10041 }, { "epoch": 0.64, "grad_norm": 1.0667827129364014, "learning_rate": 3.0872937817536165e-06, "loss": 0.5569, "step": 10042 }, { "epoch": 0.64, "grad_norm": 1.0934066772460938, "learning_rate": 3.0863458670516157e-06, "loss": 0.5066, "step": 10043 }, { "epoch": 0.64, "grad_norm": 1.0351758003234863, "learning_rate": 3.085398032926933e-06, "loss": 0.5125, "step": 10044 }, { "epoch": 0.64, "grad_norm": 1.0830355882644653, "learning_rate": 3.0844502794194795e-06, "loss": 0.5458, "step": 10045 }, { "epoch": 0.64, "grad_norm": 1.0485659837722778, "learning_rate": 3.083502606569159e-06, "loss": 0.4988, "step": 10046 }, { "epoch": 0.64, "grad_norm": 0.9413774609565735, "learning_rate": 3.0825550144158788e-06, "loss": 0.4749, "step": 10047 }, { "epoch": 0.64, "grad_norm": 1.0139778852462769, "learning_rate": 3.081607502999536e-06, "loss": 0.4913, "step": 10048 }, { "epoch": 0.64, "grad_norm": 1.0455811023712158, "learning_rate": 3.0806600723600275e-06, "loss": 0.511, "step": 10049 }, { "epoch": 0.64, "grad_norm": 1.008416771888733, "learning_rate": 3.0797127225372477e-06, "loss": 0.5121, "step": 10050 }, { "epoch": 0.64, "grad_norm": 1.03008234500885, "learning_rate": 3.078765453571082e-06, "loss": 0.47, "step": 10051 }, { "epoch": 0.64, "grad_norm": 1.091876745223999, "learning_rate": 3.077818265501421e-06, "loss": 0.5114, "step": 10052 }, { "epoch": 0.64, "grad_norm": 1.0145922899246216, "learning_rate": 3.0768711583681475e-06, "loss": 0.5052, "step": 10053 }, { "epoch": 0.64, "grad_norm": 0.9602000117301941, "learning_rate": 3.075924132211139e-06, "loss": 0.4515, "step": 10054 }, { "epoch": 0.64, "grad_norm": 1.020117163658142, "learning_rate": 3.07497718707027e-06, "loss": 0.4649, "step": 10055 }, { "epoch": 0.64, "grad_norm": 1.1142258644104004, "learning_rate": 3.074030322985416e-06, "loss": 0.5001, "step": 10056 }, { "epoch": 0.64, "grad_norm": 1.013404130935669, "learning_rate": 3.073083539996446e-06, "loss": 0.5161, "step": 10057 }, { "epoch": 0.64, "grad_norm": 0.9824324250221252, "learning_rate": 3.072136838143225e-06, "loss": 0.4878, "step": 10058 }, { "epoch": 0.64, "grad_norm": 0.9934601187705994, "learning_rate": 3.0711902174656126e-06, "loss": 0.4888, "step": 10059 }, { "epoch": 0.64, "grad_norm": 1.08644437789917, "learning_rate": 3.070243678003472e-06, "loss": 0.5309, "step": 10060 }, { "epoch": 0.64, "grad_norm": 1.105520248413086, "learning_rate": 3.069297219796658e-06, "loss": 0.5485, "step": 10061 }, { "epoch": 0.64, "grad_norm": 1.015161156654358, "learning_rate": 3.068350842885022e-06, "loss": 0.498, "step": 10062 }, { "epoch": 0.64, "grad_norm": 1.0500092506408691, "learning_rate": 3.0674045473084103e-06, "loss": 0.5216, "step": 10063 }, { "epoch": 0.64, "grad_norm": 1.0675020217895508, "learning_rate": 3.0664583331066695e-06, "loss": 0.4649, "step": 10064 }, { "epoch": 0.64, "grad_norm": 1.0304772853851318, "learning_rate": 3.0655122003196443e-06, "loss": 0.4777, "step": 10065 }, { "epoch": 0.64, "grad_norm": 1.0809754133224487, "learning_rate": 3.06456614898717e-06, "loss": 0.5531, "step": 10066 }, { "epoch": 0.64, "grad_norm": 1.0226454734802246, "learning_rate": 3.0636201791490823e-06, "loss": 0.5415, "step": 10067 }, { "epoch": 0.64, "grad_norm": 1.0683984756469727, "learning_rate": 3.062674290845211e-06, "loss": 0.4793, "step": 10068 }, { "epoch": 0.64, "grad_norm": 0.9812599420547485, "learning_rate": 3.061728484115388e-06, "loss": 0.4737, "step": 10069 }, { "epoch": 0.64, "grad_norm": 0.9435461759567261, "learning_rate": 3.0607827589994353e-06, "loss": 0.4736, "step": 10070 }, { "epoch": 0.64, "grad_norm": 1.0279617309570312, "learning_rate": 3.0598371155371747e-06, "loss": 0.4617, "step": 10071 }, { "epoch": 0.64, "grad_norm": 1.0339475870132446, "learning_rate": 3.058891553768422e-06, "loss": 0.4999, "step": 10072 }, { "epoch": 0.64, "grad_norm": 1.1052274703979492, "learning_rate": 3.0579460737329958e-06, "loss": 0.5335, "step": 10073 }, { "epoch": 0.64, "grad_norm": 1.117714285850525, "learning_rate": 3.0570006754707044e-06, "loss": 0.5456, "step": 10074 }, { "epoch": 0.64, "grad_norm": 1.066931128501892, "learning_rate": 3.056055359021354e-06, "loss": 0.5208, "step": 10075 }, { "epoch": 0.64, "grad_norm": 0.9597610831260681, "learning_rate": 3.0551101244247494e-06, "loss": 0.4912, "step": 10076 }, { "epoch": 0.64, "grad_norm": 1.0383501052856445, "learning_rate": 3.0541649717206933e-06, "loss": 0.4777, "step": 10077 }, { "epoch": 0.64, "grad_norm": 1.014605164527893, "learning_rate": 3.0532199009489814e-06, "loss": 0.5084, "step": 10078 }, { "epoch": 0.64, "grad_norm": 1.0429009199142456, "learning_rate": 3.052274912149406e-06, "loss": 0.5059, "step": 10079 }, { "epoch": 0.64, "grad_norm": 1.058207631111145, "learning_rate": 3.0513300053617595e-06, "loss": 0.4925, "step": 10080 }, { "epoch": 0.64, "grad_norm": 0.9927064776420593, "learning_rate": 3.0503851806258257e-06, "loss": 0.4984, "step": 10081 }, { "epoch": 0.64, "grad_norm": 0.9581083059310913, "learning_rate": 3.0494404379813914e-06, "loss": 0.5451, "step": 10082 }, { "epoch": 0.64, "grad_norm": 1.0245808362960815, "learning_rate": 3.048495777468234e-06, "loss": 0.5594, "step": 10083 }, { "epoch": 0.64, "grad_norm": 1.1457056999206543, "learning_rate": 3.047551199126131e-06, "loss": 0.5133, "step": 10084 }, { "epoch": 0.64, "grad_norm": 1.0019303560256958, "learning_rate": 3.046606702994854e-06, "loss": 0.5592, "step": 10085 }, { "epoch": 0.64, "grad_norm": 1.0403997898101807, "learning_rate": 3.0456622891141748e-06, "loss": 0.4771, "step": 10086 }, { "epoch": 0.64, "grad_norm": 1.0224168300628662, "learning_rate": 3.0447179575238565e-06, "loss": 0.5306, "step": 10087 }, { "epoch": 0.64, "grad_norm": 1.1169382333755493, "learning_rate": 3.0437737082636647e-06, "loss": 0.5728, "step": 10088 }, { "epoch": 0.64, "grad_norm": 1.010927677154541, "learning_rate": 3.0428295413733546e-06, "loss": 0.4344, "step": 10089 }, { "epoch": 0.64, "grad_norm": 1.061489224433899, "learning_rate": 3.0418854568926866e-06, "loss": 0.5116, "step": 10090 }, { "epoch": 0.64, "grad_norm": 1.0159995555877686, "learning_rate": 3.0409414548614086e-06, "loss": 0.4819, "step": 10091 }, { "epoch": 0.64, "grad_norm": 0.984320342540741, "learning_rate": 3.039997535319272e-06, "loss": 0.5023, "step": 10092 }, { "epoch": 0.64, "grad_norm": 1.054763674736023, "learning_rate": 3.039053698306019e-06, "loss": 0.4756, "step": 10093 }, { "epoch": 0.64, "grad_norm": 1.0427004098892212, "learning_rate": 3.0381099438613948e-06, "loss": 0.4761, "step": 10094 }, { "epoch": 0.64, "grad_norm": 1.049312949180603, "learning_rate": 3.037166272025135e-06, "loss": 0.4976, "step": 10095 }, { "epoch": 0.64, "grad_norm": 1.0668352842330933, "learning_rate": 3.0362226828369767e-06, "loss": 0.5331, "step": 10096 }, { "epoch": 0.64, "grad_norm": 1.065746784210205, "learning_rate": 3.0352791763366484e-06, "loss": 0.5142, "step": 10097 }, { "epoch": 0.64, "grad_norm": 0.9855623841285706, "learning_rate": 3.0343357525638787e-06, "loss": 0.4476, "step": 10098 }, { "epoch": 0.64, "grad_norm": 1.0520395040512085, "learning_rate": 3.0333924115583935e-06, "loss": 0.5118, "step": 10099 }, { "epoch": 0.64, "grad_norm": 1.0070985555648804, "learning_rate": 3.032449153359913e-06, "loss": 0.4616, "step": 10100 }, { "epoch": 0.64, "grad_norm": 1.040044903755188, "learning_rate": 3.031505978008153e-06, "loss": 0.517, "step": 10101 }, { "epoch": 0.64, "grad_norm": 1.0617992877960205, "learning_rate": 3.030562885542827e-06, "loss": 0.4896, "step": 10102 }, { "epoch": 0.64, "grad_norm": 1.01646888256073, "learning_rate": 3.0296198760036493e-06, "loss": 0.4899, "step": 10103 }, { "epoch": 0.64, "grad_norm": 1.137224793434143, "learning_rate": 3.0286769494303237e-06, "loss": 0.5234, "step": 10104 }, { "epoch": 0.64, "grad_norm": 1.110205054283142, "learning_rate": 3.0277341058625537e-06, "loss": 0.5328, "step": 10105 }, { "epoch": 0.64, "grad_norm": 1.036597490310669, "learning_rate": 3.026791345340038e-06, "loss": 0.5172, "step": 10106 }, { "epoch": 0.64, "grad_norm": 0.9912294149398804, "learning_rate": 3.0258486679024767e-06, "loss": 0.4618, "step": 10107 }, { "epoch": 0.64, "grad_norm": 1.1212050914764404, "learning_rate": 3.0249060735895603e-06, "loss": 0.5229, "step": 10108 }, { "epoch": 0.64, "grad_norm": 1.0347286462783813, "learning_rate": 3.0239635624409767e-06, "loss": 0.4913, "step": 10109 }, { "epoch": 0.64, "grad_norm": 1.0135654211044312, "learning_rate": 3.0230211344964154e-06, "loss": 0.4797, "step": 10110 }, { "epoch": 0.64, "grad_norm": 1.1035380363464355, "learning_rate": 3.0220787897955544e-06, "loss": 0.5162, "step": 10111 }, { "epoch": 0.64, "grad_norm": 1.0869108438491821, "learning_rate": 3.021136528378077e-06, "loss": 0.5043, "step": 10112 }, { "epoch": 0.64, "grad_norm": 1.066138744354248, "learning_rate": 3.020194350283655e-06, "loss": 0.5352, "step": 10113 }, { "epoch": 0.64, "grad_norm": 1.148913025856018, "learning_rate": 3.019252255551963e-06, "loss": 0.5744, "step": 10114 }, { "epoch": 0.64, "grad_norm": 0.9562913179397583, "learning_rate": 3.0183102442226653e-06, "loss": 0.4837, "step": 10115 }, { "epoch": 0.64, "grad_norm": 0.9955688118934631, "learning_rate": 3.017368316335432e-06, "loss": 0.5081, "step": 10116 }, { "epoch": 0.64, "grad_norm": 1.027421236038208, "learning_rate": 3.0164264719299207e-06, "loss": 0.5122, "step": 10117 }, { "epoch": 0.64, "grad_norm": 1.0766228437423706, "learning_rate": 3.0154847110457918e-06, "loss": 0.4476, "step": 10118 }, { "epoch": 0.64, "grad_norm": 1.0523964166641235, "learning_rate": 3.0145430337226955e-06, "loss": 0.5192, "step": 10119 }, { "epoch": 0.64, "grad_norm": 1.1346592903137207, "learning_rate": 3.013601440000288e-06, "loss": 0.5259, "step": 10120 }, { "epoch": 0.64, "grad_norm": 1.127166509628296, "learning_rate": 3.0126599299182114e-06, "loss": 0.5195, "step": 10121 }, { "epoch": 0.64, "grad_norm": 1.1691970825195312, "learning_rate": 3.0117185035161135e-06, "loss": 0.522, "step": 10122 }, { "epoch": 0.64, "grad_norm": 1.0456876754760742, "learning_rate": 3.01077716083363e-06, "loss": 0.5068, "step": 10123 }, { "epoch": 0.64, "grad_norm": 1.0364118814468384, "learning_rate": 3.009835901910403e-06, "loss": 0.4758, "step": 10124 }, { "epoch": 0.64, "grad_norm": 1.0276139974594116, "learning_rate": 3.008894726786062e-06, "loss": 0.4957, "step": 10125 }, { "epoch": 0.64, "grad_norm": 1.1089617013931274, "learning_rate": 3.007953635500238e-06, "loss": 0.4891, "step": 10126 }, { "epoch": 0.64, "grad_norm": 1.0690889358520508, "learning_rate": 3.0070126280925564e-06, "loss": 0.5857, "step": 10127 }, { "epoch": 0.64, "grad_norm": 1.0123674869537354, "learning_rate": 3.0060717046026387e-06, "loss": 0.531, "step": 10128 }, { "epoch": 0.64, "grad_norm": 1.0811229944229126, "learning_rate": 3.0051308650701054e-06, "loss": 0.5572, "step": 10129 }, { "epoch": 0.64, "grad_norm": 1.0825634002685547, "learning_rate": 3.004190109534573e-06, "loss": 0.5139, "step": 10130 }, { "epoch": 0.64, "grad_norm": 1.081278920173645, "learning_rate": 3.0032494380356523e-06, "loss": 0.508, "step": 10131 }, { "epoch": 0.64, "grad_norm": 0.9947942495346069, "learning_rate": 3.002308850612949e-06, "loss": 0.5034, "step": 10132 }, { "epoch": 0.64, "grad_norm": 1.0024405717849731, "learning_rate": 3.001368347306073e-06, "loss": 0.4934, "step": 10133 }, { "epoch": 0.64, "grad_norm": 1.0109182596206665, "learning_rate": 3.0004279281546235e-06, "loss": 0.5127, "step": 10134 }, { "epoch": 0.64, "grad_norm": 1.0295737981796265, "learning_rate": 2.999487593198197e-06, "loss": 0.4838, "step": 10135 }, { "epoch": 0.64, "grad_norm": 1.0023385286331177, "learning_rate": 2.9985473424763876e-06, "loss": 0.4832, "step": 10136 }, { "epoch": 0.64, "grad_norm": 1.0790435075759888, "learning_rate": 2.9976071760287874e-06, "loss": 0.5331, "step": 10137 }, { "epoch": 0.64, "grad_norm": 1.053161859512329, "learning_rate": 2.9966670938949847e-06, "loss": 0.4834, "step": 10138 }, { "epoch": 0.64, "grad_norm": 0.980963408946991, "learning_rate": 2.995727096114561e-06, "loss": 0.5004, "step": 10139 }, { "epoch": 0.64, "grad_norm": 1.0529303550720215, "learning_rate": 2.9947871827270956e-06, "loss": 0.5491, "step": 10140 }, { "epoch": 0.64, "grad_norm": 1.0220396518707275, "learning_rate": 2.993847353772168e-06, "loss": 0.5115, "step": 10141 }, { "epoch": 0.64, "grad_norm": 0.9761862754821777, "learning_rate": 2.9929076092893496e-06, "loss": 0.4773, "step": 10142 }, { "epoch": 0.64, "grad_norm": 1.0252829790115356, "learning_rate": 2.991967949318209e-06, "loss": 0.5256, "step": 10143 }, { "epoch": 0.64, "grad_norm": 1.00223970413208, "learning_rate": 2.9910283738983125e-06, "loss": 0.4909, "step": 10144 }, { "epoch": 0.64, "grad_norm": 1.0818865299224854, "learning_rate": 2.9900888830692208e-06, "loss": 0.5353, "step": 10145 }, { "epoch": 0.64, "grad_norm": 1.0524858236312866, "learning_rate": 2.9891494768704964e-06, "loss": 0.5401, "step": 10146 }, { "epoch": 0.64, "grad_norm": 1.0927537679672241, "learning_rate": 2.9882101553416932e-06, "loss": 0.4795, "step": 10147 }, { "epoch": 0.64, "grad_norm": 1.0539554357528687, "learning_rate": 2.9872709185223596e-06, "loss": 0.5344, "step": 10148 }, { "epoch": 0.64, "grad_norm": 0.976118266582489, "learning_rate": 2.9863317664520453e-06, "loss": 0.4826, "step": 10149 }, { "epoch": 0.64, "grad_norm": 1.1192395687103271, "learning_rate": 2.9853926991702974e-06, "loss": 0.5249, "step": 10150 }, { "epoch": 0.64, "grad_norm": 1.0809307098388672, "learning_rate": 2.984453716716655e-06, "loss": 0.5024, "step": 10151 }, { "epoch": 0.64, "grad_norm": 0.9954077005386353, "learning_rate": 2.9835148191306535e-06, "loss": 0.479, "step": 10152 }, { "epoch": 0.64, "grad_norm": 1.067521572113037, "learning_rate": 2.9825760064518273e-06, "loss": 0.5265, "step": 10153 }, { "epoch": 0.64, "grad_norm": 0.8902660012245178, "learning_rate": 2.981637278719709e-06, "loss": 0.4432, "step": 10154 }, { "epoch": 0.64, "grad_norm": 1.0645134449005127, "learning_rate": 2.9806986359738244e-06, "loss": 0.5195, "step": 10155 }, { "epoch": 0.64, "grad_norm": 0.9383110404014587, "learning_rate": 2.979760078253694e-06, "loss": 0.4828, "step": 10156 }, { "epoch": 0.64, "grad_norm": 1.1661232709884644, "learning_rate": 2.9788216055988397e-06, "loss": 0.5346, "step": 10157 }, { "epoch": 0.64, "grad_norm": 1.0412225723266602, "learning_rate": 2.977883218048775e-06, "loss": 0.5077, "step": 10158 }, { "epoch": 0.64, "grad_norm": 1.0863960981369019, "learning_rate": 2.9769449156430147e-06, "loss": 0.5489, "step": 10159 }, { "epoch": 0.64, "grad_norm": 1.0391831398010254, "learning_rate": 2.9760066984210655e-06, "loss": 0.5354, "step": 10160 }, { "epoch": 0.64, "grad_norm": 0.9662297964096069, "learning_rate": 2.975068566422434e-06, "loss": 0.501, "step": 10161 }, { "epoch": 0.64, "grad_norm": 1.076661229133606, "learning_rate": 2.97413051968662e-06, "loss": 0.5495, "step": 10162 }, { "epoch": 0.64, "grad_norm": 1.1024425029754639, "learning_rate": 2.9731925582531227e-06, "loss": 0.5142, "step": 10163 }, { "epoch": 0.64, "grad_norm": 1.0270276069641113, "learning_rate": 2.9722546821614373e-06, "loss": 0.504, "step": 10164 }, { "epoch": 0.64, "grad_norm": 1.0266375541687012, "learning_rate": 2.9713168914510533e-06, "loss": 0.472, "step": 10165 }, { "epoch": 0.64, "grad_norm": 1.0487178564071655, "learning_rate": 2.970379186161455e-06, "loss": 0.5039, "step": 10166 }, { "epoch": 0.64, "grad_norm": 1.0786352157592773, "learning_rate": 2.96944156633213e-06, "loss": 0.555, "step": 10167 }, { "epoch": 0.64, "grad_norm": 1.0177651643753052, "learning_rate": 2.9685040320025583e-06, "loss": 0.4892, "step": 10168 }, { "epoch": 0.64, "grad_norm": 1.00448739528656, "learning_rate": 2.9675665832122146e-06, "loss": 0.5229, "step": 10169 }, { "epoch": 0.64, "grad_norm": 1.0026636123657227, "learning_rate": 2.966629220000569e-06, "loss": 0.4948, "step": 10170 }, { "epoch": 0.64, "grad_norm": 1.0992698669433594, "learning_rate": 2.965691942407095e-06, "loss": 0.4946, "step": 10171 }, { "epoch": 0.64, "grad_norm": 0.9647083282470703, "learning_rate": 2.9647547504712577e-06, "loss": 0.535, "step": 10172 }, { "epoch": 0.64, "grad_norm": 1.021139144897461, "learning_rate": 2.9638176442325173e-06, "loss": 0.5139, "step": 10173 }, { "epoch": 0.64, "grad_norm": 1.0401581525802612, "learning_rate": 2.962880623730332e-06, "loss": 0.5098, "step": 10174 }, { "epoch": 0.64, "grad_norm": 1.1423335075378418, "learning_rate": 2.9619436890041555e-06, "loss": 0.5268, "step": 10175 }, { "epoch": 0.64, "grad_norm": 1.057899832725525, "learning_rate": 2.961006840093442e-06, "loss": 0.4863, "step": 10176 }, { "epoch": 0.64, "grad_norm": 0.9457885026931763, "learning_rate": 2.9600700770376384e-06, "loss": 0.4911, "step": 10177 }, { "epoch": 0.64, "grad_norm": 1.0614778995513916, "learning_rate": 2.959133399876186e-06, "loss": 0.4978, "step": 10178 }, { "epoch": 0.64, "grad_norm": 1.0491783618927002, "learning_rate": 2.958196808648525e-06, "loss": 0.5372, "step": 10179 }, { "epoch": 0.64, "grad_norm": 1.0236880779266357, "learning_rate": 2.957260303394096e-06, "loss": 0.5064, "step": 10180 }, { "epoch": 0.65, "grad_norm": 1.0263805389404297, "learning_rate": 2.9563238841523293e-06, "loss": 0.519, "step": 10181 }, { "epoch": 0.65, "grad_norm": 0.969336986541748, "learning_rate": 2.955387550962654e-06, "loss": 0.4496, "step": 10182 }, { "epoch": 0.65, "grad_norm": 1.028093934059143, "learning_rate": 2.954451303864494e-06, "loss": 0.5405, "step": 10183 }, { "epoch": 0.65, "grad_norm": 1.0766602754592896, "learning_rate": 2.9535151428972762e-06, "loss": 0.5221, "step": 10184 }, { "epoch": 0.65, "grad_norm": 1.0071594715118408, "learning_rate": 2.9525790681004172e-06, "loss": 0.4856, "step": 10185 }, { "epoch": 0.65, "grad_norm": 0.9100344181060791, "learning_rate": 2.9516430795133294e-06, "loss": 0.4717, "step": 10186 }, { "epoch": 0.65, "grad_norm": 1.075358510017395, "learning_rate": 2.950707177175427e-06, "loss": 0.4949, "step": 10187 }, { "epoch": 0.65, "grad_norm": 0.9946542978286743, "learning_rate": 2.9497713611261146e-06, "loss": 0.4821, "step": 10188 }, { "epoch": 0.65, "grad_norm": 1.1402562856674194, "learning_rate": 2.9488356314047994e-06, "loss": 0.5511, "step": 10189 }, { "epoch": 0.65, "grad_norm": 0.995521605014801, "learning_rate": 2.94789998805088e-06, "loss": 0.5295, "step": 10190 }, { "epoch": 0.65, "grad_norm": 1.1219182014465332, "learning_rate": 2.9469644311037545e-06, "loss": 0.5539, "step": 10191 }, { "epoch": 0.65, "grad_norm": 1.0393383502960205, "learning_rate": 2.946028960602812e-06, "loss": 0.5071, "step": 10192 }, { "epoch": 0.65, "grad_norm": 1.0298091173171997, "learning_rate": 2.9450935765874474e-06, "loss": 0.5163, "step": 10193 }, { "epoch": 0.65, "grad_norm": 1.0860475301742554, "learning_rate": 2.9441582790970425e-06, "loss": 0.5379, "step": 10194 }, { "epoch": 0.65, "grad_norm": 1.0946840047836304, "learning_rate": 2.9432230681709815e-06, "loss": 0.5529, "step": 10195 }, { "epoch": 0.65, "grad_norm": 0.9496505856513977, "learning_rate": 2.942287943848641e-06, "loss": 0.4892, "step": 10196 }, { "epoch": 0.65, "grad_norm": 1.1770808696746826, "learning_rate": 2.941352906169398e-06, "loss": 0.5534, "step": 10197 }, { "epoch": 0.65, "grad_norm": 0.992904782295227, "learning_rate": 2.9404179551726214e-06, "loss": 0.4744, "step": 10198 }, { "epoch": 0.65, "grad_norm": 1.0817198753356934, "learning_rate": 2.939483090897681e-06, "loss": 0.54, "step": 10199 }, { "epoch": 0.65, "grad_norm": 1.0625017881393433, "learning_rate": 2.9385483133839386e-06, "loss": 0.4746, "step": 10200 }, { "epoch": 0.65, "grad_norm": 1.2074081897735596, "learning_rate": 2.937613622670756e-06, "loss": 0.579, "step": 10201 }, { "epoch": 0.65, "grad_norm": 0.994379997253418, "learning_rate": 2.9366790187974897e-06, "loss": 0.4887, "step": 10202 }, { "epoch": 0.65, "grad_norm": 0.9682003855705261, "learning_rate": 2.9357445018034926e-06, "loss": 0.516, "step": 10203 }, { "epoch": 0.65, "grad_norm": 1.0344123840332031, "learning_rate": 2.934810071728114e-06, "loss": 0.514, "step": 10204 }, { "epoch": 0.65, "grad_norm": 1.05597984790802, "learning_rate": 2.9338757286106955e-06, "loss": 0.5176, "step": 10205 }, { "epoch": 0.65, "grad_norm": 0.9796289801597595, "learning_rate": 2.9329414724905845e-06, "loss": 0.514, "step": 10206 }, { "epoch": 0.65, "grad_norm": 0.9932934045791626, "learning_rate": 2.9320073034071187e-06, "loss": 0.4862, "step": 10207 }, { "epoch": 0.65, "grad_norm": 0.98027104139328, "learning_rate": 2.9310732213996305e-06, "loss": 0.5024, "step": 10208 }, { "epoch": 0.65, "grad_norm": 0.9642611742019653, "learning_rate": 2.9301392265074506e-06, "loss": 0.5061, "step": 10209 }, { "epoch": 0.65, "grad_norm": 1.0452041625976562, "learning_rate": 2.9292053187699075e-06, "loss": 0.4624, "step": 10210 }, { "epoch": 0.65, "grad_norm": 1.0322476625442505, "learning_rate": 2.9282714982263265e-06, "loss": 0.4644, "step": 10211 }, { "epoch": 0.65, "grad_norm": 1.046685814857483, "learning_rate": 2.927337764916025e-06, "loss": 0.5169, "step": 10212 }, { "epoch": 0.65, "grad_norm": 1.061316728591919, "learning_rate": 2.926404118878319e-06, "loss": 0.5334, "step": 10213 }, { "epoch": 0.65, "grad_norm": 1.000592827796936, "learning_rate": 2.925470560152522e-06, "loss": 0.5085, "step": 10214 }, { "epoch": 0.65, "grad_norm": 0.9926203489303589, "learning_rate": 2.924537088777944e-06, "loss": 0.5286, "step": 10215 }, { "epoch": 0.65, "grad_norm": 1.045668601989746, "learning_rate": 2.9236037047938894e-06, "loss": 0.4923, "step": 10216 }, { "epoch": 0.65, "grad_norm": 1.0347856283187866, "learning_rate": 2.922670408239657e-06, "loss": 0.5097, "step": 10217 }, { "epoch": 0.65, "grad_norm": 1.0084527730941772, "learning_rate": 2.921737199154549e-06, "loss": 0.5545, "step": 10218 }, { "epoch": 0.65, "grad_norm": 1.0063661336898804, "learning_rate": 2.920804077577859e-06, "loss": 0.4822, "step": 10219 }, { "epoch": 0.65, "grad_norm": 0.9825767874717712, "learning_rate": 2.919871043548875e-06, "loss": 0.4867, "step": 10220 }, { "epoch": 0.65, "grad_norm": 1.020814061164856, "learning_rate": 2.9189380971068864e-06, "loss": 0.4682, "step": 10221 }, { "epoch": 0.65, "grad_norm": 1.1582943201065063, "learning_rate": 2.918005238291172e-06, "loss": 0.5218, "step": 10222 }, { "epoch": 0.65, "grad_norm": 1.0233937501907349, "learning_rate": 2.9170724671410155e-06, "loss": 0.5315, "step": 10223 }, { "epoch": 0.65, "grad_norm": 0.9818440675735474, "learning_rate": 2.916139783695694e-06, "loss": 0.4892, "step": 10224 }, { "epoch": 0.65, "grad_norm": 1.0906521081924438, "learning_rate": 2.9152071879944743e-06, "loss": 0.5153, "step": 10225 }, { "epoch": 0.65, "grad_norm": 1.1499855518341064, "learning_rate": 2.914274680076628e-06, "loss": 0.5607, "step": 10226 }, { "epoch": 0.65, "grad_norm": 1.0200022459030151, "learning_rate": 2.913342259981419e-06, "loss": 0.5322, "step": 10227 }, { "epoch": 0.65, "grad_norm": 1.0477135181427002, "learning_rate": 2.9124099277481088e-06, "loss": 0.4779, "step": 10228 }, { "epoch": 0.65, "grad_norm": 0.9612158536911011, "learning_rate": 2.9114776834159563e-06, "loss": 0.4984, "step": 10229 }, { "epoch": 0.65, "grad_norm": 1.0411784648895264, "learning_rate": 2.910545527024209e-06, "loss": 0.5143, "step": 10230 }, { "epoch": 0.65, "grad_norm": 1.0339840650558472, "learning_rate": 2.9096134586121227e-06, "loss": 0.5487, "step": 10231 }, { "epoch": 0.65, "grad_norm": 1.057961106300354, "learning_rate": 2.908681478218944e-06, "loss": 0.4985, "step": 10232 }, { "epoch": 0.65, "grad_norm": 1.0351730585098267, "learning_rate": 2.907749585883911e-06, "loss": 0.5014, "step": 10233 }, { "epoch": 0.65, "grad_norm": 1.06086003780365, "learning_rate": 2.906817781646264e-06, "loss": 0.5022, "step": 10234 }, { "epoch": 0.65, "grad_norm": 1.0551403760910034, "learning_rate": 2.905886065545239e-06, "loss": 0.5281, "step": 10235 }, { "epoch": 0.65, "grad_norm": 0.962441086769104, "learning_rate": 2.9049544376200674e-06, "loss": 0.5143, "step": 10236 }, { "epoch": 0.65, "grad_norm": 1.1584631204605103, "learning_rate": 2.9040228979099777e-06, "loss": 0.5067, "step": 10237 }, { "epoch": 0.65, "grad_norm": 0.9825107455253601, "learning_rate": 2.9030914464541904e-06, "loss": 0.4701, "step": 10238 }, { "epoch": 0.65, "grad_norm": 1.0211668014526367, "learning_rate": 2.902160083291926e-06, "loss": 0.4898, "step": 10239 }, { "epoch": 0.65, "grad_norm": 1.1130526065826416, "learning_rate": 2.9012288084624065e-06, "loss": 0.474, "step": 10240 }, { "epoch": 0.65, "grad_norm": 1.003676176071167, "learning_rate": 2.9002976220048383e-06, "loss": 0.4768, "step": 10241 }, { "epoch": 0.65, "grad_norm": 1.0888468027114868, "learning_rate": 2.899366523958434e-06, "loss": 0.546, "step": 10242 }, { "epoch": 0.65, "grad_norm": 1.0272295475006104, "learning_rate": 2.898435514362397e-06, "loss": 0.5243, "step": 10243 }, { "epoch": 0.65, "grad_norm": 0.9819967150688171, "learning_rate": 2.89750459325593e-06, "loss": 0.484, "step": 10244 }, { "epoch": 0.65, "grad_norm": 1.091143012046814, "learning_rate": 2.896573760678232e-06, "loss": 0.5206, "step": 10245 }, { "epoch": 0.65, "grad_norm": 1.0350513458251953, "learning_rate": 2.8956430166684945e-06, "loss": 0.4974, "step": 10246 }, { "epoch": 0.65, "grad_norm": 1.0358695983886719, "learning_rate": 2.8947123612659068e-06, "loss": 0.5276, "step": 10247 }, { "epoch": 0.65, "grad_norm": 1.046087622642517, "learning_rate": 2.8937817945096614e-06, "loss": 0.4793, "step": 10248 }, { "epoch": 0.65, "grad_norm": 1.1108758449554443, "learning_rate": 2.8928513164389353e-06, "loss": 0.5576, "step": 10249 }, { "epoch": 0.65, "grad_norm": 1.047646403312683, "learning_rate": 2.8919209270929106e-06, "loss": 0.5497, "step": 10250 }, { "epoch": 0.65, "grad_norm": 1.0258592367172241, "learning_rate": 2.8909906265107647e-06, "loss": 0.487, "step": 10251 }, { "epoch": 0.65, "grad_norm": 1.1011090278625488, "learning_rate": 2.890060414731662e-06, "loss": 0.4945, "step": 10252 }, { "epoch": 0.65, "grad_norm": 1.131653070449829, "learning_rate": 2.8891302917947794e-06, "loss": 0.5597, "step": 10253 }, { "epoch": 0.65, "grad_norm": 1.1027075052261353, "learning_rate": 2.8882002577392752e-06, "loss": 0.5561, "step": 10254 }, { "epoch": 0.65, "grad_norm": 1.1370142698287964, "learning_rate": 2.8872703126043116e-06, "loss": 0.6032, "step": 10255 }, { "epoch": 0.65, "grad_norm": 1.10757315158844, "learning_rate": 2.8863404564290455e-06, "loss": 0.4975, "step": 10256 }, { "epoch": 0.65, "grad_norm": 1.0993229150772095, "learning_rate": 2.88541068925263e-06, "loss": 0.5404, "step": 10257 }, { "epoch": 0.65, "grad_norm": 0.9537004828453064, "learning_rate": 2.8844810111142143e-06, "loss": 0.4863, "step": 10258 }, { "epoch": 0.65, "grad_norm": 1.100672960281372, "learning_rate": 2.883551422052946e-06, "loss": 0.4964, "step": 10259 }, { "epoch": 0.65, "grad_norm": 1.0630990266799927, "learning_rate": 2.8826219221079597e-06, "loss": 0.5286, "step": 10260 }, { "epoch": 0.65, "grad_norm": 1.017022728919983, "learning_rate": 2.8816925113184034e-06, "loss": 0.4967, "step": 10261 }, { "epoch": 0.65, "grad_norm": 1.0680783987045288, "learning_rate": 2.8807631897234045e-06, "loss": 0.5264, "step": 10262 }, { "epoch": 0.65, "grad_norm": 1.1391302347183228, "learning_rate": 2.8798339573620953e-06, "loss": 0.5474, "step": 10263 }, { "epoch": 0.65, "grad_norm": 1.0025023221969604, "learning_rate": 2.8789048142736026e-06, "loss": 0.4627, "step": 10264 }, { "epoch": 0.65, "grad_norm": 1.1172410249710083, "learning_rate": 2.8779757604970495e-06, "loss": 0.5047, "step": 10265 }, { "epoch": 0.65, "grad_norm": 0.9674686789512634, "learning_rate": 2.877046796071554e-06, "loss": 0.5257, "step": 10266 }, { "epoch": 0.65, "grad_norm": 1.0824980735778809, "learning_rate": 2.8761179210362365e-06, "loss": 0.5243, "step": 10267 }, { "epoch": 0.65, "grad_norm": 1.1241073608398438, "learning_rate": 2.8751891354302018e-06, "loss": 0.5312, "step": 10268 }, { "epoch": 0.65, "grad_norm": 0.9392661452293396, "learning_rate": 2.8742604392925587e-06, "loss": 0.4949, "step": 10269 }, { "epoch": 0.65, "grad_norm": 1.0615390539169312, "learning_rate": 2.8733318326624182e-06, "loss": 0.5227, "step": 10270 }, { "epoch": 0.65, "grad_norm": 1.0059072971343994, "learning_rate": 2.8724033155788743e-06, "loss": 0.5227, "step": 10271 }, { "epoch": 0.65, "grad_norm": 1.0599112510681152, "learning_rate": 2.871474888081025e-06, "loss": 0.543, "step": 10272 }, { "epoch": 0.65, "grad_norm": 1.0321950912475586, "learning_rate": 2.870546550207964e-06, "loss": 0.4835, "step": 10273 }, { "epoch": 0.65, "grad_norm": 1.0996196269989014, "learning_rate": 2.8696183019987796e-06, "loss": 0.4924, "step": 10274 }, { "epoch": 0.65, "grad_norm": 1.0861026048660278, "learning_rate": 2.868690143492559e-06, "loss": 0.5298, "step": 10275 }, { "epoch": 0.65, "grad_norm": 1.1031099557876587, "learning_rate": 2.8677620747283807e-06, "loss": 0.5125, "step": 10276 }, { "epoch": 0.65, "grad_norm": 1.0979763269424438, "learning_rate": 2.8668340957453224e-06, "loss": 0.4891, "step": 10277 }, { "epoch": 0.65, "grad_norm": 0.9893242716789246, "learning_rate": 2.865906206582463e-06, "loss": 0.5146, "step": 10278 }, { "epoch": 0.65, "grad_norm": 0.9797508716583252, "learning_rate": 2.8649784072788668e-06, "loss": 0.4693, "step": 10279 }, { "epoch": 0.65, "grad_norm": 1.061628818511963, "learning_rate": 2.8640506978736027e-06, "loss": 0.568, "step": 10280 }, { "epoch": 0.65, "grad_norm": 1.185620665550232, "learning_rate": 2.8631230784057362e-06, "loss": 0.5479, "step": 10281 }, { "epoch": 0.65, "grad_norm": 1.0274667739868164, "learning_rate": 2.862195548914318e-06, "loss": 0.5336, "step": 10282 }, { "epoch": 0.65, "grad_norm": 1.1475064754486084, "learning_rate": 2.8612681094384135e-06, "loss": 0.5158, "step": 10283 }, { "epoch": 0.65, "grad_norm": 1.0287840366363525, "learning_rate": 2.8603407600170664e-06, "loss": 0.5117, "step": 10284 }, { "epoch": 0.65, "grad_norm": 0.9745176434516907, "learning_rate": 2.8594135006893264e-06, "loss": 0.5212, "step": 10285 }, { "epoch": 0.65, "grad_norm": 1.0082731246948242, "learning_rate": 2.858486331494238e-06, "loss": 0.5245, "step": 10286 }, { "epoch": 0.65, "grad_norm": 0.9963515400886536, "learning_rate": 2.8575592524708397e-06, "loss": 0.5252, "step": 10287 }, { "epoch": 0.65, "grad_norm": 0.9823704957962036, "learning_rate": 2.856632263658169e-06, "loss": 0.4971, "step": 10288 }, { "epoch": 0.65, "grad_norm": 1.0706892013549805, "learning_rate": 2.855705365095258e-06, "loss": 0.5358, "step": 10289 }, { "epoch": 0.65, "grad_norm": 0.9489477872848511, "learning_rate": 2.854778556821132e-06, "loss": 0.4582, "step": 10290 }, { "epoch": 0.65, "grad_norm": 1.0349112749099731, "learning_rate": 2.8538518388748214e-06, "loss": 0.5261, "step": 10291 }, { "epoch": 0.65, "grad_norm": 1.108465313911438, "learning_rate": 2.8529252112953434e-06, "loss": 0.5271, "step": 10292 }, { "epoch": 0.65, "grad_norm": 1.125195860862732, "learning_rate": 2.8519986741217144e-06, "loss": 0.5316, "step": 10293 }, { "epoch": 0.65, "grad_norm": 0.9881192445755005, "learning_rate": 2.8510722273929486e-06, "loss": 0.5166, "step": 10294 }, { "epoch": 0.65, "grad_norm": 1.0663776397705078, "learning_rate": 2.8501458711480564e-06, "loss": 0.5127, "step": 10295 }, { "epoch": 0.65, "grad_norm": 1.0565035343170166, "learning_rate": 2.8492196054260424e-06, "loss": 0.5389, "step": 10296 }, { "epoch": 0.65, "grad_norm": 1.0431386232376099, "learning_rate": 2.848293430265911e-06, "loss": 0.5097, "step": 10297 }, { "epoch": 0.65, "grad_norm": 0.9941859245300293, "learning_rate": 2.8473673457066564e-06, "loss": 0.479, "step": 10298 }, { "epoch": 0.65, "grad_norm": 0.9996705055236816, "learning_rate": 2.8464413517872737e-06, "loss": 0.492, "step": 10299 }, { "epoch": 0.65, "grad_norm": 1.066678524017334, "learning_rate": 2.845515448546754e-06, "loss": 0.5115, "step": 10300 }, { "epoch": 0.65, "grad_norm": 0.9481530785560608, "learning_rate": 2.8445896360240845e-06, "loss": 0.4736, "step": 10301 }, { "epoch": 0.65, "grad_norm": 1.095447301864624, "learning_rate": 2.843663914258249e-06, "loss": 0.5133, "step": 10302 }, { "epoch": 0.65, "grad_norm": 0.9487720131874084, "learning_rate": 2.8427382832882207e-06, "loss": 0.494, "step": 10303 }, { "epoch": 0.65, "grad_norm": 1.069291353225708, "learning_rate": 2.8418127431529807e-06, "loss": 0.5182, "step": 10304 }, { "epoch": 0.65, "grad_norm": 1.0383696556091309, "learning_rate": 2.8408872938915e-06, "loss": 0.4865, "step": 10305 }, { "epoch": 0.65, "grad_norm": 1.0746654272079468, "learning_rate": 2.8399619355427427e-06, "loss": 0.5395, "step": 10306 }, { "epoch": 0.65, "grad_norm": 1.0153661966323853, "learning_rate": 2.839036668145674e-06, "loss": 0.5271, "step": 10307 }, { "epoch": 0.65, "grad_norm": 1.0152404308319092, "learning_rate": 2.8381114917392538e-06, "loss": 0.5079, "step": 10308 }, { "epoch": 0.65, "grad_norm": 1.0082815885543823, "learning_rate": 2.8371864063624375e-06, "loss": 0.4782, "step": 10309 }, { "epoch": 0.65, "grad_norm": 1.0504685640335083, "learning_rate": 2.836261412054181e-06, "loss": 0.4719, "step": 10310 }, { "epoch": 0.65, "grad_norm": 0.9775835871696472, "learning_rate": 2.8353365088534247e-06, "loss": 0.5359, "step": 10311 }, { "epoch": 0.65, "grad_norm": 1.0367127656936646, "learning_rate": 2.8344116967991197e-06, "loss": 0.4911, "step": 10312 }, { "epoch": 0.65, "grad_norm": 1.0500134229660034, "learning_rate": 2.8334869759302064e-06, "loss": 0.5102, "step": 10313 }, { "epoch": 0.65, "grad_norm": 1.0859054327011108, "learning_rate": 2.8325623462856176e-06, "loss": 0.518, "step": 10314 }, { "epoch": 0.65, "grad_norm": 1.0743653774261475, "learning_rate": 2.8316378079042887e-06, "loss": 0.5542, "step": 10315 }, { "epoch": 0.65, "grad_norm": 1.0794223546981812, "learning_rate": 2.8307133608251486e-06, "loss": 0.4962, "step": 10316 }, { "epoch": 0.65, "grad_norm": 1.0531331300735474, "learning_rate": 2.8297890050871222e-06, "loss": 0.5037, "step": 10317 }, { "epoch": 0.65, "grad_norm": 1.021409273147583, "learning_rate": 2.8288647407291337e-06, "loss": 0.5426, "step": 10318 }, { "epoch": 0.65, "grad_norm": 1.0144912004470825, "learning_rate": 2.827940567790096e-06, "loss": 0.5216, "step": 10319 }, { "epoch": 0.65, "grad_norm": 1.0451698303222656, "learning_rate": 2.8270164863089227e-06, "loss": 0.53, "step": 10320 }, { "epoch": 0.65, "grad_norm": 1.0527116060256958, "learning_rate": 2.82609249632453e-06, "loss": 0.5234, "step": 10321 }, { "epoch": 0.65, "grad_norm": 1.05509352684021, "learning_rate": 2.825168597875818e-06, "loss": 0.5229, "step": 10322 }, { "epoch": 0.65, "grad_norm": 1.0506893396377563, "learning_rate": 2.82424479100169e-06, "loss": 0.4694, "step": 10323 }, { "epoch": 0.65, "grad_norm": 1.0868808031082153, "learning_rate": 2.8233210757410454e-06, "loss": 0.5533, "step": 10324 }, { "epoch": 0.65, "grad_norm": 1.011303186416626, "learning_rate": 2.8223974521327787e-06, "loss": 0.5178, "step": 10325 }, { "epoch": 0.65, "grad_norm": 1.0738871097564697, "learning_rate": 2.8214739202157794e-06, "loss": 0.5024, "step": 10326 }, { "epoch": 0.65, "grad_norm": 1.006366491317749, "learning_rate": 2.820550480028937e-06, "loss": 0.5213, "step": 10327 }, { "epoch": 0.65, "grad_norm": 1.1595299243927002, "learning_rate": 2.81962713161113e-06, "loss": 0.5485, "step": 10328 }, { "epoch": 0.65, "grad_norm": 1.003004789352417, "learning_rate": 2.8187038750012396e-06, "loss": 0.5179, "step": 10329 }, { "epoch": 0.65, "grad_norm": 0.9998498558998108, "learning_rate": 2.8177807102381404e-06, "loss": 0.4903, "step": 10330 }, { "epoch": 0.65, "grad_norm": 0.9879375696182251, "learning_rate": 2.816857637360705e-06, "loss": 0.4948, "step": 10331 }, { "epoch": 0.65, "grad_norm": 1.04654061794281, "learning_rate": 2.8159346564078006e-06, "loss": 0.5101, "step": 10332 }, { "epoch": 0.65, "grad_norm": 1.0643385648727417, "learning_rate": 2.815011767418287e-06, "loss": 0.5383, "step": 10333 }, { "epoch": 0.65, "grad_norm": 1.0637271404266357, "learning_rate": 2.8140889704310287e-06, "loss": 0.5227, "step": 10334 }, { "epoch": 0.65, "grad_norm": 1.0037415027618408, "learning_rate": 2.8131662654848814e-06, "loss": 0.5071, "step": 10335 }, { "epoch": 0.65, "grad_norm": 1.0276168584823608, "learning_rate": 2.8122436526186935e-06, "loss": 0.5532, "step": 10336 }, { "epoch": 0.65, "grad_norm": 0.9621335864067078, "learning_rate": 2.8113211318713146e-06, "loss": 0.536, "step": 10337 }, { "epoch": 0.65, "grad_norm": 0.9891295433044434, "learning_rate": 2.810398703281589e-06, "loss": 0.491, "step": 10338 }, { "epoch": 0.66, "grad_norm": 1.081217646598816, "learning_rate": 2.8094763668883567e-06, "loss": 0.4953, "step": 10339 }, { "epoch": 0.66, "grad_norm": 1.0589220523834229, "learning_rate": 2.808554122730457e-06, "loss": 0.4989, "step": 10340 }, { "epoch": 0.66, "grad_norm": 1.0248987674713135, "learning_rate": 2.8076319708467146e-06, "loss": 0.5145, "step": 10341 }, { "epoch": 0.66, "grad_norm": 1.1088663339614868, "learning_rate": 2.8067099112759665e-06, "loss": 0.522, "step": 10342 }, { "epoch": 0.66, "grad_norm": 1.0120095014572144, "learning_rate": 2.8057879440570356e-06, "loss": 0.5174, "step": 10343 }, { "epoch": 0.66, "grad_norm": 0.9366309642791748, "learning_rate": 2.804866069228739e-06, "loss": 0.472, "step": 10344 }, { "epoch": 0.66, "grad_norm": 1.1038411855697632, "learning_rate": 2.803944286829896e-06, "loss": 0.4936, "step": 10345 }, { "epoch": 0.66, "grad_norm": 1.0267481803894043, "learning_rate": 2.8030225968993198e-06, "loss": 0.5221, "step": 10346 }, { "epoch": 0.66, "grad_norm": 1.0624173879623413, "learning_rate": 2.802100999475819e-06, "loss": 0.4932, "step": 10347 }, { "epoch": 0.66, "grad_norm": 1.2405089139938354, "learning_rate": 2.8011794945982013e-06, "loss": 0.5502, "step": 10348 }, { "epoch": 0.66, "grad_norm": 0.9911314249038696, "learning_rate": 2.8002580823052638e-06, "loss": 0.4974, "step": 10349 }, { "epoch": 0.66, "grad_norm": 1.0014399290084839, "learning_rate": 2.7993367626358047e-06, "loss": 0.4675, "step": 10350 }, { "epoch": 0.66, "grad_norm": 0.9858644008636475, "learning_rate": 2.7984155356286224e-06, "loss": 0.4446, "step": 10351 }, { "epoch": 0.66, "grad_norm": 1.096359372138977, "learning_rate": 2.7974944013225013e-06, "loss": 0.4912, "step": 10352 }, { "epoch": 0.66, "grad_norm": 1.0251573324203491, "learning_rate": 2.796573359756229e-06, "loss": 0.514, "step": 10353 }, { "epoch": 0.66, "grad_norm": 1.1570641994476318, "learning_rate": 2.7956524109685874e-06, "loss": 0.5704, "step": 10354 }, { "epoch": 0.66, "grad_norm": 0.9382094144821167, "learning_rate": 2.7947315549983545e-06, "loss": 0.514, "step": 10355 }, { "epoch": 0.66, "grad_norm": 1.0530364513397217, "learning_rate": 2.793810791884306e-06, "loss": 0.5164, "step": 10356 }, { "epoch": 0.66, "grad_norm": 1.0562663078308105, "learning_rate": 2.792890121665208e-06, "loss": 0.5073, "step": 10357 }, { "epoch": 0.66, "grad_norm": 1.1291313171386719, "learning_rate": 2.791969544379828e-06, "loss": 0.5317, "step": 10358 }, { "epoch": 0.66, "grad_norm": 1.0145524740219116, "learning_rate": 2.7910490600669327e-06, "loss": 0.5066, "step": 10359 }, { "epoch": 0.66, "grad_norm": 1.0272448062896729, "learning_rate": 2.790128668765275e-06, "loss": 0.5488, "step": 10360 }, { "epoch": 0.66, "grad_norm": 1.0094681978225708, "learning_rate": 2.789208370513612e-06, "loss": 0.4549, "step": 10361 }, { "epoch": 0.66, "grad_norm": 1.1560615301132202, "learning_rate": 2.7882881653506947e-06, "loss": 0.5397, "step": 10362 }, { "epoch": 0.66, "grad_norm": 0.9382747411727905, "learning_rate": 2.787368053315266e-06, "loss": 0.4965, "step": 10363 }, { "epoch": 0.66, "grad_norm": 1.0615712404251099, "learning_rate": 2.7864480344460743e-06, "loss": 0.4694, "step": 10364 }, { "epoch": 0.66, "grad_norm": 1.137567162513733, "learning_rate": 2.7855281087818543e-06, "loss": 0.5259, "step": 10365 }, { "epoch": 0.66, "grad_norm": 1.0025049448013306, "learning_rate": 2.7846082763613412e-06, "loss": 0.4573, "step": 10366 }, { "epoch": 0.66, "grad_norm": 1.1223913431167603, "learning_rate": 2.783688537223268e-06, "loss": 0.58, "step": 10367 }, { "epoch": 0.66, "grad_norm": 1.0727946758270264, "learning_rate": 2.7827688914063596e-06, "loss": 0.5462, "step": 10368 }, { "epoch": 0.66, "grad_norm": 1.0876458883285522, "learning_rate": 2.78184933894934e-06, "loss": 0.4887, "step": 10369 }, { "epoch": 0.66, "grad_norm": 0.9600358009338379, "learning_rate": 2.780929879890931e-06, "loss": 0.5089, "step": 10370 }, { "epoch": 0.66, "grad_norm": 1.0861115455627441, "learning_rate": 2.780010514269841e-06, "loss": 0.5004, "step": 10371 }, { "epoch": 0.66, "grad_norm": 1.0740866661071777, "learning_rate": 2.7790912421247883e-06, "loss": 0.5236, "step": 10372 }, { "epoch": 0.66, "grad_norm": 1.0634615421295166, "learning_rate": 2.7781720634944766e-06, "loss": 0.5436, "step": 10373 }, { "epoch": 0.66, "grad_norm": 1.0422475337982178, "learning_rate": 2.77725297841761e-06, "loss": 0.4598, "step": 10374 }, { "epoch": 0.66, "grad_norm": 1.0295344591140747, "learning_rate": 2.7763339869328897e-06, "loss": 0.5102, "step": 10375 }, { "epoch": 0.66, "grad_norm": 1.0481268167495728, "learning_rate": 2.7754150890790067e-06, "loss": 0.5244, "step": 10376 }, { "epoch": 0.66, "grad_norm": 1.0282061100006104, "learning_rate": 2.7744962848946565e-06, "loss": 0.4872, "step": 10377 }, { "epoch": 0.66, "grad_norm": 1.0922186374664307, "learning_rate": 2.7735775744185276e-06, "loss": 0.5161, "step": 10378 }, { "epoch": 0.66, "grad_norm": 0.9676588773727417, "learning_rate": 2.7726589576893004e-06, "loss": 0.4734, "step": 10379 }, { "epoch": 0.66, "grad_norm": 1.0311994552612305, "learning_rate": 2.7717404347456567e-06, "loss": 0.4676, "step": 10380 }, { "epoch": 0.66, "grad_norm": 0.9735383987426758, "learning_rate": 2.7708220056262706e-06, "loss": 0.5032, "step": 10381 }, { "epoch": 0.66, "grad_norm": 1.048998236656189, "learning_rate": 2.7699036703698158e-06, "loss": 0.5058, "step": 10382 }, { "epoch": 0.66, "grad_norm": 1.0708740949630737, "learning_rate": 2.7689854290149608e-06, "loss": 0.5323, "step": 10383 }, { "epoch": 0.66, "grad_norm": 1.0217273235321045, "learning_rate": 2.768067281600365e-06, "loss": 0.5148, "step": 10384 }, { "epoch": 0.66, "grad_norm": 1.074256181716919, "learning_rate": 2.7671492281646937e-06, "loss": 0.4863, "step": 10385 }, { "epoch": 0.66, "grad_norm": 1.0781575441360474, "learning_rate": 2.7662312687466026e-06, "loss": 0.4688, "step": 10386 }, { "epoch": 0.66, "grad_norm": 0.9867082238197327, "learning_rate": 2.7653134033847393e-06, "loss": 0.5157, "step": 10387 }, { "epoch": 0.66, "grad_norm": 1.0689009428024292, "learning_rate": 2.7643956321177558e-06, "loss": 0.5145, "step": 10388 }, { "epoch": 0.66, "grad_norm": 0.9699030518531799, "learning_rate": 2.763477954984295e-06, "loss": 0.5254, "step": 10389 }, { "epoch": 0.66, "grad_norm": 1.0327718257904053, "learning_rate": 2.7625603720229964e-06, "loss": 0.5171, "step": 10390 }, { "epoch": 0.66, "grad_norm": 1.0824087858200073, "learning_rate": 2.7616428832724983e-06, "loss": 0.5744, "step": 10391 }, { "epoch": 0.66, "grad_norm": 1.0552942752838135, "learning_rate": 2.760725488771433e-06, "loss": 0.5646, "step": 10392 }, { "epoch": 0.66, "grad_norm": 1.0860564708709717, "learning_rate": 2.7598081885584237e-06, "loss": 0.5154, "step": 10393 }, { "epoch": 0.66, "grad_norm": 1.087424635887146, "learning_rate": 2.758890982672102e-06, "loss": 0.5118, "step": 10394 }, { "epoch": 0.66, "grad_norm": 1.0438737869262695, "learning_rate": 2.757973871151083e-06, "loss": 0.5584, "step": 10395 }, { "epoch": 0.66, "grad_norm": 1.0562806129455566, "learning_rate": 2.757056854033985e-06, "loss": 0.5106, "step": 10396 }, { "epoch": 0.66, "grad_norm": 1.0467734336853027, "learning_rate": 2.7561399313594205e-06, "loss": 0.5218, "step": 10397 }, { "epoch": 0.66, "grad_norm": 1.2227981090545654, "learning_rate": 2.7552231031659972e-06, "loss": 0.5171, "step": 10398 }, { "epoch": 0.66, "grad_norm": 1.0784660577774048, "learning_rate": 2.75430636949232e-06, "loss": 0.5352, "step": 10399 }, { "epoch": 0.66, "grad_norm": 0.9642545580863953, "learning_rate": 2.753389730376992e-06, "loss": 0.5041, "step": 10400 }, { "epoch": 0.66, "grad_norm": 1.0255966186523438, "learning_rate": 2.752473185858603e-06, "loss": 0.51, "step": 10401 }, { "epoch": 0.66, "grad_norm": 1.1170417070388794, "learning_rate": 2.7515567359757526e-06, "loss": 0.5409, "step": 10402 }, { "epoch": 0.66, "grad_norm": 1.0311150550842285, "learning_rate": 2.750640380767025e-06, "loss": 0.5213, "step": 10403 }, { "epoch": 0.66, "grad_norm": 1.0396472215652466, "learning_rate": 2.7497241202710056e-06, "loss": 0.5205, "step": 10404 }, { "epoch": 0.66, "grad_norm": 1.0209033489227295, "learning_rate": 2.7488079545262757e-06, "loss": 0.534, "step": 10405 }, { "epoch": 0.66, "grad_norm": 1.1066263914108276, "learning_rate": 2.747891883571412e-06, "loss": 0.5341, "step": 10406 }, { "epoch": 0.66, "grad_norm": 1.022965669631958, "learning_rate": 2.746975907444986e-06, "loss": 0.4647, "step": 10407 }, { "epoch": 0.66, "grad_norm": 1.0523531436920166, "learning_rate": 2.7460600261855687e-06, "loss": 0.551, "step": 10408 }, { "epoch": 0.66, "grad_norm": 1.0454198122024536, "learning_rate": 2.7451442398317206e-06, "loss": 0.5579, "step": 10409 }, { "epoch": 0.66, "grad_norm": 1.031033992767334, "learning_rate": 2.7442285484220055e-06, "loss": 0.4956, "step": 10410 }, { "epoch": 0.66, "grad_norm": 1.082212209701538, "learning_rate": 2.7433129519949784e-06, "loss": 0.5204, "step": 10411 }, { "epoch": 0.66, "grad_norm": 1.0533207654953003, "learning_rate": 2.742397450589193e-06, "loss": 0.5319, "step": 10412 }, { "epoch": 0.66, "grad_norm": 0.9724899530410767, "learning_rate": 2.7414820442431976e-06, "loss": 0.5085, "step": 10413 }, { "epoch": 0.66, "grad_norm": 1.1080093383789062, "learning_rate": 2.7405667329955344e-06, "loss": 0.546, "step": 10414 }, { "epoch": 0.66, "grad_norm": 1.017476201057434, "learning_rate": 2.739651516884747e-06, "loss": 0.454, "step": 10415 }, { "epoch": 0.66, "grad_norm": 1.0047162771224976, "learning_rate": 2.7387363959493733e-06, "loss": 0.5082, "step": 10416 }, { "epoch": 0.66, "grad_norm": 1.0568748712539673, "learning_rate": 2.737821370227942e-06, "loss": 0.5333, "step": 10417 }, { "epoch": 0.66, "grad_norm": 1.1608695983886719, "learning_rate": 2.7369064397589828e-06, "loss": 0.5212, "step": 10418 }, { "epoch": 0.66, "grad_norm": 0.9917661547660828, "learning_rate": 2.7359916045810207e-06, "loss": 0.4802, "step": 10419 }, { "epoch": 0.66, "grad_norm": 1.007771372795105, "learning_rate": 2.7350768647325766e-06, "loss": 0.5001, "step": 10420 }, { "epoch": 0.66, "grad_norm": 0.9977226257324219, "learning_rate": 2.734162220252168e-06, "loss": 0.447, "step": 10421 }, { "epoch": 0.66, "grad_norm": 1.012022852897644, "learning_rate": 2.7332476711783044e-06, "loss": 0.5321, "step": 10422 }, { "epoch": 0.66, "grad_norm": 1.0312007665634155, "learning_rate": 2.732333217549494e-06, "loss": 0.5302, "step": 10423 }, { "epoch": 0.66, "grad_norm": 1.0605684518814087, "learning_rate": 2.7314188594042466e-06, "loss": 0.4972, "step": 10424 }, { "epoch": 0.66, "grad_norm": 1.0762418508529663, "learning_rate": 2.7305045967810585e-06, "loss": 0.4942, "step": 10425 }, { "epoch": 0.66, "grad_norm": 1.0472568273544312, "learning_rate": 2.7295904297184262e-06, "loss": 0.5072, "step": 10426 }, { "epoch": 0.66, "grad_norm": 1.0426957607269287, "learning_rate": 2.7286763582548424e-06, "loss": 0.5053, "step": 10427 }, { "epoch": 0.66, "grad_norm": 1.085888147354126, "learning_rate": 2.7277623824287957e-06, "loss": 0.4957, "step": 10428 }, { "epoch": 0.66, "grad_norm": 1.0257681608200073, "learning_rate": 2.726848502278773e-06, "loss": 0.5649, "step": 10429 }, { "epoch": 0.66, "grad_norm": 1.0250338315963745, "learning_rate": 2.7259347178432493e-06, "loss": 0.4895, "step": 10430 }, { "epoch": 0.66, "grad_norm": 1.0389498472213745, "learning_rate": 2.7250210291607026e-06, "loss": 0.5052, "step": 10431 }, { "epoch": 0.66, "grad_norm": 1.0429414510726929, "learning_rate": 2.7241074362696108e-06, "loss": 0.4572, "step": 10432 }, { "epoch": 0.66, "grad_norm": 1.091538667678833, "learning_rate": 2.7231939392084347e-06, "loss": 0.5382, "step": 10433 }, { "epoch": 0.66, "grad_norm": 1.0113235712051392, "learning_rate": 2.7222805380156414e-06, "loss": 0.5437, "step": 10434 }, { "epoch": 0.66, "grad_norm": 1.0240751504898071, "learning_rate": 2.7213672327296914e-06, "loss": 0.5194, "step": 10435 }, { "epoch": 0.66, "grad_norm": 1.016080379486084, "learning_rate": 2.72045402338904e-06, "loss": 0.5137, "step": 10436 }, { "epoch": 0.66, "grad_norm": 1.0559788942337036, "learning_rate": 2.719540910032142e-06, "loss": 0.4918, "step": 10437 }, { "epoch": 0.66, "grad_norm": 1.0363986492156982, "learning_rate": 2.7186278926974406e-06, "loss": 0.5293, "step": 10438 }, { "epoch": 0.66, "grad_norm": 1.0203218460083008, "learning_rate": 2.717714971423383e-06, "loss": 0.4876, "step": 10439 }, { "epoch": 0.66, "grad_norm": 1.032476544380188, "learning_rate": 2.7168021462484084e-06, "loss": 0.4804, "step": 10440 }, { "epoch": 0.66, "grad_norm": 1.0136157274246216, "learning_rate": 2.715889417210953e-06, "loss": 0.5163, "step": 10441 }, { "epoch": 0.66, "grad_norm": 1.0525606870651245, "learning_rate": 2.714976784349448e-06, "loss": 0.51, "step": 10442 }, { "epoch": 0.66, "grad_norm": 1.0758986473083496, "learning_rate": 2.7140642477023237e-06, "loss": 0.5201, "step": 10443 }, { "epoch": 0.66, "grad_norm": 1.0525035858154297, "learning_rate": 2.7131518073079976e-06, "loss": 0.5402, "step": 10444 }, { "epoch": 0.66, "grad_norm": 1.0384174585342407, "learning_rate": 2.7122394632048974e-06, "loss": 0.5201, "step": 10445 }, { "epoch": 0.66, "grad_norm": 1.0647464990615845, "learning_rate": 2.7113272154314328e-06, "loss": 0.5336, "step": 10446 }, { "epoch": 0.66, "grad_norm": 1.0159748792648315, "learning_rate": 2.710415064026018e-06, "loss": 0.4487, "step": 10447 }, { "epoch": 0.66, "grad_norm": 1.0917627811431885, "learning_rate": 2.7095030090270596e-06, "loss": 0.4907, "step": 10448 }, { "epoch": 0.66, "grad_norm": 0.9589986801147461, "learning_rate": 2.7085910504729617e-06, "loss": 0.5268, "step": 10449 }, { "epoch": 0.66, "grad_norm": 0.976966142654419, "learning_rate": 2.7076791884021236e-06, "loss": 0.4598, "step": 10450 }, { "epoch": 0.66, "grad_norm": 1.0308228731155396, "learning_rate": 2.7067674228529417e-06, "loss": 0.5131, "step": 10451 }, { "epoch": 0.66, "grad_norm": 1.0818092823028564, "learning_rate": 2.7058557538638026e-06, "loss": 0.493, "step": 10452 }, { "epoch": 0.66, "grad_norm": 1.0554735660552979, "learning_rate": 2.7049441814731007e-06, "loss": 0.5117, "step": 10453 }, { "epoch": 0.66, "grad_norm": 0.9924067854881287, "learning_rate": 2.704032705719214e-06, "loss": 0.5183, "step": 10454 }, { "epoch": 0.66, "grad_norm": 1.1226341724395752, "learning_rate": 2.703121326640522e-06, "loss": 0.5464, "step": 10455 }, { "epoch": 0.66, "grad_norm": 0.9563577771186829, "learning_rate": 2.702210044275401e-06, "loss": 0.4453, "step": 10456 }, { "epoch": 0.66, "grad_norm": 1.0308963060379028, "learning_rate": 2.7012988586622224e-06, "loss": 0.5021, "step": 10457 }, { "epoch": 0.66, "grad_norm": 1.0026469230651855, "learning_rate": 2.7003877698393512e-06, "loss": 0.4859, "step": 10458 }, { "epoch": 0.66, "grad_norm": 1.0531280040740967, "learning_rate": 2.6994767778451535e-06, "loss": 0.4827, "step": 10459 }, { "epoch": 0.66, "grad_norm": 1.0416374206542969, "learning_rate": 2.6985658827179845e-06, "loss": 0.519, "step": 10460 }, { "epoch": 0.66, "grad_norm": 0.9877068400382996, "learning_rate": 2.6976550844961992e-06, "loss": 0.4707, "step": 10461 }, { "epoch": 0.66, "grad_norm": 1.0973079204559326, "learning_rate": 2.6967443832181496e-06, "loss": 0.5126, "step": 10462 }, { "epoch": 0.66, "grad_norm": 0.960408091545105, "learning_rate": 2.6958337789221813e-06, "loss": 0.4921, "step": 10463 }, { "epoch": 0.66, "grad_norm": 1.1162091493606567, "learning_rate": 2.694923271646637e-06, "loss": 0.4924, "step": 10464 }, { "epoch": 0.66, "grad_norm": 0.9959160089492798, "learning_rate": 2.694012861429855e-06, "loss": 0.5197, "step": 10465 }, { "epoch": 0.66, "grad_norm": 1.061828851699829, "learning_rate": 2.693102548310169e-06, "loss": 0.5235, "step": 10466 }, { "epoch": 0.66, "grad_norm": 1.0121952295303345, "learning_rate": 2.6921923323259124e-06, "loss": 0.5247, "step": 10467 }, { "epoch": 0.66, "grad_norm": 1.118021845817566, "learning_rate": 2.691282213515406e-06, "loss": 0.5105, "step": 10468 }, { "epoch": 0.66, "grad_norm": 0.962216317653656, "learning_rate": 2.690372191916974e-06, "loss": 0.499, "step": 10469 }, { "epoch": 0.66, "grad_norm": 1.0699259042739868, "learning_rate": 2.6894622675689345e-06, "loss": 0.5279, "step": 10470 }, { "epoch": 0.66, "grad_norm": 1.0497336387634277, "learning_rate": 2.6885524405096007e-06, "loss": 0.5426, "step": 10471 }, { "epoch": 0.66, "grad_norm": 1.1183973550796509, "learning_rate": 2.687642710777284e-06, "loss": 0.5466, "step": 10472 }, { "epoch": 0.66, "grad_norm": 1.0451266765594482, "learning_rate": 2.6867330784102896e-06, "loss": 0.4896, "step": 10473 }, { "epoch": 0.66, "grad_norm": 1.0753915309906006, "learning_rate": 2.6858235434469138e-06, "loss": 0.5291, "step": 10474 }, { "epoch": 0.66, "grad_norm": 0.9498035907745361, "learning_rate": 2.684914105925463e-06, "loss": 0.4851, "step": 10475 }, { "epoch": 0.66, "grad_norm": 1.0795294046401978, "learning_rate": 2.6840047658842226e-06, "loss": 0.4516, "step": 10476 }, { "epoch": 0.66, "grad_norm": 1.12007737159729, "learning_rate": 2.683095523361486e-06, "loss": 0.5383, "step": 10477 }, { "epoch": 0.66, "grad_norm": 1.1048475503921509, "learning_rate": 2.682186378395536e-06, "loss": 0.5598, "step": 10478 }, { "epoch": 0.66, "grad_norm": 1.258007526397705, "learning_rate": 2.6812773310246547e-06, "loss": 0.5166, "step": 10479 }, { "epoch": 0.66, "grad_norm": 0.981302797794342, "learning_rate": 2.680368381287119e-06, "loss": 0.4954, "step": 10480 }, { "epoch": 0.66, "grad_norm": 1.0412334203720093, "learning_rate": 2.6794595292212035e-06, "loss": 0.4819, "step": 10481 }, { "epoch": 0.66, "grad_norm": 1.0630468130111694, "learning_rate": 2.67855077486517e-06, "loss": 0.5235, "step": 10482 }, { "epoch": 0.66, "grad_norm": 1.1821410655975342, "learning_rate": 2.677642118257292e-06, "loss": 0.5377, "step": 10483 }, { "epoch": 0.66, "grad_norm": 1.074578046798706, "learning_rate": 2.6767335594358234e-06, "loss": 0.5425, "step": 10484 }, { "epoch": 0.66, "grad_norm": 0.9681217670440674, "learning_rate": 2.675825098439023e-06, "loss": 0.4821, "step": 10485 }, { "epoch": 0.66, "grad_norm": 1.0614813566207886, "learning_rate": 2.6749167353051443e-06, "loss": 0.5295, "step": 10486 }, { "epoch": 0.66, "grad_norm": 1.0384877920150757, "learning_rate": 2.674008470072429e-06, "loss": 0.482, "step": 10487 }, { "epoch": 0.66, "grad_norm": 1.1568013429641724, "learning_rate": 2.673100302779128e-06, "loss": 0.5158, "step": 10488 }, { "epoch": 0.66, "grad_norm": 0.9850515723228455, "learning_rate": 2.6721922334634804e-06, "loss": 0.4893, "step": 10489 }, { "epoch": 0.66, "grad_norm": 1.0520575046539307, "learning_rate": 2.671284262163718e-06, "loss": 0.5214, "step": 10490 }, { "epoch": 0.66, "grad_norm": 1.033778190612793, "learning_rate": 2.6703763889180746e-06, "loss": 0.4863, "step": 10491 }, { "epoch": 0.66, "grad_norm": 1.0140942335128784, "learning_rate": 2.6694686137647767e-06, "loss": 0.5278, "step": 10492 }, { "epoch": 0.66, "grad_norm": 1.0416064262390137, "learning_rate": 2.668560936742048e-06, "loss": 0.48, "step": 10493 }, { "epoch": 0.66, "grad_norm": 1.0666289329528809, "learning_rate": 2.6676533578881102e-06, "loss": 0.4862, "step": 10494 }, { "epoch": 0.66, "grad_norm": 0.9985889196395874, "learning_rate": 2.6667458772411724e-06, "loss": 0.488, "step": 10495 }, { "epoch": 0.66, "grad_norm": 1.0429118871688843, "learning_rate": 2.66583849483945e-06, "loss": 0.5285, "step": 10496 }, { "epoch": 0.67, "grad_norm": 0.9838635325431824, "learning_rate": 2.664931210721151e-06, "loss": 0.5122, "step": 10497 }, { "epoch": 0.67, "grad_norm": 0.9577754735946655, "learning_rate": 2.6640240249244744e-06, "loss": 0.4817, "step": 10498 }, { "epoch": 0.67, "grad_norm": 0.9911318421363831, "learning_rate": 2.6631169374876185e-06, "loss": 0.4981, "step": 10499 }, { "epoch": 0.67, "grad_norm": 0.989579439163208, "learning_rate": 2.6622099484487794e-06, "loss": 0.5132, "step": 10500 }, { "epoch": 0.67, "grad_norm": 1.030277967453003, "learning_rate": 2.6613030578461476e-06, "loss": 0.4955, "step": 10501 }, { "epoch": 0.67, "grad_norm": 1.0554670095443726, "learning_rate": 2.6603962657179094e-06, "loss": 0.5048, "step": 10502 }, { "epoch": 0.67, "grad_norm": 1.0888514518737793, "learning_rate": 2.6594895721022436e-06, "loss": 0.5773, "step": 10503 }, { "epoch": 0.67, "grad_norm": 1.0285956859588623, "learning_rate": 2.6585829770373286e-06, "loss": 0.5055, "step": 10504 }, { "epoch": 0.67, "grad_norm": 1.151036262512207, "learning_rate": 2.657676480561342e-06, "loss": 0.5474, "step": 10505 }, { "epoch": 0.67, "grad_norm": 0.9403602480888367, "learning_rate": 2.6567700827124494e-06, "loss": 0.4994, "step": 10506 }, { "epoch": 0.67, "grad_norm": 1.0743473768234253, "learning_rate": 2.655863783528817e-06, "loss": 0.5122, "step": 10507 }, { "epoch": 0.67, "grad_norm": 1.1450196504592896, "learning_rate": 2.6549575830486053e-06, "loss": 0.5577, "step": 10508 }, { "epoch": 0.67, "grad_norm": 1.0517287254333496, "learning_rate": 2.6540514813099728e-06, "loss": 0.5208, "step": 10509 }, { "epoch": 0.67, "grad_norm": 1.0516631603240967, "learning_rate": 2.6531454783510736e-06, "loss": 0.5507, "step": 10510 }, { "epoch": 0.67, "grad_norm": 1.1319580078125, "learning_rate": 2.6522395742100514e-06, "loss": 0.546, "step": 10511 }, { "epoch": 0.67, "grad_norm": 1.0354925394058228, "learning_rate": 2.651333768925052e-06, "loss": 0.5001, "step": 10512 }, { "epoch": 0.67, "grad_norm": 1.1074974536895752, "learning_rate": 2.6504280625342203e-06, "loss": 0.5803, "step": 10513 }, { "epoch": 0.67, "grad_norm": 1.0434691905975342, "learning_rate": 2.6495224550756888e-06, "loss": 0.5396, "step": 10514 }, { "epoch": 0.67, "grad_norm": 1.013555884361267, "learning_rate": 2.6486169465875887e-06, "loss": 0.5151, "step": 10515 }, { "epoch": 0.67, "grad_norm": 1.0723886489868164, "learning_rate": 2.647711537108052e-06, "loss": 0.5255, "step": 10516 }, { "epoch": 0.67, "grad_norm": 1.099655270576477, "learning_rate": 2.6468062266751955e-06, "loss": 0.5143, "step": 10517 }, { "epoch": 0.67, "grad_norm": 0.9594826698303223, "learning_rate": 2.6459010153271456e-06, "loss": 0.482, "step": 10518 }, { "epoch": 0.67, "grad_norm": 1.0790048837661743, "learning_rate": 2.6449959031020134e-06, "loss": 0.5033, "step": 10519 }, { "epoch": 0.67, "grad_norm": 1.1070122718811035, "learning_rate": 2.6440908900379115e-06, "loss": 0.4909, "step": 10520 }, { "epoch": 0.67, "grad_norm": 0.9774593114852905, "learning_rate": 2.6431859761729462e-06, "loss": 0.5239, "step": 10521 }, { "epoch": 0.67, "grad_norm": 1.0332831144332886, "learning_rate": 2.6422811615452205e-06, "loss": 0.4888, "step": 10522 }, { "epoch": 0.67, "grad_norm": 1.0701346397399902, "learning_rate": 2.6413764461928335e-06, "loss": 0.4977, "step": 10523 }, { "epoch": 0.67, "grad_norm": 0.9489225149154663, "learning_rate": 2.6404718301538814e-06, "loss": 0.5256, "step": 10524 }, { "epoch": 0.67, "grad_norm": 0.9808928370475769, "learning_rate": 2.639567313466448e-06, "loss": 0.4806, "step": 10525 }, { "epoch": 0.67, "grad_norm": 0.9877341985702515, "learning_rate": 2.6386628961686277e-06, "loss": 0.4937, "step": 10526 }, { "epoch": 0.67, "grad_norm": 1.076948642730713, "learning_rate": 2.6377585782984972e-06, "loss": 0.4969, "step": 10527 }, { "epoch": 0.67, "grad_norm": 1.0355870723724365, "learning_rate": 2.636854359894134e-06, "loss": 0.4837, "step": 10528 }, { "epoch": 0.67, "grad_norm": 1.047590732574463, "learning_rate": 2.635950240993614e-06, "loss": 0.5303, "step": 10529 }, { "epoch": 0.67, "grad_norm": 1.0804356336593628, "learning_rate": 2.635046221635005e-06, "loss": 0.5481, "step": 10530 }, { "epoch": 0.67, "grad_norm": 0.9647104144096375, "learning_rate": 2.6341423018563727e-06, "loss": 0.5039, "step": 10531 }, { "epoch": 0.67, "grad_norm": 1.0416531562805176, "learning_rate": 2.633238481695779e-06, "loss": 0.4922, "step": 10532 }, { "epoch": 0.67, "grad_norm": 1.0723743438720703, "learning_rate": 2.6323347611912786e-06, "loss": 0.5032, "step": 10533 }, { "epoch": 0.67, "grad_norm": 1.1137053966522217, "learning_rate": 2.6314311403809224e-06, "loss": 0.5648, "step": 10534 }, { "epoch": 0.67, "grad_norm": 0.9235475063323975, "learning_rate": 2.630527619302765e-06, "loss": 0.4404, "step": 10535 }, { "epoch": 0.67, "grad_norm": 1.0233099460601807, "learning_rate": 2.6296241979948455e-06, "loss": 0.5304, "step": 10536 }, { "epoch": 0.67, "grad_norm": 1.0859589576721191, "learning_rate": 2.6287208764952045e-06, "loss": 0.5345, "step": 10537 }, { "epoch": 0.67, "grad_norm": 1.0127158164978027, "learning_rate": 2.6278176548418783e-06, "loss": 0.5178, "step": 10538 }, { "epoch": 0.67, "grad_norm": 1.0827937126159668, "learning_rate": 2.6269145330728985e-06, "loss": 0.4957, "step": 10539 }, { "epoch": 0.67, "grad_norm": 1.040254831314087, "learning_rate": 2.626011511226294e-06, "loss": 0.5154, "step": 10540 }, { "epoch": 0.67, "grad_norm": 1.0404975414276123, "learning_rate": 2.625108589340085e-06, "loss": 0.5102, "step": 10541 }, { "epoch": 0.67, "grad_norm": 0.9493241906166077, "learning_rate": 2.624205767452289e-06, "loss": 0.4422, "step": 10542 }, { "epoch": 0.67, "grad_norm": 0.9997432827949524, "learning_rate": 2.623303045600928e-06, "loss": 0.4639, "step": 10543 }, { "epoch": 0.67, "grad_norm": 1.1698287725448608, "learning_rate": 2.622400423824005e-06, "loss": 0.5274, "step": 10544 }, { "epoch": 0.67, "grad_norm": 1.0547444820404053, "learning_rate": 2.62149790215953e-06, "loss": 0.4699, "step": 10545 }, { "epoch": 0.67, "grad_norm": 0.9943386912345886, "learning_rate": 2.6205954806455057e-06, "loss": 0.5104, "step": 10546 }, { "epoch": 0.67, "grad_norm": 1.054709553718567, "learning_rate": 2.6196931593199247e-06, "loss": 0.5321, "step": 10547 }, { "epoch": 0.67, "grad_norm": 1.1206477880477905, "learning_rate": 2.618790938220788e-06, "loss": 0.51, "step": 10548 }, { "epoch": 0.67, "grad_norm": 1.0720930099487305, "learning_rate": 2.617888817386079e-06, "loss": 0.4765, "step": 10549 }, { "epoch": 0.67, "grad_norm": 1.0825947523117065, "learning_rate": 2.6169867968537856e-06, "loss": 0.5156, "step": 10550 }, { "epoch": 0.67, "grad_norm": 1.0014879703521729, "learning_rate": 2.616084876661888e-06, "loss": 0.468, "step": 10551 }, { "epoch": 0.67, "grad_norm": 0.965927243232727, "learning_rate": 2.6151830568483627e-06, "loss": 0.5206, "step": 10552 }, { "epoch": 0.67, "grad_norm": 1.0722675323486328, "learning_rate": 2.614281337451183e-06, "loss": 0.5103, "step": 10553 }, { "epoch": 0.67, "grad_norm": 1.0606327056884766, "learning_rate": 2.61337971850832e-06, "loss": 0.5609, "step": 10554 }, { "epoch": 0.67, "grad_norm": 0.9931026101112366, "learning_rate": 2.6124782000577296e-06, "loss": 0.4632, "step": 10555 }, { "epoch": 0.67, "grad_norm": 1.0780014991760254, "learning_rate": 2.6115767821373807e-06, "loss": 0.4643, "step": 10556 }, { "epoch": 0.67, "grad_norm": 1.0118858814239502, "learning_rate": 2.610675464785223e-06, "loss": 0.5311, "step": 10557 }, { "epoch": 0.67, "grad_norm": 0.9681828022003174, "learning_rate": 2.6097742480392097e-06, "loss": 0.4892, "step": 10558 }, { "epoch": 0.67, "grad_norm": 0.9976950287818909, "learning_rate": 2.6088731319372874e-06, "loss": 0.5262, "step": 10559 }, { "epoch": 0.67, "grad_norm": 1.0612802505493164, "learning_rate": 2.6079721165173994e-06, "loss": 0.5053, "step": 10560 }, { "epoch": 0.67, "grad_norm": 1.2253762483596802, "learning_rate": 2.6070712018174847e-06, "loss": 0.4711, "step": 10561 }, { "epoch": 0.67, "grad_norm": 1.0154792070388794, "learning_rate": 2.6061703878754784e-06, "loss": 0.5201, "step": 10562 }, { "epoch": 0.67, "grad_norm": 1.0816336870193481, "learning_rate": 2.6052696747293087e-06, "loss": 0.488, "step": 10563 }, { "epoch": 0.67, "grad_norm": 1.0748910903930664, "learning_rate": 2.6043690624169014e-06, "loss": 0.5206, "step": 10564 }, { "epoch": 0.67, "grad_norm": 1.0948655605316162, "learning_rate": 2.6034685509761803e-06, "loss": 0.5116, "step": 10565 }, { "epoch": 0.67, "grad_norm": 1.0533651113510132, "learning_rate": 2.602568140445061e-06, "loss": 0.5126, "step": 10566 }, { "epoch": 0.67, "grad_norm": 1.120794653892517, "learning_rate": 2.6016678308614583e-06, "loss": 0.4874, "step": 10567 }, { "epoch": 0.67, "grad_norm": 0.9894031882286072, "learning_rate": 2.600767622263277e-06, "loss": 0.4943, "step": 10568 }, { "epoch": 0.67, "grad_norm": 1.0337748527526855, "learning_rate": 2.599867514688427e-06, "loss": 0.5128, "step": 10569 }, { "epoch": 0.67, "grad_norm": 0.9865636229515076, "learning_rate": 2.598967508174808e-06, "loss": 0.4648, "step": 10570 }, { "epoch": 0.67, "grad_norm": 1.0098962783813477, "learning_rate": 2.598067602760313e-06, "loss": 0.4785, "step": 10571 }, { "epoch": 0.67, "grad_norm": 0.9975713491439819, "learning_rate": 2.597167798482835e-06, "loss": 0.5205, "step": 10572 }, { "epoch": 0.67, "grad_norm": 1.1072999238967896, "learning_rate": 2.596268095380263e-06, "loss": 0.5235, "step": 10573 }, { "epoch": 0.67, "grad_norm": 1.0465779304504395, "learning_rate": 2.5953684934904788e-06, "loss": 0.4864, "step": 10574 }, { "epoch": 0.67, "grad_norm": 1.051168441772461, "learning_rate": 2.5944689928513643e-06, "loss": 0.4652, "step": 10575 }, { "epoch": 0.67, "grad_norm": 1.0790677070617676, "learning_rate": 2.593569593500789e-06, "loss": 0.4975, "step": 10576 }, { "epoch": 0.67, "grad_norm": 1.0252071619033813, "learning_rate": 2.592670295476628e-06, "loss": 0.4697, "step": 10577 }, { "epoch": 0.67, "grad_norm": 1.033689260482788, "learning_rate": 2.591771098816749e-06, "loss": 0.5521, "step": 10578 }, { "epoch": 0.67, "grad_norm": 1.0405455827713013, "learning_rate": 2.5908720035590085e-06, "loss": 0.5088, "step": 10579 }, { "epoch": 0.67, "grad_norm": 1.0239884853363037, "learning_rate": 2.5899730097412678e-06, "loss": 0.5009, "step": 10580 }, { "epoch": 0.67, "grad_norm": 1.033341646194458, "learning_rate": 2.58907411740138e-06, "loss": 0.5109, "step": 10581 }, { "epoch": 0.67, "grad_norm": 0.9961441159248352, "learning_rate": 2.5881753265771938e-06, "loss": 0.5134, "step": 10582 }, { "epoch": 0.67, "grad_norm": 0.9867962598800659, "learning_rate": 2.587276637306556e-06, "loss": 0.448, "step": 10583 }, { "epoch": 0.67, "grad_norm": 1.0330294370651245, "learning_rate": 2.586378049627304e-06, "loss": 0.5084, "step": 10584 }, { "epoch": 0.67, "grad_norm": 1.0358963012695312, "learning_rate": 2.5854795635772743e-06, "loss": 0.5136, "step": 10585 }, { "epoch": 0.67, "grad_norm": 1.01925790309906, "learning_rate": 2.584581179194304e-06, "loss": 0.5117, "step": 10586 }, { "epoch": 0.67, "grad_norm": 1.0224287509918213, "learning_rate": 2.5836828965162167e-06, "loss": 0.518, "step": 10587 }, { "epoch": 0.67, "grad_norm": 1.0976794958114624, "learning_rate": 2.582784715580836e-06, "loss": 0.5151, "step": 10588 }, { "epoch": 0.67, "grad_norm": 1.0719406604766846, "learning_rate": 2.581886636425983e-06, "loss": 0.5174, "step": 10589 }, { "epoch": 0.67, "grad_norm": 1.2258132696151733, "learning_rate": 2.580988659089471e-06, "loss": 0.533, "step": 10590 }, { "epoch": 0.67, "grad_norm": 1.126112699508667, "learning_rate": 2.580090783609114e-06, "loss": 0.545, "step": 10591 }, { "epoch": 0.67, "grad_norm": 1.0415563583374023, "learning_rate": 2.5791930100227133e-06, "loss": 0.4935, "step": 10592 }, { "epoch": 0.67, "grad_norm": 1.033074140548706, "learning_rate": 2.5782953383680733e-06, "loss": 0.4644, "step": 10593 }, { "epoch": 0.67, "grad_norm": 1.00439453125, "learning_rate": 2.5773977686829928e-06, "loss": 0.5112, "step": 10594 }, { "epoch": 0.67, "grad_norm": 1.0228551626205444, "learning_rate": 2.5765003010052643e-06, "loss": 0.4834, "step": 10595 }, { "epoch": 0.67, "grad_norm": 1.091952919960022, "learning_rate": 2.5756029353726777e-06, "loss": 0.5088, "step": 10596 }, { "epoch": 0.67, "grad_norm": 1.0610514879226685, "learning_rate": 2.574705671823019e-06, "loss": 0.538, "step": 10597 }, { "epoch": 0.67, "grad_norm": 1.093294620513916, "learning_rate": 2.5738085103940634e-06, "loss": 0.5479, "step": 10598 }, { "epoch": 0.67, "grad_norm": 1.0571091175079346, "learning_rate": 2.572911451123594e-06, "loss": 0.5387, "step": 10599 }, { "epoch": 0.67, "grad_norm": 1.0951002836227417, "learning_rate": 2.572014494049382e-06, "loss": 0.5231, "step": 10600 }, { "epoch": 0.67, "grad_norm": 1.038913369178772, "learning_rate": 2.571117639209191e-06, "loss": 0.5439, "step": 10601 }, { "epoch": 0.67, "grad_norm": 1.0661230087280273, "learning_rate": 2.5702208866407873e-06, "loss": 0.4539, "step": 10602 }, { "epoch": 0.67, "grad_norm": 0.9565131664276123, "learning_rate": 2.5693242363819292e-06, "loss": 0.4747, "step": 10603 }, { "epoch": 0.67, "grad_norm": 1.0338586568832397, "learning_rate": 2.5684276884703717e-06, "loss": 0.496, "step": 10604 }, { "epoch": 0.67, "grad_norm": 1.1044895648956299, "learning_rate": 2.567531242943867e-06, "loss": 0.5165, "step": 10605 }, { "epoch": 0.67, "grad_norm": 1.1141607761383057, "learning_rate": 2.5666348998401565e-06, "loss": 0.499, "step": 10606 }, { "epoch": 0.67, "grad_norm": 1.0243730545043945, "learning_rate": 2.565738659196987e-06, "loss": 0.4852, "step": 10607 }, { "epoch": 0.67, "grad_norm": 1.0987368822097778, "learning_rate": 2.5648425210520967e-06, "loss": 0.5726, "step": 10608 }, { "epoch": 0.67, "grad_norm": 1.0123798847198486, "learning_rate": 2.563946485443214e-06, "loss": 0.4462, "step": 10609 }, { "epoch": 0.67, "grad_norm": 1.0435576438903809, "learning_rate": 2.5630505524080707e-06, "loss": 0.488, "step": 10610 }, { "epoch": 0.67, "grad_norm": 1.0640788078308105, "learning_rate": 2.5621547219843905e-06, "loss": 0.5194, "step": 10611 }, { "epoch": 0.67, "grad_norm": 1.094903826713562, "learning_rate": 2.5612589942098952e-06, "loss": 0.4869, "step": 10612 }, { "epoch": 0.67, "grad_norm": 1.0041868686676025, "learning_rate": 2.560363369122301e-06, "loss": 0.5348, "step": 10613 }, { "epoch": 0.67, "grad_norm": 1.072270154953003, "learning_rate": 2.559467846759317e-06, "loss": 0.5234, "step": 10614 }, { "epoch": 0.67, "grad_norm": 1.0632047653198242, "learning_rate": 2.5585724271586505e-06, "loss": 0.5144, "step": 10615 }, { "epoch": 0.67, "grad_norm": 1.0923739671707153, "learning_rate": 2.557677110358009e-06, "loss": 0.5269, "step": 10616 }, { "epoch": 0.67, "grad_norm": 1.0472354888916016, "learning_rate": 2.556781896395087e-06, "loss": 0.4982, "step": 10617 }, { "epoch": 0.67, "grad_norm": 1.073376178741455, "learning_rate": 2.55588678530758e-06, "loss": 0.4807, "step": 10618 }, { "epoch": 0.67, "grad_norm": 1.096312165260315, "learning_rate": 2.5549917771331767e-06, "loss": 0.4738, "step": 10619 }, { "epoch": 0.67, "grad_norm": 1.0125393867492676, "learning_rate": 2.5540968719095656e-06, "loss": 0.4835, "step": 10620 }, { "epoch": 0.67, "grad_norm": 1.0725840330123901, "learning_rate": 2.5532020696744277e-06, "loss": 0.4868, "step": 10621 }, { "epoch": 0.67, "grad_norm": 1.1230672597885132, "learning_rate": 2.5523073704654374e-06, "loss": 0.5352, "step": 10622 }, { "epoch": 0.67, "grad_norm": 1.0457652807235718, "learning_rate": 2.5514127743202668e-06, "loss": 0.5059, "step": 10623 }, { "epoch": 0.67, "grad_norm": 1.0251890420913696, "learning_rate": 2.5505182812765894e-06, "loss": 0.5243, "step": 10624 }, { "epoch": 0.67, "grad_norm": 1.0891318321228027, "learning_rate": 2.549623891372065e-06, "loss": 0.539, "step": 10625 }, { "epoch": 0.67, "grad_norm": 1.1247227191925049, "learning_rate": 2.5487296046443537e-06, "loss": 0.4829, "step": 10626 }, { "epoch": 0.67, "grad_norm": 1.102414608001709, "learning_rate": 2.547835421131114e-06, "loss": 0.5509, "step": 10627 }, { "epoch": 0.67, "grad_norm": 1.024851679801941, "learning_rate": 2.5469413408699894e-06, "loss": 0.4753, "step": 10628 }, { "epoch": 0.67, "grad_norm": 1.136244773864746, "learning_rate": 2.546047363898636e-06, "loss": 0.5328, "step": 10629 }, { "epoch": 0.67, "grad_norm": 1.0303579568862915, "learning_rate": 2.545153490254689e-06, "loss": 0.5493, "step": 10630 }, { "epoch": 0.67, "grad_norm": 0.9925675392150879, "learning_rate": 2.5442597199757896e-06, "loss": 0.4668, "step": 10631 }, { "epoch": 0.67, "grad_norm": 1.0880279541015625, "learning_rate": 2.5433660530995696e-06, "loss": 0.5369, "step": 10632 }, { "epoch": 0.67, "grad_norm": 1.025112271308899, "learning_rate": 2.54247248966366e-06, "loss": 0.4766, "step": 10633 }, { "epoch": 0.67, "grad_norm": 1.0779856443405151, "learning_rate": 2.5415790297056843e-06, "loss": 0.477, "step": 10634 }, { "epoch": 0.67, "grad_norm": 1.0276527404785156, "learning_rate": 2.5406856732632647e-06, "loss": 0.5265, "step": 10635 }, { "epoch": 0.67, "grad_norm": 1.0492390394210815, "learning_rate": 2.539792420374013e-06, "loss": 0.5449, "step": 10636 }, { "epoch": 0.67, "grad_norm": 1.0722205638885498, "learning_rate": 2.5388992710755477e-06, "loss": 0.4909, "step": 10637 }, { "epoch": 0.67, "grad_norm": 0.9178640842437744, "learning_rate": 2.5380062254054706e-06, "loss": 0.4667, "step": 10638 }, { "epoch": 0.67, "grad_norm": 1.117708683013916, "learning_rate": 2.5371132834013867e-06, "loss": 0.5095, "step": 10639 }, { "epoch": 0.67, "grad_norm": 0.9836210012435913, "learning_rate": 2.5362204451008963e-06, "loss": 0.4706, "step": 10640 }, { "epoch": 0.67, "grad_norm": 1.0415635108947754, "learning_rate": 2.5353277105415887e-06, "loss": 0.536, "step": 10641 }, { "epoch": 0.67, "grad_norm": 1.0077011585235596, "learning_rate": 2.5344350797610597e-06, "loss": 0.4601, "step": 10642 }, { "epoch": 0.67, "grad_norm": 1.0329313278198242, "learning_rate": 2.533542552796893e-06, "loss": 0.4972, "step": 10643 }, { "epoch": 0.67, "grad_norm": 1.1889619827270508, "learning_rate": 2.5326501296866677e-06, "loss": 0.5188, "step": 10644 }, { "epoch": 0.67, "grad_norm": 0.997260570526123, "learning_rate": 2.531757810467963e-06, "loss": 0.4805, "step": 10645 }, { "epoch": 0.67, "grad_norm": 0.9727004766464233, "learning_rate": 2.53086559517835e-06, "loss": 0.504, "step": 10646 }, { "epoch": 0.67, "grad_norm": 1.0125079154968262, "learning_rate": 2.529973483855397e-06, "loss": 0.4527, "step": 10647 }, { "epoch": 0.67, "grad_norm": 1.019005537033081, "learning_rate": 2.52908147653667e-06, "loss": 0.4595, "step": 10648 }, { "epoch": 0.67, "grad_norm": 1.0392253398895264, "learning_rate": 2.5281895732597227e-06, "loss": 0.5012, "step": 10649 }, { "epoch": 0.67, "grad_norm": 1.103070855140686, "learning_rate": 2.527297774062115e-06, "loss": 0.4936, "step": 10650 }, { "epoch": 0.67, "grad_norm": 1.1274436712265015, "learning_rate": 2.5264060789813994e-06, "loss": 0.4775, "step": 10651 }, { "epoch": 0.67, "grad_norm": 0.9909786581993103, "learning_rate": 2.525514488055116e-06, "loss": 0.507, "step": 10652 }, { "epoch": 0.67, "grad_norm": 1.0899275541305542, "learning_rate": 2.5246230013208093e-06, "loss": 0.5599, "step": 10653 }, { "epoch": 0.67, "grad_norm": 0.9369357824325562, "learning_rate": 2.5237316188160165e-06, "loss": 0.4331, "step": 10654 }, { "epoch": 0.68, "grad_norm": 1.0517666339874268, "learning_rate": 2.522840340578272e-06, "loss": 0.5221, "step": 10655 }, { "epoch": 0.68, "grad_norm": 1.086459994316101, "learning_rate": 2.521949166645102e-06, "loss": 0.5641, "step": 10656 }, { "epoch": 0.68, "grad_norm": 1.0164384841918945, "learning_rate": 2.5210580970540354e-06, "loss": 0.4767, "step": 10657 }, { "epoch": 0.68, "grad_norm": 1.0612106323242188, "learning_rate": 2.5201671318425834e-06, "loss": 0.5062, "step": 10658 }, { "epoch": 0.68, "grad_norm": 1.0751674175262451, "learning_rate": 2.519276271048272e-06, "loss": 0.5299, "step": 10659 }, { "epoch": 0.68, "grad_norm": 1.109610915184021, "learning_rate": 2.5183855147086045e-06, "loss": 0.5302, "step": 10660 }, { "epoch": 0.68, "grad_norm": 1.0094627141952515, "learning_rate": 2.51749486286109e-06, "loss": 0.4559, "step": 10661 }, { "epoch": 0.68, "grad_norm": 0.9121614098548889, "learning_rate": 2.516604315543231e-06, "loss": 0.4979, "step": 10662 }, { "epoch": 0.68, "grad_norm": 1.1115944385528564, "learning_rate": 2.515713872792525e-06, "loss": 0.5419, "step": 10663 }, { "epoch": 0.68, "grad_norm": 1.0575385093688965, "learning_rate": 2.5148235346464654e-06, "loss": 0.4703, "step": 10664 }, { "epoch": 0.68, "grad_norm": 1.1535428762435913, "learning_rate": 2.5139333011425435e-06, "loss": 0.5102, "step": 10665 }, { "epoch": 0.68, "grad_norm": 0.9647922515869141, "learning_rate": 2.5130431723182386e-06, "loss": 0.4526, "step": 10666 }, { "epoch": 0.68, "grad_norm": 1.1463685035705566, "learning_rate": 2.512153148211038e-06, "loss": 0.5403, "step": 10667 }, { "epoch": 0.68, "grad_norm": 1.0591135025024414, "learning_rate": 2.5112632288584116e-06, "loss": 0.5183, "step": 10668 }, { "epoch": 0.68, "grad_norm": 1.058384895324707, "learning_rate": 2.5103734142978325e-06, "loss": 0.5627, "step": 10669 }, { "epoch": 0.68, "grad_norm": 1.0416806936264038, "learning_rate": 2.5094837045667684e-06, "loss": 0.4947, "step": 10670 }, { "epoch": 0.68, "grad_norm": 0.9496021270751953, "learning_rate": 2.508594099702682e-06, "loss": 0.5067, "step": 10671 }, { "epoch": 0.68, "grad_norm": 1.076542854309082, "learning_rate": 2.5077045997430304e-06, "loss": 0.5172, "step": 10672 }, { "epoch": 0.68, "grad_norm": 1.0082660913467407, "learning_rate": 2.5068152047252702e-06, "loss": 0.529, "step": 10673 }, { "epoch": 0.68, "grad_norm": 1.1301501989364624, "learning_rate": 2.5059259146868474e-06, "loss": 0.5218, "step": 10674 }, { "epoch": 0.68, "grad_norm": 1.0519551038742065, "learning_rate": 2.5050367296652075e-06, "loss": 0.4938, "step": 10675 }, { "epoch": 0.68, "grad_norm": 1.0395547151565552, "learning_rate": 2.504147649697791e-06, "loss": 0.5311, "step": 10676 }, { "epoch": 0.68, "grad_norm": 0.9280543327331543, "learning_rate": 2.5032586748220354e-06, "loss": 0.5124, "step": 10677 }, { "epoch": 0.68, "grad_norm": 1.0799845457077026, "learning_rate": 2.5023698050753732e-06, "loss": 0.5323, "step": 10678 }, { "epoch": 0.68, "grad_norm": 0.9525831937789917, "learning_rate": 2.5014810404952262e-06, "loss": 0.4677, "step": 10679 }, { "epoch": 0.68, "grad_norm": 0.9634466767311096, "learning_rate": 2.5005923811190226e-06, "loss": 0.499, "step": 10680 }, { "epoch": 0.68, "grad_norm": 1.0382812023162842, "learning_rate": 2.4997038269841804e-06, "loss": 0.4953, "step": 10681 }, { "epoch": 0.68, "grad_norm": 1.0796928405761719, "learning_rate": 2.498815378128111e-06, "loss": 0.5341, "step": 10682 }, { "epoch": 0.68, "grad_norm": 0.9890086054801941, "learning_rate": 2.497927034588225e-06, "loss": 0.4597, "step": 10683 }, { "epoch": 0.68, "grad_norm": 1.052189826965332, "learning_rate": 2.497038796401927e-06, "loss": 0.4985, "step": 10684 }, { "epoch": 0.68, "grad_norm": 1.0046368837356567, "learning_rate": 2.4961506636066185e-06, "loss": 0.5485, "step": 10685 }, { "epoch": 0.68, "grad_norm": 1.116420865058899, "learning_rate": 2.495262636239697e-06, "loss": 0.5483, "step": 10686 }, { "epoch": 0.68, "grad_norm": 1.1046727895736694, "learning_rate": 2.4943747143385503e-06, "loss": 0.5414, "step": 10687 }, { "epoch": 0.68, "grad_norm": 1.0905431509017944, "learning_rate": 2.4934868979405667e-06, "loss": 0.5231, "step": 10688 }, { "epoch": 0.68, "grad_norm": 1.111757755279541, "learning_rate": 2.492599187083134e-06, "loss": 0.5722, "step": 10689 }, { "epoch": 0.68, "grad_norm": 0.9807526469230652, "learning_rate": 2.491711581803625e-06, "loss": 0.4776, "step": 10690 }, { "epoch": 0.68, "grad_norm": 0.9862124919891357, "learning_rate": 2.490824082139415e-06, "loss": 0.4711, "step": 10691 }, { "epoch": 0.68, "grad_norm": 1.117592215538025, "learning_rate": 2.489936688127875e-06, "loss": 0.5298, "step": 10692 }, { "epoch": 0.68, "grad_norm": 1.0601587295532227, "learning_rate": 2.4890493998063685e-06, "loss": 0.5039, "step": 10693 }, { "epoch": 0.68, "grad_norm": 1.090985894203186, "learning_rate": 2.4881622172122595e-06, "loss": 0.5655, "step": 10694 }, { "epoch": 0.68, "grad_norm": 1.056349515914917, "learning_rate": 2.4872751403828986e-06, "loss": 0.5423, "step": 10695 }, { "epoch": 0.68, "grad_norm": 1.168630838394165, "learning_rate": 2.4863881693556393e-06, "loss": 0.5241, "step": 10696 }, { "epoch": 0.68, "grad_norm": 1.084328055381775, "learning_rate": 2.4855013041678335e-06, "loss": 0.4798, "step": 10697 }, { "epoch": 0.68, "grad_norm": 1.046592116355896, "learning_rate": 2.484614544856819e-06, "loss": 0.5097, "step": 10698 }, { "epoch": 0.68, "grad_norm": 1.0084978342056274, "learning_rate": 2.483727891459935e-06, "loss": 0.5117, "step": 10699 }, { "epoch": 0.68, "grad_norm": 1.095803141593933, "learning_rate": 2.482841344014516e-06, "loss": 0.4997, "step": 10700 }, { "epoch": 0.68, "grad_norm": 1.0160983800888062, "learning_rate": 2.4819549025578917e-06, "loss": 0.5153, "step": 10701 }, { "epoch": 0.68, "grad_norm": 1.0845803022384644, "learning_rate": 2.481068567127389e-06, "loss": 0.5097, "step": 10702 }, { "epoch": 0.68, "grad_norm": 1.0083107948303223, "learning_rate": 2.4801823377603236e-06, "loss": 0.4816, "step": 10703 }, { "epoch": 0.68, "grad_norm": 1.1475107669830322, "learning_rate": 2.4792962144940148e-06, "loss": 0.5146, "step": 10704 }, { "epoch": 0.68, "grad_norm": 1.1041582822799683, "learning_rate": 2.4784101973657724e-06, "loss": 0.5572, "step": 10705 }, { "epoch": 0.68, "grad_norm": 0.9976695775985718, "learning_rate": 2.4775242864129055e-06, "loss": 0.4891, "step": 10706 }, { "epoch": 0.68, "grad_norm": 1.0840741395950317, "learning_rate": 2.4766384816727164e-06, "loss": 0.5289, "step": 10707 }, { "epoch": 0.68, "grad_norm": 1.0682698488235474, "learning_rate": 2.475752783182504e-06, "loss": 0.5124, "step": 10708 }, { "epoch": 0.68, "grad_norm": 1.0782908201217651, "learning_rate": 2.4748671909795568e-06, "loss": 0.5061, "step": 10709 }, { "epoch": 0.68, "grad_norm": 1.015122652053833, "learning_rate": 2.4739817051011717e-06, "loss": 0.4699, "step": 10710 }, { "epoch": 0.68, "grad_norm": 1.0786633491516113, "learning_rate": 2.473096325584628e-06, "loss": 0.515, "step": 10711 }, { "epoch": 0.68, "grad_norm": 1.0838096141815186, "learning_rate": 2.4722110524672074e-06, "loss": 0.5407, "step": 10712 }, { "epoch": 0.68, "grad_norm": 1.1118197441101074, "learning_rate": 2.4713258857861856e-06, "loss": 0.5248, "step": 10713 }, { "epoch": 0.68, "grad_norm": 1.047063946723938, "learning_rate": 2.4704408255788342e-06, "loss": 0.5378, "step": 10714 }, { "epoch": 0.68, "grad_norm": 0.9615069031715393, "learning_rate": 2.4695558718824204e-06, "loss": 0.4926, "step": 10715 }, { "epoch": 0.68, "grad_norm": 0.9735535383224487, "learning_rate": 2.468671024734208e-06, "loss": 0.4964, "step": 10716 }, { "epoch": 0.68, "grad_norm": 1.0757182836532593, "learning_rate": 2.4677862841714485e-06, "loss": 0.4802, "step": 10717 }, { "epoch": 0.68, "grad_norm": 0.9946287870407104, "learning_rate": 2.4669016502314038e-06, "loss": 0.5065, "step": 10718 }, { "epoch": 0.68, "grad_norm": 1.1031183004379272, "learning_rate": 2.4660171229513165e-06, "loss": 0.5147, "step": 10719 }, { "epoch": 0.68, "grad_norm": 1.0394502878189087, "learning_rate": 2.465132702368433e-06, "loss": 0.4895, "step": 10720 }, { "epoch": 0.68, "grad_norm": 0.9271803498268127, "learning_rate": 2.4642483885199938e-06, "loss": 0.4821, "step": 10721 }, { "epoch": 0.68, "grad_norm": 1.0470598936080933, "learning_rate": 2.463364181443233e-06, "loss": 0.5455, "step": 10722 }, { "epoch": 0.68, "grad_norm": 1.0677751302719116, "learning_rate": 2.4624800811753826e-06, "loss": 0.5372, "step": 10723 }, { "epoch": 0.68, "grad_norm": 1.032324194908142, "learning_rate": 2.4615960877536706e-06, "loss": 0.4579, "step": 10724 }, { "epoch": 0.68, "grad_norm": 0.9706496596336365, "learning_rate": 2.4607122012153146e-06, "loss": 0.482, "step": 10725 }, { "epoch": 0.68, "grad_norm": 1.0008245706558228, "learning_rate": 2.459828421597534e-06, "loss": 0.4669, "step": 10726 }, { "epoch": 0.68, "grad_norm": 1.1143993139266968, "learning_rate": 2.458944748937543e-06, "loss": 0.5348, "step": 10727 }, { "epoch": 0.68, "grad_norm": 1.0865949392318726, "learning_rate": 2.4580611832725482e-06, "loss": 0.4915, "step": 10728 }, { "epoch": 0.68, "grad_norm": 1.0539220571517944, "learning_rate": 2.4571777246397543e-06, "loss": 0.5598, "step": 10729 }, { "epoch": 0.68, "grad_norm": 1.0416845083236694, "learning_rate": 2.456294373076361e-06, "loss": 0.497, "step": 10730 }, { "epoch": 0.68, "grad_norm": 1.0223973989486694, "learning_rate": 2.455411128619562e-06, "loss": 0.4578, "step": 10731 }, { "epoch": 0.68, "grad_norm": 1.1366970539093018, "learning_rate": 2.4545279913065513e-06, "loss": 0.5305, "step": 10732 }, { "epoch": 0.68, "grad_norm": 1.1712008714675903, "learning_rate": 2.4536449611745087e-06, "loss": 0.5753, "step": 10733 }, { "epoch": 0.68, "grad_norm": 1.0615066289901733, "learning_rate": 2.45276203826062e-06, "loss": 0.5046, "step": 10734 }, { "epoch": 0.68, "grad_norm": 0.9939412474632263, "learning_rate": 2.451879222602059e-06, "loss": 0.4732, "step": 10735 }, { "epoch": 0.68, "grad_norm": 1.2695436477661133, "learning_rate": 2.4509965142360013e-06, "loss": 0.5399, "step": 10736 }, { "epoch": 0.68, "grad_norm": 1.1129322052001953, "learning_rate": 2.4501139131996122e-06, "loss": 0.5113, "step": 10737 }, { "epoch": 0.68, "grad_norm": 1.116741418838501, "learning_rate": 2.4492314195300583e-06, "loss": 0.5192, "step": 10738 }, { "epoch": 0.68, "grad_norm": 1.0863484144210815, "learning_rate": 2.4483490332644918e-06, "loss": 0.5285, "step": 10739 }, { "epoch": 0.68, "grad_norm": 1.0622340440750122, "learning_rate": 2.4474667544400744e-06, "loss": 0.4946, "step": 10740 }, { "epoch": 0.68, "grad_norm": 1.0966776609420776, "learning_rate": 2.4465845830939504e-06, "loss": 0.553, "step": 10741 }, { "epoch": 0.68, "grad_norm": 0.9429232478141785, "learning_rate": 2.4457025192632672e-06, "loss": 0.452, "step": 10742 }, { "epoch": 0.68, "grad_norm": 0.9729829430580139, "learning_rate": 2.444820562985165e-06, "loss": 0.4634, "step": 10743 }, { "epoch": 0.68, "grad_norm": 1.085930585861206, "learning_rate": 2.443938714296781e-06, "loss": 0.5135, "step": 10744 }, { "epoch": 0.68, "grad_norm": 0.9769576787948608, "learning_rate": 2.4430569732352444e-06, "loss": 0.5007, "step": 10745 }, { "epoch": 0.68, "grad_norm": 1.0735533237457275, "learning_rate": 2.4421753398376865e-06, "loss": 0.5416, "step": 10746 }, { "epoch": 0.68, "grad_norm": 1.0380295515060425, "learning_rate": 2.441293814141223e-06, "loss": 0.5186, "step": 10747 }, { "epoch": 0.68, "grad_norm": 1.0052216053009033, "learning_rate": 2.4404123961829795e-06, "loss": 0.4998, "step": 10748 }, { "epoch": 0.68, "grad_norm": 0.9670013189315796, "learning_rate": 2.4395310860000644e-06, "loss": 0.4764, "step": 10749 }, { "epoch": 0.68, "grad_norm": 1.0874783992767334, "learning_rate": 2.438649883629588e-06, "loss": 0.5141, "step": 10750 }, { "epoch": 0.68, "grad_norm": 1.0350924730300903, "learning_rate": 2.437768789108656e-06, "loss": 0.5431, "step": 10751 }, { "epoch": 0.68, "grad_norm": 1.0951682329177856, "learning_rate": 2.4368878024743638e-06, "loss": 0.4804, "step": 10752 }, { "epoch": 0.68, "grad_norm": 1.0911171436309814, "learning_rate": 2.4360069237638114e-06, "loss": 0.522, "step": 10753 }, { "epoch": 0.68, "grad_norm": 1.0631780624389648, "learning_rate": 2.43512615301409e-06, "loss": 0.4894, "step": 10754 }, { "epoch": 0.68, "grad_norm": 1.0774295330047607, "learning_rate": 2.434245490262282e-06, "loss": 0.5196, "step": 10755 }, { "epoch": 0.68, "grad_norm": 1.0458868741989136, "learning_rate": 2.4333649355454704e-06, "loss": 0.528, "step": 10756 }, { "epoch": 0.68, "grad_norm": 1.0356422662734985, "learning_rate": 2.4324844889007328e-06, "loss": 0.5465, "step": 10757 }, { "epoch": 0.68, "grad_norm": 1.0376536846160889, "learning_rate": 2.4316041503651417e-06, "loss": 0.4611, "step": 10758 }, { "epoch": 0.68, "grad_norm": 1.1252613067626953, "learning_rate": 2.430723919975767e-06, "loss": 0.5091, "step": 10759 }, { "epoch": 0.68, "grad_norm": 1.0624945163726807, "learning_rate": 2.4298437977696658e-06, "loss": 0.5268, "step": 10760 }, { "epoch": 0.68, "grad_norm": 1.0497515201568604, "learning_rate": 2.428963783783904e-06, "loss": 0.4852, "step": 10761 }, { "epoch": 0.68, "grad_norm": 1.0248337984085083, "learning_rate": 2.4280838780555347e-06, "loss": 0.5007, "step": 10762 }, { "epoch": 0.68, "grad_norm": 1.0751545429229736, "learning_rate": 2.427204080621605e-06, "loss": 0.5021, "step": 10763 }, { "epoch": 0.68, "grad_norm": 1.0481481552124023, "learning_rate": 2.426324391519161e-06, "loss": 0.4454, "step": 10764 }, { "epoch": 0.68, "grad_norm": 1.0749013423919678, "learning_rate": 2.4254448107852434e-06, "loss": 0.5357, "step": 10765 }, { "epoch": 0.68, "grad_norm": 1.035150170326233, "learning_rate": 2.424565338456889e-06, "loss": 0.5128, "step": 10766 }, { "epoch": 0.68, "grad_norm": 1.0497207641601562, "learning_rate": 2.4236859745711305e-06, "loss": 0.5407, "step": 10767 }, { "epoch": 0.68, "grad_norm": 1.0844335556030273, "learning_rate": 2.4228067191649917e-06, "loss": 0.5075, "step": 10768 }, { "epoch": 0.68, "grad_norm": 0.9757227301597595, "learning_rate": 2.421927572275494e-06, "loss": 0.5017, "step": 10769 }, { "epoch": 0.68, "grad_norm": 1.0580272674560547, "learning_rate": 2.4210485339396627e-06, "loss": 0.5002, "step": 10770 }, { "epoch": 0.68, "grad_norm": 1.091774821281433, "learning_rate": 2.4201696041945033e-06, "loss": 0.5408, "step": 10771 }, { "epoch": 0.68, "grad_norm": 1.0345991849899292, "learning_rate": 2.419290783077028e-06, "loss": 0.4692, "step": 10772 }, { "epoch": 0.68, "grad_norm": 1.04410982131958, "learning_rate": 2.41841207062424e-06, "loss": 0.4801, "step": 10773 }, { "epoch": 0.68, "grad_norm": 1.0724915266036987, "learning_rate": 2.4175334668731383e-06, "loss": 0.539, "step": 10774 }, { "epoch": 0.68, "grad_norm": 0.9957897067070007, "learning_rate": 2.416654971860721e-06, "loss": 0.5044, "step": 10775 }, { "epoch": 0.68, "grad_norm": 1.0342212915420532, "learning_rate": 2.415776585623974e-06, "loss": 0.5185, "step": 10776 }, { "epoch": 0.68, "grad_norm": 1.1434073448181152, "learning_rate": 2.4148983081998834e-06, "loss": 0.5179, "step": 10777 }, { "epoch": 0.68, "grad_norm": 1.029054045677185, "learning_rate": 2.414020139625436e-06, "loss": 0.4609, "step": 10778 }, { "epoch": 0.68, "grad_norm": 1.0691587924957275, "learning_rate": 2.413142079937602e-06, "loss": 0.5596, "step": 10779 }, { "epoch": 0.68, "grad_norm": 1.087937593460083, "learning_rate": 2.4122641291733567e-06, "loss": 0.5135, "step": 10780 }, { "epoch": 0.68, "grad_norm": 1.101758599281311, "learning_rate": 2.4113862873696687e-06, "loss": 0.5433, "step": 10781 }, { "epoch": 0.68, "grad_norm": 1.0143336057662964, "learning_rate": 2.410508554563495e-06, "loss": 0.5028, "step": 10782 }, { "epoch": 0.68, "grad_norm": 1.081833004951477, "learning_rate": 2.4096309307918013e-06, "loss": 0.5302, "step": 10783 }, { "epoch": 0.68, "grad_norm": 1.1051571369171143, "learning_rate": 2.4087534160915364e-06, "loss": 0.5296, "step": 10784 }, { "epoch": 0.68, "grad_norm": 0.9907381534576416, "learning_rate": 2.407876010499651e-06, "loss": 0.5312, "step": 10785 }, { "epoch": 0.68, "grad_norm": 1.1027705669403076, "learning_rate": 2.4069987140530893e-06, "loss": 0.5136, "step": 10786 }, { "epoch": 0.68, "grad_norm": 1.004507064819336, "learning_rate": 2.4061215267887915e-06, "loss": 0.5009, "step": 10787 }, { "epoch": 0.68, "grad_norm": 1.0545748472213745, "learning_rate": 2.4052444487436925e-06, "loss": 0.4824, "step": 10788 }, { "epoch": 0.68, "grad_norm": 1.1453453302383423, "learning_rate": 2.4043674799547252e-06, "loss": 0.484, "step": 10789 }, { "epoch": 0.68, "grad_norm": 1.0942673683166504, "learning_rate": 2.4034906204588104e-06, "loss": 0.5205, "step": 10790 }, { "epoch": 0.68, "grad_norm": 1.0242911577224731, "learning_rate": 2.4026138702928763e-06, "loss": 0.4761, "step": 10791 }, { "epoch": 0.68, "grad_norm": 1.0240156650543213, "learning_rate": 2.4017372294938347e-06, "loss": 0.5277, "step": 10792 }, { "epoch": 0.68, "grad_norm": 1.0593849420547485, "learning_rate": 2.4008606980985994e-06, "loss": 0.5373, "step": 10793 }, { "epoch": 0.68, "grad_norm": 1.114747166633606, "learning_rate": 2.399984276144079e-06, "loss": 0.528, "step": 10794 }, { "epoch": 0.68, "grad_norm": 1.0083765983581543, "learning_rate": 2.3991079636671755e-06, "loss": 0.528, "step": 10795 }, { "epoch": 0.68, "grad_norm": 1.0297280550003052, "learning_rate": 2.398231760704788e-06, "loss": 0.5163, "step": 10796 }, { "epoch": 0.68, "grad_norm": 1.0804991722106934, "learning_rate": 2.397355667293812e-06, "loss": 0.4925, "step": 10797 }, { "epoch": 0.68, "grad_norm": 1.0716068744659424, "learning_rate": 2.396479683471133e-06, "loss": 0.533, "step": 10798 }, { "epoch": 0.68, "grad_norm": 1.0013996362686157, "learning_rate": 2.395603809273635e-06, "loss": 0.5476, "step": 10799 }, { "epoch": 0.68, "grad_norm": 1.211030125617981, "learning_rate": 2.3947280447382055e-06, "loss": 0.5368, "step": 10800 }, { "epoch": 0.68, "grad_norm": 1.0420416593551636, "learning_rate": 2.3938523899017124e-06, "loss": 0.4871, "step": 10801 }, { "epoch": 0.68, "grad_norm": 1.0371475219726562, "learning_rate": 2.392976844801029e-06, "loss": 0.5074, "step": 10802 }, { "epoch": 0.68, "grad_norm": 0.9832603335380554, "learning_rate": 2.3921014094730216e-06, "loss": 0.5094, "step": 10803 }, { "epoch": 0.68, "grad_norm": 1.002054214477539, "learning_rate": 2.3912260839545514e-06, "loss": 0.5261, "step": 10804 }, { "epoch": 0.68, "grad_norm": 1.0066535472869873, "learning_rate": 2.390350868282478e-06, "loss": 0.5592, "step": 10805 }, { "epoch": 0.68, "grad_norm": 1.1311171054840088, "learning_rate": 2.389475762493649e-06, "loss": 0.5044, "step": 10806 }, { "epoch": 0.68, "grad_norm": 0.9754476547241211, "learning_rate": 2.3886007666249124e-06, "loss": 0.4875, "step": 10807 }, { "epoch": 0.68, "grad_norm": 1.0370256900787354, "learning_rate": 2.387725880713117e-06, "loss": 0.5078, "step": 10808 }, { "epoch": 0.68, "grad_norm": 1.1595977544784546, "learning_rate": 2.3868511047950955e-06, "loss": 0.5417, "step": 10809 }, { "epoch": 0.68, "grad_norm": 1.078356146812439, "learning_rate": 2.3859764389076834e-06, "loss": 0.4764, "step": 10810 }, { "epoch": 0.68, "grad_norm": 1.0195378065109253, "learning_rate": 2.3851018830877115e-06, "loss": 0.4784, "step": 10811 }, { "epoch": 0.68, "grad_norm": 1.075260877609253, "learning_rate": 2.3842274373719994e-06, "loss": 0.5229, "step": 10812 }, { "epoch": 0.69, "grad_norm": 1.0607411861419678, "learning_rate": 2.383353101797374e-06, "loss": 0.4833, "step": 10813 }, { "epoch": 0.69, "grad_norm": 1.00974440574646, "learning_rate": 2.3824788764006446e-06, "loss": 0.526, "step": 10814 }, { "epoch": 0.69, "grad_norm": 0.9402571320533752, "learning_rate": 2.3816047612186243e-06, "loss": 0.4657, "step": 10815 }, { "epoch": 0.69, "grad_norm": 1.0360361337661743, "learning_rate": 2.3807307562881188e-06, "loss": 0.5157, "step": 10816 }, { "epoch": 0.69, "grad_norm": 1.0566238164901733, "learning_rate": 2.3798568616459295e-06, "loss": 0.5376, "step": 10817 }, { "epoch": 0.69, "grad_norm": 1.0949710607528687, "learning_rate": 2.378983077328853e-06, "loss": 0.4918, "step": 10818 }, { "epoch": 0.69, "grad_norm": 0.9928608536720276, "learning_rate": 2.378109403373683e-06, "loss": 0.4995, "step": 10819 }, { "epoch": 0.69, "grad_norm": 1.0890331268310547, "learning_rate": 2.3772358398172013e-06, "loss": 0.5446, "step": 10820 }, { "epoch": 0.69, "grad_norm": 0.9923766851425171, "learning_rate": 2.3763623866961984e-06, "loss": 0.4591, "step": 10821 }, { "epoch": 0.69, "grad_norm": 1.0734655857086182, "learning_rate": 2.375489044047446e-06, "loss": 0.4931, "step": 10822 }, { "epoch": 0.69, "grad_norm": 1.0973488092422485, "learning_rate": 2.37461581190772e-06, "loss": 0.549, "step": 10823 }, { "epoch": 0.69, "grad_norm": 1.0098018646240234, "learning_rate": 2.37374269031379e-06, "loss": 0.4981, "step": 10824 }, { "epoch": 0.69, "grad_norm": 0.9769476652145386, "learning_rate": 2.3728696793024187e-06, "loss": 0.478, "step": 10825 }, { "epoch": 0.69, "grad_norm": 1.1181920766830444, "learning_rate": 2.371996778910366e-06, "loss": 0.528, "step": 10826 }, { "epoch": 0.69, "grad_norm": 1.112295389175415, "learning_rate": 2.3711239891743886e-06, "loss": 0.5127, "step": 10827 }, { "epoch": 0.69, "grad_norm": 0.9636074900627136, "learning_rate": 2.370251310131233e-06, "loss": 0.4271, "step": 10828 }, { "epoch": 0.69, "grad_norm": 1.0492388010025024, "learning_rate": 2.369378741817647e-06, "loss": 0.5006, "step": 10829 }, { "epoch": 0.69, "grad_norm": 1.027235984802246, "learning_rate": 2.3685062842703697e-06, "loss": 0.5129, "step": 10830 }, { "epoch": 0.69, "grad_norm": 1.1333270072937012, "learning_rate": 2.3676339375261394e-06, "loss": 0.5233, "step": 10831 }, { "epoch": 0.69, "grad_norm": 1.0313310623168945, "learning_rate": 2.3667617016216885e-06, "loss": 0.4789, "step": 10832 }, { "epoch": 0.69, "grad_norm": 1.0129145383834839, "learning_rate": 2.365889576593738e-06, "loss": 0.4626, "step": 10833 }, { "epoch": 0.69, "grad_norm": 1.0947465896606445, "learning_rate": 2.365017562479016e-06, "loss": 0.5032, "step": 10834 }, { "epoch": 0.69, "grad_norm": 1.0129739046096802, "learning_rate": 2.36414565931424e-06, "loss": 0.4713, "step": 10835 }, { "epoch": 0.69, "grad_norm": 1.03473699092865, "learning_rate": 2.3632738671361187e-06, "loss": 0.4923, "step": 10836 }, { "epoch": 0.69, "grad_norm": 1.103641152381897, "learning_rate": 2.362402185981363e-06, "loss": 0.4954, "step": 10837 }, { "epoch": 0.69, "grad_norm": 1.080424189567566, "learning_rate": 2.3615306158866745e-06, "loss": 0.5467, "step": 10838 }, { "epoch": 0.69, "grad_norm": 1.1474509239196777, "learning_rate": 2.360659156888754e-06, "loss": 0.5577, "step": 10839 }, { "epoch": 0.69, "grad_norm": 0.9592921137809753, "learning_rate": 2.359787809024297e-06, "loss": 0.4802, "step": 10840 }, { "epoch": 0.69, "grad_norm": 1.0543572902679443, "learning_rate": 2.358916572329986e-06, "loss": 0.4925, "step": 10841 }, { "epoch": 0.69, "grad_norm": 0.9967743754386902, "learning_rate": 2.3580454468425136e-06, "loss": 0.4902, "step": 10842 }, { "epoch": 0.69, "grad_norm": 1.053407907485962, "learning_rate": 2.357174432598558e-06, "loss": 0.5122, "step": 10843 }, { "epoch": 0.69, "grad_norm": 1.0734866857528687, "learning_rate": 2.356303529634791e-06, "loss": 0.5312, "step": 10844 }, { "epoch": 0.69, "grad_norm": 1.0064641237258911, "learning_rate": 2.355432737987886e-06, "loss": 0.538, "step": 10845 }, { "epoch": 0.69, "grad_norm": 1.090053915977478, "learning_rate": 2.3545620576945088e-06, "loss": 0.5243, "step": 10846 }, { "epoch": 0.69, "grad_norm": 1.0361047983169556, "learning_rate": 2.3536914887913203e-06, "loss": 0.5288, "step": 10847 }, { "epoch": 0.69, "grad_norm": 1.083053708076477, "learning_rate": 2.3528210313149793e-06, "loss": 0.4931, "step": 10848 }, { "epoch": 0.69, "grad_norm": 1.089536428451538, "learning_rate": 2.351950685302134e-06, "loss": 0.5345, "step": 10849 }, { "epoch": 0.69, "grad_norm": 1.165738582611084, "learning_rate": 2.351080450789431e-06, "loss": 0.5085, "step": 10850 }, { "epoch": 0.69, "grad_norm": 1.0671491622924805, "learning_rate": 2.3502103278135203e-06, "loss": 0.4966, "step": 10851 }, { "epoch": 0.69, "grad_norm": 1.0162256956100464, "learning_rate": 2.349340316411032e-06, "loss": 0.4889, "step": 10852 }, { "epoch": 0.69, "grad_norm": 1.0168367624282837, "learning_rate": 2.3484704166186024e-06, "loss": 0.4913, "step": 10853 }, { "epoch": 0.69, "grad_norm": 1.0524020195007324, "learning_rate": 2.347600628472859e-06, "loss": 0.4952, "step": 10854 }, { "epoch": 0.69, "grad_norm": 1.012343406677246, "learning_rate": 2.3467309520104265e-06, "loss": 0.4806, "step": 10855 }, { "epoch": 0.69, "grad_norm": 1.1498016119003296, "learning_rate": 2.3458613872679255e-06, "loss": 0.5259, "step": 10856 }, { "epoch": 0.69, "grad_norm": 1.0634065866470337, "learning_rate": 2.344991934281966e-06, "loss": 0.4772, "step": 10857 }, { "epoch": 0.69, "grad_norm": 1.207221269607544, "learning_rate": 2.344122593089161e-06, "loss": 0.5228, "step": 10858 }, { "epoch": 0.69, "grad_norm": 0.9218490719795227, "learning_rate": 2.3432533637261135e-06, "loss": 0.446, "step": 10859 }, { "epoch": 0.69, "grad_norm": 1.06690514087677, "learning_rate": 2.3423842462294257e-06, "loss": 0.5001, "step": 10860 }, { "epoch": 0.69, "grad_norm": 1.1247037649154663, "learning_rate": 2.341515240635691e-06, "loss": 0.4739, "step": 10861 }, { "epoch": 0.69, "grad_norm": 1.0756078958511353, "learning_rate": 2.340646346981504e-06, "loss": 0.5022, "step": 10862 }, { "epoch": 0.69, "grad_norm": 1.073510766029358, "learning_rate": 2.339777565303444e-06, "loss": 0.5338, "step": 10863 }, { "epoch": 0.69, "grad_norm": 1.0145232677459717, "learning_rate": 2.3389088956380982e-06, "loss": 0.4355, "step": 10864 }, { "epoch": 0.69, "grad_norm": 0.994920015335083, "learning_rate": 2.338040338022044e-06, "loss": 0.4767, "step": 10865 }, { "epoch": 0.69, "grad_norm": 1.1112754344940186, "learning_rate": 2.3371718924918487e-06, "loss": 0.4915, "step": 10866 }, { "epoch": 0.69, "grad_norm": 1.0785366296768188, "learning_rate": 2.3363035590840814e-06, "loss": 0.4844, "step": 10867 }, { "epoch": 0.69, "grad_norm": 1.0066707134246826, "learning_rate": 2.3354353378353056e-06, "loss": 0.5253, "step": 10868 }, { "epoch": 0.69, "grad_norm": 0.9772728681564331, "learning_rate": 2.334567228782078e-06, "loss": 0.4908, "step": 10869 }, { "epoch": 0.69, "grad_norm": 1.1539030075073242, "learning_rate": 2.3336992319609534e-06, "loss": 0.5205, "step": 10870 }, { "epoch": 0.69, "grad_norm": 1.0286020040512085, "learning_rate": 2.3328313474084755e-06, "loss": 0.5318, "step": 10871 }, { "epoch": 0.69, "grad_norm": 1.0363796949386597, "learning_rate": 2.3319635751611937e-06, "loss": 0.5115, "step": 10872 }, { "epoch": 0.69, "grad_norm": 1.0309985876083374, "learning_rate": 2.3310959152556453e-06, "loss": 0.55, "step": 10873 }, { "epoch": 0.69, "grad_norm": 1.0882515907287598, "learning_rate": 2.3302283677283618e-06, "loss": 0.5182, "step": 10874 }, { "epoch": 0.69, "grad_norm": 1.0382540225982666, "learning_rate": 2.3293609326158745e-06, "loss": 0.4654, "step": 10875 }, { "epoch": 0.69, "grad_norm": 1.0700305700302124, "learning_rate": 2.328493609954707e-06, "loss": 0.5194, "step": 10876 }, { "epoch": 0.69, "grad_norm": 1.0921525955200195, "learning_rate": 2.3276263997813812e-06, "loss": 0.4882, "step": 10877 }, { "epoch": 0.69, "grad_norm": 1.1156283617019653, "learning_rate": 2.3267593021324127e-06, "loss": 0.5642, "step": 10878 }, { "epoch": 0.69, "grad_norm": 1.1570100784301758, "learning_rate": 2.3258923170443087e-06, "loss": 0.5761, "step": 10879 }, { "epoch": 0.69, "grad_norm": 0.9395455121994019, "learning_rate": 2.3250254445535743e-06, "loss": 0.4777, "step": 10880 }, { "epoch": 0.69, "grad_norm": 1.0273380279541016, "learning_rate": 2.324158684696717e-06, "loss": 0.4392, "step": 10881 }, { "epoch": 0.69, "grad_norm": 0.960543692111969, "learning_rate": 2.323292037510227e-06, "loss": 0.4258, "step": 10882 }, { "epoch": 0.69, "grad_norm": 1.1122788190841675, "learning_rate": 2.3224255030305977e-06, "loss": 0.5128, "step": 10883 }, { "epoch": 0.69, "grad_norm": 0.9551585912704468, "learning_rate": 2.321559081294316e-06, "loss": 0.4742, "step": 10884 }, { "epoch": 0.69, "grad_norm": 1.0875403881072998, "learning_rate": 2.3206927723378638e-06, "loss": 0.5332, "step": 10885 }, { "epoch": 0.69, "grad_norm": 1.0719062089920044, "learning_rate": 2.3198265761977196e-06, "loss": 0.5218, "step": 10886 }, { "epoch": 0.69, "grad_norm": 1.0475523471832275, "learning_rate": 2.3189604929103533e-06, "loss": 0.4952, "step": 10887 }, { "epoch": 0.69, "grad_norm": 1.0663193464279175, "learning_rate": 2.318094522512232e-06, "loss": 0.5228, "step": 10888 }, { "epoch": 0.69, "grad_norm": 0.953425407409668, "learning_rate": 2.3172286650398247e-06, "loss": 0.4936, "step": 10889 }, { "epoch": 0.69, "grad_norm": 1.0176100730895996, "learning_rate": 2.3163629205295833e-06, "loss": 0.4967, "step": 10890 }, { "epoch": 0.69, "grad_norm": 1.0192761421203613, "learning_rate": 2.3154972890179638e-06, "loss": 0.4981, "step": 10891 }, { "epoch": 0.69, "grad_norm": 1.119706153869629, "learning_rate": 2.3146317705414168e-06, "loss": 0.4856, "step": 10892 }, { "epoch": 0.69, "grad_norm": 1.0019359588623047, "learning_rate": 2.31376636513638e-06, "loss": 0.5136, "step": 10893 }, { "epoch": 0.69, "grad_norm": 1.0564658641815186, "learning_rate": 2.3129010728393012e-06, "loss": 0.5581, "step": 10894 }, { "epoch": 0.69, "grad_norm": 1.0997692346572876, "learning_rate": 2.3120358936866084e-06, "loss": 0.542, "step": 10895 }, { "epoch": 0.69, "grad_norm": 1.0283379554748535, "learning_rate": 2.3111708277147333e-06, "loss": 0.474, "step": 10896 }, { "epoch": 0.69, "grad_norm": 1.082440972328186, "learning_rate": 2.310305874960101e-06, "loss": 0.5198, "step": 10897 }, { "epoch": 0.69, "grad_norm": 1.0326625108718872, "learning_rate": 2.3094410354591314e-06, "loss": 0.5633, "step": 10898 }, { "epoch": 0.69, "grad_norm": 1.0904386043548584, "learning_rate": 2.30857630924824e-06, "loss": 0.5055, "step": 10899 }, { "epoch": 0.69, "grad_norm": 0.9982689619064331, "learning_rate": 2.3077116963638396e-06, "loss": 0.5241, "step": 10900 }, { "epoch": 0.69, "grad_norm": 0.984889030456543, "learning_rate": 2.3068471968423296e-06, "loss": 0.5254, "step": 10901 }, { "epoch": 0.69, "grad_norm": 1.110054612159729, "learning_rate": 2.305982810720119e-06, "loss": 0.5533, "step": 10902 }, { "epoch": 0.69, "grad_norm": 1.027411937713623, "learning_rate": 2.3051185380335995e-06, "loss": 0.488, "step": 10903 }, { "epoch": 0.69, "grad_norm": 0.9612981081008911, "learning_rate": 2.304254378819163e-06, "loss": 0.4934, "step": 10904 }, { "epoch": 0.69, "grad_norm": 1.0585256814956665, "learning_rate": 2.3033903331131986e-06, "loss": 0.5339, "step": 10905 }, { "epoch": 0.69, "grad_norm": 1.0645774602890015, "learning_rate": 2.3025264009520833e-06, "loss": 0.5331, "step": 10906 }, { "epoch": 0.69, "grad_norm": 1.0150748491287231, "learning_rate": 2.3016625823721985e-06, "loss": 0.5182, "step": 10907 }, { "epoch": 0.69, "grad_norm": 1.169596552848816, "learning_rate": 2.300798877409918e-06, "loss": 0.5295, "step": 10908 }, { "epoch": 0.69, "grad_norm": 1.067642331123352, "learning_rate": 2.2999352861016042e-06, "loss": 0.5006, "step": 10909 }, { "epoch": 0.69, "grad_norm": 1.0091102123260498, "learning_rate": 2.299071808483623e-06, "loss": 0.4797, "step": 10910 }, { "epoch": 0.69, "grad_norm": 1.0823348760604858, "learning_rate": 2.2982084445923327e-06, "loss": 0.5481, "step": 10911 }, { "epoch": 0.69, "grad_norm": 0.9775456190109253, "learning_rate": 2.297345194464086e-06, "loss": 0.5006, "step": 10912 }, { "epoch": 0.69, "grad_norm": 1.0186797380447388, "learning_rate": 2.2964820581352325e-06, "loss": 0.5257, "step": 10913 }, { "epoch": 0.69, "grad_norm": 0.9842431545257568, "learning_rate": 2.295619035642111e-06, "loss": 0.4726, "step": 10914 }, { "epoch": 0.69, "grad_norm": 1.073970079421997, "learning_rate": 2.294756127021066e-06, "loss": 0.502, "step": 10915 }, { "epoch": 0.69, "grad_norm": 1.1141387224197388, "learning_rate": 2.2938933323084315e-06, "loss": 0.5829, "step": 10916 }, { "epoch": 0.69, "grad_norm": 1.0280675888061523, "learning_rate": 2.293030651540534e-06, "loss": 0.5358, "step": 10917 }, { "epoch": 0.69, "grad_norm": 1.0070101022720337, "learning_rate": 2.2921680847536976e-06, "loss": 0.4939, "step": 10918 }, { "epoch": 0.69, "grad_norm": 1.1027600765228271, "learning_rate": 2.2913056319842436e-06, "loss": 0.5214, "step": 10919 }, { "epoch": 0.69, "grad_norm": 0.9915065765380859, "learning_rate": 2.2904432932684865e-06, "loss": 0.5011, "step": 10920 }, { "epoch": 0.69, "grad_norm": 1.006392002105713, "learning_rate": 2.289581068642737e-06, "loss": 0.4891, "step": 10921 }, { "epoch": 0.69, "grad_norm": 1.1884076595306396, "learning_rate": 2.2887189581433016e-06, "loss": 0.4954, "step": 10922 }, { "epoch": 0.69, "grad_norm": 1.1301230192184448, "learning_rate": 2.287856961806475e-06, "loss": 0.5255, "step": 10923 }, { "epoch": 0.69, "grad_norm": 1.010464072227478, "learning_rate": 2.286995079668561e-06, "loss": 0.4895, "step": 10924 }, { "epoch": 0.69, "grad_norm": 1.0081626176834106, "learning_rate": 2.2861333117658442e-06, "loss": 0.4892, "step": 10925 }, { "epoch": 0.69, "grad_norm": 0.9857581853866577, "learning_rate": 2.2852716581346124e-06, "loss": 0.5362, "step": 10926 }, { "epoch": 0.69, "grad_norm": 0.931063175201416, "learning_rate": 2.2844101188111477e-06, "loss": 0.5071, "step": 10927 }, { "epoch": 0.69, "grad_norm": 1.031646728515625, "learning_rate": 2.283548693831726e-06, "loss": 0.4879, "step": 10928 }, { "epoch": 0.69, "grad_norm": 1.063567042350769, "learning_rate": 2.2826873832326192e-06, "loss": 0.5253, "step": 10929 }, { "epoch": 0.69, "grad_norm": 1.0719285011291504, "learning_rate": 2.2818261870500954e-06, "loss": 0.4729, "step": 10930 }, { "epoch": 0.69, "grad_norm": 1.0247489213943481, "learning_rate": 2.280965105320411e-06, "loss": 0.4991, "step": 10931 }, { "epoch": 0.69, "grad_norm": 1.008853793144226, "learning_rate": 2.280104138079831e-06, "loss": 0.5526, "step": 10932 }, { "epoch": 0.69, "grad_norm": 0.9935298562049866, "learning_rate": 2.2792432853646023e-06, "loss": 0.5016, "step": 10933 }, { "epoch": 0.69, "grad_norm": 1.0684596300125122, "learning_rate": 2.2783825472109743e-06, "loss": 0.4832, "step": 10934 }, { "epoch": 0.69, "grad_norm": 1.020822525024414, "learning_rate": 2.277521923655189e-06, "loss": 0.5226, "step": 10935 }, { "epoch": 0.69, "grad_norm": 1.047221302986145, "learning_rate": 2.276661414733485e-06, "loss": 0.51, "step": 10936 }, { "epoch": 0.69, "grad_norm": 1.0854012966156006, "learning_rate": 2.2758010204820945e-06, "loss": 0.5334, "step": 10937 }, { "epoch": 0.69, "grad_norm": 1.0282018184661865, "learning_rate": 2.2749407409372487e-06, "loss": 0.5203, "step": 10938 }, { "epoch": 0.69, "grad_norm": 1.0249234437942505, "learning_rate": 2.2740805761351664e-06, "loss": 0.4742, "step": 10939 }, { "epoch": 0.69, "grad_norm": 1.078487515449524, "learning_rate": 2.273220526112068e-06, "loss": 0.5067, "step": 10940 }, { "epoch": 0.69, "grad_norm": 1.024756669998169, "learning_rate": 2.272360590904168e-06, "loss": 0.5095, "step": 10941 }, { "epoch": 0.69, "grad_norm": 1.0135009288787842, "learning_rate": 2.2715007705476744e-06, "loss": 0.5056, "step": 10942 }, { "epoch": 0.69, "grad_norm": 1.0788309574127197, "learning_rate": 2.2706410650787937e-06, "loss": 0.5284, "step": 10943 }, { "epoch": 0.69, "grad_norm": 0.9614673852920532, "learning_rate": 2.2697814745337186e-06, "loss": 0.4832, "step": 10944 }, { "epoch": 0.69, "grad_norm": 0.9808701872825623, "learning_rate": 2.2689219989486506e-06, "loss": 0.5214, "step": 10945 }, { "epoch": 0.69, "grad_norm": 1.163304328918457, "learning_rate": 2.2680626383597782e-06, "loss": 0.5577, "step": 10946 }, { "epoch": 0.69, "grad_norm": 0.9920132756233215, "learning_rate": 2.267203392803282e-06, "loss": 0.477, "step": 10947 }, { "epoch": 0.69, "grad_norm": 1.0092310905456543, "learning_rate": 2.266344262315345e-06, "loss": 0.5237, "step": 10948 }, { "epoch": 0.69, "grad_norm": 1.155785083770752, "learning_rate": 2.2654852469321405e-06, "loss": 0.5016, "step": 10949 }, { "epoch": 0.69, "grad_norm": 1.1730620861053467, "learning_rate": 2.26462634668984e-06, "loss": 0.5936, "step": 10950 }, { "epoch": 0.69, "grad_norm": 0.9889048337936401, "learning_rate": 2.2637675616246103e-06, "loss": 0.5242, "step": 10951 }, { "epoch": 0.69, "grad_norm": 1.0237679481506348, "learning_rate": 2.262908891772608e-06, "loss": 0.5823, "step": 10952 }, { "epoch": 0.69, "grad_norm": 1.0033601522445679, "learning_rate": 2.2620503371699886e-06, "loss": 0.5212, "step": 10953 }, { "epoch": 0.69, "grad_norm": 1.0933045148849487, "learning_rate": 2.261191897852909e-06, "loss": 0.4784, "step": 10954 }, { "epoch": 0.69, "grad_norm": 1.0246044397354126, "learning_rate": 2.260333573857509e-06, "loss": 0.5272, "step": 10955 }, { "epoch": 0.69, "grad_norm": 1.0688954591751099, "learning_rate": 2.2594753652199313e-06, "loss": 0.5285, "step": 10956 }, { "epoch": 0.69, "grad_norm": 1.0384154319763184, "learning_rate": 2.2586172719763126e-06, "loss": 0.487, "step": 10957 }, { "epoch": 0.69, "grad_norm": 1.0645639896392822, "learning_rate": 2.2577592941627842e-06, "loss": 0.5067, "step": 10958 }, { "epoch": 0.69, "grad_norm": 1.0228403806686401, "learning_rate": 2.2569014318154735e-06, "loss": 0.498, "step": 10959 }, { "epoch": 0.69, "grad_norm": 1.2611366510391235, "learning_rate": 2.2560436849704996e-06, "loss": 0.5444, "step": 10960 }, { "epoch": 0.69, "grad_norm": 1.0925045013427734, "learning_rate": 2.255186053663979e-06, "loss": 0.5394, "step": 10961 }, { "epoch": 0.69, "grad_norm": 0.9899227023124695, "learning_rate": 2.2543285379320283e-06, "loss": 0.4803, "step": 10962 }, { "epoch": 0.69, "grad_norm": 1.0411585569381714, "learning_rate": 2.2534711378107498e-06, "loss": 0.5004, "step": 10963 }, { "epoch": 0.69, "grad_norm": 1.0692851543426514, "learning_rate": 2.2526138533362475e-06, "loss": 0.4789, "step": 10964 }, { "epoch": 0.69, "grad_norm": 1.170039415359497, "learning_rate": 2.2517566845446182e-06, "loss": 0.5275, "step": 10965 }, { "epoch": 0.69, "grad_norm": 1.0667608976364136, "learning_rate": 2.2508996314719544e-06, "loss": 0.4947, "step": 10966 }, { "epoch": 0.69, "grad_norm": 1.0293246507644653, "learning_rate": 2.250042694154345e-06, "loss": 0.4829, "step": 10967 }, { "epoch": 0.69, "grad_norm": 1.0149614810943604, "learning_rate": 2.2491858726278704e-06, "loss": 0.4615, "step": 10968 }, { "epoch": 0.69, "grad_norm": 1.046410322189331, "learning_rate": 2.248329166928609e-06, "loss": 0.5281, "step": 10969 }, { "epoch": 0.69, "grad_norm": 1.2291126251220703, "learning_rate": 2.2474725770926337e-06, "loss": 0.4785, "step": 10970 }, { "epoch": 0.7, "grad_norm": 1.1178964376449585, "learning_rate": 2.2466161031560136e-06, "loss": 0.5346, "step": 10971 }, { "epoch": 0.7, "grad_norm": 1.0019038915634155, "learning_rate": 2.2457597451548102e-06, "loss": 0.4555, "step": 10972 }, { "epoch": 0.7, "grad_norm": 1.0246751308441162, "learning_rate": 2.2449035031250847e-06, "loss": 0.5029, "step": 10973 }, { "epoch": 0.7, "grad_norm": 1.0310834646224976, "learning_rate": 2.2440473771028855e-06, "loss": 0.5217, "step": 10974 }, { "epoch": 0.7, "grad_norm": 1.0542534589767456, "learning_rate": 2.2431913671242666e-06, "loss": 0.5378, "step": 10975 }, { "epoch": 0.7, "grad_norm": 1.0208415985107422, "learning_rate": 2.242335473225268e-06, "loss": 0.4924, "step": 10976 }, { "epoch": 0.7, "grad_norm": 1.0091476440429688, "learning_rate": 2.2414796954419286e-06, "loss": 0.49, "step": 10977 }, { "epoch": 0.7, "grad_norm": 0.9918937683105469, "learning_rate": 2.2406240338102836e-06, "loss": 0.4849, "step": 10978 }, { "epoch": 0.7, "grad_norm": 1.0851179361343384, "learning_rate": 2.239768488366361e-06, "loss": 0.5236, "step": 10979 }, { "epoch": 0.7, "grad_norm": 1.0405689477920532, "learning_rate": 2.2389130591461855e-06, "loss": 0.5159, "step": 10980 }, { "epoch": 0.7, "grad_norm": 1.0542181730270386, "learning_rate": 2.2380577461857777e-06, "loss": 0.5225, "step": 10981 }, { "epoch": 0.7, "grad_norm": 1.0488357543945312, "learning_rate": 2.2372025495211465e-06, "loss": 0.5331, "step": 10982 }, { "epoch": 0.7, "grad_norm": 1.0630464553833008, "learning_rate": 2.236347469188308e-06, "loss": 0.5343, "step": 10983 }, { "epoch": 0.7, "grad_norm": 1.0251017808914185, "learning_rate": 2.2354925052232625e-06, "loss": 0.4954, "step": 10984 }, { "epoch": 0.7, "grad_norm": 1.037653923034668, "learning_rate": 2.2346376576620103e-06, "loss": 0.4754, "step": 10985 }, { "epoch": 0.7, "grad_norm": 1.1310943365097046, "learning_rate": 2.2337829265405466e-06, "loss": 0.5137, "step": 10986 }, { "epoch": 0.7, "grad_norm": 1.0841907262802124, "learning_rate": 2.2329283118948604e-06, "loss": 0.5063, "step": 10987 }, { "epoch": 0.7, "grad_norm": 0.9623340964317322, "learning_rate": 2.232073813760937e-06, "loss": 0.5351, "step": 10988 }, { "epoch": 0.7, "grad_norm": 1.03236722946167, "learning_rate": 2.2312194321747582e-06, "loss": 0.4978, "step": 10989 }, { "epoch": 0.7, "grad_norm": 1.069541096687317, "learning_rate": 2.230365167172296e-06, "loss": 0.5143, "step": 10990 }, { "epoch": 0.7, "grad_norm": 1.1878217458724976, "learning_rate": 2.2295110187895215e-06, "loss": 0.4877, "step": 10991 }, { "epoch": 0.7, "grad_norm": 0.984520435333252, "learning_rate": 2.2286569870624e-06, "loss": 0.5278, "step": 10992 }, { "epoch": 0.7, "grad_norm": 1.0635064840316772, "learning_rate": 2.227803072026892e-06, "loss": 0.4926, "step": 10993 }, { "epoch": 0.7, "grad_norm": 1.092267632484436, "learning_rate": 2.226949273718953e-06, "loss": 0.5379, "step": 10994 }, { "epoch": 0.7, "grad_norm": 1.0395183563232422, "learning_rate": 2.226095592174533e-06, "loss": 0.5277, "step": 10995 }, { "epoch": 0.7, "grad_norm": 0.9301552176475525, "learning_rate": 2.2252420274295782e-06, "loss": 0.4542, "step": 10996 }, { "epoch": 0.7, "grad_norm": 0.9266456961631775, "learning_rate": 2.224388579520031e-06, "loss": 0.4951, "step": 10997 }, { "epoch": 0.7, "grad_norm": 1.0810495615005493, "learning_rate": 2.2235352484818228e-06, "loss": 0.5257, "step": 10998 }, { "epoch": 0.7, "grad_norm": 1.0020378828048706, "learning_rate": 2.222682034350887e-06, "loss": 0.5075, "step": 10999 }, { "epoch": 0.7, "grad_norm": 1.0670814514160156, "learning_rate": 2.221828937163149e-06, "loss": 0.5337, "step": 11000 }, { "epoch": 0.7, "grad_norm": 1.0369603633880615, "learning_rate": 2.22097595695453e-06, "loss": 0.4782, "step": 11001 }, { "epoch": 0.7, "grad_norm": 1.049034833908081, "learning_rate": 2.220123093760946e-06, "loss": 0.5295, "step": 11002 }, { "epoch": 0.7, "grad_norm": 1.1661826372146606, "learning_rate": 2.2192703476183093e-06, "loss": 0.4902, "step": 11003 }, { "epoch": 0.7, "grad_norm": 1.1276367902755737, "learning_rate": 2.2184177185625217e-06, "loss": 0.5505, "step": 11004 }, { "epoch": 0.7, "grad_norm": 1.0560840368270874, "learning_rate": 2.217565206629491e-06, "loss": 0.498, "step": 11005 }, { "epoch": 0.7, "grad_norm": 1.0290144681930542, "learning_rate": 2.2167128118551084e-06, "loss": 0.5105, "step": 11006 }, { "epoch": 0.7, "grad_norm": 1.0535063743591309, "learning_rate": 2.2158605342752667e-06, "loss": 0.5385, "step": 11007 }, { "epoch": 0.7, "grad_norm": 1.0331565141677856, "learning_rate": 2.2150083739258525e-06, "loss": 0.4536, "step": 11008 }, { "epoch": 0.7, "grad_norm": 0.9949620962142944, "learning_rate": 2.214156330842748e-06, "loss": 0.4917, "step": 11009 }, { "epoch": 0.7, "grad_norm": 0.9716442227363586, "learning_rate": 2.2133044050618286e-06, "loss": 0.5163, "step": 11010 }, { "epoch": 0.7, "grad_norm": 1.0355647802352905, "learning_rate": 2.2124525966189685e-06, "loss": 0.4759, "step": 11011 }, { "epoch": 0.7, "grad_norm": 1.0520069599151611, "learning_rate": 2.211600905550029e-06, "loss": 0.4828, "step": 11012 }, { "epoch": 0.7, "grad_norm": 1.0424782037734985, "learning_rate": 2.2107493318908785e-06, "loss": 0.5187, "step": 11013 }, { "epoch": 0.7, "grad_norm": 0.9450308084487915, "learning_rate": 2.2098978756773687e-06, "loss": 0.4999, "step": 11014 }, { "epoch": 0.7, "grad_norm": 1.1031875610351562, "learning_rate": 2.2090465369453533e-06, "loss": 0.5441, "step": 11015 }, { "epoch": 0.7, "grad_norm": 0.9856180548667908, "learning_rate": 2.208195315730681e-06, "loss": 0.5022, "step": 11016 }, { "epoch": 0.7, "grad_norm": 1.1115809679031372, "learning_rate": 2.207344212069189e-06, "loss": 0.5499, "step": 11017 }, { "epoch": 0.7, "grad_norm": 1.0844727754592896, "learning_rate": 2.2064932259967188e-06, "loss": 0.5283, "step": 11018 }, { "epoch": 0.7, "grad_norm": 1.0206760168075562, "learning_rate": 2.2056423575491026e-06, "loss": 0.538, "step": 11019 }, { "epoch": 0.7, "grad_norm": 1.0844743251800537, "learning_rate": 2.204791606762164e-06, "loss": 0.5307, "step": 11020 }, { "epoch": 0.7, "grad_norm": 1.080522894859314, "learning_rate": 2.2039409736717273e-06, "loss": 0.5078, "step": 11021 }, { "epoch": 0.7, "grad_norm": 0.9728801250457764, "learning_rate": 2.2030904583136085e-06, "loss": 0.5093, "step": 11022 }, { "epoch": 0.7, "grad_norm": 1.0371694564819336, "learning_rate": 2.2022400607236214e-06, "loss": 0.5241, "step": 11023 }, { "epoch": 0.7, "grad_norm": 0.995800793170929, "learning_rate": 2.2013897809375753e-06, "loss": 0.5391, "step": 11024 }, { "epoch": 0.7, "grad_norm": 1.137416124343872, "learning_rate": 2.2005396189912647e-06, "loss": 0.4828, "step": 11025 }, { "epoch": 0.7, "grad_norm": 0.9752306938171387, "learning_rate": 2.199689574920495e-06, "loss": 0.4923, "step": 11026 }, { "epoch": 0.7, "grad_norm": 1.0050386190414429, "learning_rate": 2.198839648761057e-06, "loss": 0.5086, "step": 11027 }, { "epoch": 0.7, "grad_norm": 0.9973408579826355, "learning_rate": 2.1979898405487354e-06, "loss": 0.4993, "step": 11028 }, { "epoch": 0.7, "grad_norm": 1.0028589963912964, "learning_rate": 2.197140150319314e-06, "loss": 0.4895, "step": 11029 }, { "epoch": 0.7, "grad_norm": 1.0483782291412354, "learning_rate": 2.19629057810857e-06, "loss": 0.5351, "step": 11030 }, { "epoch": 0.7, "grad_norm": 1.0445411205291748, "learning_rate": 2.195441123952277e-06, "loss": 0.5307, "step": 11031 }, { "epoch": 0.7, "grad_norm": 1.0868538618087769, "learning_rate": 2.1945917878862037e-06, "loss": 0.4981, "step": 11032 }, { "epoch": 0.7, "grad_norm": 1.014582872390747, "learning_rate": 2.193742569946109e-06, "loss": 0.5276, "step": 11033 }, { "epoch": 0.7, "grad_norm": 0.9631657004356384, "learning_rate": 2.1928934701677507e-06, "loss": 0.4964, "step": 11034 }, { "epoch": 0.7, "grad_norm": 1.0068409442901611, "learning_rate": 2.1920444885868862e-06, "loss": 0.4928, "step": 11035 }, { "epoch": 0.7, "grad_norm": 1.0212610960006714, "learning_rate": 2.1911956252392593e-06, "loss": 0.4969, "step": 11036 }, { "epoch": 0.7, "grad_norm": 1.0570365190505981, "learning_rate": 2.1903468801606125e-06, "loss": 0.5042, "step": 11037 }, { "epoch": 0.7, "grad_norm": 1.0584505796432495, "learning_rate": 2.1894982533866852e-06, "loss": 0.5255, "step": 11038 }, { "epoch": 0.7, "grad_norm": 1.0139025449752808, "learning_rate": 2.188649744953209e-06, "loss": 0.5084, "step": 11039 }, { "epoch": 0.7, "grad_norm": 1.0854088068008423, "learning_rate": 2.1878013548959145e-06, "loss": 0.5195, "step": 11040 }, { "epoch": 0.7, "grad_norm": 1.0749729871749878, "learning_rate": 2.186953083250519e-06, "loss": 0.5583, "step": 11041 }, { "epoch": 0.7, "grad_norm": 1.0553749799728394, "learning_rate": 2.1861049300527426e-06, "loss": 0.5038, "step": 11042 }, { "epoch": 0.7, "grad_norm": 1.1561241149902344, "learning_rate": 2.1852568953383025e-06, "loss": 0.5258, "step": 11043 }, { "epoch": 0.7, "grad_norm": 1.0235668420791626, "learning_rate": 2.1844089791429002e-06, "loss": 0.4742, "step": 11044 }, { "epoch": 0.7, "grad_norm": 1.0335506200790405, "learning_rate": 2.1835611815022412e-06, "loss": 0.5198, "step": 11045 }, { "epoch": 0.7, "grad_norm": 1.0722906589508057, "learning_rate": 2.182713502452025e-06, "loss": 0.4756, "step": 11046 }, { "epoch": 0.7, "grad_norm": 1.0140724182128906, "learning_rate": 2.181865942027939e-06, "loss": 0.524, "step": 11047 }, { "epoch": 0.7, "grad_norm": 1.1023540496826172, "learning_rate": 2.181018500265679e-06, "loss": 0.5404, "step": 11048 }, { "epoch": 0.7, "grad_norm": 0.9999150633811951, "learning_rate": 2.1801711772009203e-06, "loss": 0.4834, "step": 11049 }, { "epoch": 0.7, "grad_norm": 1.0206165313720703, "learning_rate": 2.179323972869345e-06, "loss": 0.5255, "step": 11050 }, { "epoch": 0.7, "grad_norm": 1.0228021144866943, "learning_rate": 2.1784768873066243e-06, "loss": 0.5006, "step": 11051 }, { "epoch": 0.7, "grad_norm": 1.073330283164978, "learning_rate": 2.1776299205484265e-06, "loss": 0.481, "step": 11052 }, { "epoch": 0.7, "grad_norm": 1.0602368116378784, "learning_rate": 2.176783072630414e-06, "loss": 0.4962, "step": 11053 }, { "epoch": 0.7, "grad_norm": 1.0899503231048584, "learning_rate": 2.1759363435882475e-06, "loss": 0.5091, "step": 11054 }, { "epoch": 0.7, "grad_norm": 0.9894519448280334, "learning_rate": 2.1750897334575736e-06, "loss": 0.5073, "step": 11055 }, { "epoch": 0.7, "grad_norm": 0.984174370765686, "learning_rate": 2.174243242274047e-06, "loss": 0.4952, "step": 11056 }, { "epoch": 0.7, "grad_norm": 1.1872106790542603, "learning_rate": 2.1733968700733066e-06, "loss": 0.5355, "step": 11057 }, { "epoch": 0.7, "grad_norm": 1.1067372560501099, "learning_rate": 2.1725506168909903e-06, "loss": 0.5262, "step": 11058 }, { "epoch": 0.7, "grad_norm": 1.0191781520843506, "learning_rate": 2.1717044827627314e-06, "loss": 0.4672, "step": 11059 }, { "epoch": 0.7, "grad_norm": 1.087689995765686, "learning_rate": 2.1708584677241586e-06, "loss": 0.5683, "step": 11060 }, { "epoch": 0.7, "grad_norm": 1.0050077438354492, "learning_rate": 2.170012571810893e-06, "loss": 0.4704, "step": 11061 }, { "epoch": 0.7, "grad_norm": 1.0539138317108154, "learning_rate": 2.1691667950585552e-06, "loss": 0.4936, "step": 11062 }, { "epoch": 0.7, "grad_norm": 1.0413488149642944, "learning_rate": 2.1683211375027543e-06, "loss": 0.5392, "step": 11063 }, { "epoch": 0.7, "grad_norm": 1.0355830192565918, "learning_rate": 2.1674755991790976e-06, "loss": 0.5305, "step": 11064 }, { "epoch": 0.7, "grad_norm": 1.0300675630569458, "learning_rate": 2.1666301801231937e-06, "loss": 0.5073, "step": 11065 }, { "epoch": 0.7, "grad_norm": 1.0976272821426392, "learning_rate": 2.1657848803706344e-06, "loss": 0.5136, "step": 11066 }, { "epoch": 0.7, "grad_norm": 0.9987136125564575, "learning_rate": 2.1649396999570137e-06, "loss": 0.4915, "step": 11067 }, { "epoch": 0.7, "grad_norm": 0.9640436172485352, "learning_rate": 2.1640946389179207e-06, "loss": 0.4741, "step": 11068 }, { "epoch": 0.7, "grad_norm": 1.044983148574829, "learning_rate": 2.1632496972889366e-06, "loss": 0.4902, "step": 11069 }, { "epoch": 0.7, "grad_norm": 1.0915316343307495, "learning_rate": 2.162404875105641e-06, "loss": 0.5721, "step": 11070 }, { "epoch": 0.7, "grad_norm": 1.0868504047393799, "learning_rate": 2.1615601724036033e-06, "loss": 0.5099, "step": 11071 }, { "epoch": 0.7, "grad_norm": 1.0221885442733765, "learning_rate": 2.1607155892183905e-06, "loss": 0.4969, "step": 11072 }, { "epoch": 0.7, "grad_norm": 0.9462901949882507, "learning_rate": 2.1598711255855713e-06, "loss": 0.4853, "step": 11073 }, { "epoch": 0.7, "grad_norm": 1.0767602920532227, "learning_rate": 2.1590267815406968e-06, "loss": 0.5303, "step": 11074 }, { "epoch": 0.7, "grad_norm": 1.1433262825012207, "learning_rate": 2.1581825571193216e-06, "loss": 0.5026, "step": 11075 }, { "epoch": 0.7, "grad_norm": 0.9912518858909607, "learning_rate": 2.1573384523569945e-06, "loss": 0.5184, "step": 11076 }, { "epoch": 0.7, "grad_norm": 1.1095296144485474, "learning_rate": 2.1564944672892524e-06, "loss": 0.4929, "step": 11077 }, { "epoch": 0.7, "grad_norm": 1.0347744226455688, "learning_rate": 2.1556506019516405e-06, "loss": 0.4954, "step": 11078 }, { "epoch": 0.7, "grad_norm": 0.9864851236343384, "learning_rate": 2.1548068563796855e-06, "loss": 0.4837, "step": 11079 }, { "epoch": 0.7, "grad_norm": 0.9941489100456238, "learning_rate": 2.1539632306089153e-06, "loss": 0.4784, "step": 11080 }, { "epoch": 0.7, "grad_norm": 1.0469934940338135, "learning_rate": 2.153119724674853e-06, "loss": 0.5082, "step": 11081 }, { "epoch": 0.7, "grad_norm": 1.0488927364349365, "learning_rate": 2.1522763386130156e-06, "loss": 0.5143, "step": 11082 }, { "epoch": 0.7, "grad_norm": 1.0068544149398804, "learning_rate": 2.1514330724589156e-06, "loss": 0.5018, "step": 11083 }, { "epoch": 0.7, "grad_norm": 1.047257423400879, "learning_rate": 2.1505899262480607e-06, "loss": 0.5603, "step": 11084 }, { "epoch": 0.7, "grad_norm": 1.1192617416381836, "learning_rate": 2.149746900015948e-06, "loss": 0.5434, "step": 11085 }, { "epoch": 0.7, "grad_norm": 1.0133657455444336, "learning_rate": 2.148903993798082e-06, "loss": 0.4893, "step": 11086 }, { "epoch": 0.7, "grad_norm": 1.0449867248535156, "learning_rate": 2.148061207629949e-06, "loss": 0.5281, "step": 11087 }, { "epoch": 0.7, "grad_norm": 1.0773366689682007, "learning_rate": 2.1472185415470365e-06, "loss": 0.4871, "step": 11088 }, { "epoch": 0.7, "grad_norm": 1.1084142923355103, "learning_rate": 2.1463759955848277e-06, "loss": 0.4858, "step": 11089 }, { "epoch": 0.7, "grad_norm": 1.022783875465393, "learning_rate": 2.1455335697787987e-06, "loss": 0.5446, "step": 11090 }, { "epoch": 0.7, "grad_norm": 1.0690456628799438, "learning_rate": 2.1446912641644206e-06, "loss": 0.5205, "step": 11091 }, { "epoch": 0.7, "grad_norm": 1.0429967641830444, "learning_rate": 2.1438490787771634e-06, "loss": 0.5059, "step": 11092 }, { "epoch": 0.7, "grad_norm": 1.024294137954712, "learning_rate": 2.1430070136524826e-06, "loss": 0.4773, "step": 11093 }, { "epoch": 0.7, "grad_norm": 1.056626796722412, "learning_rate": 2.1421650688258384e-06, "loss": 0.4814, "step": 11094 }, { "epoch": 0.7, "grad_norm": 1.05207097530365, "learning_rate": 2.1413232443326813e-06, "loss": 0.5463, "step": 11095 }, { "epoch": 0.7, "grad_norm": 1.137169599533081, "learning_rate": 2.140481540208458e-06, "loss": 0.5285, "step": 11096 }, { "epoch": 0.7, "grad_norm": 0.9908720254898071, "learning_rate": 2.1396399564886113e-06, "loss": 0.4938, "step": 11097 }, { "epoch": 0.7, "grad_norm": 1.0105817317962646, "learning_rate": 2.1387984932085714e-06, "loss": 0.4875, "step": 11098 }, { "epoch": 0.7, "grad_norm": 1.1183953285217285, "learning_rate": 2.1379571504037754e-06, "loss": 0.5309, "step": 11099 }, { "epoch": 0.7, "grad_norm": 1.161294937133789, "learning_rate": 2.1371159281096497e-06, "loss": 0.4929, "step": 11100 }, { "epoch": 0.7, "grad_norm": 1.05838143825531, "learning_rate": 2.1362748263616112e-06, "loss": 0.5582, "step": 11101 }, { "epoch": 0.7, "grad_norm": 1.0622502565383911, "learning_rate": 2.1354338451950774e-06, "loss": 0.4505, "step": 11102 }, { "epoch": 0.7, "grad_norm": 1.0257731676101685, "learning_rate": 2.1345929846454593e-06, "loss": 0.5022, "step": 11103 }, { "epoch": 0.7, "grad_norm": 1.1054282188415527, "learning_rate": 2.133752244748163e-06, "loss": 0.5233, "step": 11104 }, { "epoch": 0.7, "grad_norm": 1.0054208040237427, "learning_rate": 2.1329116255385902e-06, "loss": 0.4959, "step": 11105 }, { "epoch": 0.7, "grad_norm": 1.0392478704452515, "learning_rate": 2.132071127052131e-06, "loss": 0.5656, "step": 11106 }, { "epoch": 0.7, "grad_norm": 1.0151344537734985, "learning_rate": 2.1312307493241825e-06, "loss": 0.484, "step": 11107 }, { "epoch": 0.7, "grad_norm": 0.995629608631134, "learning_rate": 2.1303904923901288e-06, "loss": 0.5096, "step": 11108 }, { "epoch": 0.7, "grad_norm": 1.0044054985046387, "learning_rate": 2.1295503562853466e-06, "loss": 0.5092, "step": 11109 }, { "epoch": 0.7, "grad_norm": 1.0939685106277466, "learning_rate": 2.1287103410452135e-06, "loss": 0.5067, "step": 11110 }, { "epoch": 0.7, "grad_norm": 1.074385166168213, "learning_rate": 2.1278704467050996e-06, "loss": 0.5428, "step": 11111 }, { "epoch": 0.7, "grad_norm": 0.9988450407981873, "learning_rate": 2.1270306733003697e-06, "loss": 0.4826, "step": 11112 }, { "epoch": 0.7, "grad_norm": 1.10425865650177, "learning_rate": 2.126191020866386e-06, "loss": 0.5378, "step": 11113 }, { "epoch": 0.7, "grad_norm": 1.0218243598937988, "learning_rate": 2.125351489438499e-06, "loss": 0.502, "step": 11114 }, { "epoch": 0.7, "grad_norm": 1.021587610244751, "learning_rate": 2.124512079052059e-06, "loss": 0.4711, "step": 11115 }, { "epoch": 0.7, "grad_norm": 0.9821772575378418, "learning_rate": 2.123672789742416e-06, "loss": 0.513, "step": 11116 }, { "epoch": 0.7, "grad_norm": 1.0624462366104126, "learning_rate": 2.1228336215449036e-06, "loss": 0.5217, "step": 11117 }, { "epoch": 0.7, "grad_norm": 1.0033706426620483, "learning_rate": 2.1219945744948584e-06, "loss": 0.5026, "step": 11118 }, { "epoch": 0.7, "grad_norm": 1.0093096494674683, "learning_rate": 2.12115564862761e-06, "loss": 0.4732, "step": 11119 }, { "epoch": 0.7, "grad_norm": 1.0732694864273071, "learning_rate": 2.1203168439784828e-06, "loss": 0.5009, "step": 11120 }, { "epoch": 0.7, "grad_norm": 1.037276029586792, "learning_rate": 2.119478160582797e-06, "loss": 0.5289, "step": 11121 }, { "epoch": 0.7, "grad_norm": 1.017081379890442, "learning_rate": 2.1186395984758633e-06, "loss": 0.4603, "step": 11122 }, { "epoch": 0.7, "grad_norm": 1.0212736129760742, "learning_rate": 2.117801157692993e-06, "loss": 0.5097, "step": 11123 }, { "epoch": 0.7, "grad_norm": 1.090965986251831, "learning_rate": 2.1169628382694894e-06, "loss": 0.5066, "step": 11124 }, { "epoch": 0.7, "grad_norm": 1.0188883543014526, "learning_rate": 2.1161246402406518e-06, "loss": 0.4921, "step": 11125 }, { "epoch": 0.7, "grad_norm": 1.0649949312210083, "learning_rate": 2.1152865636417723e-06, "loss": 0.4927, "step": 11126 }, { "epoch": 0.7, "grad_norm": 1.0409399271011353, "learning_rate": 2.114448608508143e-06, "loss": 0.4848, "step": 11127 }, { "epoch": 0.71, "grad_norm": 1.0882501602172852, "learning_rate": 2.113610774875041e-06, "loss": 0.4795, "step": 11128 }, { "epoch": 0.71, "grad_norm": 1.0318166017532349, "learning_rate": 2.1127730627777497e-06, "loss": 0.4837, "step": 11129 }, { "epoch": 0.71, "grad_norm": 1.11487877368927, "learning_rate": 2.111935472251543e-06, "loss": 0.5097, "step": 11130 }, { "epoch": 0.71, "grad_norm": 1.081142544746399, "learning_rate": 2.1110980033316846e-06, "loss": 0.505, "step": 11131 }, { "epoch": 0.71, "grad_norm": 1.092267632484436, "learning_rate": 2.1102606560534393e-06, "loss": 0.5276, "step": 11132 }, { "epoch": 0.71, "grad_norm": 1.138821005821228, "learning_rate": 2.1094234304520655e-06, "loss": 0.5229, "step": 11133 }, { "epoch": 0.71, "grad_norm": 1.0012593269348145, "learning_rate": 2.108586326562816e-06, "loss": 0.5231, "step": 11134 }, { "epoch": 0.71, "grad_norm": 1.0540804862976074, "learning_rate": 2.1077493444209385e-06, "loss": 0.543, "step": 11135 }, { "epoch": 0.71, "grad_norm": 1.1856242418289185, "learning_rate": 2.1069124840616717e-06, "loss": 0.5298, "step": 11136 }, { "epoch": 0.71, "grad_norm": 1.0341116189956665, "learning_rate": 2.1060757455202574e-06, "loss": 0.495, "step": 11137 }, { "epoch": 0.71, "grad_norm": 1.091784954071045, "learning_rate": 2.1052391288319285e-06, "loss": 0.5274, "step": 11138 }, { "epoch": 0.71, "grad_norm": 1.112533450126648, "learning_rate": 2.1044026340319075e-06, "loss": 0.4736, "step": 11139 }, { "epoch": 0.71, "grad_norm": 1.0528075695037842, "learning_rate": 2.1035662611554187e-06, "loss": 0.5098, "step": 11140 }, { "epoch": 0.71, "grad_norm": 1.0677040815353394, "learning_rate": 2.1027300102376787e-06, "loss": 0.5021, "step": 11141 }, { "epoch": 0.71, "grad_norm": 1.055909514427185, "learning_rate": 2.101893881313899e-06, "loss": 0.5107, "step": 11142 }, { "epoch": 0.71, "grad_norm": 1.1071017980575562, "learning_rate": 2.1010578744192885e-06, "loss": 0.5404, "step": 11143 }, { "epoch": 0.71, "grad_norm": 1.079486608505249, "learning_rate": 2.1002219895890435e-06, "loss": 0.5516, "step": 11144 }, { "epoch": 0.71, "grad_norm": 1.0691206455230713, "learning_rate": 2.099386226858362e-06, "loss": 0.518, "step": 11145 }, { "epoch": 0.71, "grad_norm": 0.9732725620269775, "learning_rate": 2.098550586262439e-06, "loss": 0.5213, "step": 11146 }, { "epoch": 0.71, "grad_norm": 1.065765380859375, "learning_rate": 2.097715067836456e-06, "loss": 0.4892, "step": 11147 }, { "epoch": 0.71, "grad_norm": 1.1149744987487793, "learning_rate": 2.096879671615595e-06, "loss": 0.5408, "step": 11148 }, { "epoch": 0.71, "grad_norm": 1.0927714109420776, "learning_rate": 2.0960443976350315e-06, "loss": 0.5461, "step": 11149 }, { "epoch": 0.71, "grad_norm": 1.0623589754104614, "learning_rate": 2.0952092459299366e-06, "loss": 0.5559, "step": 11150 }, { "epoch": 0.71, "grad_norm": 1.0234901905059814, "learning_rate": 2.0943742165354776e-06, "loss": 0.484, "step": 11151 }, { "epoch": 0.71, "grad_norm": 1.0182840824127197, "learning_rate": 2.0935393094868094e-06, "loss": 0.4995, "step": 11152 }, { "epoch": 0.71, "grad_norm": 1.0194854736328125, "learning_rate": 2.092704524819089e-06, "loss": 0.466, "step": 11153 }, { "epoch": 0.71, "grad_norm": 1.0558594465255737, "learning_rate": 2.091869862567471e-06, "loss": 0.5392, "step": 11154 }, { "epoch": 0.71, "grad_norm": 1.046743392944336, "learning_rate": 2.091035322767095e-06, "loss": 0.5264, "step": 11155 }, { "epoch": 0.71, "grad_norm": 1.071263074874878, "learning_rate": 2.0902009054531013e-06, "loss": 0.5353, "step": 11156 }, { "epoch": 0.71, "grad_norm": 1.16365385055542, "learning_rate": 2.089366610660627e-06, "loss": 0.5367, "step": 11157 }, { "epoch": 0.71, "grad_norm": 1.0887624025344849, "learning_rate": 2.0885324384247956e-06, "loss": 0.5095, "step": 11158 }, { "epoch": 0.71, "grad_norm": 0.9924121499061584, "learning_rate": 2.087698388780739e-06, "loss": 0.4924, "step": 11159 }, { "epoch": 0.71, "grad_norm": 1.0082294940948486, "learning_rate": 2.0868644617635697e-06, "loss": 0.4927, "step": 11160 }, { "epoch": 0.71, "grad_norm": 1.0977580547332764, "learning_rate": 2.0860306574084043e-06, "loss": 0.4932, "step": 11161 }, { "epoch": 0.71, "grad_norm": 1.0345255136489868, "learning_rate": 2.085196975750351e-06, "loss": 0.5063, "step": 11162 }, { "epoch": 0.71, "grad_norm": 1.0377495288848877, "learning_rate": 2.084363416824513e-06, "loss": 0.4963, "step": 11163 }, { "epoch": 0.71, "grad_norm": 0.968866765499115, "learning_rate": 2.0835299806659885e-06, "loss": 0.4895, "step": 11164 }, { "epoch": 0.71, "grad_norm": 1.0815399885177612, "learning_rate": 2.0826966673098737e-06, "loss": 0.5468, "step": 11165 }, { "epoch": 0.71, "grad_norm": 1.0565381050109863, "learning_rate": 2.0818634767912495e-06, "loss": 0.5364, "step": 11166 }, { "epoch": 0.71, "grad_norm": 1.0638211965560913, "learning_rate": 2.081030409145206e-06, "loss": 0.5149, "step": 11167 }, { "epoch": 0.71, "grad_norm": 1.0905492305755615, "learning_rate": 2.080197464406816e-06, "loss": 0.525, "step": 11168 }, { "epoch": 0.71, "grad_norm": 0.9871543645858765, "learning_rate": 2.0793646426111536e-06, "loss": 0.4742, "step": 11169 }, { "epoch": 0.71, "grad_norm": 1.0492466688156128, "learning_rate": 2.078531943793288e-06, "loss": 0.5319, "step": 11170 }, { "epoch": 0.71, "grad_norm": 1.1168328523635864, "learning_rate": 2.0776993679882752e-06, "loss": 0.4988, "step": 11171 }, { "epoch": 0.71, "grad_norm": 0.9689909815788269, "learning_rate": 2.076866915231178e-06, "loss": 0.5064, "step": 11172 }, { "epoch": 0.71, "grad_norm": 1.1330360174179077, "learning_rate": 2.076034585557048e-06, "loss": 0.5113, "step": 11173 }, { "epoch": 0.71, "grad_norm": 1.1285103559494019, "learning_rate": 2.075202379000928e-06, "loss": 0.4873, "step": 11174 }, { "epoch": 0.71, "grad_norm": 1.0235905647277832, "learning_rate": 2.074370295597861e-06, "loss": 0.5143, "step": 11175 }, { "epoch": 0.71, "grad_norm": 1.1421451568603516, "learning_rate": 2.0735383353828843e-06, "loss": 0.5372, "step": 11176 }, { "epoch": 0.71, "grad_norm": 1.0220662355422974, "learning_rate": 2.0727064983910266e-06, "loss": 0.5209, "step": 11177 }, { "epoch": 0.71, "grad_norm": 1.1236740350723267, "learning_rate": 2.071874784657318e-06, "loss": 0.5237, "step": 11178 }, { "epoch": 0.71, "grad_norm": 1.0522505044937134, "learning_rate": 2.0710431942167713e-06, "loss": 0.4875, "step": 11179 }, { "epoch": 0.71, "grad_norm": 1.1153757572174072, "learning_rate": 2.070211727104409e-06, "loss": 0.4592, "step": 11180 }, { "epoch": 0.71, "grad_norm": 1.1207882165908813, "learning_rate": 2.0693803833552407e-06, "loss": 0.5077, "step": 11181 }, { "epoch": 0.71, "grad_norm": 0.9922836422920227, "learning_rate": 2.0685491630042677e-06, "loss": 0.527, "step": 11182 }, { "epoch": 0.71, "grad_norm": 1.0508869886398315, "learning_rate": 2.0677180660864916e-06, "loss": 0.5101, "step": 11183 }, { "epoch": 0.71, "grad_norm": 1.1612192392349243, "learning_rate": 2.0668870926369068e-06, "loss": 0.496, "step": 11184 }, { "epoch": 0.71, "grad_norm": 1.0142918825149536, "learning_rate": 2.066056242690503e-06, "loss": 0.5285, "step": 11185 }, { "epoch": 0.71, "grad_norm": 1.0472638607025146, "learning_rate": 2.0652255162822665e-06, "loss": 0.4657, "step": 11186 }, { "epoch": 0.71, "grad_norm": 1.0182528495788574, "learning_rate": 2.0643949134471726e-06, "loss": 0.4788, "step": 11187 }, { "epoch": 0.71, "grad_norm": 1.0504941940307617, "learning_rate": 2.0635644342201942e-06, "loss": 0.528, "step": 11188 }, { "epoch": 0.71, "grad_norm": 0.9788329005241394, "learning_rate": 2.0627340786363063e-06, "loss": 0.5245, "step": 11189 }, { "epoch": 0.71, "grad_norm": 1.095033049583435, "learning_rate": 2.0619038467304663e-06, "loss": 0.517, "step": 11190 }, { "epoch": 0.71, "grad_norm": 1.038672685623169, "learning_rate": 2.061073738537635e-06, "loss": 0.4951, "step": 11191 }, { "epoch": 0.71, "grad_norm": 1.0306553840637207, "learning_rate": 2.0602437540927644e-06, "loss": 0.4946, "step": 11192 }, { "epoch": 0.71, "grad_norm": 1.1046713590621948, "learning_rate": 2.0594138934308027e-06, "loss": 0.5486, "step": 11193 }, { "epoch": 0.71, "grad_norm": 1.0907617807388306, "learning_rate": 2.058584156586692e-06, "loss": 0.5483, "step": 11194 }, { "epoch": 0.71, "grad_norm": 1.0609095096588135, "learning_rate": 2.0577545435953727e-06, "loss": 0.4487, "step": 11195 }, { "epoch": 0.71, "grad_norm": 1.0488719940185547, "learning_rate": 2.05692505449177e-06, "loss": 0.5099, "step": 11196 }, { "epoch": 0.71, "grad_norm": 1.1803630590438843, "learning_rate": 2.0560956893108188e-06, "loss": 0.5679, "step": 11197 }, { "epoch": 0.71, "grad_norm": 1.075703501701355, "learning_rate": 2.0552664480874353e-06, "loss": 0.4943, "step": 11198 }, { "epoch": 0.71, "grad_norm": 1.1885625123977661, "learning_rate": 2.0544373308565374e-06, "loss": 0.5386, "step": 11199 }, { "epoch": 0.71, "grad_norm": 1.1075416803359985, "learning_rate": 2.0536083376530368e-06, "loss": 0.4967, "step": 11200 }, { "epoch": 0.71, "grad_norm": 1.011660099029541, "learning_rate": 2.0527794685118397e-06, "loss": 0.5201, "step": 11201 }, { "epoch": 0.71, "grad_norm": 1.0100550651550293, "learning_rate": 2.0519507234678464e-06, "loss": 0.539, "step": 11202 }, { "epoch": 0.71, "grad_norm": 1.0797109603881836, "learning_rate": 2.051122102555954e-06, "loss": 0.5122, "step": 11203 }, { "epoch": 0.71, "grad_norm": 1.1042869091033936, "learning_rate": 2.0502936058110502e-06, "loss": 0.5564, "step": 11204 }, { "epoch": 0.71, "grad_norm": 1.0236577987670898, "learning_rate": 2.049465233268021e-06, "loss": 0.5112, "step": 11205 }, { "epoch": 0.71, "grad_norm": 1.011128544807434, "learning_rate": 2.0486369849617467e-06, "loss": 0.4928, "step": 11206 }, { "epoch": 0.71, "grad_norm": 1.1878892183303833, "learning_rate": 2.0478088609271018e-06, "loss": 0.4953, "step": 11207 }, { "epoch": 0.71, "grad_norm": 1.1174811124801636, "learning_rate": 2.0469808611989583e-06, "loss": 0.5365, "step": 11208 }, { "epoch": 0.71, "grad_norm": 0.9994081854820251, "learning_rate": 2.0461529858121737e-06, "loss": 0.5011, "step": 11209 }, { "epoch": 0.71, "grad_norm": 1.0205951929092407, "learning_rate": 2.0453252348016133e-06, "loss": 0.5281, "step": 11210 }, { "epoch": 0.71, "grad_norm": 1.1167793273925781, "learning_rate": 2.04449760820213e-06, "loss": 0.5057, "step": 11211 }, { "epoch": 0.71, "grad_norm": 1.0231558084487915, "learning_rate": 2.04367010604857e-06, "loss": 0.5226, "step": 11212 }, { "epoch": 0.71, "grad_norm": 0.9758703112602234, "learning_rate": 2.042842728375777e-06, "loss": 0.5179, "step": 11213 }, { "epoch": 0.71, "grad_norm": 0.9797309637069702, "learning_rate": 2.0420154752185896e-06, "loss": 0.4918, "step": 11214 }, { "epoch": 0.71, "grad_norm": 1.0304148197174072, "learning_rate": 2.0411883466118406e-06, "loss": 0.5326, "step": 11215 }, { "epoch": 0.71, "grad_norm": 1.0446081161499023, "learning_rate": 2.0403613425903584e-06, "loss": 0.5099, "step": 11216 }, { "epoch": 0.71, "grad_norm": 1.0075976848602295, "learning_rate": 2.0395344631889636e-06, "loss": 0.4837, "step": 11217 }, { "epoch": 0.71, "grad_norm": 1.0283238887786865, "learning_rate": 2.038707708442471e-06, "loss": 0.5477, "step": 11218 }, { "epoch": 0.71, "grad_norm": 1.0798712968826294, "learning_rate": 2.0378810783856996e-06, "loss": 0.5146, "step": 11219 }, { "epoch": 0.71, "grad_norm": 0.9746232032775879, "learning_rate": 2.0370545730534493e-06, "loss": 0.478, "step": 11220 }, { "epoch": 0.71, "grad_norm": 1.0954176187515259, "learning_rate": 2.0362281924805238e-06, "loss": 0.5121, "step": 11221 }, { "epoch": 0.71, "grad_norm": 1.0521270036697388, "learning_rate": 2.035401936701719e-06, "loss": 0.5004, "step": 11222 }, { "epoch": 0.71, "grad_norm": 1.0404530763626099, "learning_rate": 2.034575805751825e-06, "loss": 0.5091, "step": 11223 }, { "epoch": 0.71, "grad_norm": 0.9950894713401794, "learning_rate": 2.0337497996656303e-06, "loss": 0.5174, "step": 11224 }, { "epoch": 0.71, "grad_norm": 0.9830288887023926, "learning_rate": 2.03292391847791e-06, "loss": 0.4834, "step": 11225 }, { "epoch": 0.71, "grad_norm": 1.1587971448898315, "learning_rate": 2.032098162223441e-06, "loss": 0.5193, "step": 11226 }, { "epoch": 0.71, "grad_norm": 0.9999991059303284, "learning_rate": 2.031272530936997e-06, "loss": 0.4845, "step": 11227 }, { "epoch": 0.71, "grad_norm": 0.9857888221740723, "learning_rate": 2.0304470246533377e-06, "loss": 0.5142, "step": 11228 }, { "epoch": 0.71, "grad_norm": 1.0121004581451416, "learning_rate": 2.0296216434072237e-06, "loss": 0.4667, "step": 11229 }, { "epoch": 0.71, "grad_norm": 1.0974059104919434, "learning_rate": 2.0287963872334093e-06, "loss": 0.5211, "step": 11230 }, { "epoch": 0.71, "grad_norm": 0.9777387976646423, "learning_rate": 2.0279712561666425e-06, "loss": 0.4779, "step": 11231 }, { "epoch": 0.71, "grad_norm": 1.0181130170822144, "learning_rate": 2.0271462502416694e-06, "loss": 0.5109, "step": 11232 }, { "epoch": 0.71, "grad_norm": 1.071170687675476, "learning_rate": 2.0263213694932238e-06, "loss": 0.4536, "step": 11233 }, { "epoch": 0.71, "grad_norm": 1.005286455154419, "learning_rate": 2.0254966139560404e-06, "loss": 0.4688, "step": 11234 }, { "epoch": 0.71, "grad_norm": 1.1063686609268188, "learning_rate": 2.0246719836648476e-06, "loss": 0.5128, "step": 11235 }, { "epoch": 0.71, "grad_norm": 1.131303310394287, "learning_rate": 2.0238474786543673e-06, "loss": 0.4972, "step": 11236 }, { "epoch": 0.71, "grad_norm": 1.00669264793396, "learning_rate": 2.0230230989593157e-06, "loss": 0.4993, "step": 11237 }, { "epoch": 0.71, "grad_norm": 1.0949076414108276, "learning_rate": 2.0221988446144076e-06, "loss": 0.5441, "step": 11238 }, { "epoch": 0.71, "grad_norm": 1.1112231016159058, "learning_rate": 2.0213747156543432e-06, "loss": 0.4917, "step": 11239 }, { "epoch": 0.71, "grad_norm": 1.0323106050491333, "learning_rate": 2.0205507121138316e-06, "loss": 0.4912, "step": 11240 }, { "epoch": 0.71, "grad_norm": 1.1358355283737183, "learning_rate": 2.019726834027563e-06, "loss": 0.511, "step": 11241 }, { "epoch": 0.71, "grad_norm": 1.0783838033676147, "learning_rate": 2.0189030814302295e-06, "loss": 0.4974, "step": 11242 }, { "epoch": 0.71, "grad_norm": 1.1246992349624634, "learning_rate": 2.018079454356517e-06, "loss": 0.4554, "step": 11243 }, { "epoch": 0.71, "grad_norm": 1.1323680877685547, "learning_rate": 2.017255952841105e-06, "loss": 0.5229, "step": 11244 }, { "epoch": 0.71, "grad_norm": 1.051601767539978, "learning_rate": 2.016432576918669e-06, "loss": 0.4711, "step": 11245 }, { "epoch": 0.71, "grad_norm": 1.0374492406845093, "learning_rate": 2.0156093266238795e-06, "loss": 0.4495, "step": 11246 }, { "epoch": 0.71, "grad_norm": 1.093229055404663, "learning_rate": 2.014786201991396e-06, "loss": 0.5367, "step": 11247 }, { "epoch": 0.71, "grad_norm": 1.1299701929092407, "learning_rate": 2.0139632030558844e-06, "loss": 0.5289, "step": 11248 }, { "epoch": 0.71, "grad_norm": 0.997376561164856, "learning_rate": 2.0131403298519927e-06, "loss": 0.4649, "step": 11249 }, { "epoch": 0.71, "grad_norm": 1.056484580039978, "learning_rate": 2.012317582414371e-06, "loss": 0.5172, "step": 11250 }, { "epoch": 0.71, "grad_norm": 1.0173227787017822, "learning_rate": 2.011494960777663e-06, "loss": 0.4651, "step": 11251 }, { "epoch": 0.71, "grad_norm": 1.0372679233551025, "learning_rate": 2.0106724649765055e-06, "loss": 0.5308, "step": 11252 }, { "epoch": 0.71, "grad_norm": 1.1156564950942993, "learning_rate": 2.0098500950455313e-06, "loss": 0.5787, "step": 11253 }, { "epoch": 0.71, "grad_norm": 1.0682792663574219, "learning_rate": 2.00902785101937e-06, "loss": 0.4912, "step": 11254 }, { "epoch": 0.71, "grad_norm": 0.9660002589225769, "learning_rate": 2.008205732932639e-06, "loss": 0.4581, "step": 11255 }, { "epoch": 0.71, "grad_norm": 1.0296785831451416, "learning_rate": 2.0073837408199566e-06, "loss": 0.4989, "step": 11256 }, { "epoch": 0.71, "grad_norm": 1.090598225593567, "learning_rate": 2.0065618747159342e-06, "loss": 0.4808, "step": 11257 }, { "epoch": 0.71, "grad_norm": 0.943744957447052, "learning_rate": 2.0057401346551785e-06, "loss": 0.4732, "step": 11258 }, { "epoch": 0.71, "grad_norm": 1.0518137216567993, "learning_rate": 2.004918520672289e-06, "loss": 0.5272, "step": 11259 }, { "epoch": 0.71, "grad_norm": 1.031441569328308, "learning_rate": 2.0040970328018618e-06, "loss": 0.535, "step": 11260 }, { "epoch": 0.71, "grad_norm": 1.0439598560333252, "learning_rate": 2.0032756710784864e-06, "loss": 0.5144, "step": 11261 }, { "epoch": 0.71, "grad_norm": 1.0440160036087036, "learning_rate": 2.0024544355367494e-06, "loss": 0.5096, "step": 11262 }, { "epoch": 0.71, "grad_norm": 1.067297339439392, "learning_rate": 2.001633326211227e-06, "loss": 0.4597, "step": 11263 }, { "epoch": 0.71, "grad_norm": 1.032602310180664, "learning_rate": 2.000812343136494e-06, "loss": 0.5235, "step": 11264 }, { "epoch": 0.71, "grad_norm": 1.0836875438690186, "learning_rate": 1.99999148634712e-06, "loss": 0.5136, "step": 11265 }, { "epoch": 0.71, "grad_norm": 1.0357921123504639, "learning_rate": 1.9991707558776686e-06, "loss": 0.5102, "step": 11266 }, { "epoch": 0.71, "grad_norm": 1.1127569675445557, "learning_rate": 1.9983501517626976e-06, "loss": 0.5721, "step": 11267 }, { "epoch": 0.71, "grad_norm": 0.9863804578781128, "learning_rate": 1.997529674036761e-06, "loss": 0.4779, "step": 11268 }, { "epoch": 0.71, "grad_norm": 1.0605449676513672, "learning_rate": 1.9967093227344013e-06, "loss": 0.5381, "step": 11269 }, { "epoch": 0.71, "grad_norm": 1.0444042682647705, "learning_rate": 1.9958890978901685e-06, "loss": 0.5028, "step": 11270 }, { "epoch": 0.71, "grad_norm": 1.1172131299972534, "learning_rate": 1.9950689995385936e-06, "loss": 0.4988, "step": 11271 }, { "epoch": 0.71, "grad_norm": 1.117167592048645, "learning_rate": 1.994249027714209e-06, "loss": 0.5496, "step": 11272 }, { "epoch": 0.71, "grad_norm": 1.0403753519058228, "learning_rate": 1.9934291824515423e-06, "loss": 0.493, "step": 11273 }, { "epoch": 0.71, "grad_norm": 1.0917158126831055, "learning_rate": 1.9926094637851135e-06, "loss": 0.5256, "step": 11274 }, { "epoch": 0.71, "grad_norm": 1.0141263008117676, "learning_rate": 1.9917898717494377e-06, "loss": 0.5241, "step": 11275 }, { "epoch": 0.71, "grad_norm": 1.0066581964492798, "learning_rate": 1.990970406379028e-06, "loss": 0.4802, "step": 11276 }, { "epoch": 0.71, "grad_norm": 1.133183479309082, "learning_rate": 1.990151067708383e-06, "loss": 0.5512, "step": 11277 }, { "epoch": 0.71, "grad_norm": 1.0141957998275757, "learning_rate": 1.9893318557720093e-06, "loss": 0.507, "step": 11278 }, { "epoch": 0.71, "grad_norm": 1.0804271697998047, "learning_rate": 1.9885127706043966e-06, "loss": 0.5047, "step": 11279 }, { "epoch": 0.71, "grad_norm": 1.102764368057251, "learning_rate": 1.9876938122400348e-06, "loss": 0.5152, "step": 11280 }, { "epoch": 0.71, "grad_norm": 1.1434690952301025, "learning_rate": 1.9868749807134087e-06, "loss": 0.5075, "step": 11281 }, { "epoch": 0.71, "grad_norm": 1.0453921556472778, "learning_rate": 1.9860562760589926e-06, "loss": 0.4667, "step": 11282 }, { "epoch": 0.71, "grad_norm": 1.1235970258712769, "learning_rate": 1.9852376983112632e-06, "loss": 0.5656, "step": 11283 }, { "epoch": 0.71, "grad_norm": 1.0331393480300903, "learning_rate": 1.9844192475046885e-06, "loss": 0.5191, "step": 11284 }, { "epoch": 0.71, "grad_norm": 1.0233731269836426, "learning_rate": 1.983600923673727e-06, "loss": 0.4515, "step": 11285 }, { "epoch": 0.72, "grad_norm": 1.056684136390686, "learning_rate": 1.9827827268528378e-06, "loss": 0.471, "step": 11286 }, { "epoch": 0.72, "grad_norm": 1.06417977809906, "learning_rate": 1.9819646570764712e-06, "loss": 0.5335, "step": 11287 }, { "epoch": 0.72, "grad_norm": 1.177718997001648, "learning_rate": 1.981146714379074e-06, "loss": 0.5429, "step": 11288 }, { "epoch": 0.72, "grad_norm": 1.0227022171020508, "learning_rate": 1.980328898795089e-06, "loss": 0.4898, "step": 11289 }, { "epoch": 0.72, "grad_norm": 1.1076995134353638, "learning_rate": 1.979511210358946e-06, "loss": 0.5011, "step": 11290 }, { "epoch": 0.72, "grad_norm": 1.0327277183532715, "learning_rate": 1.9786936491050803e-06, "loss": 0.4993, "step": 11291 }, { "epoch": 0.72, "grad_norm": 1.0501066446304321, "learning_rate": 1.9778762150679155e-06, "loss": 0.4762, "step": 11292 }, { "epoch": 0.72, "grad_norm": 1.088322639465332, "learning_rate": 1.9770589082818694e-06, "loss": 0.5064, "step": 11293 }, { "epoch": 0.72, "grad_norm": 1.0388498306274414, "learning_rate": 1.9762417287813557e-06, "loss": 0.4958, "step": 11294 }, { "epoch": 0.72, "grad_norm": 1.1175765991210938, "learning_rate": 1.9754246766007847e-06, "loss": 0.5141, "step": 11295 }, { "epoch": 0.72, "grad_norm": 1.0213274955749512, "learning_rate": 1.9746077517745582e-06, "loss": 0.4621, "step": 11296 }, { "epoch": 0.72, "grad_norm": 0.9821017980575562, "learning_rate": 1.9737909543370764e-06, "loss": 0.4978, "step": 11297 }, { "epoch": 0.72, "grad_norm": 1.0173606872558594, "learning_rate": 1.972974284322729e-06, "loss": 0.4404, "step": 11298 }, { "epoch": 0.72, "grad_norm": 0.9861236214637756, "learning_rate": 1.9721577417659023e-06, "loss": 0.4939, "step": 11299 }, { "epoch": 0.72, "grad_norm": 1.0011417865753174, "learning_rate": 1.9713413267009827e-06, "loss": 0.4903, "step": 11300 }, { "epoch": 0.72, "grad_norm": 1.010124921798706, "learning_rate": 1.970525039162343e-06, "loss": 0.5351, "step": 11301 }, { "epoch": 0.72, "grad_norm": 1.1067034006118774, "learning_rate": 1.969708879184355e-06, "loss": 0.5409, "step": 11302 }, { "epoch": 0.72, "grad_norm": 1.0343722105026245, "learning_rate": 1.9688928468013846e-06, "loss": 0.4735, "step": 11303 }, { "epoch": 0.72, "grad_norm": 1.037208914756775, "learning_rate": 1.968076942047791e-06, "loss": 0.4547, "step": 11304 }, { "epoch": 0.72, "grad_norm": 1.0555593967437744, "learning_rate": 1.9672611649579332e-06, "loss": 0.5243, "step": 11305 }, { "epoch": 0.72, "grad_norm": 1.0606285333633423, "learning_rate": 1.966445515566155e-06, "loss": 0.4775, "step": 11306 }, { "epoch": 0.72, "grad_norm": 1.1268073320388794, "learning_rate": 1.965629993906802e-06, "loss": 0.538, "step": 11307 }, { "epoch": 0.72, "grad_norm": 0.985319972038269, "learning_rate": 1.9648146000142173e-06, "loss": 0.4964, "step": 11308 }, { "epoch": 0.72, "grad_norm": 1.029909610748291, "learning_rate": 1.963999333922729e-06, "loss": 0.4562, "step": 11309 }, { "epoch": 0.72, "grad_norm": 1.000618815422058, "learning_rate": 1.963184195666668e-06, "loss": 0.4975, "step": 11310 }, { "epoch": 0.72, "grad_norm": 1.0195996761322021, "learning_rate": 1.9623691852803577e-06, "loss": 0.4938, "step": 11311 }, { "epoch": 0.72, "grad_norm": 1.0395841598510742, "learning_rate": 1.9615543027981105e-06, "loss": 0.5104, "step": 11312 }, { "epoch": 0.72, "grad_norm": 1.0242948532104492, "learning_rate": 1.9607395482542446e-06, "loss": 0.4656, "step": 11313 }, { "epoch": 0.72, "grad_norm": 1.117276906967163, "learning_rate": 1.9599249216830624e-06, "loss": 0.5337, "step": 11314 }, { "epoch": 0.72, "grad_norm": 1.0707858800888062, "learning_rate": 1.9591104231188656e-06, "loss": 0.5017, "step": 11315 }, { "epoch": 0.72, "grad_norm": 0.9831403493881226, "learning_rate": 1.958296052595951e-06, "loss": 0.4853, "step": 11316 }, { "epoch": 0.72, "grad_norm": 1.0604276657104492, "learning_rate": 1.9574818101486075e-06, "loss": 0.4831, "step": 11317 }, { "epoch": 0.72, "grad_norm": 1.0770059823989868, "learning_rate": 1.9566676958111214e-06, "loss": 0.5233, "step": 11318 }, { "epoch": 0.72, "grad_norm": 1.0600957870483398, "learning_rate": 1.955853709617773e-06, "loss": 0.517, "step": 11319 }, { "epoch": 0.72, "grad_norm": 1.092265009880066, "learning_rate": 1.955039851602832e-06, "loss": 0.5532, "step": 11320 }, { "epoch": 0.72, "grad_norm": 1.0496869087219238, "learning_rate": 1.9542261218005737e-06, "loss": 0.5111, "step": 11321 }, { "epoch": 0.72, "grad_norm": 1.0955640077590942, "learning_rate": 1.9534125202452557e-06, "loss": 0.5324, "step": 11322 }, { "epoch": 0.72, "grad_norm": 1.0555497407913208, "learning_rate": 1.952599046971139e-06, "loss": 0.5649, "step": 11323 }, { "epoch": 0.72, "grad_norm": 1.0537457466125488, "learning_rate": 1.951785702012475e-06, "loss": 0.5116, "step": 11324 }, { "epoch": 0.72, "grad_norm": 1.101248025894165, "learning_rate": 1.9509724854035105e-06, "loss": 0.5101, "step": 11325 }, { "epoch": 0.72, "grad_norm": 1.058995246887207, "learning_rate": 1.950159397178488e-06, "loss": 0.5711, "step": 11326 }, { "epoch": 0.72, "grad_norm": 1.1053820848464966, "learning_rate": 1.9493464373716458e-06, "loss": 0.5189, "step": 11327 }, { "epoch": 0.72, "grad_norm": 1.038176417350769, "learning_rate": 1.9485336060172106e-06, "loss": 0.5208, "step": 11328 }, { "epoch": 0.72, "grad_norm": 1.162173867225647, "learning_rate": 1.9477209031494104e-06, "loss": 0.5208, "step": 11329 }, { "epoch": 0.72, "grad_norm": 0.8813678026199341, "learning_rate": 1.9469083288024647e-06, "loss": 0.4458, "step": 11330 }, { "epoch": 0.72, "grad_norm": 1.0723689794540405, "learning_rate": 1.9460958830105882e-06, "loss": 0.5337, "step": 11331 }, { "epoch": 0.72, "grad_norm": 1.0425341129302979, "learning_rate": 1.9452835658079905e-06, "loss": 0.4759, "step": 11332 }, { "epoch": 0.72, "grad_norm": 1.118994951248169, "learning_rate": 1.9444713772288747e-06, "loss": 0.5553, "step": 11333 }, { "epoch": 0.72, "grad_norm": 1.1561713218688965, "learning_rate": 1.94365931730744e-06, "loss": 0.5353, "step": 11334 }, { "epoch": 0.72, "grad_norm": 1.0623376369476318, "learning_rate": 1.9428473860778817e-06, "loss": 0.5039, "step": 11335 }, { "epoch": 0.72, "grad_norm": 0.9964343309402466, "learning_rate": 1.9420355835743826e-06, "loss": 0.5395, "step": 11336 }, { "epoch": 0.72, "grad_norm": 1.0136643648147583, "learning_rate": 1.941223909831125e-06, "loss": 0.5167, "step": 11337 }, { "epoch": 0.72, "grad_norm": 1.1962140798568726, "learning_rate": 1.9404123648822924e-06, "loss": 0.5547, "step": 11338 }, { "epoch": 0.72, "grad_norm": 1.083602786064148, "learning_rate": 1.9396009487620494e-06, "loss": 0.4856, "step": 11339 }, { "epoch": 0.72, "grad_norm": 1.0315816402435303, "learning_rate": 1.9387896615045636e-06, "loss": 0.5043, "step": 11340 }, { "epoch": 0.72, "grad_norm": 1.1157749891281128, "learning_rate": 1.9379785031439985e-06, "loss": 0.4993, "step": 11341 }, { "epoch": 0.72, "grad_norm": 1.0104255676269531, "learning_rate": 1.9371674737145023e-06, "loss": 0.489, "step": 11342 }, { "epoch": 0.72, "grad_norm": 0.9678119421005249, "learning_rate": 1.936356573250233e-06, "loss": 0.5093, "step": 11343 }, { "epoch": 0.72, "grad_norm": 1.0498720407485962, "learning_rate": 1.935545801785329e-06, "loss": 0.4843, "step": 11344 }, { "epoch": 0.72, "grad_norm": 1.045360803604126, "learning_rate": 1.934735159353931e-06, "loss": 0.4978, "step": 11345 }, { "epoch": 0.72, "grad_norm": 0.9735890030860901, "learning_rate": 1.9339246459901715e-06, "loss": 0.5155, "step": 11346 }, { "epoch": 0.72, "grad_norm": 1.0178331136703491, "learning_rate": 1.93311426172818e-06, "loss": 0.5073, "step": 11347 }, { "epoch": 0.72, "grad_norm": 1.0024511814117432, "learning_rate": 1.9323040066020774e-06, "loss": 0.5313, "step": 11348 }, { "epoch": 0.72, "grad_norm": 1.004668116569519, "learning_rate": 1.931493880645983e-06, "loss": 0.5297, "step": 11349 }, { "epoch": 0.72, "grad_norm": 1.0038278102874756, "learning_rate": 1.9306838838940035e-06, "loss": 0.481, "step": 11350 }, { "epoch": 0.72, "grad_norm": 1.0347565412521362, "learning_rate": 1.9298740163802523e-06, "loss": 0.5263, "step": 11351 }, { "epoch": 0.72, "grad_norm": 0.9967230558395386, "learning_rate": 1.929064278138823e-06, "loss": 0.5257, "step": 11352 }, { "epoch": 0.72, "grad_norm": 1.077918529510498, "learning_rate": 1.928254669203815e-06, "loss": 0.5305, "step": 11353 }, { "epoch": 0.72, "grad_norm": 1.112382173538208, "learning_rate": 1.9274451896093164e-06, "loss": 0.5486, "step": 11354 }, { "epoch": 0.72, "grad_norm": 1.0560412406921387, "learning_rate": 1.926635839389413e-06, "loss": 0.462, "step": 11355 }, { "epoch": 0.72, "grad_norm": 0.9729539155960083, "learning_rate": 1.925826618578182e-06, "loss": 0.4788, "step": 11356 }, { "epoch": 0.72, "grad_norm": 1.0809910297393799, "learning_rate": 1.9250175272097003e-06, "loss": 0.4713, "step": 11357 }, { "epoch": 0.72, "grad_norm": 1.012824535369873, "learning_rate": 1.9242085653180314e-06, "loss": 0.4867, "step": 11358 }, { "epoch": 0.72, "grad_norm": 1.1029964685440063, "learning_rate": 1.9233997329372402e-06, "loss": 0.5303, "step": 11359 }, { "epoch": 0.72, "grad_norm": 1.1370534896850586, "learning_rate": 1.9225910301013834e-06, "loss": 0.4887, "step": 11360 }, { "epoch": 0.72, "grad_norm": 1.029061198234558, "learning_rate": 1.9217824568445125e-06, "loss": 0.5302, "step": 11361 }, { "epoch": 0.72, "grad_norm": 1.021522879600525, "learning_rate": 1.920974013200676e-06, "loss": 0.5076, "step": 11362 }, { "epoch": 0.72, "grad_norm": 1.06879460811615, "learning_rate": 1.9201656992039092e-06, "loss": 0.4848, "step": 11363 }, { "epoch": 0.72, "grad_norm": 1.2080460786819458, "learning_rate": 1.9193575148882526e-06, "loss": 0.5572, "step": 11364 }, { "epoch": 0.72, "grad_norm": 1.1446185111999512, "learning_rate": 1.918549460287736e-06, "loss": 0.5437, "step": 11365 }, { "epoch": 0.72, "grad_norm": 0.9499037861824036, "learning_rate": 1.9177415354363802e-06, "loss": 0.4596, "step": 11366 }, { "epoch": 0.72, "grad_norm": 1.0208501815795898, "learning_rate": 1.916933740368206e-06, "loss": 0.4868, "step": 11367 }, { "epoch": 0.72, "grad_norm": 0.9540845155715942, "learning_rate": 1.916126075117227e-06, "loss": 0.4696, "step": 11368 }, { "epoch": 0.72, "grad_norm": 0.9916105270385742, "learning_rate": 1.9153185397174506e-06, "loss": 0.5258, "step": 11369 }, { "epoch": 0.72, "grad_norm": 0.9990249872207642, "learning_rate": 1.9145111342028817e-06, "loss": 0.5047, "step": 11370 }, { "epoch": 0.72, "grad_norm": 1.140777587890625, "learning_rate": 1.9137038586075117e-06, "loss": 0.5219, "step": 11371 }, { "epoch": 0.72, "grad_norm": 1.0075373649597168, "learning_rate": 1.9128967129653375e-06, "loss": 0.4989, "step": 11372 }, { "epoch": 0.72, "grad_norm": 1.0705087184906006, "learning_rate": 1.9120896973103453e-06, "loss": 0.5279, "step": 11373 }, { "epoch": 0.72, "grad_norm": 1.1543906927108765, "learning_rate": 1.911282811676512e-06, "loss": 0.5446, "step": 11374 }, { "epoch": 0.72, "grad_norm": 0.9973766207695007, "learning_rate": 1.9104760560978147e-06, "loss": 0.4964, "step": 11375 }, { "epoch": 0.72, "grad_norm": 1.022469401359558, "learning_rate": 1.909669430608223e-06, "loss": 0.4754, "step": 11376 }, { "epoch": 0.72, "grad_norm": 1.0960901975631714, "learning_rate": 1.908862935241701e-06, "loss": 0.5173, "step": 11377 }, { "epoch": 0.72, "grad_norm": 0.974963903427124, "learning_rate": 1.9080565700322095e-06, "loss": 0.4718, "step": 11378 }, { "epoch": 0.72, "grad_norm": 1.1384514570236206, "learning_rate": 1.9072503350136979e-06, "loss": 0.4665, "step": 11379 }, { "epoch": 0.72, "grad_norm": 1.045607328414917, "learning_rate": 1.9064442302201136e-06, "loss": 0.4791, "step": 11380 }, { "epoch": 0.72, "grad_norm": 1.064322829246521, "learning_rate": 1.9056382556854053e-06, "loss": 0.5211, "step": 11381 }, { "epoch": 0.72, "grad_norm": 0.9941070675849915, "learning_rate": 1.9048324114435036e-06, "loss": 0.5104, "step": 11382 }, { "epoch": 0.72, "grad_norm": 1.000701665878296, "learning_rate": 1.9040266975283417e-06, "loss": 0.4908, "step": 11383 }, { "epoch": 0.72, "grad_norm": 1.0231435298919678, "learning_rate": 1.9032211139738455e-06, "loss": 0.497, "step": 11384 }, { "epoch": 0.72, "grad_norm": 0.9945744276046753, "learning_rate": 1.902415660813935e-06, "loss": 0.5399, "step": 11385 }, { "epoch": 0.72, "grad_norm": 1.140770673751831, "learning_rate": 1.9016103380825274e-06, "loss": 0.4905, "step": 11386 }, { "epoch": 0.72, "grad_norm": 1.0725229978561401, "learning_rate": 1.900805145813528e-06, "loss": 0.5188, "step": 11387 }, { "epoch": 0.72, "grad_norm": 1.0397700071334839, "learning_rate": 1.9000000840408421e-06, "loss": 0.5006, "step": 11388 }, { "epoch": 0.72, "grad_norm": 1.0895681381225586, "learning_rate": 1.8991951527983694e-06, "loss": 0.5217, "step": 11389 }, { "epoch": 0.72, "grad_norm": 1.0540904998779297, "learning_rate": 1.8983903521200015e-06, "loss": 0.5251, "step": 11390 }, { "epoch": 0.72, "grad_norm": 1.0280147790908813, "learning_rate": 1.8975856820396265e-06, "loss": 0.4993, "step": 11391 }, { "epoch": 0.72, "grad_norm": 0.9986823797225952, "learning_rate": 1.8967811425911275e-06, "loss": 0.5326, "step": 11392 }, { "epoch": 0.72, "grad_norm": 1.0397193431854248, "learning_rate": 1.8959767338083758e-06, "loss": 0.501, "step": 11393 }, { "epoch": 0.72, "grad_norm": 1.0313090085983276, "learning_rate": 1.8951724557252472e-06, "loss": 0.4645, "step": 11394 }, { "epoch": 0.72, "grad_norm": 0.9666309952735901, "learning_rate": 1.8943683083756075e-06, "loss": 0.5031, "step": 11395 }, { "epoch": 0.72, "grad_norm": 1.138631820678711, "learning_rate": 1.8935642917933128e-06, "loss": 0.5772, "step": 11396 }, { "epoch": 0.72, "grad_norm": 1.0611635446548462, "learning_rate": 1.8927604060122196e-06, "loss": 0.5068, "step": 11397 }, { "epoch": 0.72, "grad_norm": 1.1099668741226196, "learning_rate": 1.8919566510661758e-06, "loss": 0.5055, "step": 11398 }, { "epoch": 0.72, "grad_norm": 1.0566827058792114, "learning_rate": 1.891153026989026e-06, "loss": 0.5213, "step": 11399 }, { "epoch": 0.72, "grad_norm": 1.0656869411468506, "learning_rate": 1.8903495338146089e-06, "loss": 0.5202, "step": 11400 }, { "epoch": 0.72, "grad_norm": 1.1294125318527222, "learning_rate": 1.8895461715767517e-06, "loss": 0.4552, "step": 11401 }, { "epoch": 0.72, "grad_norm": 0.9855488538742065, "learning_rate": 1.888742940309286e-06, "loss": 0.4594, "step": 11402 }, { "epoch": 0.72, "grad_norm": 1.1352120637893677, "learning_rate": 1.8879398400460342e-06, "loss": 0.5326, "step": 11403 }, { "epoch": 0.72, "grad_norm": 0.9844202995300293, "learning_rate": 1.8871368708208076e-06, "loss": 0.4825, "step": 11404 }, { "epoch": 0.72, "grad_norm": 1.0212651491165161, "learning_rate": 1.8863340326674184e-06, "loss": 0.5259, "step": 11405 }, { "epoch": 0.72, "grad_norm": 1.008789300918579, "learning_rate": 1.8855313256196722e-06, "loss": 0.5367, "step": 11406 }, { "epoch": 0.72, "grad_norm": 1.014675498008728, "learning_rate": 1.8847287497113664e-06, "loss": 0.4533, "step": 11407 }, { "epoch": 0.72, "grad_norm": 1.0439823865890503, "learning_rate": 1.883926304976298e-06, "loss": 0.4689, "step": 11408 }, { "epoch": 0.72, "grad_norm": 1.0333361625671387, "learning_rate": 1.8831239914482512e-06, "loss": 0.4874, "step": 11409 }, { "epoch": 0.72, "grad_norm": 1.0649745464324951, "learning_rate": 1.8823218091610085e-06, "loss": 0.5201, "step": 11410 }, { "epoch": 0.72, "grad_norm": 1.0688813924789429, "learning_rate": 1.8815197581483523e-06, "loss": 0.4849, "step": 11411 }, { "epoch": 0.72, "grad_norm": 1.0986980199813843, "learning_rate": 1.880717838444049e-06, "loss": 0.516, "step": 11412 }, { "epoch": 0.72, "grad_norm": 1.0068950653076172, "learning_rate": 1.879916050081866e-06, "loss": 0.474, "step": 11413 }, { "epoch": 0.72, "grad_norm": 1.0525602102279663, "learning_rate": 1.8791143930955641e-06, "loss": 0.4844, "step": 11414 }, { "epoch": 0.72, "grad_norm": 1.0503764152526855, "learning_rate": 1.8783128675188988e-06, "loss": 0.5612, "step": 11415 }, { "epoch": 0.72, "grad_norm": 1.080776572227478, "learning_rate": 1.8775114733856203e-06, "loss": 0.492, "step": 11416 }, { "epoch": 0.72, "grad_norm": 0.9848031997680664, "learning_rate": 1.87671021072947e-06, "loss": 0.5013, "step": 11417 }, { "epoch": 0.72, "grad_norm": 1.121417760848999, "learning_rate": 1.8759090795841856e-06, "loss": 0.5254, "step": 11418 }, { "epoch": 0.72, "grad_norm": 1.0587754249572754, "learning_rate": 1.8751080799835059e-06, "loss": 0.4595, "step": 11419 }, { "epoch": 0.72, "grad_norm": 1.024379849433899, "learning_rate": 1.8743072119611522e-06, "loss": 0.4652, "step": 11420 }, { "epoch": 0.72, "grad_norm": 1.0574456453323364, "learning_rate": 1.873506475550848e-06, "loss": 0.5539, "step": 11421 }, { "epoch": 0.72, "grad_norm": 0.9183557629585266, "learning_rate": 1.8727058707863121e-06, "loss": 0.4387, "step": 11422 }, { "epoch": 0.72, "grad_norm": 1.0336495637893677, "learning_rate": 1.871905397701249e-06, "loss": 0.5565, "step": 11423 }, { "epoch": 0.72, "grad_norm": 1.1050636768341064, "learning_rate": 1.8711050563293714e-06, "loss": 0.5102, "step": 11424 }, { "epoch": 0.72, "grad_norm": 1.00590980052948, "learning_rate": 1.8703048467043732e-06, "loss": 0.4588, "step": 11425 }, { "epoch": 0.72, "grad_norm": 1.1239380836486816, "learning_rate": 1.869504768859951e-06, "loss": 0.4816, "step": 11426 }, { "epoch": 0.72, "grad_norm": 1.1312364339828491, "learning_rate": 1.8687048228297928e-06, "loss": 0.539, "step": 11427 }, { "epoch": 0.72, "grad_norm": 1.0960451364517212, "learning_rate": 1.8679050086475814e-06, "loss": 0.4997, "step": 11428 }, { "epoch": 0.72, "grad_norm": 1.0225590467453003, "learning_rate": 1.867105326346994e-06, "loss": 0.5485, "step": 11429 }, { "epoch": 0.72, "grad_norm": 1.0000320672988892, "learning_rate": 1.8663057759617048e-06, "loss": 0.5167, "step": 11430 }, { "epoch": 0.72, "grad_norm": 1.1537526845932007, "learning_rate": 1.8655063575253746e-06, "loss": 0.5222, "step": 11431 }, { "epoch": 0.72, "grad_norm": 1.1456413269042969, "learning_rate": 1.8647070710716709e-06, "loss": 0.5506, "step": 11432 }, { "epoch": 0.72, "grad_norm": 0.9794883131980896, "learning_rate": 1.8639079166342438e-06, "loss": 0.4625, "step": 11433 }, { "epoch": 0.72, "grad_norm": 1.0213878154754639, "learning_rate": 1.8631088942467452e-06, "loss": 0.5083, "step": 11434 }, { "epoch": 0.72, "grad_norm": 1.0443166494369507, "learning_rate": 1.8623100039428194e-06, "loss": 0.4777, "step": 11435 }, { "epoch": 0.72, "grad_norm": 1.068365454673767, "learning_rate": 1.8615112457561013e-06, "loss": 0.5047, "step": 11436 }, { "epoch": 0.72, "grad_norm": 1.0105688571929932, "learning_rate": 1.860712619720228e-06, "loss": 0.5454, "step": 11437 }, { "epoch": 0.72, "grad_norm": 1.080657958984375, "learning_rate": 1.8599141258688274e-06, "loss": 0.5188, "step": 11438 }, { "epoch": 0.72, "grad_norm": 1.0406492948532104, "learning_rate": 1.8591157642355179e-06, "loss": 0.4535, "step": 11439 }, { "epoch": 0.72, "grad_norm": 1.0377037525177002, "learning_rate": 1.8583175348539173e-06, "loss": 0.4785, "step": 11440 }, { "epoch": 0.72, "grad_norm": 1.0017094612121582, "learning_rate": 1.8575194377576355e-06, "loss": 0.445, "step": 11441 }, { "epoch": 0.72, "grad_norm": 1.05403470993042, "learning_rate": 1.856721472980279e-06, "loss": 0.4839, "step": 11442 }, { "epoch": 0.72, "grad_norm": 1.117053508758545, "learning_rate": 1.855923640555448e-06, "loss": 0.4894, "step": 11443 }, { "epoch": 0.73, "grad_norm": 1.067420482635498, "learning_rate": 1.8551259405167315e-06, "loss": 0.4831, "step": 11444 }, { "epoch": 0.73, "grad_norm": 0.935054361820221, "learning_rate": 1.8543283728977234e-06, "loss": 0.4642, "step": 11445 }, { "epoch": 0.73, "grad_norm": 0.9502724409103394, "learning_rate": 1.8535309377320059e-06, "loss": 0.5063, "step": 11446 }, { "epoch": 0.73, "grad_norm": 0.96114581823349, "learning_rate": 1.8527336350531532e-06, "loss": 0.4877, "step": 11447 }, { "epoch": 0.73, "grad_norm": 1.013749361038208, "learning_rate": 1.851936464894739e-06, "loss": 0.5046, "step": 11448 }, { "epoch": 0.73, "grad_norm": 1.0789744853973389, "learning_rate": 1.8511394272903287e-06, "loss": 0.5355, "step": 11449 }, { "epoch": 0.73, "grad_norm": 1.0386903285980225, "learning_rate": 1.8503425222734834e-06, "loss": 0.4728, "step": 11450 }, { "epoch": 0.73, "grad_norm": 1.0988272428512573, "learning_rate": 1.8495457498777585e-06, "loss": 0.5491, "step": 11451 }, { "epoch": 0.73, "grad_norm": 1.028246283531189, "learning_rate": 1.8487491101367016e-06, "loss": 0.4953, "step": 11452 }, { "epoch": 0.73, "grad_norm": 1.1107392311096191, "learning_rate": 1.8479526030838552e-06, "loss": 0.5548, "step": 11453 }, { "epoch": 0.73, "grad_norm": 1.0843489170074463, "learning_rate": 1.8471562287527627e-06, "loss": 0.5282, "step": 11454 }, { "epoch": 0.73, "grad_norm": 1.0672394037246704, "learning_rate": 1.8463599871769516e-06, "loss": 0.5588, "step": 11455 }, { "epoch": 0.73, "grad_norm": 1.1077324151992798, "learning_rate": 1.8455638783899515e-06, "loss": 0.525, "step": 11456 }, { "epoch": 0.73, "grad_norm": 0.9883722066879272, "learning_rate": 1.8447679024252825e-06, "loss": 0.5146, "step": 11457 }, { "epoch": 0.73, "grad_norm": 1.081155776977539, "learning_rate": 1.8439720593164606e-06, "loss": 0.5191, "step": 11458 }, { "epoch": 0.73, "grad_norm": 0.9750945568084717, "learning_rate": 1.8431763490969968e-06, "loss": 0.5055, "step": 11459 }, { "epoch": 0.73, "grad_norm": 1.024051547050476, "learning_rate": 1.8423807718003967e-06, "loss": 0.477, "step": 11460 }, { "epoch": 0.73, "grad_norm": 1.0928895473480225, "learning_rate": 1.8415853274601541e-06, "loss": 0.476, "step": 11461 }, { "epoch": 0.73, "grad_norm": 1.1447224617004395, "learning_rate": 1.8407900161097698e-06, "loss": 0.5256, "step": 11462 }, { "epoch": 0.73, "grad_norm": 1.0564632415771484, "learning_rate": 1.839994837782726e-06, "loss": 0.5082, "step": 11463 }, { "epoch": 0.73, "grad_norm": 1.0120878219604492, "learning_rate": 1.8391997925125066e-06, "loss": 0.5329, "step": 11464 }, { "epoch": 0.73, "grad_norm": 1.0647292137145996, "learning_rate": 1.8384048803325887e-06, "loss": 0.505, "step": 11465 }, { "epoch": 0.73, "grad_norm": 1.062643051147461, "learning_rate": 1.8376101012764424e-06, "loss": 0.4714, "step": 11466 }, { "epoch": 0.73, "grad_norm": 1.0453153848648071, "learning_rate": 1.8368154553775342e-06, "loss": 0.4973, "step": 11467 }, { "epoch": 0.73, "grad_norm": 1.0845417976379395, "learning_rate": 1.8360209426693242e-06, "loss": 0.4738, "step": 11468 }, { "epoch": 0.73, "grad_norm": 1.014893651008606, "learning_rate": 1.8352265631852645e-06, "loss": 0.4753, "step": 11469 }, { "epoch": 0.73, "grad_norm": 1.152695655822754, "learning_rate": 1.8344323169588045e-06, "loss": 0.5137, "step": 11470 }, { "epoch": 0.73, "grad_norm": 1.087654948234558, "learning_rate": 1.8336382040233874e-06, "loss": 0.5012, "step": 11471 }, { "epoch": 0.73, "grad_norm": 1.026802659034729, "learning_rate": 1.8328442244124506e-06, "loss": 0.4385, "step": 11472 }, { "epoch": 0.73, "grad_norm": 1.0484585762023926, "learning_rate": 1.8320503781594273e-06, "loss": 0.5454, "step": 11473 }, { "epoch": 0.73, "grad_norm": 1.069604754447937, "learning_rate": 1.8312566652977393e-06, "loss": 0.5103, "step": 11474 }, { "epoch": 0.73, "grad_norm": 1.077880620956421, "learning_rate": 1.8304630858608107e-06, "loss": 0.5361, "step": 11475 }, { "epoch": 0.73, "grad_norm": 1.0621845722198486, "learning_rate": 1.8296696398820579e-06, "loss": 0.5359, "step": 11476 }, { "epoch": 0.73, "grad_norm": 1.2201637029647827, "learning_rate": 1.828876327394886e-06, "loss": 0.5066, "step": 11477 }, { "epoch": 0.73, "grad_norm": 1.049737572669983, "learning_rate": 1.8280831484327006e-06, "loss": 0.5025, "step": 11478 }, { "epoch": 0.73, "grad_norm": 1.0424972772598267, "learning_rate": 1.8272901030288991e-06, "loss": 0.5339, "step": 11479 }, { "epoch": 0.73, "grad_norm": 0.9816557765007019, "learning_rate": 1.8264971912168744e-06, "loss": 0.4873, "step": 11480 }, { "epoch": 0.73, "grad_norm": 1.0051323175430298, "learning_rate": 1.825704413030015e-06, "loss": 0.517, "step": 11481 }, { "epoch": 0.73, "grad_norm": 1.2196458578109741, "learning_rate": 1.8249117685016983e-06, "loss": 0.5198, "step": 11482 }, { "epoch": 0.73, "grad_norm": 1.1237990856170654, "learning_rate": 1.8241192576653e-06, "loss": 0.5429, "step": 11483 }, { "epoch": 0.73, "grad_norm": 1.209991216659546, "learning_rate": 1.8233268805541953e-06, "loss": 0.451, "step": 11484 }, { "epoch": 0.73, "grad_norm": 1.0044230222702026, "learning_rate": 1.8225346372017432e-06, "loss": 0.492, "step": 11485 }, { "epoch": 0.73, "grad_norm": 1.0598922967910767, "learning_rate": 1.8217425276413037e-06, "loss": 0.5012, "step": 11486 }, { "epoch": 0.73, "grad_norm": 1.0145241022109985, "learning_rate": 1.8209505519062299e-06, "loss": 0.48, "step": 11487 }, { "epoch": 0.73, "grad_norm": 1.007987141609192, "learning_rate": 1.8201587100298694e-06, "loss": 0.4696, "step": 11488 }, { "epoch": 0.73, "grad_norm": 1.0017046928405762, "learning_rate": 1.8193670020455656e-06, "loss": 0.4911, "step": 11489 }, { "epoch": 0.73, "grad_norm": 1.1037015914916992, "learning_rate": 1.8185754279866508e-06, "loss": 0.4793, "step": 11490 }, { "epoch": 0.73, "grad_norm": 1.0826473236083984, "learning_rate": 1.8177839878864562e-06, "loss": 0.4515, "step": 11491 }, { "epoch": 0.73, "grad_norm": 1.0383855104446411, "learning_rate": 1.8169926817783106e-06, "loss": 0.513, "step": 11492 }, { "epoch": 0.73, "grad_norm": 1.1208995580673218, "learning_rate": 1.8162015096955288e-06, "loss": 0.5273, "step": 11493 }, { "epoch": 0.73, "grad_norm": 1.0882205963134766, "learning_rate": 1.8154104716714254e-06, "loss": 0.5168, "step": 11494 }, { "epoch": 0.73, "grad_norm": 1.0574021339416504, "learning_rate": 1.814619567739309e-06, "loss": 0.4873, "step": 11495 }, { "epoch": 0.73, "grad_norm": 1.085392713546753, "learning_rate": 1.8138287979324815e-06, "loss": 0.496, "step": 11496 }, { "epoch": 0.73, "grad_norm": 0.9917359352111816, "learning_rate": 1.8130381622842414e-06, "loss": 0.4118, "step": 11497 }, { "epoch": 0.73, "grad_norm": 1.0514519214630127, "learning_rate": 1.8122476608278755e-06, "loss": 0.5219, "step": 11498 }, { "epoch": 0.73, "grad_norm": 0.9837106466293335, "learning_rate": 1.8114572935966713e-06, "loss": 0.4996, "step": 11499 }, { "epoch": 0.73, "grad_norm": 1.1096519231796265, "learning_rate": 1.8106670606239086e-06, "loss": 0.4988, "step": 11500 }, { "epoch": 0.73, "grad_norm": 0.9787556529045105, "learning_rate": 1.8098769619428607e-06, "loss": 0.4731, "step": 11501 }, { "epoch": 0.73, "grad_norm": 1.083164930343628, "learning_rate": 1.8090869975867964e-06, "loss": 0.4633, "step": 11502 }, { "epoch": 0.73, "grad_norm": 1.0905555486679077, "learning_rate": 1.8082971675889798e-06, "loss": 0.5448, "step": 11503 }, { "epoch": 0.73, "grad_norm": 1.0619641542434692, "learning_rate": 1.8075074719826636e-06, "loss": 0.4995, "step": 11504 }, { "epoch": 0.73, "grad_norm": 0.9724273085594177, "learning_rate": 1.8067179108011047e-06, "loss": 0.4842, "step": 11505 }, { "epoch": 0.73, "grad_norm": 1.0353304147720337, "learning_rate": 1.8059284840775443e-06, "loss": 0.4954, "step": 11506 }, { "epoch": 0.73, "grad_norm": 0.9989253878593445, "learning_rate": 1.8051391918452244e-06, "loss": 0.5239, "step": 11507 }, { "epoch": 0.73, "grad_norm": 1.065077543258667, "learning_rate": 1.8043500341373788e-06, "loss": 0.4882, "step": 11508 }, { "epoch": 0.73, "grad_norm": 0.9911226630210876, "learning_rate": 1.8035610109872364e-06, "loss": 0.5167, "step": 11509 }, { "epoch": 0.73, "grad_norm": 1.0643283128738403, "learning_rate": 1.8027721224280204e-06, "loss": 0.5408, "step": 11510 }, { "epoch": 0.73, "grad_norm": 1.0329303741455078, "learning_rate": 1.8019833684929493e-06, "loss": 0.5091, "step": 11511 }, { "epoch": 0.73, "grad_norm": 1.0223429203033447, "learning_rate": 1.8011947492152303e-06, "loss": 0.5148, "step": 11512 }, { "epoch": 0.73, "grad_norm": 1.0493983030319214, "learning_rate": 1.8004062646280762e-06, "loss": 0.4806, "step": 11513 }, { "epoch": 0.73, "grad_norm": 1.0583444833755493, "learning_rate": 1.799617914764682e-06, "loss": 0.4884, "step": 11514 }, { "epoch": 0.73, "grad_norm": 1.1490446329116821, "learning_rate": 1.7988296996582438e-06, "loss": 0.5446, "step": 11515 }, { "epoch": 0.73, "grad_norm": 1.089064359664917, "learning_rate": 1.7980416193419509e-06, "loss": 0.4835, "step": 11516 }, { "epoch": 0.73, "grad_norm": 1.0678657293319702, "learning_rate": 1.7972536738489865e-06, "loss": 0.5207, "step": 11517 }, { "epoch": 0.73, "grad_norm": 1.07902991771698, "learning_rate": 1.7964658632125286e-06, "loss": 0.5375, "step": 11518 }, { "epoch": 0.73, "grad_norm": 0.9613833427429199, "learning_rate": 1.7956781874657508e-06, "loss": 0.4995, "step": 11519 }, { "epoch": 0.73, "grad_norm": 1.0828293561935425, "learning_rate": 1.7948906466418154e-06, "loss": 0.5334, "step": 11520 }, { "epoch": 0.73, "grad_norm": 0.9815015196800232, "learning_rate": 1.7941032407738857e-06, "loss": 0.457, "step": 11521 }, { "epoch": 0.73, "grad_norm": 1.0951595306396484, "learning_rate": 1.7933159698951153e-06, "loss": 0.5049, "step": 11522 }, { "epoch": 0.73, "grad_norm": 0.978574275970459, "learning_rate": 1.7925288340386543e-06, "loss": 0.5281, "step": 11523 }, { "epoch": 0.73, "grad_norm": 0.9065704345703125, "learning_rate": 1.7917418332376463e-06, "loss": 0.4371, "step": 11524 }, { "epoch": 0.73, "grad_norm": 1.0113238096237183, "learning_rate": 1.7909549675252291e-06, "loss": 0.5222, "step": 11525 }, { "epoch": 0.73, "grad_norm": 1.0330299139022827, "learning_rate": 1.7901682369345346e-06, "loss": 0.4832, "step": 11526 }, { "epoch": 0.73, "grad_norm": 1.0719724893569946, "learning_rate": 1.7893816414986915e-06, "loss": 0.4994, "step": 11527 }, { "epoch": 0.73, "grad_norm": 1.0883289575576782, "learning_rate": 1.7885951812508163e-06, "loss": 0.5058, "step": 11528 }, { "epoch": 0.73, "grad_norm": 1.1325196027755737, "learning_rate": 1.787808856224027e-06, "loss": 0.5706, "step": 11529 }, { "epoch": 0.73, "grad_norm": 1.0494288206100464, "learning_rate": 1.7870226664514318e-06, "loss": 0.4965, "step": 11530 }, { "epoch": 0.73, "grad_norm": 1.1580727100372314, "learning_rate": 1.786236611966135e-06, "loss": 0.5332, "step": 11531 }, { "epoch": 0.73, "grad_norm": 1.1171400547027588, "learning_rate": 1.7854506928012349e-06, "loss": 0.4641, "step": 11532 }, { "epoch": 0.73, "grad_norm": 1.0406197309494019, "learning_rate": 1.784664908989825e-06, "loss": 0.4893, "step": 11533 }, { "epoch": 0.73, "grad_norm": 1.0887912511825562, "learning_rate": 1.7838792605649874e-06, "loss": 0.5642, "step": 11534 }, { "epoch": 0.73, "grad_norm": 1.0635976791381836, "learning_rate": 1.7830937475598092e-06, "loss": 0.4902, "step": 11535 }, { "epoch": 0.73, "grad_norm": 1.1203384399414062, "learning_rate": 1.7823083700073607e-06, "loss": 0.5024, "step": 11536 }, { "epoch": 0.73, "grad_norm": 0.9832225441932678, "learning_rate": 1.781523127940713e-06, "loss": 0.49, "step": 11537 }, { "epoch": 0.73, "grad_norm": 0.9907360076904297, "learning_rate": 1.7807380213929304e-06, "loss": 0.4781, "step": 11538 }, { "epoch": 0.73, "grad_norm": 1.090733289718628, "learning_rate": 1.7799530503970707e-06, "loss": 0.5082, "step": 11539 }, { "epoch": 0.73, "grad_norm": 1.0755808353424072, "learning_rate": 1.7791682149861866e-06, "loss": 0.5073, "step": 11540 }, { "epoch": 0.73, "grad_norm": 1.15406334400177, "learning_rate": 1.778383515193326e-06, "loss": 0.5331, "step": 11541 }, { "epoch": 0.73, "grad_norm": 1.0282824039459229, "learning_rate": 1.777598951051525e-06, "loss": 0.4369, "step": 11542 }, { "epoch": 0.73, "grad_norm": 1.0402719974517822, "learning_rate": 1.7768145225938254e-06, "loss": 0.4479, "step": 11543 }, { "epoch": 0.73, "grad_norm": 1.085084080696106, "learning_rate": 1.7760302298532522e-06, "loss": 0.4458, "step": 11544 }, { "epoch": 0.73, "grad_norm": 0.9971480369567871, "learning_rate": 1.7752460728628308e-06, "loss": 0.4439, "step": 11545 }, { "epoch": 0.73, "grad_norm": 1.0229544639587402, "learning_rate": 1.7744620516555804e-06, "loss": 0.5041, "step": 11546 }, { "epoch": 0.73, "grad_norm": 1.2048213481903076, "learning_rate": 1.7736781662645092e-06, "loss": 0.5566, "step": 11547 }, { "epoch": 0.73, "grad_norm": 1.1265819072723389, "learning_rate": 1.7728944167226287e-06, "loss": 0.5144, "step": 11548 }, { "epoch": 0.73, "grad_norm": 1.080599069595337, "learning_rate": 1.772110803062939e-06, "loss": 0.5197, "step": 11549 }, { "epoch": 0.73, "grad_norm": 1.0882823467254639, "learning_rate": 1.7713273253184331e-06, "loss": 0.4827, "step": 11550 }, { "epoch": 0.73, "grad_norm": 1.0465631484985352, "learning_rate": 1.7705439835221022e-06, "loss": 0.5496, "step": 11551 }, { "epoch": 0.73, "grad_norm": 1.0857951641082764, "learning_rate": 1.7697607777069291e-06, "loss": 0.511, "step": 11552 }, { "epoch": 0.73, "grad_norm": 1.0986204147338867, "learning_rate": 1.7689777079058929e-06, "loss": 0.4755, "step": 11553 }, { "epoch": 0.73, "grad_norm": 1.0771892070770264, "learning_rate": 1.7681947741519668e-06, "loss": 0.4833, "step": 11554 }, { "epoch": 0.73, "grad_norm": 1.156043291091919, "learning_rate": 1.7674119764781129e-06, "loss": 0.5587, "step": 11555 }, { "epoch": 0.73, "grad_norm": 1.0552773475646973, "learning_rate": 1.7666293149172969e-06, "loss": 0.5368, "step": 11556 }, { "epoch": 0.73, "grad_norm": 1.0510469675064087, "learning_rate": 1.7658467895024744e-06, "loss": 0.5071, "step": 11557 }, { "epoch": 0.73, "grad_norm": 1.121157169342041, "learning_rate": 1.7650644002665906e-06, "loss": 0.5401, "step": 11558 }, { "epoch": 0.73, "grad_norm": 1.1302895545959473, "learning_rate": 1.7642821472425918e-06, "loss": 0.5189, "step": 11559 }, { "epoch": 0.73, "grad_norm": 1.1765938997268677, "learning_rate": 1.7635000304634154e-06, "loss": 0.4987, "step": 11560 }, { "epoch": 0.73, "grad_norm": 0.9780496954917908, "learning_rate": 1.762718049961994e-06, "loss": 0.4999, "step": 11561 }, { "epoch": 0.73, "grad_norm": 1.053433895111084, "learning_rate": 1.7619362057712552e-06, "loss": 0.4768, "step": 11562 }, { "epoch": 0.73, "grad_norm": 1.0024325847625732, "learning_rate": 1.761154497924117e-06, "loss": 0.4808, "step": 11563 }, { "epoch": 0.73, "grad_norm": 1.0675638914108276, "learning_rate": 1.7603729264534936e-06, "loss": 0.4671, "step": 11564 }, { "epoch": 0.73, "grad_norm": 1.020726203918457, "learning_rate": 1.7595914913923001e-06, "loss": 0.5356, "step": 11565 }, { "epoch": 0.73, "grad_norm": 1.047310471534729, "learning_rate": 1.7588101927734346e-06, "loss": 0.5275, "step": 11566 }, { "epoch": 0.73, "grad_norm": 0.9428762197494507, "learning_rate": 1.7580290306297965e-06, "loss": 0.5259, "step": 11567 }, { "epoch": 0.73, "grad_norm": 1.005489706993103, "learning_rate": 1.7572480049942781e-06, "loss": 0.535, "step": 11568 }, { "epoch": 0.73, "grad_norm": 1.0362120866775513, "learning_rate": 1.7564671158997653e-06, "loss": 0.4989, "step": 11569 }, { "epoch": 0.73, "grad_norm": 1.0559964179992676, "learning_rate": 1.755686363379141e-06, "loss": 0.5146, "step": 11570 }, { "epoch": 0.73, "grad_norm": 1.1007598638534546, "learning_rate": 1.7549057474652753e-06, "loss": 0.5642, "step": 11571 }, { "epoch": 0.73, "grad_norm": 1.0522035360336304, "learning_rate": 1.7541252681910386e-06, "loss": 0.5137, "step": 11572 }, { "epoch": 0.73, "grad_norm": 1.049686312675476, "learning_rate": 1.7533449255892986e-06, "loss": 0.4666, "step": 11573 }, { "epoch": 0.73, "grad_norm": 1.1081347465515137, "learning_rate": 1.7525647196929079e-06, "loss": 0.5185, "step": 11574 }, { "epoch": 0.73, "grad_norm": 1.145727515220642, "learning_rate": 1.7517846505347197e-06, "loss": 0.4984, "step": 11575 }, { "epoch": 0.73, "grad_norm": 1.0616527795791626, "learning_rate": 1.751004718147582e-06, "loss": 0.4923, "step": 11576 }, { "epoch": 0.73, "grad_norm": 1.0432676076889038, "learning_rate": 1.7502249225643291e-06, "loss": 0.4889, "step": 11577 }, { "epoch": 0.73, "grad_norm": 1.0427736043930054, "learning_rate": 1.7494452638178039e-06, "loss": 0.4669, "step": 11578 }, { "epoch": 0.73, "grad_norm": 1.017297625541687, "learning_rate": 1.7486657419408287e-06, "loss": 0.4975, "step": 11579 }, { "epoch": 0.73, "grad_norm": 1.0261050462722778, "learning_rate": 1.7478863569662286e-06, "loss": 0.4811, "step": 11580 }, { "epoch": 0.73, "grad_norm": 1.107507348060608, "learning_rate": 1.7471071089268204e-06, "loss": 0.5162, "step": 11581 }, { "epoch": 0.73, "grad_norm": 1.0576726198196411, "learning_rate": 1.7463279978554166e-06, "loss": 0.5206, "step": 11582 }, { "epoch": 0.73, "grad_norm": 0.9989815354347229, "learning_rate": 1.745549023784821e-06, "loss": 0.4535, "step": 11583 }, { "epoch": 0.73, "grad_norm": 1.1163592338562012, "learning_rate": 1.7447701867478372e-06, "loss": 0.4898, "step": 11584 }, { "epoch": 0.73, "grad_norm": 1.1261216402053833, "learning_rate": 1.7439914867772529e-06, "loss": 0.4587, "step": 11585 }, { "epoch": 0.73, "grad_norm": 1.0687371492385864, "learning_rate": 1.7432129239058637e-06, "loss": 0.4969, "step": 11586 }, { "epoch": 0.73, "grad_norm": 1.0555487871170044, "learning_rate": 1.7424344981664475e-06, "loss": 0.541, "step": 11587 }, { "epoch": 0.73, "grad_norm": 1.1022698879241943, "learning_rate": 1.7416562095917822e-06, "loss": 0.4831, "step": 11588 }, { "epoch": 0.73, "grad_norm": 0.9749317169189453, "learning_rate": 1.7408780582146383e-06, "loss": 0.52, "step": 11589 }, { "epoch": 0.73, "grad_norm": 1.0140492916107178, "learning_rate": 1.7401000440677824e-06, "loss": 0.4737, "step": 11590 }, { "epoch": 0.73, "grad_norm": 1.0857648849487305, "learning_rate": 1.7393221671839727e-06, "loss": 0.4885, "step": 11591 }, { "epoch": 0.73, "grad_norm": 1.1199688911437988, "learning_rate": 1.7385444275959657e-06, "loss": 0.4537, "step": 11592 }, { "epoch": 0.73, "grad_norm": 1.0685760974884033, "learning_rate": 1.7377668253365054e-06, "loss": 0.5195, "step": 11593 }, { "epoch": 0.73, "grad_norm": 1.0911962985992432, "learning_rate": 1.7369893604383353e-06, "loss": 0.5048, "step": 11594 }, { "epoch": 0.73, "grad_norm": 1.0440233945846558, "learning_rate": 1.736212032934192e-06, "loss": 0.4949, "step": 11595 }, { "epoch": 0.73, "grad_norm": 1.0071343183517456, "learning_rate": 1.7354348428568063e-06, "loss": 0.4748, "step": 11596 }, { "epoch": 0.73, "grad_norm": 1.161497950553894, "learning_rate": 1.7346577902389028e-06, "loss": 0.5158, "step": 11597 }, { "epoch": 0.73, "grad_norm": 1.056105613708496, "learning_rate": 1.7338808751132002e-06, "loss": 0.5007, "step": 11598 }, { "epoch": 0.73, "grad_norm": 1.0153801441192627, "learning_rate": 1.7331040975124125e-06, "loss": 0.4892, "step": 11599 }, { "epoch": 0.73, "grad_norm": 1.0123332738876343, "learning_rate": 1.7323274574692479e-06, "loss": 0.4661, "step": 11600 }, { "epoch": 0.73, "grad_norm": 1.1221553087234497, "learning_rate": 1.7315509550164044e-06, "loss": 0.4927, "step": 11601 }, { "epoch": 0.74, "grad_norm": 1.0159032344818115, "learning_rate": 1.730774590186579e-06, "loss": 0.5677, "step": 11602 }, { "epoch": 0.74, "grad_norm": 1.0440219640731812, "learning_rate": 1.7299983630124663e-06, "loss": 0.5028, "step": 11603 }, { "epoch": 0.74, "grad_norm": 1.0416673421859741, "learning_rate": 1.729222273526745e-06, "loss": 0.5041, "step": 11604 }, { "epoch": 0.74, "grad_norm": 1.0251445770263672, "learning_rate": 1.7284463217620955e-06, "loss": 0.4876, "step": 11605 }, { "epoch": 0.74, "grad_norm": 1.074592113494873, "learning_rate": 1.727670507751193e-06, "loss": 0.4929, "step": 11606 }, { "epoch": 0.74, "grad_norm": 1.1091501712799072, "learning_rate": 1.7268948315266975e-06, "loss": 0.5498, "step": 11607 }, { "epoch": 0.74, "grad_norm": 1.0287474393844604, "learning_rate": 1.7261192931212783e-06, "loss": 0.532, "step": 11608 }, { "epoch": 0.74, "grad_norm": 1.012856125831604, "learning_rate": 1.7253438925675847e-06, "loss": 0.4625, "step": 11609 }, { "epoch": 0.74, "grad_norm": 1.1315114498138428, "learning_rate": 1.7245686298982678e-06, "loss": 0.5651, "step": 11610 }, { "epoch": 0.74, "grad_norm": 1.0674339532852173, "learning_rate": 1.723793505145972e-06, "loss": 0.5238, "step": 11611 }, { "epoch": 0.74, "grad_norm": 1.0249159336090088, "learning_rate": 1.7230185183433345e-06, "loss": 0.4844, "step": 11612 }, { "epoch": 0.74, "grad_norm": 0.9884284734725952, "learning_rate": 1.722243669522987e-06, "loss": 0.4926, "step": 11613 }, { "epoch": 0.74, "grad_norm": 1.0254402160644531, "learning_rate": 1.7214689587175582e-06, "loss": 0.5323, "step": 11614 }, { "epoch": 0.74, "grad_norm": 0.9610978364944458, "learning_rate": 1.720694385959663e-06, "loss": 0.4544, "step": 11615 }, { "epoch": 0.74, "grad_norm": 0.9989670515060425, "learning_rate": 1.7199199512819225e-06, "loss": 0.4831, "step": 11616 }, { "epoch": 0.74, "grad_norm": 1.0955671072006226, "learning_rate": 1.7191456547169405e-06, "loss": 0.5301, "step": 11617 }, { "epoch": 0.74, "grad_norm": 1.0697169303894043, "learning_rate": 1.718371496297322e-06, "loss": 0.5327, "step": 11618 }, { "epoch": 0.74, "grad_norm": 1.096488356590271, "learning_rate": 1.717597476055664e-06, "loss": 0.5321, "step": 11619 }, { "epoch": 0.74, "grad_norm": 1.0395913124084473, "learning_rate": 1.716823594024557e-06, "loss": 0.5056, "step": 11620 }, { "epoch": 0.74, "grad_norm": 1.0414578914642334, "learning_rate": 1.716049850236588e-06, "loss": 0.5046, "step": 11621 }, { "epoch": 0.74, "grad_norm": 1.090161919593811, "learning_rate": 1.7152762447243365e-06, "loss": 0.5255, "step": 11622 }, { "epoch": 0.74, "grad_norm": 1.0255171060562134, "learning_rate": 1.7145027775203748e-06, "loss": 0.4791, "step": 11623 }, { "epoch": 0.74, "grad_norm": 1.0892038345336914, "learning_rate": 1.7137294486572714e-06, "loss": 0.5079, "step": 11624 }, { "epoch": 0.74, "grad_norm": 1.0260745286941528, "learning_rate": 1.7129562581675885e-06, "loss": 0.5071, "step": 11625 }, { "epoch": 0.74, "grad_norm": 0.9881934523582458, "learning_rate": 1.7121832060838833e-06, "loss": 0.488, "step": 11626 }, { "epoch": 0.74, "grad_norm": 1.098183035850525, "learning_rate": 1.711410292438707e-06, "loss": 0.5461, "step": 11627 }, { "epoch": 0.74, "grad_norm": 1.0220118761062622, "learning_rate": 1.7106375172646e-06, "loss": 0.4528, "step": 11628 }, { "epoch": 0.74, "grad_norm": 1.0334930419921875, "learning_rate": 1.709864880594106e-06, "loss": 0.4964, "step": 11629 }, { "epoch": 0.74, "grad_norm": 1.043442726135254, "learning_rate": 1.7090923824597578e-06, "loss": 0.5603, "step": 11630 }, { "epoch": 0.74, "grad_norm": 1.0600742101669312, "learning_rate": 1.70832002289408e-06, "loss": 0.5463, "step": 11631 }, { "epoch": 0.74, "grad_norm": 1.0279513597488403, "learning_rate": 1.7075478019295943e-06, "loss": 0.5061, "step": 11632 }, { "epoch": 0.74, "grad_norm": 1.015267252922058, "learning_rate": 1.7067757195988178e-06, "loss": 0.4569, "step": 11633 }, { "epoch": 0.74, "grad_norm": 1.0748802423477173, "learning_rate": 1.706003775934259e-06, "loss": 0.5272, "step": 11634 }, { "epoch": 0.74, "grad_norm": 1.0735557079315186, "learning_rate": 1.705231970968424e-06, "loss": 0.5236, "step": 11635 }, { "epoch": 0.74, "grad_norm": 1.057826280593872, "learning_rate": 1.704460304733806e-06, "loss": 0.5284, "step": 11636 }, { "epoch": 0.74, "grad_norm": 1.074376106262207, "learning_rate": 1.7036887772629012e-06, "loss": 0.5452, "step": 11637 }, { "epoch": 0.74, "grad_norm": 1.018315315246582, "learning_rate": 1.7029173885881973e-06, "loss": 0.5, "step": 11638 }, { "epoch": 0.74, "grad_norm": 1.068433403968811, "learning_rate": 1.7021461387421705e-06, "loss": 0.4995, "step": 11639 }, { "epoch": 0.74, "grad_norm": 1.0389368534088135, "learning_rate": 1.7013750277572977e-06, "loss": 0.5143, "step": 11640 }, { "epoch": 0.74, "grad_norm": 1.063865303993225, "learning_rate": 1.7006040556660468e-06, "loss": 0.522, "step": 11641 }, { "epoch": 0.74, "grad_norm": 0.966158390045166, "learning_rate": 1.6998332225008817e-06, "loss": 0.4891, "step": 11642 }, { "epoch": 0.74, "grad_norm": 1.0541883707046509, "learning_rate": 1.6990625282942607e-06, "loss": 0.506, "step": 11643 }, { "epoch": 0.74, "grad_norm": 1.1151005029678345, "learning_rate": 1.6982919730786323e-06, "loss": 0.494, "step": 11644 }, { "epoch": 0.74, "grad_norm": 1.1117256879806519, "learning_rate": 1.697521556886441e-06, "loss": 0.534, "step": 11645 }, { "epoch": 0.74, "grad_norm": 0.973153829574585, "learning_rate": 1.6967512797501317e-06, "loss": 0.498, "step": 11646 }, { "epoch": 0.74, "grad_norm": 1.0751382112503052, "learning_rate": 1.6959811417021338e-06, "loss": 0.4757, "step": 11647 }, { "epoch": 0.74, "grad_norm": 1.03907310962677, "learning_rate": 1.6952111427748758e-06, "loss": 0.4955, "step": 11648 }, { "epoch": 0.74, "grad_norm": 1.0551830530166626, "learning_rate": 1.69444128300078e-06, "loss": 0.466, "step": 11649 }, { "epoch": 0.74, "grad_norm": 0.9973853230476379, "learning_rate": 1.6936715624122623e-06, "loss": 0.4897, "step": 11650 }, { "epoch": 0.74, "grad_norm": 1.0001341104507446, "learning_rate": 1.6929019810417352e-06, "loss": 0.4778, "step": 11651 }, { "epoch": 0.74, "grad_norm": 1.0439047813415527, "learning_rate": 1.6921325389215993e-06, "loss": 0.5034, "step": 11652 }, { "epoch": 0.74, "grad_norm": 1.0582904815673828, "learning_rate": 1.6913632360842553e-06, "loss": 0.5121, "step": 11653 }, { "epoch": 0.74, "grad_norm": 1.020135760307312, "learning_rate": 1.6905940725620951e-06, "loss": 0.5069, "step": 11654 }, { "epoch": 0.74, "grad_norm": 1.0762420892715454, "learning_rate": 1.6898250483875063e-06, "loss": 0.5376, "step": 11655 }, { "epoch": 0.74, "grad_norm": 0.9812977313995361, "learning_rate": 1.6890561635928692e-06, "loss": 0.4486, "step": 11656 }, { "epoch": 0.74, "grad_norm": 1.0338646173477173, "learning_rate": 1.6882874182105613e-06, "loss": 0.482, "step": 11657 }, { "epoch": 0.74, "grad_norm": 1.021016240119934, "learning_rate": 1.6875188122729458e-06, "loss": 0.4814, "step": 11658 }, { "epoch": 0.74, "grad_norm": 0.9645542502403259, "learning_rate": 1.6867503458123913e-06, "loss": 0.4765, "step": 11659 }, { "epoch": 0.74, "grad_norm": 1.0475287437438965, "learning_rate": 1.6859820188612557e-06, "loss": 0.5145, "step": 11660 }, { "epoch": 0.74, "grad_norm": 1.0525175333023071, "learning_rate": 1.6852138314518873e-06, "loss": 0.465, "step": 11661 }, { "epoch": 0.74, "grad_norm": 1.0847764015197754, "learning_rate": 1.6844457836166329e-06, "loss": 0.4694, "step": 11662 }, { "epoch": 0.74, "grad_norm": 1.1165729761123657, "learning_rate": 1.6836778753878324e-06, "loss": 0.5256, "step": 11663 }, { "epoch": 0.74, "grad_norm": 1.1158928871154785, "learning_rate": 1.68291010679782e-06, "loss": 0.4778, "step": 11664 }, { "epoch": 0.74, "grad_norm": 0.9745147228240967, "learning_rate": 1.6821424778789252e-06, "loss": 0.4828, "step": 11665 }, { "epoch": 0.74, "grad_norm": 1.0973376035690308, "learning_rate": 1.6813749886634657e-06, "loss": 0.5097, "step": 11666 }, { "epoch": 0.74, "grad_norm": 1.0163177251815796, "learning_rate": 1.6806076391837622e-06, "loss": 0.4506, "step": 11667 }, { "epoch": 0.74, "grad_norm": 1.0826618671417236, "learning_rate": 1.6798404294721254e-06, "loss": 0.4805, "step": 11668 }, { "epoch": 0.74, "grad_norm": 1.0028828382492065, "learning_rate": 1.6790733595608567e-06, "loss": 0.5133, "step": 11669 }, { "epoch": 0.74, "grad_norm": 1.1223706007003784, "learning_rate": 1.6783064294822559e-06, "loss": 0.4689, "step": 11670 }, { "epoch": 0.74, "grad_norm": 1.0388615131378174, "learning_rate": 1.677539639268616e-06, "loss": 0.5267, "step": 11671 }, { "epoch": 0.74, "grad_norm": 1.052419900894165, "learning_rate": 1.6767729889522239e-06, "loss": 0.5219, "step": 11672 }, { "epoch": 0.74, "grad_norm": 1.0997850894927979, "learning_rate": 1.6760064785653624e-06, "loss": 0.5127, "step": 11673 }, { "epoch": 0.74, "grad_norm": 1.0978507995605469, "learning_rate": 1.675240108140303e-06, "loss": 0.5084, "step": 11674 }, { "epoch": 0.74, "grad_norm": 1.0522551536560059, "learning_rate": 1.674473877709315e-06, "loss": 0.4859, "step": 11675 }, { "epoch": 0.74, "grad_norm": 1.1092435121536255, "learning_rate": 1.6737077873046669e-06, "loss": 0.5307, "step": 11676 }, { "epoch": 0.74, "grad_norm": 1.059883713722229, "learning_rate": 1.672941836958611e-06, "loss": 0.5186, "step": 11677 }, { "epoch": 0.74, "grad_norm": 1.0982457399368286, "learning_rate": 1.6721760267033998e-06, "loss": 0.5106, "step": 11678 }, { "epoch": 0.74, "grad_norm": 1.0341575145721436, "learning_rate": 1.6714103565712798e-06, "loss": 0.4907, "step": 11679 }, { "epoch": 0.74, "grad_norm": 1.0636166334152222, "learning_rate": 1.6706448265944902e-06, "loss": 0.5007, "step": 11680 }, { "epoch": 0.74, "grad_norm": 1.2040977478027344, "learning_rate": 1.6698794368052669e-06, "loss": 0.4697, "step": 11681 }, { "epoch": 0.74, "grad_norm": 1.0305360555648804, "learning_rate": 1.6691141872358336e-06, "loss": 0.5235, "step": 11682 }, { "epoch": 0.74, "grad_norm": 1.0078924894332886, "learning_rate": 1.668349077918413e-06, "loss": 0.4649, "step": 11683 }, { "epoch": 0.74, "grad_norm": 1.0714657306671143, "learning_rate": 1.6675841088852268e-06, "loss": 0.4572, "step": 11684 }, { "epoch": 0.74, "grad_norm": 1.0063605308532715, "learning_rate": 1.666819280168479e-06, "loss": 0.4524, "step": 11685 }, { "epoch": 0.74, "grad_norm": 1.017889142036438, "learning_rate": 1.6660545918003762e-06, "loss": 0.4554, "step": 11686 }, { "epoch": 0.74, "grad_norm": 1.1632343530654907, "learning_rate": 1.6652900438131181e-06, "loss": 0.5496, "step": 11687 }, { "epoch": 0.74, "grad_norm": 1.17890202999115, "learning_rate": 1.6645256362388922e-06, "loss": 0.4955, "step": 11688 }, { "epoch": 0.74, "grad_norm": 1.1335285902023315, "learning_rate": 1.663761369109892e-06, "loss": 0.5317, "step": 11689 }, { "epoch": 0.74, "grad_norm": 1.197165608406067, "learning_rate": 1.662997242458293e-06, "loss": 0.5028, "step": 11690 }, { "epoch": 0.74, "grad_norm": 1.125783920288086, "learning_rate": 1.6622332563162714e-06, "loss": 0.5791, "step": 11691 }, { "epoch": 0.74, "grad_norm": 1.0092953443527222, "learning_rate": 1.6614694107159962e-06, "loss": 0.4893, "step": 11692 }, { "epoch": 0.74, "grad_norm": 1.010783314704895, "learning_rate": 1.6607057056896304e-06, "loss": 0.4617, "step": 11693 }, { "epoch": 0.74, "grad_norm": 1.0468823909759521, "learning_rate": 1.6599421412693307e-06, "loss": 0.5048, "step": 11694 }, { "epoch": 0.74, "grad_norm": 1.1088182926177979, "learning_rate": 1.65917871748725e-06, "loss": 0.5079, "step": 11695 }, { "epoch": 0.74, "grad_norm": 1.0559271574020386, "learning_rate": 1.6584154343755276e-06, "loss": 0.4817, "step": 11696 }, { "epoch": 0.74, "grad_norm": 1.0422968864440918, "learning_rate": 1.6576522919663107e-06, "loss": 0.4628, "step": 11697 }, { "epoch": 0.74, "grad_norm": 1.0454442501068115, "learning_rate": 1.6568892902917267e-06, "loss": 0.4989, "step": 11698 }, { "epoch": 0.74, "grad_norm": 1.0210545063018799, "learning_rate": 1.6561264293839051e-06, "loss": 0.473, "step": 11699 }, { "epoch": 0.74, "grad_norm": 1.1097790002822876, "learning_rate": 1.6553637092749685e-06, "loss": 0.4984, "step": 11700 }, { "epoch": 0.74, "grad_norm": 1.015505075454712, "learning_rate": 1.6546011299970276e-06, "loss": 0.4598, "step": 11701 }, { "epoch": 0.74, "grad_norm": 1.0487347841262817, "learning_rate": 1.6538386915821975e-06, "loss": 0.4664, "step": 11702 }, { "epoch": 0.74, "grad_norm": 1.0431348085403442, "learning_rate": 1.6530763940625805e-06, "loss": 0.4781, "step": 11703 }, { "epoch": 0.74, "grad_norm": 1.1704440116882324, "learning_rate": 1.6523142374702722e-06, "loss": 0.534, "step": 11704 }, { "epoch": 0.74, "grad_norm": 1.0386048555374146, "learning_rate": 1.6515522218373658e-06, "loss": 0.4654, "step": 11705 }, { "epoch": 0.74, "grad_norm": 1.0807963609695435, "learning_rate": 1.6507903471959468e-06, "loss": 0.488, "step": 11706 }, { "epoch": 0.74, "grad_norm": 1.065360426902771, "learning_rate": 1.6500286135780951e-06, "loss": 0.5316, "step": 11707 }, { "epoch": 0.74, "grad_norm": 1.0242459774017334, "learning_rate": 1.6492670210158863e-06, "loss": 0.5152, "step": 11708 }, { "epoch": 0.74, "grad_norm": 1.048977255821228, "learning_rate": 1.6485055695413838e-06, "loss": 0.5036, "step": 11709 }, { "epoch": 0.74, "grad_norm": 1.0696985721588135, "learning_rate": 1.6477442591866544e-06, "loss": 0.4892, "step": 11710 }, { "epoch": 0.74, "grad_norm": 1.1575871706008911, "learning_rate": 1.6469830899837547e-06, "loss": 0.4745, "step": 11711 }, { "epoch": 0.74, "grad_norm": 1.0191328525543213, "learning_rate": 1.6462220619647306e-06, "loss": 0.5159, "step": 11712 }, { "epoch": 0.74, "grad_norm": 1.0086557865142822, "learning_rate": 1.6454611751616283e-06, "loss": 0.5013, "step": 11713 }, { "epoch": 0.74, "grad_norm": 1.0208491086959839, "learning_rate": 1.6447004296064867e-06, "loss": 0.4726, "step": 11714 }, { "epoch": 0.74, "grad_norm": 1.0429731607437134, "learning_rate": 1.6439398253313377e-06, "loss": 0.5131, "step": 11715 }, { "epoch": 0.74, "grad_norm": 0.9891948699951172, "learning_rate": 1.6431793623682096e-06, "loss": 0.4921, "step": 11716 }, { "epoch": 0.74, "grad_norm": 1.1223905086517334, "learning_rate": 1.642419040749119e-06, "loss": 0.5548, "step": 11717 }, { "epoch": 0.74, "grad_norm": 1.060289978981018, "learning_rate": 1.6416588605060812e-06, "loss": 0.516, "step": 11718 }, { "epoch": 0.74, "grad_norm": 1.227434515953064, "learning_rate": 1.6408988216711092e-06, "loss": 0.4774, "step": 11719 }, { "epoch": 0.74, "grad_norm": 0.9952710270881653, "learning_rate": 1.6401389242762006e-06, "loss": 0.4812, "step": 11720 }, { "epoch": 0.74, "grad_norm": 1.0805935859680176, "learning_rate": 1.639379168353354e-06, "loss": 0.4917, "step": 11721 }, { "epoch": 0.74, "grad_norm": 1.0161795616149902, "learning_rate": 1.6386195539345596e-06, "loss": 0.4974, "step": 11722 }, { "epoch": 0.74, "grad_norm": 0.9648051261901855, "learning_rate": 1.6378600810518026e-06, "loss": 0.5372, "step": 11723 }, { "epoch": 0.74, "grad_norm": 1.0348917245864868, "learning_rate": 1.6371007497370612e-06, "loss": 0.4883, "step": 11724 }, { "epoch": 0.74, "grad_norm": 1.080922245979309, "learning_rate": 1.6363415600223103e-06, "loss": 0.5468, "step": 11725 }, { "epoch": 0.74, "grad_norm": 0.9747493267059326, "learning_rate": 1.6355825119395118e-06, "loss": 0.4795, "step": 11726 }, { "epoch": 0.74, "grad_norm": 1.0580079555511475, "learning_rate": 1.634823605520633e-06, "loss": 0.484, "step": 11727 }, { "epoch": 0.74, "grad_norm": 1.1747360229492188, "learning_rate": 1.634064840797624e-06, "loss": 0.4909, "step": 11728 }, { "epoch": 0.74, "grad_norm": 1.0317986011505127, "learning_rate": 1.6333062178024355e-06, "loss": 0.5099, "step": 11729 }, { "epoch": 0.74, "grad_norm": 0.9565672874450684, "learning_rate": 1.63254773656701e-06, "loss": 0.4824, "step": 11730 }, { "epoch": 0.74, "grad_norm": 1.0761483907699585, "learning_rate": 1.6317893971232852e-06, "loss": 0.4888, "step": 11731 }, { "epoch": 0.74, "grad_norm": 1.1051914691925049, "learning_rate": 1.6310311995031913e-06, "loss": 0.523, "step": 11732 }, { "epoch": 0.74, "grad_norm": 1.0961300134658813, "learning_rate": 1.6302731437386555e-06, "loss": 0.5789, "step": 11733 }, { "epoch": 0.74, "grad_norm": 1.1655371189117432, "learning_rate": 1.6295152298615936e-06, "loss": 0.4587, "step": 11734 }, { "epoch": 0.74, "grad_norm": 1.1272354125976562, "learning_rate": 1.62875745790392e-06, "loss": 0.5127, "step": 11735 }, { "epoch": 0.74, "grad_norm": 1.126760482788086, "learning_rate": 1.6279998278975428e-06, "loss": 0.5392, "step": 11736 }, { "epoch": 0.74, "grad_norm": 1.0484238862991333, "learning_rate": 1.627242339874362e-06, "loss": 0.4885, "step": 11737 }, { "epoch": 0.74, "grad_norm": 1.0628280639648438, "learning_rate": 1.6264849938662753e-06, "loss": 0.5307, "step": 11738 }, { "epoch": 0.74, "grad_norm": 1.0625278949737549, "learning_rate": 1.6257277899051666e-06, "loss": 0.5333, "step": 11739 }, { "epoch": 0.74, "grad_norm": 1.0485646724700928, "learning_rate": 1.6249707280229237e-06, "loss": 0.5439, "step": 11740 }, { "epoch": 0.74, "grad_norm": 1.0634983777999878, "learning_rate": 1.6242138082514247e-06, "loss": 0.5308, "step": 11741 }, { "epoch": 0.74, "grad_norm": 1.0296636819839478, "learning_rate": 1.6234570306225366e-06, "loss": 0.5131, "step": 11742 }, { "epoch": 0.74, "grad_norm": 1.01493239402771, "learning_rate": 1.6227003951681276e-06, "loss": 0.4532, "step": 11743 }, { "epoch": 0.74, "grad_norm": 1.019639253616333, "learning_rate": 1.6219439019200557e-06, "loss": 0.4923, "step": 11744 }, { "epoch": 0.74, "grad_norm": 1.104752779006958, "learning_rate": 1.6211875509101744e-06, "loss": 0.549, "step": 11745 }, { "epoch": 0.74, "grad_norm": 1.04420804977417, "learning_rate": 1.6204313421703332e-06, "loss": 0.4891, "step": 11746 }, { "epoch": 0.74, "grad_norm": 1.0602904558181763, "learning_rate": 1.6196752757323698e-06, "loss": 0.5742, "step": 11747 }, { "epoch": 0.74, "grad_norm": 1.0057286024093628, "learning_rate": 1.61891935162812e-06, "loss": 0.4803, "step": 11748 }, { "epoch": 0.74, "grad_norm": 1.003815770149231, "learning_rate": 1.6181635698894171e-06, "loss": 0.45, "step": 11749 }, { "epoch": 0.74, "grad_norm": 1.0611565113067627, "learning_rate": 1.61740793054808e-06, "loss": 0.5316, "step": 11750 }, { "epoch": 0.74, "grad_norm": 1.0178951025009155, "learning_rate": 1.6166524336359285e-06, "loss": 0.4699, "step": 11751 }, { "epoch": 0.74, "grad_norm": 1.1051855087280273, "learning_rate": 1.6158970791847728e-06, "loss": 0.5401, "step": 11752 }, { "epoch": 0.74, "grad_norm": 1.0515493154525757, "learning_rate": 1.6151418672264186e-06, "loss": 0.4843, "step": 11753 }, { "epoch": 0.74, "grad_norm": 1.0373625755310059, "learning_rate": 1.614386797792667e-06, "loss": 0.5156, "step": 11754 }, { "epoch": 0.74, "grad_norm": 0.9942763447761536, "learning_rate": 1.6136318709153075e-06, "loss": 0.4878, "step": 11755 }, { "epoch": 0.74, "grad_norm": 0.8941650986671448, "learning_rate": 1.612877086626129e-06, "loss": 0.538, "step": 11756 }, { "epoch": 0.74, "grad_norm": 1.0180784463882446, "learning_rate": 1.612122444956916e-06, "loss": 0.4957, "step": 11757 }, { "epoch": 0.74, "grad_norm": 1.027891993522644, "learning_rate": 1.6113679459394398e-06, "loss": 0.52, "step": 11758 }, { "epoch": 0.74, "grad_norm": 1.0554982423782349, "learning_rate": 1.6106135896054714e-06, "loss": 0.5112, "step": 11759 }, { "epoch": 0.75, "grad_norm": 1.08571195602417, "learning_rate": 1.6098593759867736e-06, "loss": 0.5015, "step": 11760 }, { "epoch": 0.75, "grad_norm": 1.065076470375061, "learning_rate": 1.609105305115104e-06, "loss": 0.4743, "step": 11761 }, { "epoch": 0.75, "grad_norm": 1.0663999319076538, "learning_rate": 1.6083513770222158e-06, "loss": 0.5285, "step": 11762 }, { "epoch": 0.75, "grad_norm": 1.05372154712677, "learning_rate": 1.6075975917398512e-06, "loss": 0.475, "step": 11763 }, { "epoch": 0.75, "grad_norm": 1.063764214515686, "learning_rate": 1.60684394929975e-06, "loss": 0.5586, "step": 11764 }, { "epoch": 0.75, "grad_norm": 1.027434229850769, "learning_rate": 1.6060904497336465e-06, "loss": 0.5199, "step": 11765 }, { "epoch": 0.75, "grad_norm": 1.0786575078964233, "learning_rate": 1.6053370930732676e-06, "loss": 0.4494, "step": 11766 }, { "epoch": 0.75, "grad_norm": 1.0548747777938843, "learning_rate": 1.6045838793503342e-06, "loss": 0.5112, "step": 11767 }, { "epoch": 0.75, "grad_norm": 1.0842057466506958, "learning_rate": 1.6038308085965642e-06, "loss": 0.5167, "step": 11768 }, { "epoch": 0.75, "grad_norm": 1.0827819108963013, "learning_rate": 1.6030778808436609e-06, "loss": 0.4692, "step": 11769 }, { "epoch": 0.75, "grad_norm": 0.9974976181983948, "learning_rate": 1.6023250961233338e-06, "loss": 0.5102, "step": 11770 }, { "epoch": 0.75, "grad_norm": 1.1199263334274292, "learning_rate": 1.6015724544672762e-06, "loss": 0.493, "step": 11771 }, { "epoch": 0.75, "grad_norm": 1.0420162677764893, "learning_rate": 1.6008199559071795e-06, "loss": 0.5241, "step": 11772 }, { "epoch": 0.75, "grad_norm": 1.0192173719406128, "learning_rate": 1.6000676004747306e-06, "loss": 0.4867, "step": 11773 }, { "epoch": 0.75, "grad_norm": 1.05176842212677, "learning_rate": 1.5993153882016065e-06, "loss": 0.5473, "step": 11774 }, { "epoch": 0.75, "grad_norm": 1.0142534971237183, "learning_rate": 1.5985633191194821e-06, "loss": 0.5313, "step": 11775 }, { "epoch": 0.75, "grad_norm": 1.0849391222000122, "learning_rate": 1.5978113932600248e-06, "loss": 0.5187, "step": 11776 }, { "epoch": 0.75, "grad_norm": 1.089290976524353, "learning_rate": 1.5970596106548913e-06, "loss": 0.5012, "step": 11777 }, { "epoch": 0.75, "grad_norm": 0.9700396060943604, "learning_rate": 1.5963079713357432e-06, "loss": 0.5173, "step": 11778 }, { "epoch": 0.75, "grad_norm": 1.0083379745483398, "learning_rate": 1.595556475334224e-06, "loss": 0.5048, "step": 11779 }, { "epoch": 0.75, "grad_norm": 1.0637929439544678, "learning_rate": 1.5948051226819783e-06, "loss": 0.4625, "step": 11780 }, { "epoch": 0.75, "grad_norm": 1.0219900608062744, "learning_rate": 1.5940539134106442e-06, "loss": 0.4917, "step": 11781 }, { "epoch": 0.75, "grad_norm": 1.065132975578308, "learning_rate": 1.5933028475518486e-06, "loss": 0.4959, "step": 11782 }, { "epoch": 0.75, "grad_norm": 1.069326639175415, "learning_rate": 1.5925519251372212e-06, "loss": 0.4303, "step": 11783 }, { "epoch": 0.75, "grad_norm": 0.9691741466522217, "learning_rate": 1.5918011461983796e-06, "loss": 0.4694, "step": 11784 }, { "epoch": 0.75, "grad_norm": 1.0848075151443481, "learning_rate": 1.5910505107669339e-06, "loss": 0.5011, "step": 11785 }, { "epoch": 0.75, "grad_norm": 1.0530424118041992, "learning_rate": 1.5903000188744922e-06, "loss": 0.4995, "step": 11786 }, { "epoch": 0.75, "grad_norm": 1.0618120431900024, "learning_rate": 1.589549670552656e-06, "loss": 0.521, "step": 11787 }, { "epoch": 0.75, "grad_norm": 0.9845284819602966, "learning_rate": 1.588799465833018e-06, "loss": 0.474, "step": 11788 }, { "epoch": 0.75, "grad_norm": 1.1666288375854492, "learning_rate": 1.5880494047471683e-06, "loss": 0.5411, "step": 11789 }, { "epoch": 0.75, "grad_norm": 0.9710365533828735, "learning_rate": 1.587299487326689e-06, "loss": 0.5239, "step": 11790 }, { "epoch": 0.75, "grad_norm": 1.0692170858383179, "learning_rate": 1.586549713603156e-06, "loss": 0.5279, "step": 11791 }, { "epoch": 0.75, "grad_norm": 1.0434986352920532, "learning_rate": 1.5858000836081422e-06, "loss": 0.4978, "step": 11792 }, { "epoch": 0.75, "grad_norm": 1.0952305793762207, "learning_rate": 1.5850505973732077e-06, "loss": 0.5001, "step": 11793 }, { "epoch": 0.75, "grad_norm": 1.0733871459960938, "learning_rate": 1.5843012549299131e-06, "loss": 0.4971, "step": 11794 }, { "epoch": 0.75, "grad_norm": 1.0665565729141235, "learning_rate": 1.58355205630981e-06, "loss": 0.5307, "step": 11795 }, { "epoch": 0.75, "grad_norm": 1.1024495363235474, "learning_rate": 1.5828030015444451e-06, "loss": 0.4628, "step": 11796 }, { "epoch": 0.75, "grad_norm": 1.038615107536316, "learning_rate": 1.5820540906653581e-06, "loss": 0.5358, "step": 11797 }, { "epoch": 0.75, "grad_norm": 0.9799812436103821, "learning_rate": 1.5813053237040849e-06, "loss": 0.4921, "step": 11798 }, { "epoch": 0.75, "grad_norm": 1.041947841644287, "learning_rate": 1.580556700692148e-06, "loss": 0.4849, "step": 11799 }, { "epoch": 0.75, "grad_norm": 1.0639641284942627, "learning_rate": 1.5798082216610766e-06, "loss": 0.4866, "step": 11800 }, { "epoch": 0.75, "grad_norm": 1.0486043691635132, "learning_rate": 1.5790598866423818e-06, "loss": 0.54, "step": 11801 }, { "epoch": 0.75, "grad_norm": 1.0393097400665283, "learning_rate": 1.5783116956675742e-06, "loss": 0.4939, "step": 11802 }, { "epoch": 0.75, "grad_norm": 1.0743504762649536, "learning_rate": 1.5775636487681579e-06, "loss": 0.4922, "step": 11803 }, { "epoch": 0.75, "grad_norm": 1.1121182441711426, "learning_rate": 1.5768157459756307e-06, "loss": 0.5391, "step": 11804 }, { "epoch": 0.75, "grad_norm": 1.0420786142349243, "learning_rate": 1.576067987321484e-06, "loss": 0.4904, "step": 11805 }, { "epoch": 0.75, "grad_norm": 0.9685152173042297, "learning_rate": 1.5753203728372052e-06, "loss": 0.4603, "step": 11806 }, { "epoch": 0.75, "grad_norm": 0.9745351672172546, "learning_rate": 1.5745729025542684e-06, "loss": 0.4562, "step": 11807 }, { "epoch": 0.75, "grad_norm": 1.1631194353103638, "learning_rate": 1.5738255765041537e-06, "loss": 0.5422, "step": 11808 }, { "epoch": 0.75, "grad_norm": 1.0341942310333252, "learning_rate": 1.5730783947183237e-06, "loss": 0.5079, "step": 11809 }, { "epoch": 0.75, "grad_norm": 1.0354013442993164, "learning_rate": 1.5723313572282412e-06, "loss": 0.448, "step": 11810 }, { "epoch": 0.75, "grad_norm": 1.0714869499206543, "learning_rate": 1.5715844640653627e-06, "loss": 0.5068, "step": 11811 }, { "epoch": 0.75, "grad_norm": 1.095607876777649, "learning_rate": 1.5708377152611326e-06, "loss": 0.5168, "step": 11812 }, { "epoch": 0.75, "grad_norm": 1.0534653663635254, "learning_rate": 1.5700911108469986e-06, "loss": 0.4959, "step": 11813 }, { "epoch": 0.75, "grad_norm": 1.0473425388336182, "learning_rate": 1.569344650854398e-06, "loss": 0.4728, "step": 11814 }, { "epoch": 0.75, "grad_norm": 1.0482488870620728, "learning_rate": 1.5685983353147582e-06, "loss": 0.5058, "step": 11815 }, { "epoch": 0.75, "grad_norm": 1.0301032066345215, "learning_rate": 1.5678521642595052e-06, "loss": 0.4793, "step": 11816 }, { "epoch": 0.75, "grad_norm": 1.0333133935928345, "learning_rate": 1.567106137720058e-06, "loss": 0.4461, "step": 11817 }, { "epoch": 0.75, "grad_norm": 1.1130452156066895, "learning_rate": 1.5663602557278297e-06, "loss": 0.4937, "step": 11818 }, { "epoch": 0.75, "grad_norm": 0.985506534576416, "learning_rate": 1.5656145183142274e-06, "loss": 0.4818, "step": 11819 }, { "epoch": 0.75, "grad_norm": 1.0618568658828735, "learning_rate": 1.5648689255106474e-06, "loss": 0.5234, "step": 11820 }, { "epoch": 0.75, "grad_norm": 1.1874841451644897, "learning_rate": 1.5641234773484887e-06, "loss": 0.5073, "step": 11821 }, { "epoch": 0.75, "grad_norm": 1.0233137607574463, "learning_rate": 1.5633781738591392e-06, "loss": 0.5044, "step": 11822 }, { "epoch": 0.75, "grad_norm": 1.1124851703643799, "learning_rate": 1.5626330150739776e-06, "loss": 0.5375, "step": 11823 }, { "epoch": 0.75, "grad_norm": 1.0393725633621216, "learning_rate": 1.5618880010243831e-06, "loss": 0.4793, "step": 11824 }, { "epoch": 0.75, "grad_norm": 1.0990042686462402, "learning_rate": 1.5611431317417235e-06, "loss": 0.5231, "step": 11825 }, { "epoch": 0.75, "grad_norm": 1.1121004819869995, "learning_rate": 1.5603984072573648e-06, "loss": 0.4983, "step": 11826 }, { "epoch": 0.75, "grad_norm": 1.0521689653396606, "learning_rate": 1.5596538276026641e-06, "loss": 0.462, "step": 11827 }, { "epoch": 0.75, "grad_norm": 1.0436952114105225, "learning_rate": 1.5589093928089715e-06, "loss": 0.5098, "step": 11828 }, { "epoch": 0.75, "grad_norm": 1.0428225994110107, "learning_rate": 1.5581651029076322e-06, "loss": 0.5065, "step": 11829 }, { "epoch": 0.75, "grad_norm": 1.144033670425415, "learning_rate": 1.5574209579299903e-06, "loss": 0.5242, "step": 11830 }, { "epoch": 0.75, "grad_norm": 1.1208655834197998, "learning_rate": 1.5566769579073747e-06, "loss": 0.4954, "step": 11831 }, { "epoch": 0.75, "grad_norm": 1.1524823904037476, "learning_rate": 1.555933102871114e-06, "loss": 0.4988, "step": 11832 }, { "epoch": 0.75, "grad_norm": 1.185436725616455, "learning_rate": 1.5551893928525285e-06, "loss": 0.5559, "step": 11833 }, { "epoch": 0.75, "grad_norm": 1.037841558456421, "learning_rate": 1.5544458278829344e-06, "loss": 0.5449, "step": 11834 }, { "epoch": 0.75, "grad_norm": 1.063841700553894, "learning_rate": 1.5537024079936425e-06, "loss": 0.478, "step": 11835 }, { "epoch": 0.75, "grad_norm": 1.0968282222747803, "learning_rate": 1.5529591332159511e-06, "loss": 0.5301, "step": 11836 }, { "epoch": 0.75, "grad_norm": 1.1035841703414917, "learning_rate": 1.5522160035811578e-06, "loss": 0.502, "step": 11837 }, { "epoch": 0.75, "grad_norm": 0.9873964190483093, "learning_rate": 1.551473019120558e-06, "loss": 0.5064, "step": 11838 }, { "epoch": 0.75, "grad_norm": 1.1052426099777222, "learning_rate": 1.5507301798654313e-06, "loss": 0.4939, "step": 11839 }, { "epoch": 0.75, "grad_norm": 1.0275089740753174, "learning_rate": 1.549987485847057e-06, "loss": 0.4861, "step": 11840 }, { "epoch": 0.75, "grad_norm": 0.9469333291053772, "learning_rate": 1.54924493709671e-06, "loss": 0.4617, "step": 11841 }, { "epoch": 0.75, "grad_norm": 1.0526026487350464, "learning_rate": 1.5485025336456511e-06, "loss": 0.5609, "step": 11842 }, { "epoch": 0.75, "grad_norm": 1.032482624053955, "learning_rate": 1.547760275525147e-06, "loss": 0.4819, "step": 11843 }, { "epoch": 0.75, "grad_norm": 1.04742431640625, "learning_rate": 1.547018162766446e-06, "loss": 0.546, "step": 11844 }, { "epoch": 0.75, "grad_norm": 1.1183147430419922, "learning_rate": 1.5462761954007987e-06, "loss": 0.5077, "step": 11845 }, { "epoch": 0.75, "grad_norm": 1.044101357460022, "learning_rate": 1.5455343734594463e-06, "loss": 0.5132, "step": 11846 }, { "epoch": 0.75, "grad_norm": 1.1227136850357056, "learning_rate": 1.5447926969736237e-06, "loss": 0.5326, "step": 11847 }, { "epoch": 0.75, "grad_norm": 1.0950839519500732, "learning_rate": 1.5440511659745611e-06, "loss": 0.5109, "step": 11848 }, { "epoch": 0.75, "grad_norm": 1.1029894351959229, "learning_rate": 1.5433097804934833e-06, "loss": 0.4805, "step": 11849 }, { "epoch": 0.75, "grad_norm": 1.1721646785736084, "learning_rate": 1.5425685405616026e-06, "loss": 0.5316, "step": 11850 }, { "epoch": 0.75, "grad_norm": 1.037771224975586, "learning_rate": 1.5418274462101358e-06, "loss": 0.4589, "step": 11851 }, { "epoch": 0.75, "grad_norm": 1.1142371892929077, "learning_rate": 1.541086497470284e-06, "loss": 0.5214, "step": 11852 }, { "epoch": 0.75, "grad_norm": 1.152891993522644, "learning_rate": 1.540345694373247e-06, "loss": 0.4986, "step": 11853 }, { "epoch": 0.75, "grad_norm": 0.9909875988960266, "learning_rate": 1.5396050369502175e-06, "loss": 0.5031, "step": 11854 }, { "epoch": 0.75, "grad_norm": 1.2157238721847534, "learning_rate": 1.538864525232382e-06, "loss": 0.5067, "step": 11855 }, { "epoch": 0.75, "grad_norm": 1.2282987833023071, "learning_rate": 1.538124159250921e-06, "loss": 0.4681, "step": 11856 }, { "epoch": 0.75, "grad_norm": 1.0671184062957764, "learning_rate": 1.5373839390370098e-06, "loss": 0.5296, "step": 11857 }, { "epoch": 0.75, "grad_norm": 1.0432041883468628, "learning_rate": 1.5366438646218146e-06, "loss": 0.5212, "step": 11858 }, { "epoch": 0.75, "grad_norm": 1.0385440587997437, "learning_rate": 1.5359039360364975e-06, "loss": 0.484, "step": 11859 }, { "epoch": 0.75, "grad_norm": 0.9888316988945007, "learning_rate": 1.5351641533122153e-06, "loss": 0.4988, "step": 11860 }, { "epoch": 0.75, "grad_norm": 1.0522485971450806, "learning_rate": 1.5344245164801174e-06, "loss": 0.4948, "step": 11861 }, { "epoch": 0.75, "grad_norm": 1.0455278158187866, "learning_rate": 1.533685025571347e-06, "loss": 0.4481, "step": 11862 }, { "epoch": 0.75, "grad_norm": 1.0585129261016846, "learning_rate": 1.5329456806170418e-06, "loss": 0.5255, "step": 11863 }, { "epoch": 0.75, "grad_norm": 1.1429243087768555, "learning_rate": 1.5322064816483328e-06, "loss": 0.5375, "step": 11864 }, { "epoch": 0.75, "grad_norm": 1.0262020826339722, "learning_rate": 1.5314674286963471e-06, "loss": 0.5271, "step": 11865 }, { "epoch": 0.75, "grad_norm": 1.0965781211853027, "learning_rate": 1.5307285217922003e-06, "loss": 0.5291, "step": 11866 }, { "epoch": 0.75, "grad_norm": 1.013327956199646, "learning_rate": 1.529989760967005e-06, "loss": 0.5029, "step": 11867 }, { "epoch": 0.75, "grad_norm": 1.078499436378479, "learning_rate": 1.5292511462518728e-06, "loss": 0.4964, "step": 11868 }, { "epoch": 0.75, "grad_norm": 0.977638304233551, "learning_rate": 1.528512677677899e-06, "loss": 0.4668, "step": 11869 }, { "epoch": 0.75, "grad_norm": 1.0083192586898804, "learning_rate": 1.5277743552761809e-06, "loss": 0.4853, "step": 11870 }, { "epoch": 0.75, "grad_norm": 1.0516363382339478, "learning_rate": 1.5270361790778065e-06, "loss": 0.5104, "step": 11871 }, { "epoch": 0.75, "grad_norm": 1.0042387247085571, "learning_rate": 1.526298149113854e-06, "loss": 0.5028, "step": 11872 }, { "epoch": 0.75, "grad_norm": 1.1472967863082886, "learning_rate": 1.5255602654154055e-06, "loss": 0.5059, "step": 11873 }, { "epoch": 0.75, "grad_norm": 1.1058728694915771, "learning_rate": 1.5248225280135258e-06, "loss": 0.4976, "step": 11874 }, { "epoch": 0.75, "grad_norm": 1.0069677829742432, "learning_rate": 1.5240849369392807e-06, "loss": 0.4857, "step": 11875 }, { "epoch": 0.75, "grad_norm": 1.1479973793029785, "learning_rate": 1.5233474922237268e-06, "loss": 0.5161, "step": 11876 }, { "epoch": 0.75, "grad_norm": 1.0414130687713623, "learning_rate": 1.5226101938979153e-06, "loss": 0.487, "step": 11877 }, { "epoch": 0.75, "grad_norm": 1.0197550058364868, "learning_rate": 1.5218730419928917e-06, "loss": 0.442, "step": 11878 }, { "epoch": 0.75, "grad_norm": 1.0681520700454712, "learning_rate": 1.5211360365396972e-06, "loss": 0.5254, "step": 11879 }, { "epoch": 0.75, "grad_norm": 1.124697208404541, "learning_rate": 1.5203991775693577e-06, "loss": 0.5418, "step": 11880 }, { "epoch": 0.75, "grad_norm": 1.0970063209533691, "learning_rate": 1.5196624651129084e-06, "loss": 0.5125, "step": 11881 }, { "epoch": 0.75, "grad_norm": 1.1287860870361328, "learning_rate": 1.5189258992013635e-06, "loss": 0.5159, "step": 11882 }, { "epoch": 0.75, "grad_norm": 1.0907492637634277, "learning_rate": 1.5181894798657388e-06, "loss": 0.4972, "step": 11883 }, { "epoch": 0.75, "grad_norm": 1.0564488172531128, "learning_rate": 1.517453207137043e-06, "loss": 0.49, "step": 11884 }, { "epoch": 0.75, "grad_norm": 1.044112205505371, "learning_rate": 1.5167170810462777e-06, "loss": 0.5233, "step": 11885 }, { "epoch": 0.75, "grad_norm": 1.0471843481063843, "learning_rate": 1.5159811016244392e-06, "loss": 0.5003, "step": 11886 }, { "epoch": 0.75, "grad_norm": 1.0996730327606201, "learning_rate": 1.5152452689025176e-06, "loss": 0.4893, "step": 11887 }, { "epoch": 0.75, "grad_norm": 0.9819523692131042, "learning_rate": 1.5145095829114937e-06, "loss": 0.4941, "step": 11888 }, { "epoch": 0.75, "grad_norm": 1.0815109014511108, "learning_rate": 1.5137740436823462e-06, "loss": 0.5005, "step": 11889 }, { "epoch": 0.75, "grad_norm": 1.076339840888977, "learning_rate": 1.5130386512460454e-06, "loss": 0.541, "step": 11890 }, { "epoch": 0.75, "grad_norm": 1.0075737237930298, "learning_rate": 1.5123034056335572e-06, "loss": 0.5041, "step": 11891 }, { "epoch": 0.75, "grad_norm": 1.055673360824585, "learning_rate": 1.5115683068758419e-06, "loss": 0.5106, "step": 11892 }, { "epoch": 0.75, "grad_norm": 1.0819106101989746, "learning_rate": 1.5108333550038461e-06, "loss": 0.5044, "step": 11893 }, { "epoch": 0.75, "grad_norm": 1.0794376134872437, "learning_rate": 1.510098550048521e-06, "loss": 0.5016, "step": 11894 }, { "epoch": 0.75, "grad_norm": 0.9727383852005005, "learning_rate": 1.5093638920408077e-06, "loss": 0.4704, "step": 11895 }, { "epoch": 0.75, "grad_norm": 1.0047531127929688, "learning_rate": 1.508629381011636e-06, "loss": 0.5153, "step": 11896 }, { "epoch": 0.75, "grad_norm": 1.0383678674697876, "learning_rate": 1.507895016991936e-06, "loss": 0.4804, "step": 11897 }, { "epoch": 0.75, "grad_norm": 1.071780800819397, "learning_rate": 1.507160800012628e-06, "loss": 0.5098, "step": 11898 }, { "epoch": 0.75, "grad_norm": 1.142799973487854, "learning_rate": 1.5064267301046281e-06, "loss": 0.5245, "step": 11899 }, { "epoch": 0.75, "grad_norm": 1.138761043548584, "learning_rate": 1.5056928072988475e-06, "loss": 0.5021, "step": 11900 }, { "epoch": 0.75, "grad_norm": 1.0979869365692139, "learning_rate": 1.504959031626183e-06, "loss": 0.5337, "step": 11901 }, { "epoch": 0.75, "grad_norm": 1.1427834033966064, "learning_rate": 1.5042254031175373e-06, "loss": 0.5304, "step": 11902 }, { "epoch": 0.75, "grad_norm": 1.061793565750122, "learning_rate": 1.5034919218038007e-06, "loss": 0.4581, "step": 11903 }, { "epoch": 0.75, "grad_norm": 1.0412780046463013, "learning_rate": 1.502758587715854e-06, "loss": 0.4771, "step": 11904 }, { "epoch": 0.75, "grad_norm": 1.126717448234558, "learning_rate": 1.5020254008845775e-06, "loss": 0.5269, "step": 11905 }, { "epoch": 0.75, "grad_norm": 1.048540711402893, "learning_rate": 1.501292361340842e-06, "loss": 0.523, "step": 11906 }, { "epoch": 0.75, "grad_norm": 0.9710584878921509, "learning_rate": 1.500559469115515e-06, "loss": 0.5073, "step": 11907 }, { "epoch": 0.75, "grad_norm": 1.0929603576660156, "learning_rate": 1.499826724239456e-06, "loss": 0.4943, "step": 11908 }, { "epoch": 0.75, "grad_norm": 1.036075234413147, "learning_rate": 1.499094126743516e-06, "loss": 0.5468, "step": 11909 }, { "epoch": 0.75, "grad_norm": 0.9933010339736938, "learning_rate": 1.4983616766585423e-06, "loss": 0.463, "step": 11910 }, { "epoch": 0.75, "grad_norm": 1.1443263292312622, "learning_rate": 1.4976293740153803e-06, "loss": 0.5859, "step": 11911 }, { "epoch": 0.75, "grad_norm": 1.0146114826202393, "learning_rate": 1.4968972188448593e-06, "loss": 0.4801, "step": 11912 }, { "epoch": 0.75, "grad_norm": 1.0006181001663208, "learning_rate": 1.4961652111778103e-06, "loss": 0.4741, "step": 11913 }, { "epoch": 0.75, "grad_norm": 1.0206917524337769, "learning_rate": 1.4954333510450552e-06, "loss": 0.5021, "step": 11914 }, { "epoch": 0.75, "grad_norm": 0.9869930148124695, "learning_rate": 1.4947016384774105e-06, "loss": 0.4571, "step": 11915 }, { "epoch": 0.75, "grad_norm": 1.0013905763626099, "learning_rate": 1.4939700735056873e-06, "loss": 0.4808, "step": 11916 }, { "epoch": 0.75, "grad_norm": 1.0572274923324585, "learning_rate": 1.493238656160686e-06, "loss": 0.5408, "step": 11917 }, { "epoch": 0.76, "grad_norm": 0.9899517893791199, "learning_rate": 1.492507386473206e-06, "loss": 0.4837, "step": 11918 }, { "epoch": 0.76, "grad_norm": 1.0914475917816162, "learning_rate": 1.4917762644740381e-06, "loss": 0.5066, "step": 11919 }, { "epoch": 0.76, "grad_norm": 1.005753755569458, "learning_rate": 1.4910452901939671e-06, "loss": 0.4579, "step": 11920 }, { "epoch": 0.76, "grad_norm": 1.0300661325454712, "learning_rate": 1.4903144636637723e-06, "loss": 0.4903, "step": 11921 }, { "epoch": 0.76, "grad_norm": 1.06362783908844, "learning_rate": 1.489583784914228e-06, "loss": 0.4918, "step": 11922 }, { "epoch": 0.76, "grad_norm": 1.1783336400985718, "learning_rate": 1.4888532539760958e-06, "loss": 0.557, "step": 11923 }, { "epoch": 0.76, "grad_norm": 0.9961380362510681, "learning_rate": 1.4881228708801409e-06, "loss": 0.4597, "step": 11924 }, { "epoch": 0.76, "grad_norm": 1.076972246170044, "learning_rate": 1.4873926356571144e-06, "loss": 0.5305, "step": 11925 }, { "epoch": 0.76, "grad_norm": 1.0707809925079346, "learning_rate": 1.486662548337764e-06, "loss": 0.5411, "step": 11926 }, { "epoch": 0.76, "grad_norm": 1.1027288436889648, "learning_rate": 1.485932608952832e-06, "loss": 0.4703, "step": 11927 }, { "epoch": 0.76, "grad_norm": 1.047572374343872, "learning_rate": 1.485202817533053e-06, "loss": 0.4692, "step": 11928 }, { "epoch": 0.76, "grad_norm": 1.0753308534622192, "learning_rate": 1.4844731741091561e-06, "loss": 0.5282, "step": 11929 }, { "epoch": 0.76, "grad_norm": 1.0842903852462769, "learning_rate": 1.4837436787118665e-06, "loss": 0.5028, "step": 11930 }, { "epoch": 0.76, "grad_norm": 1.1072250604629517, "learning_rate": 1.4830143313718943e-06, "loss": 0.5216, "step": 11931 }, { "epoch": 0.76, "grad_norm": 1.0920616388320923, "learning_rate": 1.482285132119956e-06, "loss": 0.5112, "step": 11932 }, { "epoch": 0.76, "grad_norm": 0.9837995767593384, "learning_rate": 1.4815560809867551e-06, "loss": 0.5189, "step": 11933 }, { "epoch": 0.76, "grad_norm": 1.1012341976165771, "learning_rate": 1.4808271780029864e-06, "loss": 0.5073, "step": 11934 }, { "epoch": 0.76, "grad_norm": 1.0157415866851807, "learning_rate": 1.4800984231993432e-06, "loss": 0.4533, "step": 11935 }, { "epoch": 0.76, "grad_norm": 1.1307048797607422, "learning_rate": 1.47936981660651e-06, "loss": 0.5154, "step": 11936 }, { "epoch": 0.76, "grad_norm": 1.076559066772461, "learning_rate": 1.4786413582551668e-06, "loss": 0.5612, "step": 11937 }, { "epoch": 0.76, "grad_norm": 1.066542148590088, "learning_rate": 1.4779130481759874e-06, "loss": 0.5214, "step": 11938 }, { "epoch": 0.76, "grad_norm": 1.0553350448608398, "learning_rate": 1.4771848863996353e-06, "loss": 0.5669, "step": 11939 }, { "epoch": 0.76, "grad_norm": 1.042103886604309, "learning_rate": 1.4764568729567714e-06, "loss": 0.4762, "step": 11940 }, { "epoch": 0.76, "grad_norm": 1.0295274257659912, "learning_rate": 1.4757290078780545e-06, "loss": 0.4596, "step": 11941 }, { "epoch": 0.76, "grad_norm": 1.1455708742141724, "learning_rate": 1.475001291194127e-06, "loss": 0.5457, "step": 11942 }, { "epoch": 0.76, "grad_norm": 1.07152259349823, "learning_rate": 1.4742737229356324e-06, "loss": 0.5586, "step": 11943 }, { "epoch": 0.76, "grad_norm": 1.0826992988586426, "learning_rate": 1.473546303133207e-06, "loss": 0.5437, "step": 11944 }, { "epoch": 0.76, "grad_norm": 1.0262315273284912, "learning_rate": 1.4728190318174785e-06, "loss": 0.5024, "step": 11945 }, { "epoch": 0.76, "grad_norm": 1.0386667251586914, "learning_rate": 1.4720919090190723e-06, "loss": 0.4647, "step": 11946 }, { "epoch": 0.76, "grad_norm": 1.2081540822982788, "learning_rate": 1.471364934768601e-06, "loss": 0.5186, "step": 11947 }, { "epoch": 0.76, "grad_norm": 1.0901449918746948, "learning_rate": 1.470638109096676e-06, "loss": 0.4978, "step": 11948 }, { "epoch": 0.76, "grad_norm": 1.0483721494674683, "learning_rate": 1.469911432033906e-06, "loss": 0.4476, "step": 11949 }, { "epoch": 0.76, "grad_norm": 1.0376167297363281, "learning_rate": 1.469184903610883e-06, "loss": 0.4897, "step": 11950 }, { "epoch": 0.76, "grad_norm": 1.1253174543380737, "learning_rate": 1.468458523858201e-06, "loss": 0.4876, "step": 11951 }, { "epoch": 0.76, "grad_norm": 1.062595009803772, "learning_rate": 1.467732292806447e-06, "loss": 0.5413, "step": 11952 }, { "epoch": 0.76, "grad_norm": 1.0641722679138184, "learning_rate": 1.4670062104861948e-06, "loss": 0.4689, "step": 11953 }, { "epoch": 0.76, "grad_norm": 1.0055220127105713, "learning_rate": 1.4662802769280244e-06, "loss": 0.5123, "step": 11954 }, { "epoch": 0.76, "grad_norm": 1.0673023462295532, "learning_rate": 1.4655544921624964e-06, "loss": 0.5036, "step": 11955 }, { "epoch": 0.76, "grad_norm": 1.0751420259475708, "learning_rate": 1.464828856220174e-06, "loss": 0.4862, "step": 11956 }, { "epoch": 0.76, "grad_norm": 1.0711745023727417, "learning_rate": 1.4641033691316104e-06, "loss": 0.5402, "step": 11957 }, { "epoch": 0.76, "grad_norm": 0.9954444766044617, "learning_rate": 1.4633780309273532e-06, "loss": 0.4982, "step": 11958 }, { "epoch": 0.76, "grad_norm": 1.03201162815094, "learning_rate": 1.4626528416379438e-06, "loss": 0.526, "step": 11959 }, { "epoch": 0.76, "grad_norm": 1.0769315958023071, "learning_rate": 1.4619278012939197e-06, "loss": 0.5002, "step": 11960 }, { "epoch": 0.76, "grad_norm": 1.043395757675171, "learning_rate": 1.4612029099258046e-06, "loss": 0.4672, "step": 11961 }, { "epoch": 0.76, "grad_norm": 1.1494218111038208, "learning_rate": 1.4604781675641273e-06, "loss": 0.4877, "step": 11962 }, { "epoch": 0.76, "grad_norm": 1.0968637466430664, "learning_rate": 1.4597535742393998e-06, "loss": 0.4801, "step": 11963 }, { "epoch": 0.76, "grad_norm": 1.0693919658660889, "learning_rate": 1.459029129982134e-06, "loss": 0.516, "step": 11964 }, { "epoch": 0.76, "grad_norm": 1.1250993013381958, "learning_rate": 1.4583048348228345e-06, "loss": 0.5083, "step": 11965 }, { "epoch": 0.76, "grad_norm": 1.0110074281692505, "learning_rate": 1.4575806887919951e-06, "loss": 0.5104, "step": 11966 }, { "epoch": 0.76, "grad_norm": 0.988934338092804, "learning_rate": 1.456856691920111e-06, "loss": 0.5069, "step": 11967 }, { "epoch": 0.76, "grad_norm": 1.084764003753662, "learning_rate": 1.4561328442376678e-06, "loss": 0.5018, "step": 11968 }, { "epoch": 0.76, "grad_norm": 0.9927456378936768, "learning_rate": 1.45540914577514e-06, "loss": 0.4934, "step": 11969 }, { "epoch": 0.76, "grad_norm": 1.011318564414978, "learning_rate": 1.454685596563003e-06, "loss": 0.4814, "step": 11970 }, { "epoch": 0.76, "grad_norm": 1.0381759405136108, "learning_rate": 1.4539621966317219e-06, "loss": 0.4941, "step": 11971 }, { "epoch": 0.76, "grad_norm": 1.016323208808899, "learning_rate": 1.4532389460117574e-06, "loss": 0.4807, "step": 11972 }, { "epoch": 0.76, "grad_norm": 1.1208360195159912, "learning_rate": 1.4525158447335635e-06, "loss": 0.4807, "step": 11973 }, { "epoch": 0.76, "grad_norm": 0.9977958798408508, "learning_rate": 1.4517928928275843e-06, "loss": 0.5039, "step": 11974 }, { "epoch": 0.76, "grad_norm": 1.0501761436462402, "learning_rate": 1.4510700903242642e-06, "loss": 0.4634, "step": 11975 }, { "epoch": 0.76, "grad_norm": 1.0842777490615845, "learning_rate": 1.4503474372540382e-06, "loss": 0.5476, "step": 11976 }, { "epoch": 0.76, "grad_norm": 1.0287162065505981, "learning_rate": 1.4496249336473318e-06, "loss": 0.4973, "step": 11977 }, { "epoch": 0.76, "grad_norm": 1.0873017311096191, "learning_rate": 1.4489025795345686e-06, "loss": 0.5014, "step": 11978 }, { "epoch": 0.76, "grad_norm": 1.0273690223693848, "learning_rate": 1.4481803749461643e-06, "loss": 0.5119, "step": 11979 }, { "epoch": 0.76, "grad_norm": 1.057515025138855, "learning_rate": 1.4474583199125285e-06, "loss": 0.5005, "step": 11980 }, { "epoch": 0.76, "grad_norm": 0.9910272359848022, "learning_rate": 1.446736414464066e-06, "loss": 0.4732, "step": 11981 }, { "epoch": 0.76, "grad_norm": 1.0097696781158447, "learning_rate": 1.4460146586311713e-06, "loss": 0.5092, "step": 11982 }, { "epoch": 0.76, "grad_norm": 1.0665124654769897, "learning_rate": 1.4452930524442338e-06, "loss": 0.5283, "step": 11983 }, { "epoch": 0.76, "grad_norm": 1.074441909790039, "learning_rate": 1.4445715959336432e-06, "loss": 0.4865, "step": 11984 }, { "epoch": 0.76, "grad_norm": 1.0354875326156616, "learning_rate": 1.4438502891297723e-06, "loss": 0.4793, "step": 11985 }, { "epoch": 0.76, "grad_norm": 1.0373681783676147, "learning_rate": 1.4431291320629953e-06, "loss": 0.4863, "step": 11986 }, { "epoch": 0.76, "grad_norm": 1.163691520690918, "learning_rate": 1.4424081247636768e-06, "loss": 0.4999, "step": 11987 }, { "epoch": 0.76, "grad_norm": 1.0569572448730469, "learning_rate": 1.4416872672621762e-06, "loss": 0.5374, "step": 11988 }, { "epoch": 0.76, "grad_norm": 1.0788553953170776, "learning_rate": 1.440966559588846e-06, "loss": 0.5012, "step": 11989 }, { "epoch": 0.76, "grad_norm": 1.0290710926055908, "learning_rate": 1.4402460017740355e-06, "loss": 0.5061, "step": 11990 }, { "epoch": 0.76, "grad_norm": 1.0616074800491333, "learning_rate": 1.4395255938480785e-06, "loss": 0.4649, "step": 11991 }, { "epoch": 0.76, "grad_norm": 1.0956692695617676, "learning_rate": 1.4388053358413162e-06, "loss": 0.5222, "step": 11992 }, { "epoch": 0.76, "grad_norm": 0.9791175723075867, "learning_rate": 1.4380852277840712e-06, "loss": 0.4925, "step": 11993 }, { "epoch": 0.76, "grad_norm": 1.0590614080429077, "learning_rate": 1.437365269706666e-06, "loss": 0.5184, "step": 11994 }, { "epoch": 0.76, "grad_norm": 1.033125877380371, "learning_rate": 1.436645461639416e-06, "loss": 0.4932, "step": 11995 }, { "epoch": 0.76, "grad_norm": 1.1277824640274048, "learning_rate": 1.4359258036126295e-06, "loss": 0.5071, "step": 11996 }, { "epoch": 0.76, "grad_norm": 1.0017791986465454, "learning_rate": 1.4352062956566088e-06, "loss": 0.4937, "step": 11997 }, { "epoch": 0.76, "grad_norm": 1.0337672233581543, "learning_rate": 1.4344869378016518e-06, "loss": 0.4753, "step": 11998 }, { "epoch": 0.76, "grad_norm": 1.0503156185150146, "learning_rate": 1.4337677300780445e-06, "loss": 0.529, "step": 11999 }, { "epoch": 0.76, "grad_norm": 1.000065565109253, "learning_rate": 1.433048672516072e-06, "loss": 0.5215, "step": 12000 }, { "epoch": 0.76, "grad_norm": 1.0630111694335938, "learning_rate": 1.4323297651460117e-06, "loss": 0.5189, "step": 12001 }, { "epoch": 0.76, "grad_norm": 1.1379293203353882, "learning_rate": 1.4316110079981339e-06, "loss": 0.5171, "step": 12002 }, { "epoch": 0.76, "grad_norm": 1.014651894569397, "learning_rate": 1.4308924011027042e-06, "loss": 0.5122, "step": 12003 }, { "epoch": 0.76, "grad_norm": 1.0537596940994263, "learning_rate": 1.430173944489977e-06, "loss": 0.4997, "step": 12004 }, { "epoch": 0.76, "grad_norm": 0.9623979330062866, "learning_rate": 1.4294556381902074e-06, "loss": 0.4396, "step": 12005 }, { "epoch": 0.76, "grad_norm": 1.0301682949066162, "learning_rate": 1.428737482233642e-06, "loss": 0.4786, "step": 12006 }, { "epoch": 0.76, "grad_norm": 1.0479462146759033, "learning_rate": 1.4280194766505156e-06, "loss": 0.507, "step": 12007 }, { "epoch": 0.76, "grad_norm": 1.057776927947998, "learning_rate": 1.427301621471064e-06, "loss": 0.5253, "step": 12008 }, { "epoch": 0.76, "grad_norm": 1.0924488306045532, "learning_rate": 1.4265839167255114e-06, "loss": 0.5541, "step": 12009 }, { "epoch": 0.76, "grad_norm": 0.9652511477470398, "learning_rate": 1.42586636244408e-06, "loss": 0.4903, "step": 12010 }, { "epoch": 0.76, "grad_norm": 1.0759705305099487, "learning_rate": 1.4251489586569834e-06, "loss": 0.5713, "step": 12011 }, { "epoch": 0.76, "grad_norm": 1.0573580265045166, "learning_rate": 1.4244317053944268e-06, "loss": 0.5024, "step": 12012 }, { "epoch": 0.76, "grad_norm": 1.0048032999038696, "learning_rate": 1.423714602686611e-06, "loss": 0.4958, "step": 12013 }, { "epoch": 0.76, "grad_norm": 1.005070447921753, "learning_rate": 1.4229976505637361e-06, "loss": 0.4997, "step": 12014 }, { "epoch": 0.76, "grad_norm": 1.086395263671875, "learning_rate": 1.4222808490559842e-06, "loss": 0.5486, "step": 12015 }, { "epoch": 0.76, "grad_norm": 0.9975227117538452, "learning_rate": 1.4215641981935403e-06, "loss": 0.4314, "step": 12016 }, { "epoch": 0.76, "grad_norm": 1.1591862440109253, "learning_rate": 1.4208476980065794e-06, "loss": 0.5137, "step": 12017 }, { "epoch": 0.76, "grad_norm": 1.0774359703063965, "learning_rate": 1.420131348525271e-06, "loss": 0.5082, "step": 12018 }, { "epoch": 0.76, "grad_norm": 1.0310287475585938, "learning_rate": 1.4194151497797793e-06, "loss": 0.5299, "step": 12019 }, { "epoch": 0.76, "grad_norm": 1.06497323513031, "learning_rate": 1.4186991018002582e-06, "loss": 0.4938, "step": 12020 }, { "epoch": 0.76, "grad_norm": 1.0487126111984253, "learning_rate": 1.4179832046168584e-06, "loss": 0.534, "step": 12021 }, { "epoch": 0.76, "grad_norm": 1.084673523902893, "learning_rate": 1.417267458259728e-06, "loss": 0.509, "step": 12022 }, { "epoch": 0.76, "grad_norm": 1.0191333293914795, "learning_rate": 1.4165518627589991e-06, "loss": 0.4419, "step": 12023 }, { "epoch": 0.76, "grad_norm": 1.062406063079834, "learning_rate": 1.4158364181448065e-06, "loss": 0.5042, "step": 12024 }, { "epoch": 0.76, "grad_norm": 1.1256499290466309, "learning_rate": 1.4151211244472734e-06, "loss": 0.5556, "step": 12025 }, { "epoch": 0.76, "grad_norm": 1.2402355670928955, "learning_rate": 1.414405981696519e-06, "loss": 0.5159, "step": 12026 }, { "epoch": 0.76, "grad_norm": 1.0793510675430298, "learning_rate": 1.4136909899226564e-06, "loss": 0.521, "step": 12027 }, { "epoch": 0.76, "grad_norm": 1.0173790454864502, "learning_rate": 1.412976149155789e-06, "loss": 0.4589, "step": 12028 }, { "epoch": 0.76, "grad_norm": 1.1383095979690552, "learning_rate": 1.412261459426018e-06, "loss": 0.4998, "step": 12029 }, { "epoch": 0.76, "grad_norm": 1.0682752132415771, "learning_rate": 1.4115469207634358e-06, "loss": 0.542, "step": 12030 }, { "epoch": 0.76, "grad_norm": 1.086917519569397, "learning_rate": 1.4108325331981298e-06, "loss": 0.5032, "step": 12031 }, { "epoch": 0.76, "grad_norm": 0.9840065240859985, "learning_rate": 1.4101182967601796e-06, "loss": 0.535, "step": 12032 }, { "epoch": 0.76, "grad_norm": 1.03810715675354, "learning_rate": 1.4094042114796613e-06, "loss": 0.4871, "step": 12033 }, { "epoch": 0.76, "grad_norm": 1.1853363513946533, "learning_rate": 1.4086902773866379e-06, "loss": 0.5153, "step": 12034 }, { "epoch": 0.76, "grad_norm": 1.0018763542175293, "learning_rate": 1.4079764945111767e-06, "loss": 0.5123, "step": 12035 }, { "epoch": 0.76, "grad_norm": 1.0280405282974243, "learning_rate": 1.407262862883328e-06, "loss": 0.4687, "step": 12036 }, { "epoch": 0.76, "grad_norm": 1.1003395318984985, "learning_rate": 1.4065493825331416e-06, "loss": 0.4533, "step": 12037 }, { "epoch": 0.76, "grad_norm": 1.0540413856506348, "learning_rate": 1.4058360534906607e-06, "loss": 0.5389, "step": 12038 }, { "epoch": 0.76, "grad_norm": 1.1108030080795288, "learning_rate": 1.4051228757859197e-06, "loss": 0.5359, "step": 12039 }, { "epoch": 0.76, "grad_norm": 1.0300624370574951, "learning_rate": 1.4044098494489494e-06, "loss": 0.4832, "step": 12040 }, { "epoch": 0.76, "grad_norm": 1.0136255025863647, "learning_rate": 1.4036969745097735e-06, "loss": 0.4871, "step": 12041 }, { "epoch": 0.76, "grad_norm": 1.0157047510147095, "learning_rate": 1.4029842509984043e-06, "loss": 0.4868, "step": 12042 }, { "epoch": 0.76, "grad_norm": 1.0380834341049194, "learning_rate": 1.4022716789448581e-06, "loss": 0.4896, "step": 12043 }, { "epoch": 0.76, "grad_norm": 1.0514111518859863, "learning_rate": 1.4015592583791343e-06, "loss": 0.4819, "step": 12044 }, { "epoch": 0.76, "grad_norm": 1.0694193840026855, "learning_rate": 1.4008469893312321e-06, "loss": 0.5212, "step": 12045 }, { "epoch": 0.76, "grad_norm": 1.0697076320648193, "learning_rate": 1.4001348718311446e-06, "loss": 0.4913, "step": 12046 }, { "epoch": 0.76, "grad_norm": 0.9682444930076599, "learning_rate": 1.399422905908851e-06, "loss": 0.4974, "step": 12047 }, { "epoch": 0.76, "grad_norm": 1.0870487689971924, "learning_rate": 1.3987110915943352e-06, "loss": 0.5247, "step": 12048 }, { "epoch": 0.76, "grad_norm": 1.036061406135559, "learning_rate": 1.397999428917569e-06, "loss": 0.5026, "step": 12049 }, { "epoch": 0.76, "grad_norm": 1.2339755296707153, "learning_rate": 1.3972879179085147e-06, "loss": 0.5166, "step": 12050 }, { "epoch": 0.76, "grad_norm": 1.1082885265350342, "learning_rate": 1.396576558597133e-06, "loss": 0.5244, "step": 12051 }, { "epoch": 0.76, "grad_norm": 1.032477855682373, "learning_rate": 1.3958653510133774e-06, "loss": 0.4679, "step": 12052 }, { "epoch": 0.76, "grad_norm": 1.1053731441497803, "learning_rate": 1.3951542951871938e-06, "loss": 0.5049, "step": 12053 }, { "epoch": 0.76, "grad_norm": 1.0226027965545654, "learning_rate": 1.3944433911485229e-06, "loss": 0.4656, "step": 12054 }, { "epoch": 0.76, "grad_norm": 1.096422791481018, "learning_rate": 1.3937326389272977e-06, "loss": 0.5359, "step": 12055 }, { "epoch": 0.76, "grad_norm": 1.0210285186767578, "learning_rate": 1.3930220385534453e-06, "loss": 0.5203, "step": 12056 }, { "epoch": 0.76, "grad_norm": 1.0053527355194092, "learning_rate": 1.3923115900568896e-06, "loss": 0.5356, "step": 12057 }, { "epoch": 0.76, "grad_norm": 1.033771276473999, "learning_rate": 1.3916012934675405e-06, "loss": 0.4959, "step": 12058 }, { "epoch": 0.76, "grad_norm": 1.0186415910720825, "learning_rate": 1.3908911488153081e-06, "loss": 0.5164, "step": 12059 }, { "epoch": 0.76, "grad_norm": 0.9885281324386597, "learning_rate": 1.3901811561300944e-06, "loss": 0.5108, "step": 12060 }, { "epoch": 0.76, "grad_norm": 1.0362237691879272, "learning_rate": 1.3894713154417944e-06, "loss": 0.4997, "step": 12061 }, { "epoch": 0.76, "grad_norm": 1.0876007080078125, "learning_rate": 1.3887616267802972e-06, "loss": 0.5109, "step": 12062 }, { "epoch": 0.76, "grad_norm": 1.0648605823516846, "learning_rate": 1.3880520901754874e-06, "loss": 0.511, "step": 12063 }, { "epoch": 0.76, "grad_norm": 1.1079448461532593, "learning_rate": 1.3873427056572354e-06, "loss": 0.5325, "step": 12064 }, { "epoch": 0.76, "grad_norm": 1.069300651550293, "learning_rate": 1.386633473255418e-06, "loss": 0.5054, "step": 12065 }, { "epoch": 0.76, "grad_norm": 1.086875557899475, "learning_rate": 1.3859243929998933e-06, "loss": 0.5346, "step": 12066 }, { "epoch": 0.76, "grad_norm": 1.056308388710022, "learning_rate": 1.3852154649205201e-06, "loss": 0.4556, "step": 12067 }, { "epoch": 0.76, "grad_norm": 1.1065456867218018, "learning_rate": 1.3845066890471487e-06, "loss": 0.5237, "step": 12068 }, { "epoch": 0.76, "grad_norm": 1.0740257501602173, "learning_rate": 1.3837980654096229e-06, "loss": 0.5322, "step": 12069 }, { "epoch": 0.76, "grad_norm": 1.046135425567627, "learning_rate": 1.383089594037781e-06, "loss": 0.4691, "step": 12070 }, { "epoch": 0.76, "grad_norm": 1.0684499740600586, "learning_rate": 1.3823812749614556e-06, "loss": 0.4921, "step": 12071 }, { "epoch": 0.76, "grad_norm": 1.0555635690689087, "learning_rate": 1.3816731082104668e-06, "loss": 0.5079, "step": 12072 }, { "epoch": 0.76, "grad_norm": 1.1421395540237427, "learning_rate": 1.3809650938146391e-06, "loss": 0.5579, "step": 12073 }, { "epoch": 0.76, "grad_norm": 1.0496187210083008, "learning_rate": 1.3802572318037804e-06, "loss": 0.479, "step": 12074 }, { "epoch": 0.77, "grad_norm": 1.0246468782424927, "learning_rate": 1.379549522207697e-06, "loss": 0.507, "step": 12075 }, { "epoch": 0.77, "grad_norm": 1.1108874082565308, "learning_rate": 1.3788419650561908e-06, "loss": 0.5469, "step": 12076 }, { "epoch": 0.77, "grad_norm": 1.041033387184143, "learning_rate": 1.3781345603790485e-06, "loss": 0.4406, "step": 12077 }, { "epoch": 0.77, "grad_norm": 1.0171122550964355, "learning_rate": 1.3774273082060625e-06, "loss": 0.4816, "step": 12078 }, { "epoch": 0.77, "grad_norm": 1.0950605869293213, "learning_rate": 1.3767202085670118e-06, "loss": 0.5512, "step": 12079 }, { "epoch": 0.77, "grad_norm": 1.026475429534912, "learning_rate": 1.3760132614916672e-06, "loss": 0.5266, "step": 12080 }, { "epoch": 0.77, "grad_norm": 1.1308759450912476, "learning_rate": 1.375306467009797e-06, "loss": 0.5612, "step": 12081 }, { "epoch": 0.77, "grad_norm": 1.1372259855270386, "learning_rate": 1.3745998251511622e-06, "loss": 0.5104, "step": 12082 }, { "epoch": 0.77, "grad_norm": 1.1877919435501099, "learning_rate": 1.373893335945517e-06, "loss": 0.4836, "step": 12083 }, { "epoch": 0.77, "grad_norm": 1.1031029224395752, "learning_rate": 1.373186999422611e-06, "loss": 0.5006, "step": 12084 }, { "epoch": 0.77, "grad_norm": 1.0803934335708618, "learning_rate": 1.3724808156121799e-06, "loss": 0.4791, "step": 12085 }, { "epoch": 0.77, "grad_norm": 0.9792132377624512, "learning_rate": 1.3717747845439645e-06, "loss": 0.4612, "step": 12086 }, { "epoch": 0.77, "grad_norm": 0.9646353125572205, "learning_rate": 1.371068906247693e-06, "loss": 0.4692, "step": 12087 }, { "epoch": 0.77, "grad_norm": 1.1279658079147339, "learning_rate": 1.3703631807530831e-06, "loss": 0.543, "step": 12088 }, { "epoch": 0.77, "grad_norm": 1.0468178987503052, "learning_rate": 1.3696576080898538e-06, "loss": 0.5678, "step": 12089 }, { "epoch": 0.77, "grad_norm": 1.095420241355896, "learning_rate": 1.3689521882877137e-06, "loss": 0.489, "step": 12090 }, { "epoch": 0.77, "grad_norm": 1.02595055103302, "learning_rate": 1.3682469213763655e-06, "loss": 0.5023, "step": 12091 }, { "epoch": 0.77, "grad_norm": 1.052215337753296, "learning_rate": 1.367541807385507e-06, "loss": 0.4901, "step": 12092 }, { "epoch": 0.77, "grad_norm": 1.1547960042953491, "learning_rate": 1.3668368463448246e-06, "loss": 0.4822, "step": 12093 }, { "epoch": 0.77, "grad_norm": 1.0154019594192505, "learning_rate": 1.3661320382840026e-06, "loss": 0.4237, "step": 12094 }, { "epoch": 0.77, "grad_norm": 1.1761852502822876, "learning_rate": 1.3654273832327219e-06, "loss": 0.5079, "step": 12095 }, { "epoch": 0.77, "grad_norm": 1.1030158996582031, "learning_rate": 1.3647228812206493e-06, "loss": 0.5249, "step": 12096 }, { "epoch": 0.77, "grad_norm": 1.056185007095337, "learning_rate": 1.3640185322774495e-06, "loss": 0.5108, "step": 12097 }, { "epoch": 0.77, "grad_norm": 1.0081676244735718, "learning_rate": 1.3633143364327812e-06, "loss": 0.461, "step": 12098 }, { "epoch": 0.77, "grad_norm": 0.9790708422660828, "learning_rate": 1.3626102937162943e-06, "loss": 0.4379, "step": 12099 }, { "epoch": 0.77, "grad_norm": 1.081037163734436, "learning_rate": 1.3619064041576368e-06, "loss": 0.4806, "step": 12100 }, { "epoch": 0.77, "grad_norm": 1.055122971534729, "learning_rate": 1.3612026677864426e-06, "loss": 0.5337, "step": 12101 }, { "epoch": 0.77, "grad_norm": 1.0966159105300903, "learning_rate": 1.360499084632344e-06, "loss": 0.5351, "step": 12102 }, { "epoch": 0.77, "grad_norm": 1.0062079429626465, "learning_rate": 1.3597956547249713e-06, "loss": 0.5121, "step": 12103 }, { "epoch": 0.77, "grad_norm": 1.0459173917770386, "learning_rate": 1.3590923780939386e-06, "loss": 0.5077, "step": 12104 }, { "epoch": 0.77, "grad_norm": 1.0308589935302734, "learning_rate": 1.3583892547688598e-06, "loss": 0.501, "step": 12105 }, { "epoch": 0.77, "grad_norm": 1.011716604232788, "learning_rate": 1.357686284779343e-06, "loss": 0.4987, "step": 12106 }, { "epoch": 0.77, "grad_norm": 0.9909331202507019, "learning_rate": 1.3569834681549832e-06, "loss": 0.5138, "step": 12107 }, { "epoch": 0.77, "grad_norm": 1.0783926248550415, "learning_rate": 1.3562808049253795e-06, "loss": 0.5043, "step": 12108 }, { "epoch": 0.77, "grad_norm": 1.0912294387817383, "learning_rate": 1.3555782951201134e-06, "loss": 0.5251, "step": 12109 }, { "epoch": 0.77, "grad_norm": 1.054231882095337, "learning_rate": 1.3548759387687683e-06, "loss": 0.5128, "step": 12110 }, { "epoch": 0.77, "grad_norm": 1.0848804712295532, "learning_rate": 1.3541737359009161e-06, "loss": 0.5027, "step": 12111 }, { "epoch": 0.77, "grad_norm": 0.993877112865448, "learning_rate": 1.3534716865461256e-06, "loss": 0.4662, "step": 12112 }, { "epoch": 0.77, "grad_norm": 0.960350751876831, "learning_rate": 1.3527697907339565e-06, "loss": 0.4709, "step": 12113 }, { "epoch": 0.77, "grad_norm": 0.9977060556411743, "learning_rate": 1.3520680484939651e-06, "loss": 0.491, "step": 12114 }, { "epoch": 0.77, "grad_norm": 1.1944913864135742, "learning_rate": 1.3513664598556952e-06, "loss": 0.4732, "step": 12115 }, { "epoch": 0.77, "grad_norm": 1.0275923013687134, "learning_rate": 1.3506650248486946e-06, "loss": 0.508, "step": 12116 }, { "epoch": 0.77, "grad_norm": 1.158739447593689, "learning_rate": 1.3499637435024926e-06, "loss": 0.5093, "step": 12117 }, { "epoch": 0.77, "grad_norm": 1.1126782894134521, "learning_rate": 1.34926261584662e-06, "loss": 0.5013, "step": 12118 }, { "epoch": 0.77, "grad_norm": 0.9790872931480408, "learning_rate": 1.3485616419105985e-06, "loss": 0.5346, "step": 12119 }, { "epoch": 0.77, "grad_norm": 1.1475367546081543, "learning_rate": 1.3478608217239435e-06, "loss": 0.5139, "step": 12120 }, { "epoch": 0.77, "grad_norm": 1.059121012687683, "learning_rate": 1.347160155316165e-06, "loss": 0.5058, "step": 12121 }, { "epoch": 0.77, "grad_norm": 1.0250189304351807, "learning_rate": 1.3464596427167663e-06, "loss": 0.4953, "step": 12122 }, { "epoch": 0.77, "grad_norm": 1.021460771560669, "learning_rate": 1.3457592839552409e-06, "loss": 0.4608, "step": 12123 }, { "epoch": 0.77, "grad_norm": 1.0707124471664429, "learning_rate": 1.3450590790610795e-06, "loss": 0.5109, "step": 12124 }, { "epoch": 0.77, "grad_norm": 1.124280333518982, "learning_rate": 1.3443590280637664e-06, "loss": 0.484, "step": 12125 }, { "epoch": 0.77, "grad_norm": 1.1179440021514893, "learning_rate": 1.3436591309927772e-06, "loss": 0.5157, "step": 12126 }, { "epoch": 0.77, "grad_norm": 1.0698933601379395, "learning_rate": 1.3429593878775825e-06, "loss": 0.4855, "step": 12127 }, { "epoch": 0.77, "grad_norm": 1.113325834274292, "learning_rate": 1.342259798747646e-06, "loss": 0.4858, "step": 12128 }, { "epoch": 0.77, "grad_norm": 1.0449678897857666, "learning_rate": 1.3415603636324248e-06, "loss": 0.5135, "step": 12129 }, { "epoch": 0.77, "grad_norm": 1.0090608596801758, "learning_rate": 1.3408610825613722e-06, "loss": 0.4645, "step": 12130 }, { "epoch": 0.77, "grad_norm": 1.122188925743103, "learning_rate": 1.340161955563928e-06, "loss": 0.4604, "step": 12131 }, { "epoch": 0.77, "grad_norm": 1.0805073976516724, "learning_rate": 1.339462982669531e-06, "loss": 0.4815, "step": 12132 }, { "epoch": 0.77, "grad_norm": 0.982419490814209, "learning_rate": 1.3387641639076165e-06, "loss": 0.4846, "step": 12133 }, { "epoch": 0.77, "grad_norm": 1.0559958219528198, "learning_rate": 1.3380654993076054e-06, "loss": 0.4709, "step": 12134 }, { "epoch": 0.77, "grad_norm": 0.9687513709068298, "learning_rate": 1.3373669888989167e-06, "loss": 0.4518, "step": 12135 }, { "epoch": 0.77, "grad_norm": 0.9971938133239746, "learning_rate": 1.3366686327109645e-06, "loss": 0.5197, "step": 12136 }, { "epoch": 0.77, "grad_norm": 1.090461254119873, "learning_rate": 1.3359704307731491e-06, "loss": 0.4876, "step": 12137 }, { "epoch": 0.77, "grad_norm": 1.0781184434890747, "learning_rate": 1.3352723831148761e-06, "loss": 0.4923, "step": 12138 }, { "epoch": 0.77, "grad_norm": 1.1592669486999512, "learning_rate": 1.3345744897655327e-06, "loss": 0.5068, "step": 12139 }, { "epoch": 0.77, "grad_norm": 1.0591822862625122, "learning_rate": 1.3338767507545064e-06, "loss": 0.4973, "step": 12140 }, { "epoch": 0.77, "grad_norm": 1.0624432563781738, "learning_rate": 1.3331791661111765e-06, "loss": 0.489, "step": 12141 }, { "epoch": 0.77, "grad_norm": 1.091153621673584, "learning_rate": 1.3324817358649162e-06, "loss": 0.5274, "step": 12142 }, { "epoch": 0.77, "grad_norm": 1.0193672180175781, "learning_rate": 1.3317844600450912e-06, "loss": 0.4756, "step": 12143 }, { "epoch": 0.77, "grad_norm": 1.069133996963501, "learning_rate": 1.3310873386810641e-06, "loss": 0.5254, "step": 12144 }, { "epoch": 0.77, "grad_norm": 1.0465642213821411, "learning_rate": 1.330390371802182e-06, "loss": 0.501, "step": 12145 }, { "epoch": 0.77, "grad_norm": 1.132053017616272, "learning_rate": 1.3296935594377996e-06, "loss": 0.5368, "step": 12146 }, { "epoch": 0.77, "grad_norm": 1.0702149868011475, "learning_rate": 1.3289969016172515e-06, "loss": 0.5338, "step": 12147 }, { "epoch": 0.77, "grad_norm": 1.0705736875534058, "learning_rate": 1.3283003983698733e-06, "loss": 0.4942, "step": 12148 }, { "epoch": 0.77, "grad_norm": 1.1318503618240356, "learning_rate": 1.3276040497249926e-06, "loss": 0.4867, "step": 12149 }, { "epoch": 0.77, "grad_norm": 1.042022705078125, "learning_rate": 1.3269078557119297e-06, "loss": 0.483, "step": 12150 }, { "epoch": 0.77, "grad_norm": 1.1017260551452637, "learning_rate": 1.3262118163599992e-06, "loss": 0.4648, "step": 12151 }, { "epoch": 0.77, "grad_norm": 1.0930343866348267, "learning_rate": 1.3255159316985105e-06, "loss": 0.4973, "step": 12152 }, { "epoch": 0.77, "grad_norm": 1.0908854007720947, "learning_rate": 1.3248202017567624e-06, "loss": 0.527, "step": 12153 }, { "epoch": 0.77, "grad_norm": 1.092955470085144, "learning_rate": 1.32412462656405e-06, "loss": 0.5338, "step": 12154 }, { "epoch": 0.77, "grad_norm": 1.0554345846176147, "learning_rate": 1.3234292061496622e-06, "loss": 0.5016, "step": 12155 }, { "epoch": 0.77, "grad_norm": 1.0174444913864136, "learning_rate": 1.3227339405428807e-06, "loss": 0.5004, "step": 12156 }, { "epoch": 0.77, "grad_norm": 1.0531197786331177, "learning_rate": 1.3220388297729825e-06, "loss": 0.5208, "step": 12157 }, { "epoch": 0.77, "grad_norm": 0.966866135597229, "learning_rate": 1.3213438738692313e-06, "loss": 0.4504, "step": 12158 }, { "epoch": 0.77, "grad_norm": 1.0735490322113037, "learning_rate": 1.320649072860894e-06, "loss": 0.5237, "step": 12159 }, { "epoch": 0.77, "grad_norm": 1.0257203578948975, "learning_rate": 1.3199544267772257e-06, "loss": 0.4605, "step": 12160 }, { "epoch": 0.77, "grad_norm": 1.0558432340621948, "learning_rate": 1.3192599356474733e-06, "loss": 0.5669, "step": 12161 }, { "epoch": 0.77, "grad_norm": 1.050284504890442, "learning_rate": 1.318565599500881e-06, "loss": 0.545, "step": 12162 }, { "epoch": 0.77, "grad_norm": 1.0238032341003418, "learning_rate": 1.3178714183666846e-06, "loss": 0.5122, "step": 12163 }, { "epoch": 0.77, "grad_norm": 1.0815541744232178, "learning_rate": 1.3171773922741132e-06, "loss": 0.4996, "step": 12164 }, { "epoch": 0.77, "grad_norm": 1.057782530784607, "learning_rate": 1.316483521252392e-06, "loss": 0.5329, "step": 12165 }, { "epoch": 0.77, "grad_norm": 0.9373903870582581, "learning_rate": 1.3157898053307322e-06, "loss": 0.4324, "step": 12166 }, { "epoch": 0.77, "grad_norm": 1.1545541286468506, "learning_rate": 1.3150962445383492e-06, "loss": 0.5078, "step": 12167 }, { "epoch": 0.77, "grad_norm": 1.0350265502929688, "learning_rate": 1.314402838904446e-06, "loss": 0.4906, "step": 12168 }, { "epoch": 0.77, "grad_norm": 1.0755099058151245, "learning_rate": 1.3137095884582163e-06, "loss": 0.556, "step": 12169 }, { "epoch": 0.77, "grad_norm": 0.9735145568847656, "learning_rate": 1.3130164932288524e-06, "loss": 0.4431, "step": 12170 }, { "epoch": 0.77, "grad_norm": 1.0998507738113403, "learning_rate": 1.3123235532455376e-06, "loss": 0.5312, "step": 12171 }, { "epoch": 0.77, "grad_norm": 1.0288559198379517, "learning_rate": 1.3116307685374497e-06, "loss": 0.4932, "step": 12172 }, { "epoch": 0.77, "grad_norm": 1.0293195247650146, "learning_rate": 1.3109381391337605e-06, "loss": 0.4914, "step": 12173 }, { "epoch": 0.77, "grad_norm": 1.1143139600753784, "learning_rate": 1.3102456650636314e-06, "loss": 0.5403, "step": 12174 }, { "epoch": 0.77, "grad_norm": 1.0851128101348877, "learning_rate": 1.3095533463562204e-06, "loss": 0.5424, "step": 12175 }, { "epoch": 0.77, "grad_norm": 1.1166521310806274, "learning_rate": 1.3088611830406828e-06, "loss": 0.4971, "step": 12176 }, { "epoch": 0.77, "grad_norm": 1.0827765464782715, "learning_rate": 1.3081691751461588e-06, "loss": 0.4561, "step": 12177 }, { "epoch": 0.77, "grad_norm": 1.1333348751068115, "learning_rate": 1.3074773227017878e-06, "loss": 0.5245, "step": 12178 }, { "epoch": 0.77, "grad_norm": 1.097198247909546, "learning_rate": 1.3067856257367018e-06, "loss": 0.4648, "step": 12179 }, { "epoch": 0.77, "grad_norm": 1.0951207876205444, "learning_rate": 1.3060940842800247e-06, "loss": 0.5086, "step": 12180 }, { "epoch": 0.77, "grad_norm": 1.0213844776153564, "learning_rate": 1.3054026983608776e-06, "loss": 0.4805, "step": 12181 }, { "epoch": 0.77, "grad_norm": 1.0557488203048706, "learning_rate": 1.3047114680083683e-06, "loss": 0.4748, "step": 12182 }, { "epoch": 0.77, "grad_norm": 1.0473880767822266, "learning_rate": 1.3040203932516043e-06, "loss": 0.5136, "step": 12183 }, { "epoch": 0.77, "grad_norm": 1.0972284078598022, "learning_rate": 1.303329474119684e-06, "loss": 0.4723, "step": 12184 }, { "epoch": 0.77, "grad_norm": 1.1365244388580322, "learning_rate": 1.3026387106417e-06, "loss": 0.4823, "step": 12185 }, { "epoch": 0.77, "grad_norm": 1.034995436668396, "learning_rate": 1.301948102846738e-06, "loss": 0.4946, "step": 12186 }, { "epoch": 0.77, "grad_norm": 1.0066113471984863, "learning_rate": 1.301257650763878e-06, "loss": 0.5085, "step": 12187 }, { "epoch": 0.77, "grad_norm": 1.1857848167419434, "learning_rate": 1.3005673544221882e-06, "loss": 0.5569, "step": 12188 }, { "epoch": 0.77, "grad_norm": 1.049791693687439, "learning_rate": 1.299877213850741e-06, "loss": 0.4777, "step": 12189 }, { "epoch": 0.77, "grad_norm": 1.1054198741912842, "learning_rate": 1.2991872290785906e-06, "loss": 0.5321, "step": 12190 }, { "epoch": 0.77, "grad_norm": 1.070559024810791, "learning_rate": 1.2984974001347922e-06, "loss": 0.4905, "step": 12191 }, { "epoch": 0.77, "grad_norm": 1.0235702991485596, "learning_rate": 1.2978077270483913e-06, "loss": 0.4836, "step": 12192 }, { "epoch": 0.77, "grad_norm": 1.0622280836105347, "learning_rate": 1.2971182098484286e-06, "loss": 0.4874, "step": 12193 }, { "epoch": 0.77, "grad_norm": 1.0791078805923462, "learning_rate": 1.2964288485639366e-06, "loss": 0.5058, "step": 12194 }, { "epoch": 0.77, "grad_norm": 1.209069013595581, "learning_rate": 1.2957396432239427e-06, "loss": 0.5056, "step": 12195 }, { "epoch": 0.77, "grad_norm": 1.0712496042251587, "learning_rate": 1.2950505938574643e-06, "loss": 0.5171, "step": 12196 }, { "epoch": 0.77, "grad_norm": 1.025773286819458, "learning_rate": 1.2943617004935176e-06, "loss": 0.4676, "step": 12197 }, { "epoch": 0.77, "grad_norm": 1.0884387493133545, "learning_rate": 1.2936729631611106e-06, "loss": 0.5218, "step": 12198 }, { "epoch": 0.77, "grad_norm": 1.1169272661209106, "learning_rate": 1.2929843818892401e-06, "loss": 0.4675, "step": 12199 }, { "epoch": 0.77, "grad_norm": 1.0363742113113403, "learning_rate": 1.2922959567069016e-06, "loss": 0.4974, "step": 12200 }, { "epoch": 0.77, "grad_norm": 1.0577375888824463, "learning_rate": 1.2916076876430821e-06, "loss": 0.5397, "step": 12201 }, { "epoch": 0.77, "grad_norm": 1.0551846027374268, "learning_rate": 1.2909195747267622e-06, "loss": 0.5299, "step": 12202 }, { "epoch": 0.77, "grad_norm": 1.037851333618164, "learning_rate": 1.2902316179869179e-06, "loss": 0.5021, "step": 12203 }, { "epoch": 0.77, "grad_norm": 1.1661293506622314, "learning_rate": 1.2895438174525127e-06, "loss": 0.5248, "step": 12204 }, { "epoch": 0.77, "grad_norm": 1.0050801038742065, "learning_rate": 1.288856173152509e-06, "loss": 0.5077, "step": 12205 }, { "epoch": 0.77, "grad_norm": 1.065712332725525, "learning_rate": 1.2881686851158642e-06, "loss": 0.556, "step": 12206 }, { "epoch": 0.77, "grad_norm": 1.059660792350769, "learning_rate": 1.287481353371522e-06, "loss": 0.5284, "step": 12207 }, { "epoch": 0.77, "grad_norm": 1.0822430849075317, "learning_rate": 1.286794177948425e-06, "loss": 0.5514, "step": 12208 }, { "epoch": 0.77, "grad_norm": 1.1334432363510132, "learning_rate": 1.286107158875508e-06, "loss": 0.5393, "step": 12209 }, { "epoch": 0.77, "grad_norm": 0.9908674359321594, "learning_rate": 1.285420296181699e-06, "loss": 0.4934, "step": 12210 }, { "epoch": 0.77, "grad_norm": 1.1943225860595703, "learning_rate": 1.2847335898959207e-06, "loss": 0.4991, "step": 12211 }, { "epoch": 0.77, "grad_norm": 1.0061407089233398, "learning_rate": 1.284047040047085e-06, "loss": 0.5331, "step": 12212 }, { "epoch": 0.77, "grad_norm": 1.099575161933899, "learning_rate": 1.2833606466641001e-06, "loss": 0.5291, "step": 12213 }, { "epoch": 0.77, "grad_norm": 1.074028491973877, "learning_rate": 1.282674409775872e-06, "loss": 0.4862, "step": 12214 }, { "epoch": 0.77, "grad_norm": 0.9675447344779968, "learning_rate": 1.2819883294112918e-06, "loss": 0.4982, "step": 12215 }, { "epoch": 0.77, "grad_norm": 1.0054662227630615, "learning_rate": 1.2813024055992486e-06, "loss": 0.4806, "step": 12216 }, { "epoch": 0.77, "grad_norm": 1.0863933563232422, "learning_rate": 1.2806166383686258e-06, "loss": 0.5069, "step": 12217 }, { "epoch": 0.77, "grad_norm": 1.0222904682159424, "learning_rate": 1.2799310277482952e-06, "loss": 0.4901, "step": 12218 }, { "epoch": 0.77, "grad_norm": 1.1853152513504028, "learning_rate": 1.2792455737671306e-06, "loss": 0.5426, "step": 12219 }, { "epoch": 0.77, "grad_norm": 1.0142987966537476, "learning_rate": 1.27856027645399e-06, "loss": 0.4637, "step": 12220 }, { "epoch": 0.77, "grad_norm": 1.0342990159988403, "learning_rate": 1.27787513583773e-06, "loss": 0.447, "step": 12221 }, { "epoch": 0.77, "grad_norm": 1.0292028188705444, "learning_rate": 1.2771901519471997e-06, "loss": 0.5055, "step": 12222 }, { "epoch": 0.77, "grad_norm": 1.0016099214553833, "learning_rate": 1.2765053248112414e-06, "loss": 0.5186, "step": 12223 }, { "epoch": 0.77, "grad_norm": 1.1074825525283813, "learning_rate": 1.2758206544586909e-06, "loss": 0.5292, "step": 12224 }, { "epoch": 0.77, "grad_norm": 1.079474687576294, "learning_rate": 1.2751361409183788e-06, "loss": 0.5449, "step": 12225 }, { "epoch": 0.77, "grad_norm": 0.993061900138855, "learning_rate": 1.2744517842191228e-06, "loss": 0.5057, "step": 12226 }, { "epoch": 0.77, "grad_norm": 1.108346700668335, "learning_rate": 1.2737675843897452e-06, "loss": 0.5171, "step": 12227 }, { "epoch": 0.77, "grad_norm": 1.0924155712127686, "learning_rate": 1.2730835414590498e-06, "loss": 0.5515, "step": 12228 }, { "epoch": 0.77, "grad_norm": 0.986985445022583, "learning_rate": 1.272399655455842e-06, "loss": 0.4846, "step": 12229 }, { "epoch": 0.77, "grad_norm": 1.0078566074371338, "learning_rate": 1.2717159264089185e-06, "loss": 0.4736, "step": 12230 }, { "epoch": 0.77, "grad_norm": 1.1671011447906494, "learning_rate": 1.2710323543470648e-06, "loss": 0.4549, "step": 12231 }, { "epoch": 0.77, "grad_norm": 1.081705093383789, "learning_rate": 1.2703489392990682e-06, "loss": 0.5101, "step": 12232 }, { "epoch": 0.78, "grad_norm": 1.06071138381958, "learning_rate": 1.2696656812937047e-06, "loss": 0.5194, "step": 12233 }, { "epoch": 0.78, "grad_norm": 1.013026237487793, "learning_rate": 1.268982580359741e-06, "loss": 0.4562, "step": 12234 }, { "epoch": 0.78, "grad_norm": 1.0899467468261719, "learning_rate": 1.2682996365259415e-06, "loss": 0.4819, "step": 12235 }, { "epoch": 0.78, "grad_norm": 1.0048573017120361, "learning_rate": 1.2676168498210623e-06, "loss": 0.5089, "step": 12236 }, { "epoch": 0.78, "grad_norm": 1.022613286972046, "learning_rate": 1.2669342202738537e-06, "loss": 0.4961, "step": 12237 }, { "epoch": 0.78, "grad_norm": 1.0211726427078247, "learning_rate": 1.2662517479130605e-06, "loss": 0.4917, "step": 12238 }, { "epoch": 0.78, "grad_norm": 1.1397035121917725, "learning_rate": 1.2655694327674145e-06, "loss": 0.5077, "step": 12239 }, { "epoch": 0.78, "grad_norm": 1.0699193477630615, "learning_rate": 1.2648872748656498e-06, "loss": 0.4947, "step": 12240 }, { "epoch": 0.78, "grad_norm": 1.0356310606002808, "learning_rate": 1.2642052742364903e-06, "loss": 0.4952, "step": 12241 }, { "epoch": 0.78, "grad_norm": 1.0501240491867065, "learning_rate": 1.2635234309086486e-06, "loss": 0.5111, "step": 12242 }, { "epoch": 0.78, "grad_norm": 1.1141310930252075, "learning_rate": 1.2628417449108376e-06, "loss": 0.5439, "step": 12243 }, { "epoch": 0.78, "grad_norm": 1.1341676712036133, "learning_rate": 1.2621602162717594e-06, "loss": 0.4811, "step": 12244 }, { "epoch": 0.78, "grad_norm": 1.0090206861495972, "learning_rate": 1.261478845020112e-06, "loss": 0.4582, "step": 12245 }, { "epoch": 0.78, "grad_norm": 1.1057790517807007, "learning_rate": 1.2607976311845865e-06, "loss": 0.4488, "step": 12246 }, { "epoch": 0.78, "grad_norm": 1.0496375560760498, "learning_rate": 1.2601165747938638e-06, "loss": 0.4418, "step": 12247 }, { "epoch": 0.78, "grad_norm": 1.0294616222381592, "learning_rate": 1.2594356758766201e-06, "loss": 0.4762, "step": 12248 }, { "epoch": 0.78, "grad_norm": 1.0037492513656616, "learning_rate": 1.2587549344615308e-06, "loss": 0.4527, "step": 12249 }, { "epoch": 0.78, "grad_norm": 1.045936107635498, "learning_rate": 1.2580743505772553e-06, "loss": 0.5087, "step": 12250 }, { "epoch": 0.78, "grad_norm": 1.0254948139190674, "learning_rate": 1.2573939242524508e-06, "loss": 0.4914, "step": 12251 }, { "epoch": 0.78, "grad_norm": 1.0283900499343872, "learning_rate": 1.2567136555157694e-06, "loss": 0.5073, "step": 12252 }, { "epoch": 0.78, "grad_norm": 1.0395734310150146, "learning_rate": 1.2560335443958533e-06, "loss": 0.4758, "step": 12253 }, { "epoch": 0.78, "grad_norm": 1.0320192575454712, "learning_rate": 1.2553535909213422e-06, "loss": 0.5084, "step": 12254 }, { "epoch": 0.78, "grad_norm": 1.0554685592651367, "learning_rate": 1.254673795120863e-06, "loss": 0.5101, "step": 12255 }, { "epoch": 0.78, "grad_norm": 1.08400559425354, "learning_rate": 1.2539941570230402e-06, "loss": 0.477, "step": 12256 }, { "epoch": 0.78, "grad_norm": 0.9304841160774231, "learning_rate": 1.2533146766564946e-06, "loss": 0.4717, "step": 12257 }, { "epoch": 0.78, "grad_norm": 1.1072951555252075, "learning_rate": 1.252635354049833e-06, "loss": 0.5145, "step": 12258 }, { "epoch": 0.78, "grad_norm": 0.9975444674491882, "learning_rate": 1.2519561892316606e-06, "loss": 0.4958, "step": 12259 }, { "epoch": 0.78, "grad_norm": 1.1330645084381104, "learning_rate": 1.2512771822305742e-06, "loss": 0.504, "step": 12260 }, { "epoch": 0.78, "grad_norm": 1.1020914316177368, "learning_rate": 1.2505983330751654e-06, "loss": 0.4903, "step": 12261 }, { "epoch": 0.78, "grad_norm": 1.0297106504440308, "learning_rate": 1.2499196417940168e-06, "loss": 0.5439, "step": 12262 }, { "epoch": 0.78, "grad_norm": 1.07819402217865, "learning_rate": 1.2492411084157086e-06, "loss": 0.5008, "step": 12263 }, { "epoch": 0.78, "grad_norm": 1.0378316640853882, "learning_rate": 1.2485627329688076e-06, "loss": 0.5036, "step": 12264 }, { "epoch": 0.78, "grad_norm": 1.012473702430725, "learning_rate": 1.2478845154818798e-06, "loss": 0.492, "step": 12265 }, { "epoch": 0.78, "grad_norm": 1.0863486528396606, "learning_rate": 1.2472064559834818e-06, "loss": 0.5448, "step": 12266 }, { "epoch": 0.78, "grad_norm": 0.9467172026634216, "learning_rate": 1.2465285545021655e-06, "loss": 0.481, "step": 12267 }, { "epoch": 0.78, "grad_norm": 0.9862213134765625, "learning_rate": 1.2458508110664758e-06, "loss": 0.5023, "step": 12268 }, { "epoch": 0.78, "grad_norm": 1.0363272428512573, "learning_rate": 1.2451732257049458e-06, "loss": 0.5312, "step": 12269 }, { "epoch": 0.78, "grad_norm": 1.0616782903671265, "learning_rate": 1.2444957984461103e-06, "loss": 0.4936, "step": 12270 }, { "epoch": 0.78, "grad_norm": 1.164757490158081, "learning_rate": 1.243818529318494e-06, "loss": 0.5477, "step": 12271 }, { "epoch": 0.78, "grad_norm": 0.9252405166625977, "learning_rate": 1.2431414183506114e-06, "loss": 0.4355, "step": 12272 }, { "epoch": 0.78, "grad_norm": 1.0829187631607056, "learning_rate": 1.2424644655709744e-06, "loss": 0.5246, "step": 12273 }, { "epoch": 0.78, "grad_norm": 1.0668288469314575, "learning_rate": 1.2417876710080872e-06, "loss": 0.4941, "step": 12274 }, { "epoch": 0.78, "grad_norm": 1.0414551496505737, "learning_rate": 1.2411110346904471e-06, "loss": 0.4964, "step": 12275 }, { "epoch": 0.78, "grad_norm": 1.0581361055374146, "learning_rate": 1.2404345566465464e-06, "loss": 0.5053, "step": 12276 }, { "epoch": 0.78, "grad_norm": 1.0122004747390747, "learning_rate": 1.2397582369048672e-06, "loss": 0.4787, "step": 12277 }, { "epoch": 0.78, "grad_norm": 1.0736596584320068, "learning_rate": 1.2390820754938859e-06, "loss": 0.527, "step": 12278 }, { "epoch": 0.78, "grad_norm": 1.0998107194900513, "learning_rate": 1.2384060724420776e-06, "loss": 0.56, "step": 12279 }, { "epoch": 0.78, "grad_norm": 1.0592037439346313, "learning_rate": 1.2377302277779029e-06, "loss": 0.4759, "step": 12280 }, { "epoch": 0.78, "grad_norm": 1.0291491746902466, "learning_rate": 1.2370545415298207e-06, "loss": 0.4809, "step": 12281 }, { "epoch": 0.78, "grad_norm": 1.0694981813430786, "learning_rate": 1.236379013726281e-06, "loss": 0.4992, "step": 12282 }, { "epoch": 0.78, "grad_norm": 1.0465306043624878, "learning_rate": 1.2357036443957283e-06, "loss": 0.5236, "step": 12283 }, { "epoch": 0.78, "grad_norm": 1.0182136297225952, "learning_rate": 1.2350284335666019e-06, "loss": 0.5117, "step": 12284 }, { "epoch": 0.78, "grad_norm": 1.052598237991333, "learning_rate": 1.2343533812673286e-06, "loss": 0.485, "step": 12285 }, { "epoch": 0.78, "grad_norm": 1.1253355741500854, "learning_rate": 1.2336784875263341e-06, "loss": 0.4508, "step": 12286 }, { "epoch": 0.78, "grad_norm": 1.1140406131744385, "learning_rate": 1.233003752372039e-06, "loss": 0.52, "step": 12287 }, { "epoch": 0.78, "grad_norm": 0.9919499158859253, "learning_rate": 1.23232917583285e-06, "loss": 0.4785, "step": 12288 }, { "epoch": 0.78, "grad_norm": 1.030924916267395, "learning_rate": 1.2316547579371724e-06, "loss": 0.4931, "step": 12289 }, { "epoch": 0.78, "grad_norm": 1.014090657234192, "learning_rate": 1.230980498713404e-06, "loss": 0.4911, "step": 12290 }, { "epoch": 0.78, "grad_norm": 1.1351239681243896, "learning_rate": 1.2303063981899355e-06, "loss": 0.5271, "step": 12291 }, { "epoch": 0.78, "grad_norm": 1.0517010688781738, "learning_rate": 1.2296324563951517e-06, "loss": 0.5212, "step": 12292 }, { "epoch": 0.78, "grad_norm": 0.9599292874336243, "learning_rate": 1.2289586733574283e-06, "loss": 0.4797, "step": 12293 }, { "epoch": 0.78, "grad_norm": 1.0953619480133057, "learning_rate": 1.2282850491051363e-06, "loss": 0.4853, "step": 12294 }, { "epoch": 0.78, "grad_norm": 1.040137529373169, "learning_rate": 1.2276115836666396e-06, "loss": 0.4951, "step": 12295 }, { "epoch": 0.78, "grad_norm": 1.0743566751480103, "learning_rate": 1.2269382770702964e-06, "loss": 0.5165, "step": 12296 }, { "epoch": 0.78, "grad_norm": 0.9729884266853333, "learning_rate": 1.2262651293444572e-06, "loss": 0.4798, "step": 12297 }, { "epoch": 0.78, "grad_norm": 1.1260862350463867, "learning_rate": 1.2255921405174664e-06, "loss": 0.5019, "step": 12298 }, { "epoch": 0.78, "grad_norm": 0.9777114391326904, "learning_rate": 1.2249193106176578e-06, "loss": 0.4741, "step": 12299 }, { "epoch": 0.78, "grad_norm": 0.9457767009735107, "learning_rate": 1.224246639673367e-06, "loss": 0.4812, "step": 12300 }, { "epoch": 0.78, "grad_norm": 1.0699979066848755, "learning_rate": 1.2235741277129143e-06, "loss": 0.5072, "step": 12301 }, { "epoch": 0.78, "grad_norm": 1.0439226627349854, "learning_rate": 1.2229017747646178e-06, "loss": 0.4934, "step": 12302 }, { "epoch": 0.78, "grad_norm": 1.1002904176712036, "learning_rate": 1.2222295808567874e-06, "loss": 0.5215, "step": 12303 }, { "epoch": 0.78, "grad_norm": 1.052224040031433, "learning_rate": 1.2215575460177282e-06, "loss": 0.5105, "step": 12304 }, { "epoch": 0.78, "grad_norm": 1.048862338066101, "learning_rate": 1.220885670275736e-06, "loss": 0.519, "step": 12305 }, { "epoch": 0.78, "grad_norm": 1.0804308652877808, "learning_rate": 1.2202139536591035e-06, "loss": 0.5023, "step": 12306 }, { "epoch": 0.78, "grad_norm": 0.988083004951477, "learning_rate": 1.2195423961961089e-06, "loss": 0.511, "step": 12307 }, { "epoch": 0.78, "grad_norm": 1.086592435836792, "learning_rate": 1.2188709979150366e-06, "loss": 0.515, "step": 12308 }, { "epoch": 0.78, "grad_norm": 1.1539126634597778, "learning_rate": 1.2181997588441507e-06, "loss": 0.5196, "step": 12309 }, { "epoch": 0.78, "grad_norm": 1.000229001045227, "learning_rate": 1.2175286790117174e-06, "loss": 0.4879, "step": 12310 }, { "epoch": 0.78, "grad_norm": 1.0030760765075684, "learning_rate": 1.2168577584459944e-06, "loss": 0.5261, "step": 12311 }, { "epoch": 0.78, "grad_norm": 1.0990442037582397, "learning_rate": 1.2161869971752283e-06, "loss": 0.5291, "step": 12312 }, { "epoch": 0.78, "grad_norm": 1.148339867591858, "learning_rate": 1.2155163952276654e-06, "loss": 0.5428, "step": 12313 }, { "epoch": 0.78, "grad_norm": 1.043141484260559, "learning_rate": 1.2148459526315442e-06, "loss": 0.534, "step": 12314 }, { "epoch": 0.78, "grad_norm": 1.0665313005447388, "learning_rate": 1.2141756694150903e-06, "loss": 0.5132, "step": 12315 }, { "epoch": 0.78, "grad_norm": 1.044087290763855, "learning_rate": 1.2135055456065292e-06, "loss": 0.47, "step": 12316 }, { "epoch": 0.78, "grad_norm": 1.1001490354537964, "learning_rate": 1.2128355812340776e-06, "loss": 0.5133, "step": 12317 }, { "epoch": 0.78, "grad_norm": 1.056571125984192, "learning_rate": 1.2121657763259448e-06, "loss": 0.4569, "step": 12318 }, { "epoch": 0.78, "grad_norm": 1.0614756345748901, "learning_rate": 1.211496130910334e-06, "loss": 0.4624, "step": 12319 }, { "epoch": 0.78, "grad_norm": 1.0753448009490967, "learning_rate": 1.2108266450154422e-06, "loss": 0.5151, "step": 12320 }, { "epoch": 0.78, "grad_norm": 1.0368281602859497, "learning_rate": 1.2101573186694587e-06, "loss": 0.5118, "step": 12321 }, { "epoch": 0.78, "grad_norm": 1.035905361175537, "learning_rate": 1.209488151900568e-06, "loss": 0.4938, "step": 12322 }, { "epoch": 0.78, "grad_norm": 1.1701600551605225, "learning_rate": 1.2088191447369436e-06, "loss": 0.5398, "step": 12323 }, { "epoch": 0.78, "grad_norm": 1.1100108623504639, "learning_rate": 1.2081502972067567e-06, "loss": 0.46, "step": 12324 }, { "epoch": 0.78, "grad_norm": 1.0216463804244995, "learning_rate": 1.2074816093381696e-06, "loss": 0.4951, "step": 12325 }, { "epoch": 0.78, "grad_norm": 1.0515730381011963, "learning_rate": 1.2068130811593387e-06, "loss": 0.4909, "step": 12326 }, { "epoch": 0.78, "grad_norm": 1.0393810272216797, "learning_rate": 1.2061447126984138e-06, "loss": 0.4834, "step": 12327 }, { "epoch": 0.78, "grad_norm": 1.0972084999084473, "learning_rate": 1.2054765039835382e-06, "loss": 0.5074, "step": 12328 }, { "epoch": 0.78, "grad_norm": 0.992595374584198, "learning_rate": 1.2048084550428442e-06, "loss": 0.4412, "step": 12329 }, { "epoch": 0.78, "grad_norm": 1.1025792360305786, "learning_rate": 1.2041405659044664e-06, "loss": 0.5186, "step": 12330 }, { "epoch": 0.78, "grad_norm": 1.0410696268081665, "learning_rate": 1.203472836596523e-06, "loss": 0.4979, "step": 12331 }, { "epoch": 0.78, "grad_norm": 1.0511598587036133, "learning_rate": 1.2028052671471318e-06, "loss": 0.5426, "step": 12332 }, { "epoch": 0.78, "grad_norm": 1.1000758409500122, "learning_rate": 1.2021378575844005e-06, "loss": 0.5021, "step": 12333 }, { "epoch": 0.78, "grad_norm": 1.0427312850952148, "learning_rate": 1.201470607936433e-06, "loss": 0.4636, "step": 12334 }, { "epoch": 0.78, "grad_norm": 1.076611042022705, "learning_rate": 1.2008035182313237e-06, "loss": 0.51, "step": 12335 }, { "epoch": 0.78, "grad_norm": 0.98536616563797, "learning_rate": 1.2001365884971634e-06, "loss": 0.4575, "step": 12336 }, { "epoch": 0.78, "grad_norm": 1.1343871355056763, "learning_rate": 1.1994698187620297e-06, "loss": 0.5158, "step": 12337 }, { "epoch": 0.78, "grad_norm": 1.1061893701553345, "learning_rate": 1.1988032090540036e-06, "loss": 0.5396, "step": 12338 }, { "epoch": 0.78, "grad_norm": 1.0203694105148315, "learning_rate": 1.1981367594011496e-06, "loss": 0.5111, "step": 12339 }, { "epoch": 0.78, "grad_norm": 1.0427228212356567, "learning_rate": 1.1974704698315309e-06, "loss": 0.4431, "step": 12340 }, { "epoch": 0.78, "grad_norm": 1.022687554359436, "learning_rate": 1.1968043403732044e-06, "loss": 0.493, "step": 12341 }, { "epoch": 0.78, "grad_norm": 1.0888246297836304, "learning_rate": 1.1961383710542135e-06, "loss": 0.5025, "step": 12342 }, { "epoch": 0.78, "grad_norm": 0.9961660504341125, "learning_rate": 1.1954725619026048e-06, "loss": 0.4856, "step": 12343 }, { "epoch": 0.78, "grad_norm": 0.9393571615219116, "learning_rate": 1.1948069129464128e-06, "loss": 0.5036, "step": 12344 }, { "epoch": 0.78, "grad_norm": 1.0751937627792358, "learning_rate": 1.1941414242136635e-06, "loss": 0.4683, "step": 12345 }, { "epoch": 0.78, "grad_norm": 1.0670708417892456, "learning_rate": 1.1934760957323782e-06, "loss": 0.495, "step": 12346 }, { "epoch": 0.78, "grad_norm": 1.130021095275879, "learning_rate": 1.1928109275305734e-06, "loss": 0.5344, "step": 12347 }, { "epoch": 0.78, "grad_norm": 0.95157390832901, "learning_rate": 1.1921459196362562e-06, "loss": 0.4909, "step": 12348 }, { "epoch": 0.78, "grad_norm": 1.0562176704406738, "learning_rate": 1.1914810720774289e-06, "loss": 0.517, "step": 12349 }, { "epoch": 0.78, "grad_norm": 1.0687183141708374, "learning_rate": 1.190816384882082e-06, "loss": 0.5289, "step": 12350 }, { "epoch": 0.78, "grad_norm": 1.0604063272476196, "learning_rate": 1.1901518580782073e-06, "loss": 0.5122, "step": 12351 }, { "epoch": 0.78, "grad_norm": 1.074355125427246, "learning_rate": 1.1894874916937855e-06, "loss": 0.5271, "step": 12352 }, { "epoch": 0.78, "grad_norm": 0.9688604474067688, "learning_rate": 1.1888232857567888e-06, "loss": 0.4705, "step": 12353 }, { "epoch": 0.78, "grad_norm": 1.0829731225967407, "learning_rate": 1.1881592402951853e-06, "loss": 0.518, "step": 12354 }, { "epoch": 0.78, "grad_norm": 1.146059513092041, "learning_rate": 1.1874953553369351e-06, "loss": 0.4958, "step": 12355 }, { "epoch": 0.78, "grad_norm": 1.050464153289795, "learning_rate": 1.1868316309099937e-06, "loss": 0.4865, "step": 12356 }, { "epoch": 0.78, "grad_norm": 1.046895980834961, "learning_rate": 1.186168067042308e-06, "loss": 0.5281, "step": 12357 }, { "epoch": 0.78, "grad_norm": 1.1184728145599365, "learning_rate": 1.1855046637618168e-06, "loss": 0.5722, "step": 12358 }, { "epoch": 0.78, "grad_norm": 1.1339380741119385, "learning_rate": 1.1848414210964526e-06, "loss": 0.5566, "step": 12359 }, { "epoch": 0.78, "grad_norm": 1.0997258424758911, "learning_rate": 1.1841783390741473e-06, "loss": 0.4693, "step": 12360 }, { "epoch": 0.78, "grad_norm": 1.177841305732727, "learning_rate": 1.1835154177228165e-06, "loss": 0.4936, "step": 12361 }, { "epoch": 0.78, "grad_norm": 1.146742582321167, "learning_rate": 1.1828526570703747e-06, "loss": 0.562, "step": 12362 }, { "epoch": 0.78, "grad_norm": 1.091923713684082, "learning_rate": 1.1821900571447286e-06, "loss": 0.4978, "step": 12363 }, { "epoch": 0.78, "grad_norm": 1.0691144466400146, "learning_rate": 1.1815276179737778e-06, "loss": 0.4421, "step": 12364 }, { "epoch": 0.78, "grad_norm": 1.0965749025344849, "learning_rate": 1.1808653395854174e-06, "loss": 0.516, "step": 12365 }, { "epoch": 0.78, "grad_norm": 1.002099633216858, "learning_rate": 1.1802032220075299e-06, "loss": 0.4794, "step": 12366 }, { "epoch": 0.78, "grad_norm": 1.1064283847808838, "learning_rate": 1.1795412652679955e-06, "loss": 0.523, "step": 12367 }, { "epoch": 0.78, "grad_norm": 0.9925868511199951, "learning_rate": 1.178879469394691e-06, "loss": 0.5006, "step": 12368 }, { "epoch": 0.78, "grad_norm": 1.0517547130584717, "learning_rate": 1.1782178344154776e-06, "loss": 0.4492, "step": 12369 }, { "epoch": 0.78, "grad_norm": 1.0431710481643677, "learning_rate": 1.1775563603582162e-06, "loss": 0.5079, "step": 12370 }, { "epoch": 0.78, "grad_norm": 0.9839174151420593, "learning_rate": 1.1768950472507605e-06, "loss": 0.4542, "step": 12371 }, { "epoch": 0.78, "grad_norm": 1.1189147233963013, "learning_rate": 1.1762338951209524e-06, "loss": 0.5292, "step": 12372 }, { "epoch": 0.78, "grad_norm": 1.1259547472000122, "learning_rate": 1.1755729039966358e-06, "loss": 0.5137, "step": 12373 }, { "epoch": 0.78, "grad_norm": 1.0271570682525635, "learning_rate": 1.174912073905638e-06, "loss": 0.4744, "step": 12374 }, { "epoch": 0.78, "grad_norm": 1.0150748491287231, "learning_rate": 1.174251404875787e-06, "loss": 0.4976, "step": 12375 }, { "epoch": 0.78, "grad_norm": 0.9843624830245972, "learning_rate": 1.1735908969349002e-06, "loss": 0.4748, "step": 12376 }, { "epoch": 0.78, "grad_norm": 1.01045823097229, "learning_rate": 1.1729305501107897e-06, "loss": 0.4913, "step": 12377 }, { "epoch": 0.78, "grad_norm": 1.0301995277404785, "learning_rate": 1.1722703644312599e-06, "loss": 0.4744, "step": 12378 }, { "epoch": 0.78, "grad_norm": 1.0072860717773438, "learning_rate": 1.1716103399241113e-06, "loss": 0.479, "step": 12379 }, { "epoch": 0.78, "grad_norm": 1.1030186414718628, "learning_rate": 1.1709504766171298e-06, "loss": 0.4761, "step": 12380 }, { "epoch": 0.78, "grad_norm": 1.0800237655639648, "learning_rate": 1.170290774538107e-06, "loss": 0.5464, "step": 12381 }, { "epoch": 0.78, "grad_norm": 1.0646288394927979, "learning_rate": 1.1696312337148152e-06, "loss": 0.4738, "step": 12382 }, { "epoch": 0.78, "grad_norm": 1.122464656829834, "learning_rate": 1.1689718541750278e-06, "loss": 0.5275, "step": 12383 }, { "epoch": 0.78, "grad_norm": 1.1252233982086182, "learning_rate": 1.168312635946508e-06, "loss": 0.5514, "step": 12384 }, { "epoch": 0.78, "grad_norm": 1.0471551418304443, "learning_rate": 1.1676535790570137e-06, "loss": 0.4772, "step": 12385 }, { "epoch": 0.78, "grad_norm": 1.1040979623794556, "learning_rate": 1.1669946835342956e-06, "loss": 0.4702, "step": 12386 }, { "epoch": 0.78, "grad_norm": 1.230148434638977, "learning_rate": 1.1663359494060983e-06, "loss": 0.5516, "step": 12387 }, { "epoch": 0.78, "grad_norm": 1.084335446357727, "learning_rate": 1.1656773767001566e-06, "loss": 0.529, "step": 12388 }, { "epoch": 0.78, "grad_norm": 1.1386499404907227, "learning_rate": 1.1650189654442024e-06, "loss": 0.4936, "step": 12389 }, { "epoch": 0.78, "grad_norm": 1.0516639947891235, "learning_rate": 1.1643607156659582e-06, "loss": 0.51, "step": 12390 }, { "epoch": 0.79, "grad_norm": 1.1283698081970215, "learning_rate": 1.1637026273931413e-06, "loss": 0.4913, "step": 12391 }, { "epoch": 0.79, "grad_norm": 1.097561001777649, "learning_rate": 1.1630447006534606e-06, "loss": 0.5207, "step": 12392 }, { "epoch": 0.79, "grad_norm": 1.1390267610549927, "learning_rate": 1.1623869354746203e-06, "loss": 0.5143, "step": 12393 }, { "epoch": 0.79, "grad_norm": 1.1013120412826538, "learning_rate": 1.1617293318843164e-06, "loss": 0.5651, "step": 12394 }, { "epoch": 0.79, "grad_norm": 1.054701805114746, "learning_rate": 1.1610718899102392e-06, "loss": 0.487, "step": 12395 }, { "epoch": 0.79, "grad_norm": 1.1136512756347656, "learning_rate": 1.1604146095800684e-06, "loss": 0.5257, "step": 12396 }, { "epoch": 0.79, "grad_norm": 1.004216194152832, "learning_rate": 1.1597574909214808e-06, "loss": 0.5358, "step": 12397 }, { "epoch": 0.79, "grad_norm": 1.12004554271698, "learning_rate": 1.159100533962147e-06, "loss": 0.4837, "step": 12398 }, { "epoch": 0.79, "grad_norm": 1.1613850593566895, "learning_rate": 1.1584437387297283e-06, "loss": 0.581, "step": 12399 }, { "epoch": 0.79, "grad_norm": 1.0097615718841553, "learning_rate": 1.157787105251879e-06, "loss": 0.4788, "step": 12400 }, { "epoch": 0.79, "grad_norm": 1.0033386945724487, "learning_rate": 1.157130633556251e-06, "loss": 0.4392, "step": 12401 }, { "epoch": 0.79, "grad_norm": 1.0544471740722656, "learning_rate": 1.1564743236704801e-06, "loss": 0.4531, "step": 12402 }, { "epoch": 0.79, "grad_norm": 1.1057604551315308, "learning_rate": 1.1558181756222081e-06, "loss": 0.4915, "step": 12403 }, { "epoch": 0.79, "grad_norm": 1.0928549766540527, "learning_rate": 1.1551621894390586e-06, "loss": 0.475, "step": 12404 }, { "epoch": 0.79, "grad_norm": 1.1400675773620605, "learning_rate": 1.1545063651486533e-06, "loss": 0.5263, "step": 12405 }, { "epoch": 0.79, "grad_norm": 1.1011698246002197, "learning_rate": 1.1538507027786077e-06, "loss": 0.5582, "step": 12406 }, { "epoch": 0.79, "grad_norm": 1.0064760446548462, "learning_rate": 1.1531952023565295e-06, "loss": 0.4861, "step": 12407 }, { "epoch": 0.79, "grad_norm": 1.0369073152542114, "learning_rate": 1.1525398639100194e-06, "loss": 0.5001, "step": 12408 }, { "epoch": 0.79, "grad_norm": 1.210459589958191, "learning_rate": 1.1518846874666723e-06, "loss": 0.5119, "step": 12409 }, { "epoch": 0.79, "grad_norm": 1.2517964839935303, "learning_rate": 1.1512296730540717e-06, "loss": 0.51, "step": 12410 }, { "epoch": 0.79, "grad_norm": 1.0143394470214844, "learning_rate": 1.1505748206998036e-06, "loss": 0.4776, "step": 12411 }, { "epoch": 0.79, "grad_norm": 0.9268616437911987, "learning_rate": 1.1499201304314372e-06, "loss": 0.5016, "step": 12412 }, { "epoch": 0.79, "grad_norm": 1.0749627351760864, "learning_rate": 1.149265602276541e-06, "loss": 0.5425, "step": 12413 }, { "epoch": 0.79, "grad_norm": 1.0563760995864868, "learning_rate": 1.1486112362626738e-06, "loss": 0.5445, "step": 12414 }, { "epoch": 0.79, "grad_norm": 1.0509696006774902, "learning_rate": 1.14795703241739e-06, "loss": 0.509, "step": 12415 }, { "epoch": 0.79, "grad_norm": 0.9929944276809692, "learning_rate": 1.1473029907682348e-06, "loss": 0.427, "step": 12416 }, { "epoch": 0.79, "grad_norm": 1.095852255821228, "learning_rate": 1.1466491113427503e-06, "loss": 0.4672, "step": 12417 }, { "epoch": 0.79, "grad_norm": 1.0767621994018555, "learning_rate": 1.1459953941684648e-06, "loss": 0.5021, "step": 12418 }, { "epoch": 0.79, "grad_norm": 1.0170347690582275, "learning_rate": 1.1453418392729065e-06, "loss": 0.4952, "step": 12419 }, { "epoch": 0.79, "grad_norm": 1.0994138717651367, "learning_rate": 1.1446884466835933e-06, "loss": 0.5337, "step": 12420 }, { "epoch": 0.79, "grad_norm": 1.1458686590194702, "learning_rate": 1.1440352164280388e-06, "loss": 0.4907, "step": 12421 }, { "epoch": 0.79, "grad_norm": 1.0651253461837769, "learning_rate": 1.1433821485337487e-06, "loss": 0.4825, "step": 12422 }, { "epoch": 0.79, "grad_norm": 1.042712688446045, "learning_rate": 1.1427292430282165e-06, "loss": 0.4977, "step": 12423 }, { "epoch": 0.79, "grad_norm": 1.02010178565979, "learning_rate": 1.14207649993894e-06, "loss": 0.5145, "step": 12424 }, { "epoch": 0.79, "grad_norm": 1.131560206413269, "learning_rate": 1.1414239192934019e-06, "loss": 0.4912, "step": 12425 }, { "epoch": 0.79, "grad_norm": 1.0900567770004272, "learning_rate": 1.1407715011190784e-06, "loss": 0.4849, "step": 12426 }, { "epoch": 0.79, "grad_norm": 1.135135531425476, "learning_rate": 1.1401192454434418e-06, "loss": 0.5114, "step": 12427 }, { "epoch": 0.79, "grad_norm": 1.0785999298095703, "learning_rate": 1.139467152293956e-06, "loss": 0.5069, "step": 12428 }, { "epoch": 0.79, "grad_norm": 1.1728732585906982, "learning_rate": 1.138815221698079e-06, "loss": 0.5358, "step": 12429 }, { "epoch": 0.79, "grad_norm": 1.074499249458313, "learning_rate": 1.138163453683262e-06, "loss": 0.475, "step": 12430 }, { "epoch": 0.79, "grad_norm": 0.9937154650688171, "learning_rate": 1.1375118482769447e-06, "loss": 0.4616, "step": 12431 }, { "epoch": 0.79, "grad_norm": 0.9972637295722961, "learning_rate": 1.136860405506569e-06, "loss": 0.4687, "step": 12432 }, { "epoch": 0.79, "grad_norm": 0.9380080699920654, "learning_rate": 1.1362091253995632e-06, "loss": 0.4475, "step": 12433 }, { "epoch": 0.79, "grad_norm": 1.0433030128479004, "learning_rate": 1.1355580079833496e-06, "loss": 0.5259, "step": 12434 }, { "epoch": 0.79, "grad_norm": 1.0440255403518677, "learning_rate": 1.134907053285344e-06, "loss": 0.4836, "step": 12435 }, { "epoch": 0.79, "grad_norm": 1.0331069231033325, "learning_rate": 1.1342562613329571e-06, "loss": 0.5182, "step": 12436 }, { "epoch": 0.79, "grad_norm": 1.0718039274215698, "learning_rate": 1.133605632153591e-06, "loss": 0.5, "step": 12437 }, { "epoch": 0.79, "grad_norm": 1.1010985374450684, "learning_rate": 1.1329551657746429e-06, "loss": 0.5704, "step": 12438 }, { "epoch": 0.79, "grad_norm": 1.12299382686615, "learning_rate": 1.132304862223499e-06, "loss": 0.5177, "step": 12439 }, { "epoch": 0.79, "grad_norm": 0.9886751174926758, "learning_rate": 1.1316547215275409e-06, "loss": 0.5398, "step": 12440 }, { "epoch": 0.79, "grad_norm": 1.0287259817123413, "learning_rate": 1.1310047437141485e-06, "loss": 0.5166, "step": 12441 }, { "epoch": 0.79, "grad_norm": 1.0754035711288452, "learning_rate": 1.1303549288106857e-06, "loss": 0.4811, "step": 12442 }, { "epoch": 0.79, "grad_norm": 1.0982261896133423, "learning_rate": 1.1297052768445154e-06, "loss": 0.5062, "step": 12443 }, { "epoch": 0.79, "grad_norm": 1.1342655420303345, "learning_rate": 1.129055787842992e-06, "loss": 0.4888, "step": 12444 }, { "epoch": 0.79, "grad_norm": 1.09312105178833, "learning_rate": 1.1284064618334634e-06, "loss": 0.4614, "step": 12445 }, { "epoch": 0.79, "grad_norm": 1.1874736547470093, "learning_rate": 1.1277572988432716e-06, "loss": 0.5141, "step": 12446 }, { "epoch": 0.79, "grad_norm": 0.9963446855545044, "learning_rate": 1.1271082988997485e-06, "loss": 0.5013, "step": 12447 }, { "epoch": 0.79, "grad_norm": 1.1956878900527954, "learning_rate": 1.1264594620302216e-06, "loss": 0.5259, "step": 12448 }, { "epoch": 0.79, "grad_norm": 0.9786106944084167, "learning_rate": 1.1258107882620117e-06, "loss": 0.4864, "step": 12449 }, { "epoch": 0.79, "grad_norm": 1.1049765348434448, "learning_rate": 1.1251622776224325e-06, "loss": 0.5224, "step": 12450 }, { "epoch": 0.79, "grad_norm": 1.047175407409668, "learning_rate": 1.1245139301387903e-06, "loss": 0.4617, "step": 12451 }, { "epoch": 0.79, "grad_norm": 1.107480764389038, "learning_rate": 1.1238657458383857e-06, "loss": 0.5228, "step": 12452 }, { "epoch": 0.79, "grad_norm": 1.088758111000061, "learning_rate": 1.1232177247485076e-06, "loss": 0.5175, "step": 12453 }, { "epoch": 0.79, "grad_norm": 1.0597591400146484, "learning_rate": 1.122569866896448e-06, "loss": 0.4779, "step": 12454 }, { "epoch": 0.79, "grad_norm": 1.032039999961853, "learning_rate": 1.1219221723094815e-06, "loss": 0.4954, "step": 12455 }, { "epoch": 0.79, "grad_norm": 1.1140142679214478, "learning_rate": 1.1212746410148807e-06, "loss": 0.5409, "step": 12456 }, { "epoch": 0.79, "grad_norm": 1.0760780572891235, "learning_rate": 1.120627273039912e-06, "loss": 0.5255, "step": 12457 }, { "epoch": 0.79, "grad_norm": 1.0337908267974854, "learning_rate": 1.119980068411834e-06, "loss": 0.5136, "step": 12458 }, { "epoch": 0.79, "grad_norm": 1.0888824462890625, "learning_rate": 1.1193330271578968e-06, "loss": 0.5351, "step": 12459 }, { "epoch": 0.79, "grad_norm": 1.0786190032958984, "learning_rate": 1.118686149305348e-06, "loss": 0.4628, "step": 12460 }, { "epoch": 0.79, "grad_norm": 1.0877033472061157, "learning_rate": 1.1180394348814206e-06, "loss": 0.4994, "step": 12461 }, { "epoch": 0.79, "grad_norm": 1.0420441627502441, "learning_rate": 1.117392883913349e-06, "loss": 0.4942, "step": 12462 }, { "epoch": 0.79, "grad_norm": 1.0242724418640137, "learning_rate": 1.1167464964283587e-06, "loss": 0.4835, "step": 12463 }, { "epoch": 0.79, "grad_norm": 0.9864571690559387, "learning_rate": 1.1161002724536623e-06, "loss": 0.4727, "step": 12464 }, { "epoch": 0.79, "grad_norm": 1.0759340524673462, "learning_rate": 1.115454212016473e-06, "loss": 0.4859, "step": 12465 }, { "epoch": 0.79, "grad_norm": 1.0306402444839478, "learning_rate": 1.1148083151439932e-06, "loss": 0.488, "step": 12466 }, { "epoch": 0.79, "grad_norm": 1.0664716958999634, "learning_rate": 1.1141625818634194e-06, "loss": 0.511, "step": 12467 }, { "epoch": 0.79, "grad_norm": 1.2387864589691162, "learning_rate": 1.1135170122019433e-06, "loss": 0.5537, "step": 12468 }, { "epoch": 0.79, "grad_norm": 1.0770725011825562, "learning_rate": 1.112871606186744e-06, "loss": 0.5181, "step": 12469 }, { "epoch": 0.79, "grad_norm": 1.0454281568527222, "learning_rate": 1.112226363844998e-06, "loss": 0.5427, "step": 12470 }, { "epoch": 0.79, "grad_norm": 0.9640543460845947, "learning_rate": 1.1115812852038777e-06, "loss": 0.4233, "step": 12471 }, { "epoch": 0.79, "grad_norm": 1.0597995519638062, "learning_rate": 1.1109363702905419e-06, "loss": 0.5358, "step": 12472 }, { "epoch": 0.79, "grad_norm": 1.1093391180038452, "learning_rate": 1.1102916191321456e-06, "loss": 0.5271, "step": 12473 }, { "epoch": 0.79, "grad_norm": 1.0604957342147827, "learning_rate": 1.1096470317558384e-06, "loss": 0.4954, "step": 12474 }, { "epoch": 0.79, "grad_norm": 1.0820348262786865, "learning_rate": 1.1090026081887611e-06, "loss": 0.504, "step": 12475 }, { "epoch": 0.79, "grad_norm": 1.0919123888015747, "learning_rate": 1.1083583484580495e-06, "loss": 0.4784, "step": 12476 }, { "epoch": 0.79, "grad_norm": 0.9372173547744751, "learning_rate": 1.107714252590828e-06, "loss": 0.4721, "step": 12477 }, { "epoch": 0.79, "grad_norm": 0.9823314547538757, "learning_rate": 1.1070703206142186e-06, "loss": 0.5098, "step": 12478 }, { "epoch": 0.79, "grad_norm": 1.1057475805282593, "learning_rate": 1.1064265525553375e-06, "loss": 0.5691, "step": 12479 }, { "epoch": 0.79, "grad_norm": 1.030718207359314, "learning_rate": 1.1057829484412885e-06, "loss": 0.4894, "step": 12480 }, { "epoch": 0.79, "grad_norm": 1.0543081760406494, "learning_rate": 1.1051395082991722e-06, "loss": 0.5059, "step": 12481 }, { "epoch": 0.79, "grad_norm": 1.0306936502456665, "learning_rate": 1.1044962321560837e-06, "loss": 0.5019, "step": 12482 }, { "epoch": 0.79, "grad_norm": 0.9948222637176514, "learning_rate": 1.1038531200391045e-06, "loss": 0.5566, "step": 12483 }, { "epoch": 0.79, "grad_norm": 1.0176924467086792, "learning_rate": 1.1032101719753197e-06, "loss": 0.4679, "step": 12484 }, { "epoch": 0.79, "grad_norm": 0.9939400553703308, "learning_rate": 1.102567387991797e-06, "loss": 0.4832, "step": 12485 }, { "epoch": 0.79, "grad_norm": 1.0428471565246582, "learning_rate": 1.101924768115603e-06, "loss": 0.486, "step": 12486 }, { "epoch": 0.79, "grad_norm": 1.0242252349853516, "learning_rate": 1.101282312373797e-06, "loss": 0.5002, "step": 12487 }, { "epoch": 0.79, "grad_norm": 1.1221117973327637, "learning_rate": 1.1006400207934304e-06, "loss": 0.5324, "step": 12488 }, { "epoch": 0.79, "grad_norm": 1.0698509216308594, "learning_rate": 1.0999978934015475e-06, "loss": 0.5184, "step": 12489 }, { "epoch": 0.79, "grad_norm": 1.080450177192688, "learning_rate": 1.0993559302251878e-06, "loss": 0.5573, "step": 12490 }, { "epoch": 0.79, "grad_norm": 1.034711480140686, "learning_rate": 1.0987141312913773e-06, "loss": 0.5067, "step": 12491 }, { "epoch": 0.79, "grad_norm": 1.1353464126586914, "learning_rate": 1.098072496627146e-06, "loss": 0.5728, "step": 12492 }, { "epoch": 0.79, "grad_norm": 1.089824914932251, "learning_rate": 1.0974310262595067e-06, "loss": 0.5395, "step": 12493 }, { "epoch": 0.79, "grad_norm": 1.1430447101593018, "learning_rate": 1.096789720215471e-06, "loss": 0.6045, "step": 12494 }, { "epoch": 0.79, "grad_norm": 1.062886357307434, "learning_rate": 1.0961485785220434e-06, "loss": 0.5036, "step": 12495 }, { "epoch": 0.79, "grad_norm": 0.996558666229248, "learning_rate": 1.0955076012062155e-06, "loss": 0.5283, "step": 12496 }, { "epoch": 0.79, "grad_norm": 1.056496024131775, "learning_rate": 1.094866788294981e-06, "loss": 0.4628, "step": 12497 }, { "epoch": 0.79, "grad_norm": 1.0023682117462158, "learning_rate": 1.094226139815323e-06, "loss": 0.473, "step": 12498 }, { "epoch": 0.79, "grad_norm": 1.1194649934768677, "learning_rate": 1.0935856557942132e-06, "loss": 0.4728, "step": 12499 }, { "epoch": 0.79, "grad_norm": 1.0746665000915527, "learning_rate": 1.0929453362586223e-06, "loss": 0.4718, "step": 12500 }, { "epoch": 0.79, "grad_norm": 1.132388949394226, "learning_rate": 1.0923051812355117e-06, "loss": 0.5671, "step": 12501 }, { "epoch": 0.79, "grad_norm": 1.1181532144546509, "learning_rate": 1.091665190751836e-06, "loss": 0.5218, "step": 12502 }, { "epoch": 0.79, "grad_norm": 1.140037178993225, "learning_rate": 1.0910253648345442e-06, "loss": 0.5484, "step": 12503 }, { "epoch": 0.79, "grad_norm": 1.057293176651001, "learning_rate": 1.0903857035105736e-06, "loss": 0.5199, "step": 12504 }, { "epoch": 0.79, "grad_norm": 0.9680564403533936, "learning_rate": 1.0897462068068616e-06, "loss": 0.4586, "step": 12505 }, { "epoch": 0.79, "grad_norm": 1.089263916015625, "learning_rate": 1.0891068747503353e-06, "loss": 0.5194, "step": 12506 }, { "epoch": 0.79, "grad_norm": 1.0306979417800903, "learning_rate": 1.0884677073679123e-06, "loss": 0.4676, "step": 12507 }, { "epoch": 0.79, "grad_norm": 0.9302442669868469, "learning_rate": 1.0878287046865072e-06, "loss": 0.4554, "step": 12508 }, { "epoch": 0.79, "grad_norm": 1.081356167793274, "learning_rate": 1.0871898667330249e-06, "loss": 0.4919, "step": 12509 }, { "epoch": 0.79, "grad_norm": 1.027951717376709, "learning_rate": 1.0865511935343664e-06, "loss": 0.4478, "step": 12510 }, { "epoch": 0.79, "grad_norm": 1.006633996963501, "learning_rate": 1.0859126851174246e-06, "loss": 0.4938, "step": 12511 }, { "epoch": 0.79, "grad_norm": 1.0576258897781372, "learning_rate": 1.0852743415090823e-06, "loss": 0.5065, "step": 12512 }, { "epoch": 0.79, "grad_norm": 1.1270864009857178, "learning_rate": 1.0846361627362174e-06, "loss": 0.473, "step": 12513 }, { "epoch": 0.79, "grad_norm": 0.988420844078064, "learning_rate": 1.0839981488257061e-06, "loss": 0.477, "step": 12514 }, { "epoch": 0.79, "grad_norm": 1.015462875366211, "learning_rate": 1.0833602998044085e-06, "loss": 0.47, "step": 12515 }, { "epoch": 0.79, "grad_norm": 1.111742615699768, "learning_rate": 1.0827226156991838e-06, "loss": 0.524, "step": 12516 }, { "epoch": 0.79, "grad_norm": 1.0591188669204712, "learning_rate": 1.0820850965368822e-06, "loss": 0.4825, "step": 12517 }, { "epoch": 0.79, "grad_norm": 1.140297293663025, "learning_rate": 1.0814477423443482e-06, "loss": 0.5592, "step": 12518 }, { "epoch": 0.79, "grad_norm": 1.121349573135376, "learning_rate": 1.0808105531484192e-06, "loss": 0.5092, "step": 12519 }, { "epoch": 0.79, "grad_norm": 1.060700535774231, "learning_rate": 1.0801735289759225e-06, "loss": 0.4999, "step": 12520 }, { "epoch": 0.79, "grad_norm": 1.1008962392807007, "learning_rate": 1.0795366698536812e-06, "loss": 0.5153, "step": 12521 }, { "epoch": 0.79, "grad_norm": 1.0303106307983398, "learning_rate": 1.078899975808515e-06, "loss": 0.5057, "step": 12522 }, { "epoch": 0.79, "grad_norm": 1.054789662361145, "learning_rate": 1.0782634468672293e-06, "loss": 0.4603, "step": 12523 }, { "epoch": 0.79, "grad_norm": 0.9646784067153931, "learning_rate": 1.0776270830566266e-06, "loss": 0.485, "step": 12524 }, { "epoch": 0.79, "grad_norm": 1.0548442602157593, "learning_rate": 1.0769908844035032e-06, "loss": 0.4672, "step": 12525 }, { "epoch": 0.79, "grad_norm": 1.0208356380462646, "learning_rate": 1.0763548509346461e-06, "loss": 0.4431, "step": 12526 }, { "epoch": 0.79, "grad_norm": 1.0277953147888184, "learning_rate": 1.0757189826768367e-06, "loss": 0.4811, "step": 12527 }, { "epoch": 0.79, "grad_norm": 1.0759657621383667, "learning_rate": 1.075083279656851e-06, "loss": 0.5032, "step": 12528 }, { "epoch": 0.79, "grad_norm": 1.0658326148986816, "learning_rate": 1.0744477419014532e-06, "loss": 0.5109, "step": 12529 }, { "epoch": 0.79, "grad_norm": 1.0466490983963013, "learning_rate": 1.0738123694374047e-06, "loss": 0.4807, "step": 12530 }, { "epoch": 0.79, "grad_norm": 0.9918878078460693, "learning_rate": 1.0731771622914595e-06, "loss": 0.4838, "step": 12531 }, { "epoch": 0.79, "grad_norm": 1.0270483493804932, "learning_rate": 1.072542120490363e-06, "loss": 0.5023, "step": 12532 }, { "epoch": 0.79, "grad_norm": 1.0566339492797852, "learning_rate": 1.0719072440608575e-06, "loss": 0.5024, "step": 12533 }, { "epoch": 0.79, "grad_norm": 0.9983569979667664, "learning_rate": 1.0712725330296697e-06, "loss": 0.4908, "step": 12534 }, { "epoch": 0.79, "grad_norm": 1.0100479125976562, "learning_rate": 1.07063798742353e-06, "loss": 0.5212, "step": 12535 }, { "epoch": 0.79, "grad_norm": 1.1355971097946167, "learning_rate": 1.0700036072691566e-06, "loss": 0.5449, "step": 12536 }, { "epoch": 0.79, "grad_norm": 1.1706911325454712, "learning_rate": 1.0693693925932585e-06, "loss": 0.5524, "step": 12537 }, { "epoch": 0.79, "grad_norm": 1.0609681606292725, "learning_rate": 1.0687353434225418e-06, "loss": 0.5237, "step": 12538 }, { "epoch": 0.79, "grad_norm": 1.0677683353424072, "learning_rate": 1.0681014597837042e-06, "loss": 0.5273, "step": 12539 }, { "epoch": 0.79, "grad_norm": 1.0673065185546875, "learning_rate": 1.0674677417034358e-06, "loss": 0.4832, "step": 12540 }, { "epoch": 0.79, "grad_norm": 0.9804611802101135, "learning_rate": 1.0668341892084217e-06, "loss": 0.4591, "step": 12541 }, { "epoch": 0.79, "grad_norm": 1.1132515668869019, "learning_rate": 1.0662008023253356e-06, "loss": 0.4985, "step": 12542 }, { "epoch": 0.79, "grad_norm": 1.080709457397461, "learning_rate": 1.0655675810808485e-06, "loss": 0.5226, "step": 12543 }, { "epoch": 0.79, "grad_norm": 1.0782188177108765, "learning_rate": 1.0649345255016258e-06, "loss": 0.5295, "step": 12544 }, { "epoch": 0.79, "grad_norm": 1.0711718797683716, "learning_rate": 1.0643016356143204e-06, "loss": 0.5439, "step": 12545 }, { "epoch": 0.79, "grad_norm": 1.0208290815353394, "learning_rate": 1.0636689114455811e-06, "loss": 0.5282, "step": 12546 }, { "epoch": 0.79, "grad_norm": 1.088685154914856, "learning_rate": 1.063036353022051e-06, "loss": 0.5668, "step": 12547 }, { "epoch": 0.79, "grad_norm": 1.036378264427185, "learning_rate": 1.0624039603703645e-06, "loss": 0.4408, "step": 12548 }, { "epoch": 0.8, "grad_norm": 1.0257773399353027, "learning_rate": 1.06177173351715e-06, "loss": 0.5045, "step": 12549 }, { "epoch": 0.8, "grad_norm": 1.0909396409988403, "learning_rate": 1.061139672489027e-06, "loss": 0.5179, "step": 12550 }, { "epoch": 0.8, "grad_norm": 1.0610034465789795, "learning_rate": 1.0605077773126083e-06, "loss": 0.5166, "step": 12551 }, { "epoch": 0.8, "grad_norm": 1.014702558517456, "learning_rate": 1.059876048014506e-06, "loss": 0.5213, "step": 12552 }, { "epoch": 0.8, "grad_norm": 1.1166365146636963, "learning_rate": 1.0592444846213145e-06, "loss": 0.5204, "step": 12553 }, { "epoch": 0.8, "grad_norm": 0.9917412996292114, "learning_rate": 1.058613087159629e-06, "loss": 0.4676, "step": 12554 }, { "epoch": 0.8, "grad_norm": 1.0502556562423706, "learning_rate": 1.0579818556560357e-06, "loss": 0.5403, "step": 12555 }, { "epoch": 0.8, "grad_norm": 1.1649173498153687, "learning_rate": 1.0573507901371126e-06, "loss": 0.5067, "step": 12556 }, { "epoch": 0.8, "grad_norm": 1.035313606262207, "learning_rate": 1.0567198906294341e-06, "loss": 0.4929, "step": 12557 }, { "epoch": 0.8, "grad_norm": 1.0535595417022705, "learning_rate": 1.0560891571595616e-06, "loss": 0.5031, "step": 12558 }, { "epoch": 0.8, "grad_norm": 1.067043662071228, "learning_rate": 1.0554585897540553e-06, "loss": 0.4907, "step": 12559 }, { "epoch": 0.8, "grad_norm": 1.138853907585144, "learning_rate": 1.0548281884394657e-06, "loss": 0.4944, "step": 12560 }, { "epoch": 0.8, "grad_norm": 1.0570776462554932, "learning_rate": 1.0541979532423362e-06, "loss": 0.4762, "step": 12561 }, { "epoch": 0.8, "grad_norm": 0.9952700138092041, "learning_rate": 1.053567884189205e-06, "loss": 0.4968, "step": 12562 }, { "epoch": 0.8, "grad_norm": 1.04869544506073, "learning_rate": 1.0529379813066026e-06, "loss": 0.4726, "step": 12563 }, { "epoch": 0.8, "grad_norm": 1.0134539604187012, "learning_rate": 1.0523082446210487e-06, "loss": 0.4516, "step": 12564 }, { "epoch": 0.8, "grad_norm": 1.0992261171340942, "learning_rate": 1.051678674159064e-06, "loss": 0.5417, "step": 12565 }, { "epoch": 0.8, "grad_norm": 1.0802074670791626, "learning_rate": 1.0510492699471536e-06, "loss": 0.5175, "step": 12566 }, { "epoch": 0.8, "grad_norm": 1.125693917274475, "learning_rate": 1.0504200320118214e-06, "loss": 0.5146, "step": 12567 }, { "epoch": 0.8, "grad_norm": 1.204516887664795, "learning_rate": 1.049790960379562e-06, "loss": 0.5596, "step": 12568 }, { "epoch": 0.8, "grad_norm": 1.0837621688842773, "learning_rate": 1.0491620550768633e-06, "loss": 0.5103, "step": 12569 }, { "epoch": 0.8, "grad_norm": 1.0523866415023804, "learning_rate": 1.048533316130207e-06, "loss": 0.4998, "step": 12570 }, { "epoch": 0.8, "grad_norm": 1.1099904775619507, "learning_rate": 1.0479047435660671e-06, "loss": 0.4525, "step": 12571 }, { "epoch": 0.8, "grad_norm": 1.0851279497146606, "learning_rate": 1.047276337410908e-06, "loss": 0.5323, "step": 12572 }, { "epoch": 0.8, "grad_norm": 1.0258618593215942, "learning_rate": 1.0466480976911947e-06, "loss": 0.5285, "step": 12573 }, { "epoch": 0.8, "grad_norm": 0.9892868995666504, "learning_rate": 1.0460200244333758e-06, "loss": 0.4928, "step": 12574 }, { "epoch": 0.8, "grad_norm": 0.9799687266349792, "learning_rate": 1.0453921176638981e-06, "loss": 0.4839, "step": 12575 }, { "epoch": 0.8, "grad_norm": 0.9586300849914551, "learning_rate": 1.044764377409203e-06, "loss": 0.4929, "step": 12576 }, { "epoch": 0.8, "grad_norm": 1.1096810102462769, "learning_rate": 1.0441368036957184e-06, "loss": 0.5154, "step": 12577 }, { "epoch": 0.8, "grad_norm": 1.0250980854034424, "learning_rate": 1.0435093965498727e-06, "loss": 0.4814, "step": 12578 }, { "epoch": 0.8, "grad_norm": 1.0203561782836914, "learning_rate": 1.0428821559980839e-06, "loss": 0.4966, "step": 12579 }, { "epoch": 0.8, "grad_norm": 0.9630902409553528, "learning_rate": 1.0422550820667605e-06, "loss": 0.4975, "step": 12580 }, { "epoch": 0.8, "grad_norm": 1.0307133197784424, "learning_rate": 1.0416281747823076e-06, "loss": 0.4603, "step": 12581 }, { "epoch": 0.8, "grad_norm": 1.1498281955718994, "learning_rate": 1.0410014341711216e-06, "loss": 0.5362, "step": 12582 }, { "epoch": 0.8, "grad_norm": 1.0730465650558472, "learning_rate": 1.0403748602595937e-06, "loss": 0.5233, "step": 12583 }, { "epoch": 0.8, "grad_norm": 1.0637001991271973, "learning_rate": 1.0397484530741053e-06, "loss": 0.5503, "step": 12584 }, { "epoch": 0.8, "grad_norm": 1.1022603511810303, "learning_rate": 1.0391222126410327e-06, "loss": 0.5288, "step": 12585 }, { "epoch": 0.8, "grad_norm": 1.012472152709961, "learning_rate": 1.0384961389867454e-06, "loss": 0.4724, "step": 12586 }, { "epoch": 0.8, "grad_norm": 1.0934635400772095, "learning_rate": 1.0378702321376054e-06, "loss": 0.5138, "step": 12587 }, { "epoch": 0.8, "grad_norm": 0.9922604560852051, "learning_rate": 1.037244492119966e-06, "loss": 0.5094, "step": 12588 }, { "epoch": 0.8, "grad_norm": 0.9959739446640015, "learning_rate": 1.036618918960175e-06, "loss": 0.5135, "step": 12589 }, { "epoch": 0.8, "grad_norm": 1.0212490558624268, "learning_rate": 1.0359935126845738e-06, "loss": 0.4835, "step": 12590 }, { "epoch": 0.8, "grad_norm": 0.9797313809394836, "learning_rate": 1.0353682733194965e-06, "loss": 0.4893, "step": 12591 }, { "epoch": 0.8, "grad_norm": 1.0796663761138916, "learning_rate": 1.0347432008912688e-06, "loss": 0.5523, "step": 12592 }, { "epoch": 0.8, "grad_norm": 1.037741780281067, "learning_rate": 1.0341182954262125e-06, "loss": 0.4667, "step": 12593 }, { "epoch": 0.8, "grad_norm": 0.9964542984962463, "learning_rate": 1.0334935569506355e-06, "loss": 0.4916, "step": 12594 }, { "epoch": 0.8, "grad_norm": 1.066288709640503, "learning_rate": 1.0328689854908492e-06, "loss": 0.5088, "step": 12595 }, { "epoch": 0.8, "grad_norm": 0.9630863666534424, "learning_rate": 1.032244581073148e-06, "loss": 0.4728, "step": 12596 }, { "epoch": 0.8, "grad_norm": 1.1112786531448364, "learning_rate": 1.0316203437238242e-06, "loss": 0.5292, "step": 12597 }, { "epoch": 0.8, "grad_norm": 1.0189820528030396, "learning_rate": 1.0309962734691632e-06, "loss": 0.5186, "step": 12598 }, { "epoch": 0.8, "grad_norm": 1.1797800064086914, "learning_rate": 1.0303723703354418e-06, "loss": 0.5682, "step": 12599 }, { "epoch": 0.8, "grad_norm": 1.0767300128936768, "learning_rate": 1.0297486343489304e-06, "loss": 0.5095, "step": 12600 }, { "epoch": 0.8, "grad_norm": 1.0390448570251465, "learning_rate": 1.0291250655358942e-06, "loss": 0.5208, "step": 12601 }, { "epoch": 0.8, "grad_norm": 0.9824992418289185, "learning_rate": 1.0285016639225849e-06, "loss": 0.5199, "step": 12602 }, { "epoch": 0.8, "grad_norm": 1.0326634645462036, "learning_rate": 1.0278784295352572e-06, "loss": 0.5177, "step": 12603 }, { "epoch": 0.8, "grad_norm": 1.0143102407455444, "learning_rate": 1.0272553624001502e-06, "loss": 0.4865, "step": 12604 }, { "epoch": 0.8, "grad_norm": 1.0329055786132812, "learning_rate": 1.0266324625434992e-06, "loss": 0.4971, "step": 12605 }, { "epoch": 0.8, "grad_norm": 0.9900029301643372, "learning_rate": 1.0260097299915345e-06, "loss": 0.4573, "step": 12606 }, { "epoch": 0.8, "grad_norm": 1.0251998901367188, "learning_rate": 1.0253871647704722e-06, "loss": 0.4938, "step": 12607 }, { "epoch": 0.8, "grad_norm": 1.018629550933838, "learning_rate": 1.024764766906532e-06, "loss": 0.4997, "step": 12608 }, { "epoch": 0.8, "grad_norm": 1.0895016193389893, "learning_rate": 1.0241425364259195e-06, "loss": 0.5291, "step": 12609 }, { "epoch": 0.8, "grad_norm": 1.0606317520141602, "learning_rate": 1.0235204733548321e-06, "loss": 0.4863, "step": 12610 }, { "epoch": 0.8, "grad_norm": 1.0265743732452393, "learning_rate": 1.022898577719465e-06, "loss": 0.5231, "step": 12611 }, { "epoch": 0.8, "grad_norm": 1.0301939249038696, "learning_rate": 1.0222768495460029e-06, "loss": 0.4679, "step": 12612 }, { "epoch": 0.8, "grad_norm": 1.0592392683029175, "learning_rate": 1.0216552888606256e-06, "loss": 0.4683, "step": 12613 }, { "epoch": 0.8, "grad_norm": 1.0133415460586548, "learning_rate": 1.0210338956895054e-06, "loss": 0.5066, "step": 12614 }, { "epoch": 0.8, "grad_norm": 1.0261272192001343, "learning_rate": 1.020412670058804e-06, "loss": 0.5144, "step": 12615 }, { "epoch": 0.8, "grad_norm": 1.1378453969955444, "learning_rate": 1.0197916119946821e-06, "loss": 0.5321, "step": 12616 }, { "epoch": 0.8, "grad_norm": 1.1250720024108887, "learning_rate": 1.0191707215232905e-06, "loss": 0.5413, "step": 12617 }, { "epoch": 0.8, "grad_norm": 1.0722566843032837, "learning_rate": 1.0185499986707702e-06, "loss": 0.5324, "step": 12618 }, { "epoch": 0.8, "grad_norm": 1.0596920251846313, "learning_rate": 1.0179294434632593e-06, "loss": 0.4888, "step": 12619 }, { "epoch": 0.8, "grad_norm": 1.0051017999649048, "learning_rate": 1.0173090559268867e-06, "loss": 0.4899, "step": 12620 }, { "epoch": 0.8, "grad_norm": 1.1246610879898071, "learning_rate": 1.0166888360877747e-06, "loss": 0.5003, "step": 12621 }, { "epoch": 0.8, "grad_norm": 1.0212544202804565, "learning_rate": 1.0160687839720407e-06, "loss": 0.5072, "step": 12622 }, { "epoch": 0.8, "grad_norm": 1.0428470373153687, "learning_rate": 1.0154488996057894e-06, "loss": 0.5297, "step": 12623 }, { "epoch": 0.8, "grad_norm": 1.092565655708313, "learning_rate": 1.0148291830151224e-06, "loss": 0.5606, "step": 12624 }, { "epoch": 0.8, "grad_norm": 1.1170252561569214, "learning_rate": 1.014209634226138e-06, "loss": 0.5372, "step": 12625 }, { "epoch": 0.8, "grad_norm": 1.0844227075576782, "learning_rate": 1.013590253264919e-06, "loss": 0.5427, "step": 12626 }, { "epoch": 0.8, "grad_norm": 1.0359807014465332, "learning_rate": 1.0129710401575465e-06, "loss": 0.5043, "step": 12627 }, { "epoch": 0.8, "grad_norm": 1.022281527519226, "learning_rate": 1.0123519949300942e-06, "loss": 0.5025, "step": 12628 }, { "epoch": 0.8, "grad_norm": 1.0547220706939697, "learning_rate": 1.0117331176086264e-06, "loss": 0.4771, "step": 12629 }, { "epoch": 0.8, "grad_norm": 1.0500353574752808, "learning_rate": 1.0111144082192048e-06, "loss": 0.5252, "step": 12630 }, { "epoch": 0.8, "grad_norm": 1.1121727228164673, "learning_rate": 1.0104958667878778e-06, "loss": 0.5176, "step": 12631 }, { "epoch": 0.8, "grad_norm": 1.0687726736068726, "learning_rate": 1.0098774933406903e-06, "loss": 0.513, "step": 12632 }, { "epoch": 0.8, "grad_norm": 1.0900862216949463, "learning_rate": 1.0092592879036834e-06, "loss": 0.5047, "step": 12633 }, { "epoch": 0.8, "grad_norm": 1.0976719856262207, "learning_rate": 1.0086412505028836e-06, "loss": 0.4683, "step": 12634 }, { "epoch": 0.8, "grad_norm": 1.1430429220199585, "learning_rate": 1.0080233811643158e-06, "loss": 0.5266, "step": 12635 }, { "epoch": 0.8, "grad_norm": 0.9831097722053528, "learning_rate": 1.0074056799139981e-06, "loss": 0.5366, "step": 12636 }, { "epoch": 0.8, "grad_norm": 1.0840317010879517, "learning_rate": 1.006788146777935e-06, "loss": 0.5049, "step": 12637 }, { "epoch": 0.8, "grad_norm": 1.0385769605636597, "learning_rate": 1.0061707817821343e-06, "loss": 0.5058, "step": 12638 }, { "epoch": 0.8, "grad_norm": 1.0986313819885254, "learning_rate": 1.0055535849525872e-06, "loss": 0.4705, "step": 12639 }, { "epoch": 0.8, "grad_norm": 1.1245629787445068, "learning_rate": 1.004936556315283e-06, "loss": 0.5381, "step": 12640 }, { "epoch": 0.8, "grad_norm": 1.0714011192321777, "learning_rate": 1.004319695896202e-06, "loss": 0.5182, "step": 12641 }, { "epoch": 0.8, "grad_norm": 1.0748082399368286, "learning_rate": 1.0037030037213197e-06, "loss": 0.4803, "step": 12642 }, { "epoch": 0.8, "grad_norm": 1.1025422811508179, "learning_rate": 1.0030864798166013e-06, "loss": 0.5621, "step": 12643 }, { "epoch": 0.8, "grad_norm": 1.0863114595413208, "learning_rate": 1.0024701242080082e-06, "loss": 0.5265, "step": 12644 }, { "epoch": 0.8, "grad_norm": 1.1285635232925415, "learning_rate": 1.0018539369214891e-06, "loss": 0.5193, "step": 12645 }, { "epoch": 0.8, "grad_norm": 0.9931705594062805, "learning_rate": 1.0012379179829951e-06, "loss": 0.5036, "step": 12646 }, { "epoch": 0.8, "grad_norm": 1.1284775733947754, "learning_rate": 1.0006220674184602e-06, "loss": 0.5376, "step": 12647 }, { "epoch": 0.8, "grad_norm": 1.1133114099502563, "learning_rate": 1.0000063852538172e-06, "loss": 0.4708, "step": 12648 }, { "epoch": 0.8, "grad_norm": 1.1451929807662964, "learning_rate": 9.993908715149902e-07, "loss": 0.5042, "step": 12649 }, { "epoch": 0.8, "grad_norm": 1.0009359121322632, "learning_rate": 9.98775526227897e-07, "loss": 0.451, "step": 12650 }, { "epoch": 0.8, "grad_norm": 1.0615708827972412, "learning_rate": 9.981603494184473e-07, "loss": 0.5368, "step": 12651 }, { "epoch": 0.8, "grad_norm": 1.0838319063186646, "learning_rate": 9.975453411125447e-07, "loss": 0.5032, "step": 12652 }, { "epoch": 0.8, "grad_norm": 1.1050270795822144, "learning_rate": 9.969305013360825e-07, "loss": 0.4872, "step": 12653 }, { "epoch": 0.8, "grad_norm": 1.1218594312667847, "learning_rate": 9.963158301149522e-07, "loss": 0.532, "step": 12654 }, { "epoch": 0.8, "grad_norm": 1.0388950109481812, "learning_rate": 9.957013274750338e-07, "loss": 0.526, "step": 12655 }, { "epoch": 0.8, "grad_norm": 1.005384922027588, "learning_rate": 9.95086993442203e-07, "loss": 0.4991, "step": 12656 }, { "epoch": 0.8, "grad_norm": 1.1161460876464844, "learning_rate": 9.944728280423265e-07, "loss": 0.5416, "step": 12657 }, { "epoch": 0.8, "grad_norm": 0.9668041467666626, "learning_rate": 9.938588313012655e-07, "loss": 0.4708, "step": 12658 }, { "epoch": 0.8, "grad_norm": 1.1281183958053589, "learning_rate": 9.93245003244872e-07, "loss": 0.5069, "step": 12659 }, { "epoch": 0.8, "grad_norm": 1.054303526878357, "learning_rate": 9.92631343898995e-07, "loss": 0.5112, "step": 12660 }, { "epoch": 0.8, "grad_norm": 1.1362591981887817, "learning_rate": 9.920178532894698e-07, "loss": 0.4875, "step": 12661 }, { "epoch": 0.8, "grad_norm": 1.027287483215332, "learning_rate": 9.9140453144213e-07, "loss": 0.4441, "step": 12662 }, { "epoch": 0.8, "grad_norm": 1.027077078819275, "learning_rate": 9.907913783828004e-07, "loss": 0.4899, "step": 12663 }, { "epoch": 0.8, "grad_norm": 1.0585702657699585, "learning_rate": 9.901783941372988e-07, "loss": 0.4836, "step": 12664 }, { "epoch": 0.8, "grad_norm": 1.0785973072052002, "learning_rate": 9.895655787314361e-07, "loss": 0.557, "step": 12665 }, { "epoch": 0.8, "grad_norm": 1.0298945903778076, "learning_rate": 9.889529321910169e-07, "loss": 0.4785, "step": 12666 }, { "epoch": 0.8, "grad_norm": 1.070832371711731, "learning_rate": 9.88340454541834e-07, "loss": 0.4906, "step": 12667 }, { "epoch": 0.8, "grad_norm": 1.1544461250305176, "learning_rate": 9.87728145809681e-07, "loss": 0.5258, "step": 12668 }, { "epoch": 0.8, "grad_norm": 1.0338704586029053, "learning_rate": 9.871160060203371e-07, "loss": 0.4249, "step": 12669 }, { "epoch": 0.8, "grad_norm": 0.9609220027923584, "learning_rate": 9.865040351995787e-07, "loss": 0.4871, "step": 12670 }, { "epoch": 0.8, "grad_norm": 1.0077117681503296, "learning_rate": 9.85892233373173e-07, "loss": 0.4835, "step": 12671 }, { "epoch": 0.8, "grad_norm": 0.9985730648040771, "learning_rate": 9.852806005668813e-07, "loss": 0.4722, "step": 12672 }, { "epoch": 0.8, "grad_norm": 1.1201215982437134, "learning_rate": 9.846691368064577e-07, "loss": 0.4947, "step": 12673 }, { "epoch": 0.8, "grad_norm": 1.046615719795227, "learning_rate": 9.840578421176495e-07, "loss": 0.4741, "step": 12674 }, { "epoch": 0.8, "grad_norm": 1.0141711235046387, "learning_rate": 9.834467165261924e-07, "loss": 0.4817, "step": 12675 }, { "epoch": 0.8, "grad_norm": 1.1131911277770996, "learning_rate": 9.828357600578242e-07, "loss": 0.4825, "step": 12676 }, { "epoch": 0.8, "grad_norm": 1.1036888360977173, "learning_rate": 9.82224972738266e-07, "loss": 0.5249, "step": 12677 }, { "epoch": 0.8, "grad_norm": 1.0062801837921143, "learning_rate": 9.816143545932378e-07, "loss": 0.4517, "step": 12678 }, { "epoch": 0.8, "grad_norm": 1.1002241373062134, "learning_rate": 9.8100390564845e-07, "loss": 0.4809, "step": 12679 }, { "epoch": 0.8, "grad_norm": 0.9574532508850098, "learning_rate": 9.803936259296066e-07, "loss": 0.4358, "step": 12680 }, { "epoch": 0.8, "grad_norm": 1.0709331035614014, "learning_rate": 9.797835154624041e-07, "loss": 0.5312, "step": 12681 }, { "epoch": 0.8, "grad_norm": 1.082556128501892, "learning_rate": 9.791735742725339e-07, "loss": 0.5543, "step": 12682 }, { "epoch": 0.8, "grad_norm": 1.1491870880126953, "learning_rate": 9.78563802385676e-07, "loss": 0.4738, "step": 12683 }, { "epoch": 0.8, "grad_norm": 1.0681147575378418, "learning_rate": 9.779541998275067e-07, "loss": 0.496, "step": 12684 }, { "epoch": 0.8, "grad_norm": 1.092307209968567, "learning_rate": 9.773447666236946e-07, "loss": 0.471, "step": 12685 }, { "epoch": 0.8, "grad_norm": 0.9956874251365662, "learning_rate": 9.767355027999004e-07, "loss": 0.492, "step": 12686 }, { "epoch": 0.8, "grad_norm": 0.9814101457595825, "learning_rate": 9.761264083817795e-07, "loss": 0.4715, "step": 12687 }, { "epoch": 0.8, "grad_norm": 1.137710690498352, "learning_rate": 9.755174833949749e-07, "loss": 0.4593, "step": 12688 }, { "epoch": 0.8, "grad_norm": 1.1179423332214355, "learning_rate": 9.749087278651304e-07, "loss": 0.5595, "step": 12689 }, { "epoch": 0.8, "grad_norm": 1.055477261543274, "learning_rate": 9.743001418178782e-07, "loss": 0.5269, "step": 12690 }, { "epoch": 0.8, "grad_norm": 1.06284499168396, "learning_rate": 9.736917252788414e-07, "loss": 0.4771, "step": 12691 }, { "epoch": 0.8, "grad_norm": 1.029338002204895, "learning_rate": 9.730834782736393e-07, "loss": 0.4957, "step": 12692 }, { "epoch": 0.8, "grad_norm": 1.0246593952178955, "learning_rate": 9.724754008278836e-07, "loss": 0.491, "step": 12693 }, { "epoch": 0.8, "grad_norm": 0.9944297671318054, "learning_rate": 9.718674929671778e-07, "loss": 0.4525, "step": 12694 }, { "epoch": 0.8, "grad_norm": 1.0613635778427124, "learning_rate": 9.71259754717121e-07, "loss": 0.523, "step": 12695 }, { "epoch": 0.8, "grad_norm": 1.114700198173523, "learning_rate": 9.706521861032974e-07, "loss": 0.4554, "step": 12696 }, { "epoch": 0.8, "grad_norm": 1.114327073097229, "learning_rate": 9.700447871512953e-07, "loss": 0.5158, "step": 12697 }, { "epoch": 0.8, "grad_norm": 1.0712555646896362, "learning_rate": 9.694375578866889e-07, "loss": 0.5325, "step": 12698 }, { "epoch": 0.8, "grad_norm": 1.0607600212097168, "learning_rate": 9.688304983350443e-07, "loss": 0.5167, "step": 12699 }, { "epoch": 0.8, "grad_norm": 1.012575626373291, "learning_rate": 9.682236085219243e-07, "loss": 0.4688, "step": 12700 }, { "epoch": 0.8, "grad_norm": 1.0543360710144043, "learning_rate": 9.67616888472882e-07, "loss": 0.4889, "step": 12701 }, { "epoch": 0.8, "grad_norm": 1.0162805318832397, "learning_rate": 9.670103382134655e-07, "loss": 0.4771, "step": 12702 }, { "epoch": 0.8, "grad_norm": 1.0929137468338013, "learning_rate": 9.664039577692152e-07, "loss": 0.5128, "step": 12703 }, { "epoch": 0.8, "grad_norm": 1.0132461786270142, "learning_rate": 9.65797747165661e-07, "loss": 0.505, "step": 12704 }, { "epoch": 0.8, "grad_norm": 1.038873314857483, "learning_rate": 9.65191706428328e-07, "loss": 0.5092, "step": 12705 }, { "epoch": 0.8, "grad_norm": 1.0783709287643433, "learning_rate": 9.645858355827392e-07, "loss": 0.5183, "step": 12706 }, { "epoch": 0.81, "grad_norm": 0.9915413856506348, "learning_rate": 9.639801346544015e-07, "loss": 0.5139, "step": 12707 }, { "epoch": 0.81, "grad_norm": 1.0379064083099365, "learning_rate": 9.633746036688196e-07, "loss": 0.4709, "step": 12708 }, { "epoch": 0.81, "grad_norm": 0.9953266978263855, "learning_rate": 9.627692426514907e-07, "loss": 0.4872, "step": 12709 }, { "epoch": 0.81, "grad_norm": 1.0961235761642456, "learning_rate": 9.621640516279047e-07, "loss": 0.51, "step": 12710 }, { "epoch": 0.81, "grad_norm": 1.0647658109664917, "learning_rate": 9.61559030623545e-07, "loss": 0.5198, "step": 12711 }, { "epoch": 0.81, "grad_norm": 1.0856093168258667, "learning_rate": 9.609541796638848e-07, "loss": 0.4598, "step": 12712 }, { "epoch": 0.81, "grad_norm": 1.0690525770187378, "learning_rate": 9.603494987743932e-07, "loss": 0.5155, "step": 12713 }, { "epoch": 0.81, "grad_norm": 1.1249573230743408, "learning_rate": 9.597449879805314e-07, "loss": 0.4989, "step": 12714 }, { "epoch": 0.81, "grad_norm": 1.0353507995605469, "learning_rate": 9.59140647307753e-07, "loss": 0.4712, "step": 12715 }, { "epoch": 0.81, "grad_norm": 1.1793574094772339, "learning_rate": 9.585364767815048e-07, "loss": 0.485, "step": 12716 }, { "epoch": 0.81, "grad_norm": 1.0845911502838135, "learning_rate": 9.57932476427228e-07, "loss": 0.5272, "step": 12717 }, { "epoch": 0.81, "grad_norm": 1.0367937088012695, "learning_rate": 9.573286462703501e-07, "loss": 0.4813, "step": 12718 }, { "epoch": 0.81, "grad_norm": 0.994184136390686, "learning_rate": 9.567249863363027e-07, "loss": 0.479, "step": 12719 }, { "epoch": 0.81, "grad_norm": 1.0609592199325562, "learning_rate": 9.56121496650499e-07, "loss": 0.5304, "step": 12720 }, { "epoch": 0.81, "grad_norm": 0.9854810833930969, "learning_rate": 9.55518177238351e-07, "loss": 0.5232, "step": 12721 }, { "epoch": 0.81, "grad_norm": 1.023314356803894, "learning_rate": 9.549150281252633e-07, "loss": 0.5233, "step": 12722 }, { "epoch": 0.81, "grad_norm": 1.0726054906845093, "learning_rate": 9.54312049336632e-07, "loss": 0.5171, "step": 12723 }, { "epoch": 0.81, "grad_norm": 0.9805082678794861, "learning_rate": 9.53709240897846e-07, "loss": 0.4573, "step": 12724 }, { "epoch": 0.81, "grad_norm": 1.0192350149154663, "learning_rate": 9.531066028342895e-07, "loss": 0.5394, "step": 12725 }, { "epoch": 0.81, "grad_norm": 1.0108089447021484, "learning_rate": 9.525041351713332e-07, "loss": 0.4892, "step": 12726 }, { "epoch": 0.81, "grad_norm": 1.0106862783432007, "learning_rate": 9.519018379343486e-07, "loss": 0.4612, "step": 12727 }, { "epoch": 0.81, "grad_norm": 1.0383695363998413, "learning_rate": 9.512997111486965e-07, "loss": 0.53, "step": 12728 }, { "epoch": 0.81, "grad_norm": 1.0875706672668457, "learning_rate": 9.506977548397284e-07, "loss": 0.5628, "step": 12729 }, { "epoch": 0.81, "grad_norm": 1.0377280712127686, "learning_rate": 9.50095969032791e-07, "loss": 0.5389, "step": 12730 }, { "epoch": 0.81, "grad_norm": 1.090673804283142, "learning_rate": 9.494943537532242e-07, "loss": 0.494, "step": 12731 }, { "epoch": 0.81, "grad_norm": 0.9572697281837463, "learning_rate": 9.488929090263588e-07, "loss": 0.465, "step": 12732 }, { "epoch": 0.81, "grad_norm": 1.033835530281067, "learning_rate": 9.482916348775217e-07, "loss": 0.5165, "step": 12733 }, { "epoch": 0.81, "grad_norm": 1.1086596250534058, "learning_rate": 9.476905313320283e-07, "loss": 0.5433, "step": 12734 }, { "epoch": 0.81, "grad_norm": 1.0501341819763184, "learning_rate": 9.470895984151879e-07, "loss": 0.4828, "step": 12735 }, { "epoch": 0.81, "grad_norm": 1.0581494569778442, "learning_rate": 9.464888361523078e-07, "loss": 0.4805, "step": 12736 }, { "epoch": 0.81, "grad_norm": 1.1357430219650269, "learning_rate": 9.458882445686807e-07, "loss": 0.5229, "step": 12737 }, { "epoch": 0.81, "grad_norm": 1.1199616193771362, "learning_rate": 9.452878236895963e-07, "loss": 0.5418, "step": 12738 }, { "epoch": 0.81, "grad_norm": 1.0396251678466797, "learning_rate": 9.446875735403366e-07, "loss": 0.495, "step": 12739 }, { "epoch": 0.81, "grad_norm": 1.0419509410858154, "learning_rate": 9.440874941461753e-07, "loss": 0.4972, "step": 12740 }, { "epoch": 0.81, "grad_norm": 1.0135390758514404, "learning_rate": 9.434875855323816e-07, "loss": 0.4716, "step": 12741 }, { "epoch": 0.81, "grad_norm": 1.018988013267517, "learning_rate": 9.428878477242131e-07, "loss": 0.5212, "step": 12742 }, { "epoch": 0.81, "grad_norm": 1.0946797132492065, "learning_rate": 9.422882807469219e-07, "loss": 0.466, "step": 12743 }, { "epoch": 0.81, "grad_norm": 1.0877785682678223, "learning_rate": 9.416888846257588e-07, "loss": 0.5006, "step": 12744 }, { "epoch": 0.81, "grad_norm": 1.0570225715637207, "learning_rate": 9.41089659385957e-07, "loss": 0.5155, "step": 12745 }, { "epoch": 0.81, "grad_norm": 1.065837025642395, "learning_rate": 9.404906050527496e-07, "loss": 0.5261, "step": 12746 }, { "epoch": 0.81, "grad_norm": 1.050106406211853, "learning_rate": 9.398917216513625e-07, "loss": 0.4856, "step": 12747 }, { "epoch": 0.81, "grad_norm": 1.1717270612716675, "learning_rate": 9.39293009207008e-07, "loss": 0.5413, "step": 12748 }, { "epoch": 0.81, "grad_norm": 1.1501052379608154, "learning_rate": 9.386944677449017e-07, "loss": 0.5275, "step": 12749 }, { "epoch": 0.81, "grad_norm": 0.9979376196861267, "learning_rate": 9.380960972902414e-07, "loss": 0.4461, "step": 12750 }, { "epoch": 0.81, "grad_norm": 1.067839503288269, "learning_rate": 9.374978978682248e-07, "loss": 0.4897, "step": 12751 }, { "epoch": 0.81, "grad_norm": 1.2218809127807617, "learning_rate": 9.368998695040387e-07, "loss": 0.5476, "step": 12752 }, { "epoch": 0.81, "grad_norm": 1.0331355333328247, "learning_rate": 9.363020122228645e-07, "loss": 0.4985, "step": 12753 }, { "epoch": 0.81, "grad_norm": 1.0841494798660278, "learning_rate": 9.357043260498766e-07, "loss": 0.5028, "step": 12754 }, { "epoch": 0.81, "grad_norm": 1.0547598600387573, "learning_rate": 9.351068110102418e-07, "loss": 0.4473, "step": 12755 }, { "epoch": 0.81, "grad_norm": 1.2031419277191162, "learning_rate": 9.345094671291155e-07, "loss": 0.5074, "step": 12756 }, { "epoch": 0.81, "grad_norm": 1.1683024168014526, "learning_rate": 9.339122944316559e-07, "loss": 0.541, "step": 12757 }, { "epoch": 0.81, "grad_norm": 1.0284042358398438, "learning_rate": 9.333152929430029e-07, "loss": 0.4993, "step": 12758 }, { "epoch": 0.81, "grad_norm": 1.1034044027328491, "learning_rate": 9.327184626882963e-07, "loss": 0.5493, "step": 12759 }, { "epoch": 0.81, "grad_norm": 1.0372745990753174, "learning_rate": 9.321218036926677e-07, "loss": 0.4963, "step": 12760 }, { "epoch": 0.81, "grad_norm": 1.0754164457321167, "learning_rate": 9.315253159812359e-07, "loss": 0.5033, "step": 12761 }, { "epoch": 0.81, "grad_norm": 1.061840295791626, "learning_rate": 9.30928999579121e-07, "loss": 0.4783, "step": 12762 }, { "epoch": 0.81, "grad_norm": 1.165432095527649, "learning_rate": 9.303328545114321e-07, "loss": 0.4691, "step": 12763 }, { "epoch": 0.81, "grad_norm": 1.0695961713790894, "learning_rate": 9.29736880803268e-07, "loss": 0.4595, "step": 12764 }, { "epoch": 0.81, "grad_norm": 1.1859469413757324, "learning_rate": 9.29141078479725e-07, "loss": 0.5298, "step": 12765 }, { "epoch": 0.81, "grad_norm": 1.1136317253112793, "learning_rate": 9.285454475658889e-07, "loss": 0.5012, "step": 12766 }, { "epoch": 0.81, "grad_norm": 0.957591712474823, "learning_rate": 9.279499880868409e-07, "loss": 0.4346, "step": 12767 }, { "epoch": 0.81, "grad_norm": 1.0229932069778442, "learning_rate": 9.273547000676547e-07, "loss": 0.5261, "step": 12768 }, { "epoch": 0.81, "grad_norm": 1.070711374282837, "learning_rate": 9.267595835333915e-07, "loss": 0.5153, "step": 12769 }, { "epoch": 0.81, "grad_norm": 1.040297269821167, "learning_rate": 9.261646385091139e-07, "loss": 0.4738, "step": 12770 }, { "epoch": 0.81, "grad_norm": 1.0666635036468506, "learning_rate": 9.25569865019873e-07, "loss": 0.4977, "step": 12771 }, { "epoch": 0.81, "grad_norm": 1.158909797668457, "learning_rate": 9.249752630907094e-07, "loss": 0.4999, "step": 12772 }, { "epoch": 0.81, "grad_norm": 1.0651565790176392, "learning_rate": 9.243808327466619e-07, "loss": 0.47, "step": 12773 }, { "epoch": 0.81, "grad_norm": 1.0467365980148315, "learning_rate": 9.237865740127594e-07, "loss": 0.4714, "step": 12774 }, { "epoch": 0.81, "grad_norm": 1.0020484924316406, "learning_rate": 9.231924869140241e-07, "loss": 0.4715, "step": 12775 }, { "epoch": 0.81, "grad_norm": 1.0114612579345703, "learning_rate": 9.225985714754721e-07, "loss": 0.4824, "step": 12776 }, { "epoch": 0.81, "grad_norm": 1.0749523639678955, "learning_rate": 9.220048277221089e-07, "loss": 0.5071, "step": 12777 }, { "epoch": 0.81, "grad_norm": 1.038300633430481, "learning_rate": 9.214112556789345e-07, "loss": 0.5062, "step": 12778 }, { "epoch": 0.81, "grad_norm": 1.062896490097046, "learning_rate": 9.208178553709468e-07, "loss": 0.5121, "step": 12779 }, { "epoch": 0.81, "grad_norm": 1.1365878582000732, "learning_rate": 9.202246268231274e-07, "loss": 0.5414, "step": 12780 }, { "epoch": 0.81, "grad_norm": 1.1047718524932861, "learning_rate": 9.196315700604564e-07, "loss": 0.5558, "step": 12781 }, { "epoch": 0.81, "grad_norm": 0.9735736846923828, "learning_rate": 9.190386851079053e-07, "loss": 0.4957, "step": 12782 }, { "epoch": 0.81, "grad_norm": 1.096001148223877, "learning_rate": 9.184459719904388e-07, "loss": 0.4893, "step": 12783 }, { "epoch": 0.81, "grad_norm": 1.0629702806472778, "learning_rate": 9.178534307330145e-07, "loss": 0.4838, "step": 12784 }, { "epoch": 0.81, "grad_norm": 1.0845651626586914, "learning_rate": 9.17261061360581e-07, "loss": 0.5347, "step": 12785 }, { "epoch": 0.81, "grad_norm": 1.0777674913406372, "learning_rate": 9.166688638980791e-07, "loss": 0.4936, "step": 12786 }, { "epoch": 0.81, "grad_norm": 1.0557137727737427, "learning_rate": 9.160768383704499e-07, "loss": 0.4577, "step": 12787 }, { "epoch": 0.81, "grad_norm": 0.97430819272995, "learning_rate": 9.154849848026165e-07, "loss": 0.4938, "step": 12788 }, { "epoch": 0.81, "grad_norm": 1.0108232498168945, "learning_rate": 9.148933032195013e-07, "loss": 0.497, "step": 12789 }, { "epoch": 0.81, "grad_norm": 1.094015121459961, "learning_rate": 9.14301793646018e-07, "loss": 0.5157, "step": 12790 }, { "epoch": 0.81, "grad_norm": 1.0083976984024048, "learning_rate": 9.137104561070736e-07, "loss": 0.4662, "step": 12791 }, { "epoch": 0.81, "grad_norm": 1.0446066856384277, "learning_rate": 9.13119290627566e-07, "loss": 0.5072, "step": 12792 }, { "epoch": 0.81, "grad_norm": 1.00369131565094, "learning_rate": 9.125282972323895e-07, "loss": 0.4938, "step": 12793 }, { "epoch": 0.81, "grad_norm": 1.1130354404449463, "learning_rate": 9.119374759464261e-07, "loss": 0.5223, "step": 12794 }, { "epoch": 0.81, "grad_norm": 0.976584792137146, "learning_rate": 9.113468267945541e-07, "loss": 0.4466, "step": 12795 }, { "epoch": 0.81, "grad_norm": 1.0781188011169434, "learning_rate": 9.107563498016436e-07, "loss": 0.5376, "step": 12796 }, { "epoch": 0.81, "grad_norm": 1.1136902570724487, "learning_rate": 9.101660449925576e-07, "loss": 0.5402, "step": 12797 }, { "epoch": 0.81, "grad_norm": 1.1188857555389404, "learning_rate": 9.095759123921538e-07, "loss": 0.4836, "step": 12798 }, { "epoch": 0.81, "grad_norm": 1.0546386241912842, "learning_rate": 9.089859520252759e-07, "loss": 0.4632, "step": 12799 }, { "epoch": 0.81, "grad_norm": 1.1947174072265625, "learning_rate": 9.083961639167693e-07, "loss": 0.5401, "step": 12800 }, { "epoch": 0.81, "grad_norm": 1.2043405771255493, "learning_rate": 9.078065480914678e-07, "loss": 0.5621, "step": 12801 }, { "epoch": 0.81, "grad_norm": 1.1328403949737549, "learning_rate": 9.072171045741957e-07, "loss": 0.52, "step": 12802 }, { "epoch": 0.81, "grad_norm": 1.0790365934371948, "learning_rate": 9.066278333897732e-07, "loss": 0.5446, "step": 12803 }, { "epoch": 0.81, "grad_norm": 1.0958285331726074, "learning_rate": 9.060387345630134e-07, "loss": 0.487, "step": 12804 }, { "epoch": 0.81, "grad_norm": 1.1146931648254395, "learning_rate": 9.054498081187202e-07, "loss": 0.4661, "step": 12805 }, { "epoch": 0.81, "grad_norm": 1.0177806615829468, "learning_rate": 9.048610540816932e-07, "loss": 0.4937, "step": 12806 }, { "epoch": 0.81, "grad_norm": 0.9972603917121887, "learning_rate": 9.042724724767199e-07, "loss": 0.5211, "step": 12807 }, { "epoch": 0.81, "grad_norm": 1.0634011030197144, "learning_rate": 9.036840633285837e-07, "loss": 0.4974, "step": 12808 }, { "epoch": 0.81, "grad_norm": 1.0534765720367432, "learning_rate": 9.030958266620637e-07, "loss": 0.5286, "step": 12809 }, { "epoch": 0.81, "grad_norm": 1.0246983766555786, "learning_rate": 9.025077625019252e-07, "loss": 0.4781, "step": 12810 }, { "epoch": 0.81, "grad_norm": 1.0605356693267822, "learning_rate": 9.01919870872931e-07, "loss": 0.5262, "step": 12811 }, { "epoch": 0.81, "grad_norm": 1.1038426160812378, "learning_rate": 9.013321517998347e-07, "loss": 0.4912, "step": 12812 }, { "epoch": 0.81, "grad_norm": 1.1282594203948975, "learning_rate": 9.007446053073832e-07, "loss": 0.5649, "step": 12813 }, { "epoch": 0.81, "grad_norm": 0.9613350629806519, "learning_rate": 9.001572314203172e-07, "loss": 0.4879, "step": 12814 }, { "epoch": 0.81, "grad_norm": 1.1107274293899536, "learning_rate": 8.99570030163367e-07, "loss": 0.5202, "step": 12815 }, { "epoch": 0.81, "grad_norm": 1.0693176984786987, "learning_rate": 8.989830015612566e-07, "loss": 0.5076, "step": 12816 }, { "epoch": 0.81, "grad_norm": 1.0245882272720337, "learning_rate": 8.983961456387086e-07, "loss": 0.5388, "step": 12817 }, { "epoch": 0.81, "grad_norm": 1.0226383209228516, "learning_rate": 8.978094624204292e-07, "loss": 0.4687, "step": 12818 }, { "epoch": 0.81, "grad_norm": 1.06915283203125, "learning_rate": 8.972229519311227e-07, "loss": 0.522, "step": 12819 }, { "epoch": 0.81, "grad_norm": 1.0821400880813599, "learning_rate": 8.966366141954852e-07, "loss": 0.5298, "step": 12820 }, { "epoch": 0.81, "grad_norm": 1.0791270732879639, "learning_rate": 8.960504492382055e-07, "loss": 0.5111, "step": 12821 }, { "epoch": 0.81, "grad_norm": 1.0358119010925293, "learning_rate": 8.95464457083966e-07, "loss": 0.49, "step": 12822 }, { "epoch": 0.81, "grad_norm": 1.0727996826171875, "learning_rate": 8.948786377574382e-07, "loss": 0.5114, "step": 12823 }, { "epoch": 0.81, "grad_norm": 1.0534948110580444, "learning_rate": 8.942929912832904e-07, "loss": 0.4841, "step": 12824 }, { "epoch": 0.81, "grad_norm": 0.9961467981338501, "learning_rate": 8.93707517686182e-07, "loss": 0.4767, "step": 12825 }, { "epoch": 0.81, "grad_norm": 0.9979651570320129, "learning_rate": 8.93122216990765e-07, "loss": 0.477, "step": 12826 }, { "epoch": 0.81, "grad_norm": 1.1751617193222046, "learning_rate": 8.92537089221685e-07, "loss": 0.5293, "step": 12827 }, { "epoch": 0.81, "grad_norm": 1.0309288501739502, "learning_rate": 8.919521344035808e-07, "loss": 0.4804, "step": 12828 }, { "epoch": 0.81, "grad_norm": 1.023771047592163, "learning_rate": 8.913673525610783e-07, "loss": 0.5093, "step": 12829 }, { "epoch": 0.81, "grad_norm": 1.0918841361999512, "learning_rate": 8.907827437188065e-07, "loss": 0.5151, "step": 12830 }, { "epoch": 0.81, "grad_norm": 1.0214347839355469, "learning_rate": 8.901983079013771e-07, "loss": 0.4703, "step": 12831 }, { "epoch": 0.81, "grad_norm": 1.103243350982666, "learning_rate": 8.896140451334001e-07, "loss": 0.5128, "step": 12832 }, { "epoch": 0.81, "grad_norm": 1.016454815864563, "learning_rate": 8.890299554394766e-07, "loss": 0.4864, "step": 12833 }, { "epoch": 0.81, "grad_norm": 1.051961898803711, "learning_rate": 8.884460388442006e-07, "loss": 0.4967, "step": 12834 }, { "epoch": 0.81, "grad_norm": 1.088719129562378, "learning_rate": 8.878622953721589e-07, "loss": 0.4788, "step": 12835 }, { "epoch": 0.81, "grad_norm": 1.0930858850479126, "learning_rate": 8.87278725047932e-07, "loss": 0.5075, "step": 12836 }, { "epoch": 0.81, "grad_norm": 1.1117326021194458, "learning_rate": 8.866953278960888e-07, "loss": 0.5197, "step": 12837 }, { "epoch": 0.81, "grad_norm": 1.0902756452560425, "learning_rate": 8.86112103941198e-07, "loss": 0.4675, "step": 12838 }, { "epoch": 0.81, "grad_norm": 0.9956930875778198, "learning_rate": 8.855290532078148e-07, "loss": 0.4349, "step": 12839 }, { "epoch": 0.81, "grad_norm": 1.092830777168274, "learning_rate": 8.849461757204897e-07, "loss": 0.5405, "step": 12840 }, { "epoch": 0.81, "grad_norm": 1.0364973545074463, "learning_rate": 8.843634715037669e-07, "loss": 0.5061, "step": 12841 }, { "epoch": 0.81, "grad_norm": 1.0219436883926392, "learning_rate": 8.83780940582179e-07, "loss": 0.541, "step": 12842 }, { "epoch": 0.81, "grad_norm": 0.9847308993339539, "learning_rate": 8.83198582980257e-07, "loss": 0.4969, "step": 12843 }, { "epoch": 0.81, "grad_norm": 1.1531392335891724, "learning_rate": 8.826163987225233e-07, "loss": 0.5078, "step": 12844 }, { "epoch": 0.81, "grad_norm": 1.0827713012695312, "learning_rate": 8.82034387833488e-07, "loss": 0.513, "step": 12845 }, { "epoch": 0.81, "grad_norm": 1.0626567602157593, "learning_rate": 8.814525503376597e-07, "loss": 0.4932, "step": 12846 }, { "epoch": 0.81, "grad_norm": 1.0791380405426025, "learning_rate": 8.808708862595367e-07, "loss": 0.4864, "step": 12847 }, { "epoch": 0.81, "grad_norm": 1.0617848634719849, "learning_rate": 8.802893956236114e-07, "loss": 0.4669, "step": 12848 }, { "epoch": 0.81, "grad_norm": 1.0819118022918701, "learning_rate": 8.797080784543699e-07, "loss": 0.4879, "step": 12849 }, { "epoch": 0.81, "grad_norm": 1.1863682270050049, "learning_rate": 8.791269347762849e-07, "loss": 0.5103, "step": 12850 }, { "epoch": 0.81, "grad_norm": 1.0164762735366821, "learning_rate": 8.785459646138306e-07, "loss": 0.4691, "step": 12851 }, { "epoch": 0.81, "grad_norm": 1.0647052526474, "learning_rate": 8.779651679914692e-07, "loss": 0.4773, "step": 12852 }, { "epoch": 0.81, "grad_norm": 1.0151244401931763, "learning_rate": 8.773845449336537e-07, "loss": 0.5087, "step": 12853 }, { "epoch": 0.81, "grad_norm": 1.0956175327301025, "learning_rate": 8.768040954648338e-07, "loss": 0.5361, "step": 12854 }, { "epoch": 0.81, "grad_norm": 1.0666496753692627, "learning_rate": 8.762238196094502e-07, "loss": 0.4673, "step": 12855 }, { "epoch": 0.81, "grad_norm": 1.1929495334625244, "learning_rate": 8.756437173919352e-07, "loss": 0.5144, "step": 12856 }, { "epoch": 0.81, "grad_norm": 0.9840711951255798, "learning_rate": 8.750637888367164e-07, "loss": 0.4645, "step": 12857 }, { "epoch": 0.81, "grad_norm": 1.0075687170028687, "learning_rate": 8.744840339682126e-07, "loss": 0.4712, "step": 12858 }, { "epoch": 0.81, "grad_norm": 0.9786378145217896, "learning_rate": 8.73904452810832e-07, "loss": 0.4876, "step": 12859 }, { "epoch": 0.81, "grad_norm": 1.1230751276016235, "learning_rate": 8.733250453889841e-07, "loss": 0.461, "step": 12860 }, { "epoch": 0.81, "grad_norm": 1.1180142164230347, "learning_rate": 8.727458117270615e-07, "loss": 0.4621, "step": 12861 }, { "epoch": 0.81, "grad_norm": 1.0221726894378662, "learning_rate": 8.721667518494553e-07, "loss": 0.5081, "step": 12862 }, { "epoch": 0.81, "grad_norm": 1.059422492980957, "learning_rate": 8.715878657805471e-07, "loss": 0.5142, "step": 12863 }, { "epoch": 0.81, "grad_norm": 1.0881012678146362, "learning_rate": 8.710091535447123e-07, "loss": 0.509, "step": 12864 }, { "epoch": 0.82, "grad_norm": 1.0379509925842285, "learning_rate": 8.704306151663184e-07, "loss": 0.4673, "step": 12865 }, { "epoch": 0.82, "grad_norm": 1.0658977031707764, "learning_rate": 8.698522506697271e-07, "loss": 0.502, "step": 12866 }, { "epoch": 0.82, "grad_norm": 1.005908489227295, "learning_rate": 8.692740600792871e-07, "loss": 0.5111, "step": 12867 }, { "epoch": 0.82, "grad_norm": 1.0216130018234253, "learning_rate": 8.686960434193486e-07, "loss": 0.473, "step": 12868 }, { "epoch": 0.82, "grad_norm": 1.1380136013031006, "learning_rate": 8.681182007142475e-07, "loss": 0.5199, "step": 12869 }, { "epoch": 0.82, "grad_norm": 1.0926117897033691, "learning_rate": 8.675405319883146e-07, "loss": 0.475, "step": 12870 }, { "epoch": 0.82, "grad_norm": 1.105962872505188, "learning_rate": 8.66963037265876e-07, "loss": 0.5099, "step": 12871 }, { "epoch": 0.82, "grad_norm": 0.9456197619438171, "learning_rate": 8.663857165712431e-07, "loss": 0.4366, "step": 12872 }, { "epoch": 0.82, "grad_norm": 1.0361655950546265, "learning_rate": 8.658085699287294e-07, "loss": 0.5059, "step": 12873 }, { "epoch": 0.82, "grad_norm": 1.1172592639923096, "learning_rate": 8.652315973626362e-07, "loss": 0.5217, "step": 12874 }, { "epoch": 0.82, "grad_norm": 1.0309276580810547, "learning_rate": 8.646547988972553e-07, "loss": 0.4685, "step": 12875 }, { "epoch": 0.82, "grad_norm": 0.9785642623901367, "learning_rate": 8.64078174556875e-07, "loss": 0.5054, "step": 12876 }, { "epoch": 0.82, "grad_norm": 1.047278642654419, "learning_rate": 8.635017243657751e-07, "loss": 0.5194, "step": 12877 }, { "epoch": 0.82, "grad_norm": 1.091817021369934, "learning_rate": 8.629254483482274e-07, "loss": 0.463, "step": 12878 }, { "epoch": 0.82, "grad_norm": 1.017472505569458, "learning_rate": 8.623493465284987e-07, "loss": 0.4432, "step": 12879 }, { "epoch": 0.82, "grad_norm": 1.034571647644043, "learning_rate": 8.61773418930843e-07, "loss": 0.492, "step": 12880 }, { "epoch": 0.82, "grad_norm": 1.1846771240234375, "learning_rate": 8.611976655795135e-07, "loss": 0.5708, "step": 12881 }, { "epoch": 0.82, "grad_norm": 1.1050655841827393, "learning_rate": 8.606220864987541e-07, "loss": 0.5207, "step": 12882 }, { "epoch": 0.82, "grad_norm": 1.016854166984558, "learning_rate": 8.600466817127972e-07, "loss": 0.4698, "step": 12883 }, { "epoch": 0.82, "grad_norm": 1.1042780876159668, "learning_rate": 8.59471451245873e-07, "loss": 0.4675, "step": 12884 }, { "epoch": 0.82, "grad_norm": 1.055746078491211, "learning_rate": 8.588963951222024e-07, "loss": 0.5221, "step": 12885 }, { "epoch": 0.82, "grad_norm": 1.0114814043045044, "learning_rate": 8.583215133659983e-07, "loss": 0.5464, "step": 12886 }, { "epoch": 0.82, "grad_norm": 1.0872836112976074, "learning_rate": 8.577468060014688e-07, "loss": 0.5093, "step": 12887 }, { "epoch": 0.82, "grad_norm": 1.14777672290802, "learning_rate": 8.571722730528098e-07, "loss": 0.5534, "step": 12888 }, { "epoch": 0.82, "grad_norm": 1.007361650466919, "learning_rate": 8.565979145442138e-07, "loss": 0.5071, "step": 12889 }, { "epoch": 0.82, "grad_norm": 1.047307014465332, "learning_rate": 8.560237304998681e-07, "loss": 0.5, "step": 12890 }, { "epoch": 0.82, "grad_norm": 1.188661813735962, "learning_rate": 8.554497209439461e-07, "loss": 0.5295, "step": 12891 }, { "epoch": 0.82, "grad_norm": 1.0467709302902222, "learning_rate": 8.548758859006184e-07, "loss": 0.504, "step": 12892 }, { "epoch": 0.82, "grad_norm": 1.1030771732330322, "learning_rate": 8.543022253940475e-07, "loss": 0.4859, "step": 12893 }, { "epoch": 0.82, "grad_norm": 1.01668381690979, "learning_rate": 8.537287394483878e-07, "loss": 0.4997, "step": 12894 }, { "epoch": 0.82, "grad_norm": 1.160611867904663, "learning_rate": 8.531554280877885e-07, "loss": 0.4866, "step": 12895 }, { "epoch": 0.82, "grad_norm": 1.108699917793274, "learning_rate": 8.525822913363868e-07, "loss": 0.539, "step": 12896 }, { "epoch": 0.82, "grad_norm": 1.0514135360717773, "learning_rate": 8.520093292183163e-07, "loss": 0.4907, "step": 12897 }, { "epoch": 0.82, "grad_norm": 1.1542928218841553, "learning_rate": 8.514365417577048e-07, "loss": 0.479, "step": 12898 }, { "epoch": 0.82, "grad_norm": 1.0242931842803955, "learning_rate": 8.50863928978668e-07, "loss": 0.4756, "step": 12899 }, { "epoch": 0.82, "grad_norm": 1.0869053602218628, "learning_rate": 8.502914909053173e-07, "loss": 0.508, "step": 12900 }, { "epoch": 0.82, "grad_norm": 1.0241419076919556, "learning_rate": 8.497192275617577e-07, "loss": 0.4767, "step": 12901 }, { "epoch": 0.82, "grad_norm": 1.0105047225952148, "learning_rate": 8.491471389720807e-07, "loss": 0.5296, "step": 12902 }, { "epoch": 0.82, "grad_norm": 1.111020565032959, "learning_rate": 8.485752251603807e-07, "loss": 0.5032, "step": 12903 }, { "epoch": 0.82, "grad_norm": 1.0602821111679077, "learning_rate": 8.480034861507347e-07, "loss": 0.4677, "step": 12904 }, { "epoch": 0.82, "grad_norm": 1.0135266780853271, "learning_rate": 8.474319219672183e-07, "loss": 0.5016, "step": 12905 }, { "epoch": 0.82, "grad_norm": 1.0159438848495483, "learning_rate": 8.46860532633898e-07, "loss": 0.4794, "step": 12906 }, { "epoch": 0.82, "grad_norm": 1.0804152488708496, "learning_rate": 8.462893181748327e-07, "loss": 0.4866, "step": 12907 }, { "epoch": 0.82, "grad_norm": 1.025763750076294, "learning_rate": 8.457182786140744e-07, "loss": 0.5105, "step": 12908 }, { "epoch": 0.82, "grad_norm": 1.0539695024490356, "learning_rate": 8.451474139756693e-07, "loss": 0.5277, "step": 12909 }, { "epoch": 0.82, "grad_norm": 1.0051062107086182, "learning_rate": 8.445767242836506e-07, "loss": 0.4804, "step": 12910 }, { "epoch": 0.82, "grad_norm": 0.993935763835907, "learning_rate": 8.440062095620527e-07, "loss": 0.4868, "step": 12911 }, { "epoch": 0.82, "grad_norm": 1.1110044717788696, "learning_rate": 8.434358698348944e-07, "loss": 0.5325, "step": 12912 }, { "epoch": 0.82, "grad_norm": 1.0014790296554565, "learning_rate": 8.428657051261918e-07, "loss": 0.4937, "step": 12913 }, { "epoch": 0.82, "grad_norm": 1.0092440843582153, "learning_rate": 8.422957154599526e-07, "loss": 0.4946, "step": 12914 }, { "epoch": 0.82, "grad_norm": 1.1117631196975708, "learning_rate": 8.417259008601775e-07, "loss": 0.4752, "step": 12915 }, { "epoch": 0.82, "grad_norm": 1.0479555130004883, "learning_rate": 8.411562613508595e-07, "loss": 0.4874, "step": 12916 }, { "epoch": 0.82, "grad_norm": 1.1826703548431396, "learning_rate": 8.405867969559845e-07, "loss": 0.5166, "step": 12917 }, { "epoch": 0.82, "grad_norm": 1.0489535331726074, "learning_rate": 8.400175076995287e-07, "loss": 0.4758, "step": 12918 }, { "epoch": 0.82, "grad_norm": 1.0403448343276978, "learning_rate": 8.394483936054643e-07, "loss": 0.5041, "step": 12919 }, { "epoch": 0.82, "grad_norm": 1.0457063913345337, "learning_rate": 8.388794546977546e-07, "loss": 0.4999, "step": 12920 }, { "epoch": 0.82, "grad_norm": 1.0715712308883667, "learning_rate": 8.383106910003552e-07, "loss": 0.4867, "step": 12921 }, { "epoch": 0.82, "grad_norm": 1.1114780902862549, "learning_rate": 8.377421025372157e-07, "loss": 0.5058, "step": 12922 }, { "epoch": 0.82, "grad_norm": 0.9921567440032959, "learning_rate": 8.371736893322763e-07, "loss": 0.4085, "step": 12923 }, { "epoch": 0.82, "grad_norm": 1.0891449451446533, "learning_rate": 8.366054514094718e-07, "loss": 0.488, "step": 12924 }, { "epoch": 0.82, "grad_norm": 1.0652058124542236, "learning_rate": 8.360373887927298e-07, "loss": 0.5177, "step": 12925 }, { "epoch": 0.82, "grad_norm": 1.1317729949951172, "learning_rate": 8.35469501505966e-07, "loss": 0.5144, "step": 12926 }, { "epoch": 0.82, "grad_norm": 1.0853395462036133, "learning_rate": 8.349017895730948e-07, "loss": 0.4921, "step": 12927 }, { "epoch": 0.82, "grad_norm": 1.1067955493927002, "learning_rate": 8.343342530180198e-07, "loss": 0.4986, "step": 12928 }, { "epoch": 0.82, "grad_norm": 1.08108389377594, "learning_rate": 8.33766891864638e-07, "loss": 0.4834, "step": 12929 }, { "epoch": 0.82, "grad_norm": 1.1389966011047363, "learning_rate": 8.331997061368391e-07, "loss": 0.5181, "step": 12930 }, { "epoch": 0.82, "grad_norm": 1.072933554649353, "learning_rate": 8.326326958585062e-07, "loss": 0.5329, "step": 12931 }, { "epoch": 0.82, "grad_norm": 1.0903488397598267, "learning_rate": 8.320658610535115e-07, "loss": 0.5116, "step": 12932 }, { "epoch": 0.82, "grad_norm": 1.092515230178833, "learning_rate": 8.314992017457263e-07, "loss": 0.5006, "step": 12933 }, { "epoch": 0.82, "grad_norm": 1.00934636592865, "learning_rate": 8.30932717959007e-07, "loss": 0.4781, "step": 12934 }, { "epoch": 0.82, "grad_norm": 1.0941412448883057, "learning_rate": 8.303664097172087e-07, "loss": 0.5036, "step": 12935 }, { "epoch": 0.82, "grad_norm": 1.0578081607818604, "learning_rate": 8.298002770441749e-07, "loss": 0.5253, "step": 12936 }, { "epoch": 0.82, "grad_norm": 0.956196129322052, "learning_rate": 8.292343199637448e-07, "loss": 0.5058, "step": 12937 }, { "epoch": 0.82, "grad_norm": 1.0259432792663574, "learning_rate": 8.286685384997484e-07, "loss": 0.5223, "step": 12938 }, { "epoch": 0.82, "grad_norm": 1.0590115785598755, "learning_rate": 8.281029326760104e-07, "loss": 0.4955, "step": 12939 }, { "epoch": 0.82, "grad_norm": 1.0075199604034424, "learning_rate": 8.275375025163418e-07, "loss": 0.5035, "step": 12940 }, { "epoch": 0.82, "grad_norm": 1.0844745635986328, "learning_rate": 8.269722480445569e-07, "loss": 0.5579, "step": 12941 }, { "epoch": 0.82, "grad_norm": 1.0919443368911743, "learning_rate": 8.264071692844527e-07, "loss": 0.4824, "step": 12942 }, { "epoch": 0.82, "grad_norm": 0.9813203811645508, "learning_rate": 8.258422662598231e-07, "loss": 0.483, "step": 12943 }, { "epoch": 0.82, "grad_norm": 1.1761623620986938, "learning_rate": 8.252775389944556e-07, "loss": 0.489, "step": 12944 }, { "epoch": 0.82, "grad_norm": 1.1047816276550293, "learning_rate": 8.247129875121274e-07, "loss": 0.5299, "step": 12945 }, { "epoch": 0.82, "grad_norm": 1.0560872554779053, "learning_rate": 8.24148611836611e-07, "loss": 0.4865, "step": 12946 }, { "epoch": 0.82, "grad_norm": 1.0733801126480103, "learning_rate": 8.235844119916708e-07, "loss": 0.4838, "step": 12947 }, { "epoch": 0.82, "grad_norm": 1.1089249849319458, "learning_rate": 8.230203880010612e-07, "loss": 0.5146, "step": 12948 }, { "epoch": 0.82, "grad_norm": 1.030139446258545, "learning_rate": 8.224565398885325e-07, "loss": 0.4603, "step": 12949 }, { "epoch": 0.82, "grad_norm": 1.0687334537506104, "learning_rate": 8.218928676778264e-07, "loss": 0.5096, "step": 12950 }, { "epoch": 0.82, "grad_norm": 0.9699050188064575, "learning_rate": 8.213293713926767e-07, "loss": 0.436, "step": 12951 }, { "epoch": 0.82, "grad_norm": 1.1305122375488281, "learning_rate": 8.207660510568122e-07, "loss": 0.5413, "step": 12952 }, { "epoch": 0.82, "grad_norm": 0.9856038689613342, "learning_rate": 8.202029066939482e-07, "loss": 0.4482, "step": 12953 }, { "epoch": 0.82, "grad_norm": 0.9998235106468201, "learning_rate": 8.196399383278004e-07, "loss": 0.5385, "step": 12954 }, { "epoch": 0.82, "grad_norm": 1.179714322090149, "learning_rate": 8.190771459820739e-07, "loss": 0.5373, "step": 12955 }, { "epoch": 0.82, "grad_norm": 1.190774917602539, "learning_rate": 8.18514529680463e-07, "loss": 0.5386, "step": 12956 }, { "epoch": 0.82, "grad_norm": 1.0108187198638916, "learning_rate": 8.179520894466592e-07, "loss": 0.4892, "step": 12957 }, { "epoch": 0.82, "grad_norm": 1.0275996923446655, "learning_rate": 8.173898253043444e-07, "loss": 0.4336, "step": 12958 }, { "epoch": 0.82, "grad_norm": 1.0599771738052368, "learning_rate": 8.168277372771937e-07, "loss": 0.4618, "step": 12959 }, { "epoch": 0.82, "grad_norm": 0.9936832785606384, "learning_rate": 8.162658253888761e-07, "loss": 0.5281, "step": 12960 }, { "epoch": 0.82, "grad_norm": 1.0354995727539062, "learning_rate": 8.157040896630481e-07, "loss": 0.4696, "step": 12961 }, { "epoch": 0.82, "grad_norm": 1.0596461296081543, "learning_rate": 8.151425301233656e-07, "loss": 0.4751, "step": 12962 }, { "epoch": 0.82, "grad_norm": 1.1725833415985107, "learning_rate": 8.14581146793475e-07, "loss": 0.5226, "step": 12963 }, { "epoch": 0.82, "grad_norm": 1.0640276670455933, "learning_rate": 8.140199396970106e-07, "loss": 0.5035, "step": 12964 }, { "epoch": 0.82, "grad_norm": 1.0254626274108887, "learning_rate": 8.13458908857605e-07, "loss": 0.4465, "step": 12965 }, { "epoch": 0.82, "grad_norm": 1.0551642179489136, "learning_rate": 8.128980542988801e-07, "loss": 0.4637, "step": 12966 }, { "epoch": 0.82, "grad_norm": 1.061268925666809, "learning_rate": 8.12337376044453e-07, "loss": 0.4741, "step": 12967 }, { "epoch": 0.82, "grad_norm": 1.105828881263733, "learning_rate": 8.117768741179322e-07, "loss": 0.5346, "step": 12968 }, { "epoch": 0.82, "grad_norm": 1.0640383958816528, "learning_rate": 8.112165485429163e-07, "loss": 0.5018, "step": 12969 }, { "epoch": 0.82, "grad_norm": 1.0903797149658203, "learning_rate": 8.106563993429983e-07, "loss": 0.4948, "step": 12970 }, { "epoch": 0.82, "grad_norm": 1.0028164386749268, "learning_rate": 8.100964265417682e-07, "loss": 0.476, "step": 12971 }, { "epoch": 0.82, "grad_norm": 1.1049624681472778, "learning_rate": 8.09536630162801e-07, "loss": 0.5115, "step": 12972 }, { "epoch": 0.82, "grad_norm": 1.179762601852417, "learning_rate": 8.089770102296685e-07, "loss": 0.4664, "step": 12973 }, { "epoch": 0.82, "grad_norm": 1.0441378355026245, "learning_rate": 8.084175667659345e-07, "loss": 0.5025, "step": 12974 }, { "epoch": 0.82, "grad_norm": 1.024807095527649, "learning_rate": 8.078582997951556e-07, "loss": 0.5021, "step": 12975 }, { "epoch": 0.82, "grad_norm": 1.0215458869934082, "learning_rate": 8.072992093408816e-07, "loss": 0.5306, "step": 12976 }, { "epoch": 0.82, "grad_norm": 1.0705639123916626, "learning_rate": 8.067402954266512e-07, "loss": 0.5316, "step": 12977 }, { "epoch": 0.82, "grad_norm": 1.0934439897537231, "learning_rate": 8.061815580759996e-07, "loss": 0.4898, "step": 12978 }, { "epoch": 0.82, "grad_norm": 1.0901581048965454, "learning_rate": 8.056229973124529e-07, "loss": 0.506, "step": 12979 }, { "epoch": 0.82, "grad_norm": 1.0371161699295044, "learning_rate": 8.050646131595313e-07, "loss": 0.5112, "step": 12980 }, { "epoch": 0.82, "grad_norm": 0.9929707050323486, "learning_rate": 8.045064056407453e-07, "loss": 0.4959, "step": 12981 }, { "epoch": 0.82, "grad_norm": 1.021591305732727, "learning_rate": 8.039483747796012e-07, "loss": 0.4946, "step": 12982 }, { "epoch": 0.82, "grad_norm": 1.0111936330795288, "learning_rate": 8.033905205995913e-07, "loss": 0.4957, "step": 12983 }, { "epoch": 0.82, "grad_norm": 1.089510202407837, "learning_rate": 8.0283284312421e-07, "loss": 0.4813, "step": 12984 }, { "epoch": 0.82, "grad_norm": 1.0633338689804077, "learning_rate": 8.022753423769359e-07, "loss": 0.5185, "step": 12985 }, { "epoch": 0.82, "grad_norm": 0.9597094058990479, "learning_rate": 8.017180183812439e-07, "loss": 0.4766, "step": 12986 }, { "epoch": 0.82, "grad_norm": 1.0139012336730957, "learning_rate": 8.011608711606017e-07, "loss": 0.4829, "step": 12987 }, { "epoch": 0.82, "grad_norm": 1.062382698059082, "learning_rate": 8.006039007384681e-07, "loss": 0.5149, "step": 12988 }, { "epoch": 0.82, "grad_norm": 1.1986846923828125, "learning_rate": 8.000471071382959e-07, "loss": 0.5263, "step": 12989 }, { "epoch": 0.82, "grad_norm": 1.0365426540374756, "learning_rate": 7.99490490383531e-07, "loss": 0.5102, "step": 12990 }, { "epoch": 0.82, "grad_norm": 1.0646085739135742, "learning_rate": 7.989340504976062e-07, "loss": 0.5231, "step": 12991 }, { "epoch": 0.82, "grad_norm": 1.0220482349395752, "learning_rate": 7.983777875039567e-07, "loss": 0.4728, "step": 12992 }, { "epoch": 0.82, "grad_norm": 1.0070226192474365, "learning_rate": 7.978217014260009e-07, "loss": 0.495, "step": 12993 }, { "epoch": 0.82, "grad_norm": 1.1144849061965942, "learning_rate": 7.972657922871546e-07, "loss": 0.4909, "step": 12994 }, { "epoch": 0.82, "grad_norm": 1.0617939233779907, "learning_rate": 7.967100601108258e-07, "loss": 0.5118, "step": 12995 }, { "epoch": 0.82, "grad_norm": 1.0664457082748413, "learning_rate": 7.961545049204145e-07, "loss": 0.5491, "step": 12996 }, { "epoch": 0.82, "grad_norm": 0.9803445339202881, "learning_rate": 7.955991267393127e-07, "loss": 0.4642, "step": 12997 }, { "epoch": 0.82, "grad_norm": 1.0978424549102783, "learning_rate": 7.950439255909065e-07, "loss": 0.5544, "step": 12998 }, { "epoch": 0.82, "grad_norm": 1.0519658327102661, "learning_rate": 7.944889014985718e-07, "loss": 0.5123, "step": 12999 }, { "epoch": 0.82, "grad_norm": 1.081263780593872, "learning_rate": 7.939340544856783e-07, "loss": 0.5062, "step": 13000 }, { "epoch": 0.82, "grad_norm": 1.050601601600647, "learning_rate": 7.933793845755922e-07, "loss": 0.5167, "step": 13001 }, { "epoch": 0.82, "grad_norm": 1.0440396070480347, "learning_rate": 7.928248917916653e-07, "loss": 0.4979, "step": 13002 }, { "epoch": 0.82, "grad_norm": 1.0332825183868408, "learning_rate": 7.922705761572464e-07, "loss": 0.4787, "step": 13003 }, { "epoch": 0.82, "grad_norm": 0.9679126143455505, "learning_rate": 7.91716437695676e-07, "loss": 0.4428, "step": 13004 }, { "epoch": 0.82, "grad_norm": 1.1437848806381226, "learning_rate": 7.911624764302872e-07, "loss": 0.5602, "step": 13005 }, { "epoch": 0.82, "grad_norm": 1.0696083307266235, "learning_rate": 7.906086923844059e-07, "loss": 0.4994, "step": 13006 }, { "epoch": 0.82, "grad_norm": 1.0181982517242432, "learning_rate": 7.900550855813477e-07, "loss": 0.5032, "step": 13007 }, { "epoch": 0.82, "grad_norm": 1.1297941207885742, "learning_rate": 7.895016560444241e-07, "loss": 0.5132, "step": 13008 }, { "epoch": 0.82, "grad_norm": 1.085738182067871, "learning_rate": 7.889484037969403e-07, "loss": 0.531, "step": 13009 }, { "epoch": 0.82, "grad_norm": 1.0692535638809204, "learning_rate": 7.883953288621887e-07, "loss": 0.5461, "step": 13010 }, { "epoch": 0.82, "grad_norm": 1.1931544542312622, "learning_rate": 7.878424312634592e-07, "loss": 0.5064, "step": 13011 }, { "epoch": 0.82, "grad_norm": 1.1276174783706665, "learning_rate": 7.87289711024033e-07, "loss": 0.5223, "step": 13012 }, { "epoch": 0.82, "grad_norm": 1.0550527572631836, "learning_rate": 7.867371681671793e-07, "loss": 0.5255, "step": 13013 }, { "epoch": 0.82, "grad_norm": 1.0752570629119873, "learning_rate": 7.861848027161694e-07, "loss": 0.5363, "step": 13014 }, { "epoch": 0.82, "grad_norm": 0.9889829754829407, "learning_rate": 7.856326146942572e-07, "loss": 0.4606, "step": 13015 }, { "epoch": 0.82, "grad_norm": 1.053361415863037, "learning_rate": 7.85080604124695e-07, "loss": 0.5499, "step": 13016 }, { "epoch": 0.82, "grad_norm": 1.0334244966506958, "learning_rate": 7.845287710307258e-07, "loss": 0.4767, "step": 13017 }, { "epoch": 0.82, "grad_norm": 1.1344623565673828, "learning_rate": 7.839771154355858e-07, "loss": 0.519, "step": 13018 }, { "epoch": 0.82, "grad_norm": 1.0209664106369019, "learning_rate": 7.834256373625027e-07, "loss": 0.5122, "step": 13019 }, { "epoch": 0.82, "grad_norm": 1.0017036199569702, "learning_rate": 7.828743368346991e-07, "loss": 0.4465, "step": 13020 }, { "epoch": 0.82, "grad_norm": 1.0459413528442383, "learning_rate": 7.823232138753845e-07, "loss": 0.4616, "step": 13021 }, { "epoch": 0.82, "grad_norm": 1.1484712362289429, "learning_rate": 7.817722685077689e-07, "loss": 0.4665, "step": 13022 }, { "epoch": 0.83, "grad_norm": 0.9936702251434326, "learning_rate": 7.812215007550483e-07, "loss": 0.5223, "step": 13023 }, { "epoch": 0.83, "grad_norm": 1.1362011432647705, "learning_rate": 7.806709106404142e-07, "loss": 0.5363, "step": 13024 }, { "epoch": 0.83, "grad_norm": 1.0711592435836792, "learning_rate": 7.801204981870508e-07, "loss": 0.5046, "step": 13025 }, { "epoch": 0.83, "grad_norm": 1.0723801851272583, "learning_rate": 7.795702634181318e-07, "loss": 0.4827, "step": 13026 }, { "epoch": 0.83, "grad_norm": 1.0045136213302612, "learning_rate": 7.790202063568276e-07, "loss": 0.4553, "step": 13027 }, { "epoch": 0.83, "grad_norm": 1.0694032907485962, "learning_rate": 7.784703270263006e-07, "loss": 0.504, "step": 13028 }, { "epoch": 0.83, "grad_norm": 1.0534634590148926, "learning_rate": 7.779206254497007e-07, "loss": 0.4675, "step": 13029 }, { "epoch": 0.83, "grad_norm": 1.056309700012207, "learning_rate": 7.773711016501762e-07, "loss": 0.529, "step": 13030 }, { "epoch": 0.83, "grad_norm": 1.064368486404419, "learning_rate": 7.76821755650865e-07, "loss": 0.55, "step": 13031 }, { "epoch": 0.83, "grad_norm": 1.0615298748016357, "learning_rate": 7.762725874748983e-07, "loss": 0.5037, "step": 13032 }, { "epoch": 0.83, "grad_norm": 1.1085795164108276, "learning_rate": 7.757235971454008e-07, "loss": 0.4766, "step": 13033 }, { "epoch": 0.83, "grad_norm": 0.9743221402168274, "learning_rate": 7.751747846854851e-07, "loss": 0.4875, "step": 13034 }, { "epoch": 0.83, "grad_norm": 1.1090971231460571, "learning_rate": 7.746261501182633e-07, "loss": 0.5021, "step": 13035 }, { "epoch": 0.83, "grad_norm": 1.017557978630066, "learning_rate": 7.740776934668365e-07, "loss": 0.4852, "step": 13036 }, { "epoch": 0.83, "grad_norm": 1.0135324001312256, "learning_rate": 7.73529414754296e-07, "loss": 0.4662, "step": 13037 }, { "epoch": 0.83, "grad_norm": 1.132223129272461, "learning_rate": 7.72981314003729e-07, "loss": 0.4664, "step": 13038 }, { "epoch": 0.83, "grad_norm": 1.087611198425293, "learning_rate": 7.724333912382143e-07, "loss": 0.5009, "step": 13039 }, { "epoch": 0.83, "grad_norm": 1.010160207748413, "learning_rate": 7.718856464808222e-07, "loss": 0.4671, "step": 13040 }, { "epoch": 0.83, "grad_norm": 0.9534567594528198, "learning_rate": 7.713380797546188e-07, "loss": 0.4645, "step": 13041 }, { "epoch": 0.83, "grad_norm": 1.1297147274017334, "learning_rate": 7.707906910826574e-07, "loss": 0.5232, "step": 13042 }, { "epoch": 0.83, "grad_norm": 1.0202480554580688, "learning_rate": 7.702434804879861e-07, "loss": 0.4936, "step": 13043 }, { "epoch": 0.83, "grad_norm": 1.0754560232162476, "learning_rate": 7.696964479936497e-07, "loss": 0.4756, "step": 13044 }, { "epoch": 0.83, "grad_norm": 1.0832222700119019, "learning_rate": 7.691495936226789e-07, "loss": 0.4753, "step": 13045 }, { "epoch": 0.83, "grad_norm": 1.1323879957199097, "learning_rate": 7.686029173981008e-07, "loss": 0.5314, "step": 13046 }, { "epoch": 0.83, "grad_norm": 1.097445011138916, "learning_rate": 7.680564193429336e-07, "loss": 0.5147, "step": 13047 }, { "epoch": 0.83, "grad_norm": 1.1266767978668213, "learning_rate": 7.675100994801888e-07, "loss": 0.4727, "step": 13048 }, { "epoch": 0.83, "grad_norm": 1.0783687829971313, "learning_rate": 7.669639578328713e-07, "loss": 0.4799, "step": 13049 }, { "epoch": 0.83, "grad_norm": 1.0944496393203735, "learning_rate": 7.664179944239746e-07, "loss": 0.5092, "step": 13050 }, { "epoch": 0.83, "grad_norm": 1.0033917427062988, "learning_rate": 7.658722092764876e-07, "loss": 0.4229, "step": 13051 }, { "epoch": 0.83, "grad_norm": 1.0313786268234253, "learning_rate": 7.653266024133943e-07, "loss": 0.4678, "step": 13052 }, { "epoch": 0.83, "grad_norm": 1.0790505409240723, "learning_rate": 7.647811738576655e-07, "loss": 0.5071, "step": 13053 }, { "epoch": 0.83, "grad_norm": 1.064852237701416, "learning_rate": 7.642359236322683e-07, "loss": 0.4768, "step": 13054 }, { "epoch": 0.83, "grad_norm": 1.03118896484375, "learning_rate": 7.63690851760161e-07, "loss": 0.4928, "step": 13055 }, { "epoch": 0.83, "grad_norm": 1.1065315008163452, "learning_rate": 7.631459582642947e-07, "loss": 0.4912, "step": 13056 }, { "epoch": 0.83, "grad_norm": 1.064281702041626, "learning_rate": 7.626012431676138e-07, "loss": 0.5225, "step": 13057 }, { "epoch": 0.83, "grad_norm": 1.0789825916290283, "learning_rate": 7.620567064930545e-07, "loss": 0.5028, "step": 13058 }, { "epoch": 0.83, "grad_norm": 1.042149543762207, "learning_rate": 7.615123482635433e-07, "loss": 0.5042, "step": 13059 }, { "epoch": 0.83, "grad_norm": 1.112006664276123, "learning_rate": 7.609681685020026e-07, "loss": 0.5103, "step": 13060 }, { "epoch": 0.83, "grad_norm": 1.028239369392395, "learning_rate": 7.604241672313461e-07, "loss": 0.4712, "step": 13061 }, { "epoch": 0.83, "grad_norm": 1.0118327140808105, "learning_rate": 7.59880344474479e-07, "loss": 0.4952, "step": 13062 }, { "epoch": 0.83, "grad_norm": 1.0231777429580688, "learning_rate": 7.593367002543018e-07, "loss": 0.4922, "step": 13063 }, { "epoch": 0.83, "grad_norm": 1.0467559099197388, "learning_rate": 7.587932345937016e-07, "loss": 0.4955, "step": 13064 }, { "epoch": 0.83, "grad_norm": 1.0370157957077026, "learning_rate": 7.582499475155653e-07, "loss": 0.5388, "step": 13065 }, { "epoch": 0.83, "grad_norm": 1.0274789333343506, "learning_rate": 7.577068390427689e-07, "loss": 0.4585, "step": 13066 }, { "epoch": 0.83, "grad_norm": 0.9453758001327515, "learning_rate": 7.571639091981786e-07, "loss": 0.463, "step": 13067 }, { "epoch": 0.83, "grad_norm": 1.0819838047027588, "learning_rate": 7.566211580046562e-07, "loss": 0.5143, "step": 13068 }, { "epoch": 0.83, "grad_norm": 0.992570698261261, "learning_rate": 7.56078585485055e-07, "loss": 0.463, "step": 13069 }, { "epoch": 0.83, "grad_norm": 1.0368421077728271, "learning_rate": 7.555361916622217e-07, "loss": 0.512, "step": 13070 }, { "epoch": 0.83, "grad_norm": 1.010269045829773, "learning_rate": 7.549939765589942e-07, "loss": 0.4379, "step": 13071 }, { "epoch": 0.83, "grad_norm": 1.059167742729187, "learning_rate": 7.544519401982025e-07, "loss": 0.5091, "step": 13072 }, { "epoch": 0.83, "grad_norm": 1.1328742504119873, "learning_rate": 7.539100826026691e-07, "loss": 0.527, "step": 13073 }, { "epoch": 0.83, "grad_norm": 1.1374634504318237, "learning_rate": 7.533684037952133e-07, "loss": 0.4968, "step": 13074 }, { "epoch": 0.83, "grad_norm": 1.0074968338012695, "learning_rate": 7.528269037986402e-07, "loss": 0.5453, "step": 13075 }, { "epoch": 0.83, "grad_norm": 1.132983684539795, "learning_rate": 7.522855826357511e-07, "loss": 0.492, "step": 13076 }, { "epoch": 0.83, "grad_norm": 0.9377274513244629, "learning_rate": 7.517444403293394e-07, "loss": 0.4618, "step": 13077 }, { "epoch": 0.83, "grad_norm": 1.0819823741912842, "learning_rate": 7.512034769021909e-07, "loss": 0.5267, "step": 13078 }, { "epoch": 0.83, "grad_norm": 1.1434528827667236, "learning_rate": 7.506626923770843e-07, "loss": 0.5112, "step": 13079 }, { "epoch": 0.83, "grad_norm": 1.046053171157837, "learning_rate": 7.501220867767883e-07, "loss": 0.5212, "step": 13080 }, { "epoch": 0.83, "grad_norm": 1.0413637161254883, "learning_rate": 7.495816601240664e-07, "loss": 0.5084, "step": 13081 }, { "epoch": 0.83, "grad_norm": 1.0843172073364258, "learning_rate": 7.490414124416761e-07, "loss": 0.5339, "step": 13082 }, { "epoch": 0.83, "grad_norm": 1.107027292251587, "learning_rate": 7.485013437523636e-07, "loss": 0.5402, "step": 13083 }, { "epoch": 0.83, "grad_norm": 1.105094313621521, "learning_rate": 7.479614540788687e-07, "loss": 0.5307, "step": 13084 }, { "epoch": 0.83, "grad_norm": 1.0860050916671753, "learning_rate": 7.474217434439263e-07, "loss": 0.4937, "step": 13085 }, { "epoch": 0.83, "grad_norm": 1.0668641328811646, "learning_rate": 7.468822118702596e-07, "loss": 0.4803, "step": 13086 }, { "epoch": 0.83, "grad_norm": 1.0991917848587036, "learning_rate": 7.463428593805894e-07, "loss": 0.4805, "step": 13087 }, { "epoch": 0.83, "grad_norm": 1.0379934310913086, "learning_rate": 7.458036859976225e-07, "loss": 0.5311, "step": 13088 }, { "epoch": 0.83, "grad_norm": 1.1096937656402588, "learning_rate": 7.452646917440631e-07, "loss": 0.5478, "step": 13089 }, { "epoch": 0.83, "grad_norm": 1.085133671760559, "learning_rate": 7.447258766426063e-07, "loss": 0.51, "step": 13090 }, { "epoch": 0.83, "grad_norm": 1.214627742767334, "learning_rate": 7.441872407159401e-07, "loss": 0.4982, "step": 13091 }, { "epoch": 0.83, "grad_norm": 1.0035841464996338, "learning_rate": 7.43648783986744e-07, "loss": 0.5197, "step": 13092 }, { "epoch": 0.83, "grad_norm": 1.0719767808914185, "learning_rate": 7.431105064776922e-07, "loss": 0.5233, "step": 13093 }, { "epoch": 0.83, "grad_norm": 1.0103352069854736, "learning_rate": 7.425724082114455e-07, "loss": 0.4828, "step": 13094 }, { "epoch": 0.83, "grad_norm": 1.091241478919983, "learning_rate": 7.420344892106674e-07, "loss": 0.4777, "step": 13095 }, { "epoch": 0.83, "grad_norm": 1.0513263940811157, "learning_rate": 7.414967494980024e-07, "loss": 0.5093, "step": 13096 }, { "epoch": 0.83, "grad_norm": 1.0273000001907349, "learning_rate": 7.40959189096096e-07, "loss": 0.4405, "step": 13097 }, { "epoch": 0.83, "grad_norm": 1.095465064048767, "learning_rate": 7.404218080275816e-07, "loss": 0.529, "step": 13098 }, { "epoch": 0.83, "grad_norm": 1.068629503250122, "learning_rate": 7.398846063150866e-07, "loss": 0.4608, "step": 13099 }, { "epoch": 0.83, "grad_norm": 1.020584225654602, "learning_rate": 7.393475839812314e-07, "loss": 0.5202, "step": 13100 }, { "epoch": 0.83, "grad_norm": 1.0594336986541748, "learning_rate": 7.388107410486289e-07, "loss": 0.4967, "step": 13101 }, { "epoch": 0.83, "grad_norm": 1.0724962949752808, "learning_rate": 7.3827407753988e-07, "loss": 0.496, "step": 13102 }, { "epoch": 0.83, "grad_norm": 0.9776265025138855, "learning_rate": 7.377375934775865e-07, "loss": 0.4839, "step": 13103 }, { "epoch": 0.83, "grad_norm": 1.0062637329101562, "learning_rate": 7.372012888843344e-07, "loss": 0.506, "step": 13104 }, { "epoch": 0.83, "grad_norm": 0.9793121814727783, "learning_rate": 7.366651637827065e-07, "loss": 0.492, "step": 13105 }, { "epoch": 0.83, "grad_norm": 1.0513062477111816, "learning_rate": 7.361292181952795e-07, "loss": 0.5025, "step": 13106 }, { "epoch": 0.83, "grad_norm": 1.2303926944732666, "learning_rate": 7.355934521446151e-07, "loss": 0.485, "step": 13107 }, { "epoch": 0.83, "grad_norm": 1.075791835784912, "learning_rate": 7.350578656532776e-07, "loss": 0.5139, "step": 13108 }, { "epoch": 0.83, "grad_norm": 1.0517456531524658, "learning_rate": 7.345224587438171e-07, "loss": 0.4412, "step": 13109 }, { "epoch": 0.83, "grad_norm": 1.1046347618103027, "learning_rate": 7.339872314387763e-07, "loss": 0.5332, "step": 13110 }, { "epoch": 0.83, "grad_norm": 1.1530647277832031, "learning_rate": 7.334521837606934e-07, "loss": 0.5211, "step": 13111 }, { "epoch": 0.83, "grad_norm": 0.9838554263114929, "learning_rate": 7.329173157320962e-07, "loss": 0.4616, "step": 13112 }, { "epoch": 0.83, "grad_norm": 1.0944254398345947, "learning_rate": 7.323826273755069e-07, "loss": 0.5109, "step": 13113 }, { "epoch": 0.83, "grad_norm": 1.10060715675354, "learning_rate": 7.318481187134408e-07, "loss": 0.5204, "step": 13114 }, { "epoch": 0.83, "grad_norm": 1.1013379096984863, "learning_rate": 7.313137897683997e-07, "loss": 0.5227, "step": 13115 }, { "epoch": 0.83, "grad_norm": 1.012404441833496, "learning_rate": 7.30779640562887e-07, "loss": 0.5431, "step": 13116 }, { "epoch": 0.83, "grad_norm": 1.021898865699768, "learning_rate": 7.302456711193928e-07, "loss": 0.4886, "step": 13117 }, { "epoch": 0.83, "grad_norm": 1.070927381515503, "learning_rate": 7.297118814603987e-07, "loss": 0.4834, "step": 13118 }, { "epoch": 0.83, "grad_norm": 0.9357616305351257, "learning_rate": 7.291782716083823e-07, "loss": 0.4724, "step": 13119 }, { "epoch": 0.83, "grad_norm": 1.0846022367477417, "learning_rate": 7.286448415858116e-07, "loss": 0.5166, "step": 13120 }, { "epoch": 0.83, "grad_norm": 1.008909821510315, "learning_rate": 7.281115914151477e-07, "loss": 0.4524, "step": 13121 }, { "epoch": 0.83, "grad_norm": 1.0392085313796997, "learning_rate": 7.275785211188441e-07, "loss": 0.5003, "step": 13122 }, { "epoch": 0.83, "grad_norm": 1.0051409006118774, "learning_rate": 7.270456307193474e-07, "loss": 0.5078, "step": 13123 }, { "epoch": 0.83, "grad_norm": 1.1217042207717896, "learning_rate": 7.265129202390924e-07, "loss": 0.4688, "step": 13124 }, { "epoch": 0.83, "grad_norm": 0.9774218201637268, "learning_rate": 7.259803897005141e-07, "loss": 0.488, "step": 13125 }, { "epoch": 0.83, "grad_norm": 1.0326730012893677, "learning_rate": 7.254480391260321e-07, "loss": 0.4403, "step": 13126 }, { "epoch": 0.83, "grad_norm": 1.0553927421569824, "learning_rate": 7.249158685380631e-07, "loss": 0.5115, "step": 13127 }, { "epoch": 0.83, "grad_norm": 1.042048454284668, "learning_rate": 7.243838779590151e-07, "loss": 0.4818, "step": 13128 }, { "epoch": 0.83, "grad_norm": 1.0369988679885864, "learning_rate": 7.238520674112881e-07, "loss": 0.4685, "step": 13129 }, { "epoch": 0.83, "grad_norm": 1.0481699705123901, "learning_rate": 7.233204369172753e-07, "loss": 0.476, "step": 13130 }, { "epoch": 0.83, "grad_norm": 1.0384000539779663, "learning_rate": 7.22788986499362e-07, "loss": 0.4698, "step": 13131 }, { "epoch": 0.83, "grad_norm": 1.0750094652175903, "learning_rate": 7.222577161799232e-07, "loss": 0.5098, "step": 13132 }, { "epoch": 0.83, "grad_norm": 1.1386425495147705, "learning_rate": 7.217266259813332e-07, "loss": 0.5112, "step": 13133 }, { "epoch": 0.83, "grad_norm": 1.0001041889190674, "learning_rate": 7.211957159259503e-07, "loss": 0.464, "step": 13134 }, { "epoch": 0.83, "grad_norm": 1.0347130298614502, "learning_rate": 7.206649860361314e-07, "loss": 0.4863, "step": 13135 }, { "epoch": 0.83, "grad_norm": 1.1184300184249878, "learning_rate": 7.201344363342245e-07, "loss": 0.5083, "step": 13136 }, { "epoch": 0.83, "grad_norm": 1.1076111793518066, "learning_rate": 7.196040668425653e-07, "loss": 0.5337, "step": 13137 }, { "epoch": 0.83, "grad_norm": 1.092587947845459, "learning_rate": 7.190738775834894e-07, "loss": 0.5188, "step": 13138 }, { "epoch": 0.83, "grad_norm": 1.0376991033554077, "learning_rate": 7.185438685793217e-07, "loss": 0.5127, "step": 13139 }, { "epoch": 0.83, "grad_norm": 1.0892513990402222, "learning_rate": 7.180140398523761e-07, "loss": 0.5143, "step": 13140 }, { "epoch": 0.83, "grad_norm": 1.0623342990875244, "learning_rate": 7.174843914249636e-07, "loss": 0.4956, "step": 13141 }, { "epoch": 0.83, "grad_norm": 1.0941516160964966, "learning_rate": 7.169549233193857e-07, "loss": 0.5069, "step": 13142 }, { "epoch": 0.83, "grad_norm": 1.1275255680084229, "learning_rate": 7.164256355579363e-07, "loss": 0.5076, "step": 13143 }, { "epoch": 0.83, "grad_norm": 1.0641416311264038, "learning_rate": 7.158965281629027e-07, "loss": 0.5074, "step": 13144 }, { "epoch": 0.83, "grad_norm": 1.0748405456542969, "learning_rate": 7.153676011565613e-07, "loss": 0.5199, "step": 13145 }, { "epoch": 0.83, "grad_norm": 0.9913454651832581, "learning_rate": 7.148388545611856e-07, "loss": 0.4566, "step": 13146 }, { "epoch": 0.83, "grad_norm": 0.9696449637413025, "learning_rate": 7.143102883990405e-07, "loss": 0.4981, "step": 13147 }, { "epoch": 0.83, "grad_norm": 1.0366780757904053, "learning_rate": 7.137819026923786e-07, "loss": 0.449, "step": 13148 }, { "epoch": 0.83, "grad_norm": 1.0798648595809937, "learning_rate": 7.132536974634508e-07, "loss": 0.5199, "step": 13149 }, { "epoch": 0.83, "grad_norm": 1.049294352531433, "learning_rate": 7.127256727344967e-07, "loss": 0.487, "step": 13150 }, { "epoch": 0.83, "grad_norm": 1.0658286809921265, "learning_rate": 7.121978285277503e-07, "loss": 0.5006, "step": 13151 }, { "epoch": 0.83, "grad_norm": 1.0541974306106567, "learning_rate": 7.116701648654384e-07, "loss": 0.5293, "step": 13152 }, { "epoch": 0.83, "grad_norm": 1.053865671157837, "learning_rate": 7.11142681769777e-07, "loss": 0.4901, "step": 13153 }, { "epoch": 0.83, "grad_norm": 1.0910694599151611, "learning_rate": 7.106153792629761e-07, "loss": 0.5032, "step": 13154 }, { "epoch": 0.83, "grad_norm": 0.9611359238624573, "learning_rate": 7.100882573672419e-07, "loss": 0.4711, "step": 13155 }, { "epoch": 0.83, "grad_norm": 1.0032509565353394, "learning_rate": 7.095613161047666e-07, "loss": 0.4942, "step": 13156 }, { "epoch": 0.83, "grad_norm": 1.0794392824172974, "learning_rate": 7.09034555497739e-07, "loss": 0.4868, "step": 13157 }, { "epoch": 0.83, "grad_norm": 1.0678043365478516, "learning_rate": 7.085079755683389e-07, "loss": 0.4887, "step": 13158 }, { "epoch": 0.83, "grad_norm": 1.0658090114593506, "learning_rate": 7.079815763387393e-07, "loss": 0.5243, "step": 13159 }, { "epoch": 0.83, "grad_norm": 1.0863361358642578, "learning_rate": 7.074553578311055e-07, "loss": 0.4692, "step": 13160 }, { "epoch": 0.83, "grad_norm": 1.046759843826294, "learning_rate": 7.06929320067593e-07, "loss": 0.5326, "step": 13161 }, { "epoch": 0.83, "grad_norm": 1.1394504308700562, "learning_rate": 7.064034630703515e-07, "loss": 0.5297, "step": 13162 }, { "epoch": 0.83, "grad_norm": 1.1047179698944092, "learning_rate": 7.058777868615258e-07, "loss": 0.5167, "step": 13163 }, { "epoch": 0.83, "grad_norm": 1.058111548423767, "learning_rate": 7.053522914632466e-07, "loss": 0.518, "step": 13164 }, { "epoch": 0.83, "grad_norm": 1.0377641916275024, "learning_rate": 7.048269768976429e-07, "loss": 0.4877, "step": 13165 }, { "epoch": 0.83, "grad_norm": 1.1154805421829224, "learning_rate": 7.043018431868348e-07, "loss": 0.4773, "step": 13166 }, { "epoch": 0.83, "grad_norm": 1.08950936794281, "learning_rate": 7.037768903529302e-07, "loss": 0.5348, "step": 13167 }, { "epoch": 0.83, "grad_norm": 1.030245065689087, "learning_rate": 7.032521184180369e-07, "loss": 0.5246, "step": 13168 }, { "epoch": 0.83, "grad_norm": 1.0297592878341675, "learning_rate": 7.027275274042489e-07, "loss": 0.5093, "step": 13169 }, { "epoch": 0.83, "grad_norm": 1.093056321144104, "learning_rate": 7.022031173336557e-07, "loss": 0.4911, "step": 13170 }, { "epoch": 0.83, "grad_norm": 1.0303655862808228, "learning_rate": 7.016788882283382e-07, "loss": 0.5291, "step": 13171 }, { "epoch": 0.83, "grad_norm": 1.1379703283309937, "learning_rate": 7.011548401103696e-07, "loss": 0.5298, "step": 13172 }, { "epoch": 0.83, "grad_norm": 1.0572259426116943, "learning_rate": 7.006309730018168e-07, "loss": 0.5104, "step": 13173 }, { "epoch": 0.83, "grad_norm": 1.0219038724899292, "learning_rate": 7.001072869247378e-07, "loss": 0.5306, "step": 13174 }, { "epoch": 0.83, "grad_norm": 1.0922071933746338, "learning_rate": 6.995837819011808e-07, "loss": 0.5321, "step": 13175 }, { "epoch": 0.83, "grad_norm": 0.9969636797904968, "learning_rate": 6.990604579531929e-07, "loss": 0.4978, "step": 13176 }, { "epoch": 0.83, "grad_norm": 1.1060311794281006, "learning_rate": 6.985373151028058e-07, "loss": 0.5127, "step": 13177 }, { "epoch": 0.83, "grad_norm": 1.023337721824646, "learning_rate": 6.980143533720491e-07, "loss": 0.4736, "step": 13178 }, { "epoch": 0.83, "grad_norm": 1.0479949712753296, "learning_rate": 6.974915727829423e-07, "loss": 0.4933, "step": 13179 }, { "epoch": 0.84, "grad_norm": 1.1665982007980347, "learning_rate": 6.96968973357498e-07, "loss": 0.5469, "step": 13180 }, { "epoch": 0.84, "grad_norm": 0.982250452041626, "learning_rate": 6.964465551177208e-07, "loss": 0.4244, "step": 13181 }, { "epoch": 0.84, "grad_norm": 1.029781460762024, "learning_rate": 6.959243180856096e-07, "loss": 0.5439, "step": 13182 }, { "epoch": 0.84, "grad_norm": 1.0114187002182007, "learning_rate": 6.954022622831514e-07, "loss": 0.4516, "step": 13183 }, { "epoch": 0.84, "grad_norm": 1.0797160863876343, "learning_rate": 6.948803877323296e-07, "loss": 0.4391, "step": 13184 }, { "epoch": 0.84, "grad_norm": 1.0194454193115234, "learning_rate": 6.943586944551178e-07, "loss": 0.4993, "step": 13185 }, { "epoch": 0.84, "grad_norm": 1.0202571153640747, "learning_rate": 6.938371824734835e-07, "loss": 0.5219, "step": 13186 }, { "epoch": 0.84, "grad_norm": 1.1259692907333374, "learning_rate": 6.933158518093852e-07, "loss": 0.5369, "step": 13187 }, { "epoch": 0.84, "grad_norm": 1.1246204376220703, "learning_rate": 6.927947024847748e-07, "loss": 0.5125, "step": 13188 }, { "epoch": 0.84, "grad_norm": 1.1178438663482666, "learning_rate": 6.922737345215952e-07, "loss": 0.4701, "step": 13189 }, { "epoch": 0.84, "grad_norm": 0.9730929732322693, "learning_rate": 6.91752947941785e-07, "loss": 0.4824, "step": 13190 }, { "epoch": 0.84, "grad_norm": 1.0151828527450562, "learning_rate": 6.912323427672691e-07, "loss": 0.4812, "step": 13191 }, { "epoch": 0.84, "grad_norm": 1.0726484060287476, "learning_rate": 6.907119190199706e-07, "loss": 0.4955, "step": 13192 }, { "epoch": 0.84, "grad_norm": 1.059969425201416, "learning_rate": 6.901916767218019e-07, "loss": 0.4861, "step": 13193 }, { "epoch": 0.84, "grad_norm": 1.0365266799926758, "learning_rate": 6.896716158946692e-07, "loss": 0.4832, "step": 13194 }, { "epoch": 0.84, "grad_norm": 1.0280224084854126, "learning_rate": 6.891517365604705e-07, "loss": 0.528, "step": 13195 }, { "epoch": 0.84, "grad_norm": 1.0622800588607788, "learning_rate": 6.886320387410967e-07, "loss": 0.5447, "step": 13196 }, { "epoch": 0.84, "grad_norm": 0.9725284576416016, "learning_rate": 6.881125224584273e-07, "loss": 0.4566, "step": 13197 }, { "epoch": 0.84, "grad_norm": 1.090033769607544, "learning_rate": 6.875931877343417e-07, "loss": 0.4939, "step": 13198 }, { "epoch": 0.84, "grad_norm": 1.004368543624878, "learning_rate": 6.870740345907046e-07, "loss": 0.5051, "step": 13199 }, { "epoch": 0.84, "grad_norm": 1.1019244194030762, "learning_rate": 6.865550630493756e-07, "loss": 0.4604, "step": 13200 }, { "epoch": 0.84, "grad_norm": 1.1146312952041626, "learning_rate": 6.860362731322079e-07, "loss": 0.5348, "step": 13201 }, { "epoch": 0.84, "grad_norm": 1.0931274890899658, "learning_rate": 6.855176648610457e-07, "loss": 0.5044, "step": 13202 }, { "epoch": 0.84, "grad_norm": 1.031827449798584, "learning_rate": 6.849992382577253e-07, "loss": 0.4897, "step": 13203 }, { "epoch": 0.84, "grad_norm": 1.0737539529800415, "learning_rate": 6.844809933440776e-07, "loss": 0.4928, "step": 13204 }, { "epoch": 0.84, "grad_norm": 1.1895108222961426, "learning_rate": 6.839629301419204e-07, "loss": 0.476, "step": 13205 }, { "epoch": 0.84, "grad_norm": 1.0393731594085693, "learning_rate": 6.83445048673072e-07, "loss": 0.4769, "step": 13206 }, { "epoch": 0.84, "grad_norm": 0.9944177269935608, "learning_rate": 6.829273489593352e-07, "loss": 0.4725, "step": 13207 }, { "epoch": 0.84, "grad_norm": 1.0164682865142822, "learning_rate": 6.824098310225097e-07, "loss": 0.4965, "step": 13208 }, { "epoch": 0.84, "grad_norm": 1.1275815963745117, "learning_rate": 6.818924948843863e-07, "loss": 0.5223, "step": 13209 }, { "epoch": 0.84, "grad_norm": 1.0147440433502197, "learning_rate": 6.81375340566749e-07, "loss": 0.5011, "step": 13210 }, { "epoch": 0.84, "grad_norm": 1.0180290937423706, "learning_rate": 6.808583680913722e-07, "loss": 0.4858, "step": 13211 }, { "epoch": 0.84, "grad_norm": 1.0238807201385498, "learning_rate": 6.803415774800253e-07, "loss": 0.4898, "step": 13212 }, { "epoch": 0.84, "grad_norm": 1.0540781021118164, "learning_rate": 6.798249687544667e-07, "loss": 0.4693, "step": 13213 }, { "epoch": 0.84, "grad_norm": 1.0231060981750488, "learning_rate": 6.793085419364498e-07, "loss": 0.4837, "step": 13214 }, { "epoch": 0.84, "grad_norm": 1.021544098854065, "learning_rate": 6.787922970477196e-07, "loss": 0.4942, "step": 13215 }, { "epoch": 0.84, "grad_norm": 1.0937020778656006, "learning_rate": 6.782762341100135e-07, "loss": 0.5429, "step": 13216 }, { "epoch": 0.84, "grad_norm": 0.9730556607246399, "learning_rate": 6.777603531450617e-07, "loss": 0.4739, "step": 13217 }, { "epoch": 0.84, "grad_norm": 1.0430997610092163, "learning_rate": 6.772446541745836e-07, "loss": 0.5048, "step": 13218 }, { "epoch": 0.84, "grad_norm": 1.0744502544403076, "learning_rate": 6.767291372202967e-07, "loss": 0.5076, "step": 13219 }, { "epoch": 0.84, "grad_norm": 1.154815912246704, "learning_rate": 6.762138023039072e-07, "loss": 0.5316, "step": 13220 }, { "epoch": 0.84, "grad_norm": 1.0704598426818848, "learning_rate": 6.756986494471119e-07, "loss": 0.5289, "step": 13221 }, { "epoch": 0.84, "grad_norm": 1.0126025676727295, "learning_rate": 6.751836786716032e-07, "loss": 0.517, "step": 13222 }, { "epoch": 0.84, "grad_norm": 1.066314458847046, "learning_rate": 6.74668889999065e-07, "loss": 0.4967, "step": 13223 }, { "epoch": 0.84, "grad_norm": 1.0445787906646729, "learning_rate": 6.741542834511727e-07, "loss": 0.4835, "step": 13224 }, { "epoch": 0.84, "grad_norm": 1.01438570022583, "learning_rate": 6.736398590495968e-07, "loss": 0.4443, "step": 13225 }, { "epoch": 0.84, "grad_norm": 1.1149402856826782, "learning_rate": 6.731256168159939e-07, "loss": 0.5227, "step": 13226 }, { "epoch": 0.84, "grad_norm": 1.0016542673110962, "learning_rate": 6.726115567720198e-07, "loss": 0.4791, "step": 13227 }, { "epoch": 0.84, "grad_norm": 1.0544713735580444, "learning_rate": 6.720976789393202e-07, "loss": 0.5225, "step": 13228 }, { "epoch": 0.84, "grad_norm": 1.021567702293396, "learning_rate": 6.71583983339531e-07, "loss": 0.503, "step": 13229 }, { "epoch": 0.84, "grad_norm": 1.028765082359314, "learning_rate": 6.710704699942827e-07, "loss": 0.4991, "step": 13230 }, { "epoch": 0.84, "grad_norm": 1.0623314380645752, "learning_rate": 6.705571389251975e-07, "loss": 0.4521, "step": 13231 }, { "epoch": 0.84, "grad_norm": 1.1302554607391357, "learning_rate": 6.700439901538902e-07, "loss": 0.4692, "step": 13232 }, { "epoch": 0.84, "grad_norm": 1.114856243133545, "learning_rate": 6.695310237019692e-07, "loss": 0.4972, "step": 13233 }, { "epoch": 0.84, "grad_norm": 1.0923891067504883, "learning_rate": 6.690182395910305e-07, "loss": 0.5548, "step": 13234 }, { "epoch": 0.84, "grad_norm": 1.0090917348861694, "learning_rate": 6.685056378426663e-07, "loss": 0.4718, "step": 13235 }, { "epoch": 0.84, "grad_norm": 1.1440528631210327, "learning_rate": 6.679932184784638e-07, "loss": 0.5182, "step": 13236 }, { "epoch": 0.84, "grad_norm": 1.0652399063110352, "learning_rate": 6.674809815199962e-07, "loss": 0.4616, "step": 13237 }, { "epoch": 0.84, "grad_norm": 1.1093815565109253, "learning_rate": 6.669689269888325e-07, "loss": 0.5176, "step": 13238 }, { "epoch": 0.84, "grad_norm": 1.0881990194320679, "learning_rate": 6.664570549065336e-07, "loss": 0.4498, "step": 13239 }, { "epoch": 0.84, "grad_norm": 1.0387650728225708, "learning_rate": 6.659453652946529e-07, "loss": 0.486, "step": 13240 }, { "epoch": 0.84, "grad_norm": 1.0223878622055054, "learning_rate": 6.654338581747366e-07, "loss": 0.5146, "step": 13241 }, { "epoch": 0.84, "grad_norm": 1.1007391214370728, "learning_rate": 6.649225335683213e-07, "loss": 0.5279, "step": 13242 }, { "epoch": 0.84, "grad_norm": 1.2238249778747559, "learning_rate": 6.644113914969369e-07, "loss": 0.5656, "step": 13243 }, { "epoch": 0.84, "grad_norm": 0.9655773639678955, "learning_rate": 6.639004319821063e-07, "loss": 0.4897, "step": 13244 }, { "epoch": 0.84, "grad_norm": 1.0541054010391235, "learning_rate": 6.63389655045345e-07, "loss": 0.5037, "step": 13245 }, { "epoch": 0.84, "grad_norm": 1.0961520671844482, "learning_rate": 6.628790607081586e-07, "loss": 0.5069, "step": 13246 }, { "epoch": 0.84, "grad_norm": 1.033692717552185, "learning_rate": 6.623686489920489e-07, "loss": 0.4749, "step": 13247 }, { "epoch": 0.84, "grad_norm": 1.1215088367462158, "learning_rate": 6.61858419918503e-07, "loss": 0.4896, "step": 13248 }, { "epoch": 0.84, "grad_norm": 1.1026906967163086, "learning_rate": 6.613483735090104e-07, "loss": 0.5227, "step": 13249 }, { "epoch": 0.84, "grad_norm": 1.0389769077301025, "learning_rate": 6.608385097850439e-07, "loss": 0.4894, "step": 13250 }, { "epoch": 0.84, "grad_norm": 1.054916501045227, "learning_rate": 6.603288287680726e-07, "loss": 0.5115, "step": 13251 }, { "epoch": 0.84, "grad_norm": 1.0588915348052979, "learning_rate": 6.598193304795575e-07, "loss": 0.4646, "step": 13252 }, { "epoch": 0.84, "grad_norm": 1.0616204738616943, "learning_rate": 6.593100149409521e-07, "loss": 0.5235, "step": 13253 }, { "epoch": 0.84, "grad_norm": 1.1210933923721313, "learning_rate": 6.588008821737019e-07, "loss": 0.5045, "step": 13254 }, { "epoch": 0.84, "grad_norm": 1.0720219612121582, "learning_rate": 6.582919321992459e-07, "loss": 0.5223, "step": 13255 }, { "epoch": 0.84, "grad_norm": 1.1221983432769775, "learning_rate": 6.577831650390104e-07, "loss": 0.4741, "step": 13256 }, { "epoch": 0.84, "grad_norm": 1.176119327545166, "learning_rate": 6.572745807144226e-07, "loss": 0.4837, "step": 13257 }, { "epoch": 0.84, "grad_norm": 1.0502431392669678, "learning_rate": 6.567661792468944e-07, "loss": 0.4666, "step": 13258 }, { "epoch": 0.84, "grad_norm": 1.075438380241394, "learning_rate": 6.562579606578328e-07, "loss": 0.4939, "step": 13259 }, { "epoch": 0.84, "grad_norm": 1.0331934690475464, "learning_rate": 6.557499249686377e-07, "loss": 0.5035, "step": 13260 }, { "epoch": 0.84, "grad_norm": 1.0521024465560913, "learning_rate": 6.552420722007008e-07, "loss": 0.4854, "step": 13261 }, { "epoch": 0.84, "grad_norm": 1.1086888313293457, "learning_rate": 6.547344023754065e-07, "loss": 0.5503, "step": 13262 }, { "epoch": 0.84, "grad_norm": 1.1222712993621826, "learning_rate": 6.542269155141306e-07, "loss": 0.4943, "step": 13263 }, { "epoch": 0.84, "grad_norm": 1.0259684324264526, "learning_rate": 6.537196116382411e-07, "loss": 0.4563, "step": 13264 }, { "epoch": 0.84, "grad_norm": 1.0945380926132202, "learning_rate": 6.532124907690979e-07, "loss": 0.4994, "step": 13265 }, { "epoch": 0.84, "grad_norm": 0.9539095163345337, "learning_rate": 6.527055529280574e-07, "loss": 0.4748, "step": 13266 }, { "epoch": 0.84, "grad_norm": 1.1312283277511597, "learning_rate": 6.521987981364614e-07, "loss": 0.5096, "step": 13267 }, { "epoch": 0.84, "grad_norm": 1.0389736890792847, "learning_rate": 6.516922264156495e-07, "loss": 0.4795, "step": 13268 }, { "epoch": 0.84, "grad_norm": 0.989928662776947, "learning_rate": 6.511858377869517e-07, "loss": 0.4759, "step": 13269 }, { "epoch": 0.84, "grad_norm": 1.0273138284683228, "learning_rate": 6.506796322716891e-07, "loss": 0.4625, "step": 13270 }, { "epoch": 0.84, "grad_norm": 1.0603842735290527, "learning_rate": 6.501736098911787e-07, "loss": 0.4699, "step": 13271 }, { "epoch": 0.84, "grad_norm": 1.0442575216293335, "learning_rate": 6.496677706667243e-07, "loss": 0.504, "step": 13272 }, { "epoch": 0.84, "grad_norm": 0.9706287384033203, "learning_rate": 6.491621146196253e-07, "loss": 0.4683, "step": 13273 }, { "epoch": 0.84, "grad_norm": 1.0698890686035156, "learning_rate": 6.486566417711765e-07, "loss": 0.4817, "step": 13274 }, { "epoch": 0.84, "grad_norm": 1.066401720046997, "learning_rate": 6.481513521426581e-07, "loss": 0.4917, "step": 13275 }, { "epoch": 0.84, "grad_norm": 0.9655542373657227, "learning_rate": 6.476462457553473e-07, "loss": 0.4641, "step": 13276 }, { "epoch": 0.84, "grad_norm": 1.100365400314331, "learning_rate": 6.471413226305134e-07, "loss": 0.51, "step": 13277 }, { "epoch": 0.84, "grad_norm": 1.1870086193084717, "learning_rate": 6.466365827894133e-07, "loss": 0.5375, "step": 13278 }, { "epoch": 0.84, "grad_norm": 1.0047991275787354, "learning_rate": 6.461320262533055e-07, "loss": 0.5118, "step": 13279 }, { "epoch": 0.84, "grad_norm": 1.0439016819000244, "learning_rate": 6.456276530434302e-07, "loss": 0.4938, "step": 13280 }, { "epoch": 0.84, "grad_norm": 1.066996455192566, "learning_rate": 6.451234631810271e-07, "loss": 0.5259, "step": 13281 }, { "epoch": 0.84, "grad_norm": 1.1347441673278809, "learning_rate": 6.446194566873254e-07, "loss": 0.5433, "step": 13282 }, { "epoch": 0.84, "grad_norm": 1.1079368591308594, "learning_rate": 6.441156335835474e-07, "loss": 0.5256, "step": 13283 }, { "epoch": 0.84, "grad_norm": 0.9577980637550354, "learning_rate": 6.436119938909069e-07, "loss": 0.4615, "step": 13284 }, { "epoch": 0.84, "grad_norm": 1.1125235557556152, "learning_rate": 6.431085376306112e-07, "loss": 0.5352, "step": 13285 }, { "epoch": 0.84, "grad_norm": 0.9927465319633484, "learning_rate": 6.426052648238568e-07, "loss": 0.4886, "step": 13286 }, { "epoch": 0.84, "grad_norm": 1.0493212938308716, "learning_rate": 6.421021754918383e-07, "loss": 0.4716, "step": 13287 }, { "epoch": 0.84, "grad_norm": 1.0924726724624634, "learning_rate": 6.415992696557361e-07, "loss": 0.4976, "step": 13288 }, { "epoch": 0.84, "grad_norm": 1.0519717931747437, "learning_rate": 6.41096547336727e-07, "loss": 0.5018, "step": 13289 }, { "epoch": 0.84, "grad_norm": 1.0670619010925293, "learning_rate": 6.405940085559797e-07, "loss": 0.4831, "step": 13290 }, { "epoch": 0.84, "grad_norm": 1.0152946710586548, "learning_rate": 6.400916533346518e-07, "loss": 0.4259, "step": 13291 }, { "epoch": 0.84, "grad_norm": 1.0688623189926147, "learning_rate": 6.39589481693898e-07, "loss": 0.5016, "step": 13292 }, { "epoch": 0.84, "grad_norm": 1.0448710918426514, "learning_rate": 6.390874936548635e-07, "loss": 0.523, "step": 13293 }, { "epoch": 0.84, "grad_norm": 1.0543631315231323, "learning_rate": 6.385856892386826e-07, "loss": 0.5235, "step": 13294 }, { "epoch": 0.84, "grad_norm": 1.1219706535339355, "learning_rate": 6.380840684664869e-07, "loss": 0.4887, "step": 13295 }, { "epoch": 0.84, "grad_norm": 1.1632959842681885, "learning_rate": 6.375826313593963e-07, "loss": 0.5444, "step": 13296 }, { "epoch": 0.84, "grad_norm": 1.0708070993423462, "learning_rate": 6.37081377938526e-07, "loss": 0.5215, "step": 13297 }, { "epoch": 0.84, "grad_norm": 1.0890955924987793, "learning_rate": 6.365803082249822e-07, "loss": 0.4947, "step": 13298 }, { "epoch": 0.84, "grad_norm": 0.9379157423973083, "learning_rate": 6.360794222398603e-07, "loss": 0.4903, "step": 13299 }, { "epoch": 0.84, "grad_norm": 1.0575850009918213, "learning_rate": 6.35578720004254e-07, "loss": 0.5211, "step": 13300 }, { "epoch": 0.84, "grad_norm": 1.0986931324005127, "learning_rate": 6.350782015392459e-07, "loss": 0.5327, "step": 13301 }, { "epoch": 0.84, "grad_norm": 1.2144449949264526, "learning_rate": 6.345778668659097e-07, "loss": 0.5434, "step": 13302 }, { "epoch": 0.84, "grad_norm": 1.0637465715408325, "learning_rate": 6.34077716005313e-07, "loss": 0.5019, "step": 13303 }, { "epoch": 0.84, "grad_norm": 1.1095032691955566, "learning_rate": 6.335777489785161e-07, "loss": 0.5243, "step": 13304 }, { "epoch": 0.84, "grad_norm": 1.0209369659423828, "learning_rate": 6.3307796580657e-07, "loss": 0.5526, "step": 13305 }, { "epoch": 0.84, "grad_norm": 1.0439844131469727, "learning_rate": 6.325783665105206e-07, "loss": 0.5125, "step": 13306 }, { "epoch": 0.84, "grad_norm": 1.094522476196289, "learning_rate": 6.320789511114022e-07, "loss": 0.4866, "step": 13307 }, { "epoch": 0.84, "grad_norm": 1.0305488109588623, "learning_rate": 6.315797196302432e-07, "loss": 0.4716, "step": 13308 }, { "epoch": 0.84, "grad_norm": 1.0392119884490967, "learning_rate": 6.310806720880675e-07, "loss": 0.5054, "step": 13309 }, { "epoch": 0.84, "grad_norm": 1.055533528327942, "learning_rate": 6.305818085058852e-07, "loss": 0.4753, "step": 13310 }, { "epoch": 0.84, "grad_norm": 1.0672519207000732, "learning_rate": 6.300831289047027e-07, "loss": 0.5125, "step": 13311 }, { "epoch": 0.84, "grad_norm": 1.0750885009765625, "learning_rate": 6.295846333055184e-07, "loss": 0.4882, "step": 13312 }, { "epoch": 0.84, "grad_norm": 1.0400210618972778, "learning_rate": 6.290863217293214e-07, "loss": 0.5058, "step": 13313 }, { "epoch": 0.84, "grad_norm": 1.0726042985916138, "learning_rate": 6.285881941970951e-07, "loss": 0.4813, "step": 13314 }, { "epoch": 0.84, "grad_norm": 1.069843053817749, "learning_rate": 6.280902507298115e-07, "loss": 0.5179, "step": 13315 }, { "epoch": 0.84, "grad_norm": 1.089877724647522, "learning_rate": 6.275924913484377e-07, "loss": 0.4994, "step": 13316 }, { "epoch": 0.84, "grad_norm": 0.9835137724876404, "learning_rate": 6.270949160739359e-07, "loss": 0.4661, "step": 13317 }, { "epoch": 0.84, "grad_norm": 1.0019054412841797, "learning_rate": 6.265975249272544e-07, "loss": 0.4595, "step": 13318 }, { "epoch": 0.84, "grad_norm": 1.1164153814315796, "learning_rate": 6.261003179293368e-07, "loss": 0.4822, "step": 13319 }, { "epoch": 0.84, "grad_norm": 1.1330068111419678, "learning_rate": 6.256032951011188e-07, "loss": 0.5266, "step": 13320 }, { "epoch": 0.84, "grad_norm": 1.100870132446289, "learning_rate": 6.25106456463529e-07, "loss": 0.5282, "step": 13321 }, { "epoch": 0.84, "grad_norm": 1.029121994972229, "learning_rate": 6.246098020374869e-07, "loss": 0.5301, "step": 13322 }, { "epoch": 0.84, "grad_norm": 1.088587999343872, "learning_rate": 6.241133318439063e-07, "loss": 0.5308, "step": 13323 }, { "epoch": 0.84, "grad_norm": 1.0542081594467163, "learning_rate": 6.236170459036894e-07, "loss": 0.4574, "step": 13324 }, { "epoch": 0.84, "grad_norm": 1.1169860363006592, "learning_rate": 6.23120944237735e-07, "loss": 0.5238, "step": 13325 }, { "epoch": 0.84, "grad_norm": 0.9798452854156494, "learning_rate": 6.226250268669309e-07, "loss": 0.5135, "step": 13326 }, { "epoch": 0.84, "grad_norm": 1.0330060720443726, "learning_rate": 6.221292938121598e-07, "loss": 0.5312, "step": 13327 }, { "epoch": 0.84, "grad_norm": 1.0352720022201538, "learning_rate": 6.216337450942955e-07, "loss": 0.4789, "step": 13328 }, { "epoch": 0.84, "grad_norm": 1.0385183095932007, "learning_rate": 6.211383807342008e-07, "loss": 0.4893, "step": 13329 }, { "epoch": 0.84, "grad_norm": 1.119275689125061, "learning_rate": 6.206432007527368e-07, "loss": 0.4774, "step": 13330 }, { "epoch": 0.84, "grad_norm": 1.0935087203979492, "learning_rate": 6.201482051707542e-07, "loss": 0.5279, "step": 13331 }, { "epoch": 0.84, "grad_norm": 1.020162582397461, "learning_rate": 6.196533940090932e-07, "loss": 0.4822, "step": 13332 }, { "epoch": 0.84, "grad_norm": 1.2218085527420044, "learning_rate": 6.191587672885896e-07, "loss": 0.4954, "step": 13333 }, { "epoch": 0.84, "grad_norm": 1.037545084953308, "learning_rate": 6.186643250300706e-07, "loss": 0.5241, "step": 13334 }, { "epoch": 0.84, "grad_norm": 1.091447353363037, "learning_rate": 6.18170067254355e-07, "loss": 0.4714, "step": 13335 }, { "epoch": 0.84, "grad_norm": 1.047592043876648, "learning_rate": 6.176759939822557e-07, "loss": 0.4809, "step": 13336 }, { "epoch": 0.84, "grad_norm": 1.0553518533706665, "learning_rate": 6.171821052345744e-07, "loss": 0.4992, "step": 13337 }, { "epoch": 0.85, "grad_norm": 1.0779601335525513, "learning_rate": 6.166884010321072e-07, "loss": 0.5394, "step": 13338 }, { "epoch": 0.85, "grad_norm": 1.006421685218811, "learning_rate": 6.161948813956447e-07, "loss": 0.5238, "step": 13339 }, { "epoch": 0.85, "grad_norm": 1.026556372642517, "learning_rate": 6.157015463459648e-07, "loss": 0.4894, "step": 13340 }, { "epoch": 0.85, "grad_norm": 0.9980022311210632, "learning_rate": 6.152083959038407e-07, "loss": 0.446, "step": 13341 }, { "epoch": 0.85, "grad_norm": 1.0848935842514038, "learning_rate": 6.147154300900377e-07, "loss": 0.5094, "step": 13342 }, { "epoch": 0.85, "grad_norm": 1.112890601158142, "learning_rate": 6.142226489253122e-07, "loss": 0.5098, "step": 13343 }, { "epoch": 0.85, "grad_norm": 1.0533113479614258, "learning_rate": 6.137300524304151e-07, "loss": 0.5044, "step": 13344 }, { "epoch": 0.85, "grad_norm": 1.0517538785934448, "learning_rate": 6.132376406260865e-07, "loss": 0.4979, "step": 13345 }, { "epoch": 0.85, "grad_norm": 1.0184341669082642, "learning_rate": 6.127454135330585e-07, "loss": 0.5236, "step": 13346 }, { "epoch": 0.85, "grad_norm": 1.0783300399780273, "learning_rate": 6.122533711720613e-07, "loss": 0.4872, "step": 13347 }, { "epoch": 0.85, "grad_norm": 1.1193021535873413, "learning_rate": 6.1176151356381e-07, "loss": 0.5704, "step": 13348 }, { "epoch": 0.85, "grad_norm": 1.0210407972335815, "learning_rate": 6.112698407290158e-07, "loss": 0.5419, "step": 13349 }, { "epoch": 0.85, "grad_norm": 1.0369386672973633, "learning_rate": 6.107783526883809e-07, "loss": 0.523, "step": 13350 }, { "epoch": 0.85, "grad_norm": 0.9796282052993774, "learning_rate": 6.102870494626006e-07, "loss": 0.4769, "step": 13351 }, { "epoch": 0.85, "grad_norm": 1.1247459650039673, "learning_rate": 6.097959310723633e-07, "loss": 0.5422, "step": 13352 }, { "epoch": 0.85, "grad_norm": 0.99430251121521, "learning_rate": 6.093049975383458e-07, "loss": 0.5098, "step": 13353 }, { "epoch": 0.85, "grad_norm": 1.0856844186782837, "learning_rate": 6.08814248881221e-07, "loss": 0.4806, "step": 13354 }, { "epoch": 0.85, "grad_norm": 1.0806077718734741, "learning_rate": 6.083236851216517e-07, "loss": 0.4805, "step": 13355 }, { "epoch": 0.85, "grad_norm": 1.1206717491149902, "learning_rate": 6.078333062802949e-07, "loss": 0.5672, "step": 13356 }, { "epoch": 0.85, "grad_norm": 1.1182076930999756, "learning_rate": 6.073431123777984e-07, "loss": 0.5323, "step": 13357 }, { "epoch": 0.85, "grad_norm": 1.1024640798568726, "learning_rate": 6.068531034348035e-07, "loss": 0.5219, "step": 13358 }, { "epoch": 0.85, "grad_norm": 1.0542242527008057, "learning_rate": 6.063632794719399e-07, "loss": 0.4919, "step": 13359 }, { "epoch": 0.85, "grad_norm": 1.079557180404663, "learning_rate": 6.058736405098359e-07, "loss": 0.5392, "step": 13360 }, { "epoch": 0.85, "grad_norm": 1.1475123167037964, "learning_rate": 6.053841865691063e-07, "loss": 0.5783, "step": 13361 }, { "epoch": 0.85, "grad_norm": 1.0788050889968872, "learning_rate": 6.048949176703606e-07, "loss": 0.4897, "step": 13362 }, { "epoch": 0.85, "grad_norm": 1.0291560888290405, "learning_rate": 6.044058338342002e-07, "loss": 0.4777, "step": 13363 }, { "epoch": 0.85, "grad_norm": 1.0231040716171265, "learning_rate": 6.039169350812191e-07, "loss": 0.5104, "step": 13364 }, { "epoch": 0.85, "grad_norm": 1.0467113256454468, "learning_rate": 6.034282214320031e-07, "loss": 0.5164, "step": 13365 }, { "epoch": 0.85, "grad_norm": 1.2185094356536865, "learning_rate": 6.029396929071313e-07, "loss": 0.5592, "step": 13366 }, { "epoch": 0.85, "grad_norm": 1.0959972143173218, "learning_rate": 6.024513495271705e-07, "loss": 0.5077, "step": 13367 }, { "epoch": 0.85, "grad_norm": 1.0685462951660156, "learning_rate": 6.019631913126877e-07, "loss": 0.4392, "step": 13368 }, { "epoch": 0.85, "grad_norm": 1.1245286464691162, "learning_rate": 6.014752182842343e-07, "loss": 0.488, "step": 13369 }, { "epoch": 0.85, "grad_norm": 1.0407520532608032, "learning_rate": 6.009874304623576e-07, "loss": 0.4951, "step": 13370 }, { "epoch": 0.85, "grad_norm": 0.9670149087905884, "learning_rate": 6.004998278675988e-07, "loss": 0.4493, "step": 13371 }, { "epoch": 0.85, "grad_norm": 1.0624332427978516, "learning_rate": 6.000124105204847e-07, "loss": 0.4983, "step": 13372 }, { "epoch": 0.85, "grad_norm": 1.092924952507019, "learning_rate": 5.995251784415435e-07, "loss": 0.5028, "step": 13373 }, { "epoch": 0.85, "grad_norm": 1.0477136373519897, "learning_rate": 5.990381316512894e-07, "loss": 0.5134, "step": 13374 }, { "epoch": 0.85, "grad_norm": 1.074385404586792, "learning_rate": 5.985512701702284e-07, "loss": 0.5002, "step": 13375 }, { "epoch": 0.85, "grad_norm": 1.0472429990768433, "learning_rate": 5.980645940188623e-07, "loss": 0.4914, "step": 13376 }, { "epoch": 0.85, "grad_norm": 1.0674313306808472, "learning_rate": 5.975781032176831e-07, "loss": 0.4902, "step": 13377 }, { "epoch": 0.85, "grad_norm": 1.0908169746398926, "learning_rate": 5.970917977871749e-07, "loss": 0.513, "step": 13378 }, { "epoch": 0.85, "grad_norm": 1.0883985757827759, "learning_rate": 5.966056777478152e-07, "loss": 0.4675, "step": 13379 }, { "epoch": 0.85, "grad_norm": 1.1221798658370972, "learning_rate": 5.961197431200705e-07, "loss": 0.5164, "step": 13380 }, { "epoch": 0.85, "grad_norm": 1.1040724515914917, "learning_rate": 5.956339939244044e-07, "loss": 0.4699, "step": 13381 }, { "epoch": 0.85, "grad_norm": 1.0564048290252686, "learning_rate": 5.951484301812699e-07, "loss": 0.5189, "step": 13382 }, { "epoch": 0.85, "grad_norm": 1.131378412246704, "learning_rate": 5.946630519111107e-07, "loss": 0.511, "step": 13383 }, { "epoch": 0.85, "grad_norm": 1.0738738775253296, "learning_rate": 5.941778591343656e-07, "loss": 0.5174, "step": 13384 }, { "epoch": 0.85, "grad_norm": 1.0545551776885986, "learning_rate": 5.936928518714641e-07, "loss": 0.4968, "step": 13385 }, { "epoch": 0.85, "grad_norm": 1.0524842739105225, "learning_rate": 5.932080301428278e-07, "loss": 0.5021, "step": 13386 }, { "epoch": 0.85, "grad_norm": 1.0227655172348022, "learning_rate": 5.927233939688714e-07, "loss": 0.5357, "step": 13387 }, { "epoch": 0.85, "grad_norm": 1.021254301071167, "learning_rate": 5.922389433700021e-07, "loss": 0.4929, "step": 13388 }, { "epoch": 0.85, "grad_norm": 1.0238847732543945, "learning_rate": 5.917546783666156e-07, "loss": 0.4754, "step": 13389 }, { "epoch": 0.85, "grad_norm": 1.070908546447754, "learning_rate": 5.912705989791062e-07, "loss": 0.5137, "step": 13390 }, { "epoch": 0.85, "grad_norm": 1.0081439018249512, "learning_rate": 5.907867052278543e-07, "loss": 0.4707, "step": 13391 }, { "epoch": 0.85, "grad_norm": 1.061011791229248, "learning_rate": 5.903029971332353e-07, "loss": 0.536, "step": 13392 }, { "epoch": 0.85, "grad_norm": 1.0461562871932983, "learning_rate": 5.898194747156171e-07, "loss": 0.5196, "step": 13393 }, { "epoch": 0.85, "grad_norm": 1.0973914861679077, "learning_rate": 5.893361379953588e-07, "loss": 0.5192, "step": 13394 }, { "epoch": 0.85, "grad_norm": 1.1432956457138062, "learning_rate": 5.888529869928122e-07, "loss": 0.5257, "step": 13395 }, { "epoch": 0.85, "grad_norm": 1.1039844751358032, "learning_rate": 5.883700217283223e-07, "loss": 0.468, "step": 13396 }, { "epoch": 0.85, "grad_norm": 1.1045359373092651, "learning_rate": 5.878872422222215e-07, "loss": 0.4658, "step": 13397 }, { "epoch": 0.85, "grad_norm": 1.1638327836990356, "learning_rate": 5.874046484948426e-07, "loss": 0.5342, "step": 13398 }, { "epoch": 0.85, "grad_norm": 1.0851562023162842, "learning_rate": 5.869222405665026e-07, "loss": 0.4768, "step": 13399 }, { "epoch": 0.85, "grad_norm": 1.1951812505722046, "learning_rate": 5.864400184575153e-07, "loss": 0.5113, "step": 13400 }, { "epoch": 0.85, "grad_norm": 1.1096984148025513, "learning_rate": 5.859579821881855e-07, "loss": 0.5218, "step": 13401 }, { "epoch": 0.85, "grad_norm": 1.0537545680999756, "learning_rate": 5.854761317788082e-07, "loss": 0.4826, "step": 13402 }, { "epoch": 0.85, "grad_norm": 1.0303213596343994, "learning_rate": 5.849944672496749e-07, "loss": 0.5577, "step": 13403 }, { "epoch": 0.85, "grad_norm": 1.1699107885360718, "learning_rate": 5.845129886210671e-07, "loss": 0.546, "step": 13404 }, { "epoch": 0.85, "grad_norm": 1.0213823318481445, "learning_rate": 5.840316959132558e-07, "loss": 0.4528, "step": 13405 }, { "epoch": 0.85, "grad_norm": 1.068701982498169, "learning_rate": 5.835505891465076e-07, "loss": 0.5116, "step": 13406 }, { "epoch": 0.85, "grad_norm": 1.0392073392868042, "learning_rate": 5.830696683410802e-07, "loss": 0.4792, "step": 13407 }, { "epoch": 0.85, "grad_norm": 0.9517161250114441, "learning_rate": 5.825889335172241e-07, "loss": 0.4691, "step": 13408 }, { "epoch": 0.85, "grad_norm": 1.0567563772201538, "learning_rate": 5.821083846951819e-07, "loss": 0.5436, "step": 13409 }, { "epoch": 0.85, "grad_norm": 1.060482144355774, "learning_rate": 5.816280218951847e-07, "loss": 0.4599, "step": 13410 }, { "epoch": 0.85, "grad_norm": 1.0229899883270264, "learning_rate": 5.811478451374625e-07, "loss": 0.4913, "step": 13411 }, { "epoch": 0.85, "grad_norm": 1.1489890813827515, "learning_rate": 5.806678544422334e-07, "loss": 0.5356, "step": 13412 }, { "epoch": 0.85, "grad_norm": 1.0392504930496216, "learning_rate": 5.801880498297057e-07, "loss": 0.4936, "step": 13413 }, { "epoch": 0.85, "grad_norm": 1.0875258445739746, "learning_rate": 5.797084313200846e-07, "loss": 0.4962, "step": 13414 }, { "epoch": 0.85, "grad_norm": 1.0216097831726074, "learning_rate": 5.792289989335637e-07, "loss": 0.5191, "step": 13415 }, { "epoch": 0.85, "grad_norm": 1.1515929698944092, "learning_rate": 5.787497526903313e-07, "loss": 0.5328, "step": 13416 }, { "epoch": 0.85, "grad_norm": 1.0613532066345215, "learning_rate": 5.782706926105674e-07, "loss": 0.5331, "step": 13417 }, { "epoch": 0.85, "grad_norm": 1.016359806060791, "learning_rate": 5.777918187144416e-07, "loss": 0.5008, "step": 13418 }, { "epoch": 0.85, "grad_norm": 1.1669195890426636, "learning_rate": 5.773131310221169e-07, "loss": 0.5416, "step": 13419 }, { "epoch": 0.85, "grad_norm": 1.0914517641067505, "learning_rate": 5.768346295537536e-07, "loss": 0.5339, "step": 13420 }, { "epoch": 0.85, "grad_norm": 1.0393308401107788, "learning_rate": 5.76356314329496e-07, "loss": 0.5353, "step": 13421 }, { "epoch": 0.85, "grad_norm": 1.111310362815857, "learning_rate": 5.758781853694845e-07, "loss": 0.5443, "step": 13422 }, { "epoch": 0.85, "grad_norm": 1.0661511421203613, "learning_rate": 5.754002426938532e-07, "loss": 0.5195, "step": 13423 }, { "epoch": 0.85, "grad_norm": 1.052367091178894, "learning_rate": 5.749224863227249e-07, "loss": 0.4722, "step": 13424 }, { "epoch": 0.85, "grad_norm": 1.0971262454986572, "learning_rate": 5.744449162762183e-07, "loss": 0.4774, "step": 13425 }, { "epoch": 0.85, "grad_norm": 1.053396463394165, "learning_rate": 5.739675325744398e-07, "loss": 0.51, "step": 13426 }, { "epoch": 0.85, "grad_norm": 1.101646900177002, "learning_rate": 5.734903352374904e-07, "loss": 0.4877, "step": 13427 }, { "epoch": 0.85, "grad_norm": 0.9976493716239929, "learning_rate": 5.730133242854663e-07, "loss": 0.4812, "step": 13428 }, { "epoch": 0.85, "grad_norm": 1.06601083278656, "learning_rate": 5.725364997384498e-07, "loss": 0.5059, "step": 13429 }, { "epoch": 0.85, "grad_norm": 1.0498607158660889, "learning_rate": 5.720598616165196e-07, "loss": 0.5217, "step": 13430 }, { "epoch": 0.85, "grad_norm": 1.089664101600647, "learning_rate": 5.715834099397455e-07, "loss": 0.4993, "step": 13431 }, { "epoch": 0.85, "grad_norm": 0.9954808354377747, "learning_rate": 5.711071447281868e-07, "loss": 0.4651, "step": 13432 }, { "epoch": 0.85, "grad_norm": 0.9758763909339905, "learning_rate": 5.70631066001901e-07, "loss": 0.4788, "step": 13433 }, { "epoch": 0.85, "grad_norm": 1.0360801219940186, "learning_rate": 5.701551737809319e-07, "loss": 0.4746, "step": 13434 }, { "epoch": 0.85, "grad_norm": 1.1086175441741943, "learning_rate": 5.696794680853179e-07, "loss": 0.4847, "step": 13435 }, { "epoch": 0.85, "grad_norm": 1.0172804594039917, "learning_rate": 5.692039489350892e-07, "loss": 0.4974, "step": 13436 }, { "epoch": 0.85, "grad_norm": 1.1112823486328125, "learning_rate": 5.687286163502687e-07, "loss": 0.4819, "step": 13437 }, { "epoch": 0.85, "grad_norm": 1.057182788848877, "learning_rate": 5.682534703508713e-07, "loss": 0.4543, "step": 13438 }, { "epoch": 0.85, "grad_norm": 1.0339446067810059, "learning_rate": 5.67778510956904e-07, "loss": 0.471, "step": 13439 }, { "epoch": 0.85, "grad_norm": 0.9820137619972229, "learning_rate": 5.673037381883634e-07, "loss": 0.4218, "step": 13440 }, { "epoch": 0.85, "grad_norm": 1.03311288356781, "learning_rate": 5.668291520652436e-07, "loss": 0.4846, "step": 13441 }, { "epoch": 0.85, "grad_norm": 0.9906759262084961, "learning_rate": 5.663547526075258e-07, "loss": 0.4565, "step": 13442 }, { "epoch": 0.85, "grad_norm": 1.0246329307556152, "learning_rate": 5.658805398351858e-07, "loss": 0.5291, "step": 13443 }, { "epoch": 0.85, "grad_norm": 0.9982864260673523, "learning_rate": 5.654065137681907e-07, "loss": 0.4907, "step": 13444 }, { "epoch": 0.85, "grad_norm": 1.123137354850769, "learning_rate": 5.64932674426501e-07, "loss": 0.5616, "step": 13445 }, { "epoch": 0.85, "grad_norm": 1.0283350944519043, "learning_rate": 5.644590218300672e-07, "loss": 0.4707, "step": 13446 }, { "epoch": 0.85, "grad_norm": 1.0369606018066406, "learning_rate": 5.639855559988356e-07, "loss": 0.4627, "step": 13447 }, { "epoch": 0.85, "grad_norm": 1.040816068649292, "learning_rate": 5.63512276952739e-07, "loss": 0.5324, "step": 13448 }, { "epoch": 0.85, "grad_norm": 1.088789701461792, "learning_rate": 5.630391847117073e-07, "loss": 0.5435, "step": 13449 }, { "epoch": 0.85, "grad_norm": 1.0620760917663574, "learning_rate": 5.625662792956604e-07, "loss": 0.5114, "step": 13450 }, { "epoch": 0.85, "grad_norm": 1.0261839628219604, "learning_rate": 5.620935607245109e-07, "loss": 0.5098, "step": 13451 }, { "epoch": 0.85, "grad_norm": 0.9988752007484436, "learning_rate": 5.616210290181628e-07, "loss": 0.459, "step": 13452 }, { "epoch": 0.85, "grad_norm": 1.1079870462417603, "learning_rate": 5.611486841965136e-07, "loss": 0.48, "step": 13453 }, { "epoch": 0.85, "grad_norm": 1.0706554651260376, "learning_rate": 5.606765262794512e-07, "loss": 0.5064, "step": 13454 }, { "epoch": 0.85, "grad_norm": 1.140230655670166, "learning_rate": 5.602045552868585e-07, "loss": 0.5137, "step": 13455 }, { "epoch": 0.85, "grad_norm": 1.059227466583252, "learning_rate": 5.597327712386058e-07, "loss": 0.536, "step": 13456 }, { "epoch": 0.85, "grad_norm": 0.9997138977050781, "learning_rate": 5.592611741545594e-07, "loss": 0.446, "step": 13457 }, { "epoch": 0.85, "grad_norm": 1.0232324600219727, "learning_rate": 5.58789764054577e-07, "loss": 0.507, "step": 13458 }, { "epoch": 0.85, "grad_norm": 1.0606647729873657, "learning_rate": 5.583185409585079e-07, "loss": 0.481, "step": 13459 }, { "epoch": 0.85, "grad_norm": 1.1186909675598145, "learning_rate": 5.578475048861931e-07, "loss": 0.4988, "step": 13460 }, { "epoch": 0.85, "grad_norm": 0.9839795827865601, "learning_rate": 5.573766558574684e-07, "loss": 0.4766, "step": 13461 }, { "epoch": 0.85, "grad_norm": 0.9404541254043579, "learning_rate": 5.569059938921551e-07, "loss": 0.488, "step": 13462 }, { "epoch": 0.85, "grad_norm": 0.9731743931770325, "learning_rate": 5.564355190100768e-07, "loss": 0.4615, "step": 13463 }, { "epoch": 0.85, "grad_norm": 0.9581971764564514, "learning_rate": 5.559652312310393e-07, "loss": 0.4714, "step": 13464 }, { "epoch": 0.85, "grad_norm": 1.0430355072021484, "learning_rate": 5.554951305748462e-07, "loss": 0.4696, "step": 13465 }, { "epoch": 0.85, "grad_norm": 1.0668957233428955, "learning_rate": 5.550252170612924e-07, "loss": 0.4743, "step": 13466 }, { "epoch": 0.85, "grad_norm": 1.0566219091415405, "learning_rate": 5.545554907101636e-07, "loss": 0.4965, "step": 13467 }, { "epoch": 0.85, "grad_norm": 0.9783183336257935, "learning_rate": 5.540859515412378e-07, "loss": 0.4866, "step": 13468 }, { "epoch": 0.85, "grad_norm": 0.9738695025444031, "learning_rate": 5.536165995742882e-07, "loss": 0.4563, "step": 13469 }, { "epoch": 0.85, "grad_norm": 1.023001790046692, "learning_rate": 5.531474348290733e-07, "loss": 0.4656, "step": 13470 }, { "epoch": 0.85, "grad_norm": 1.0590389966964722, "learning_rate": 5.526784573253525e-07, "loss": 0.51, "step": 13471 }, { "epoch": 0.85, "grad_norm": 1.0875754356384277, "learning_rate": 5.522096670828703e-07, "loss": 0.5145, "step": 13472 }, { "epoch": 0.85, "grad_norm": 1.018993854522705, "learning_rate": 5.517410641213656e-07, "loss": 0.5021, "step": 13473 }, { "epoch": 0.85, "grad_norm": 0.9939917325973511, "learning_rate": 5.512726484605707e-07, "loss": 0.484, "step": 13474 }, { "epoch": 0.85, "grad_norm": 1.0694676637649536, "learning_rate": 5.508044201202084e-07, "loss": 0.4823, "step": 13475 }, { "epoch": 0.85, "grad_norm": 1.0037014484405518, "learning_rate": 5.503363791199945e-07, "loss": 0.4544, "step": 13476 }, { "epoch": 0.85, "grad_norm": 1.0620406866073608, "learning_rate": 5.49868525479637e-07, "loss": 0.5536, "step": 13477 }, { "epoch": 0.85, "grad_norm": 1.023963451385498, "learning_rate": 5.494008592188344e-07, "loss": 0.4545, "step": 13478 }, { "epoch": 0.85, "grad_norm": 1.0016241073608398, "learning_rate": 5.489333803572788e-07, "loss": 0.4767, "step": 13479 }, { "epoch": 0.85, "grad_norm": 0.9408761858940125, "learning_rate": 5.484660889146548e-07, "loss": 0.453, "step": 13480 }, { "epoch": 0.85, "grad_norm": 1.110805869102478, "learning_rate": 5.479989849106381e-07, "loss": 0.5043, "step": 13481 }, { "epoch": 0.85, "grad_norm": 1.0312732458114624, "learning_rate": 5.475320683648977e-07, "loss": 0.491, "step": 13482 }, { "epoch": 0.85, "grad_norm": 0.9720163941383362, "learning_rate": 5.470653392970904e-07, "loss": 0.4894, "step": 13483 }, { "epoch": 0.85, "grad_norm": 1.0498781204223633, "learning_rate": 5.465987977268727e-07, "loss": 0.4994, "step": 13484 }, { "epoch": 0.85, "grad_norm": 1.0908483266830444, "learning_rate": 5.46132443673888e-07, "loss": 0.5265, "step": 13485 }, { "epoch": 0.85, "grad_norm": 1.0789039134979248, "learning_rate": 5.456662771577714e-07, "loss": 0.4853, "step": 13486 }, { "epoch": 0.85, "grad_norm": 1.1271196603775024, "learning_rate": 5.452002981981519e-07, "loss": 0.5378, "step": 13487 }, { "epoch": 0.85, "grad_norm": 1.0779871940612793, "learning_rate": 5.447345068146515e-07, "loss": 0.5562, "step": 13488 }, { "epoch": 0.85, "grad_norm": 1.0521538257598877, "learning_rate": 5.442689030268816e-07, "loss": 0.4833, "step": 13489 }, { "epoch": 0.85, "grad_norm": 1.155379295349121, "learning_rate": 5.438034868544495e-07, "loss": 0.5332, "step": 13490 }, { "epoch": 0.85, "grad_norm": 1.0768070220947266, "learning_rate": 5.433382583169478e-07, "loss": 0.5016, "step": 13491 }, { "epoch": 0.85, "grad_norm": 1.1185115575790405, "learning_rate": 5.428732174339702e-07, "loss": 0.5098, "step": 13492 }, { "epoch": 0.85, "grad_norm": 1.0764923095703125, "learning_rate": 5.424083642250966e-07, "loss": 0.4944, "step": 13493 }, { "epoch": 0.85, "grad_norm": 1.1410459280014038, "learning_rate": 5.419436987098991e-07, "loss": 0.546, "step": 13494 }, { "epoch": 0.85, "grad_norm": 1.112058162689209, "learning_rate": 5.414792209079445e-07, "loss": 0.5081, "step": 13495 }, { "epoch": 0.86, "grad_norm": 1.0271916389465332, "learning_rate": 5.410149308387891e-07, "loss": 0.5599, "step": 13496 }, { "epoch": 0.86, "grad_norm": 1.0328270196914673, "learning_rate": 5.405508285219835e-07, "loss": 0.5023, "step": 13497 }, { "epoch": 0.86, "grad_norm": 0.9662725925445557, "learning_rate": 5.400869139770704e-07, "loss": 0.438, "step": 13498 }, { "epoch": 0.86, "grad_norm": 1.0837187767028809, "learning_rate": 5.396231872235819e-07, "loss": 0.4805, "step": 13499 }, { "epoch": 0.86, "grad_norm": 1.087839961051941, "learning_rate": 5.391596482810424e-07, "loss": 0.5013, "step": 13500 }, { "epoch": 0.86, "grad_norm": 1.0441499948501587, "learning_rate": 5.386962971689746e-07, "loss": 0.4961, "step": 13501 }, { "epoch": 0.86, "grad_norm": 1.0059573650360107, "learning_rate": 5.382331339068853e-07, "loss": 0.506, "step": 13502 }, { "epoch": 0.86, "grad_norm": 0.980630099773407, "learning_rate": 5.377701585142769e-07, "loss": 0.4787, "step": 13503 }, { "epoch": 0.86, "grad_norm": 1.1318646669387817, "learning_rate": 5.373073710106441e-07, "loss": 0.5448, "step": 13504 }, { "epoch": 0.86, "grad_norm": 1.1532589197158813, "learning_rate": 5.368447714154734e-07, "loss": 0.5069, "step": 13505 }, { "epoch": 0.86, "grad_norm": 1.0172760486602783, "learning_rate": 5.363823597482443e-07, "loss": 0.5132, "step": 13506 }, { "epoch": 0.86, "grad_norm": 1.0407923460006714, "learning_rate": 5.359201360284255e-07, "loss": 0.488, "step": 13507 }, { "epoch": 0.86, "grad_norm": 1.1686497926712036, "learning_rate": 5.354581002754799e-07, "loss": 0.5275, "step": 13508 }, { "epoch": 0.86, "grad_norm": 0.9421904683113098, "learning_rate": 5.349962525088631e-07, "loss": 0.4569, "step": 13509 }, { "epoch": 0.86, "grad_norm": 1.0419373512268066, "learning_rate": 5.345345927480211e-07, "loss": 0.4937, "step": 13510 }, { "epoch": 0.86, "grad_norm": 1.0212336778640747, "learning_rate": 5.340731210123934e-07, "loss": 0.4955, "step": 13511 }, { "epoch": 0.86, "grad_norm": 1.0964690446853638, "learning_rate": 5.336118373214116e-07, "loss": 0.5284, "step": 13512 }, { "epoch": 0.86, "grad_norm": 1.086272954940796, "learning_rate": 5.331507416944965e-07, "loss": 0.5009, "step": 13513 }, { "epoch": 0.86, "grad_norm": 1.071381688117981, "learning_rate": 5.326898341510655e-07, "loss": 0.5464, "step": 13514 }, { "epoch": 0.86, "grad_norm": 1.0455056428909302, "learning_rate": 5.322291147105246e-07, "loss": 0.4958, "step": 13515 }, { "epoch": 0.86, "grad_norm": 1.069616675376892, "learning_rate": 5.317685833922737e-07, "loss": 0.5581, "step": 13516 }, { "epoch": 0.86, "grad_norm": 1.0819993019104004, "learning_rate": 5.313082402157039e-07, "loss": 0.4991, "step": 13517 }, { "epoch": 0.86, "grad_norm": 0.9846544861793518, "learning_rate": 5.308480852001979e-07, "loss": 0.4838, "step": 13518 }, { "epoch": 0.86, "grad_norm": 1.094940423965454, "learning_rate": 5.303881183651327e-07, "loss": 0.4385, "step": 13519 }, { "epoch": 0.86, "grad_norm": 1.0644207000732422, "learning_rate": 5.29928339729876e-07, "loss": 0.5435, "step": 13520 }, { "epoch": 0.86, "grad_norm": 1.0141444206237793, "learning_rate": 5.294687493137845e-07, "loss": 0.5139, "step": 13521 }, { "epoch": 0.86, "grad_norm": 1.046487808227539, "learning_rate": 5.290093471362145e-07, "loss": 0.5192, "step": 13522 }, { "epoch": 0.86, "grad_norm": 0.9801287055015564, "learning_rate": 5.28550133216506e-07, "loss": 0.5298, "step": 13523 }, { "epoch": 0.86, "grad_norm": 1.0636112689971924, "learning_rate": 5.28091107573997e-07, "loss": 0.4557, "step": 13524 }, { "epoch": 0.86, "grad_norm": 0.9705425500869751, "learning_rate": 5.27632270228014e-07, "loss": 0.4822, "step": 13525 }, { "epoch": 0.86, "grad_norm": 1.1575100421905518, "learning_rate": 5.271736211978784e-07, "loss": 0.483, "step": 13526 }, { "epoch": 0.86, "grad_norm": 1.126613736152649, "learning_rate": 5.267151605029014e-07, "loss": 0.4884, "step": 13527 }, { "epoch": 0.86, "grad_norm": 1.0410032272338867, "learning_rate": 5.262568881623892e-07, "loss": 0.4944, "step": 13528 }, { "epoch": 0.86, "grad_norm": 1.0787334442138672, "learning_rate": 5.257988041956347e-07, "loss": 0.4754, "step": 13529 }, { "epoch": 0.86, "grad_norm": 1.1417659521102905, "learning_rate": 5.253409086219274e-07, "loss": 0.5649, "step": 13530 }, { "epoch": 0.86, "grad_norm": 1.06132972240448, "learning_rate": 5.248832014605503e-07, "loss": 0.4725, "step": 13531 }, { "epoch": 0.86, "grad_norm": 1.1390949487686157, "learning_rate": 5.244256827307726e-07, "loss": 0.5635, "step": 13532 }, { "epoch": 0.86, "grad_norm": 0.9856281876564026, "learning_rate": 5.239683524518596e-07, "loss": 0.4919, "step": 13533 }, { "epoch": 0.86, "grad_norm": 1.1564993858337402, "learning_rate": 5.23511210643069e-07, "loss": 0.5237, "step": 13534 }, { "epoch": 0.86, "grad_norm": 0.9941248297691345, "learning_rate": 5.230542573236485e-07, "loss": 0.4986, "step": 13535 }, { "epoch": 0.86, "grad_norm": 1.0197328329086304, "learning_rate": 5.225974925128402e-07, "loss": 0.4711, "step": 13536 }, { "epoch": 0.86, "grad_norm": 1.131434679031372, "learning_rate": 5.221409162298741e-07, "loss": 0.5358, "step": 13537 }, { "epoch": 0.86, "grad_norm": 1.0401612520217896, "learning_rate": 5.216845284939764e-07, "loss": 0.5269, "step": 13538 }, { "epoch": 0.86, "grad_norm": 1.0301513671875, "learning_rate": 5.212283293243658e-07, "loss": 0.48, "step": 13539 }, { "epoch": 0.86, "grad_norm": 1.0342973470687866, "learning_rate": 5.207723187402491e-07, "loss": 0.4466, "step": 13540 }, { "epoch": 0.86, "grad_norm": 1.1259351968765259, "learning_rate": 5.203164967608282e-07, "loss": 0.5571, "step": 13541 }, { "epoch": 0.86, "grad_norm": 1.121213436126709, "learning_rate": 5.198608634052965e-07, "loss": 0.572, "step": 13542 }, { "epoch": 0.86, "grad_norm": 1.0400069952011108, "learning_rate": 5.194054186928365e-07, "loss": 0.4837, "step": 13543 }, { "epoch": 0.86, "grad_norm": 1.0414228439331055, "learning_rate": 5.189501626426297e-07, "loss": 0.4879, "step": 13544 }, { "epoch": 0.86, "grad_norm": 1.0761221647262573, "learning_rate": 5.184950952738421e-07, "loss": 0.5487, "step": 13545 }, { "epoch": 0.86, "grad_norm": 1.091073989868164, "learning_rate": 5.180402166056359e-07, "loss": 0.4429, "step": 13546 }, { "epoch": 0.86, "grad_norm": 1.127923607826233, "learning_rate": 5.175855266571644e-07, "loss": 0.5364, "step": 13547 }, { "epoch": 0.86, "grad_norm": 1.1597503423690796, "learning_rate": 5.171310254475737e-07, "loss": 0.514, "step": 13548 }, { "epoch": 0.86, "grad_norm": 1.162801742553711, "learning_rate": 5.166767129960004e-07, "loss": 0.5011, "step": 13549 }, { "epoch": 0.86, "grad_norm": 1.0138014554977417, "learning_rate": 5.162225893215755e-07, "loss": 0.4867, "step": 13550 }, { "epoch": 0.86, "grad_norm": 1.0033013820648193, "learning_rate": 5.157686544434176e-07, "loss": 0.4832, "step": 13551 }, { "epoch": 0.86, "grad_norm": 1.0404198169708252, "learning_rate": 5.153149083806436e-07, "loss": 0.4695, "step": 13552 }, { "epoch": 0.86, "grad_norm": 1.141335368156433, "learning_rate": 5.14861351152357e-07, "loss": 0.5374, "step": 13553 }, { "epoch": 0.86, "grad_norm": 1.062104344367981, "learning_rate": 5.144079827776566e-07, "loss": 0.4696, "step": 13554 }, { "epoch": 0.86, "grad_norm": 0.9700568318367004, "learning_rate": 5.139548032756325e-07, "loss": 0.4859, "step": 13555 }, { "epoch": 0.86, "grad_norm": 0.995707631111145, "learning_rate": 5.13501812665364e-07, "loss": 0.4342, "step": 13556 }, { "epoch": 0.86, "grad_norm": 1.113817811012268, "learning_rate": 5.130490109659275e-07, "loss": 0.4639, "step": 13557 }, { "epoch": 0.86, "grad_norm": 1.0634162425994873, "learning_rate": 5.125963981963894e-07, "loss": 0.513, "step": 13558 }, { "epoch": 0.86, "grad_norm": 0.9440078139305115, "learning_rate": 5.12143974375805e-07, "loss": 0.4778, "step": 13559 }, { "epoch": 0.86, "grad_norm": 1.0164488554000854, "learning_rate": 5.116917395232262e-07, "loss": 0.5417, "step": 13560 }, { "epoch": 0.86, "grad_norm": 1.050866723060608, "learning_rate": 5.112396936576947e-07, "loss": 0.4614, "step": 13561 }, { "epoch": 0.86, "grad_norm": 1.016036868095398, "learning_rate": 5.107878367982438e-07, "loss": 0.4636, "step": 13562 }, { "epoch": 0.86, "grad_norm": 1.132187008857727, "learning_rate": 5.103361689639019e-07, "loss": 0.5277, "step": 13563 }, { "epoch": 0.86, "grad_norm": 1.0902912616729736, "learning_rate": 5.098846901736832e-07, "loss": 0.5161, "step": 13564 }, { "epoch": 0.86, "grad_norm": 1.0292311906814575, "learning_rate": 5.094334004466012e-07, "loss": 0.5256, "step": 13565 }, { "epoch": 0.86, "grad_norm": 1.0934474468231201, "learning_rate": 5.089822998016586e-07, "loss": 0.4878, "step": 13566 }, { "epoch": 0.86, "grad_norm": 1.1122615337371826, "learning_rate": 5.085313882578469e-07, "loss": 0.4703, "step": 13567 }, { "epoch": 0.86, "grad_norm": 1.130413293838501, "learning_rate": 5.080806658341536e-07, "loss": 0.5113, "step": 13568 }, { "epoch": 0.86, "grad_norm": 1.1992076635360718, "learning_rate": 5.076301325495575e-07, "loss": 0.5321, "step": 13569 }, { "epoch": 0.86, "grad_norm": 0.9438257813453674, "learning_rate": 5.071797884230284e-07, "loss": 0.4599, "step": 13570 }, { "epoch": 0.86, "grad_norm": 1.0700342655181885, "learning_rate": 5.067296334735306e-07, "loss": 0.5489, "step": 13571 }, { "epoch": 0.86, "grad_norm": 1.1255141496658325, "learning_rate": 5.062796677200154e-07, "loss": 0.5352, "step": 13572 }, { "epoch": 0.86, "grad_norm": 1.0605268478393555, "learning_rate": 5.058298911814302e-07, "loss": 0.5164, "step": 13573 }, { "epoch": 0.86, "grad_norm": 1.0894030332565308, "learning_rate": 5.053803038767158e-07, "loss": 0.5137, "step": 13574 }, { "epoch": 0.86, "grad_norm": 1.0053369998931885, "learning_rate": 5.049309058248004e-07, "loss": 0.4574, "step": 13575 }, { "epoch": 0.86, "grad_norm": 1.0016471147537231, "learning_rate": 5.044816970446076e-07, "loss": 0.4495, "step": 13576 }, { "epoch": 0.86, "grad_norm": 1.084174394607544, "learning_rate": 5.040326775550514e-07, "loss": 0.5058, "step": 13577 }, { "epoch": 0.86, "grad_norm": 1.1791083812713623, "learning_rate": 5.035838473750393e-07, "loss": 0.5271, "step": 13578 }, { "epoch": 0.86, "grad_norm": 1.030188798904419, "learning_rate": 5.031352065234702e-07, "loss": 0.4744, "step": 13579 }, { "epoch": 0.86, "grad_norm": 1.043033480644226, "learning_rate": 5.026867550192327e-07, "loss": 0.4716, "step": 13580 }, { "epoch": 0.86, "grad_norm": 1.157036304473877, "learning_rate": 5.022384928812107e-07, "loss": 0.5134, "step": 13581 }, { "epoch": 0.86, "grad_norm": 0.9832121133804321, "learning_rate": 5.017904201282808e-07, "loss": 0.454, "step": 13582 }, { "epoch": 0.86, "grad_norm": 1.0457446575164795, "learning_rate": 5.013425367793074e-07, "loss": 0.4794, "step": 13583 }, { "epoch": 0.86, "grad_norm": 1.0089260339736938, "learning_rate": 5.008948428531496e-07, "loss": 0.4778, "step": 13584 }, { "epoch": 0.86, "grad_norm": 1.0331329107284546, "learning_rate": 5.004473383686592e-07, "loss": 0.515, "step": 13585 }, { "epoch": 0.86, "grad_norm": 1.1153910160064697, "learning_rate": 5.000000233446783e-07, "loss": 0.4699, "step": 13586 }, { "epoch": 0.86, "grad_norm": 1.0856069326400757, "learning_rate": 4.99552897800043e-07, "loss": 0.4885, "step": 13587 }, { "epoch": 0.86, "grad_norm": 1.0789910554885864, "learning_rate": 4.991059617535781e-07, "loss": 0.4944, "step": 13588 }, { "epoch": 0.86, "grad_norm": 1.072582483291626, "learning_rate": 4.986592152241043e-07, "loss": 0.5377, "step": 13589 }, { "epoch": 0.86, "grad_norm": 1.044414758682251, "learning_rate": 4.982126582304314e-07, "loss": 0.4907, "step": 13590 }, { "epoch": 0.86, "grad_norm": 1.141349196434021, "learning_rate": 4.977662907913633e-07, "loss": 0.5417, "step": 13591 }, { "epoch": 0.86, "grad_norm": 1.0667126178741455, "learning_rate": 4.973201129256943e-07, "loss": 0.4893, "step": 13592 }, { "epoch": 0.86, "grad_norm": 1.069313406944275, "learning_rate": 4.968741246522129e-07, "loss": 0.526, "step": 13593 }, { "epoch": 0.86, "grad_norm": 1.127071738243103, "learning_rate": 4.964283259896945e-07, "loss": 0.496, "step": 13594 }, { "epoch": 0.86, "grad_norm": 1.0764715671539307, "learning_rate": 4.959827169569136e-07, "loss": 0.4625, "step": 13595 }, { "epoch": 0.86, "grad_norm": 1.0825448036193848, "learning_rate": 4.955372975726336e-07, "loss": 0.464, "step": 13596 }, { "epoch": 0.86, "grad_norm": 1.0636563301086426, "learning_rate": 4.950920678556065e-07, "loss": 0.5054, "step": 13597 }, { "epoch": 0.86, "grad_norm": 0.9773215651512146, "learning_rate": 4.946470278245813e-07, "loss": 0.479, "step": 13598 }, { "epoch": 0.86, "grad_norm": 0.9716424942016602, "learning_rate": 4.942021774982969e-07, "loss": 0.5029, "step": 13599 }, { "epoch": 0.86, "grad_norm": 1.0770186185836792, "learning_rate": 4.937575168954845e-07, "loss": 0.4665, "step": 13600 }, { "epoch": 0.86, "grad_norm": 1.0549659729003906, "learning_rate": 4.933130460348673e-07, "loss": 0.4864, "step": 13601 }, { "epoch": 0.86, "grad_norm": 1.0820561647415161, "learning_rate": 4.928687649351594e-07, "loss": 0.4769, "step": 13602 }, { "epoch": 0.86, "grad_norm": 1.0644725561141968, "learning_rate": 4.924246736150679e-07, "loss": 0.4813, "step": 13603 }, { "epoch": 0.86, "grad_norm": 1.1527262926101685, "learning_rate": 4.919807720932946e-07, "loss": 0.5019, "step": 13604 }, { "epoch": 0.86, "grad_norm": 0.9755449295043945, "learning_rate": 4.915370603885272e-07, "loss": 0.4708, "step": 13605 }, { "epoch": 0.86, "grad_norm": 1.0473021268844604, "learning_rate": 4.91093538519451e-07, "loss": 0.4953, "step": 13606 }, { "epoch": 0.86, "grad_norm": 1.0419485569000244, "learning_rate": 4.906502065047403e-07, "loss": 0.5103, "step": 13607 }, { "epoch": 0.86, "grad_norm": 0.9475518465042114, "learning_rate": 4.902070643630624e-07, "loss": 0.4805, "step": 13608 }, { "epoch": 0.86, "grad_norm": 1.0797600746154785, "learning_rate": 4.89764112113078e-07, "loss": 0.5072, "step": 13609 }, { "epoch": 0.86, "grad_norm": 1.0618468523025513, "learning_rate": 4.893213497734356e-07, "loss": 0.4919, "step": 13610 }, { "epoch": 0.86, "grad_norm": 1.1053194999694824, "learning_rate": 4.888787773627785e-07, "loss": 0.5046, "step": 13611 }, { "epoch": 0.86, "grad_norm": 1.0452958345413208, "learning_rate": 4.884363948997455e-07, "loss": 0.4605, "step": 13612 }, { "epoch": 0.86, "grad_norm": 1.0648913383483887, "learning_rate": 4.879942024029599e-07, "loss": 0.4763, "step": 13613 }, { "epoch": 0.86, "grad_norm": 1.0711185932159424, "learning_rate": 4.875521998910426e-07, "loss": 0.4512, "step": 13614 }, { "epoch": 0.86, "grad_norm": 1.075517177581787, "learning_rate": 4.871103873826044e-07, "loss": 0.508, "step": 13615 }, { "epoch": 0.86, "grad_norm": 0.9884278178215027, "learning_rate": 4.866687648962487e-07, "loss": 0.5261, "step": 13616 }, { "epoch": 0.86, "grad_norm": 1.0627617835998535, "learning_rate": 4.862273324505712e-07, "loss": 0.4483, "step": 13617 }, { "epoch": 0.86, "grad_norm": 0.9698841571807861, "learning_rate": 4.857860900641576e-07, "loss": 0.4589, "step": 13618 }, { "epoch": 0.86, "grad_norm": 1.0453863143920898, "learning_rate": 4.853450377555879e-07, "loss": 0.4927, "step": 13619 }, { "epoch": 0.86, "grad_norm": 1.0251530408859253, "learning_rate": 4.849041755434336e-07, "loss": 0.4854, "step": 13620 }, { "epoch": 0.86, "grad_norm": 1.01449453830719, "learning_rate": 4.844635034462574e-07, "loss": 0.4369, "step": 13621 }, { "epoch": 0.86, "grad_norm": 1.0162572860717773, "learning_rate": 4.840230214826147e-07, "loss": 0.5273, "step": 13622 }, { "epoch": 0.86, "grad_norm": 1.072426438331604, "learning_rate": 4.835827296710537e-07, "loss": 0.5463, "step": 13623 }, { "epoch": 0.86, "grad_norm": 1.1377872228622437, "learning_rate": 4.831426280301105e-07, "loss": 0.5315, "step": 13624 }, { "epoch": 0.86, "grad_norm": 0.9945191144943237, "learning_rate": 4.8270271657832e-07, "loss": 0.4296, "step": 13625 }, { "epoch": 0.86, "grad_norm": 1.0929838418960571, "learning_rate": 4.822629953342028e-07, "loss": 0.4947, "step": 13626 }, { "epoch": 0.86, "grad_norm": 1.0578333139419556, "learning_rate": 4.81823464316275e-07, "loss": 0.4908, "step": 13627 }, { "epoch": 0.86, "grad_norm": 1.0581110715866089, "learning_rate": 4.813841235430433e-07, "loss": 0.4542, "step": 13628 }, { "epoch": 0.86, "grad_norm": 1.0441462993621826, "learning_rate": 4.809449730330068e-07, "loss": 0.4749, "step": 13629 }, { "epoch": 0.86, "grad_norm": 1.0819425582885742, "learning_rate": 4.805060128046574e-07, "loss": 0.5002, "step": 13630 }, { "epoch": 0.86, "grad_norm": 0.9921013712882996, "learning_rate": 4.80067242876478e-07, "loss": 0.5028, "step": 13631 }, { "epoch": 0.86, "grad_norm": 1.1062136888504028, "learning_rate": 4.796286632669417e-07, "loss": 0.5031, "step": 13632 }, { "epoch": 0.86, "grad_norm": 1.0803297758102417, "learning_rate": 4.791902739945187e-07, "loss": 0.4738, "step": 13633 }, { "epoch": 0.86, "grad_norm": 1.0163581371307373, "learning_rate": 4.787520750776658e-07, "loss": 0.4973, "step": 13634 }, { "epoch": 0.86, "grad_norm": 1.0785454511642456, "learning_rate": 4.783140665348352e-07, "loss": 0.4376, "step": 13635 }, { "epoch": 0.86, "grad_norm": 1.1214227676391602, "learning_rate": 4.778762483844701e-07, "loss": 0.493, "step": 13636 }, { "epoch": 0.86, "grad_norm": 0.9712678790092468, "learning_rate": 4.774386206450027e-07, "loss": 0.4666, "step": 13637 }, { "epoch": 0.86, "grad_norm": 1.0110604763031006, "learning_rate": 4.770011833348631e-07, "loss": 0.4826, "step": 13638 }, { "epoch": 0.86, "grad_norm": 1.0145424604415894, "learning_rate": 4.7656393647247054e-07, "loss": 0.4737, "step": 13639 }, { "epoch": 0.86, "grad_norm": 0.9978539943695068, "learning_rate": 4.7612688007623363e-07, "loss": 0.4696, "step": 13640 }, { "epoch": 0.86, "grad_norm": 0.9833884239196777, "learning_rate": 4.756900141645565e-07, "loss": 0.5214, "step": 13641 }, { "epoch": 0.86, "grad_norm": 1.0501322746276855, "learning_rate": 4.752533387558339e-07, "loss": 0.5091, "step": 13642 }, { "epoch": 0.86, "grad_norm": 1.0006647109985352, "learning_rate": 4.748168538684528e-07, "loss": 0.4968, "step": 13643 }, { "epoch": 0.86, "grad_norm": 1.2387149333953857, "learning_rate": 4.7438055952079287e-07, "loss": 0.5373, "step": 13644 }, { "epoch": 0.86, "grad_norm": 1.0665277242660522, "learning_rate": 4.739444557312223e-07, "loss": 0.485, "step": 13645 }, { "epoch": 0.86, "grad_norm": 1.0650554895401, "learning_rate": 4.735085425181063e-07, "loss": 0.4833, "step": 13646 }, { "epoch": 0.86, "grad_norm": 1.059370756149292, "learning_rate": 4.730728198998008e-07, "loss": 0.5535, "step": 13647 }, { "epoch": 0.86, "grad_norm": 1.110365629196167, "learning_rate": 4.726372878946489e-07, "loss": 0.4718, "step": 13648 }, { "epoch": 0.86, "grad_norm": 1.028216004371643, "learning_rate": 4.7220194652099204e-07, "loss": 0.5179, "step": 13649 }, { "epoch": 0.86, "grad_norm": 1.0740399360656738, "learning_rate": 4.7176679579716e-07, "loss": 0.5044, "step": 13650 }, { "epoch": 0.86, "grad_norm": 1.1303333044052124, "learning_rate": 4.7133183574147534e-07, "loss": 0.554, "step": 13651 }, { "epoch": 0.86, "grad_norm": 1.0639551877975464, "learning_rate": 4.7089706637225283e-07, "loss": 0.528, "step": 13652 }, { "epoch": 0.86, "grad_norm": 1.0426877737045288, "learning_rate": 4.7046248770780065e-07, "loss": 0.5154, "step": 13653 }, { "epoch": 0.87, "grad_norm": 0.9748163819313049, "learning_rate": 4.7002809976641417e-07, "loss": 0.466, "step": 13654 }, { "epoch": 0.87, "grad_norm": 1.1808016300201416, "learning_rate": 4.6959390256638703e-07, "loss": 0.5148, "step": 13655 }, { "epoch": 0.87, "grad_norm": 1.0725035667419434, "learning_rate": 4.691598961260002e-07, "loss": 0.508, "step": 13656 }, { "epoch": 0.87, "grad_norm": 1.1180516481399536, "learning_rate": 4.68726080463528e-07, "loss": 0.5635, "step": 13657 }, { "epoch": 0.87, "grad_norm": 1.062372088432312, "learning_rate": 4.682924555972379e-07, "loss": 0.4866, "step": 13658 }, { "epoch": 0.87, "grad_norm": 1.0931804180145264, "learning_rate": 4.6785902154538763e-07, "loss": 0.54, "step": 13659 }, { "epoch": 0.87, "grad_norm": 1.1557106971740723, "learning_rate": 4.674257783262276e-07, "loss": 0.5277, "step": 13660 }, { "epoch": 0.87, "grad_norm": 1.065765380859375, "learning_rate": 4.669927259580015e-07, "loss": 0.5072, "step": 13661 }, { "epoch": 0.87, "grad_norm": 0.9811139702796936, "learning_rate": 4.665598644589409e-07, "loss": 0.4384, "step": 13662 }, { "epoch": 0.87, "grad_norm": 1.0448228120803833, "learning_rate": 4.6612719384727556e-07, "loss": 0.5043, "step": 13663 }, { "epoch": 0.87, "grad_norm": 1.1161221265792847, "learning_rate": 4.656947141412205e-07, "loss": 0.5172, "step": 13664 }, { "epoch": 0.87, "grad_norm": 1.0317866802215576, "learning_rate": 4.652624253589877e-07, "loss": 0.5474, "step": 13665 }, { "epoch": 0.87, "grad_norm": 1.0991182327270508, "learning_rate": 4.6483032751877987e-07, "loss": 0.5154, "step": 13666 }, { "epoch": 0.87, "grad_norm": 1.1278705596923828, "learning_rate": 4.6439842063878803e-07, "loss": 0.4723, "step": 13667 }, { "epoch": 0.87, "grad_norm": 1.0951365232467651, "learning_rate": 4.639667047372015e-07, "loss": 0.5117, "step": 13668 }, { "epoch": 0.87, "grad_norm": 0.9740217924118042, "learning_rate": 4.6353517983219856e-07, "loss": 0.4419, "step": 13669 }, { "epoch": 0.87, "grad_norm": 1.073346734046936, "learning_rate": 4.631038459419468e-07, "loss": 0.4737, "step": 13670 }, { "epoch": 0.87, "grad_norm": 1.0972206592559814, "learning_rate": 4.6267270308460955e-07, "loss": 0.5103, "step": 13671 }, { "epoch": 0.87, "grad_norm": 0.9710244536399841, "learning_rate": 4.6224175127834057e-07, "loss": 0.4978, "step": 13672 }, { "epoch": 0.87, "grad_norm": 0.994428813457489, "learning_rate": 4.61810990541286e-07, "loss": 0.446, "step": 13673 }, { "epoch": 0.87, "grad_norm": 1.0358386039733887, "learning_rate": 4.61380420891584e-07, "loss": 0.5114, "step": 13674 }, { "epoch": 0.87, "grad_norm": 1.1287455558776855, "learning_rate": 4.6095004234736175e-07, "loss": 0.5337, "step": 13675 }, { "epoch": 0.87, "grad_norm": 1.040019154548645, "learning_rate": 4.6051985492674425e-07, "loss": 0.4942, "step": 13676 }, { "epoch": 0.87, "grad_norm": 1.1022597551345825, "learning_rate": 4.6008985864784473e-07, "loss": 0.5078, "step": 13677 }, { "epoch": 0.87, "grad_norm": 1.071824312210083, "learning_rate": 4.596600535287671e-07, "loss": 0.5336, "step": 13678 }, { "epoch": 0.87, "grad_norm": 1.101735234260559, "learning_rate": 4.592304395876102e-07, "loss": 0.5188, "step": 13679 }, { "epoch": 0.87, "grad_norm": 1.0816458463668823, "learning_rate": 4.588010168424628e-07, "loss": 0.5067, "step": 13680 }, { "epoch": 0.87, "grad_norm": 1.0752999782562256, "learning_rate": 4.5837178531140723e-07, "loss": 0.4947, "step": 13681 }, { "epoch": 0.87, "grad_norm": 1.0864089727401733, "learning_rate": 4.579427450125179e-07, "loss": 0.4854, "step": 13682 }, { "epoch": 0.87, "grad_norm": 0.9950405359268188, "learning_rate": 4.5751389596385755e-07, "loss": 0.4512, "step": 13683 }, { "epoch": 0.87, "grad_norm": 1.1778031587600708, "learning_rate": 4.570852381834839e-07, "loss": 0.4902, "step": 13684 }, { "epoch": 0.87, "grad_norm": 1.0931570529937744, "learning_rate": 4.5665677168944935e-07, "loss": 0.5498, "step": 13685 }, { "epoch": 0.87, "grad_norm": 1.0452144145965576, "learning_rate": 4.562284964997915e-07, "loss": 0.4889, "step": 13686 }, { "epoch": 0.87, "grad_norm": 1.0706392526626587, "learning_rate": 4.5580041263254547e-07, "loss": 0.5363, "step": 13687 }, { "epoch": 0.87, "grad_norm": 1.0081599950790405, "learning_rate": 4.553725201057363e-07, "loss": 0.4986, "step": 13688 }, { "epoch": 0.87, "grad_norm": 1.1238662004470825, "learning_rate": 4.5494481893738005e-07, "loss": 0.4684, "step": 13689 }, { "epoch": 0.87, "grad_norm": 1.1825469732284546, "learning_rate": 4.5451730914548744e-07, "loss": 0.5582, "step": 13690 }, { "epoch": 0.87, "grad_norm": 0.9957408308982849, "learning_rate": 4.540899907480578e-07, "loss": 0.4999, "step": 13691 }, { "epoch": 0.87, "grad_norm": 1.0893901586532593, "learning_rate": 4.536628637630836e-07, "loss": 0.4806, "step": 13692 }, { "epoch": 0.87, "grad_norm": 1.0604897737503052, "learning_rate": 4.532359282085519e-07, "loss": 0.5332, "step": 13693 }, { "epoch": 0.87, "grad_norm": 1.0288249254226685, "learning_rate": 4.528091841024379e-07, "loss": 0.481, "step": 13694 }, { "epoch": 0.87, "grad_norm": 1.0741475820541382, "learning_rate": 4.5238263146271053e-07, "loss": 0.5142, "step": 13695 }, { "epoch": 0.87, "grad_norm": 1.0991498231887817, "learning_rate": 4.5195627030733156e-07, "loss": 0.5221, "step": 13696 }, { "epoch": 0.87, "grad_norm": 1.0029066801071167, "learning_rate": 4.5153010065425054e-07, "loss": 0.4646, "step": 13697 }, { "epoch": 0.87, "grad_norm": 1.0602319240570068, "learning_rate": 4.511041225214158e-07, "loss": 0.4879, "step": 13698 }, { "epoch": 0.87, "grad_norm": 0.9987397789955139, "learning_rate": 4.5067833592676136e-07, "loss": 0.4726, "step": 13699 }, { "epoch": 0.87, "grad_norm": 1.0657809972763062, "learning_rate": 4.502527408882157e-07, "loss": 0.4613, "step": 13700 }, { "epoch": 0.87, "grad_norm": 1.000096082687378, "learning_rate": 4.498273374237e-07, "loss": 0.5043, "step": 13701 }, { "epoch": 0.87, "grad_norm": 1.1645472049713135, "learning_rate": 4.494021255511266e-07, "loss": 0.5265, "step": 13702 }, { "epoch": 0.87, "grad_norm": 1.0900377035140991, "learning_rate": 4.48977105288399e-07, "loss": 0.4996, "step": 13703 }, { "epoch": 0.87, "grad_norm": 1.1540076732635498, "learning_rate": 4.485522766534145e-07, "loss": 0.5581, "step": 13704 }, { "epoch": 0.87, "grad_norm": 0.9702553749084473, "learning_rate": 4.4812763966405825e-07, "loss": 0.4524, "step": 13705 }, { "epoch": 0.87, "grad_norm": 1.0510414838790894, "learning_rate": 4.4770319433821487e-07, "loss": 0.506, "step": 13706 }, { "epoch": 0.87, "grad_norm": 1.0558525323867798, "learning_rate": 4.472789406937522e-07, "loss": 0.5163, "step": 13707 }, { "epoch": 0.87, "grad_norm": 1.0879522562026978, "learning_rate": 4.468548787485355e-07, "loss": 0.5177, "step": 13708 }, { "epoch": 0.87, "grad_norm": 1.1908034086227417, "learning_rate": 4.4643100852042097e-07, "loss": 0.5214, "step": 13709 }, { "epoch": 0.87, "grad_norm": 1.0457243919372559, "learning_rate": 4.4600733002725547e-07, "loss": 0.4969, "step": 13710 }, { "epoch": 0.87, "grad_norm": 1.009068250656128, "learning_rate": 4.4558384328687975e-07, "loss": 0.4928, "step": 13711 }, { "epoch": 0.87, "grad_norm": 1.0318927764892578, "learning_rate": 4.451605483171251e-07, "loss": 0.4985, "step": 13712 }, { "epoch": 0.87, "grad_norm": 1.0235506296157837, "learning_rate": 4.4473744513581384e-07, "loss": 0.442, "step": 13713 }, { "epoch": 0.87, "grad_norm": 1.0120404958724976, "learning_rate": 4.443145337607624e-07, "loss": 0.4789, "step": 13714 }, { "epoch": 0.87, "grad_norm": 1.0483548641204834, "learning_rate": 4.4389181420977814e-07, "loss": 0.5485, "step": 13715 }, { "epoch": 0.87, "grad_norm": 1.004766583442688, "learning_rate": 4.4346928650065957e-07, "loss": 0.4707, "step": 13716 }, { "epoch": 0.87, "grad_norm": 1.0503003597259521, "learning_rate": 4.4304695065119807e-07, "loss": 0.4899, "step": 13717 }, { "epoch": 0.87, "grad_norm": 1.0432121753692627, "learning_rate": 4.4262480667917774e-07, "loss": 0.4798, "step": 13718 }, { "epoch": 0.87, "grad_norm": 1.0445311069488525, "learning_rate": 4.422028546023721e-07, "loss": 0.4877, "step": 13719 }, { "epoch": 0.87, "grad_norm": 1.067617654800415, "learning_rate": 4.4178109443855033e-07, "loss": 0.5021, "step": 13720 }, { "epoch": 0.87, "grad_norm": 1.034996747970581, "learning_rate": 4.4135952620546876e-07, "loss": 0.4994, "step": 13721 }, { "epoch": 0.87, "grad_norm": 1.0224899053573608, "learning_rate": 4.409381499208787e-07, "loss": 0.4401, "step": 13722 }, { "epoch": 0.87, "grad_norm": 1.0104342699050903, "learning_rate": 4.405169656025238e-07, "loss": 0.4929, "step": 13723 }, { "epoch": 0.87, "grad_norm": 1.1080906391143799, "learning_rate": 4.400959732681381e-07, "loss": 0.5192, "step": 13724 }, { "epoch": 0.87, "grad_norm": 1.0382052659988403, "learning_rate": 4.3967517293544814e-07, "loss": 0.4673, "step": 13725 }, { "epoch": 0.87, "grad_norm": 1.1179478168487549, "learning_rate": 4.3925456462217244e-07, "loss": 0.521, "step": 13726 }, { "epoch": 0.87, "grad_norm": 1.0882370471954346, "learning_rate": 4.3883414834602125e-07, "loss": 0.4925, "step": 13727 }, { "epoch": 0.87, "grad_norm": 1.0186153650283813, "learning_rate": 4.384139241246982e-07, "loss": 0.5467, "step": 13728 }, { "epoch": 0.87, "grad_norm": 1.0723274946212769, "learning_rate": 4.3799389197589525e-07, "loss": 0.5052, "step": 13729 }, { "epoch": 0.87, "grad_norm": 1.0477349758148193, "learning_rate": 4.375740519172994e-07, "loss": 0.5355, "step": 13730 }, { "epoch": 0.87, "grad_norm": 1.0270122289657593, "learning_rate": 4.3715440396658816e-07, "loss": 0.4898, "step": 13731 }, { "epoch": 0.87, "grad_norm": 1.1825838088989258, "learning_rate": 4.3673494814143234e-07, "loss": 0.5149, "step": 13732 }, { "epoch": 0.87, "grad_norm": 1.2040338516235352, "learning_rate": 4.3631568445949403e-07, "loss": 0.4735, "step": 13733 }, { "epoch": 0.87, "grad_norm": 1.0329917669296265, "learning_rate": 4.3589661293842624e-07, "loss": 0.4847, "step": 13734 }, { "epoch": 0.87, "grad_norm": 1.091172218322754, "learning_rate": 4.3547773359587377e-07, "loss": 0.5329, "step": 13735 }, { "epoch": 0.87, "grad_norm": 1.0232014656066895, "learning_rate": 4.350590464494764e-07, "loss": 0.4495, "step": 13736 }, { "epoch": 0.87, "grad_norm": 1.0511703491210938, "learning_rate": 4.346405515168617e-07, "loss": 0.487, "step": 13737 }, { "epoch": 0.87, "grad_norm": 1.158629298210144, "learning_rate": 4.342222488156511e-07, "loss": 0.522, "step": 13738 }, { "epoch": 0.87, "grad_norm": 1.0807676315307617, "learning_rate": 4.3380413836345893e-07, "loss": 0.5195, "step": 13739 }, { "epoch": 0.87, "grad_norm": 1.0331394672393799, "learning_rate": 4.333862201778899e-07, "loss": 0.4433, "step": 13740 }, { "epoch": 0.87, "grad_norm": 1.116027593612671, "learning_rate": 4.329684942765411e-07, "loss": 0.4983, "step": 13741 }, { "epoch": 0.87, "grad_norm": 1.1004745960235596, "learning_rate": 4.3255096067700176e-07, "loss": 0.4979, "step": 13742 }, { "epoch": 0.87, "grad_norm": 1.0236526727676392, "learning_rate": 4.321336193968523e-07, "loss": 0.5051, "step": 13743 }, { "epoch": 0.87, "grad_norm": 1.0533157587051392, "learning_rate": 4.3171647045366525e-07, "loss": 0.4792, "step": 13744 }, { "epoch": 0.87, "grad_norm": 1.0958071947097778, "learning_rate": 4.312995138650056e-07, "loss": 0.5173, "step": 13745 }, { "epoch": 0.87, "grad_norm": 0.9877141714096069, "learning_rate": 4.3088274964843027e-07, "loss": 0.474, "step": 13746 }, { "epoch": 0.87, "grad_norm": 1.1106044054031372, "learning_rate": 4.3046617782148857e-07, "loss": 0.5205, "step": 13747 }, { "epoch": 0.87, "grad_norm": 1.08500075340271, "learning_rate": 4.300497984017182e-07, "loss": 0.4695, "step": 13748 }, { "epoch": 0.87, "grad_norm": 1.0543922185897827, "learning_rate": 4.2963361140665405e-07, "loss": 0.4411, "step": 13749 }, { "epoch": 0.87, "grad_norm": 1.01895010471344, "learning_rate": 4.292176168538198e-07, "loss": 0.4968, "step": 13750 }, { "epoch": 0.87, "grad_norm": 1.195759654045105, "learning_rate": 4.2880181476073034e-07, "loss": 0.5137, "step": 13751 }, { "epoch": 0.87, "grad_norm": 1.080329179763794, "learning_rate": 4.283862051448945e-07, "loss": 0.5073, "step": 13752 }, { "epoch": 0.87, "grad_norm": 1.0563124418258667, "learning_rate": 4.279707880238121e-07, "loss": 0.4968, "step": 13753 }, { "epoch": 0.87, "grad_norm": 1.100950837135315, "learning_rate": 4.275555634149753e-07, "loss": 0.525, "step": 13754 }, { "epoch": 0.87, "grad_norm": 1.0707536935806274, "learning_rate": 4.2714053133586785e-07, "loss": 0.4671, "step": 13755 }, { "epoch": 0.87, "grad_norm": 1.0423051118850708, "learning_rate": 4.267256918039625e-07, "loss": 0.4598, "step": 13756 }, { "epoch": 0.87, "grad_norm": 0.9561262130737305, "learning_rate": 4.263110448367308e-07, "loss": 0.4481, "step": 13757 }, { "epoch": 0.87, "grad_norm": 1.06647789478302, "learning_rate": 4.2589659045163044e-07, "loss": 0.5527, "step": 13758 }, { "epoch": 0.87, "grad_norm": 1.132185697555542, "learning_rate": 4.254823286661125e-07, "loss": 0.5088, "step": 13759 }, { "epoch": 0.87, "grad_norm": 1.0688728094100952, "learning_rate": 4.250682594976191e-07, "loss": 0.4909, "step": 13760 }, { "epoch": 0.87, "grad_norm": 1.0723905563354492, "learning_rate": 4.2465438296358685e-07, "loss": 0.5008, "step": 13761 }, { "epoch": 0.87, "grad_norm": 1.0533455610275269, "learning_rate": 4.2424069908144236e-07, "loss": 0.4684, "step": 13762 }, { "epoch": 0.87, "grad_norm": 1.1853251457214355, "learning_rate": 4.2382720786860453e-07, "loss": 0.5125, "step": 13763 }, { "epoch": 0.87, "grad_norm": 1.1141843795776367, "learning_rate": 4.2341390934248273e-07, "loss": 0.477, "step": 13764 }, { "epoch": 0.87, "grad_norm": 1.0213943719863892, "learning_rate": 4.230008035204797e-07, "loss": 0.5005, "step": 13765 }, { "epoch": 0.87, "grad_norm": 0.955684244632721, "learning_rate": 4.225878904199926e-07, "loss": 0.4232, "step": 13766 }, { "epoch": 0.87, "grad_norm": 1.043104648590088, "learning_rate": 4.2217517005840423e-07, "loss": 0.4865, "step": 13767 }, { "epoch": 0.87, "grad_norm": 1.0324668884277344, "learning_rate": 4.2176264245309517e-07, "loss": 0.5011, "step": 13768 }, { "epoch": 0.87, "grad_norm": 1.0168111324310303, "learning_rate": 4.2135030762143424e-07, "loss": 0.4794, "step": 13769 }, { "epoch": 0.87, "grad_norm": 1.0939329862594604, "learning_rate": 4.2093816558078373e-07, "loss": 0.571, "step": 13770 }, { "epoch": 0.87, "grad_norm": 1.0314226150512695, "learning_rate": 4.205262163484991e-07, "loss": 0.4627, "step": 13771 }, { "epoch": 0.87, "grad_norm": 1.0353028774261475, "learning_rate": 4.2011445994192324e-07, "loss": 0.4867, "step": 13772 }, { "epoch": 0.87, "grad_norm": 1.1914775371551514, "learning_rate": 4.1970289637839556e-07, "loss": 0.499, "step": 13773 }, { "epoch": 0.87, "grad_norm": 1.0128769874572754, "learning_rate": 4.19291525675245e-07, "loss": 0.4959, "step": 13774 }, { "epoch": 0.87, "grad_norm": 1.2244237661361694, "learning_rate": 4.1888034784979326e-07, "loss": 0.5313, "step": 13775 }, { "epoch": 0.87, "grad_norm": 0.9951699376106262, "learning_rate": 4.184693629193537e-07, "loss": 0.5494, "step": 13776 }, { "epoch": 0.87, "grad_norm": 1.0847532749176025, "learning_rate": 4.180585709012319e-07, "loss": 0.5002, "step": 13777 }, { "epoch": 0.87, "grad_norm": 1.0222808122634888, "learning_rate": 4.1764797181272296e-07, "loss": 0.506, "step": 13778 }, { "epoch": 0.87, "grad_norm": 1.0069555044174194, "learning_rate": 4.172375656711181e-07, "loss": 0.4506, "step": 13779 }, { "epoch": 0.87, "grad_norm": 1.0545144081115723, "learning_rate": 4.1682735249369663e-07, "loss": 0.5265, "step": 13780 }, { "epoch": 0.87, "grad_norm": 1.0830049514770508, "learning_rate": 4.1641733229773163e-07, "loss": 0.4831, "step": 13781 }, { "epoch": 0.87, "grad_norm": 1.0026416778564453, "learning_rate": 4.1600750510048805e-07, "loss": 0.5108, "step": 13782 }, { "epoch": 0.87, "grad_norm": 1.0848588943481445, "learning_rate": 4.1559787091922153e-07, "loss": 0.5119, "step": 13783 }, { "epoch": 0.87, "grad_norm": 1.0871912240982056, "learning_rate": 4.151884297711806e-07, "loss": 0.4547, "step": 13784 }, { "epoch": 0.87, "grad_norm": 1.0597244501113892, "learning_rate": 4.147791816736063e-07, "loss": 0.4979, "step": 13785 }, { "epoch": 0.87, "grad_norm": 1.0201863050460815, "learning_rate": 4.143701266437283e-07, "loss": 0.4842, "step": 13786 }, { "epoch": 0.87, "grad_norm": 1.080246925354004, "learning_rate": 4.139612646987734e-07, "loss": 0.5132, "step": 13787 }, { "epoch": 0.87, "grad_norm": 1.1408929824829102, "learning_rate": 4.135525958559555e-07, "loss": 0.5508, "step": 13788 }, { "epoch": 0.87, "grad_norm": 0.9834961295127869, "learning_rate": 4.131441201324826e-07, "loss": 0.4859, "step": 13789 }, { "epoch": 0.87, "grad_norm": 1.138939380645752, "learning_rate": 4.1273583754555424e-07, "loss": 0.4789, "step": 13790 }, { "epoch": 0.87, "grad_norm": 1.0835551023483276, "learning_rate": 4.123277481123622e-07, "loss": 0.4838, "step": 13791 }, { "epoch": 0.87, "grad_norm": 0.9488256573677063, "learning_rate": 4.1191985185008887e-07, "loss": 0.4945, "step": 13792 }, { "epoch": 0.87, "grad_norm": 1.041617512702942, "learning_rate": 4.1151214877591105e-07, "loss": 0.4989, "step": 13793 }, { "epoch": 0.87, "grad_norm": 1.108404278755188, "learning_rate": 4.1110463890699336e-07, "loss": 0.5205, "step": 13794 }, { "epoch": 0.87, "grad_norm": 1.07442045211792, "learning_rate": 4.1069732226049484e-07, "loss": 0.5459, "step": 13795 }, { "epoch": 0.87, "grad_norm": 1.000274419784546, "learning_rate": 4.102901988535685e-07, "loss": 0.4816, "step": 13796 }, { "epoch": 0.87, "grad_norm": 1.0229767560958862, "learning_rate": 4.0988326870335494e-07, "loss": 0.4857, "step": 13797 }, { "epoch": 0.87, "grad_norm": 1.0853655338287354, "learning_rate": 4.0947653182698887e-07, "loss": 0.5183, "step": 13798 }, { "epoch": 0.87, "grad_norm": 1.1095023155212402, "learning_rate": 4.0906998824159715e-07, "loss": 0.5336, "step": 13799 }, { "epoch": 0.87, "grad_norm": 1.0596293210983276, "learning_rate": 4.086636379642972e-07, "loss": 0.5024, "step": 13800 }, { "epoch": 0.87, "grad_norm": 1.1000126600265503, "learning_rate": 4.0825748101220087e-07, "loss": 0.5201, "step": 13801 }, { "epoch": 0.87, "grad_norm": 1.0342031717300415, "learning_rate": 4.078515174024067e-07, "loss": 0.5001, "step": 13802 }, { "epoch": 0.87, "grad_norm": 1.0115686655044556, "learning_rate": 4.074457471520099e-07, "loss": 0.5209, "step": 13803 }, { "epoch": 0.87, "grad_norm": 1.0109944343566895, "learning_rate": 4.0704017027809797e-07, "loss": 0.5039, "step": 13804 }, { "epoch": 0.87, "grad_norm": 1.0330100059509277, "learning_rate": 4.0663478679774604e-07, "loss": 0.4766, "step": 13805 }, { "epoch": 0.87, "grad_norm": 1.0435158014297485, "learning_rate": 4.062295967280239e-07, "loss": 0.4726, "step": 13806 }, { "epoch": 0.87, "grad_norm": 0.9838164448738098, "learning_rate": 4.058246000859939e-07, "loss": 0.5299, "step": 13807 }, { "epoch": 0.87, "grad_norm": 1.0656788349151611, "learning_rate": 4.054197968887064e-07, "loss": 0.4478, "step": 13808 }, { "epoch": 0.87, "grad_norm": 1.037368893623352, "learning_rate": 4.0501518715320933e-07, "loss": 0.4994, "step": 13809 }, { "epoch": 0.87, "grad_norm": 1.0878825187683105, "learning_rate": 4.046107708965369e-07, "loss": 0.527, "step": 13810 }, { "epoch": 0.87, "grad_norm": 1.0550358295440674, "learning_rate": 4.042065481357188e-07, "loss": 0.5076, "step": 13811 }, { "epoch": 0.88, "grad_norm": 1.0873349905014038, "learning_rate": 4.038025188877753e-07, "loss": 0.5078, "step": 13812 }, { "epoch": 0.88, "grad_norm": 1.109261155128479, "learning_rate": 4.03398683169719e-07, "loss": 0.5092, "step": 13813 }, { "epoch": 0.88, "grad_norm": 1.1236189603805542, "learning_rate": 4.029950409985539e-07, "loss": 0.5157, "step": 13814 }, { "epoch": 0.88, "grad_norm": 1.1359589099884033, "learning_rate": 4.0259159239127656e-07, "loss": 0.5106, "step": 13815 }, { "epoch": 0.88, "grad_norm": 0.962879478931427, "learning_rate": 4.021883373648722e-07, "loss": 0.4692, "step": 13816 }, { "epoch": 0.88, "grad_norm": 0.9297695755958557, "learning_rate": 4.017852759363239e-07, "loss": 0.3834, "step": 13817 }, { "epoch": 0.88, "grad_norm": 1.05164635181427, "learning_rate": 4.013824081226009e-07, "loss": 0.5041, "step": 13818 }, { "epoch": 0.88, "grad_norm": 1.0964182615280151, "learning_rate": 4.009797339406674e-07, "loss": 0.4969, "step": 13819 }, { "epoch": 0.88, "grad_norm": 1.0226235389709473, "learning_rate": 4.005772534074792e-07, "loss": 0.493, "step": 13820 }, { "epoch": 0.88, "grad_norm": 1.1254289150238037, "learning_rate": 4.001749665399807e-07, "loss": 0.5216, "step": 13821 }, { "epoch": 0.88, "grad_norm": 1.0507532358169556, "learning_rate": 3.997728733551137e-07, "loss": 0.4617, "step": 13822 }, { "epoch": 0.88, "grad_norm": 1.0152353048324585, "learning_rate": 3.993709738698093e-07, "loss": 0.5004, "step": 13823 }, { "epoch": 0.88, "grad_norm": 1.008558988571167, "learning_rate": 3.989692681009877e-07, "loss": 0.5099, "step": 13824 }, { "epoch": 0.88, "grad_norm": 1.0981887578964233, "learning_rate": 3.985677560655643e-07, "loss": 0.5035, "step": 13825 }, { "epoch": 0.88, "grad_norm": 1.0890809297561646, "learning_rate": 3.9816643778044506e-07, "loss": 0.5457, "step": 13826 }, { "epoch": 0.88, "grad_norm": 1.0290725231170654, "learning_rate": 3.977653132625292e-07, "loss": 0.4723, "step": 13827 }, { "epoch": 0.88, "grad_norm": 1.0328872203826904, "learning_rate": 3.9736438252870655e-07, "loss": 0.5104, "step": 13828 }, { "epoch": 0.88, "grad_norm": 1.0733381509780884, "learning_rate": 3.969636455958564e-07, "loss": 0.52, "step": 13829 }, { "epoch": 0.88, "grad_norm": 0.997137188911438, "learning_rate": 3.965631024808553e-07, "loss": 0.526, "step": 13830 }, { "epoch": 0.88, "grad_norm": 1.0118529796600342, "learning_rate": 3.961627532005691e-07, "loss": 0.4546, "step": 13831 }, { "epoch": 0.88, "grad_norm": 1.021236777305603, "learning_rate": 3.957625977718527e-07, "loss": 0.497, "step": 13832 }, { "epoch": 0.88, "grad_norm": 1.0267452001571655, "learning_rate": 3.953626362115559e-07, "loss": 0.49, "step": 13833 }, { "epoch": 0.88, "grad_norm": 1.10640549659729, "learning_rate": 3.949628685365203e-07, "loss": 0.5001, "step": 13834 }, { "epoch": 0.88, "grad_norm": 1.0515050888061523, "learning_rate": 3.945632947635791e-07, "loss": 0.4786, "step": 13835 }, { "epoch": 0.88, "grad_norm": 1.076259970664978, "learning_rate": 3.941639149095566e-07, "loss": 0.5328, "step": 13836 }, { "epoch": 0.88, "grad_norm": 0.9872125387191772, "learning_rate": 3.9376472899126884e-07, "loss": 0.4885, "step": 13837 }, { "epoch": 0.88, "grad_norm": 1.0869286060333252, "learning_rate": 3.933657370255228e-07, "loss": 0.5239, "step": 13838 }, { "epoch": 0.88, "grad_norm": 1.1174988746643066, "learning_rate": 3.9296693902912244e-07, "loss": 0.5198, "step": 13839 }, { "epoch": 0.88, "grad_norm": 1.062361478805542, "learning_rate": 3.9256833501885693e-07, "loss": 0.5302, "step": 13840 }, { "epoch": 0.88, "grad_norm": 1.0280914306640625, "learning_rate": 3.9216992501151074e-07, "loss": 0.4902, "step": 13841 }, { "epoch": 0.88, "grad_norm": 1.0504766702651978, "learning_rate": 3.917717090238593e-07, "loss": 0.5444, "step": 13842 }, { "epoch": 0.88, "grad_norm": 1.0632145404815674, "learning_rate": 3.913736870726703e-07, "loss": 0.4899, "step": 13843 }, { "epoch": 0.88, "grad_norm": 1.0664408206939697, "learning_rate": 3.909758591747037e-07, "loss": 0.5637, "step": 13844 }, { "epoch": 0.88, "grad_norm": 1.01212739944458, "learning_rate": 3.905782253467094e-07, "loss": 0.4518, "step": 13845 }, { "epoch": 0.88, "grad_norm": 1.0291129350662231, "learning_rate": 3.9018078560543015e-07, "loss": 0.5055, "step": 13846 }, { "epoch": 0.88, "grad_norm": 1.1620970964431763, "learning_rate": 3.8978353996760365e-07, "loss": 0.4721, "step": 13847 }, { "epoch": 0.88, "grad_norm": 0.9888899922370911, "learning_rate": 3.8938648844995374e-07, "loss": 0.4705, "step": 13848 }, { "epoch": 0.88, "grad_norm": 1.0864684581756592, "learning_rate": 3.889896310691993e-07, "loss": 0.5069, "step": 13849 }, { "epoch": 0.88, "grad_norm": 1.1696125268936157, "learning_rate": 3.885929678420508e-07, "loss": 0.5196, "step": 13850 }, { "epoch": 0.88, "grad_norm": 1.006962537765503, "learning_rate": 3.881964987852105e-07, "loss": 0.4866, "step": 13851 }, { "epoch": 0.88, "grad_norm": 1.1452676057815552, "learning_rate": 3.878002239153739e-07, "loss": 0.5542, "step": 13852 }, { "epoch": 0.88, "grad_norm": 1.0906898975372314, "learning_rate": 3.874041432492237e-07, "loss": 0.5263, "step": 13853 }, { "epoch": 0.88, "grad_norm": 1.1190228462219238, "learning_rate": 3.870082568034389e-07, "loss": 0.5016, "step": 13854 }, { "epoch": 0.88, "grad_norm": 1.0661256313323975, "learning_rate": 3.866125645946894e-07, "loss": 0.4567, "step": 13855 }, { "epoch": 0.88, "grad_norm": 1.0436792373657227, "learning_rate": 3.862170666396359e-07, "loss": 0.498, "step": 13856 }, { "epoch": 0.88, "grad_norm": 0.9427409172058105, "learning_rate": 3.858217629549316e-07, "loss": 0.4355, "step": 13857 }, { "epoch": 0.88, "grad_norm": 1.1033661365509033, "learning_rate": 3.8542665355722154e-07, "loss": 0.4829, "step": 13858 }, { "epoch": 0.88, "grad_norm": 0.9851303100585938, "learning_rate": 3.8503173846314137e-07, "loss": 0.4218, "step": 13859 }, { "epoch": 0.88, "grad_norm": 1.0885196924209595, "learning_rate": 3.846370176893205e-07, "loss": 0.5067, "step": 13860 }, { "epoch": 0.88, "grad_norm": 1.0836544036865234, "learning_rate": 3.8424249125238065e-07, "loss": 0.5167, "step": 13861 }, { "epoch": 0.88, "grad_norm": 1.0435181856155396, "learning_rate": 3.838481591689308e-07, "loss": 0.487, "step": 13862 }, { "epoch": 0.88, "grad_norm": 1.1410058736801147, "learning_rate": 3.834540214555771e-07, "loss": 0.5455, "step": 13863 }, { "epoch": 0.88, "grad_norm": 0.9805917143821716, "learning_rate": 3.830600781289151e-07, "loss": 0.4689, "step": 13864 }, { "epoch": 0.88, "grad_norm": 1.0559532642364502, "learning_rate": 3.826663292055316e-07, "loss": 0.4885, "step": 13865 }, { "epoch": 0.88, "grad_norm": 1.0752135515213013, "learning_rate": 3.822727747020072e-07, "loss": 0.5398, "step": 13866 }, { "epoch": 0.88, "grad_norm": 1.1543159484863281, "learning_rate": 3.818794146349114e-07, "loss": 0.5181, "step": 13867 }, { "epoch": 0.88, "grad_norm": 1.0757665634155273, "learning_rate": 3.8148624902080764e-07, "loss": 0.4785, "step": 13868 }, { "epoch": 0.88, "grad_norm": 1.0365145206451416, "learning_rate": 3.8109327787625273e-07, "loss": 0.5054, "step": 13869 }, { "epoch": 0.88, "grad_norm": 1.090109944343567, "learning_rate": 3.807005012177911e-07, "loss": 0.4763, "step": 13870 }, { "epoch": 0.88, "grad_norm": 1.19999361038208, "learning_rate": 3.803079190619624e-07, "loss": 0.5465, "step": 13871 }, { "epoch": 0.88, "grad_norm": 1.0682469606399536, "learning_rate": 3.7991553142529616e-07, "loss": 0.4847, "step": 13872 }, { "epoch": 0.88, "grad_norm": 1.0509440898895264, "learning_rate": 3.7952333832431466e-07, "loss": 0.4749, "step": 13873 }, { "epoch": 0.88, "grad_norm": 1.0399044752120972, "learning_rate": 3.7913133977553306e-07, "loss": 0.5162, "step": 13874 }, { "epoch": 0.88, "grad_norm": 1.0946909189224243, "learning_rate": 3.7873953579545486e-07, "loss": 0.4957, "step": 13875 }, { "epoch": 0.88, "grad_norm": 1.099408745765686, "learning_rate": 3.783479264005779e-07, "loss": 0.4898, "step": 13876 }, { "epoch": 0.88, "grad_norm": 1.0778411626815796, "learning_rate": 3.779565116073941e-07, "loss": 0.4739, "step": 13877 }, { "epoch": 0.88, "grad_norm": 1.0826133489608765, "learning_rate": 3.775652914323813e-07, "loss": 0.5707, "step": 13878 }, { "epoch": 0.88, "grad_norm": 1.1412365436553955, "learning_rate": 3.771742658920141e-07, "loss": 0.5085, "step": 13879 }, { "epoch": 0.88, "grad_norm": 0.9909276366233826, "learning_rate": 3.767834350027572e-07, "loss": 0.4776, "step": 13880 }, { "epoch": 0.88, "grad_norm": 1.0669307708740234, "learning_rate": 3.7639279878106616e-07, "loss": 0.5215, "step": 13881 }, { "epoch": 0.88, "grad_norm": 1.0728931427001953, "learning_rate": 3.7600235724339127e-07, "loss": 0.5108, "step": 13882 }, { "epoch": 0.88, "grad_norm": 1.0628244876861572, "learning_rate": 3.756121104061705e-07, "loss": 0.4737, "step": 13883 }, { "epoch": 0.88, "grad_norm": 1.0442898273468018, "learning_rate": 3.752220582858368e-07, "loss": 0.4864, "step": 13884 }, { "epoch": 0.88, "grad_norm": 1.0579360723495483, "learning_rate": 3.748322008988137e-07, "loss": 0.5235, "step": 13885 }, { "epoch": 0.88, "grad_norm": 1.1031591892242432, "learning_rate": 3.744425382615169e-07, "loss": 0.5246, "step": 13886 }, { "epoch": 0.88, "grad_norm": 1.1032261848449707, "learning_rate": 3.7405307039035387e-07, "loss": 0.5555, "step": 13887 }, { "epoch": 0.88, "grad_norm": 1.0538815259933472, "learning_rate": 3.7366379730172376e-07, "loss": 0.539, "step": 13888 }, { "epoch": 0.88, "grad_norm": 1.0281118154525757, "learning_rate": 3.732747190120162e-07, "loss": 0.5217, "step": 13889 }, { "epoch": 0.88, "grad_norm": 1.055055022239685, "learning_rate": 3.728858355376164e-07, "loss": 0.5277, "step": 13890 }, { "epoch": 0.88, "grad_norm": 1.0404201745986938, "learning_rate": 3.724971468948968e-07, "loss": 0.5128, "step": 13891 }, { "epoch": 0.88, "grad_norm": 1.0271530151367188, "learning_rate": 3.721086531002244e-07, "loss": 0.5059, "step": 13892 }, { "epoch": 0.88, "grad_norm": 1.06915283203125, "learning_rate": 3.7172035416995765e-07, "loss": 0.5106, "step": 13893 }, { "epoch": 0.88, "grad_norm": 1.000438928604126, "learning_rate": 3.7133225012044585e-07, "loss": 0.4452, "step": 13894 }, { "epoch": 0.88, "grad_norm": 1.0894824266433716, "learning_rate": 3.709443409680308e-07, "loss": 0.4878, "step": 13895 }, { "epoch": 0.88, "grad_norm": 1.0660288333892822, "learning_rate": 3.7055662672904723e-07, "loss": 0.495, "step": 13896 }, { "epoch": 0.88, "grad_norm": 1.0462357997894287, "learning_rate": 3.7016910741981825e-07, "loss": 0.5012, "step": 13897 }, { "epoch": 0.88, "grad_norm": 0.9886908531188965, "learning_rate": 3.6978178305666357e-07, "loss": 0.4258, "step": 13898 }, { "epoch": 0.88, "grad_norm": 1.0848771333694458, "learning_rate": 3.693946536558896e-07, "loss": 0.4953, "step": 13899 }, { "epoch": 0.88, "grad_norm": 1.1187751293182373, "learning_rate": 3.6900771923379817e-07, "loss": 0.5036, "step": 13900 }, { "epoch": 0.88, "grad_norm": 1.0687930583953857, "learning_rate": 3.6862097980668255e-07, "loss": 0.5525, "step": 13901 }, { "epoch": 0.88, "grad_norm": 1.0529654026031494, "learning_rate": 3.68234435390824e-07, "loss": 0.5304, "step": 13902 }, { "epoch": 0.88, "grad_norm": 1.1089003086090088, "learning_rate": 3.6784808600250186e-07, "loss": 0.5179, "step": 13903 }, { "epoch": 0.88, "grad_norm": 1.1205693483352661, "learning_rate": 3.674619316579836e-07, "loss": 0.5054, "step": 13904 }, { "epoch": 0.88, "grad_norm": 1.0788072347640991, "learning_rate": 3.670759723735273e-07, "loss": 0.4904, "step": 13905 }, { "epoch": 0.88, "grad_norm": 1.0418163537979126, "learning_rate": 3.666902081653845e-07, "loss": 0.4827, "step": 13906 }, { "epoch": 0.88, "grad_norm": 1.1107317209243774, "learning_rate": 3.663046390497993e-07, "loss": 0.5121, "step": 13907 }, { "epoch": 0.88, "grad_norm": 1.0846924781799316, "learning_rate": 3.659192650430066e-07, "loss": 0.5148, "step": 13908 }, { "epoch": 0.88, "grad_norm": 1.0620695352554321, "learning_rate": 3.655340861612333e-07, "loss": 0.4897, "step": 13909 }, { "epoch": 0.88, "grad_norm": 0.9974926710128784, "learning_rate": 3.6514910242069547e-07, "loss": 0.4973, "step": 13910 }, { "epoch": 0.88, "grad_norm": 1.0115067958831787, "learning_rate": 3.647643138376067e-07, "loss": 0.486, "step": 13911 }, { "epoch": 0.88, "grad_norm": 1.189638376235962, "learning_rate": 3.6437972042816904e-07, "loss": 0.5415, "step": 13912 }, { "epoch": 0.88, "grad_norm": 1.0261467695236206, "learning_rate": 3.6399532220857403e-07, "loss": 0.4823, "step": 13913 }, { "epoch": 0.88, "grad_norm": 1.1787561178207397, "learning_rate": 3.6361111919500815e-07, "loss": 0.5182, "step": 13914 }, { "epoch": 0.88, "grad_norm": 1.0854473114013672, "learning_rate": 3.6322711140364953e-07, "loss": 0.5527, "step": 13915 }, { "epoch": 0.88, "grad_norm": 1.0096983909606934, "learning_rate": 3.628432988506675e-07, "loss": 0.4698, "step": 13916 }, { "epoch": 0.88, "grad_norm": 1.1122733354568481, "learning_rate": 3.6245968155222243e-07, "loss": 0.4852, "step": 13917 }, { "epoch": 0.88, "grad_norm": 1.03513765335083, "learning_rate": 3.6207625952446756e-07, "loss": 0.5188, "step": 13918 }, { "epoch": 0.88, "grad_norm": 1.0341960191726685, "learning_rate": 3.616930327835466e-07, "loss": 0.46, "step": 13919 }, { "epoch": 0.88, "grad_norm": 1.0552611351013184, "learning_rate": 3.613100013455972e-07, "loss": 0.5315, "step": 13920 }, { "epoch": 0.88, "grad_norm": 1.1298288106918335, "learning_rate": 3.609271652267465e-07, "loss": 0.5213, "step": 13921 }, { "epoch": 0.88, "grad_norm": 1.1473060846328735, "learning_rate": 3.6054452444311493e-07, "loss": 0.5293, "step": 13922 }, { "epoch": 0.88, "grad_norm": 0.9427440762519836, "learning_rate": 3.601620790108135e-07, "loss": 0.4833, "step": 13923 }, { "epoch": 0.88, "grad_norm": 1.0919525623321533, "learning_rate": 3.597798289459464e-07, "loss": 0.5746, "step": 13924 }, { "epoch": 0.88, "grad_norm": 1.0796968936920166, "learning_rate": 3.593977742646088e-07, "loss": 0.4685, "step": 13925 }, { "epoch": 0.88, "grad_norm": 1.0801947116851807, "learning_rate": 3.5901591498288755e-07, "loss": 0.4749, "step": 13926 }, { "epoch": 0.88, "grad_norm": 1.0195661783218384, "learning_rate": 3.5863425111686e-07, "loss": 0.4622, "step": 13927 }, { "epoch": 0.88, "grad_norm": 1.1256657838821411, "learning_rate": 3.5825278268259987e-07, "loss": 0.5211, "step": 13928 }, { "epoch": 0.88, "grad_norm": 1.0019874572753906, "learning_rate": 3.5787150969616657e-07, "loss": 0.4866, "step": 13929 }, { "epoch": 0.88, "grad_norm": 1.0935301780700684, "learning_rate": 3.57490432173615e-07, "loss": 0.5172, "step": 13930 }, { "epoch": 0.88, "grad_norm": 1.0015305280685425, "learning_rate": 3.5710955013099233e-07, "loss": 0.4644, "step": 13931 }, { "epoch": 0.88, "grad_norm": 1.057268500328064, "learning_rate": 3.5672886358433356e-07, "loss": 0.4947, "step": 13932 }, { "epoch": 0.88, "grad_norm": 1.059360384941101, "learning_rate": 3.5634837254967023e-07, "loss": 0.4757, "step": 13933 }, { "epoch": 0.88, "grad_norm": 1.0322678089141846, "learning_rate": 3.559680770430235e-07, "loss": 0.4716, "step": 13934 }, { "epoch": 0.88, "grad_norm": 1.0753318071365356, "learning_rate": 3.555879770804049e-07, "loss": 0.5344, "step": 13935 }, { "epoch": 0.88, "grad_norm": 1.0942437648773193, "learning_rate": 3.5520807267782007e-07, "loss": 0.4606, "step": 13936 }, { "epoch": 0.88, "grad_norm": 1.1344181299209595, "learning_rate": 3.548283638512651e-07, "loss": 0.521, "step": 13937 }, { "epoch": 0.88, "grad_norm": 1.02778959274292, "learning_rate": 3.544488506167282e-07, "loss": 0.5025, "step": 13938 }, { "epoch": 0.88, "grad_norm": 1.0618828535079956, "learning_rate": 3.5406953299019056e-07, "loss": 0.4692, "step": 13939 }, { "epoch": 0.88, "grad_norm": 1.0954022407531738, "learning_rate": 3.5369041098762103e-07, "loss": 0.512, "step": 13940 }, { "epoch": 0.88, "grad_norm": 1.1816658973693848, "learning_rate": 3.5331148462498635e-07, "loss": 0.5178, "step": 13941 }, { "epoch": 0.88, "grad_norm": 1.0592976808547974, "learning_rate": 3.529327539182403e-07, "loss": 0.4907, "step": 13942 }, { "epoch": 0.88, "grad_norm": 1.006933569908142, "learning_rate": 3.5255421888332976e-07, "loss": 0.4825, "step": 13943 }, { "epoch": 0.88, "grad_norm": 0.9933514595031738, "learning_rate": 3.5217587953619404e-07, "loss": 0.475, "step": 13944 }, { "epoch": 0.88, "grad_norm": 1.00538170337677, "learning_rate": 3.517977358927632e-07, "loss": 0.481, "step": 13945 }, { "epoch": 0.88, "grad_norm": 1.0523886680603027, "learning_rate": 3.514197879689596e-07, "loss": 0.5091, "step": 13946 }, { "epoch": 0.88, "grad_norm": 1.0707453489303589, "learning_rate": 3.5104203578069817e-07, "loss": 0.5074, "step": 13947 }, { "epoch": 0.88, "grad_norm": 1.0507975816726685, "learning_rate": 3.506644793438835e-07, "loss": 0.4826, "step": 13948 }, { "epoch": 0.88, "grad_norm": 1.1410112380981445, "learning_rate": 3.502871186744128e-07, "loss": 0.5147, "step": 13949 }, { "epoch": 0.88, "grad_norm": 1.0179779529571533, "learning_rate": 3.499099537881784e-07, "loss": 0.5329, "step": 13950 }, { "epoch": 0.88, "grad_norm": 1.163852572441101, "learning_rate": 3.495329847010581e-07, "loss": 0.5019, "step": 13951 }, { "epoch": 0.88, "grad_norm": 1.048890233039856, "learning_rate": 3.4915621142892595e-07, "loss": 0.5094, "step": 13952 }, { "epoch": 0.88, "grad_norm": 1.1120535135269165, "learning_rate": 3.48779633987647e-07, "loss": 0.5243, "step": 13953 }, { "epoch": 0.88, "grad_norm": 1.0523576736450195, "learning_rate": 3.4840325239307693e-07, "loss": 0.5379, "step": 13954 }, { "epoch": 0.88, "grad_norm": 1.0382822751998901, "learning_rate": 3.4802706666106525e-07, "loss": 0.4986, "step": 13955 }, { "epoch": 0.88, "grad_norm": 1.141065239906311, "learning_rate": 3.476510768074498e-07, "loss": 0.4903, "step": 13956 }, { "epoch": 0.88, "grad_norm": 1.0527931451797485, "learning_rate": 3.4727528284806247e-07, "loss": 0.5229, "step": 13957 }, { "epoch": 0.88, "grad_norm": 1.133631944656372, "learning_rate": 3.468996847987288e-07, "loss": 0.4652, "step": 13958 }, { "epoch": 0.88, "grad_norm": 1.1136630773544312, "learning_rate": 3.4652428267526184e-07, "loss": 0.577, "step": 13959 }, { "epoch": 0.88, "grad_norm": 1.0631787776947021, "learning_rate": 3.4614907649346884e-07, "loss": 0.4837, "step": 13960 }, { "epoch": 0.88, "grad_norm": 1.162307858467102, "learning_rate": 3.4577406626914947e-07, "loss": 0.5281, "step": 13961 }, { "epoch": 0.88, "grad_norm": 1.093318223953247, "learning_rate": 3.453992520180921e-07, "loss": 0.5125, "step": 13962 }, { "epoch": 0.88, "grad_norm": 1.0075652599334717, "learning_rate": 3.4502463375608143e-07, "loss": 0.4865, "step": 13963 }, { "epoch": 0.88, "grad_norm": 1.0205689668655396, "learning_rate": 3.446502114988892e-07, "loss": 0.4908, "step": 13964 }, { "epoch": 0.88, "grad_norm": 1.1372061967849731, "learning_rate": 3.442759852622812e-07, "loss": 0.5417, "step": 13965 }, { "epoch": 0.88, "grad_norm": 0.9880989193916321, "learning_rate": 3.4390195506201594e-07, "loss": 0.4487, "step": 13966 }, { "epoch": 0.88, "grad_norm": 1.061226487159729, "learning_rate": 3.435281209138419e-07, "loss": 0.5025, "step": 13967 }, { "epoch": 0.88, "grad_norm": 1.071082592010498, "learning_rate": 3.4315448283349985e-07, "loss": 0.5042, "step": 13968 }, { "epoch": 0.88, "grad_norm": 1.08926260471344, "learning_rate": 3.4278104083672383e-07, "loss": 0.5171, "step": 13969 }, { "epoch": 0.89, "grad_norm": 1.0360966920852661, "learning_rate": 3.424077949392346e-07, "loss": 0.4877, "step": 13970 }, { "epoch": 0.89, "grad_norm": 1.0709553956985474, "learning_rate": 3.4203474515675293e-07, "loss": 0.5125, "step": 13971 }, { "epoch": 0.89, "grad_norm": 1.0262420177459717, "learning_rate": 3.4166189150498297e-07, "loss": 0.5018, "step": 13972 }, { "epoch": 0.89, "grad_norm": 1.0908358097076416, "learning_rate": 3.4128923399962543e-07, "loss": 0.4687, "step": 13973 }, { "epoch": 0.89, "grad_norm": 1.0744664669036865, "learning_rate": 3.4091677265637224e-07, "loss": 0.556, "step": 13974 }, { "epoch": 0.89, "grad_norm": 1.0527634620666504, "learning_rate": 3.405445074909053e-07, "loss": 0.4932, "step": 13975 }, { "epoch": 0.89, "grad_norm": 1.0401239395141602, "learning_rate": 3.401724385189009e-07, "loss": 0.462, "step": 13976 }, { "epoch": 0.89, "grad_norm": 1.0116753578186035, "learning_rate": 3.398005657560249e-07, "loss": 0.4909, "step": 13977 }, { "epoch": 0.89, "grad_norm": 1.0280588865280151, "learning_rate": 3.394288892179348e-07, "loss": 0.4832, "step": 13978 }, { "epoch": 0.89, "grad_norm": 0.989622175693512, "learning_rate": 3.390574089202814e-07, "loss": 0.5134, "step": 13979 }, { "epoch": 0.89, "grad_norm": 1.0852323770523071, "learning_rate": 3.3868612487870657e-07, "loss": 0.5206, "step": 13980 }, { "epoch": 0.89, "grad_norm": 1.0556645393371582, "learning_rate": 3.3831503710884286e-07, "loss": 0.4992, "step": 13981 }, { "epoch": 0.89, "grad_norm": 1.13899564743042, "learning_rate": 3.379441456263166e-07, "loss": 0.5275, "step": 13982 }, { "epoch": 0.89, "grad_norm": 0.9144753813743591, "learning_rate": 3.375734504467437e-07, "loss": 0.4646, "step": 13983 }, { "epoch": 0.89, "grad_norm": 0.9743185639381409, "learning_rate": 3.372029515857339e-07, "loss": 0.495, "step": 13984 }, { "epoch": 0.89, "grad_norm": 1.0322911739349365, "learning_rate": 3.368326490588875e-07, "loss": 0.4807, "step": 13985 }, { "epoch": 0.89, "grad_norm": 1.016212821006775, "learning_rate": 3.364625428817958e-07, "loss": 0.4527, "step": 13986 }, { "epoch": 0.89, "grad_norm": 1.1167676448822021, "learning_rate": 3.360926330700431e-07, "loss": 0.505, "step": 13987 }, { "epoch": 0.89, "grad_norm": 1.0469144582748413, "learning_rate": 3.3572291963920536e-07, "loss": 0.4573, "step": 13988 }, { "epoch": 0.89, "grad_norm": 1.0507501363754272, "learning_rate": 3.353534026048494e-07, "loss": 0.5354, "step": 13989 }, { "epoch": 0.89, "grad_norm": 1.0410934686660767, "learning_rate": 3.3498408198253453e-07, "loss": 0.4821, "step": 13990 }, { "epoch": 0.89, "grad_norm": 0.9763138890266418, "learning_rate": 3.3461495778781104e-07, "loss": 0.4706, "step": 13991 }, { "epoch": 0.89, "grad_norm": 0.9764253497123718, "learning_rate": 3.342460300362227e-07, "loss": 0.4674, "step": 13992 }, { "epoch": 0.89, "grad_norm": 0.9839788675308228, "learning_rate": 3.3387729874330367e-07, "loss": 0.4861, "step": 13993 }, { "epoch": 0.89, "grad_norm": 1.0251315832138062, "learning_rate": 3.335087639245782e-07, "loss": 0.5231, "step": 13994 }, { "epoch": 0.89, "grad_norm": 1.0381351709365845, "learning_rate": 3.331404255955656e-07, "loss": 0.4944, "step": 13995 }, { "epoch": 0.89, "grad_norm": 1.0450767278671265, "learning_rate": 3.327722837717745e-07, "loss": 0.4844, "step": 13996 }, { "epoch": 0.89, "grad_norm": 1.0122859477996826, "learning_rate": 3.32404338468707e-07, "loss": 0.4894, "step": 13997 }, { "epoch": 0.89, "grad_norm": 1.157048225402832, "learning_rate": 3.320365897018546e-07, "loss": 0.5038, "step": 13998 }, { "epoch": 0.89, "grad_norm": 1.1703745126724243, "learning_rate": 3.316690374867043e-07, "loss": 0.4965, "step": 13999 }, { "epoch": 0.89, "grad_norm": 1.1754573583602905, "learning_rate": 3.313016818387288e-07, "loss": 0.4803, "step": 14000 }, { "epoch": 0.89, "grad_norm": 1.0195411443710327, "learning_rate": 3.309345227734001e-07, "loss": 0.5153, "step": 14001 }, { "epoch": 0.89, "grad_norm": 1.044917345046997, "learning_rate": 3.305675603061753e-07, "loss": 0.5025, "step": 14002 }, { "epoch": 0.89, "grad_norm": 1.0109405517578125, "learning_rate": 3.3020079445250655e-07, "loss": 0.5369, "step": 14003 }, { "epoch": 0.89, "grad_norm": 1.014048457145691, "learning_rate": 3.2983422522783747e-07, "loss": 0.4931, "step": 14004 }, { "epoch": 0.89, "grad_norm": 1.012646198272705, "learning_rate": 3.2946785264760305e-07, "loss": 0.4602, "step": 14005 }, { "epoch": 0.89, "grad_norm": 1.0836317539215088, "learning_rate": 3.291016767272298e-07, "loss": 0.535, "step": 14006 }, { "epoch": 0.89, "grad_norm": 1.0748432874679565, "learning_rate": 3.287356974821365e-07, "loss": 0.5124, "step": 14007 }, { "epoch": 0.89, "grad_norm": 1.0747649669647217, "learning_rate": 3.28369914927732e-07, "loss": 0.4709, "step": 14008 }, { "epoch": 0.89, "grad_norm": 1.0255682468414307, "learning_rate": 3.2800432907941935e-07, "loss": 0.4898, "step": 14009 }, { "epoch": 0.89, "grad_norm": 1.1123892068862915, "learning_rate": 3.276389399525914e-07, "loss": 0.4793, "step": 14010 }, { "epoch": 0.89, "grad_norm": 1.1055023670196533, "learning_rate": 3.272737475626342e-07, "loss": 0.5013, "step": 14011 }, { "epoch": 0.89, "grad_norm": 1.0478788614273071, "learning_rate": 3.269087519249242e-07, "loss": 0.4817, "step": 14012 }, { "epoch": 0.89, "grad_norm": 1.009963870048523, "learning_rate": 3.2654395305482924e-07, "loss": 0.5239, "step": 14013 }, { "epoch": 0.89, "grad_norm": 1.1160995960235596, "learning_rate": 3.2617935096771137e-07, "loss": 0.4961, "step": 14014 }, { "epoch": 0.89, "grad_norm": 1.0429821014404297, "learning_rate": 3.258149456789228e-07, "loss": 0.4966, "step": 14015 }, { "epoch": 0.89, "grad_norm": 1.1230183839797974, "learning_rate": 3.2545073720380573e-07, "loss": 0.4588, "step": 14016 }, { "epoch": 0.89, "grad_norm": 1.0803278684616089, "learning_rate": 3.2508672555769617e-07, "loss": 0.4747, "step": 14017 }, { "epoch": 0.89, "grad_norm": 1.160321831703186, "learning_rate": 3.2472291075592246e-07, "loss": 0.5333, "step": 14018 }, { "epoch": 0.89, "grad_norm": 0.9995227456092834, "learning_rate": 3.243592928138023e-07, "loss": 0.4823, "step": 14019 }, { "epoch": 0.89, "grad_norm": 1.0570666790008545, "learning_rate": 3.2399587174664794e-07, "loss": 0.5001, "step": 14020 }, { "epoch": 0.89, "grad_norm": 1.076535701751709, "learning_rate": 3.236326475697593e-07, "loss": 0.49, "step": 14021 }, { "epoch": 0.89, "grad_norm": 1.0700603723526, "learning_rate": 3.232696202984326e-07, "loss": 0.4804, "step": 14022 }, { "epoch": 0.89, "grad_norm": 0.9756863713264465, "learning_rate": 3.2290678994795377e-07, "loss": 0.4687, "step": 14023 }, { "epoch": 0.89, "grad_norm": 1.0192291736602783, "learning_rate": 3.2254415653359906e-07, "loss": 0.5132, "step": 14024 }, { "epoch": 0.89, "grad_norm": 1.0446388721466064, "learning_rate": 3.2218172007063787e-07, "loss": 0.5352, "step": 14025 }, { "epoch": 0.89, "grad_norm": 1.0355427265167236, "learning_rate": 3.218194805743319e-07, "loss": 0.4519, "step": 14026 }, { "epoch": 0.89, "grad_norm": 1.019925832748413, "learning_rate": 3.2145743805993334e-07, "loss": 0.4755, "step": 14027 }, { "epoch": 0.89, "grad_norm": 1.0217880010604858, "learning_rate": 3.210955925426873e-07, "loss": 0.4838, "step": 14028 }, { "epoch": 0.89, "grad_norm": 1.120097041130066, "learning_rate": 3.2073394403782823e-07, "loss": 0.5432, "step": 14029 }, { "epoch": 0.89, "grad_norm": 1.034629225730896, "learning_rate": 3.2037249256058445e-07, "loss": 0.5506, "step": 14030 }, { "epoch": 0.89, "grad_norm": 1.0962677001953125, "learning_rate": 3.2001123812617663e-07, "loss": 0.5269, "step": 14031 }, { "epoch": 0.89, "grad_norm": 1.1263171434402466, "learning_rate": 3.196501807498148e-07, "loss": 0.4973, "step": 14032 }, { "epoch": 0.89, "grad_norm": 1.0495620965957642, "learning_rate": 3.192893204467018e-07, "loss": 0.4721, "step": 14033 }, { "epoch": 0.89, "grad_norm": 1.103755235671997, "learning_rate": 3.189286572320327e-07, "loss": 0.525, "step": 14034 }, { "epoch": 0.89, "grad_norm": 1.1351439952850342, "learning_rate": 3.185681911209937e-07, "loss": 0.4788, "step": 14035 }, { "epoch": 0.89, "grad_norm": 1.162345290184021, "learning_rate": 3.1820792212876316e-07, "loss": 0.4734, "step": 14036 }, { "epoch": 0.89, "grad_norm": 1.1245495080947876, "learning_rate": 3.178478502705101e-07, "loss": 0.5024, "step": 14037 }, { "epoch": 0.89, "grad_norm": 1.1061865091323853, "learning_rate": 3.174879755613952e-07, "loss": 0.4776, "step": 14038 }, { "epoch": 0.89, "grad_norm": 1.1747066974639893, "learning_rate": 3.1712829801657294e-07, "loss": 0.4794, "step": 14039 }, { "epoch": 0.89, "grad_norm": 1.0927338600158691, "learning_rate": 3.167688176511874e-07, "loss": 0.5348, "step": 14040 }, { "epoch": 0.89, "grad_norm": 1.0735430717468262, "learning_rate": 3.1640953448037527e-07, "loss": 0.5223, "step": 14041 }, { "epoch": 0.89, "grad_norm": 1.075239896774292, "learning_rate": 3.1605044851926504e-07, "loss": 0.487, "step": 14042 }, { "epoch": 0.89, "grad_norm": 1.1099472045898438, "learning_rate": 3.1569155978297463e-07, "loss": 0.4947, "step": 14043 }, { "epoch": 0.89, "grad_norm": 1.0738979578018188, "learning_rate": 3.1533286828661915e-07, "loss": 0.4845, "step": 14044 }, { "epoch": 0.89, "grad_norm": 1.0492703914642334, "learning_rate": 3.1497437404529875e-07, "loss": 0.5279, "step": 14045 }, { "epoch": 0.89, "grad_norm": 1.0727201700210571, "learning_rate": 3.1461607707410914e-07, "loss": 0.5097, "step": 14046 }, { "epoch": 0.89, "grad_norm": 1.156922698020935, "learning_rate": 3.142579773881377e-07, "loss": 0.5034, "step": 14047 }, { "epoch": 0.89, "grad_norm": 1.085688591003418, "learning_rate": 3.1390007500246236e-07, "loss": 0.5045, "step": 14048 }, { "epoch": 0.89, "grad_norm": 1.1152316331863403, "learning_rate": 3.135423699321527e-07, "loss": 0.5576, "step": 14049 }, { "epoch": 0.89, "grad_norm": 1.1113206148147583, "learning_rate": 3.131848621922717e-07, "loss": 0.5027, "step": 14050 }, { "epoch": 0.89, "grad_norm": 1.0150794982910156, "learning_rate": 3.128275517978707e-07, "loss": 0.4817, "step": 14051 }, { "epoch": 0.89, "grad_norm": 1.154528260231018, "learning_rate": 3.124704387639976e-07, "loss": 0.5732, "step": 14052 }, { "epoch": 0.89, "grad_norm": 1.0682569742202759, "learning_rate": 3.1211352310568655e-07, "loss": 0.5297, "step": 14053 }, { "epoch": 0.89, "grad_norm": 1.0718752145767212, "learning_rate": 3.1175680483796713e-07, "loss": 0.5059, "step": 14054 }, { "epoch": 0.89, "grad_norm": 1.1030879020690918, "learning_rate": 3.1140028397585953e-07, "loss": 0.4824, "step": 14055 }, { "epoch": 0.89, "grad_norm": 0.9959520697593689, "learning_rate": 3.110439605343751e-07, "loss": 0.4686, "step": 14056 }, { "epoch": 0.89, "grad_norm": 1.1040695905685425, "learning_rate": 3.1068783452851856e-07, "loss": 0.5372, "step": 14057 }, { "epoch": 0.89, "grad_norm": 1.0181715488433838, "learning_rate": 3.1033190597328456e-07, "loss": 0.5207, "step": 14058 }, { "epoch": 0.89, "grad_norm": 1.0138585567474365, "learning_rate": 3.099761748836594e-07, "loss": 0.4886, "step": 14059 }, { "epoch": 0.89, "grad_norm": 1.0275546312332153, "learning_rate": 3.0962064127462167e-07, "loss": 0.4894, "step": 14060 }, { "epoch": 0.89, "grad_norm": 1.049033761024475, "learning_rate": 3.092653051611427e-07, "loss": 0.4732, "step": 14061 }, { "epoch": 0.89, "grad_norm": 0.9946855902671814, "learning_rate": 3.089101665581834e-07, "loss": 0.5534, "step": 14062 }, { "epoch": 0.89, "grad_norm": 1.047498106956482, "learning_rate": 3.085552254806978e-07, "loss": 0.4832, "step": 14063 }, { "epoch": 0.89, "grad_norm": 1.0959535837173462, "learning_rate": 3.0820048194363183e-07, "loss": 0.5209, "step": 14064 }, { "epoch": 0.89, "grad_norm": 1.0900388956069946, "learning_rate": 3.0784593596192123e-07, "loss": 0.4941, "step": 14065 }, { "epoch": 0.89, "grad_norm": 1.0469268560409546, "learning_rate": 3.074915875504969e-07, "loss": 0.4463, "step": 14066 }, { "epoch": 0.89, "grad_norm": 0.9177989959716797, "learning_rate": 3.0713743672427686e-07, "loss": 0.4102, "step": 14067 }, { "epoch": 0.89, "grad_norm": 0.9485307335853577, "learning_rate": 3.067834834981731e-07, "loss": 0.4613, "step": 14068 }, { "epoch": 0.89, "grad_norm": 1.00711190700531, "learning_rate": 3.0642972788709203e-07, "loss": 0.4552, "step": 14069 }, { "epoch": 0.89, "grad_norm": 1.0489661693572998, "learning_rate": 3.060761699059267e-07, "loss": 0.4923, "step": 14070 }, { "epoch": 0.89, "grad_norm": 1.1041911840438843, "learning_rate": 3.057228095695647e-07, "loss": 0.5478, "step": 14071 }, { "epoch": 0.89, "grad_norm": 0.9792740941047668, "learning_rate": 3.053696468928857e-07, "loss": 0.4304, "step": 14072 }, { "epoch": 0.89, "grad_norm": 1.0784748792648315, "learning_rate": 3.0501668189075794e-07, "loss": 0.5076, "step": 14073 }, { "epoch": 0.89, "grad_norm": 1.0770944356918335, "learning_rate": 3.0466391457804666e-07, "loss": 0.4694, "step": 14074 }, { "epoch": 0.89, "grad_norm": 1.0410479307174683, "learning_rate": 3.0431134496960333e-07, "loss": 0.5219, "step": 14075 }, { "epoch": 0.89, "grad_norm": 1.069656252861023, "learning_rate": 3.0395897308027443e-07, "loss": 0.4861, "step": 14076 }, { "epoch": 0.89, "grad_norm": 1.1559025049209595, "learning_rate": 3.0360679892489643e-07, "loss": 0.5164, "step": 14077 }, { "epoch": 0.89, "grad_norm": 1.0050629377365112, "learning_rate": 3.032548225182985e-07, "loss": 0.5105, "step": 14078 }, { "epoch": 0.89, "grad_norm": 1.0117554664611816, "learning_rate": 3.029030438753017e-07, "loss": 0.521, "step": 14079 }, { "epoch": 0.89, "grad_norm": 1.005658745765686, "learning_rate": 3.025514630107179e-07, "loss": 0.4724, "step": 14080 }, { "epoch": 0.89, "grad_norm": 1.0892596244812012, "learning_rate": 3.0220007993934987e-07, "loss": 0.5437, "step": 14081 }, { "epoch": 0.89, "grad_norm": 1.0093494653701782, "learning_rate": 3.018488946759951e-07, "loss": 0.4781, "step": 14082 }, { "epoch": 0.89, "grad_norm": 1.0474176406860352, "learning_rate": 3.01497907235439e-07, "loss": 0.4923, "step": 14083 }, { "epoch": 0.89, "grad_norm": 0.9824331998825073, "learning_rate": 3.0114711763246096e-07, "loss": 0.468, "step": 14084 }, { "epoch": 0.89, "grad_norm": 1.0985101461410522, "learning_rate": 3.007965258818324e-07, "loss": 0.5218, "step": 14085 }, { "epoch": 0.89, "grad_norm": 1.067883849143982, "learning_rate": 3.0044613199831373e-07, "loss": 0.5282, "step": 14086 }, { "epoch": 0.89, "grad_norm": 1.1233497858047485, "learning_rate": 3.0009593599666044e-07, "loss": 0.5522, "step": 14087 }, { "epoch": 0.89, "grad_norm": 1.1419447660446167, "learning_rate": 2.9974593789161843e-07, "loss": 0.5165, "step": 14088 }, { "epoch": 0.89, "grad_norm": 1.1232085227966309, "learning_rate": 2.9939613769792265e-07, "loss": 0.5118, "step": 14089 }, { "epoch": 0.89, "grad_norm": 1.0514224767684937, "learning_rate": 2.9904653543030406e-07, "loss": 0.5259, "step": 14090 }, { "epoch": 0.89, "grad_norm": 1.0916627645492554, "learning_rate": 2.98697131103482e-07, "loss": 0.5176, "step": 14091 }, { "epoch": 0.89, "grad_norm": 1.0547813177108765, "learning_rate": 2.983479247321691e-07, "loss": 0.5191, "step": 14092 }, { "epoch": 0.89, "grad_norm": 1.0756022930145264, "learning_rate": 2.979989163310704e-07, "loss": 0.5091, "step": 14093 }, { "epoch": 0.89, "grad_norm": 1.0705457925796509, "learning_rate": 2.976501059148779e-07, "loss": 0.5185, "step": 14094 }, { "epoch": 0.89, "grad_norm": 1.0069702863693237, "learning_rate": 2.9730149349828265e-07, "loss": 0.4211, "step": 14095 }, { "epoch": 0.89, "grad_norm": 1.0970326662063599, "learning_rate": 2.969530790959624e-07, "loss": 0.5021, "step": 14096 }, { "epoch": 0.89, "grad_norm": 1.1100468635559082, "learning_rate": 2.9660486272258703e-07, "loss": 0.4899, "step": 14097 }, { "epoch": 0.89, "grad_norm": 1.123530387878418, "learning_rate": 2.9625684439281875e-07, "loss": 0.4867, "step": 14098 }, { "epoch": 0.89, "grad_norm": 1.0842902660369873, "learning_rate": 2.959090241213114e-07, "loss": 0.5289, "step": 14099 }, { "epoch": 0.89, "grad_norm": 1.0588605403900146, "learning_rate": 2.9556140192271045e-07, "loss": 0.529, "step": 14100 }, { "epoch": 0.89, "grad_norm": 1.020761489868164, "learning_rate": 2.9521397781165475e-07, "loss": 0.508, "step": 14101 }, { "epoch": 0.89, "grad_norm": 1.0218114852905273, "learning_rate": 2.9486675180277035e-07, "loss": 0.5011, "step": 14102 }, { "epoch": 0.89, "grad_norm": 1.0822890996932983, "learning_rate": 2.9451972391067897e-07, "loss": 0.4697, "step": 14103 }, { "epoch": 0.89, "grad_norm": 1.0297285318374634, "learning_rate": 2.941728941499938e-07, "loss": 0.4652, "step": 14104 }, { "epoch": 0.89, "grad_norm": 1.1335946321487427, "learning_rate": 2.938262625353172e-07, "loss": 0.4826, "step": 14105 }, { "epoch": 0.89, "grad_norm": 1.004652976989746, "learning_rate": 2.934798290812446e-07, "loss": 0.5152, "step": 14106 }, { "epoch": 0.89, "grad_norm": 1.048423171043396, "learning_rate": 2.931335938023644e-07, "loss": 0.4854, "step": 14107 }, { "epoch": 0.89, "grad_norm": 1.0791021585464478, "learning_rate": 2.9278755671325377e-07, "loss": 0.4997, "step": 14108 }, { "epoch": 0.89, "grad_norm": 1.0030678510665894, "learning_rate": 2.924417178284855e-07, "loss": 0.5084, "step": 14109 }, { "epoch": 0.89, "grad_norm": 1.008026361465454, "learning_rate": 2.9209607716261856e-07, "loss": 0.4792, "step": 14110 }, { "epoch": 0.89, "grad_norm": 1.0434467792510986, "learning_rate": 2.917506347302079e-07, "loss": 0.525, "step": 14111 }, { "epoch": 0.89, "grad_norm": 1.1228113174438477, "learning_rate": 2.9140539054580087e-07, "loss": 0.5111, "step": 14112 }, { "epoch": 0.89, "grad_norm": 1.0027477741241455, "learning_rate": 2.9106034462393187e-07, "loss": 0.4626, "step": 14113 }, { "epoch": 0.89, "grad_norm": 1.0428019762039185, "learning_rate": 2.9071549697913035e-07, "loss": 0.4866, "step": 14114 }, { "epoch": 0.89, "grad_norm": 1.0616719722747803, "learning_rate": 2.9037084762591704e-07, "loss": 0.4292, "step": 14115 }, { "epoch": 0.89, "grad_norm": 1.103381872177124, "learning_rate": 2.900263965788036e-07, "loss": 0.5328, "step": 14116 }, { "epoch": 0.89, "grad_norm": 1.0784556865692139, "learning_rate": 2.8968214385229453e-07, "loss": 0.4891, "step": 14117 }, { "epoch": 0.89, "grad_norm": 1.0696626901626587, "learning_rate": 2.8933808946088383e-07, "loss": 0.4962, "step": 14118 }, { "epoch": 0.89, "grad_norm": 1.0749086141586304, "learning_rate": 2.889942334190593e-07, "loss": 0.5058, "step": 14119 }, { "epoch": 0.89, "grad_norm": 0.9835822582244873, "learning_rate": 2.8865057574129883e-07, "loss": 0.4921, "step": 14120 }, { "epoch": 0.89, "grad_norm": 1.0954842567443848, "learning_rate": 2.8830711644207257e-07, "loss": 0.4839, "step": 14121 }, { "epoch": 0.89, "grad_norm": 1.1231663227081299, "learning_rate": 2.8796385553584326e-07, "loss": 0.5203, "step": 14122 }, { "epoch": 0.89, "grad_norm": 1.0576690435409546, "learning_rate": 2.8762079303706505e-07, "loss": 0.5109, "step": 14123 }, { "epoch": 0.89, "grad_norm": 1.1051459312438965, "learning_rate": 2.8727792896018015e-07, "loss": 0.4861, "step": 14124 }, { "epoch": 0.89, "grad_norm": 1.027230143547058, "learning_rate": 2.8693526331962875e-07, "loss": 0.4674, "step": 14125 }, { "epoch": 0.89, "grad_norm": 1.0249128341674805, "learning_rate": 2.865927961298376e-07, "loss": 0.4791, "step": 14126 }, { "epoch": 0.9, "grad_norm": 1.0711956024169922, "learning_rate": 2.8625052740522683e-07, "loss": 0.4984, "step": 14127 }, { "epoch": 0.9, "grad_norm": 1.1102118492126465, "learning_rate": 2.859084571602083e-07, "loss": 0.4992, "step": 14128 }, { "epoch": 0.9, "grad_norm": 1.123199462890625, "learning_rate": 2.8556658540918603e-07, "loss": 0.4748, "step": 14129 }, { "epoch": 0.9, "grad_norm": 1.0180432796478271, "learning_rate": 2.8522491216655403e-07, "loss": 0.4835, "step": 14130 }, { "epoch": 0.9, "grad_norm": 0.9965765476226807, "learning_rate": 2.848834374467002e-07, "loss": 0.5006, "step": 14131 }, { "epoch": 0.9, "grad_norm": 1.1160887479782104, "learning_rate": 2.8454216126400146e-07, "loss": 0.5333, "step": 14132 }, { "epoch": 0.9, "grad_norm": 1.216166377067566, "learning_rate": 2.842010836328274e-07, "loss": 0.5463, "step": 14133 }, { "epoch": 0.9, "grad_norm": 1.0514857769012451, "learning_rate": 2.838602045675426e-07, "loss": 0.4975, "step": 14134 }, { "epoch": 0.9, "grad_norm": 1.0855978727340698, "learning_rate": 2.8351952408249726e-07, "loss": 0.4765, "step": 14135 }, { "epoch": 0.9, "grad_norm": 1.0219852924346924, "learning_rate": 2.831790421920377e-07, "loss": 0.5216, "step": 14136 }, { "epoch": 0.9, "grad_norm": 1.0745776891708374, "learning_rate": 2.828387589104997e-07, "loss": 0.5266, "step": 14137 }, { "epoch": 0.9, "grad_norm": 1.0392996072769165, "learning_rate": 2.824986742522118e-07, "loss": 0.4988, "step": 14138 }, { "epoch": 0.9, "grad_norm": 1.0775796175003052, "learning_rate": 2.8215878823149466e-07, "loss": 0.5413, "step": 14139 }, { "epoch": 0.9, "grad_norm": 1.0642188787460327, "learning_rate": 2.818191008626581e-07, "loss": 0.4648, "step": 14140 }, { "epoch": 0.9, "grad_norm": 1.0208340883255005, "learning_rate": 2.8147961216000497e-07, "loss": 0.4709, "step": 14141 }, { "epoch": 0.9, "grad_norm": 1.038403868675232, "learning_rate": 2.8114032213783226e-07, "loss": 0.4433, "step": 14142 }, { "epoch": 0.9, "grad_norm": 1.0203797817230225, "learning_rate": 2.808012308104241e-07, "loss": 0.5383, "step": 14143 }, { "epoch": 0.9, "grad_norm": 1.0287710428237915, "learning_rate": 2.80462338192059e-07, "loss": 0.5089, "step": 14144 }, { "epoch": 0.9, "grad_norm": 1.0058563947677612, "learning_rate": 2.801236442970073e-07, "loss": 0.4697, "step": 14145 }, { "epoch": 0.9, "grad_norm": 0.9962086081504822, "learning_rate": 2.797851491395293e-07, "loss": 0.5054, "step": 14146 }, { "epoch": 0.9, "grad_norm": 1.0989856719970703, "learning_rate": 2.79446852733879e-07, "loss": 0.5381, "step": 14147 }, { "epoch": 0.9, "grad_norm": 1.0387845039367676, "learning_rate": 2.791087550942995e-07, "loss": 0.4932, "step": 14148 }, { "epoch": 0.9, "grad_norm": 1.0991252660751343, "learning_rate": 2.7877085623502775e-07, "loss": 0.4984, "step": 14149 }, { "epoch": 0.9, "grad_norm": 1.1020551919937134, "learning_rate": 2.784331561702908e-07, "loss": 0.5014, "step": 14150 }, { "epoch": 0.9, "grad_norm": 1.0331794023513794, "learning_rate": 2.780956549143088e-07, "loss": 0.452, "step": 14151 }, { "epoch": 0.9, "grad_norm": 0.9821256399154663, "learning_rate": 2.7775835248129267e-07, "loss": 0.4704, "step": 14152 }, { "epoch": 0.9, "grad_norm": 1.1878122091293335, "learning_rate": 2.7742124888544497e-07, "loss": 0.5128, "step": 14153 }, { "epoch": 0.9, "grad_norm": 1.1046786308288574, "learning_rate": 2.7708434414095875e-07, "loss": 0.5183, "step": 14154 }, { "epoch": 0.9, "grad_norm": 1.0835120677947998, "learning_rate": 2.7674763826202265e-07, "loss": 0.5177, "step": 14155 }, { "epoch": 0.9, "grad_norm": 1.0790420770645142, "learning_rate": 2.764111312628115e-07, "loss": 0.5319, "step": 14156 }, { "epoch": 0.9, "grad_norm": 1.0519121885299683, "learning_rate": 2.7607482315749554e-07, "loss": 0.4714, "step": 14157 }, { "epoch": 0.9, "grad_norm": 1.108740210533142, "learning_rate": 2.757387139602352e-07, "loss": 0.4883, "step": 14158 }, { "epoch": 0.9, "grad_norm": 1.0215269327163696, "learning_rate": 2.754028036851836e-07, "loss": 0.4484, "step": 14159 }, { "epoch": 0.9, "grad_norm": 1.124632716178894, "learning_rate": 2.750670923464838e-07, "loss": 0.4925, "step": 14160 }, { "epoch": 0.9, "grad_norm": 1.0852049589157104, "learning_rate": 2.747315799582728e-07, "loss": 0.4534, "step": 14161 }, { "epoch": 0.9, "grad_norm": 1.0176403522491455, "learning_rate": 2.7439626653467555e-07, "loss": 0.5349, "step": 14162 }, { "epoch": 0.9, "grad_norm": 1.0927029848098755, "learning_rate": 2.7406115208981345e-07, "loss": 0.4894, "step": 14163 }, { "epoch": 0.9, "grad_norm": 0.9954310059547424, "learning_rate": 2.7372623663779575e-07, "loss": 0.4644, "step": 14164 }, { "epoch": 0.9, "grad_norm": 1.0536550283432007, "learning_rate": 2.733915201927245e-07, "loss": 0.4885, "step": 14165 }, { "epoch": 0.9, "grad_norm": 1.105161190032959, "learning_rate": 2.7305700276869406e-07, "loss": 0.5142, "step": 14166 }, { "epoch": 0.9, "grad_norm": 1.0193461179733276, "learning_rate": 2.727226843797881e-07, "loss": 0.4866, "step": 14167 }, { "epoch": 0.9, "grad_norm": 1.0644636154174805, "learning_rate": 2.7238856504008594e-07, "loss": 0.5238, "step": 14168 }, { "epoch": 0.9, "grad_norm": 1.0790624618530273, "learning_rate": 2.7205464476365575e-07, "loss": 0.4718, "step": 14169 }, { "epoch": 0.9, "grad_norm": 1.1688655614852905, "learning_rate": 2.7172092356455626e-07, "loss": 0.5232, "step": 14170 }, { "epoch": 0.9, "grad_norm": 1.0596356391906738, "learning_rate": 2.7138740145684017e-07, "loss": 0.5308, "step": 14171 }, { "epoch": 0.9, "grad_norm": 1.1095932722091675, "learning_rate": 2.7105407845455124e-07, "loss": 0.5524, "step": 14172 }, { "epoch": 0.9, "grad_norm": 0.9973832964897156, "learning_rate": 2.707209545717238e-07, "loss": 0.4979, "step": 14173 }, { "epoch": 0.9, "grad_norm": 1.084646224975586, "learning_rate": 2.70388029822386e-07, "loss": 0.4858, "step": 14174 }, { "epoch": 0.9, "grad_norm": 1.1202423572540283, "learning_rate": 2.700553042205539e-07, "loss": 0.4811, "step": 14175 }, { "epoch": 0.9, "grad_norm": 1.0021730661392212, "learning_rate": 2.6972277778023913e-07, "loss": 0.4513, "step": 14176 }, { "epoch": 0.9, "grad_norm": 1.0620533227920532, "learning_rate": 2.693904505154432e-07, "loss": 0.5149, "step": 14177 }, { "epoch": 0.9, "grad_norm": 1.1149684190750122, "learning_rate": 2.690583224401588e-07, "loss": 0.5114, "step": 14178 }, { "epoch": 0.9, "grad_norm": 0.963422954082489, "learning_rate": 2.687263935683704e-07, "loss": 0.4722, "step": 14179 }, { "epoch": 0.9, "grad_norm": 1.1437015533447266, "learning_rate": 2.6839466391405444e-07, "loss": 0.5359, "step": 14180 }, { "epoch": 0.9, "grad_norm": 0.9957416653633118, "learning_rate": 2.680631334911793e-07, "loss": 0.4835, "step": 14181 }, { "epoch": 0.9, "grad_norm": 1.138525128364563, "learning_rate": 2.677318023137049e-07, "loss": 0.5379, "step": 14182 }, { "epoch": 0.9, "grad_norm": 1.0956865549087524, "learning_rate": 2.674006703955817e-07, "loss": 0.5163, "step": 14183 }, { "epoch": 0.9, "grad_norm": 1.1254189014434814, "learning_rate": 2.670697377507514e-07, "loss": 0.4935, "step": 14184 }, { "epoch": 0.9, "grad_norm": 1.0349822044372559, "learning_rate": 2.667390043931517e-07, "loss": 0.493, "step": 14185 }, { "epoch": 0.9, "grad_norm": 1.0068690776824951, "learning_rate": 2.664084703367059e-07, "loss": 0.4698, "step": 14186 }, { "epoch": 0.9, "grad_norm": 1.0804328918457031, "learning_rate": 2.6607813559533236e-07, "loss": 0.4675, "step": 14187 }, { "epoch": 0.9, "grad_norm": 1.0260028839111328, "learning_rate": 2.6574800018294043e-07, "loss": 0.4944, "step": 14188 }, { "epoch": 0.9, "grad_norm": 1.0747073888778687, "learning_rate": 2.654180641134313e-07, "loss": 0.4736, "step": 14189 }, { "epoch": 0.9, "grad_norm": 1.0424423217773438, "learning_rate": 2.650883274006966e-07, "loss": 0.4994, "step": 14190 }, { "epoch": 0.9, "grad_norm": 1.0522016286849976, "learning_rate": 2.6475879005862183e-07, "loss": 0.5297, "step": 14191 }, { "epoch": 0.9, "grad_norm": 1.0279130935668945, "learning_rate": 2.644294521010804e-07, "loss": 0.4684, "step": 14192 }, { "epoch": 0.9, "grad_norm": 1.1287299394607544, "learning_rate": 2.6410031354194175e-07, "loss": 0.5351, "step": 14193 }, { "epoch": 0.9, "grad_norm": 1.033314824104309, "learning_rate": 2.6377137439506373e-07, "loss": 0.4824, "step": 14194 }, { "epoch": 0.9, "grad_norm": 1.0730446577072144, "learning_rate": 2.634426346742969e-07, "loss": 0.496, "step": 14195 }, { "epoch": 0.9, "grad_norm": 1.1193658113479614, "learning_rate": 2.6311409439348403e-07, "loss": 0.496, "step": 14196 }, { "epoch": 0.9, "grad_norm": 1.0377310514450073, "learning_rate": 2.6278575356645687e-07, "loss": 0.5061, "step": 14197 }, { "epoch": 0.9, "grad_norm": 1.0643011331558228, "learning_rate": 2.624576122070427e-07, "loss": 0.5335, "step": 14198 }, { "epoch": 0.9, "grad_norm": 1.0411330461502075, "learning_rate": 2.621296703290588e-07, "loss": 0.4619, "step": 14199 }, { "epoch": 0.9, "grad_norm": 1.0853526592254639, "learning_rate": 2.6180192794631133e-07, "loss": 0.4932, "step": 14200 }, { "epoch": 0.9, "grad_norm": 1.0662851333618164, "learning_rate": 2.6147438507260205e-07, "loss": 0.5227, "step": 14201 }, { "epoch": 0.9, "grad_norm": 1.104363203048706, "learning_rate": 2.611470417217227e-07, "loss": 0.4924, "step": 14202 }, { "epoch": 0.9, "grad_norm": 0.99798983335495, "learning_rate": 2.6081989790745554e-07, "loss": 0.4812, "step": 14203 }, { "epoch": 0.9, "grad_norm": 1.0650646686553955, "learning_rate": 2.6049295364357684e-07, "loss": 0.5305, "step": 14204 }, { "epoch": 0.9, "grad_norm": 1.0008906126022339, "learning_rate": 2.6016620894385113e-07, "loss": 0.4693, "step": 14205 }, { "epoch": 0.9, "grad_norm": 1.0338935852050781, "learning_rate": 2.59839663822038e-07, "loss": 0.4815, "step": 14206 }, { "epoch": 0.9, "grad_norm": 1.0857926607131958, "learning_rate": 2.5951331829188797e-07, "loss": 0.4963, "step": 14207 }, { "epoch": 0.9, "grad_norm": 1.0653680562973022, "learning_rate": 2.591871723671402e-07, "loss": 0.4684, "step": 14208 }, { "epoch": 0.9, "grad_norm": 0.9513919949531555, "learning_rate": 2.5886122606152866e-07, "loss": 0.5128, "step": 14209 }, { "epoch": 0.9, "grad_norm": 1.0318009853363037, "learning_rate": 2.585354793887779e-07, "loss": 0.4508, "step": 14210 }, { "epoch": 0.9, "grad_norm": 1.0974669456481934, "learning_rate": 2.5820993236260305e-07, "loss": 0.4683, "step": 14211 }, { "epoch": 0.9, "grad_norm": 0.9730995297431946, "learning_rate": 2.5788458499671376e-07, "loss": 0.5013, "step": 14212 }, { "epoch": 0.9, "grad_norm": 1.1871789693832397, "learning_rate": 2.5755943730480735e-07, "loss": 0.509, "step": 14213 }, { "epoch": 0.9, "grad_norm": 1.1025487184524536, "learning_rate": 2.5723448930057405e-07, "loss": 0.549, "step": 14214 }, { "epoch": 0.9, "grad_norm": 1.0638960599899292, "learning_rate": 2.569097409976995e-07, "loss": 0.5135, "step": 14215 }, { "epoch": 0.9, "grad_norm": 1.033321499824524, "learning_rate": 2.5658519240985444e-07, "loss": 0.5202, "step": 14216 }, { "epoch": 0.9, "grad_norm": 1.0302057266235352, "learning_rate": 2.5626084355070634e-07, "loss": 0.5184, "step": 14217 }, { "epoch": 0.9, "grad_norm": 1.0556660890579224, "learning_rate": 2.5593669443391145e-07, "loss": 0.545, "step": 14218 }, { "epoch": 0.9, "grad_norm": 1.1177645921707153, "learning_rate": 2.556127450731194e-07, "loss": 0.5428, "step": 14219 }, { "epoch": 0.9, "grad_norm": 1.101194977760315, "learning_rate": 2.552889954819704e-07, "loss": 0.4978, "step": 14220 }, { "epoch": 0.9, "grad_norm": 1.108578085899353, "learning_rate": 2.5496544567409577e-07, "loss": 0.5178, "step": 14221 }, { "epoch": 0.9, "grad_norm": 1.0729098320007324, "learning_rate": 2.5464209566311847e-07, "loss": 0.4939, "step": 14222 }, { "epoch": 0.9, "grad_norm": 1.151045560836792, "learning_rate": 2.5431894546265654e-07, "loss": 0.5319, "step": 14223 }, { "epoch": 0.9, "grad_norm": 1.021720051765442, "learning_rate": 2.5399599508631356e-07, "loss": 0.4726, "step": 14224 }, { "epoch": 0.9, "grad_norm": 1.077812671661377, "learning_rate": 2.5367324454768916e-07, "loss": 0.5378, "step": 14225 }, { "epoch": 0.9, "grad_norm": 1.0677251815795898, "learning_rate": 2.5335069386037414e-07, "loss": 0.5248, "step": 14226 }, { "epoch": 0.9, "grad_norm": 1.0210872888565063, "learning_rate": 2.530283430379471e-07, "loss": 0.5225, "step": 14227 }, { "epoch": 0.9, "grad_norm": 1.0425536632537842, "learning_rate": 2.5270619209398497e-07, "loss": 0.4455, "step": 14228 }, { "epoch": 0.9, "grad_norm": 1.1434992551803589, "learning_rate": 2.523842410420496e-07, "loss": 0.5393, "step": 14229 }, { "epoch": 0.9, "grad_norm": 1.0491610765457153, "learning_rate": 2.5206248989569803e-07, "loss": 0.4876, "step": 14230 }, { "epoch": 0.9, "grad_norm": 1.1168839931488037, "learning_rate": 2.5174093866847826e-07, "loss": 0.4623, "step": 14231 }, { "epoch": 0.9, "grad_norm": 1.0826624631881714, "learning_rate": 2.5141958737392947e-07, "loss": 0.5193, "step": 14232 }, { "epoch": 0.9, "grad_norm": 1.0712814331054688, "learning_rate": 2.5109843602558247e-07, "loss": 0.5055, "step": 14233 }, { "epoch": 0.9, "grad_norm": 0.9830718040466309, "learning_rate": 2.507774846369615e-07, "loss": 0.4724, "step": 14234 }, { "epoch": 0.9, "grad_norm": 1.0501651763916016, "learning_rate": 2.5045673322157735e-07, "loss": 0.5073, "step": 14235 }, { "epoch": 0.9, "grad_norm": 1.0189639329910278, "learning_rate": 2.501361817929393e-07, "loss": 0.45, "step": 14236 }, { "epoch": 0.9, "grad_norm": 1.0900280475616455, "learning_rate": 2.4981583036454203e-07, "loss": 0.4828, "step": 14237 }, { "epoch": 0.9, "grad_norm": 1.06129789352417, "learning_rate": 2.494956789498759e-07, "loss": 0.4955, "step": 14238 }, { "epoch": 0.9, "grad_norm": 1.1390588283538818, "learning_rate": 2.491757275624207e-07, "loss": 0.4832, "step": 14239 }, { "epoch": 0.9, "grad_norm": 1.138748288154602, "learning_rate": 2.4885597621564896e-07, "loss": 0.5358, "step": 14240 }, { "epoch": 0.9, "grad_norm": 1.0612497329711914, "learning_rate": 2.485364249230238e-07, "loss": 0.4911, "step": 14241 }, { "epoch": 0.9, "grad_norm": 0.9879231452941895, "learning_rate": 2.4821707369800163e-07, "loss": 0.4806, "step": 14242 }, { "epoch": 0.9, "grad_norm": 1.046484112739563, "learning_rate": 2.478979225540268e-07, "loss": 0.5028, "step": 14243 }, { "epoch": 0.9, "grad_norm": 1.0972697734832764, "learning_rate": 2.475789715045401e-07, "loss": 0.5167, "step": 14244 }, { "epoch": 0.9, "grad_norm": 1.0879430770874023, "learning_rate": 2.472602205629698e-07, "loss": 0.4745, "step": 14245 }, { "epoch": 0.9, "grad_norm": 1.1586531400680542, "learning_rate": 2.469416697427379e-07, "loss": 0.5199, "step": 14246 }, { "epoch": 0.9, "grad_norm": 1.147218108177185, "learning_rate": 2.466233190572581e-07, "loss": 0.495, "step": 14247 }, { "epoch": 0.9, "grad_norm": 1.0244981050491333, "learning_rate": 2.463051685199341e-07, "loss": 0.5247, "step": 14248 }, { "epoch": 0.9, "grad_norm": 1.0598788261413574, "learning_rate": 2.4598721814416306e-07, "loss": 0.4921, "step": 14249 }, { "epoch": 0.9, "grad_norm": 0.9894571304321289, "learning_rate": 2.4566946794333247e-07, "loss": 0.5022, "step": 14250 }, { "epoch": 0.9, "grad_norm": 1.0936572551727295, "learning_rate": 2.4535191793082116e-07, "loss": 0.5097, "step": 14251 }, { "epoch": 0.9, "grad_norm": 1.021803855895996, "learning_rate": 2.4503456812e-07, "loss": 0.4551, "step": 14252 }, { "epoch": 0.9, "grad_norm": 1.1300513744354248, "learning_rate": 2.447174185242324e-07, "loss": 0.5114, "step": 14253 }, { "epoch": 0.9, "grad_norm": 1.0158747434616089, "learning_rate": 2.4440046915687135e-07, "loss": 0.4517, "step": 14254 }, { "epoch": 0.9, "grad_norm": 1.0202269554138184, "learning_rate": 2.4408372003126345e-07, "loss": 0.5171, "step": 14255 }, { "epoch": 0.9, "grad_norm": 1.0510153770446777, "learning_rate": 2.4376717116074533e-07, "loss": 0.4848, "step": 14256 }, { "epoch": 0.9, "grad_norm": 0.9678918719291687, "learning_rate": 2.434508225586457e-07, "loss": 0.4999, "step": 14257 }, { "epoch": 0.9, "grad_norm": 1.0264215469360352, "learning_rate": 2.431346742382856e-07, "loss": 0.4809, "step": 14258 }, { "epoch": 0.9, "grad_norm": 1.0267912149429321, "learning_rate": 2.428187262129761e-07, "loss": 0.4719, "step": 14259 }, { "epoch": 0.9, "grad_norm": 1.011001706123352, "learning_rate": 2.4250297849602145e-07, "loss": 0.5374, "step": 14260 }, { "epoch": 0.9, "grad_norm": 1.0350229740142822, "learning_rate": 2.421874311007155e-07, "loss": 0.4745, "step": 14261 }, { "epoch": 0.9, "grad_norm": 1.153439998626709, "learning_rate": 2.41872084040346e-07, "loss": 0.4898, "step": 14262 }, { "epoch": 0.9, "grad_norm": 1.0444737672805786, "learning_rate": 2.4155693732819065e-07, "loss": 0.4752, "step": 14263 }, { "epoch": 0.9, "grad_norm": 1.1378803253173828, "learning_rate": 2.412419909775199e-07, "loss": 0.4922, "step": 14264 }, { "epoch": 0.9, "grad_norm": 1.020803689956665, "learning_rate": 2.4092724500159315e-07, "loss": 0.521, "step": 14265 }, { "epoch": 0.9, "grad_norm": 1.044555902481079, "learning_rate": 2.406126994136654e-07, "loss": 0.4783, "step": 14266 }, { "epoch": 0.9, "grad_norm": 1.0339492559432983, "learning_rate": 2.402983542269799e-07, "loss": 0.4759, "step": 14267 }, { "epoch": 0.9, "grad_norm": 1.0984539985656738, "learning_rate": 2.3998420945477276e-07, "loss": 0.5041, "step": 14268 }, { "epoch": 0.9, "grad_norm": 1.0510377883911133, "learning_rate": 2.3967026511027224e-07, "loss": 0.5406, "step": 14269 }, { "epoch": 0.9, "grad_norm": 1.0136500597000122, "learning_rate": 2.393565212066962e-07, "loss": 0.4782, "step": 14270 }, { "epoch": 0.9, "grad_norm": 1.0046253204345703, "learning_rate": 2.3904297775725614e-07, "loss": 0.4767, "step": 14271 }, { "epoch": 0.9, "grad_norm": 1.0714184045791626, "learning_rate": 2.3872963477515497e-07, "loss": 0.4912, "step": 14272 }, { "epoch": 0.9, "grad_norm": 1.0916383266448975, "learning_rate": 2.3841649227358489e-07, "loss": 0.4851, "step": 14273 }, { "epoch": 0.9, "grad_norm": 1.0295509099960327, "learning_rate": 2.3810355026573195e-07, "loss": 0.4955, "step": 14274 }, { "epoch": 0.9, "grad_norm": 1.0655426979064941, "learning_rate": 2.377908087647729e-07, "loss": 0.5101, "step": 14275 }, { "epoch": 0.9, "grad_norm": 1.0833792686462402, "learning_rate": 2.374782677838766e-07, "loss": 0.4631, "step": 14276 }, { "epoch": 0.9, "grad_norm": 1.0340633392333984, "learning_rate": 2.3716592733620315e-07, "loss": 0.5448, "step": 14277 }, { "epoch": 0.9, "grad_norm": 1.169402837753296, "learning_rate": 2.3685378743490306e-07, "loss": 0.4724, "step": 14278 }, { "epoch": 0.9, "grad_norm": 1.0679082870483398, "learning_rate": 2.3654184809312032e-07, "loss": 0.4914, "step": 14279 }, { "epoch": 0.9, "grad_norm": 0.9339148998260498, "learning_rate": 2.362301093239905e-07, "loss": 0.448, "step": 14280 }, { "epoch": 0.9, "grad_norm": 1.0225436687469482, "learning_rate": 2.359185711406381e-07, "loss": 0.48, "step": 14281 }, { "epoch": 0.9, "grad_norm": 0.9594351649284363, "learning_rate": 2.3560723355618152e-07, "loss": 0.5045, "step": 14282 }, { "epoch": 0.9, "grad_norm": 0.9956153035163879, "learning_rate": 2.3529609658373032e-07, "loss": 0.5083, "step": 14283 }, { "epoch": 0.9, "grad_norm": 1.073124885559082, "learning_rate": 2.3498516023638562e-07, "loss": 0.5129, "step": 14284 }, { "epoch": 0.91, "grad_norm": 1.0852166414260864, "learning_rate": 2.3467442452723976e-07, "loss": 0.4635, "step": 14285 }, { "epoch": 0.91, "grad_norm": 1.1746938228607178, "learning_rate": 2.3436388946937504e-07, "loss": 0.4606, "step": 14286 }, { "epoch": 0.91, "grad_norm": 1.0940499305725098, "learning_rate": 2.3405355507586992e-07, "loss": 0.5258, "step": 14287 }, { "epoch": 0.91, "grad_norm": 0.974661111831665, "learning_rate": 2.3374342135979e-07, "loss": 0.5102, "step": 14288 }, { "epoch": 0.91, "grad_norm": 1.0678569078445435, "learning_rate": 2.3343348833419377e-07, "loss": 0.5254, "step": 14289 }, { "epoch": 0.91, "grad_norm": 1.104823350906372, "learning_rate": 2.3312375601213134e-07, "loss": 0.4705, "step": 14290 }, { "epoch": 0.91, "grad_norm": 1.1579174995422363, "learning_rate": 2.3281422440664503e-07, "loss": 0.5125, "step": 14291 }, { "epoch": 0.91, "grad_norm": 1.1120131015777588, "learning_rate": 2.3250489353076777e-07, "loss": 0.5045, "step": 14292 }, { "epoch": 0.91, "grad_norm": 1.1061304807662964, "learning_rate": 2.3219576339752525e-07, "loss": 0.5066, "step": 14293 }, { "epoch": 0.91, "grad_norm": 1.0827875137329102, "learning_rate": 2.3188683401993261e-07, "loss": 0.5188, "step": 14294 }, { "epoch": 0.91, "grad_norm": 1.0205931663513184, "learning_rate": 2.3157810541099724e-07, "loss": 0.4545, "step": 14295 }, { "epoch": 0.91, "grad_norm": 1.0359200239181519, "learning_rate": 2.3126957758372149e-07, "loss": 0.4723, "step": 14296 }, { "epoch": 0.91, "grad_norm": 1.0978033542633057, "learning_rate": 2.3096125055109386e-07, "loss": 0.4738, "step": 14297 }, { "epoch": 0.91, "grad_norm": 1.0552581548690796, "learning_rate": 2.3065312432609788e-07, "loss": 0.484, "step": 14298 }, { "epoch": 0.91, "grad_norm": 1.0160704851150513, "learning_rate": 2.3034519892170705e-07, "loss": 0.4968, "step": 14299 }, { "epoch": 0.91, "grad_norm": 1.022432804107666, "learning_rate": 2.3003747435088764e-07, "loss": 0.4674, "step": 14300 }, { "epoch": 0.91, "grad_norm": 1.0523029565811157, "learning_rate": 2.2972995062659764e-07, "loss": 0.5172, "step": 14301 }, { "epoch": 0.91, "grad_norm": 1.0756049156188965, "learning_rate": 2.2942262776178392e-07, "loss": 0.5255, "step": 14302 }, { "epoch": 0.91, "grad_norm": 0.9936608672142029, "learning_rate": 2.291155057693878e-07, "loss": 0.503, "step": 14303 }, { "epoch": 0.91, "grad_norm": 1.1403104066848755, "learning_rate": 2.2880858466234114e-07, "loss": 0.505, "step": 14304 }, { "epoch": 0.91, "grad_norm": 1.0118169784545898, "learning_rate": 2.2850186445356693e-07, "loss": 0.5185, "step": 14305 }, { "epoch": 0.91, "grad_norm": 0.9938391447067261, "learning_rate": 2.281953451559804e-07, "loss": 0.4619, "step": 14306 }, { "epoch": 0.91, "grad_norm": 1.087198257446289, "learning_rate": 2.2788902678248904e-07, "loss": 0.5247, "step": 14307 }, { "epoch": 0.91, "grad_norm": 1.039725661277771, "learning_rate": 2.2758290934598805e-07, "loss": 0.5071, "step": 14308 }, { "epoch": 0.91, "grad_norm": 1.034071445465088, "learning_rate": 2.2727699285937043e-07, "loss": 0.4666, "step": 14309 }, { "epoch": 0.91, "grad_norm": 1.218772530555725, "learning_rate": 2.2697127733551483e-07, "loss": 0.5297, "step": 14310 }, { "epoch": 0.91, "grad_norm": 1.0864248275756836, "learning_rate": 2.2666576278729424e-07, "loss": 0.5342, "step": 14311 }, { "epoch": 0.91, "grad_norm": 1.0638132095336914, "learning_rate": 2.2636044922757339e-07, "loss": 0.5038, "step": 14312 }, { "epoch": 0.91, "grad_norm": 1.1192634105682373, "learning_rate": 2.2605533666920753e-07, "loss": 0.5212, "step": 14313 }, { "epoch": 0.91, "grad_norm": 1.1108735799789429, "learning_rate": 2.257504251250442e-07, "loss": 0.4626, "step": 14314 }, { "epoch": 0.91, "grad_norm": 1.0333267450332642, "learning_rate": 2.2544571460792308e-07, "loss": 0.5094, "step": 14315 }, { "epoch": 0.91, "grad_norm": 1.0197890996932983, "learning_rate": 2.251412051306717e-07, "loss": 0.4993, "step": 14316 }, { "epoch": 0.91, "grad_norm": 1.036181092262268, "learning_rate": 2.2483689670611542e-07, "loss": 0.5418, "step": 14317 }, { "epoch": 0.91, "grad_norm": 1.1110544204711914, "learning_rate": 2.2453278934706446e-07, "loss": 0.5396, "step": 14318 }, { "epoch": 0.91, "grad_norm": 1.0619498491287231, "learning_rate": 2.2422888306632584e-07, "loss": 0.4909, "step": 14319 }, { "epoch": 0.91, "grad_norm": 1.028778314590454, "learning_rate": 2.2392517787669487e-07, "loss": 0.4499, "step": 14320 }, { "epoch": 0.91, "grad_norm": 1.1261348724365234, "learning_rate": 2.2362167379096023e-07, "loss": 0.5234, "step": 14321 }, { "epoch": 0.91, "grad_norm": 1.000189185142517, "learning_rate": 2.2331837082190056e-07, "loss": 0.4514, "step": 14322 }, { "epoch": 0.91, "grad_norm": 1.0539196729660034, "learning_rate": 2.2301526898228842e-07, "loss": 0.4784, "step": 14323 }, { "epoch": 0.91, "grad_norm": 1.0466829538345337, "learning_rate": 2.2271236828488474e-07, "loss": 0.5106, "step": 14324 }, { "epoch": 0.91, "grad_norm": 1.0722671747207642, "learning_rate": 2.224096687424443e-07, "loss": 0.498, "step": 14325 }, { "epoch": 0.91, "grad_norm": 1.0605310201644897, "learning_rate": 2.2210717036771246e-07, "loss": 0.4761, "step": 14326 }, { "epoch": 0.91, "grad_norm": 1.1165287494659424, "learning_rate": 2.218048731734268e-07, "loss": 0.5357, "step": 14327 }, { "epoch": 0.91, "grad_norm": 0.9855444431304932, "learning_rate": 2.215027771723155e-07, "loss": 0.4909, "step": 14328 }, { "epoch": 0.91, "grad_norm": 1.0161094665527344, "learning_rate": 2.2120088237709946e-07, "loss": 0.4939, "step": 14329 }, { "epoch": 0.91, "grad_norm": 1.1264501810073853, "learning_rate": 2.2089918880049023e-07, "loss": 0.4765, "step": 14330 }, { "epoch": 0.91, "grad_norm": 1.0361504554748535, "learning_rate": 2.205976964551909e-07, "loss": 0.4931, "step": 14331 }, { "epoch": 0.91, "grad_norm": 0.9984549880027771, "learning_rate": 2.2029640535389586e-07, "loss": 0.4754, "step": 14332 }, { "epoch": 0.91, "grad_norm": 1.0490080118179321, "learning_rate": 2.1999531550929098e-07, "loss": 0.5058, "step": 14333 }, { "epoch": 0.91, "grad_norm": 0.9620938897132874, "learning_rate": 2.1969442693405673e-07, "loss": 0.4717, "step": 14334 }, { "epoch": 0.91, "grad_norm": 1.0211260318756104, "learning_rate": 2.1939373964085964e-07, "loss": 0.5389, "step": 14335 }, { "epoch": 0.91, "grad_norm": 1.0039058923721313, "learning_rate": 2.190932536423618e-07, "loss": 0.487, "step": 14336 }, { "epoch": 0.91, "grad_norm": 1.1592278480529785, "learning_rate": 2.1879296895121637e-07, "loss": 0.5299, "step": 14337 }, { "epoch": 0.91, "grad_norm": 1.0102602243423462, "learning_rate": 2.1849288558006442e-07, "loss": 0.502, "step": 14338 }, { "epoch": 0.91, "grad_norm": 1.131880521774292, "learning_rate": 2.1819300354154526e-07, "loss": 0.5316, "step": 14339 }, { "epoch": 0.91, "grad_norm": 1.104852557182312, "learning_rate": 2.1789332284828323e-07, "loss": 0.5145, "step": 14340 }, { "epoch": 0.91, "grad_norm": 0.9619296193122864, "learning_rate": 2.175938435128977e-07, "loss": 0.4365, "step": 14341 }, { "epoch": 0.91, "grad_norm": 1.0772216320037842, "learning_rate": 2.1729456554799855e-07, "loss": 0.5193, "step": 14342 }, { "epoch": 0.91, "grad_norm": 1.1998577117919922, "learning_rate": 2.1699548896618795e-07, "loss": 0.5393, "step": 14343 }, { "epoch": 0.91, "grad_norm": 1.083191990852356, "learning_rate": 2.1669661378005802e-07, "loss": 0.4686, "step": 14344 }, { "epoch": 0.91, "grad_norm": 1.0519863367080688, "learning_rate": 2.1639794000219426e-07, "loss": 0.5003, "step": 14345 }, { "epoch": 0.91, "grad_norm": 1.0952211618423462, "learning_rate": 2.1609946764517108e-07, "loss": 0.5063, "step": 14346 }, { "epoch": 0.91, "grad_norm": 1.121744155883789, "learning_rate": 2.1580119672155898e-07, "loss": 0.5417, "step": 14347 }, { "epoch": 0.91, "grad_norm": 1.0885792970657349, "learning_rate": 2.1550312724391452e-07, "loss": 0.5188, "step": 14348 }, { "epoch": 0.91, "grad_norm": 1.0304995775222778, "learning_rate": 2.152052592247894e-07, "loss": 0.4922, "step": 14349 }, { "epoch": 0.91, "grad_norm": 1.0206528902053833, "learning_rate": 2.1490759267672634e-07, "loss": 0.498, "step": 14350 }, { "epoch": 0.91, "grad_norm": 1.1047496795654297, "learning_rate": 2.1461012761225696e-07, "loss": 0.5174, "step": 14351 }, { "epoch": 0.91, "grad_norm": 1.0037637948989868, "learning_rate": 2.143128640439085e-07, "loss": 0.475, "step": 14352 }, { "epoch": 0.91, "grad_norm": 1.0277127027511597, "learning_rate": 2.1401580198419812e-07, "loss": 0.475, "step": 14353 }, { "epoch": 0.91, "grad_norm": 1.0374289751052856, "learning_rate": 2.1371894144563254e-07, "loss": 0.4978, "step": 14354 }, { "epoch": 0.91, "grad_norm": 1.0003501176834106, "learning_rate": 2.1342228244071173e-07, "loss": 0.5156, "step": 14355 }, { "epoch": 0.91, "grad_norm": 1.0034006834030151, "learning_rate": 2.1312582498192792e-07, "loss": 0.4977, "step": 14356 }, { "epoch": 0.91, "grad_norm": 1.0414868593215942, "learning_rate": 2.1282956908176277e-07, "loss": 0.5104, "step": 14357 }, { "epoch": 0.91, "grad_norm": 1.0152901411056519, "learning_rate": 2.125335147526919e-07, "loss": 0.4949, "step": 14358 }, { "epoch": 0.91, "grad_norm": 1.0467476844787598, "learning_rate": 2.122376620071792e-07, "loss": 0.531, "step": 14359 }, { "epoch": 0.91, "grad_norm": 1.0745909214019775, "learning_rate": 2.1194201085768363e-07, "loss": 0.478, "step": 14360 }, { "epoch": 0.91, "grad_norm": 1.0887025594711304, "learning_rate": 2.1164656131665407e-07, "loss": 0.4818, "step": 14361 }, { "epoch": 0.91, "grad_norm": 1.028891921043396, "learning_rate": 2.1135131339652947e-07, "loss": 0.5066, "step": 14362 }, { "epoch": 0.91, "grad_norm": 1.0102150440216064, "learning_rate": 2.1105626710974325e-07, "loss": 0.4772, "step": 14363 }, { "epoch": 0.91, "grad_norm": 1.108974814414978, "learning_rate": 2.1076142246871766e-07, "loss": 0.4554, "step": 14364 }, { "epoch": 0.91, "grad_norm": 1.0432991981506348, "learning_rate": 2.1046677948586836e-07, "loss": 0.4985, "step": 14365 }, { "epoch": 0.91, "grad_norm": 1.0474931001663208, "learning_rate": 2.1017233817360149e-07, "loss": 0.499, "step": 14366 }, { "epoch": 0.91, "grad_norm": 1.1366848945617676, "learning_rate": 2.098780985443144e-07, "loss": 0.5547, "step": 14367 }, { "epoch": 0.91, "grad_norm": 1.0178462266921997, "learning_rate": 2.095840606103966e-07, "loss": 0.4721, "step": 14368 }, { "epoch": 0.91, "grad_norm": 1.0264699459075928, "learning_rate": 2.092902243842304e-07, "loss": 0.4803, "step": 14369 }, { "epoch": 0.91, "grad_norm": 1.2258989810943604, "learning_rate": 2.0899658987818705e-07, "loss": 0.496, "step": 14370 }, { "epoch": 0.91, "grad_norm": 1.1892192363739014, "learning_rate": 2.0870315710462996e-07, "loss": 0.5231, "step": 14371 }, { "epoch": 0.91, "grad_norm": 1.0772793292999268, "learning_rate": 2.0840992607591593e-07, "loss": 0.4884, "step": 14372 }, { "epoch": 0.91, "grad_norm": 1.1337677240371704, "learning_rate": 2.081168968043906e-07, "loss": 0.5228, "step": 14373 }, { "epoch": 0.91, "grad_norm": 1.0992305278778076, "learning_rate": 2.0782406930239363e-07, "loss": 0.5369, "step": 14374 }, { "epoch": 0.91, "grad_norm": 1.0421539545059204, "learning_rate": 2.0753144358225397e-07, "loss": 0.5005, "step": 14375 }, { "epoch": 0.91, "grad_norm": 1.1078789234161377, "learning_rate": 2.072390196562929e-07, "loss": 0.5082, "step": 14376 }, { "epoch": 0.91, "grad_norm": 1.0897104740142822, "learning_rate": 2.0694679753682445e-07, "loss": 0.4868, "step": 14377 }, { "epoch": 0.91, "grad_norm": 1.0101878643035889, "learning_rate": 2.0665477723615268e-07, "loss": 0.4782, "step": 14378 }, { "epoch": 0.91, "grad_norm": 1.0218583345413208, "learning_rate": 2.063629587665733e-07, "loss": 0.4867, "step": 14379 }, { "epoch": 0.91, "grad_norm": 1.0644055604934692, "learning_rate": 2.0607134214037373e-07, "loss": 0.4456, "step": 14380 }, { "epoch": 0.91, "grad_norm": 0.9680347442626953, "learning_rate": 2.05779927369833e-07, "loss": 0.4666, "step": 14381 }, { "epoch": 0.91, "grad_norm": 1.0135297775268555, "learning_rate": 2.054887144672224e-07, "loss": 0.4552, "step": 14382 }, { "epoch": 0.91, "grad_norm": 0.971634566783905, "learning_rate": 2.0519770344480272e-07, "loss": 0.4987, "step": 14383 }, { "epoch": 0.91, "grad_norm": 1.0475090742111206, "learning_rate": 2.0490689431482746e-07, "loss": 0.4633, "step": 14384 }, { "epoch": 0.91, "grad_norm": 1.0539919137954712, "learning_rate": 2.0461628708954183e-07, "loss": 0.5522, "step": 14385 }, { "epoch": 0.91, "grad_norm": 1.0730996131896973, "learning_rate": 2.0432588178118274e-07, "loss": 0.5039, "step": 14386 }, { "epoch": 0.91, "grad_norm": 0.9802207350730896, "learning_rate": 2.0403567840197813e-07, "loss": 0.4511, "step": 14387 }, { "epoch": 0.91, "grad_norm": 1.1255625486373901, "learning_rate": 2.0374567696414716e-07, "loss": 0.4906, "step": 14388 }, { "epoch": 0.91, "grad_norm": 1.1176317930221558, "learning_rate": 2.0345587747990004e-07, "loss": 0.5227, "step": 14389 }, { "epoch": 0.91, "grad_norm": 1.0412148237228394, "learning_rate": 2.0316627996144035e-07, "loss": 0.4668, "step": 14390 }, { "epoch": 0.91, "grad_norm": 1.1022497415542603, "learning_rate": 2.028768844209622e-07, "loss": 0.5007, "step": 14391 }, { "epoch": 0.91, "grad_norm": 1.1177451610565186, "learning_rate": 2.0258769087065034e-07, "loss": 0.5445, "step": 14392 }, { "epoch": 0.91, "grad_norm": 1.0330328941345215, "learning_rate": 2.022986993226811e-07, "loss": 0.5068, "step": 14393 }, { "epoch": 0.91, "grad_norm": 0.9648503661155701, "learning_rate": 2.020099097892242e-07, "loss": 0.4734, "step": 14394 }, { "epoch": 0.91, "grad_norm": 1.0981762409210205, "learning_rate": 2.0172132228243878e-07, "loss": 0.5329, "step": 14395 }, { "epoch": 0.91, "grad_norm": 1.131181001663208, "learning_rate": 2.014329368144774e-07, "loss": 0.479, "step": 14396 }, { "epoch": 0.91, "grad_norm": 1.1691778898239136, "learning_rate": 2.0114475339748085e-07, "loss": 0.527, "step": 14397 }, { "epoch": 0.91, "grad_norm": 1.0281850099563599, "learning_rate": 2.0085677204358445e-07, "loss": 0.4742, "step": 14398 }, { "epoch": 0.91, "grad_norm": 1.0565160512924194, "learning_rate": 2.005689927649157e-07, "loss": 0.5312, "step": 14399 }, { "epoch": 0.91, "grad_norm": 0.9973809123039246, "learning_rate": 2.0028141557358992e-07, "loss": 0.5035, "step": 14400 }, { "epoch": 0.91, "grad_norm": 1.0344796180725098, "learning_rate": 1.999940404817169e-07, "loss": 0.5028, "step": 14401 }, { "epoch": 0.91, "grad_norm": 1.1034725904464722, "learning_rate": 1.9970686750139633e-07, "loss": 0.4856, "step": 14402 }, { "epoch": 0.91, "grad_norm": 1.039278268814087, "learning_rate": 1.994198966447214e-07, "loss": 0.5301, "step": 14403 }, { "epoch": 0.91, "grad_norm": 1.0402063131332397, "learning_rate": 1.991331279237746e-07, "loss": 0.5286, "step": 14404 }, { "epoch": 0.91, "grad_norm": 1.0492223501205444, "learning_rate": 1.988465613506302e-07, "loss": 0.4573, "step": 14405 }, { "epoch": 0.91, "grad_norm": 1.086842656135559, "learning_rate": 1.9856019693735463e-07, "loss": 0.5222, "step": 14406 }, { "epoch": 0.91, "grad_norm": 1.0863054990768433, "learning_rate": 1.982740346960077e-07, "loss": 0.5254, "step": 14407 }, { "epoch": 0.91, "grad_norm": 1.1995936632156372, "learning_rate": 1.9798807463863589e-07, "loss": 0.5319, "step": 14408 }, { "epoch": 0.91, "grad_norm": 1.0130900144577026, "learning_rate": 1.977023167772818e-07, "loss": 0.4829, "step": 14409 }, { "epoch": 0.91, "grad_norm": 1.0801007747650146, "learning_rate": 1.9741676112397688e-07, "loss": 0.4629, "step": 14410 }, { "epoch": 0.91, "grad_norm": 1.0296639204025269, "learning_rate": 1.9713140769074546e-07, "loss": 0.5006, "step": 14411 }, { "epoch": 0.91, "grad_norm": 1.0868396759033203, "learning_rate": 1.9684625648960287e-07, "loss": 0.4896, "step": 14412 }, { "epoch": 0.91, "grad_norm": 1.0036476850509644, "learning_rate": 1.965613075325551e-07, "loss": 0.4749, "step": 14413 }, { "epoch": 0.91, "grad_norm": 1.0925575494766235, "learning_rate": 1.9627656083160085e-07, "loss": 0.526, "step": 14414 }, { "epoch": 0.91, "grad_norm": 1.0652610063552856, "learning_rate": 1.9599201639872943e-07, "loss": 0.4753, "step": 14415 }, { "epoch": 0.91, "grad_norm": 1.0112617015838623, "learning_rate": 1.9570767424592186e-07, "loss": 0.456, "step": 14416 }, { "epoch": 0.91, "grad_norm": 1.0837966203689575, "learning_rate": 1.9542353438515183e-07, "loss": 0.4816, "step": 14417 }, { "epoch": 0.91, "grad_norm": 1.0132328271865845, "learning_rate": 1.9513959682838314e-07, "loss": 0.5037, "step": 14418 }, { "epoch": 0.91, "grad_norm": 1.0372248888015747, "learning_rate": 1.9485586158757009e-07, "loss": 0.5072, "step": 14419 }, { "epoch": 0.91, "grad_norm": 1.2160942554473877, "learning_rate": 1.9457232867466204e-07, "loss": 0.535, "step": 14420 }, { "epoch": 0.91, "grad_norm": 1.1435277462005615, "learning_rate": 1.9428899810159606e-07, "loss": 0.5158, "step": 14421 }, { "epoch": 0.91, "grad_norm": 1.1938700675964355, "learning_rate": 1.9400586988030212e-07, "loss": 0.5031, "step": 14422 }, { "epoch": 0.91, "grad_norm": 1.0440253019332886, "learning_rate": 1.937229440227023e-07, "loss": 0.5007, "step": 14423 }, { "epoch": 0.91, "grad_norm": 0.9880737662315369, "learning_rate": 1.9344022054070933e-07, "loss": 0.4593, "step": 14424 }, { "epoch": 0.91, "grad_norm": 0.9918844699859619, "learning_rate": 1.9315769944622808e-07, "loss": 0.4999, "step": 14425 }, { "epoch": 0.91, "grad_norm": 1.0597563982009888, "learning_rate": 1.9287538075115463e-07, "loss": 0.5231, "step": 14426 }, { "epoch": 0.91, "grad_norm": 1.0294430255889893, "learning_rate": 1.9259326446737503e-07, "loss": 0.4994, "step": 14427 }, { "epoch": 0.91, "grad_norm": 1.0238746404647827, "learning_rate": 1.9231135060677087e-07, "loss": 0.5056, "step": 14428 }, { "epoch": 0.91, "grad_norm": 1.1582118272781372, "learning_rate": 1.9202963918120988e-07, "loss": 0.5307, "step": 14429 }, { "epoch": 0.91, "grad_norm": 0.9513401389122009, "learning_rate": 1.9174813020255533e-07, "loss": 0.5188, "step": 14430 }, { "epoch": 0.91, "grad_norm": 0.9851300120353699, "learning_rate": 1.9146682368266112e-07, "loss": 0.4879, "step": 14431 }, { "epoch": 0.91, "grad_norm": 1.0757297277450562, "learning_rate": 1.9118571963336996e-07, "loss": 0.5066, "step": 14432 }, { "epoch": 0.91, "grad_norm": 1.122973084449768, "learning_rate": 1.9090481806652017e-07, "loss": 0.5234, "step": 14433 }, { "epoch": 0.91, "grad_norm": 1.0094428062438965, "learning_rate": 1.9062411899393896e-07, "loss": 0.4785, "step": 14434 }, { "epoch": 0.91, "grad_norm": 1.0200306177139282, "learning_rate": 1.9034362242744576e-07, "loss": 0.4915, "step": 14435 }, { "epoch": 0.91, "grad_norm": 1.0503755807876587, "learning_rate": 1.9006332837885054e-07, "loss": 0.5085, "step": 14436 }, { "epoch": 0.91, "grad_norm": 1.1125425100326538, "learning_rate": 1.8978323685995558e-07, "loss": 0.5781, "step": 14437 }, { "epoch": 0.91, "grad_norm": 1.0734832286834717, "learning_rate": 1.8950334788255586e-07, "loss": 0.4952, "step": 14438 }, { "epoch": 0.91, "grad_norm": 1.0181453227996826, "learning_rate": 1.8922366145843585e-07, "loss": 0.4679, "step": 14439 }, { "epoch": 0.91, "grad_norm": 0.951262354850769, "learning_rate": 1.8894417759937055e-07, "loss": 0.4909, "step": 14440 }, { "epoch": 0.91, "grad_norm": 1.0891555547714233, "learning_rate": 1.886648963171306e-07, "loss": 0.4835, "step": 14441 }, { "epoch": 0.91, "grad_norm": 1.052882194519043, "learning_rate": 1.8838581762347485e-07, "loss": 0.4943, "step": 14442 }, { "epoch": 0.92, "grad_norm": 1.1120226383209229, "learning_rate": 1.881069415301534e-07, "loss": 0.4715, "step": 14443 }, { "epoch": 0.92, "grad_norm": 1.1016628742218018, "learning_rate": 1.8782826804890908e-07, "loss": 0.5361, "step": 14444 }, { "epoch": 0.92, "grad_norm": 1.0507054328918457, "learning_rate": 1.875497971914758e-07, "loss": 0.5833, "step": 14445 }, { "epoch": 0.92, "grad_norm": 1.0540279150009155, "learning_rate": 1.872715289695798e-07, "loss": 0.4896, "step": 14446 }, { "epoch": 0.92, "grad_norm": 0.9839321970939636, "learning_rate": 1.8699346339493774e-07, "loss": 0.5175, "step": 14447 }, { "epoch": 0.92, "grad_norm": 1.0354623794555664, "learning_rate": 1.867156004792575e-07, "loss": 0.505, "step": 14448 }, { "epoch": 0.92, "grad_norm": 1.145330786705017, "learning_rate": 1.864379402342381e-07, "loss": 0.4834, "step": 14449 }, { "epoch": 0.92, "grad_norm": 1.0762995481491089, "learning_rate": 1.8616048267157348e-07, "loss": 0.5029, "step": 14450 }, { "epoch": 0.92, "grad_norm": 1.158921480178833, "learning_rate": 1.8588322780294377e-07, "loss": 0.4992, "step": 14451 }, { "epoch": 0.92, "grad_norm": 1.077999472618103, "learning_rate": 1.8560617564002458e-07, "loss": 0.4996, "step": 14452 }, { "epoch": 0.92, "grad_norm": 1.1388030052185059, "learning_rate": 1.8532932619448106e-07, "loss": 0.4947, "step": 14453 }, { "epoch": 0.92, "grad_norm": 1.0563603639602661, "learning_rate": 1.8505267947797056e-07, "loss": 0.4851, "step": 14454 }, { "epoch": 0.92, "grad_norm": 1.1018091440200806, "learning_rate": 1.847762355021421e-07, "loss": 0.4919, "step": 14455 }, { "epoch": 0.92, "grad_norm": 1.133117914199829, "learning_rate": 1.8449999427863575e-07, "loss": 0.4967, "step": 14456 }, { "epoch": 0.92, "grad_norm": 1.0800427198410034, "learning_rate": 1.842239558190817e-07, "loss": 0.5022, "step": 14457 }, { "epoch": 0.92, "grad_norm": 1.0592983961105347, "learning_rate": 1.839481201351051e-07, "loss": 0.5063, "step": 14458 }, { "epoch": 0.92, "grad_norm": 1.0915387868881226, "learning_rate": 1.8367248723831889e-07, "loss": 0.5533, "step": 14459 }, { "epoch": 0.92, "grad_norm": 1.020050048828125, "learning_rate": 1.833970571403293e-07, "loss": 0.475, "step": 14460 }, { "epoch": 0.92, "grad_norm": 1.0217317342758179, "learning_rate": 1.831218298527343e-07, "loss": 0.496, "step": 14461 }, { "epoch": 0.92, "grad_norm": 1.089906096458435, "learning_rate": 1.828468053871213e-07, "loss": 0.5353, "step": 14462 }, { "epoch": 0.92, "grad_norm": 1.0170221328735352, "learning_rate": 1.825719837550727e-07, "loss": 0.4988, "step": 14463 }, { "epoch": 0.92, "grad_norm": 1.0263185501098633, "learning_rate": 1.822973649681592e-07, "loss": 0.4907, "step": 14464 }, { "epoch": 0.92, "grad_norm": 1.040390133857727, "learning_rate": 1.820229490379438e-07, "loss": 0.4174, "step": 14465 }, { "epoch": 0.92, "grad_norm": 1.092755675315857, "learning_rate": 1.8174873597598176e-07, "loss": 0.4812, "step": 14466 }, { "epoch": 0.92, "grad_norm": 1.0939713716506958, "learning_rate": 1.814747257938182e-07, "loss": 0.5367, "step": 14467 }, { "epoch": 0.92, "grad_norm": 1.0125434398651123, "learning_rate": 1.8120091850299225e-07, "loss": 0.5075, "step": 14468 }, { "epoch": 0.92, "grad_norm": 1.0190476179122925, "learning_rate": 1.809273141150325e-07, "loss": 0.4582, "step": 14469 }, { "epoch": 0.92, "grad_norm": 1.0586004257202148, "learning_rate": 1.8065391264145805e-07, "loss": 0.4978, "step": 14470 }, { "epoch": 0.92, "grad_norm": 1.0722798109054565, "learning_rate": 1.8038071409378299e-07, "loss": 0.5564, "step": 14471 }, { "epoch": 0.92, "grad_norm": 1.0449334383010864, "learning_rate": 1.8010771848350983e-07, "loss": 0.5336, "step": 14472 }, { "epoch": 0.92, "grad_norm": 1.0054998397827148, "learning_rate": 1.7983492582213324e-07, "loss": 0.502, "step": 14473 }, { "epoch": 0.92, "grad_norm": 1.1076698303222656, "learning_rate": 1.7956233612114017e-07, "loss": 0.4899, "step": 14474 }, { "epoch": 0.92, "grad_norm": 1.04656183719635, "learning_rate": 1.792899493920075e-07, "loss": 0.4579, "step": 14475 }, { "epoch": 0.92, "grad_norm": 1.1003092527389526, "learning_rate": 1.79017765646205e-07, "loss": 0.5395, "step": 14476 }, { "epoch": 0.92, "grad_norm": 0.9659669399261475, "learning_rate": 1.78745784895194e-07, "loss": 0.4773, "step": 14477 }, { "epoch": 0.92, "grad_norm": 1.1086894273757935, "learning_rate": 1.7847400715042594e-07, "loss": 0.5042, "step": 14478 }, { "epoch": 0.92, "grad_norm": 1.077738881111145, "learning_rate": 1.7820243242334334e-07, "loss": 0.5238, "step": 14479 }, { "epoch": 0.92, "grad_norm": 1.0929759740829468, "learning_rate": 1.7793106072538423e-07, "loss": 0.496, "step": 14480 }, { "epoch": 0.92, "grad_norm": 1.0933939218521118, "learning_rate": 1.7765989206797285e-07, "loss": 0.525, "step": 14481 }, { "epoch": 0.92, "grad_norm": 1.2027020454406738, "learning_rate": 1.7738892646252726e-07, "loss": 0.5044, "step": 14482 }, { "epoch": 0.92, "grad_norm": 1.0866278409957886, "learning_rate": 1.7711816392045778e-07, "loss": 0.51, "step": 14483 }, { "epoch": 0.92, "grad_norm": 1.0649842023849487, "learning_rate": 1.7684760445316418e-07, "loss": 0.4774, "step": 14484 }, { "epoch": 0.92, "grad_norm": 1.0627365112304688, "learning_rate": 1.765772480720407e-07, "loss": 0.4744, "step": 14485 }, { "epoch": 0.92, "grad_norm": 1.0996586084365845, "learning_rate": 1.763070947884693e-07, "loss": 0.5037, "step": 14486 }, { "epoch": 0.92, "grad_norm": 1.0557044744491577, "learning_rate": 1.7603714461382481e-07, "loss": 0.5296, "step": 14487 }, { "epoch": 0.92, "grad_norm": 0.9884737133979797, "learning_rate": 1.7576739755947593e-07, "loss": 0.4505, "step": 14488 }, { "epoch": 0.92, "grad_norm": 0.9897318482398987, "learning_rate": 1.7549785363677906e-07, "loss": 0.4991, "step": 14489 }, { "epoch": 0.92, "grad_norm": 1.0085302591323853, "learning_rate": 1.7522851285708465e-07, "loss": 0.5155, "step": 14490 }, { "epoch": 0.92, "grad_norm": 1.0740585327148438, "learning_rate": 1.7495937523173356e-07, "loss": 0.4794, "step": 14491 }, { "epoch": 0.92, "grad_norm": 1.0635310411453247, "learning_rate": 1.7469044077205732e-07, "loss": 0.4955, "step": 14492 }, { "epoch": 0.92, "grad_norm": 1.020662546157837, "learning_rate": 1.744217094893813e-07, "loss": 0.5138, "step": 14493 }, { "epoch": 0.92, "grad_norm": 1.0481855869293213, "learning_rate": 1.7415318139502036e-07, "loss": 0.4705, "step": 14494 }, { "epoch": 0.92, "grad_norm": 1.105790376663208, "learning_rate": 1.7388485650028043e-07, "loss": 0.5045, "step": 14495 }, { "epoch": 0.92, "grad_norm": 1.1326957941055298, "learning_rate": 1.7361673481646025e-07, "loss": 0.5019, "step": 14496 }, { "epoch": 0.92, "grad_norm": 1.2094613313674927, "learning_rate": 1.7334881635485023e-07, "loss": 0.492, "step": 14497 }, { "epoch": 0.92, "grad_norm": 1.1281813383102417, "learning_rate": 1.7308110112673027e-07, "loss": 0.5219, "step": 14498 }, { "epoch": 0.92, "grad_norm": 1.0920004844665527, "learning_rate": 1.7281358914337408e-07, "loss": 0.5141, "step": 14499 }, { "epoch": 0.92, "grad_norm": 1.1044858694076538, "learning_rate": 1.7254628041604437e-07, "loss": 0.5035, "step": 14500 }, { "epoch": 0.92, "grad_norm": 1.0102866888046265, "learning_rate": 1.7227917495599823e-07, "loss": 0.5135, "step": 14501 }, { "epoch": 0.92, "grad_norm": 1.2572340965270996, "learning_rate": 1.7201227277448108e-07, "loss": 0.6066, "step": 14502 }, { "epoch": 0.92, "grad_norm": 1.1052078008651733, "learning_rate": 1.7174557388273173e-07, "loss": 0.5146, "step": 14503 }, { "epoch": 0.92, "grad_norm": 1.1050742864608765, "learning_rate": 1.7147907829198008e-07, "loss": 0.4964, "step": 14504 }, { "epoch": 0.92, "grad_norm": 1.1257394552230835, "learning_rate": 1.7121278601344715e-07, "loss": 0.5344, "step": 14505 }, { "epoch": 0.92, "grad_norm": 1.069999098777771, "learning_rate": 1.7094669705834566e-07, "loss": 0.501, "step": 14506 }, { "epoch": 0.92, "grad_norm": 1.023105502128601, "learning_rate": 1.706808114378805e-07, "loss": 0.4674, "step": 14507 }, { "epoch": 0.92, "grad_norm": 1.0694199800491333, "learning_rate": 1.7041512916324554e-07, "loss": 0.4259, "step": 14508 }, { "epoch": 0.92, "grad_norm": 0.9969576001167297, "learning_rate": 1.7014965024562846e-07, "loss": 0.4873, "step": 14509 }, { "epoch": 0.92, "grad_norm": 1.0783843994140625, "learning_rate": 1.698843746962081e-07, "loss": 0.5172, "step": 14510 }, { "epoch": 0.92, "grad_norm": 1.0928618907928467, "learning_rate": 1.6961930252615388e-07, "loss": 0.4765, "step": 14511 }, { "epoch": 0.92, "grad_norm": 1.0058009624481201, "learning_rate": 1.6935443374662741e-07, "loss": 0.4716, "step": 14512 }, { "epoch": 0.92, "grad_norm": 0.999502956867218, "learning_rate": 1.6908976836878088e-07, "loss": 0.4922, "step": 14513 }, { "epoch": 0.92, "grad_norm": 1.1754869222640991, "learning_rate": 1.6882530640375872e-07, "loss": 0.4827, "step": 14514 }, { "epoch": 0.92, "grad_norm": 1.1096594333648682, "learning_rate": 1.68561047862697e-07, "loss": 0.4981, "step": 14515 }, { "epoch": 0.92, "grad_norm": 2.6716039180755615, "learning_rate": 1.6829699275672186e-07, "loss": 0.5013, "step": 14516 }, { "epoch": 0.92, "grad_norm": 1.1831146478652954, "learning_rate": 1.6803314109695157e-07, "loss": 0.5198, "step": 14517 }, { "epoch": 0.92, "grad_norm": 1.009282112121582, "learning_rate": 1.677694928944973e-07, "loss": 0.5035, "step": 14518 }, { "epoch": 0.92, "grad_norm": 1.0758283138275146, "learning_rate": 1.6750604816045902e-07, "loss": 0.4773, "step": 14519 }, { "epoch": 0.92, "grad_norm": 1.0731827020645142, "learning_rate": 1.6724280690593008e-07, "loss": 0.506, "step": 14520 }, { "epoch": 0.92, "grad_norm": 1.0044021606445312, "learning_rate": 1.6697976914199497e-07, "loss": 0.4654, "step": 14521 }, { "epoch": 0.92, "grad_norm": 1.0994043350219727, "learning_rate": 1.6671693487972818e-07, "loss": 0.4649, "step": 14522 }, { "epoch": 0.92, "grad_norm": 1.109891653060913, "learning_rate": 1.6645430413019858e-07, "loss": 0.5406, "step": 14523 }, { "epoch": 0.92, "grad_norm": 1.03861403465271, "learning_rate": 1.6619187690446293e-07, "loss": 0.5204, "step": 14524 }, { "epoch": 0.92, "grad_norm": 1.0034364461898804, "learning_rate": 1.659296532135718e-07, "loss": 0.4742, "step": 14525 }, { "epoch": 0.92, "grad_norm": 1.1274032592773438, "learning_rate": 1.6566763306856638e-07, "loss": 0.5234, "step": 14526 }, { "epoch": 0.92, "grad_norm": 1.0752556324005127, "learning_rate": 1.6540581648048003e-07, "loss": 0.5089, "step": 14527 }, { "epoch": 0.92, "grad_norm": 1.0327403545379639, "learning_rate": 1.6514420346033565e-07, "loss": 0.5078, "step": 14528 }, { "epoch": 0.92, "grad_norm": 1.0728938579559326, "learning_rate": 1.6488279401915052e-07, "loss": 0.5332, "step": 14529 }, { "epoch": 0.92, "grad_norm": 1.0218722820281982, "learning_rate": 1.6462158816792973e-07, "loss": 0.5185, "step": 14530 }, { "epoch": 0.92, "grad_norm": 1.051649808883667, "learning_rate": 1.643605859176739e-07, "loss": 0.4676, "step": 14531 }, { "epoch": 0.92, "grad_norm": 0.9655593633651733, "learning_rate": 1.6409978727937094e-07, "loss": 0.4628, "step": 14532 }, { "epoch": 0.92, "grad_norm": 1.1105687618255615, "learning_rate": 1.6383919226400368e-07, "loss": 0.5038, "step": 14533 }, { "epoch": 0.92, "grad_norm": 1.0202934741973877, "learning_rate": 1.6357880088254396e-07, "loss": 0.4905, "step": 14534 }, { "epoch": 0.92, "grad_norm": 1.1168105602264404, "learning_rate": 1.633186131459563e-07, "loss": 0.4577, "step": 14535 }, { "epoch": 0.92, "grad_norm": 1.1130887269973755, "learning_rate": 1.6305862906519587e-07, "loss": 0.4963, "step": 14536 }, { "epoch": 0.92, "grad_norm": 1.187774658203125, "learning_rate": 1.6279884865121108e-07, "loss": 0.5196, "step": 14537 }, { "epoch": 0.92, "grad_norm": 1.1031478643417358, "learning_rate": 1.6253927191493879e-07, "loss": 0.5342, "step": 14538 }, { "epoch": 0.92, "grad_norm": 0.9925063848495483, "learning_rate": 1.622798988673091e-07, "loss": 0.4971, "step": 14539 }, { "epoch": 0.92, "grad_norm": 1.1163438558578491, "learning_rate": 1.6202072951924386e-07, "loss": 0.4807, "step": 14540 }, { "epoch": 0.92, "grad_norm": 1.0186446905136108, "learning_rate": 1.6176176388165598e-07, "loss": 0.5142, "step": 14541 }, { "epoch": 0.92, "grad_norm": 0.9980571866035461, "learning_rate": 1.6150300196544955e-07, "loss": 0.4519, "step": 14542 }, { "epoch": 0.92, "grad_norm": 1.1521648168563843, "learning_rate": 1.612444437815186e-07, "loss": 0.4882, "step": 14543 }, { "epoch": 0.92, "grad_norm": 0.9618294835090637, "learning_rate": 1.6098608934075166e-07, "loss": 0.4419, "step": 14544 }, { "epoch": 0.92, "grad_norm": 1.0831882953643799, "learning_rate": 1.607279386540278e-07, "loss": 0.531, "step": 14545 }, { "epoch": 0.92, "grad_norm": 1.0838994979858398, "learning_rate": 1.60469991732215e-07, "loss": 0.4938, "step": 14546 }, { "epoch": 0.92, "grad_norm": 1.1143907308578491, "learning_rate": 1.6021224858617513e-07, "loss": 0.4969, "step": 14547 }, { "epoch": 0.92, "grad_norm": 0.9926535487174988, "learning_rate": 1.5995470922676116e-07, "loss": 0.5519, "step": 14548 }, { "epoch": 0.92, "grad_norm": 1.0101168155670166, "learning_rate": 1.5969737366481774e-07, "loss": 0.4652, "step": 14549 }, { "epoch": 0.92, "grad_norm": 1.1262658834457397, "learning_rate": 1.5944024191117958e-07, "loss": 0.5355, "step": 14550 }, { "epoch": 0.92, "grad_norm": 1.187333106994629, "learning_rate": 1.5918331397667298e-07, "loss": 0.4975, "step": 14551 }, { "epoch": 0.92, "grad_norm": 1.0564457178115845, "learning_rate": 1.589265898721176e-07, "loss": 0.4909, "step": 14552 }, { "epoch": 0.92, "grad_norm": 1.0846610069274902, "learning_rate": 1.586700696083232e-07, "loss": 0.5091, "step": 14553 }, { "epoch": 0.92, "grad_norm": 1.1003252267837524, "learning_rate": 1.5841375319608943e-07, "loss": 0.4942, "step": 14554 }, { "epoch": 0.92, "grad_norm": 1.0836139917373657, "learning_rate": 1.5815764064621043e-07, "loss": 0.4812, "step": 14555 }, { "epoch": 0.92, "grad_norm": 1.1072503328323364, "learning_rate": 1.5790173196946924e-07, "loss": 0.4757, "step": 14556 }, { "epoch": 0.92, "grad_norm": 1.140049695968628, "learning_rate": 1.5764602717664224e-07, "loss": 0.4893, "step": 14557 }, { "epoch": 0.92, "grad_norm": 1.0675603151321411, "learning_rate": 1.5739052627849581e-07, "loss": 0.4685, "step": 14558 }, { "epoch": 0.92, "grad_norm": 1.1174269914627075, "learning_rate": 1.571352292857875e-07, "loss": 0.4534, "step": 14559 }, { "epoch": 0.92, "grad_norm": 1.1924841403961182, "learning_rate": 1.5688013620926757e-07, "loss": 0.51, "step": 14560 }, { "epoch": 0.92, "grad_norm": 1.0370993614196777, "learning_rate": 1.566252470596774e-07, "loss": 0.4771, "step": 14561 }, { "epoch": 0.92, "grad_norm": 1.0299807786941528, "learning_rate": 1.5637056184774958e-07, "loss": 0.5161, "step": 14562 }, { "epoch": 0.92, "grad_norm": 1.042910099029541, "learning_rate": 1.5611608058420714e-07, "loss": 0.4819, "step": 14563 }, { "epoch": 0.92, "grad_norm": 1.017249584197998, "learning_rate": 1.5586180327976598e-07, "loss": 0.4749, "step": 14564 }, { "epoch": 0.92, "grad_norm": 1.109641194343567, "learning_rate": 1.5560772994513251e-07, "loss": 0.508, "step": 14565 }, { "epoch": 0.92, "grad_norm": 0.9985325932502747, "learning_rate": 1.55353860591006e-07, "loss": 0.4372, "step": 14566 }, { "epoch": 0.92, "grad_norm": 1.0511524677276611, "learning_rate": 1.5510019522807397e-07, "loss": 0.4739, "step": 14567 }, { "epoch": 0.92, "grad_norm": 1.0950795412063599, "learning_rate": 1.5484673386701953e-07, "loss": 0.5365, "step": 14568 }, { "epoch": 0.92, "grad_norm": 1.0569123029708862, "learning_rate": 1.545934765185131e-07, "loss": 0.4913, "step": 14569 }, { "epoch": 0.92, "grad_norm": 1.0184240341186523, "learning_rate": 1.5434042319321996e-07, "loss": 0.4783, "step": 14570 }, { "epoch": 0.92, "grad_norm": 1.0293238162994385, "learning_rate": 1.5408757390179496e-07, "loss": 0.4696, "step": 14571 }, { "epoch": 0.92, "grad_norm": 1.0004197359085083, "learning_rate": 1.5383492865488459e-07, "loss": 0.498, "step": 14572 }, { "epoch": 0.92, "grad_norm": 1.0610783100128174, "learning_rate": 1.5358248746312588e-07, "loss": 0.517, "step": 14573 }, { "epoch": 0.92, "grad_norm": 1.0828512907028198, "learning_rate": 1.533302503371503e-07, "loss": 0.4697, "step": 14574 }, { "epoch": 0.92, "grad_norm": 1.1009697914123535, "learning_rate": 1.5307821728757722e-07, "loss": 0.5079, "step": 14575 }, { "epoch": 0.92, "grad_norm": 1.1081169843673706, "learning_rate": 1.5282638832501917e-07, "loss": 0.5489, "step": 14576 }, { "epoch": 0.92, "grad_norm": 1.023945689201355, "learning_rate": 1.5257476346007938e-07, "loss": 0.4622, "step": 14577 }, { "epoch": 0.92, "grad_norm": 1.0600881576538086, "learning_rate": 1.523233427033538e-07, "loss": 0.487, "step": 14578 }, { "epoch": 0.92, "grad_norm": 1.0859774351119995, "learning_rate": 1.5207212606542786e-07, "loss": 0.5268, "step": 14579 }, { "epoch": 0.92, "grad_norm": 0.9725646376609802, "learning_rate": 1.518211135568809e-07, "loss": 0.4515, "step": 14580 }, { "epoch": 0.92, "grad_norm": 1.0840494632720947, "learning_rate": 1.5157030518828054e-07, "loss": 0.5228, "step": 14581 }, { "epoch": 0.92, "grad_norm": 1.0530489683151245, "learning_rate": 1.513197009701889e-07, "loss": 0.4621, "step": 14582 }, { "epoch": 0.92, "grad_norm": 1.0261887311935425, "learning_rate": 1.510693009131564e-07, "loss": 0.5065, "step": 14583 }, { "epoch": 0.92, "grad_norm": 1.0902878046035767, "learning_rate": 1.50819105027728e-07, "loss": 0.4306, "step": 14584 }, { "epoch": 0.92, "grad_norm": 1.032993197441101, "learning_rate": 1.5056911332443801e-07, "loss": 0.5454, "step": 14585 }, { "epoch": 0.92, "grad_norm": 1.0643889904022217, "learning_rate": 1.5031932581381247e-07, "loss": 0.524, "step": 14586 }, { "epoch": 0.92, "grad_norm": 1.0280283689498901, "learning_rate": 1.5006974250636906e-07, "loss": 0.5156, "step": 14587 }, { "epoch": 0.92, "grad_norm": 1.021931767463684, "learning_rate": 1.498203634126183e-07, "loss": 0.4612, "step": 14588 }, { "epoch": 0.92, "grad_norm": 1.01459801197052, "learning_rate": 1.4957118854305842e-07, "loss": 0.5029, "step": 14589 }, { "epoch": 0.92, "grad_norm": 1.0916061401367188, "learning_rate": 1.4932221790818268e-07, "loss": 0.5149, "step": 14590 }, { "epoch": 0.92, "grad_norm": 1.045670509338379, "learning_rate": 1.4907345151847387e-07, "loss": 0.511, "step": 14591 }, { "epoch": 0.92, "grad_norm": 1.0131471157073975, "learning_rate": 1.4882488938440688e-07, "loss": 0.4715, "step": 14592 }, { "epoch": 0.92, "grad_norm": 1.168458104133606, "learning_rate": 1.485765315164478e-07, "loss": 0.5041, "step": 14593 }, { "epoch": 0.92, "grad_norm": 1.0689053535461426, "learning_rate": 1.483283779250544e-07, "loss": 0.4845, "step": 14594 }, { "epoch": 0.92, "grad_norm": 1.2584795951843262, "learning_rate": 1.4808042862067496e-07, "loss": 0.5169, "step": 14595 }, { "epoch": 0.92, "grad_norm": 1.0628129243850708, "learning_rate": 1.4783268361375058e-07, "loss": 0.5561, "step": 14596 }, { "epoch": 0.92, "grad_norm": 1.0348163843154907, "learning_rate": 1.4758514291471238e-07, "loss": 0.4674, "step": 14597 }, { "epoch": 0.92, "grad_norm": 1.0075467824935913, "learning_rate": 1.4733780653398254e-07, "loss": 0.4785, "step": 14598 }, { "epoch": 0.92, "grad_norm": 1.1389700174331665, "learning_rate": 1.4709067448197722e-07, "loss": 0.5509, "step": 14599 }, { "epoch": 0.92, "grad_norm": 1.0486923456192017, "learning_rate": 1.4684374676910197e-07, "loss": 0.5141, "step": 14600 }, { "epoch": 0.93, "grad_norm": 1.038835883140564, "learning_rate": 1.4659702340575287e-07, "loss": 0.4996, "step": 14601 }, { "epoch": 0.93, "grad_norm": 0.9914584159851074, "learning_rate": 1.4635050440232002e-07, "loss": 0.461, "step": 14602 }, { "epoch": 0.93, "grad_norm": 1.0763351917266846, "learning_rate": 1.4610418976918172e-07, "loss": 0.5136, "step": 14603 }, { "epoch": 0.93, "grad_norm": 1.1267943382263184, "learning_rate": 1.4585807951671194e-07, "loss": 0.5065, "step": 14604 }, { "epoch": 0.93, "grad_norm": 1.0288209915161133, "learning_rate": 1.4561217365527124e-07, "loss": 0.5359, "step": 14605 }, { "epoch": 0.93, "grad_norm": 1.1225999593734741, "learning_rate": 1.453664721952147e-07, "loss": 0.4836, "step": 14606 }, { "epoch": 0.93, "grad_norm": 1.0499444007873535, "learning_rate": 1.451209751468885e-07, "loss": 0.5059, "step": 14607 }, { "epoch": 0.93, "grad_norm": 1.0559086799621582, "learning_rate": 1.448756825206288e-07, "loss": 0.4934, "step": 14608 }, { "epoch": 0.93, "grad_norm": 1.1914482116699219, "learning_rate": 1.4463059432676395e-07, "loss": 0.5341, "step": 14609 }, { "epoch": 0.93, "grad_norm": 1.074084997177124, "learning_rate": 1.4438571057561523e-07, "loss": 0.4574, "step": 14610 }, { "epoch": 0.93, "grad_norm": 1.0202916860580444, "learning_rate": 1.4414103127749157e-07, "loss": 0.4697, "step": 14611 }, { "epoch": 0.93, "grad_norm": 1.0890121459960938, "learning_rate": 1.4389655644269752e-07, "loss": 0.5635, "step": 14612 }, { "epoch": 0.93, "grad_norm": 1.1268235445022583, "learning_rate": 1.4365228608152647e-07, "loss": 0.5436, "step": 14613 }, { "epoch": 0.93, "grad_norm": 1.097791075706482, "learning_rate": 1.4340822020426304e-07, "loss": 0.5227, "step": 14614 }, { "epoch": 0.93, "grad_norm": 1.1117488145828247, "learning_rate": 1.4316435882118563e-07, "loss": 0.5534, "step": 14615 }, { "epoch": 0.93, "grad_norm": 1.0937881469726562, "learning_rate": 1.429207019425599e-07, "loss": 0.5099, "step": 14616 }, { "epoch": 0.93, "grad_norm": 1.0312176942825317, "learning_rate": 1.426772495786477e-07, "loss": 0.4859, "step": 14617 }, { "epoch": 0.93, "grad_norm": 1.1102279424667358, "learning_rate": 1.4243400173969968e-07, "loss": 0.4672, "step": 14618 }, { "epoch": 0.93, "grad_norm": 1.0215271711349487, "learning_rate": 1.4219095843595654e-07, "loss": 0.4931, "step": 14619 }, { "epoch": 0.93, "grad_norm": 1.036686658859253, "learning_rate": 1.4194811967765344e-07, "loss": 0.4831, "step": 14620 }, { "epoch": 0.93, "grad_norm": 1.077612042427063, "learning_rate": 1.417054854750155e-07, "loss": 0.4795, "step": 14621 }, { "epoch": 0.93, "grad_norm": 1.0578806400299072, "learning_rate": 1.414630558382579e-07, "loss": 0.4379, "step": 14622 }, { "epoch": 0.93, "grad_norm": 1.1002401113510132, "learning_rate": 1.4122083077759087e-07, "loss": 0.4855, "step": 14623 }, { "epoch": 0.93, "grad_norm": 1.0033938884735107, "learning_rate": 1.409788103032106e-07, "loss": 0.4872, "step": 14624 }, { "epoch": 0.93, "grad_norm": 1.0382969379425049, "learning_rate": 1.4073699442531007e-07, "loss": 0.4959, "step": 14625 }, { "epoch": 0.93, "grad_norm": 1.0288439989089966, "learning_rate": 1.4049538315407064e-07, "loss": 0.4967, "step": 14626 }, { "epoch": 0.93, "grad_norm": 1.0696370601654053, "learning_rate": 1.4025397649966577e-07, "loss": 0.4914, "step": 14627 }, { "epoch": 0.93, "grad_norm": 1.0011566877365112, "learning_rate": 1.400127744722596e-07, "loss": 0.501, "step": 14628 }, { "epoch": 0.93, "grad_norm": 1.134408950805664, "learning_rate": 1.3977177708200896e-07, "loss": 0.5081, "step": 14629 }, { "epoch": 0.93, "grad_norm": 1.0760116577148438, "learning_rate": 1.395309843390613e-07, "loss": 0.506, "step": 14630 }, { "epoch": 0.93, "grad_norm": 1.0447229146957397, "learning_rate": 1.3929039625355633e-07, "loss": 0.5096, "step": 14631 }, { "epoch": 0.93, "grad_norm": 1.1114329099655151, "learning_rate": 1.3905001283562257e-07, "loss": 0.5185, "step": 14632 }, { "epoch": 0.93, "grad_norm": 1.0503195524215698, "learning_rate": 1.3880983409538252e-07, "loss": 0.4906, "step": 14633 }, { "epoch": 0.93, "grad_norm": 1.1024737358093262, "learning_rate": 1.3856986004295082e-07, "loss": 0.4867, "step": 14634 }, { "epoch": 0.93, "grad_norm": 1.0304983854293823, "learning_rate": 1.3833009068842995e-07, "loss": 0.4899, "step": 14635 }, { "epoch": 0.93, "grad_norm": 1.0985244512557983, "learning_rate": 1.3809052604191632e-07, "loss": 0.5125, "step": 14636 }, { "epoch": 0.93, "grad_norm": 1.1270508766174316, "learning_rate": 1.3785116611349736e-07, "loss": 0.5057, "step": 14637 }, { "epoch": 0.93, "grad_norm": 1.1066454648971558, "learning_rate": 1.3761201091325172e-07, "loss": 0.5184, "step": 14638 }, { "epoch": 0.93, "grad_norm": 1.0407905578613281, "learning_rate": 1.3737306045124966e-07, "loss": 0.5327, "step": 14639 }, { "epoch": 0.93, "grad_norm": 1.1179550886154175, "learning_rate": 1.3713431473755147e-07, "loss": 0.4996, "step": 14640 }, { "epoch": 0.93, "grad_norm": 1.1324025392532349, "learning_rate": 1.3689577378221019e-07, "loss": 0.5664, "step": 14641 }, { "epoch": 0.93, "grad_norm": 0.9954050183296204, "learning_rate": 1.3665743759527173e-07, "loss": 0.449, "step": 14642 }, { "epoch": 0.93, "grad_norm": 1.1661484241485596, "learning_rate": 1.3641930618676912e-07, "loss": 0.5231, "step": 14643 }, { "epoch": 0.93, "grad_norm": 1.1205106973648071, "learning_rate": 1.3618137956673105e-07, "loss": 0.4912, "step": 14644 }, { "epoch": 0.93, "grad_norm": 1.1276148557662964, "learning_rate": 1.3594365774517447e-07, "loss": 0.5189, "step": 14645 }, { "epoch": 0.93, "grad_norm": 1.1023553609848022, "learning_rate": 1.357061407321103e-07, "loss": 0.477, "step": 14646 }, { "epoch": 0.93, "grad_norm": 1.1168313026428223, "learning_rate": 1.3546882853753885e-07, "loss": 0.5251, "step": 14647 }, { "epoch": 0.93, "grad_norm": 0.9755619764328003, "learning_rate": 1.3523172117145212e-07, "loss": 0.5231, "step": 14648 }, { "epoch": 0.93, "grad_norm": 1.0576835870742798, "learning_rate": 1.349948186438349e-07, "loss": 0.498, "step": 14649 }, { "epoch": 0.93, "grad_norm": 1.0488373041152954, "learning_rate": 1.347581209646609e-07, "loss": 0.5153, "step": 14650 }, { "epoch": 0.93, "grad_norm": 1.083781361579895, "learning_rate": 1.3452162814389824e-07, "loss": 0.4722, "step": 14651 }, { "epoch": 0.93, "grad_norm": 1.0556163787841797, "learning_rate": 1.342853401915034e-07, "loss": 0.5482, "step": 14652 }, { "epoch": 0.93, "grad_norm": 1.03798246383667, "learning_rate": 1.3404925711742734e-07, "loss": 0.539, "step": 14653 }, { "epoch": 0.93, "grad_norm": 1.024129033088684, "learning_rate": 1.3381337893160818e-07, "loss": 0.4782, "step": 14654 }, { "epoch": 0.93, "grad_norm": 1.0649831295013428, "learning_rate": 1.3357770564398075e-07, "loss": 0.4895, "step": 14655 }, { "epoch": 0.93, "grad_norm": 1.0626680850982666, "learning_rate": 1.333422372644666e-07, "loss": 0.5219, "step": 14656 }, { "epoch": 0.93, "grad_norm": 1.059069275856018, "learning_rate": 1.331069738029811e-07, "loss": 0.4972, "step": 14657 }, { "epoch": 0.93, "grad_norm": 1.0212222337722778, "learning_rate": 1.3287191526942968e-07, "loss": 0.5299, "step": 14658 }, { "epoch": 0.93, "grad_norm": 1.0332025289535522, "learning_rate": 1.3263706167371104e-07, "loss": 0.4738, "step": 14659 }, { "epoch": 0.93, "grad_norm": 1.107440710067749, "learning_rate": 1.324024130257129e-07, "loss": 0.534, "step": 14660 }, { "epoch": 0.93, "grad_norm": 1.0443707704544067, "learning_rate": 1.3216796933531672e-07, "loss": 0.5026, "step": 14661 }, { "epoch": 0.93, "grad_norm": 0.9763154983520508, "learning_rate": 1.31933730612393e-07, "loss": 0.5172, "step": 14662 }, { "epoch": 0.93, "grad_norm": 1.0752264261245728, "learning_rate": 1.316996968668044e-07, "loss": 0.5152, "step": 14663 }, { "epoch": 0.93, "grad_norm": 1.047065258026123, "learning_rate": 1.3146586810840745e-07, "loss": 0.4737, "step": 14664 }, { "epoch": 0.93, "grad_norm": 1.1606717109680176, "learning_rate": 1.312322443470454e-07, "loss": 0.5027, "step": 14665 }, { "epoch": 0.93, "grad_norm": 1.1153333187103271, "learning_rate": 1.309988255925565e-07, "loss": 0.5118, "step": 14666 }, { "epoch": 0.93, "grad_norm": 1.1025995016098022, "learning_rate": 1.30765611854769e-07, "loss": 0.5324, "step": 14667 }, { "epoch": 0.93, "grad_norm": 1.097686529159546, "learning_rate": 1.305326031435028e-07, "loss": 0.5022, "step": 14668 }, { "epoch": 0.93, "grad_norm": 1.1420540809631348, "learning_rate": 1.3029979946856953e-07, "loss": 0.5704, "step": 14669 }, { "epoch": 0.93, "grad_norm": 1.013661503791809, "learning_rate": 1.3006720083977076e-07, "loss": 0.4919, "step": 14670 }, { "epoch": 0.93, "grad_norm": 1.01823091506958, "learning_rate": 1.2983480726690033e-07, "loss": 0.4721, "step": 14671 }, { "epoch": 0.93, "grad_norm": 1.14744234085083, "learning_rate": 1.296026187597449e-07, "loss": 0.4951, "step": 14672 }, { "epoch": 0.93, "grad_norm": 1.062508225440979, "learning_rate": 1.2937063532807992e-07, "loss": 0.4741, "step": 14673 }, { "epoch": 0.93, "grad_norm": 1.0481140613555908, "learning_rate": 1.2913885698167427e-07, "loss": 0.4931, "step": 14674 }, { "epoch": 0.93, "grad_norm": 1.1365739107131958, "learning_rate": 1.2890728373028626e-07, "loss": 0.4872, "step": 14675 }, { "epoch": 0.93, "grad_norm": 1.0365864038467407, "learning_rate": 1.2867591558366755e-07, "loss": 0.4568, "step": 14676 }, { "epoch": 0.93, "grad_norm": 1.0156171321868896, "learning_rate": 1.2844475255156087e-07, "loss": 0.4704, "step": 14677 }, { "epoch": 0.93, "grad_norm": 1.044424295425415, "learning_rate": 1.2821379464369732e-07, "loss": 0.4823, "step": 14678 }, { "epoch": 0.93, "grad_norm": 1.0783231258392334, "learning_rate": 1.2798304186980358e-07, "loss": 0.5153, "step": 14679 }, { "epoch": 0.93, "grad_norm": 1.0163437128067017, "learning_rate": 1.277524942395958e-07, "loss": 0.4605, "step": 14680 }, { "epoch": 0.93, "grad_norm": 1.115587592124939, "learning_rate": 1.275221517627806e-07, "loss": 0.5101, "step": 14681 }, { "epoch": 0.93, "grad_norm": 0.9934950470924377, "learning_rate": 1.2729201444905803e-07, "loss": 0.4888, "step": 14682 }, { "epoch": 0.93, "grad_norm": 1.0094565153121948, "learning_rate": 1.2706208230811812e-07, "loss": 0.4648, "step": 14683 }, { "epoch": 0.93, "grad_norm": 1.1033930778503418, "learning_rate": 1.2683235534964088e-07, "loss": 0.5416, "step": 14684 }, { "epoch": 0.93, "grad_norm": 1.0976402759552002, "learning_rate": 1.2660283358330195e-07, "loss": 0.4732, "step": 14685 }, { "epoch": 0.93, "grad_norm": 1.0931764841079712, "learning_rate": 1.263735170187641e-07, "loss": 0.5057, "step": 14686 }, { "epoch": 0.93, "grad_norm": 1.050828456878662, "learning_rate": 1.26144405665683e-07, "loss": 0.4758, "step": 14687 }, { "epoch": 0.93, "grad_norm": 1.0917552709579468, "learning_rate": 1.2591549953370586e-07, "loss": 0.5101, "step": 14688 }, { "epoch": 0.93, "grad_norm": 1.0817075967788696, "learning_rate": 1.2568679863247168e-07, "loss": 0.5084, "step": 14689 }, { "epoch": 0.93, "grad_norm": 1.0188014507293701, "learning_rate": 1.2545830297161e-07, "loss": 0.4613, "step": 14690 }, { "epoch": 0.93, "grad_norm": 1.0029340982437134, "learning_rate": 1.2523001256074196e-07, "loss": 0.4473, "step": 14691 }, { "epoch": 0.93, "grad_norm": 1.103075385093689, "learning_rate": 1.2500192740947936e-07, "loss": 0.49, "step": 14692 }, { "epoch": 0.93, "grad_norm": 1.0108256340026855, "learning_rate": 1.2477404752742784e-07, "loss": 0.4927, "step": 14693 }, { "epoch": 0.93, "grad_norm": 1.1069060564041138, "learning_rate": 1.2454637292418082e-07, "loss": 0.4983, "step": 14694 }, { "epoch": 0.93, "grad_norm": 1.1655943393707275, "learning_rate": 1.2431890360932507e-07, "loss": 0.5047, "step": 14695 }, { "epoch": 0.93, "grad_norm": 1.1198543310165405, "learning_rate": 1.2409163959244019e-07, "loss": 0.5091, "step": 14696 }, { "epoch": 0.93, "grad_norm": 1.043695092201233, "learning_rate": 1.2386458088309296e-07, "loss": 0.4854, "step": 14697 }, { "epoch": 0.93, "grad_norm": 1.0993931293487549, "learning_rate": 1.2363772749084625e-07, "loss": 0.4677, "step": 14698 }, { "epoch": 0.93, "grad_norm": 1.0852168798446655, "learning_rate": 1.2341107942525132e-07, "loss": 0.4745, "step": 14699 }, { "epoch": 0.93, "grad_norm": 0.9771943092346191, "learning_rate": 1.2318463669585112e-07, "loss": 0.4683, "step": 14700 }, { "epoch": 0.93, "grad_norm": 1.085769772529602, "learning_rate": 1.229583993121808e-07, "loss": 0.495, "step": 14701 }, { "epoch": 0.93, "grad_norm": 1.0052050352096558, "learning_rate": 1.2273236728376604e-07, "loss": 0.4935, "step": 14702 }, { "epoch": 0.93, "grad_norm": 1.0405563116073608, "learning_rate": 1.2250654062012478e-07, "loss": 0.4673, "step": 14703 }, { "epoch": 0.93, "grad_norm": 1.0291260480880737, "learning_rate": 1.2228091933076613e-07, "loss": 0.4609, "step": 14704 }, { "epoch": 0.93, "grad_norm": 0.9860481023788452, "learning_rate": 1.2205550342518803e-07, "loss": 0.4391, "step": 14705 }, { "epoch": 0.93, "grad_norm": 1.0832188129425049, "learning_rate": 1.2183029291288452e-07, "loss": 0.5244, "step": 14706 }, { "epoch": 0.93, "grad_norm": 1.0049821138381958, "learning_rate": 1.2160528780333803e-07, "loss": 0.4529, "step": 14707 }, { "epoch": 0.93, "grad_norm": 1.0232901573181152, "learning_rate": 1.2138048810602154e-07, "loss": 0.5217, "step": 14708 }, { "epoch": 0.93, "grad_norm": 1.0310624837875366, "learning_rate": 1.2115589383040083e-07, "loss": 0.5262, "step": 14709 }, { "epoch": 0.93, "grad_norm": 1.1270962953567505, "learning_rate": 1.2093150498593387e-07, "loss": 0.4924, "step": 14710 }, { "epoch": 0.93, "grad_norm": 1.0199156999588013, "learning_rate": 1.2070732158206754e-07, "loss": 0.5297, "step": 14711 }, { "epoch": 0.93, "grad_norm": 1.102827548980713, "learning_rate": 1.2048334362824265e-07, "loss": 0.5266, "step": 14712 }, { "epoch": 0.93, "grad_norm": 1.082411766052246, "learning_rate": 1.202595711338894e-07, "loss": 0.5163, "step": 14713 }, { "epoch": 0.93, "grad_norm": 0.9880638122558594, "learning_rate": 1.2003600410842974e-07, "loss": 0.4991, "step": 14714 }, { "epoch": 0.93, "grad_norm": 1.1178537607192993, "learning_rate": 1.1981264256127832e-07, "loss": 0.5123, "step": 14715 }, { "epoch": 0.93, "grad_norm": 1.0149600505828857, "learning_rate": 1.1958948650183988e-07, "loss": 0.516, "step": 14716 }, { "epoch": 0.93, "grad_norm": 1.02154541015625, "learning_rate": 1.1936653593950964e-07, "loss": 0.4847, "step": 14717 }, { "epoch": 0.93, "grad_norm": 1.0899893045425415, "learning_rate": 1.1914379088367677e-07, "loss": 0.5178, "step": 14718 }, { "epoch": 0.93, "grad_norm": 1.0712817907333374, "learning_rate": 1.1892125134371935e-07, "loss": 0.4968, "step": 14719 }, { "epoch": 0.93, "grad_norm": 1.1291003227233887, "learning_rate": 1.1869891732900762e-07, "loss": 0.5253, "step": 14720 }, { "epoch": 0.93, "grad_norm": 1.0928550958633423, "learning_rate": 1.1847678884890467e-07, "loss": 0.4624, "step": 14721 }, { "epoch": 0.93, "grad_norm": 0.9802485108375549, "learning_rate": 1.1825486591276136e-07, "loss": 0.4553, "step": 14722 }, { "epoch": 0.93, "grad_norm": 1.1071562767028809, "learning_rate": 1.1803314852992409e-07, "loss": 0.5389, "step": 14723 }, { "epoch": 0.93, "grad_norm": 1.029962182044983, "learning_rate": 1.1781163670972762e-07, "loss": 0.5059, "step": 14724 }, { "epoch": 0.93, "grad_norm": 1.1084882020950317, "learning_rate": 1.1759033046149948e-07, "loss": 0.5179, "step": 14725 }, { "epoch": 0.93, "grad_norm": 1.0458074808120728, "learning_rate": 1.1736922979455778e-07, "loss": 0.5318, "step": 14726 }, { "epoch": 0.93, "grad_norm": 1.1072486639022827, "learning_rate": 1.1714833471821175e-07, "loss": 0.5415, "step": 14727 }, { "epoch": 0.93, "grad_norm": 1.0224193334579468, "learning_rate": 1.1692764524176337e-07, "loss": 0.5223, "step": 14728 }, { "epoch": 0.93, "grad_norm": 0.950425922870636, "learning_rate": 1.1670716137450577e-07, "loss": 0.4669, "step": 14729 }, { "epoch": 0.93, "grad_norm": 1.0135602951049805, "learning_rate": 1.1648688312572099e-07, "loss": 0.4715, "step": 14730 }, { "epoch": 0.93, "grad_norm": 1.1646565198898315, "learning_rate": 1.1626681050468492e-07, "loss": 0.5031, "step": 14731 }, { "epoch": 0.93, "grad_norm": 1.1194438934326172, "learning_rate": 1.1604694352066459e-07, "loss": 0.5512, "step": 14732 }, { "epoch": 0.93, "grad_norm": 0.9765666127204895, "learning_rate": 1.1582728218291761e-07, "loss": 0.4943, "step": 14733 }, { "epoch": 0.93, "grad_norm": 1.0588070154190063, "learning_rate": 1.1560782650069269e-07, "loss": 0.4678, "step": 14734 }, { "epoch": 0.93, "grad_norm": 1.0291880369186401, "learning_rate": 1.153885764832302e-07, "loss": 0.5237, "step": 14735 }, { "epoch": 0.93, "grad_norm": 1.0827213525772095, "learning_rate": 1.1516953213976278e-07, "loss": 0.5302, "step": 14736 }, { "epoch": 0.93, "grad_norm": 1.0477744340896606, "learning_rate": 1.1495069347951416e-07, "loss": 0.4948, "step": 14737 }, { "epoch": 0.93, "grad_norm": 1.0322061777114868, "learning_rate": 1.1473206051169694e-07, "loss": 0.5209, "step": 14738 }, { "epoch": 0.93, "grad_norm": 1.0588160753250122, "learning_rate": 1.1451363324551822e-07, "loss": 0.4953, "step": 14739 }, { "epoch": 0.93, "grad_norm": 1.0796961784362793, "learning_rate": 1.1429541169017511e-07, "loss": 0.4789, "step": 14740 }, { "epoch": 0.93, "grad_norm": 1.0964117050170898, "learning_rate": 1.1407739585485633e-07, "loss": 0.5103, "step": 14741 }, { "epoch": 0.93, "grad_norm": 1.1128816604614258, "learning_rate": 1.1385958574874178e-07, "loss": 0.5043, "step": 14742 }, { "epoch": 0.93, "grad_norm": 1.173375129699707, "learning_rate": 1.1364198138100191e-07, "loss": 0.5545, "step": 14743 }, { "epoch": 0.93, "grad_norm": 1.2093064785003662, "learning_rate": 1.1342458276079937e-07, "loss": 0.5485, "step": 14744 }, { "epoch": 0.93, "grad_norm": 1.0547919273376465, "learning_rate": 1.1320738989728963e-07, "loss": 0.436, "step": 14745 }, { "epoch": 0.93, "grad_norm": 0.9337969422340393, "learning_rate": 1.1299040279961593e-07, "loss": 0.4476, "step": 14746 }, { "epoch": 0.93, "grad_norm": 1.009351134300232, "learning_rate": 1.1277362147691595e-07, "loss": 0.4925, "step": 14747 }, { "epoch": 0.93, "grad_norm": 1.0779575109481812, "learning_rate": 1.125570459383174e-07, "loss": 0.5148, "step": 14748 }, { "epoch": 0.93, "grad_norm": 1.0640534162521362, "learning_rate": 1.1234067619293909e-07, "loss": 0.4839, "step": 14749 }, { "epoch": 0.93, "grad_norm": 1.1556848287582397, "learning_rate": 1.1212451224989262e-07, "loss": 0.4944, "step": 14750 }, { "epoch": 0.93, "grad_norm": 1.1237772703170776, "learning_rate": 1.1190855411827906e-07, "loss": 0.5018, "step": 14751 }, { "epoch": 0.93, "grad_norm": 1.168222427368164, "learning_rate": 1.1169280180719111e-07, "loss": 0.4884, "step": 14752 }, { "epoch": 0.93, "grad_norm": 0.9854783415794373, "learning_rate": 1.114772553257154e-07, "loss": 0.4873, "step": 14753 }, { "epoch": 0.93, "grad_norm": 1.0684829950332642, "learning_rate": 1.1126191468292579e-07, "loss": 0.4655, "step": 14754 }, { "epoch": 0.93, "grad_norm": 1.0761710405349731, "learning_rate": 1.1104677988789004e-07, "loss": 0.5349, "step": 14755 }, { "epoch": 0.93, "grad_norm": 0.9973959922790527, "learning_rate": 1.1083185094966753e-07, "loss": 0.4533, "step": 14756 }, { "epoch": 0.93, "grad_norm": 1.116524577140808, "learning_rate": 1.1061712787730716e-07, "loss": 0.5009, "step": 14757 }, { "epoch": 0.93, "grad_norm": 1.0338749885559082, "learning_rate": 1.1040261067985114e-07, "loss": 0.4979, "step": 14758 }, { "epoch": 0.94, "grad_norm": 1.0604186058044434, "learning_rate": 1.1018829936633113e-07, "loss": 0.5396, "step": 14759 }, { "epoch": 0.94, "grad_norm": 1.136382818222046, "learning_rate": 1.0997419394577158e-07, "loss": 0.535, "step": 14760 }, { "epoch": 0.94, "grad_norm": 1.0905015468597412, "learning_rate": 1.0976029442718694e-07, "loss": 0.504, "step": 14761 }, { "epoch": 0.94, "grad_norm": 1.0957517623901367, "learning_rate": 1.0954660081958502e-07, "loss": 0.4676, "step": 14762 }, { "epoch": 0.94, "grad_norm": 1.0968356132507324, "learning_rate": 1.0933311313196304e-07, "loss": 0.5497, "step": 14763 }, { "epoch": 0.94, "grad_norm": 1.05763840675354, "learning_rate": 1.091198313733105e-07, "loss": 0.4728, "step": 14764 }, { "epoch": 0.94, "grad_norm": 1.0490801334381104, "learning_rate": 1.0890675555260688e-07, "loss": 0.4904, "step": 14765 }, { "epoch": 0.94, "grad_norm": 1.0433591604232788, "learning_rate": 1.086938856788261e-07, "loss": 0.4838, "step": 14766 }, { "epoch": 0.94, "grad_norm": 1.1754895448684692, "learning_rate": 1.0848122176092935e-07, "loss": 0.4764, "step": 14767 }, { "epoch": 0.94, "grad_norm": 1.0383951663970947, "learning_rate": 1.0826876380787221e-07, "loss": 0.5165, "step": 14768 }, { "epoch": 0.94, "grad_norm": 1.01365327835083, "learning_rate": 1.0805651182860033e-07, "loss": 0.4913, "step": 14769 }, { "epoch": 0.94, "grad_norm": 1.0312130451202393, "learning_rate": 1.0784446583205099e-07, "loss": 0.4835, "step": 14770 }, { "epoch": 0.94, "grad_norm": 1.113490343093872, "learning_rate": 1.0763262582715206e-07, "loss": 0.5329, "step": 14771 }, { "epoch": 0.94, "grad_norm": 1.0257731676101685, "learning_rate": 1.0742099182282529e-07, "loss": 0.4546, "step": 14772 }, { "epoch": 0.94, "grad_norm": 1.0378912687301636, "learning_rate": 1.0720956382797965e-07, "loss": 0.4755, "step": 14773 }, { "epoch": 0.94, "grad_norm": 1.0855647325515747, "learning_rate": 1.0699834185151802e-07, "loss": 0.4862, "step": 14774 }, { "epoch": 0.94, "grad_norm": 1.115774154663086, "learning_rate": 1.0678732590233553e-07, "loss": 0.5119, "step": 14775 }, { "epoch": 0.94, "grad_norm": 1.0004231929779053, "learning_rate": 1.0657651598931563e-07, "loss": 0.4612, "step": 14776 }, { "epoch": 0.94, "grad_norm": 1.0334473848342896, "learning_rate": 1.0636591212133673e-07, "loss": 0.4967, "step": 14777 }, { "epoch": 0.94, "grad_norm": 1.09772527217865, "learning_rate": 1.0615551430726456e-07, "loss": 0.5046, "step": 14778 }, { "epoch": 0.94, "grad_norm": 0.9907475113868713, "learning_rate": 1.0594532255595979e-07, "loss": 0.4728, "step": 14779 }, { "epoch": 0.94, "grad_norm": 1.056544542312622, "learning_rate": 1.0573533687627258e-07, "loss": 0.4913, "step": 14780 }, { "epoch": 0.94, "grad_norm": 0.9403951168060303, "learning_rate": 1.0552555727704417e-07, "loss": 0.4794, "step": 14781 }, { "epoch": 0.94, "grad_norm": 1.1103562116622925, "learning_rate": 1.053159837671075e-07, "loss": 0.5223, "step": 14782 }, { "epoch": 0.94, "grad_norm": 0.9928891062736511, "learning_rate": 1.0510661635528774e-07, "loss": 0.4647, "step": 14783 }, { "epoch": 0.94, "grad_norm": 1.085282325744629, "learning_rate": 1.0489745505040006e-07, "loss": 0.4453, "step": 14784 }, { "epoch": 0.94, "grad_norm": 1.0276105403900146, "learning_rate": 1.0468849986125185e-07, "loss": 0.4977, "step": 14785 }, { "epoch": 0.94, "grad_norm": 1.0931756496429443, "learning_rate": 1.0447975079664163e-07, "loss": 0.4814, "step": 14786 }, { "epoch": 0.94, "grad_norm": 1.0745348930358887, "learning_rate": 1.042712078653585e-07, "loss": 0.5049, "step": 14787 }, { "epoch": 0.94, "grad_norm": 1.0469303131103516, "learning_rate": 1.0406287107618429e-07, "loss": 0.5248, "step": 14788 }, { "epoch": 0.94, "grad_norm": 1.0048182010650635, "learning_rate": 1.0385474043789034e-07, "loss": 0.4649, "step": 14789 }, { "epoch": 0.94, "grad_norm": 1.0827223062515259, "learning_rate": 1.0364681595924131e-07, "loss": 0.4582, "step": 14790 }, { "epoch": 0.94, "grad_norm": 0.9856858253479004, "learning_rate": 1.034390976489913e-07, "loss": 0.4624, "step": 14791 }, { "epoch": 0.94, "grad_norm": 0.9975650906562805, "learning_rate": 1.0323158551588663e-07, "loss": 0.4744, "step": 14792 }, { "epoch": 0.94, "grad_norm": 1.0474202632904053, "learning_rate": 1.030242795686659e-07, "loss": 0.4667, "step": 14793 }, { "epoch": 0.94, "grad_norm": 1.112815022468567, "learning_rate": 1.0281717981605765e-07, "loss": 0.5151, "step": 14794 }, { "epoch": 0.94, "grad_norm": 1.0142254829406738, "learning_rate": 1.0261028626678104e-07, "loss": 0.4269, "step": 14795 }, { "epoch": 0.94, "grad_norm": 1.0286717414855957, "learning_rate": 1.024035989295491e-07, "loss": 0.5014, "step": 14796 }, { "epoch": 0.94, "grad_norm": 1.137579083442688, "learning_rate": 1.0219711781306374e-07, "loss": 0.5317, "step": 14797 }, { "epoch": 0.94, "grad_norm": 1.1120980978012085, "learning_rate": 1.0199084292602024e-07, "loss": 0.4991, "step": 14798 }, { "epoch": 0.94, "grad_norm": 1.0727988481521606, "learning_rate": 1.0178477427710276e-07, "loss": 0.5524, "step": 14799 }, { "epoch": 0.94, "grad_norm": 1.0857231616973877, "learning_rate": 1.015789118749888e-07, "loss": 0.5149, "step": 14800 }, { "epoch": 0.94, "grad_norm": 1.041313648223877, "learning_rate": 1.0137325572834644e-07, "loss": 0.4561, "step": 14801 }, { "epoch": 0.94, "grad_norm": 1.1011313199996948, "learning_rate": 1.0116780584583596e-07, "loss": 0.5172, "step": 14802 }, { "epoch": 0.94, "grad_norm": 1.0425204038619995, "learning_rate": 1.0096256223610657e-07, "loss": 0.4757, "step": 14803 }, { "epoch": 0.94, "grad_norm": 1.1371713876724243, "learning_rate": 1.0075752490780133e-07, "loss": 0.53, "step": 14804 }, { "epoch": 0.94, "grad_norm": 1.0720175504684448, "learning_rate": 1.0055269386955391e-07, "loss": 0.5051, "step": 14805 }, { "epoch": 0.94, "grad_norm": 1.046457290649414, "learning_rate": 1.0034806912998796e-07, "loss": 0.5115, "step": 14806 }, { "epoch": 0.94, "grad_norm": 1.0416407585144043, "learning_rate": 1.0014365069772102e-07, "loss": 0.518, "step": 14807 }, { "epoch": 0.94, "grad_norm": 1.0310873985290527, "learning_rate": 9.993943858135846e-08, "loss": 0.5512, "step": 14808 }, { "epoch": 0.94, "grad_norm": 1.1090185642242432, "learning_rate": 9.973543278950115e-08, "loss": 0.4912, "step": 14809 }, { "epoch": 0.94, "grad_norm": 1.0710328817367554, "learning_rate": 9.953163333073779e-08, "loss": 0.5639, "step": 14810 }, { "epoch": 0.94, "grad_norm": 1.053472876548767, "learning_rate": 9.932804021364928e-08, "loss": 0.5128, "step": 14811 }, { "epoch": 0.94, "grad_norm": 1.1337611675262451, "learning_rate": 9.912465344680933e-08, "loss": 0.51, "step": 14812 }, { "epoch": 0.94, "grad_norm": 0.9499417543411255, "learning_rate": 9.892147303878108e-08, "loss": 0.4073, "step": 14813 }, { "epoch": 0.94, "grad_norm": 1.0529181957244873, "learning_rate": 9.871849899811991e-08, "loss": 0.4939, "step": 14814 }, { "epoch": 0.94, "grad_norm": 1.0673210620880127, "learning_rate": 9.851573133337288e-08, "loss": 0.5408, "step": 14815 }, { "epoch": 0.94, "grad_norm": 1.0431410074234009, "learning_rate": 9.83131700530765e-08, "loss": 0.5126, "step": 14816 }, { "epoch": 0.94, "grad_norm": 1.1376043558120728, "learning_rate": 9.81108151657617e-08, "loss": 0.4993, "step": 14817 }, { "epoch": 0.94, "grad_norm": 1.1424436569213867, "learning_rate": 9.790866667994781e-08, "loss": 0.5119, "step": 14818 }, { "epoch": 0.94, "grad_norm": 1.170620083808899, "learning_rate": 9.770672460414688e-08, "loss": 0.5726, "step": 14819 }, { "epoch": 0.94, "grad_norm": 1.0776851177215576, "learning_rate": 9.750498894686156e-08, "loss": 0.5107, "step": 14820 }, { "epoch": 0.94, "grad_norm": 1.0550538301467896, "learning_rate": 9.730345971658728e-08, "loss": 0.4677, "step": 14821 }, { "epoch": 0.94, "grad_norm": 1.0215625762939453, "learning_rate": 9.710213692180836e-08, "loss": 0.5126, "step": 14822 }, { "epoch": 0.94, "grad_norm": 1.1082737445831299, "learning_rate": 9.690102057100304e-08, "loss": 0.5123, "step": 14823 }, { "epoch": 0.94, "grad_norm": 1.137060284614563, "learning_rate": 9.670011067263896e-08, "loss": 0.4662, "step": 14824 }, { "epoch": 0.94, "grad_norm": 1.0273690223693848, "learning_rate": 9.649940723517549e-08, "loss": 0.4731, "step": 14825 }, { "epoch": 0.94, "grad_norm": 1.1005918979644775, "learning_rate": 9.629891026706472e-08, "loss": 0.455, "step": 14826 }, { "epoch": 0.94, "grad_norm": 1.0610016584396362, "learning_rate": 9.609861977674773e-08, "loss": 0.4533, "step": 14827 }, { "epoch": 0.94, "grad_norm": 0.9681522846221924, "learning_rate": 9.589853577265829e-08, "loss": 0.4696, "step": 14828 }, { "epoch": 0.94, "grad_norm": 1.0362496376037598, "learning_rate": 9.569865826322133e-08, "loss": 0.5161, "step": 14829 }, { "epoch": 0.94, "grad_norm": 1.0674338340759277, "learning_rate": 9.549898725685291e-08, "loss": 0.4986, "step": 14830 }, { "epoch": 0.94, "grad_norm": 1.0913596153259277, "learning_rate": 9.52995227619613e-08, "loss": 0.4844, "step": 14831 }, { "epoch": 0.94, "grad_norm": 1.0585317611694336, "learning_rate": 9.510026478694423e-08, "loss": 0.4917, "step": 14832 }, { "epoch": 0.94, "grad_norm": 0.9431830048561096, "learning_rate": 9.49012133401922e-08, "loss": 0.5156, "step": 14833 }, { "epoch": 0.94, "grad_norm": 1.1033228635787964, "learning_rate": 9.47023684300863e-08, "loss": 0.5279, "step": 14834 }, { "epoch": 0.94, "grad_norm": 1.1913155317306519, "learning_rate": 9.450373006499924e-08, "loss": 0.4868, "step": 14835 }, { "epoch": 0.94, "grad_norm": 1.1376891136169434, "learning_rate": 9.430529825329492e-08, "loss": 0.4777, "step": 14836 }, { "epoch": 0.94, "grad_norm": 1.0231413841247559, "learning_rate": 9.410707300333e-08, "loss": 0.5451, "step": 14837 }, { "epoch": 0.94, "grad_norm": 1.0114684104919434, "learning_rate": 9.390905432344833e-08, "loss": 0.4916, "step": 14838 }, { "epoch": 0.94, "grad_norm": 1.1515475511550903, "learning_rate": 9.371124222199046e-08, "loss": 0.4936, "step": 14839 }, { "epoch": 0.94, "grad_norm": 1.1124534606933594, "learning_rate": 9.35136367072842e-08, "loss": 0.4832, "step": 14840 }, { "epoch": 0.94, "grad_norm": 1.0438929796218872, "learning_rate": 9.331623778765009e-08, "loss": 0.4578, "step": 14841 }, { "epoch": 0.94, "grad_norm": 1.1023262739181519, "learning_rate": 9.311904547139982e-08, "loss": 0.4794, "step": 14842 }, { "epoch": 0.94, "grad_norm": 1.084631085395813, "learning_rate": 9.292205976683733e-08, "loss": 0.5164, "step": 14843 }, { "epoch": 0.94, "grad_norm": 1.036280870437622, "learning_rate": 9.272528068225595e-08, "loss": 0.5072, "step": 14844 }, { "epoch": 0.94, "grad_norm": 1.027374029159546, "learning_rate": 9.252870822594239e-08, "loss": 0.5063, "step": 14845 }, { "epoch": 0.94, "grad_norm": 1.0010735988616943, "learning_rate": 9.233234240617228e-08, "loss": 0.4835, "step": 14846 }, { "epoch": 0.94, "grad_norm": 1.0823222398757935, "learning_rate": 9.213618323121564e-08, "loss": 0.5167, "step": 14847 }, { "epoch": 0.94, "grad_norm": 1.0411672592163086, "learning_rate": 9.19402307093309e-08, "loss": 0.4888, "step": 14848 }, { "epoch": 0.94, "grad_norm": 1.0619031190872192, "learning_rate": 9.174448484876864e-08, "loss": 0.4686, "step": 14849 }, { "epoch": 0.94, "grad_norm": 0.9982632398605347, "learning_rate": 9.154894565777173e-08, "loss": 0.4258, "step": 14850 }, { "epoch": 0.94, "grad_norm": 1.087734580039978, "learning_rate": 9.135361314457358e-08, "loss": 0.5276, "step": 14851 }, { "epoch": 0.94, "grad_norm": 0.9884198307991028, "learning_rate": 9.115848731739874e-08, "loss": 0.502, "step": 14852 }, { "epoch": 0.94, "grad_norm": 1.1553099155426025, "learning_rate": 9.096356818446395e-08, "loss": 0.5425, "step": 14853 }, { "epoch": 0.94, "grad_norm": 1.0436712503433228, "learning_rate": 9.076885575397543e-08, "loss": 0.4802, "step": 14854 }, { "epoch": 0.94, "grad_norm": 1.1476726531982422, "learning_rate": 9.057435003413273e-08, "loss": 0.5413, "step": 14855 }, { "epoch": 0.94, "grad_norm": 1.073279619216919, "learning_rate": 9.038005103312486e-08, "loss": 0.5065, "step": 14856 }, { "epoch": 0.94, "grad_norm": 1.0361818075180054, "learning_rate": 9.018595875913416e-08, "loss": 0.475, "step": 14857 }, { "epoch": 0.94, "grad_norm": 0.9552186727523804, "learning_rate": 8.999207322033299e-08, "loss": 0.4886, "step": 14858 }, { "epoch": 0.94, "grad_norm": 1.0540318489074707, "learning_rate": 8.979839442488425e-08, "loss": 0.5102, "step": 14859 }, { "epoch": 0.94, "grad_norm": 1.0041334629058838, "learning_rate": 8.960492238094421e-08, "loss": 0.4447, "step": 14860 }, { "epoch": 0.94, "grad_norm": 1.0236811637878418, "learning_rate": 8.941165709665966e-08, "loss": 0.5233, "step": 14861 }, { "epoch": 0.94, "grad_norm": 1.0665013790130615, "learning_rate": 8.921859858016635e-08, "loss": 0.4775, "step": 14862 }, { "epoch": 0.94, "grad_norm": 1.0712215900421143, "learning_rate": 8.902574683959442e-08, "loss": 0.4774, "step": 14863 }, { "epoch": 0.94, "grad_norm": 1.0452264547348022, "learning_rate": 8.883310188306515e-08, "loss": 0.4736, "step": 14864 }, { "epoch": 0.94, "grad_norm": 1.2021039724349976, "learning_rate": 8.864066371868873e-08, "loss": 0.5335, "step": 14865 }, { "epoch": 0.94, "grad_norm": 1.1182342767715454, "learning_rate": 8.844843235456868e-08, "loss": 0.5135, "step": 14866 }, { "epoch": 0.94, "grad_norm": 1.0686935186386108, "learning_rate": 8.825640779879962e-08, "loss": 0.446, "step": 14867 }, { "epoch": 0.94, "grad_norm": 0.956581711769104, "learning_rate": 8.806459005946565e-08, "loss": 0.5015, "step": 14868 }, { "epoch": 0.94, "grad_norm": 1.1133993864059448, "learning_rate": 8.787297914464533e-08, "loss": 0.4799, "step": 14869 }, { "epoch": 0.94, "grad_norm": 1.090657114982605, "learning_rate": 8.768157506240494e-08, "loss": 0.5349, "step": 14870 }, { "epoch": 0.94, "grad_norm": 1.0605127811431885, "learning_rate": 8.749037782080528e-08, "loss": 0.5265, "step": 14871 }, { "epoch": 0.94, "grad_norm": 1.0305068492889404, "learning_rate": 8.729938742789601e-08, "loss": 0.4894, "step": 14872 }, { "epoch": 0.94, "grad_norm": 1.0121204853057861, "learning_rate": 8.71086038917196e-08, "loss": 0.5261, "step": 14873 }, { "epoch": 0.94, "grad_norm": 1.0988103151321411, "learning_rate": 8.691802722030906e-08, "loss": 0.5069, "step": 14874 }, { "epoch": 0.94, "grad_norm": 1.0493741035461426, "learning_rate": 8.672765742168964e-08, "loss": 0.4722, "step": 14875 }, { "epoch": 0.94, "grad_norm": 1.1380290985107422, "learning_rate": 8.65374945038755e-08, "loss": 0.492, "step": 14876 }, { "epoch": 0.94, "grad_norm": 1.029023289680481, "learning_rate": 8.634753847487575e-08, "loss": 0.5584, "step": 14877 }, { "epoch": 0.94, "grad_norm": 1.0613927841186523, "learning_rate": 8.615778934268793e-08, "loss": 0.5209, "step": 14878 }, { "epoch": 0.94, "grad_norm": 1.0069340467453003, "learning_rate": 8.59682471153006e-08, "loss": 0.4645, "step": 14879 }, { "epoch": 0.94, "grad_norm": 0.9931952953338623, "learning_rate": 8.577891180069687e-08, "loss": 0.4564, "step": 14880 }, { "epoch": 0.94, "grad_norm": 1.0470446348190308, "learning_rate": 8.558978340684642e-08, "loss": 0.4986, "step": 14881 }, { "epoch": 0.94, "grad_norm": 1.008948802947998, "learning_rate": 8.540086194171515e-08, "loss": 0.5117, "step": 14882 }, { "epoch": 0.94, "grad_norm": 1.0242713689804077, "learning_rate": 8.521214741325722e-08, "loss": 0.5012, "step": 14883 }, { "epoch": 0.94, "grad_norm": 1.0157408714294434, "learning_rate": 8.502363982941797e-08, "loss": 0.454, "step": 14884 }, { "epoch": 0.94, "grad_norm": 1.059425711631775, "learning_rate": 8.483533919813546e-08, "loss": 0.5074, "step": 14885 }, { "epoch": 0.94, "grad_norm": 1.1020406484603882, "learning_rate": 8.464724552733782e-08, "loss": 0.5256, "step": 14886 }, { "epoch": 0.94, "grad_norm": 1.0872770547866821, "learning_rate": 8.445935882494593e-08, "loss": 0.484, "step": 14887 }, { "epoch": 0.94, "grad_norm": 1.1084372997283936, "learning_rate": 8.427167909887069e-08, "loss": 0.4639, "step": 14888 }, { "epoch": 0.94, "grad_norm": 1.215596318244934, "learning_rate": 8.408420635701353e-08, "loss": 0.4957, "step": 14889 }, { "epoch": 0.94, "grad_norm": 1.0778475999832153, "learning_rate": 8.389694060726927e-08, "loss": 0.5289, "step": 14890 }, { "epoch": 0.94, "grad_norm": 1.1347565650939941, "learning_rate": 8.370988185752383e-08, "loss": 0.551, "step": 14891 }, { "epoch": 0.94, "grad_norm": 1.0359888076782227, "learning_rate": 8.352303011565254e-08, "loss": 0.5024, "step": 14892 }, { "epoch": 0.94, "grad_norm": 1.0887922048568726, "learning_rate": 8.333638538952305e-08, "loss": 0.5033, "step": 14893 }, { "epoch": 0.94, "grad_norm": 1.0436344146728516, "learning_rate": 8.314994768699458e-08, "loss": 0.511, "step": 14894 }, { "epoch": 0.94, "grad_norm": 1.0825071334838867, "learning_rate": 8.296371701591699e-08, "loss": 0.4679, "step": 14895 }, { "epoch": 0.94, "grad_norm": 1.039556860923767, "learning_rate": 8.277769338413288e-08, "loss": 0.5195, "step": 14896 }, { "epoch": 0.94, "grad_norm": 1.0870271921157837, "learning_rate": 8.259187679947434e-08, "loss": 0.489, "step": 14897 }, { "epoch": 0.94, "grad_norm": 1.037808895111084, "learning_rate": 8.240626726976453e-08, "loss": 0.493, "step": 14898 }, { "epoch": 0.94, "grad_norm": 1.1069194078445435, "learning_rate": 8.222086480282054e-08, "loss": 0.4903, "step": 14899 }, { "epoch": 0.94, "grad_norm": 1.0563931465148926, "learning_rate": 8.20356694064478e-08, "loss": 0.5428, "step": 14900 }, { "epoch": 0.94, "grad_norm": 0.9862633347511292, "learning_rate": 8.185068108844507e-08, "loss": 0.4719, "step": 14901 }, { "epoch": 0.94, "grad_norm": 1.0177010297775269, "learning_rate": 8.166589985660056e-08, "loss": 0.4688, "step": 14902 }, { "epoch": 0.94, "grad_norm": 1.1310821771621704, "learning_rate": 8.148132571869582e-08, "loss": 0.534, "step": 14903 }, { "epoch": 0.94, "grad_norm": 1.032629132270813, "learning_rate": 8.129695868250242e-08, "loss": 0.4774, "step": 14904 }, { "epoch": 0.94, "grad_norm": 0.9880489706993103, "learning_rate": 8.111279875578304e-08, "loss": 0.4906, "step": 14905 }, { "epoch": 0.94, "grad_norm": 1.0270191431045532, "learning_rate": 8.092884594629147e-08, "loss": 0.5082, "step": 14906 }, { "epoch": 0.94, "grad_norm": 1.0651283264160156, "learning_rate": 8.074510026177485e-08, "loss": 0.4889, "step": 14907 }, { "epoch": 0.94, "grad_norm": 1.0554499626159668, "learning_rate": 8.056156170996866e-08, "loss": 0.5256, "step": 14908 }, { "epoch": 0.94, "grad_norm": 1.075369119644165, "learning_rate": 8.03782302986017e-08, "loss": 0.5045, "step": 14909 }, { "epoch": 0.94, "grad_norm": 1.0443814992904663, "learning_rate": 8.019510603539338e-08, "loss": 0.4852, "step": 14910 }, { "epoch": 0.94, "grad_norm": 1.0223075151443481, "learning_rate": 8.001218892805474e-08, "loss": 0.4904, "step": 14911 }, { "epoch": 0.94, "grad_norm": 1.062279224395752, "learning_rate": 7.982947898428739e-08, "loss": 0.5036, "step": 14912 }, { "epoch": 0.94, "grad_norm": 1.0690451860427856, "learning_rate": 7.964697621178463e-08, "loss": 0.5454, "step": 14913 }, { "epoch": 0.94, "grad_norm": 1.0020755529403687, "learning_rate": 7.946468061823031e-08, "loss": 0.4802, "step": 14914 }, { "epoch": 0.94, "grad_norm": 1.057814598083496, "learning_rate": 7.928259221130163e-08, "loss": 0.5025, "step": 14915 }, { "epoch": 0.94, "grad_norm": 1.0814919471740723, "learning_rate": 7.910071099866523e-08, "loss": 0.5347, "step": 14916 }, { "epoch": 0.95, "grad_norm": 0.983623206615448, "learning_rate": 7.891903698797886e-08, "loss": 0.4386, "step": 14917 }, { "epoch": 0.95, "grad_norm": 1.0692455768585205, "learning_rate": 7.87375701868931e-08, "loss": 0.5177, "step": 14918 }, { "epoch": 0.95, "grad_norm": 1.0529448986053467, "learning_rate": 7.855631060304792e-08, "loss": 0.4429, "step": 14919 }, { "epoch": 0.95, "grad_norm": 1.0599701404571533, "learning_rate": 7.837525824407665e-08, "loss": 0.4916, "step": 14920 }, { "epoch": 0.95, "grad_norm": 1.0690699815750122, "learning_rate": 7.819441311760156e-08, "loss": 0.4819, "step": 14921 }, { "epoch": 0.95, "grad_norm": 1.1062662601470947, "learning_rate": 7.801377523123877e-08, "loss": 0.5503, "step": 14922 }, { "epoch": 0.95, "grad_norm": 0.9787145256996155, "learning_rate": 7.783334459259273e-08, "loss": 0.4679, "step": 14923 }, { "epoch": 0.95, "grad_norm": 1.0887858867645264, "learning_rate": 7.765312120926182e-08, "loss": 0.5464, "step": 14924 }, { "epoch": 0.95, "grad_norm": 0.9913561940193176, "learning_rate": 7.747310508883444e-08, "loss": 0.4786, "step": 14925 }, { "epoch": 0.95, "grad_norm": 1.0134284496307373, "learning_rate": 7.729329623889114e-08, "loss": 0.4498, "step": 14926 }, { "epoch": 0.95, "grad_norm": 1.087054967880249, "learning_rate": 7.711369466700147e-08, "loss": 0.561, "step": 14927 }, { "epoch": 0.95, "grad_norm": 0.9631887674331665, "learning_rate": 7.693430038072824e-08, "loss": 0.4651, "step": 14928 }, { "epoch": 0.95, "grad_norm": 0.9987837672233582, "learning_rate": 7.675511338762654e-08, "loss": 0.4178, "step": 14929 }, { "epoch": 0.95, "grad_norm": 1.06411874294281, "learning_rate": 7.657613369523975e-08, "loss": 0.4956, "step": 14930 }, { "epoch": 0.95, "grad_norm": 1.0640369653701782, "learning_rate": 7.639736131110465e-08, "loss": 0.5093, "step": 14931 }, { "epoch": 0.95, "grad_norm": 1.065610647201538, "learning_rate": 7.621879624274853e-08, "loss": 0.4942, "step": 14932 }, { "epoch": 0.95, "grad_norm": 1.056787133216858, "learning_rate": 7.604043849769094e-08, "loss": 0.4742, "step": 14933 }, { "epoch": 0.95, "grad_norm": 1.0905635356903076, "learning_rate": 7.586228808344087e-08, "loss": 0.5321, "step": 14934 }, { "epoch": 0.95, "grad_norm": 1.0646344423294067, "learning_rate": 7.56843450075001e-08, "loss": 0.5134, "step": 14935 }, { "epoch": 0.95, "grad_norm": 1.0673986673355103, "learning_rate": 7.550660927736042e-08, "loss": 0.4523, "step": 14936 }, { "epoch": 0.95, "grad_norm": 1.1001416444778442, "learning_rate": 7.53290809005075e-08, "loss": 0.5033, "step": 14937 }, { "epoch": 0.95, "grad_norm": 1.0971667766571045, "learning_rate": 7.515175988441481e-08, "loss": 0.4623, "step": 14938 }, { "epoch": 0.95, "grad_norm": 1.0954697132110596, "learning_rate": 7.497464623654915e-08, "loss": 0.4978, "step": 14939 }, { "epoch": 0.95, "grad_norm": 1.0560624599456787, "learning_rate": 7.479773996436845e-08, "loss": 0.4591, "step": 14940 }, { "epoch": 0.95, "grad_norm": 1.057520866394043, "learning_rate": 7.46210410753212e-08, "loss": 0.4754, "step": 14941 }, { "epoch": 0.95, "grad_norm": 1.0726072788238525, "learning_rate": 7.44445495768481e-08, "loss": 0.5061, "step": 14942 }, { "epoch": 0.95, "grad_norm": 1.061622142791748, "learning_rate": 7.426826547637989e-08, "loss": 0.5456, "step": 14943 }, { "epoch": 0.95, "grad_norm": 0.933109700679779, "learning_rate": 7.40921887813395e-08, "loss": 0.4892, "step": 14944 }, { "epoch": 0.95, "grad_norm": 1.0784633159637451, "learning_rate": 7.391631949914102e-08, "loss": 0.4997, "step": 14945 }, { "epoch": 0.95, "grad_norm": 1.0952481031417847, "learning_rate": 7.374065763719018e-08, "loss": 0.4948, "step": 14946 }, { "epoch": 0.95, "grad_norm": 1.0322444438934326, "learning_rate": 7.356520320288274e-08, "loss": 0.5358, "step": 14947 }, { "epoch": 0.95, "grad_norm": 1.0545867681503296, "learning_rate": 7.338995620360722e-08, "loss": 0.4536, "step": 14948 }, { "epoch": 0.95, "grad_norm": 1.0081437826156616, "learning_rate": 7.321491664674163e-08, "loss": 0.4412, "step": 14949 }, { "epoch": 0.95, "grad_norm": 1.1125178337097168, "learning_rate": 7.304008453965727e-08, "loss": 0.5431, "step": 14950 }, { "epoch": 0.95, "grad_norm": 1.0762580633163452, "learning_rate": 7.286545988971495e-08, "loss": 0.5061, "step": 14951 }, { "epoch": 0.95, "grad_norm": 1.0408401489257812, "learning_rate": 7.269104270426818e-08, "loss": 0.4921, "step": 14952 }, { "epoch": 0.95, "grad_norm": 1.1675944328308105, "learning_rate": 7.251683299066059e-08, "loss": 0.5155, "step": 14953 }, { "epoch": 0.95, "grad_norm": 1.1432479619979858, "learning_rate": 7.23428307562274e-08, "loss": 0.519, "step": 14954 }, { "epoch": 0.95, "grad_norm": 1.0275051593780518, "learning_rate": 7.216903600829605e-08, "loss": 0.4785, "step": 14955 }, { "epoch": 0.95, "grad_norm": 1.1260793209075928, "learning_rate": 7.199544875418407e-08, "loss": 0.5439, "step": 14956 }, { "epoch": 0.95, "grad_norm": 1.1154240369796753, "learning_rate": 7.182206900119948e-08, "loss": 0.5083, "step": 14957 }, { "epoch": 0.95, "grad_norm": 1.080336570739746, "learning_rate": 7.164889675664477e-08, "loss": 0.5349, "step": 14958 }, { "epoch": 0.95, "grad_norm": 1.1574434041976929, "learning_rate": 7.147593202781022e-08, "loss": 0.4962, "step": 14959 }, { "epoch": 0.95, "grad_norm": 1.1089222431182861, "learning_rate": 7.13031748219789e-08, "loss": 0.4882, "step": 14960 }, { "epoch": 0.95, "grad_norm": 1.0524308681488037, "learning_rate": 7.113062514642555e-08, "loss": 0.4888, "step": 14961 }, { "epoch": 0.95, "grad_norm": 1.0922738313674927, "learning_rate": 7.095828300841435e-08, "loss": 0.4505, "step": 14962 }, { "epoch": 0.95, "grad_norm": 1.0481584072113037, "learning_rate": 7.078614841520392e-08, "loss": 0.5466, "step": 14963 }, { "epoch": 0.95, "grad_norm": 0.9718726873397827, "learning_rate": 7.061422137404129e-08, "loss": 0.5016, "step": 14964 }, { "epoch": 0.95, "grad_norm": 1.035399317741394, "learning_rate": 7.044250189216561e-08, "loss": 0.4608, "step": 14965 }, { "epoch": 0.95, "grad_norm": 1.0872936248779297, "learning_rate": 7.027098997680726e-08, "loss": 0.5362, "step": 14966 }, { "epoch": 0.95, "grad_norm": 1.107075572013855, "learning_rate": 7.00996856351882e-08, "loss": 0.5019, "step": 14967 }, { "epoch": 0.95, "grad_norm": 1.14595627784729, "learning_rate": 6.992858887452158e-08, "loss": 0.4828, "step": 14968 }, { "epoch": 0.95, "grad_norm": 1.0169750452041626, "learning_rate": 6.975769970201163e-08, "loss": 0.5016, "step": 14969 }, { "epoch": 0.95, "grad_norm": 0.9879432320594788, "learning_rate": 6.958701812485369e-08, "loss": 0.5074, "step": 14970 }, { "epoch": 0.95, "grad_norm": 1.06552255153656, "learning_rate": 6.94165441502348e-08, "loss": 0.498, "step": 14971 }, { "epoch": 0.95, "grad_norm": 1.1178659200668335, "learning_rate": 6.924627778533366e-08, "loss": 0.4977, "step": 14972 }, { "epoch": 0.95, "grad_norm": 1.096285343170166, "learning_rate": 6.907621903731842e-08, "loss": 0.5299, "step": 14973 }, { "epoch": 0.95, "grad_norm": 1.0517348051071167, "learning_rate": 6.890636791335003e-08, "loss": 0.4543, "step": 14974 }, { "epoch": 0.95, "grad_norm": 0.9691824316978455, "learning_rate": 6.873672442058054e-08, "loss": 0.4705, "step": 14975 }, { "epoch": 0.95, "grad_norm": 1.0984865427017212, "learning_rate": 6.856728856615314e-08, "loss": 0.4788, "step": 14976 }, { "epoch": 0.95, "grad_norm": 1.0343778133392334, "learning_rate": 6.839806035720209e-08, "loss": 0.5031, "step": 14977 }, { "epoch": 0.95, "grad_norm": 1.026618242263794, "learning_rate": 6.822903980085282e-08, "loss": 0.5167, "step": 14978 }, { "epoch": 0.95, "grad_norm": 1.0796104669570923, "learning_rate": 6.806022690422187e-08, "loss": 0.5074, "step": 14979 }, { "epoch": 0.95, "grad_norm": 1.0644936561584473, "learning_rate": 6.789162167441798e-08, "loss": 0.5252, "step": 14980 }, { "epoch": 0.95, "grad_norm": 1.0769859552383423, "learning_rate": 6.772322411854048e-08, "loss": 0.5469, "step": 14981 }, { "epoch": 0.95, "grad_norm": 1.1746636629104614, "learning_rate": 6.755503424368037e-08, "loss": 0.5216, "step": 14982 }, { "epoch": 0.95, "grad_norm": 1.1133933067321777, "learning_rate": 6.73870520569181e-08, "loss": 0.5592, "step": 14983 }, { "epoch": 0.95, "grad_norm": 1.0979746580123901, "learning_rate": 6.721927756532853e-08, "loss": 0.4748, "step": 14984 }, { "epoch": 0.95, "grad_norm": 1.0335559844970703, "learning_rate": 6.705171077597495e-08, "loss": 0.4971, "step": 14985 }, { "epoch": 0.95, "grad_norm": 1.084326148033142, "learning_rate": 6.68843516959139e-08, "loss": 0.5545, "step": 14986 }, { "epoch": 0.95, "grad_norm": 1.1023401021957397, "learning_rate": 6.67172003321903e-08, "loss": 0.5284, "step": 14987 }, { "epoch": 0.95, "grad_norm": 1.1031067371368408, "learning_rate": 6.655025669184522e-08, "loss": 0.5324, "step": 14988 }, { "epoch": 0.95, "grad_norm": 1.0138829946517944, "learning_rate": 6.638352078190636e-08, "loss": 0.4523, "step": 14989 }, { "epoch": 0.95, "grad_norm": 1.0137693881988525, "learning_rate": 6.621699260939418e-08, "loss": 0.4951, "step": 14990 }, { "epoch": 0.95, "grad_norm": 1.0743118524551392, "learning_rate": 6.605067218132145e-08, "loss": 0.5367, "step": 14991 }, { "epoch": 0.95, "grad_norm": 1.0681713819503784, "learning_rate": 6.58845595046903e-08, "loss": 0.562, "step": 14992 }, { "epoch": 0.95, "grad_norm": 1.1324034929275513, "learning_rate": 6.571865458649629e-08, "loss": 0.5746, "step": 14993 }, { "epoch": 0.95, "grad_norm": 1.0964475870132446, "learning_rate": 6.555295743372492e-08, "loss": 0.49, "step": 14994 }, { "epoch": 0.95, "grad_norm": 0.9967697262763977, "learning_rate": 6.538746805335284e-08, "loss": 0.4638, "step": 14995 }, { "epoch": 0.95, "grad_norm": 1.097658395767212, "learning_rate": 6.52221864523478e-08, "loss": 0.5465, "step": 14996 }, { "epoch": 0.95, "grad_norm": 1.0048773288726807, "learning_rate": 6.505711263766978e-08, "loss": 0.4931, "step": 14997 }, { "epoch": 0.95, "grad_norm": 1.0225428342819214, "learning_rate": 6.48922466162688e-08, "loss": 0.5089, "step": 14998 }, { "epoch": 0.95, "grad_norm": 0.98487389087677, "learning_rate": 6.472758839508819e-08, "loss": 0.4879, "step": 14999 }, { "epoch": 0.95, "grad_norm": 0.986153244972229, "learning_rate": 6.456313798105962e-08, "loss": 0.4959, "step": 15000 }, { "epoch": 0.95, "grad_norm": 1.085741639137268, "learning_rate": 6.439889538110867e-08, "loss": 0.5745, "step": 15001 }, { "epoch": 0.95, "grad_norm": 1.0911871194839478, "learning_rate": 6.423486060215034e-08, "loss": 0.5103, "step": 15002 }, { "epoch": 0.95, "grad_norm": 1.0940908193588257, "learning_rate": 6.40710336510919e-08, "loss": 0.4667, "step": 15003 }, { "epoch": 0.95, "grad_norm": 1.0498441457748413, "learning_rate": 6.390741453483119e-08, "loss": 0.4825, "step": 15004 }, { "epoch": 0.95, "grad_norm": 1.019486665725708, "learning_rate": 6.374400326025765e-08, "loss": 0.5004, "step": 15005 }, { "epoch": 0.95, "grad_norm": 1.1773452758789062, "learning_rate": 6.358079983425247e-08, "loss": 0.4969, "step": 15006 }, { "epoch": 0.95, "grad_norm": 1.051240086555481, "learning_rate": 6.341780426368737e-08, "loss": 0.5094, "step": 15007 }, { "epoch": 0.95, "grad_norm": 1.0501857995986938, "learning_rate": 6.32550165554252e-08, "loss": 0.5171, "step": 15008 }, { "epoch": 0.95, "grad_norm": 1.1003665924072266, "learning_rate": 6.309243671632048e-08, "loss": 0.541, "step": 15009 }, { "epoch": 0.95, "grad_norm": 1.1350167989730835, "learning_rate": 6.293006475321939e-08, "loss": 0.5066, "step": 15010 }, { "epoch": 0.95, "grad_norm": 1.1172188520431519, "learning_rate": 6.276790067295813e-08, "loss": 0.5128, "step": 15011 }, { "epoch": 0.95, "grad_norm": 1.0877690315246582, "learning_rate": 6.260594448236513e-08, "loss": 0.4797, "step": 15012 }, { "epoch": 0.95, "grad_norm": 1.0320991277694702, "learning_rate": 6.244419618825992e-08, "loss": 0.4801, "step": 15013 }, { "epoch": 0.95, "grad_norm": 1.0631580352783203, "learning_rate": 6.228265579745318e-08, "loss": 0.4995, "step": 15014 }, { "epoch": 0.95, "grad_norm": 1.0299152135849, "learning_rate": 6.212132331674725e-08, "loss": 0.5023, "step": 15015 }, { "epoch": 0.95, "grad_norm": 1.146296739578247, "learning_rate": 6.196019875293391e-08, "loss": 0.5539, "step": 15016 }, { "epoch": 0.95, "grad_norm": 1.012778401374817, "learning_rate": 6.179928211279884e-08, "loss": 0.4976, "step": 15017 }, { "epoch": 0.95, "grad_norm": 1.0477205514907837, "learning_rate": 6.163857340311718e-08, "loss": 0.4565, "step": 15018 }, { "epoch": 0.95, "grad_norm": 1.0732078552246094, "learning_rate": 6.147807263065575e-08, "loss": 0.4543, "step": 15019 }, { "epoch": 0.95, "grad_norm": 1.1308003664016724, "learning_rate": 6.131777980217302e-08, "loss": 0.5284, "step": 15020 }, { "epoch": 0.95, "grad_norm": 1.0013229846954346, "learning_rate": 6.115769492441859e-08, "loss": 0.5011, "step": 15021 }, { "epoch": 0.95, "grad_norm": 1.1048378944396973, "learning_rate": 6.099781800413151e-08, "loss": 0.5263, "step": 15022 }, { "epoch": 0.95, "grad_norm": 1.2660025358200073, "learning_rate": 6.083814904804586e-08, "loss": 0.5616, "step": 15023 }, { "epoch": 0.95, "grad_norm": 1.045714259147644, "learning_rate": 6.067868806288346e-08, "loss": 0.5137, "step": 15024 }, { "epoch": 0.95, "grad_norm": 0.9999276399612427, "learning_rate": 6.05194350553584e-08, "loss": 0.4884, "step": 15025 }, { "epoch": 0.95, "grad_norm": 1.1267025470733643, "learning_rate": 6.036039003217697e-08, "loss": 0.5067, "step": 15026 }, { "epoch": 0.95, "grad_norm": 1.0568078756332397, "learning_rate": 6.02015530000355e-08, "loss": 0.5517, "step": 15027 }, { "epoch": 0.95, "grad_norm": 1.0691008567810059, "learning_rate": 6.00429239656225e-08, "loss": 0.4856, "step": 15028 }, { "epoch": 0.95, "grad_norm": 1.0379536151885986, "learning_rate": 5.988450293561765e-08, "loss": 0.471, "step": 15029 }, { "epoch": 0.95, "grad_norm": 1.0408005714416504, "learning_rate": 5.972628991669006e-08, "loss": 0.4636, "step": 15030 }, { "epoch": 0.95, "grad_norm": 1.1032767295837402, "learning_rate": 5.956828491550326e-08, "loss": 0.5221, "step": 15031 }, { "epoch": 0.95, "grad_norm": 1.0133955478668213, "learning_rate": 5.941048793870918e-08, "loss": 0.4752, "step": 15032 }, { "epoch": 0.95, "grad_norm": 1.028398871421814, "learning_rate": 5.92528989929525e-08, "loss": 0.5115, "step": 15033 }, { "epoch": 0.95, "grad_norm": 0.9963531494140625, "learning_rate": 5.9095518084868467e-08, "loss": 0.5157, "step": 15034 }, { "epoch": 0.95, "grad_norm": 1.0203803777694702, "learning_rate": 5.893834522108399e-08, "loss": 0.5161, "step": 15035 }, { "epoch": 0.95, "grad_norm": 1.105560064315796, "learning_rate": 5.8781380408217124e-08, "loss": 0.4944, "step": 15036 }, { "epoch": 0.95, "grad_norm": 1.0529372692108154, "learning_rate": 5.862462365287702e-08, "loss": 0.4984, "step": 15037 }, { "epoch": 0.95, "grad_norm": 1.101141333580017, "learning_rate": 5.846807496166451e-08, "loss": 0.5171, "step": 15038 }, { "epoch": 0.95, "grad_norm": 1.069178581237793, "learning_rate": 5.831173434117043e-08, "loss": 0.4868, "step": 15039 }, { "epoch": 0.95, "grad_norm": 1.0065348148345947, "learning_rate": 5.815560179797897e-08, "loss": 0.4754, "step": 15040 }, { "epoch": 0.95, "grad_norm": 1.010067343711853, "learning_rate": 5.7999677338663184e-08, "loss": 0.475, "step": 15041 }, { "epoch": 0.95, "grad_norm": 1.0668708086013794, "learning_rate": 5.7843960969790056e-08, "loss": 0.5041, "step": 15042 }, { "epoch": 0.95, "grad_norm": 1.0093073844909668, "learning_rate": 5.768845269791379e-08, "loss": 0.5062, "step": 15043 }, { "epoch": 0.95, "grad_norm": 0.9874602556228638, "learning_rate": 5.7533152529584135e-08, "loss": 0.4237, "step": 15044 }, { "epoch": 0.95, "grad_norm": 1.1246216297149658, "learning_rate": 5.7378060471340866e-08, "loss": 0.4612, "step": 15045 }, { "epoch": 0.95, "grad_norm": 1.1335710287094116, "learning_rate": 5.7223176529712097e-08, "loss": 0.5429, "step": 15046 }, { "epoch": 0.95, "grad_norm": 0.9604591727256775, "learning_rate": 5.70685007112215e-08, "loss": 0.4539, "step": 15047 }, { "epoch": 0.95, "grad_norm": 1.0197806358337402, "learning_rate": 5.691403302238052e-08, "loss": 0.4636, "step": 15048 }, { "epoch": 0.95, "grad_norm": 1.0385576486587524, "learning_rate": 5.6759773469694523e-08, "loss": 0.5175, "step": 15049 }, { "epoch": 0.95, "grad_norm": 0.9743496179580688, "learning_rate": 5.660572205965775e-08, "loss": 0.4715, "step": 15050 }, { "epoch": 0.95, "grad_norm": 1.082789659500122, "learning_rate": 5.645187879875724e-08, "loss": 0.4957, "step": 15051 }, { "epoch": 0.95, "grad_norm": 0.9587162137031555, "learning_rate": 5.6298243693470586e-08, "loss": 0.5016, "step": 15052 }, { "epoch": 0.95, "grad_norm": 1.2008246183395386, "learning_rate": 5.614481675026762e-08, "loss": 0.4949, "step": 15053 }, { "epoch": 0.95, "grad_norm": 1.0328693389892578, "learning_rate": 5.59915979756076e-08, "loss": 0.4821, "step": 15054 }, { "epoch": 0.95, "grad_norm": 1.09230375289917, "learning_rate": 5.583858737594205e-08, "loss": 0.486, "step": 15055 }, { "epoch": 0.95, "grad_norm": 1.1106675863265991, "learning_rate": 5.5685784957714707e-08, "loss": 0.4895, "step": 15056 }, { "epoch": 0.95, "grad_norm": 1.0798262357711792, "learning_rate": 5.5533190727358745e-08, "loss": 0.5254, "step": 15057 }, { "epoch": 0.95, "grad_norm": 1.0557674169540405, "learning_rate": 5.538080469129958e-08, "loss": 0.5157, "step": 15058 }, { "epoch": 0.95, "grad_norm": 0.9619843363761902, "learning_rate": 5.522862685595376e-08, "loss": 0.4913, "step": 15059 }, { "epoch": 0.95, "grad_norm": 1.0052016973495483, "learning_rate": 5.507665722772837e-08, "loss": 0.4738, "step": 15060 }, { "epoch": 0.95, "grad_norm": 1.033711314201355, "learning_rate": 5.492489581302329e-08, "loss": 0.4792, "step": 15061 }, { "epoch": 0.95, "grad_norm": 1.0828896760940552, "learning_rate": 5.477334261822842e-08, "loss": 0.503, "step": 15062 }, { "epoch": 0.95, "grad_norm": 1.0456746816635132, "learning_rate": 5.46219976497242e-08, "loss": 0.4859, "step": 15063 }, { "epoch": 0.95, "grad_norm": 0.9850556254386902, "learning_rate": 5.447086091388443e-08, "loss": 0.4761, "step": 15064 }, { "epoch": 0.95, "grad_norm": 1.0214520692825317, "learning_rate": 5.4319932417072344e-08, "loss": 0.5008, "step": 15065 }, { "epoch": 0.95, "grad_norm": 1.0863326787948608, "learning_rate": 5.416921216564286e-08, "loss": 0.5266, "step": 15066 }, { "epoch": 0.95, "grad_norm": 1.0874569416046143, "learning_rate": 5.401870016594313e-08, "loss": 0.507, "step": 15067 }, { "epoch": 0.95, "grad_norm": 1.1380571126937866, "learning_rate": 5.38683964243103e-08, "loss": 0.5038, "step": 15068 }, { "epoch": 0.95, "grad_norm": 1.0237140655517578, "learning_rate": 5.3718300947072086e-08, "loss": 0.4931, "step": 15069 }, { "epoch": 0.95, "grad_norm": 1.051347255706787, "learning_rate": 5.356841374055011e-08, "loss": 0.5361, "step": 15070 }, { "epoch": 0.95, "grad_norm": 1.0510364770889282, "learning_rate": 5.341873481105431e-08, "loss": 0.5426, "step": 15071 }, { "epoch": 0.95, "grad_norm": 1.096970796585083, "learning_rate": 5.3269264164887977e-08, "loss": 0.554, "step": 15072 }, { "epoch": 0.95, "grad_norm": 1.1007144451141357, "learning_rate": 5.3120001808344425e-08, "loss": 0.4805, "step": 15073 }, { "epoch": 0.96, "grad_norm": 0.994536280632019, "learning_rate": 5.297094774770861e-08, "loss": 0.4438, "step": 15074 }, { "epoch": 0.96, "grad_norm": 1.0439180135726929, "learning_rate": 5.282210198925664e-08, "loss": 0.5223, "step": 15075 }, { "epoch": 0.96, "grad_norm": 1.0629830360412598, "learning_rate": 5.267346453925626e-08, "loss": 0.5244, "step": 15076 }, { "epoch": 0.96, "grad_norm": 1.2122411727905273, "learning_rate": 5.2525035403965805e-08, "loss": 0.4977, "step": 15077 }, { "epoch": 0.96, "grad_norm": 1.081017017364502, "learning_rate": 5.237681458963473e-08, "loss": 0.5228, "step": 15078 }, { "epoch": 0.96, "grad_norm": 0.982690691947937, "learning_rate": 5.222880210250469e-08, "loss": 0.4755, "step": 15079 }, { "epoch": 0.96, "grad_norm": 1.0423614978790283, "learning_rate": 5.2080997948807944e-08, "loss": 0.5246, "step": 15080 }, { "epoch": 0.96, "grad_norm": 1.084811806678772, "learning_rate": 5.193340213476727e-08, "loss": 0.4849, "step": 15081 }, { "epoch": 0.96, "grad_norm": 1.1122099161148071, "learning_rate": 5.178601466659827e-08, "loss": 0.5022, "step": 15082 }, { "epoch": 0.96, "grad_norm": 1.1095976829528809, "learning_rate": 5.163883555050708e-08, "loss": 0.5244, "step": 15083 }, { "epoch": 0.96, "grad_norm": 1.0307388305664062, "learning_rate": 5.149186479268986e-08, "loss": 0.4971, "step": 15084 }, { "epoch": 0.96, "grad_norm": 1.1129001379013062, "learning_rate": 5.134510239933554e-08, "loss": 0.5413, "step": 15085 }, { "epoch": 0.96, "grad_norm": 1.0419707298278809, "learning_rate": 5.119854837662419e-08, "loss": 0.4838, "step": 15086 }, { "epoch": 0.96, "grad_norm": 1.1126266717910767, "learning_rate": 5.1052202730725865e-08, "loss": 0.4948, "step": 15087 }, { "epoch": 0.96, "grad_norm": 1.0315604209899902, "learning_rate": 5.0906065467803965e-08, "loss": 0.4857, "step": 15088 }, { "epoch": 0.96, "grad_norm": 1.0968666076660156, "learning_rate": 5.0760136594010246e-08, "loss": 0.4508, "step": 15089 }, { "epoch": 0.96, "grad_norm": 1.015987753868103, "learning_rate": 5.061441611549034e-08, "loss": 0.4891, "step": 15090 }, { "epoch": 0.96, "grad_norm": 1.024927020072937, "learning_rate": 5.046890403837989e-08, "loss": 0.5044, "step": 15091 }, { "epoch": 0.96, "grad_norm": 1.001304268836975, "learning_rate": 5.032360036880568e-08, "loss": 0.49, "step": 15092 }, { "epoch": 0.96, "grad_norm": 1.0158631801605225, "learning_rate": 5.0178505112885576e-08, "loss": 0.4882, "step": 15093 }, { "epoch": 0.96, "grad_norm": 1.1382091045379639, "learning_rate": 5.00336182767297e-08, "loss": 0.4925, "step": 15094 }, { "epoch": 0.96, "grad_norm": 1.147162914276123, "learning_rate": 4.988893986643817e-08, "loss": 0.4986, "step": 15095 }, { "epoch": 0.96, "grad_norm": 0.9799484014511108, "learning_rate": 4.9744469888103887e-08, "loss": 0.5284, "step": 15096 }, { "epoch": 0.96, "grad_norm": 1.0316082239151, "learning_rate": 4.9600208347809206e-08, "loss": 0.4583, "step": 15097 }, { "epoch": 0.96, "grad_norm": 1.1625584363937378, "learning_rate": 4.945615525162761e-08, "loss": 0.527, "step": 15098 }, { "epoch": 0.96, "grad_norm": 1.015092134475708, "learning_rate": 4.931231060562702e-08, "loss": 0.4586, "step": 15099 }, { "epoch": 0.96, "grad_norm": 1.123383641242981, "learning_rate": 4.916867441586204e-08, "loss": 0.5561, "step": 15100 }, { "epoch": 0.96, "grad_norm": 1.009797215461731, "learning_rate": 4.902524668838116e-08, "loss": 0.4791, "step": 15101 }, { "epoch": 0.96, "grad_norm": 1.050193190574646, "learning_rate": 4.88820274292251e-08, "loss": 0.4855, "step": 15102 }, { "epoch": 0.96, "grad_norm": 1.0637693405151367, "learning_rate": 4.873901664442182e-08, "loss": 0.4757, "step": 15103 }, { "epoch": 0.96, "grad_norm": 1.0080116987228394, "learning_rate": 4.8596214339995395e-08, "loss": 0.4764, "step": 15104 }, { "epoch": 0.96, "grad_norm": 1.1039217710494995, "learning_rate": 4.8453620521957124e-08, "loss": 0.503, "step": 15105 }, { "epoch": 0.96, "grad_norm": 1.090170979499817, "learning_rate": 4.83112351963122e-08, "loss": 0.465, "step": 15106 }, { "epoch": 0.96, "grad_norm": 1.1621887683868408, "learning_rate": 4.816905836905528e-08, "loss": 0.5391, "step": 15107 }, { "epoch": 0.96, "grad_norm": 1.0159717798233032, "learning_rate": 4.802709004617267e-08, "loss": 0.493, "step": 15108 }, { "epoch": 0.96, "grad_norm": 1.0658420324325562, "learning_rate": 4.788533023364295e-08, "loss": 0.4565, "step": 15109 }, { "epoch": 0.96, "grad_norm": 1.1132019758224487, "learning_rate": 4.77437789374352e-08, "loss": 0.5395, "step": 15110 }, { "epoch": 0.96, "grad_norm": 1.1035053730010986, "learning_rate": 4.760243616350913e-08, "loss": 0.5191, "step": 15111 }, { "epoch": 0.96, "grad_norm": 1.1152589321136475, "learning_rate": 4.746130191781606e-08, "loss": 0.499, "step": 15112 }, { "epoch": 0.96, "grad_norm": 1.0198644399642944, "learning_rate": 4.7320376206299034e-08, "loss": 0.5292, "step": 15113 }, { "epoch": 0.96, "grad_norm": 1.0909252166748047, "learning_rate": 4.717965903489219e-08, "loss": 0.5047, "step": 15114 }, { "epoch": 0.96, "grad_norm": 1.043396234512329, "learning_rate": 4.7039150409519674e-08, "loss": 0.4878, "step": 15115 }, { "epoch": 0.96, "grad_norm": 1.0830494165420532, "learning_rate": 4.6898850336098975e-08, "loss": 0.489, "step": 15116 }, { "epoch": 0.96, "grad_norm": 1.072623610496521, "learning_rate": 4.675875882053704e-08, "loss": 0.5088, "step": 15117 }, { "epoch": 0.96, "grad_norm": 0.9616676568984985, "learning_rate": 4.6618875868733037e-08, "loss": 0.4776, "step": 15118 }, { "epoch": 0.96, "grad_norm": 1.0668658018112183, "learning_rate": 4.6479201486575585e-08, "loss": 0.4682, "step": 15119 }, { "epoch": 0.96, "grad_norm": 1.0576627254486084, "learning_rate": 4.633973567994776e-08, "loss": 0.5172, "step": 15120 }, { "epoch": 0.96, "grad_norm": 1.0642863512039185, "learning_rate": 4.620047845472098e-08, "loss": 0.5077, "step": 15121 }, { "epoch": 0.96, "grad_norm": 1.0122225284576416, "learning_rate": 4.606142981675887e-08, "loss": 0.5101, "step": 15122 }, { "epoch": 0.96, "grad_norm": 1.069036602973938, "learning_rate": 4.592258977191622e-08, "loss": 0.5149, "step": 15123 }, { "epoch": 0.96, "grad_norm": 1.05073881149292, "learning_rate": 4.578395832603999e-08, "loss": 0.4921, "step": 15124 }, { "epoch": 0.96, "grad_norm": 1.090564489364624, "learning_rate": 4.5645535484966085e-08, "loss": 0.4889, "step": 15125 }, { "epoch": 0.96, "grad_norm": 1.0945969820022583, "learning_rate": 4.5507321254524287e-08, "loss": 0.4914, "step": 15126 }, { "epoch": 0.96, "grad_norm": 1.1259173154830933, "learning_rate": 4.536931564053382e-08, "loss": 0.51, "step": 15127 }, { "epoch": 0.96, "grad_norm": 1.1442131996154785, "learning_rate": 4.523151864880504e-08, "loss": 0.4917, "step": 15128 }, { "epoch": 0.96, "grad_norm": 1.194004774093628, "learning_rate": 4.5093930285141086e-08, "loss": 0.5238, "step": 15129 }, { "epoch": 0.96, "grad_norm": 1.0926456451416016, "learning_rate": 4.4956550555334546e-08, "loss": 0.5154, "step": 15130 }, { "epoch": 0.96, "grad_norm": 1.0433238744735718, "learning_rate": 4.4819379465170785e-08, "loss": 0.4799, "step": 15131 }, { "epoch": 0.96, "grad_norm": 1.1860097646713257, "learning_rate": 4.4682417020425194e-08, "loss": 0.5124, "step": 15132 }, { "epoch": 0.96, "grad_norm": 1.1235885620117188, "learning_rate": 4.454566322686371e-08, "loss": 0.5486, "step": 15133 }, { "epoch": 0.96, "grad_norm": 1.1014682054519653, "learning_rate": 4.440911809024673e-08, "loss": 0.5076, "step": 15134 }, { "epoch": 0.96, "grad_norm": 1.122446060180664, "learning_rate": 4.427278161632187e-08, "loss": 0.4887, "step": 15135 }, { "epoch": 0.96, "grad_norm": 1.0771417617797852, "learning_rate": 4.413665381083065e-08, "loss": 0.433, "step": 15136 }, { "epoch": 0.96, "grad_norm": 1.126584768295288, "learning_rate": 4.4000734679504606e-08, "loss": 0.4713, "step": 15137 }, { "epoch": 0.96, "grad_norm": 1.0992733240127563, "learning_rate": 4.386502422806749e-08, "loss": 0.5091, "step": 15138 }, { "epoch": 0.96, "grad_norm": 1.031760573387146, "learning_rate": 4.37295224622325e-08, "loss": 0.4908, "step": 15139 }, { "epoch": 0.96, "grad_norm": 0.9821397066116333, "learning_rate": 4.359422938770619e-08, "loss": 0.5034, "step": 15140 }, { "epoch": 0.96, "grad_norm": 1.1158956289291382, "learning_rate": 4.3459145010184e-08, "loss": 0.4501, "step": 15141 }, { "epoch": 0.96, "grad_norm": 1.0483918190002441, "learning_rate": 4.3324269335355274e-08, "loss": 0.5242, "step": 15142 }, { "epoch": 0.96, "grad_norm": 1.1130990982055664, "learning_rate": 4.31896023688988e-08, "loss": 0.5054, "step": 15143 }, { "epoch": 0.96, "grad_norm": 1.1633292436599731, "learning_rate": 4.305514411648393e-08, "loss": 0.4917, "step": 15144 }, { "epoch": 0.96, "grad_norm": 1.010609745979309, "learning_rate": 4.2920894583773906e-08, "loss": 0.4604, "step": 15145 }, { "epoch": 0.96, "grad_norm": 0.957053005695343, "learning_rate": 4.278685377641978e-08, "loss": 0.4582, "step": 15146 }, { "epoch": 0.96, "grad_norm": 1.0870672464370728, "learning_rate": 4.2653021700066466e-08, "loss": 0.4749, "step": 15147 }, { "epoch": 0.96, "grad_norm": 1.087636113166809, "learning_rate": 4.251939836034946e-08, "loss": 0.4881, "step": 15148 }, { "epoch": 0.96, "grad_norm": 1.094960331916809, "learning_rate": 4.238598376289482e-08, "loss": 0.4742, "step": 15149 }, { "epoch": 0.96, "grad_norm": 1.0509110689163208, "learning_rate": 4.225277791331972e-08, "loss": 0.4919, "step": 15150 }, { "epoch": 0.96, "grad_norm": 1.0320489406585693, "learning_rate": 4.211978081723356e-08, "loss": 0.492, "step": 15151 }, { "epoch": 0.96, "grad_norm": 1.0644983053207397, "learning_rate": 4.19869924802363e-08, "loss": 0.4932, "step": 15152 }, { "epoch": 0.96, "grad_norm": 0.9970551133155823, "learning_rate": 4.185441290791903e-08, "loss": 0.4491, "step": 15153 }, { "epoch": 0.96, "grad_norm": 1.0908452272415161, "learning_rate": 4.1722042105863946e-08, "loss": 0.5192, "step": 15154 }, { "epoch": 0.96, "grad_norm": 1.086653232574463, "learning_rate": 4.158988007964548e-08, "loss": 0.5056, "step": 15155 }, { "epoch": 0.96, "grad_norm": 1.088154911994934, "learning_rate": 4.145792683482808e-08, "loss": 0.5084, "step": 15156 }, { "epoch": 0.96, "grad_norm": 1.0358635187149048, "learning_rate": 4.132618237696784e-08, "loss": 0.5237, "step": 15157 }, { "epoch": 0.96, "grad_norm": 1.0679439306259155, "learning_rate": 4.1194646711612555e-08, "loss": 0.5357, "step": 15158 }, { "epoch": 0.96, "grad_norm": 1.0736949443817139, "learning_rate": 4.1063319844299454e-08, "loss": 0.4725, "step": 15159 }, { "epoch": 0.96, "grad_norm": 1.0397822856903076, "learning_rate": 4.0932201780559674e-08, "loss": 0.5, "step": 15160 }, { "epoch": 0.96, "grad_norm": 1.1251829862594604, "learning_rate": 4.080129252591325e-08, "loss": 0.4841, "step": 15161 }, { "epoch": 0.96, "grad_norm": 1.0683003664016724, "learning_rate": 4.0670592085872984e-08, "loss": 0.5109, "step": 15162 }, { "epoch": 0.96, "grad_norm": 1.0046395063400269, "learning_rate": 4.054010046594115e-08, "loss": 0.4897, "step": 15163 }, { "epoch": 0.96, "grad_norm": 1.0667266845703125, "learning_rate": 4.040981767161334e-08, "loss": 0.5108, "step": 15164 }, { "epoch": 0.96, "grad_norm": 1.0909169912338257, "learning_rate": 4.027974370837518e-08, "loss": 0.5137, "step": 15165 }, { "epoch": 0.96, "grad_norm": 1.038557529449463, "learning_rate": 4.014987858170283e-08, "loss": 0.5008, "step": 15166 }, { "epoch": 0.96, "grad_norm": 1.090004324913025, "learning_rate": 4.0020222297065256e-08, "loss": 0.4983, "step": 15167 }, { "epoch": 0.96, "grad_norm": 1.1984957456588745, "learning_rate": 3.9890774859921987e-08, "loss": 0.5201, "step": 15168 }, { "epoch": 0.96, "grad_norm": 1.1261996030807495, "learning_rate": 3.97615362757231e-08, "loss": 0.5366, "step": 15169 }, { "epoch": 0.96, "grad_norm": 1.1402543783187866, "learning_rate": 3.9632506549910356e-08, "loss": 0.5212, "step": 15170 }, { "epoch": 0.96, "grad_norm": 1.0591979026794434, "learning_rate": 3.9503685687916627e-08, "loss": 0.46, "step": 15171 }, { "epoch": 0.96, "grad_norm": 1.0578498840332031, "learning_rate": 3.937507369516702e-08, "loss": 0.5166, "step": 15172 }, { "epoch": 0.96, "grad_norm": 1.1559275388717651, "learning_rate": 3.92466705770761e-08, "loss": 0.4912, "step": 15173 }, { "epoch": 0.96, "grad_norm": 1.0619618892669678, "learning_rate": 3.911847633905008e-08, "loss": 0.5137, "step": 15174 }, { "epoch": 0.96, "grad_norm": 1.071740746498108, "learning_rate": 3.899049098648799e-08, "loss": 0.4929, "step": 15175 }, { "epoch": 0.96, "grad_norm": 1.1121670007705688, "learning_rate": 3.88627145247783e-08, "loss": 0.5078, "step": 15176 }, { "epoch": 0.96, "grad_norm": 1.100979208946228, "learning_rate": 3.873514695930114e-08, "loss": 0.5421, "step": 15177 }, { "epoch": 0.96, "grad_norm": 1.0536065101623535, "learning_rate": 3.860778829542777e-08, "loss": 0.5298, "step": 15178 }, { "epoch": 0.96, "grad_norm": 1.0702991485595703, "learning_rate": 3.848063853852113e-08, "loss": 0.4964, "step": 15179 }, { "epoch": 0.96, "grad_norm": 1.0179272890090942, "learning_rate": 3.835369769393471e-08, "loss": 0.4662, "step": 15180 }, { "epoch": 0.96, "grad_norm": 1.0654373168945312, "learning_rate": 3.822696576701368e-08, "loss": 0.5076, "step": 15181 }, { "epoch": 0.96, "grad_norm": 1.0972926616668701, "learning_rate": 3.8100442763094324e-08, "loss": 0.4772, "step": 15182 }, { "epoch": 0.96, "grad_norm": 1.0048757791519165, "learning_rate": 3.797412868750461e-08, "loss": 0.4918, "step": 15183 }, { "epoch": 0.96, "grad_norm": 1.0761852264404297, "learning_rate": 3.784802354556249e-08, "loss": 0.4763, "step": 15184 }, { "epoch": 0.96, "grad_norm": 1.1550503969192505, "learning_rate": 3.7722127342578183e-08, "loss": 0.4978, "step": 15185 }, { "epoch": 0.96, "grad_norm": 1.063727855682373, "learning_rate": 3.759644008385244e-08, "loss": 0.4868, "step": 15186 }, { "epoch": 0.96, "grad_norm": 1.0755116939544678, "learning_rate": 3.747096177467768e-08, "loss": 0.4823, "step": 15187 }, { "epoch": 0.96, "grad_norm": 1.024055004119873, "learning_rate": 3.7345692420337476e-08, "loss": 0.46, "step": 15188 }, { "epoch": 0.96, "grad_norm": 1.0906200408935547, "learning_rate": 3.722063202610593e-08, "loss": 0.5097, "step": 15189 }, { "epoch": 0.96, "grad_norm": 1.0795869827270508, "learning_rate": 3.709578059724939e-08, "loss": 0.4971, "step": 15190 }, { "epoch": 0.96, "grad_norm": 1.0026204586029053, "learning_rate": 3.697113813902531e-08, "loss": 0.4939, "step": 15191 }, { "epoch": 0.96, "grad_norm": 1.0426870584487915, "learning_rate": 3.684670465668116e-08, "loss": 0.5157, "step": 15192 }, { "epoch": 0.96, "grad_norm": 1.0251553058624268, "learning_rate": 3.6722480155456655e-08, "loss": 0.4769, "step": 15193 }, { "epoch": 0.96, "grad_norm": 1.051120400428772, "learning_rate": 3.6598464640582586e-08, "loss": 0.5039, "step": 15194 }, { "epoch": 0.96, "grad_norm": 1.0711596012115479, "learning_rate": 3.64746581172809e-08, "loss": 0.5578, "step": 15195 }, { "epoch": 0.96, "grad_norm": 1.0845519304275513, "learning_rate": 3.6351060590764656e-08, "loss": 0.4918, "step": 15196 }, { "epoch": 0.96, "grad_norm": 0.9789648056030273, "learning_rate": 3.6227672066237454e-08, "loss": 0.4764, "step": 15197 }, { "epoch": 0.96, "grad_norm": 1.0158416032791138, "learning_rate": 3.6104492548895695e-08, "loss": 0.5349, "step": 15198 }, { "epoch": 0.96, "grad_norm": 1.1074073314666748, "learning_rate": 3.5981522043925796e-08, "loss": 0.4843, "step": 15199 }, { "epoch": 0.96, "grad_norm": 1.0457102060317993, "learning_rate": 3.585876055650528e-08, "loss": 0.4954, "step": 15200 }, { "epoch": 0.96, "grad_norm": 1.0220361948013306, "learning_rate": 3.5736208091802784e-08, "loss": 0.4796, "step": 15201 }, { "epoch": 0.96, "grad_norm": 1.1319571733474731, "learning_rate": 3.5613864654979734e-08, "loss": 0.5557, "step": 15202 }, { "epoch": 0.96, "grad_norm": 1.1150519847869873, "learning_rate": 3.5491730251187016e-08, "loss": 0.5144, "step": 15203 }, { "epoch": 0.96, "grad_norm": 1.0242811441421509, "learning_rate": 3.5369804885567185e-08, "loss": 0.4811, "step": 15204 }, { "epoch": 0.96, "grad_norm": 1.001131534576416, "learning_rate": 3.52480885632539e-08, "loss": 0.4878, "step": 15205 }, { "epoch": 0.96, "grad_norm": 1.0957980155944824, "learning_rate": 3.512658128937252e-08, "loss": 0.5016, "step": 15206 }, { "epoch": 0.96, "grad_norm": 1.0731688737869263, "learning_rate": 3.500528306904005e-08, "loss": 0.5034, "step": 15207 }, { "epoch": 0.96, "grad_norm": 1.158174753189087, "learning_rate": 3.488419390736242e-08, "loss": 0.4819, "step": 15208 }, { "epoch": 0.96, "grad_norm": 1.128722071647644, "learning_rate": 3.476331380943887e-08, "loss": 0.555, "step": 15209 }, { "epoch": 0.96, "grad_norm": 1.090911865234375, "learning_rate": 3.464264278035978e-08, "loss": 0.5168, "step": 15210 }, { "epoch": 0.96, "grad_norm": 0.9718244671821594, "learning_rate": 3.452218082520553e-08, "loss": 0.4606, "step": 15211 }, { "epoch": 0.96, "grad_norm": 1.0440447330474854, "learning_rate": 3.440192794904873e-08, "loss": 0.4768, "step": 15212 }, { "epoch": 0.96, "grad_norm": 1.0195415019989014, "learning_rate": 3.4281884156953106e-08, "loss": 0.503, "step": 15213 }, { "epoch": 0.96, "grad_norm": 1.0665068626403809, "learning_rate": 3.416204945397239e-08, "loss": 0.4998, "step": 15214 }, { "epoch": 0.96, "grad_norm": 0.9613544344902039, "learning_rate": 3.4042423845153104e-08, "loss": 0.4924, "step": 15215 }, { "epoch": 0.96, "grad_norm": 1.0670095682144165, "learning_rate": 3.392300733553178e-08, "loss": 0.4948, "step": 15216 }, { "epoch": 0.96, "grad_norm": 1.1226885318756104, "learning_rate": 3.380379993013716e-08, "loss": 0.5421, "step": 15217 }, { "epoch": 0.96, "grad_norm": 1.1233232021331787, "learning_rate": 3.368480163398802e-08, "loss": 0.4939, "step": 15218 }, { "epoch": 0.96, "grad_norm": 1.0569380521774292, "learning_rate": 3.356601245209534e-08, "loss": 0.5018, "step": 15219 }, { "epoch": 0.96, "grad_norm": 1.0649720430374146, "learning_rate": 3.344743238946124e-08, "loss": 0.4755, "step": 15220 }, { "epoch": 0.96, "grad_norm": 1.0704591274261475, "learning_rate": 3.332906145107839e-08, "loss": 0.4771, "step": 15221 }, { "epoch": 0.96, "grad_norm": 1.0231249332427979, "learning_rate": 3.3210899641930586e-08, "loss": 0.4562, "step": 15222 }, { "epoch": 0.96, "grad_norm": 1.1724458932876587, "learning_rate": 3.3092946966994385e-08, "loss": 0.5385, "step": 15223 }, { "epoch": 0.96, "grad_norm": 1.0665589570999146, "learning_rate": 3.297520343123473e-08, "loss": 0.5129, "step": 15224 }, { "epoch": 0.96, "grad_norm": 1.0072379112243652, "learning_rate": 3.285766903961096e-08, "loss": 0.4644, "step": 15225 }, { "epoch": 0.96, "grad_norm": 1.1121455430984497, "learning_rate": 3.274034379707081e-08, "loss": 0.5327, "step": 15226 }, { "epoch": 0.96, "grad_norm": 0.982637882232666, "learning_rate": 3.262322770855475e-08, "loss": 0.5118, "step": 15227 }, { "epoch": 0.96, "grad_norm": 1.1324604749679565, "learning_rate": 3.250632077899496e-08, "loss": 0.5422, "step": 15228 }, { "epoch": 0.96, "grad_norm": 1.1338701248168945, "learning_rate": 3.238962301331305e-08, "loss": 0.5434, "step": 15229 }, { "epoch": 0.96, "grad_norm": 1.043928861618042, "learning_rate": 3.227313441642288e-08, "loss": 0.5306, "step": 15230 }, { "epoch": 0.96, "grad_norm": 1.0605419874191284, "learning_rate": 3.2156854993229955e-08, "loss": 0.4544, "step": 15231 }, { "epoch": 0.97, "grad_norm": 1.1065747737884521, "learning_rate": 3.2040784748629814e-08, "loss": 0.4977, "step": 15232 }, { "epoch": 0.97, "grad_norm": 1.1279810667037964, "learning_rate": 3.192492368750966e-08, "loss": 0.4958, "step": 15233 }, { "epoch": 0.97, "grad_norm": 1.0252561569213867, "learning_rate": 3.180927181474891e-08, "loss": 0.5373, "step": 15234 }, { "epoch": 0.97, "grad_norm": 1.1200501918792725, "learning_rate": 3.16938291352159e-08, "loss": 0.5131, "step": 15235 }, { "epoch": 0.97, "grad_norm": 0.9503628611564636, "learning_rate": 3.157859565377286e-08, "loss": 0.4604, "step": 15236 }, { "epoch": 0.97, "grad_norm": 1.0107611417770386, "learning_rate": 3.146357137527145e-08, "loss": 0.4503, "step": 15237 }, { "epoch": 0.97, "grad_norm": 1.0233523845672607, "learning_rate": 3.1348756304554475e-08, "loss": 0.5112, "step": 15238 }, { "epoch": 0.97, "grad_norm": 1.056778907775879, "learning_rate": 3.123415044645639e-08, "loss": 0.5001, "step": 15239 }, { "epoch": 0.97, "grad_norm": 1.0508291721343994, "learning_rate": 3.111975380580334e-08, "loss": 0.4796, "step": 15240 }, { "epoch": 0.97, "grad_norm": 1.0572813749313354, "learning_rate": 3.100556638741203e-08, "loss": 0.53, "step": 15241 }, { "epoch": 0.97, "grad_norm": 1.0045342445373535, "learning_rate": 3.089158819609084e-08, "loss": 0.4893, "step": 15242 }, { "epoch": 0.97, "grad_norm": 1.0140489339828491, "learning_rate": 3.077781923663814e-08, "loss": 0.5306, "step": 15243 }, { "epoch": 0.97, "grad_norm": 1.0265076160430908, "learning_rate": 3.066425951384455e-08, "loss": 0.4269, "step": 15244 }, { "epoch": 0.97, "grad_norm": 1.0639636516571045, "learning_rate": 3.055090903249236e-08, "loss": 0.5385, "step": 15245 }, { "epoch": 0.97, "grad_norm": 1.0341408252716064, "learning_rate": 3.0437767797353856e-08, "loss": 0.477, "step": 15246 }, { "epoch": 0.97, "grad_norm": 1.0694355964660645, "learning_rate": 3.032483581319301e-08, "loss": 0.5101, "step": 15247 }, { "epoch": 0.97, "grad_norm": 1.0831514596939087, "learning_rate": 3.021211308476546e-08, "loss": 0.497, "step": 15248 }, { "epoch": 0.97, "grad_norm": 1.054814100265503, "learning_rate": 3.0099599616816856e-08, "loss": 0.4966, "step": 15249 }, { "epoch": 0.97, "grad_norm": 1.145595669746399, "learning_rate": 2.998729541408507e-08, "loss": 0.5221, "step": 15250 }, { "epoch": 0.97, "grad_norm": 1.0958560705184937, "learning_rate": 2.987520048129911e-08, "loss": 0.5108, "step": 15251 }, { "epoch": 0.97, "grad_norm": 1.0394368171691895, "learning_rate": 2.976331482317796e-08, "loss": 0.4922, "step": 15252 }, { "epoch": 0.97, "grad_norm": 1.0591411590576172, "learning_rate": 2.9651638444434528e-08, "loss": 0.5281, "step": 15253 }, { "epoch": 0.97, "grad_norm": 1.0195757150650024, "learning_rate": 2.9540171349769497e-08, "loss": 0.5176, "step": 15254 }, { "epoch": 0.97, "grad_norm": 1.0487700700759888, "learning_rate": 2.942891354387689e-08, "loss": 0.49, "step": 15255 }, { "epoch": 0.97, "grad_norm": 1.0553514957427979, "learning_rate": 2.9317865031441295e-08, "loss": 0.5202, "step": 15256 }, { "epoch": 0.97, "grad_norm": 1.1601183414459229, "learning_rate": 2.920702581713841e-08, "loss": 0.4933, "step": 15257 }, { "epoch": 0.97, "grad_norm": 1.051039457321167, "learning_rate": 2.909639590563562e-08, "loss": 0.5315, "step": 15258 }, { "epoch": 0.97, "grad_norm": 1.0642770528793335, "learning_rate": 2.8985975301591975e-08, "loss": 0.4957, "step": 15259 }, { "epoch": 0.97, "grad_norm": 1.054998517036438, "learning_rate": 2.887576400965486e-08, "loss": 0.4657, "step": 15260 }, { "epoch": 0.97, "grad_norm": 0.9867322444915771, "learning_rate": 2.8765762034466682e-08, "loss": 0.4807, "step": 15261 }, { "epoch": 0.97, "grad_norm": 1.082873821258545, "learning_rate": 2.8655969380658177e-08, "loss": 0.4793, "step": 15262 }, { "epoch": 0.97, "grad_norm": 1.129400372505188, "learning_rate": 2.8546386052853427e-08, "loss": 0.4728, "step": 15263 }, { "epoch": 0.97, "grad_norm": 1.0983253717422485, "learning_rate": 2.8437012055665403e-08, "loss": 0.557, "step": 15264 }, { "epoch": 0.97, "grad_norm": 1.0699577331542969, "learning_rate": 2.832784739369987e-08, "loss": 0.5156, "step": 15265 }, { "epoch": 0.97, "grad_norm": 1.0261139869689941, "learning_rate": 2.8218892071553705e-08, "loss": 0.4538, "step": 15266 }, { "epoch": 0.97, "grad_norm": 1.04954993724823, "learning_rate": 2.8110146093814906e-08, "loss": 0.4877, "step": 15267 }, { "epoch": 0.97, "grad_norm": 1.0849642753601074, "learning_rate": 2.8001609465061474e-08, "loss": 0.508, "step": 15268 }, { "epoch": 0.97, "grad_norm": 1.011203408241272, "learning_rate": 2.7893282189863647e-08, "loss": 0.497, "step": 15269 }, { "epoch": 0.97, "grad_norm": 1.0393261909484863, "learning_rate": 2.7785164272783327e-08, "loss": 0.4831, "step": 15270 }, { "epoch": 0.97, "grad_norm": 1.1217502355575562, "learning_rate": 2.7677255718372986e-08, "loss": 0.4889, "step": 15271 }, { "epoch": 0.97, "grad_norm": 1.070006012916565, "learning_rate": 2.7569556531175657e-08, "loss": 0.5044, "step": 15272 }, { "epoch": 0.97, "grad_norm": 1.097985863685608, "learning_rate": 2.7462066715726045e-08, "loss": 0.5416, "step": 15273 }, { "epoch": 0.97, "grad_norm": 1.1098628044128418, "learning_rate": 2.7354786276551083e-08, "loss": 0.5338, "step": 15274 }, { "epoch": 0.97, "grad_norm": 1.2087064981460571, "learning_rate": 2.7247715218167714e-08, "loss": 0.526, "step": 15275 }, { "epoch": 0.97, "grad_norm": 1.1480826139450073, "learning_rate": 2.7140853545083447e-08, "loss": 0.4955, "step": 15276 }, { "epoch": 0.97, "grad_norm": 1.115615963935852, "learning_rate": 2.703420126179912e-08, "loss": 0.5082, "step": 15277 }, { "epoch": 0.97, "grad_norm": 1.1329855918884277, "learning_rate": 2.692775837280448e-08, "loss": 0.5264, "step": 15278 }, { "epoch": 0.97, "grad_norm": 1.0670102834701538, "learning_rate": 2.682152488258205e-08, "loss": 0.5167, "step": 15279 }, { "epoch": 0.97, "grad_norm": 1.010777473449707, "learning_rate": 2.671550079560492e-08, "loss": 0.4553, "step": 15280 }, { "epoch": 0.97, "grad_norm": 1.0586128234863281, "learning_rate": 2.6609686116337296e-08, "loss": 0.4824, "step": 15281 }, { "epoch": 0.97, "grad_norm": 1.1230065822601318, "learning_rate": 2.6504080849234504e-08, "loss": 0.5393, "step": 15282 }, { "epoch": 0.97, "grad_norm": 0.9813792109489441, "learning_rate": 2.6398684998742986e-08, "loss": 0.4788, "step": 15283 }, { "epoch": 0.97, "grad_norm": 1.0060399770736694, "learning_rate": 2.629349856930141e-08, "loss": 0.4565, "step": 15284 }, { "epoch": 0.97, "grad_norm": 1.101292610168457, "learning_rate": 2.6188521565338466e-08, "loss": 0.4982, "step": 15285 }, { "epoch": 0.97, "grad_norm": 1.0829226970672607, "learning_rate": 2.608375399127394e-08, "loss": 0.4897, "step": 15286 }, { "epoch": 0.97, "grad_norm": 0.9973019957542419, "learning_rate": 2.5979195851519313e-08, "loss": 0.459, "step": 15287 }, { "epoch": 0.97, "grad_norm": 1.0220048427581787, "learning_rate": 2.5874847150477722e-08, "loss": 0.5127, "step": 15288 }, { "epoch": 0.97, "grad_norm": 1.011262059211731, "learning_rate": 2.5770707892542878e-08, "loss": 0.4793, "step": 15289 }, { "epoch": 0.97, "grad_norm": 1.0880519151687622, "learning_rate": 2.5666778082099052e-08, "loss": 0.4742, "step": 15290 }, { "epoch": 0.97, "grad_norm": 1.17592191696167, "learning_rate": 2.5563057723522742e-08, "loss": 0.5033, "step": 15291 }, { "epoch": 0.97, "grad_norm": 1.1462591886520386, "learning_rate": 2.5459546821181014e-08, "loss": 0.5171, "step": 15292 }, { "epoch": 0.97, "grad_norm": 0.9982767701148987, "learning_rate": 2.5356245379433154e-08, "loss": 0.4854, "step": 15293 }, { "epoch": 0.97, "grad_norm": 1.009433627128601, "learning_rate": 2.525315340262846e-08, "loss": 0.4707, "step": 15294 }, { "epoch": 0.97, "grad_norm": 1.0816377401351929, "learning_rate": 2.51502708951068e-08, "loss": 0.5397, "step": 15295 }, { "epoch": 0.97, "grad_norm": 1.0993826389312744, "learning_rate": 2.5047597861201368e-08, "loss": 0.504, "step": 15296 }, { "epoch": 0.97, "grad_norm": 1.122497320175171, "learning_rate": 2.494513430523482e-08, "loss": 0.4931, "step": 15297 }, { "epoch": 0.97, "grad_norm": 1.1604719161987305, "learning_rate": 2.4842880231522038e-08, "loss": 0.4961, "step": 15298 }, { "epoch": 0.97, "grad_norm": 1.0055816173553467, "learning_rate": 2.474083564436791e-08, "loss": 0.4649, "step": 15299 }, { "epoch": 0.97, "grad_norm": 1.0866940021514893, "learning_rate": 2.4639000548070114e-08, "loss": 0.5276, "step": 15300 }, { "epoch": 0.97, "grad_norm": 1.1128740310668945, "learning_rate": 2.4537374946915215e-08, "loss": 0.4593, "step": 15301 }, { "epoch": 0.97, "grad_norm": 1.0929911136627197, "learning_rate": 2.4435958845183684e-08, "loss": 0.4883, "step": 15302 }, { "epoch": 0.97, "grad_norm": 1.1067675352096558, "learning_rate": 2.4334752247145433e-08, "loss": 0.4879, "step": 15303 }, { "epoch": 0.97, "grad_norm": 0.9623573422431946, "learning_rate": 2.4233755157060944e-08, "loss": 0.5029, "step": 15304 }, { "epoch": 0.97, "grad_norm": 1.0398600101470947, "learning_rate": 2.413296757918404e-08, "loss": 0.4967, "step": 15305 }, { "epoch": 0.97, "grad_norm": 1.0774849653244019, "learning_rate": 2.4032389517757993e-08, "loss": 0.5044, "step": 15306 }, { "epoch": 0.97, "grad_norm": 0.9469539523124695, "learning_rate": 2.3932020977017745e-08, "loss": 0.4557, "step": 15307 }, { "epoch": 0.97, "grad_norm": 0.9823768734931946, "learning_rate": 2.3831861961189917e-08, "loss": 0.4771, "step": 15308 }, { "epoch": 0.97, "grad_norm": 1.0540803670883179, "learning_rate": 2.3731912474491137e-08, "loss": 0.4739, "step": 15309 }, { "epoch": 0.97, "grad_norm": 0.9758304953575134, "learning_rate": 2.3632172521130815e-08, "loss": 0.4468, "step": 15310 }, { "epoch": 0.97, "grad_norm": 0.9877710342407227, "learning_rate": 2.3532642105307813e-08, "loss": 0.4662, "step": 15311 }, { "epoch": 0.97, "grad_norm": 1.0869003534317017, "learning_rate": 2.3433321231213778e-08, "loss": 0.495, "step": 15312 }, { "epoch": 0.97, "grad_norm": 1.1220903396606445, "learning_rate": 2.3334209903029815e-08, "loss": 0.5294, "step": 15313 }, { "epoch": 0.97, "grad_norm": 1.099942684173584, "learning_rate": 2.3235308124930357e-08, "loss": 0.5059, "step": 15314 }, { "epoch": 0.97, "grad_norm": 1.0976247787475586, "learning_rate": 2.3136615901078742e-08, "loss": 0.5256, "step": 15315 }, { "epoch": 0.97, "grad_norm": 1.060356616973877, "learning_rate": 2.303813323563109e-08, "loss": 0.5001, "step": 15316 }, { "epoch": 0.97, "grad_norm": 1.061465859413147, "learning_rate": 2.2939860132734084e-08, "loss": 0.475, "step": 15317 }, { "epoch": 0.97, "grad_norm": 0.9800360202789307, "learning_rate": 2.2841796596525522e-08, "loss": 0.5049, "step": 15318 }, { "epoch": 0.97, "grad_norm": 1.152793049812317, "learning_rate": 2.2743942631134886e-08, "loss": 0.4839, "step": 15319 }, { "epoch": 0.97, "grad_norm": 1.2103699445724487, "learning_rate": 2.264629824068165e-08, "loss": 0.5802, "step": 15320 }, { "epoch": 0.97, "grad_norm": 1.1103157997131348, "learning_rate": 2.2548863429278645e-08, "loss": 0.5396, "step": 15321 }, { "epoch": 0.97, "grad_norm": 1.095953106880188, "learning_rate": 2.2451638201027026e-08, "loss": 0.5135, "step": 15322 }, { "epoch": 0.97, "grad_norm": 1.1200344562530518, "learning_rate": 2.2354622560021854e-08, "loss": 0.4938, "step": 15323 }, { "epoch": 0.97, "grad_norm": 1.0356738567352295, "learning_rate": 2.2257816510347086e-08, "loss": 0.4789, "step": 15324 }, { "epoch": 0.97, "grad_norm": 1.0751638412475586, "learning_rate": 2.2161220056079457e-08, "loss": 0.4692, "step": 15325 }, { "epoch": 0.97, "grad_norm": 1.0484387874603271, "learning_rate": 2.2064833201286827e-08, "loss": 0.4921, "step": 15326 }, { "epoch": 0.97, "grad_norm": 1.0133472681045532, "learning_rate": 2.1968655950026508e-08, "loss": 0.5265, "step": 15327 }, { "epoch": 0.97, "grad_norm": 1.0596994161605835, "learning_rate": 2.187268830634859e-08, "loss": 0.4993, "step": 15328 }, { "epoch": 0.97, "grad_norm": 1.0807329416275024, "learning_rate": 2.1776930274294283e-08, "loss": 0.4878, "step": 15329 }, { "epoch": 0.97, "grad_norm": 1.0925438404083252, "learning_rate": 2.1681381857895923e-08, "loss": 0.4969, "step": 15330 }, { "epoch": 0.97, "grad_norm": 1.0323103666305542, "learning_rate": 2.1586043061175842e-08, "loss": 0.4788, "step": 15331 }, { "epoch": 0.97, "grad_norm": 1.0427535772323608, "learning_rate": 2.1490913888149166e-08, "loss": 0.533, "step": 15332 }, { "epoch": 0.97, "grad_norm": 1.1524258852005005, "learning_rate": 2.139599434282047e-08, "loss": 0.5057, "step": 15333 }, { "epoch": 0.97, "grad_norm": 1.1330947875976562, "learning_rate": 2.130128442918766e-08, "loss": 0.5622, "step": 15334 }, { "epoch": 0.97, "grad_norm": 1.1543328762054443, "learning_rate": 2.1206784151238113e-08, "loss": 0.5027, "step": 15335 }, { "epoch": 0.97, "grad_norm": 1.051921010017395, "learning_rate": 2.111249351295086e-08, "loss": 0.543, "step": 15336 }, { "epoch": 0.97, "grad_norm": 1.0333747863769531, "learning_rate": 2.1018412518296617e-08, "loss": 0.508, "step": 15337 }, { "epoch": 0.97, "grad_norm": 0.9350318908691406, "learning_rate": 2.0924541171235545e-08, "loss": 0.4936, "step": 15338 }, { "epoch": 0.97, "grad_norm": 1.206451416015625, "learning_rate": 2.083087947572171e-08, "loss": 0.4999, "step": 15339 }, { "epoch": 0.97, "grad_norm": 1.131523847579956, "learning_rate": 2.073742743569862e-08, "loss": 0.5002, "step": 15340 }, { "epoch": 0.97, "grad_norm": 1.0815268754959106, "learning_rate": 2.0644185055100352e-08, "loss": 0.5543, "step": 15341 }, { "epoch": 0.97, "grad_norm": 0.994455873966217, "learning_rate": 2.0551152337853208e-08, "loss": 0.4812, "step": 15342 }, { "epoch": 0.97, "grad_norm": 1.13554847240448, "learning_rate": 2.0458329287875168e-08, "loss": 0.5011, "step": 15343 }, { "epoch": 0.97, "grad_norm": 1.1019952297210693, "learning_rate": 2.0365715909074213e-08, "loss": 0.5145, "step": 15344 }, { "epoch": 0.97, "grad_norm": 1.0579437017440796, "learning_rate": 2.027331220535056e-08, "loss": 0.5206, "step": 15345 }, { "epoch": 0.97, "grad_norm": 1.0751632452011108, "learning_rate": 2.018111818059387e-08, "loss": 0.4207, "step": 15346 }, { "epoch": 0.97, "grad_norm": 1.1553031206130981, "learning_rate": 2.008913383868716e-08, "loss": 0.5421, "step": 15347 }, { "epoch": 0.97, "grad_norm": 1.113647222518921, "learning_rate": 1.999735918350343e-08, "loss": 0.5173, "step": 15348 }, { "epoch": 0.97, "grad_norm": 1.0515217781066895, "learning_rate": 1.990579421890626e-08, "loss": 0.5192, "step": 15349 }, { "epoch": 0.97, "grad_norm": 1.0645197629928589, "learning_rate": 1.9814438948751458e-08, "loss": 0.5053, "step": 15350 }, { "epoch": 0.97, "grad_norm": 1.0292072296142578, "learning_rate": 1.9723293376886497e-08, "loss": 0.4778, "step": 15351 }, { "epoch": 0.97, "grad_norm": 1.040724277496338, "learning_rate": 1.963235750714776e-08, "loss": 0.5397, "step": 15352 }, { "epoch": 0.97, "grad_norm": 1.0659323930740356, "learning_rate": 1.9541631343365507e-08, "loss": 0.5223, "step": 15353 }, { "epoch": 0.97, "grad_norm": 1.2221729755401611, "learning_rate": 1.9451114889359468e-08, "loss": 0.5599, "step": 15354 }, { "epoch": 0.97, "grad_norm": 1.0946619510650635, "learning_rate": 1.936080814894048e-08, "loss": 0.499, "step": 15355 }, { "epoch": 0.97, "grad_norm": 1.0898137092590332, "learning_rate": 1.9270711125912167e-08, "loss": 0.4925, "step": 15356 }, { "epoch": 0.97, "grad_norm": 1.0585594177246094, "learning_rate": 1.9180823824067053e-08, "loss": 0.493, "step": 15357 }, { "epoch": 0.97, "grad_norm": 0.9604061245918274, "learning_rate": 1.909114624719044e-08, "loss": 0.5012, "step": 15358 }, { "epoch": 0.97, "grad_norm": 1.1087369918823242, "learning_rate": 1.90016783990582e-08, "loss": 0.4827, "step": 15359 }, { "epoch": 0.97, "grad_norm": 1.0932306051254272, "learning_rate": 1.891242028343787e-08, "loss": 0.5323, "step": 15360 }, { "epoch": 0.97, "grad_norm": 1.0271295309066772, "learning_rate": 1.8823371904087563e-08, "loss": 0.4937, "step": 15361 }, { "epoch": 0.97, "grad_norm": 1.0557682514190674, "learning_rate": 1.8734533264757047e-08, "loss": 0.4682, "step": 15362 }, { "epoch": 0.97, "grad_norm": 1.1083639860153198, "learning_rate": 1.864590436918612e-08, "loss": 0.5375, "step": 15363 }, { "epoch": 0.97, "grad_norm": 1.1011896133422852, "learning_rate": 1.8557485221107897e-08, "loss": 0.4959, "step": 15364 }, { "epoch": 0.97, "grad_norm": 1.1148617267608643, "learning_rate": 1.8469275824244958e-08, "loss": 0.4878, "step": 15365 }, { "epoch": 0.97, "grad_norm": 1.1544920206069946, "learning_rate": 1.8381276182311004e-08, "loss": 0.4704, "step": 15366 }, { "epoch": 0.97, "grad_norm": 1.1003879308700562, "learning_rate": 1.8293486299011398e-08, "loss": 0.4997, "step": 15367 }, { "epoch": 0.97, "grad_norm": 1.06473970413208, "learning_rate": 1.8205906178043186e-08, "loss": 0.5041, "step": 15368 }, { "epoch": 0.97, "grad_norm": 1.0241307020187378, "learning_rate": 1.811853582309453e-08, "loss": 0.5289, "step": 15369 }, { "epoch": 0.97, "grad_norm": 1.0392919778823853, "learning_rate": 1.803137523784304e-08, "loss": 0.4512, "step": 15370 }, { "epoch": 0.97, "grad_norm": 1.028462290763855, "learning_rate": 1.7944424425959116e-08, "loss": 0.5017, "step": 15371 }, { "epoch": 0.97, "grad_norm": 0.9891314506530762, "learning_rate": 1.7857683391104273e-08, "loss": 0.4665, "step": 15372 }, { "epoch": 0.97, "grad_norm": 1.0323691368103027, "learning_rate": 1.7771152136931147e-08, "loss": 0.5099, "step": 15373 }, { "epoch": 0.97, "grad_norm": 1.1690553426742554, "learning_rate": 1.7684830667082377e-08, "loss": 0.5036, "step": 15374 }, { "epoch": 0.97, "grad_norm": 1.048149824142456, "learning_rate": 1.759871898519394e-08, "loss": 0.4901, "step": 15375 }, { "epoch": 0.97, "grad_norm": 1.0809240341186523, "learning_rate": 1.7512817094890167e-08, "loss": 0.4707, "step": 15376 }, { "epoch": 0.97, "grad_norm": 1.0636286735534668, "learning_rate": 1.742712499978927e-08, "loss": 0.4795, "step": 15377 }, { "epoch": 0.97, "grad_norm": 1.0688414573669434, "learning_rate": 1.734164270349892e-08, "loss": 0.4704, "step": 15378 }, { "epoch": 0.97, "grad_norm": 1.0488619804382324, "learning_rate": 1.7256370209618458e-08, "loss": 0.5188, "step": 15379 }, { "epoch": 0.97, "grad_norm": 1.0834404230117798, "learning_rate": 1.71713075217389e-08, "loss": 0.5152, "step": 15380 }, { "epoch": 0.97, "grad_norm": 1.1331005096435547, "learning_rate": 1.7086454643441273e-08, "loss": 0.5231, "step": 15381 }, { "epoch": 0.97, "grad_norm": 1.0595972537994385, "learning_rate": 1.7001811578298832e-08, "loss": 0.4447, "step": 15382 }, { "epoch": 0.97, "grad_norm": 1.0525641441345215, "learning_rate": 1.6917378329875946e-08, "loss": 0.488, "step": 15383 }, { "epoch": 0.97, "grad_norm": 0.9919252991676331, "learning_rate": 1.6833154901726988e-08, "loss": 0.5188, "step": 15384 }, { "epoch": 0.97, "grad_norm": 1.0833450555801392, "learning_rate": 1.6749141297398574e-08, "loss": 0.4768, "step": 15385 }, { "epoch": 0.97, "grad_norm": 1.0467420816421509, "learning_rate": 1.6665337520428427e-08, "loss": 0.4613, "step": 15386 }, { "epoch": 0.97, "grad_norm": 1.0288331508636475, "learning_rate": 1.658174357434483e-08, "loss": 0.5197, "step": 15387 }, { "epoch": 0.97, "grad_norm": 1.1571677923202515, "learning_rate": 1.649835946266831e-08, "loss": 0.551, "step": 15388 }, { "epoch": 0.97, "grad_norm": 1.0284770727157593, "learning_rate": 1.6415185188909944e-08, "loss": 0.471, "step": 15389 }, { "epoch": 0.98, "grad_norm": 1.0496933460235596, "learning_rate": 1.6332220756570815e-08, "loss": 0.4814, "step": 15390 }, { "epoch": 0.98, "grad_norm": 1.076085090637207, "learning_rate": 1.6249466169145354e-08, "loss": 0.5407, "step": 15391 }, { "epoch": 0.98, "grad_norm": 1.032431960105896, "learning_rate": 1.6166921430118e-08, "loss": 0.4832, "step": 15392 }, { "epoch": 0.98, "grad_norm": 1.089762806892395, "learning_rate": 1.608458654296319e-08, "loss": 0.4834, "step": 15393 }, { "epoch": 0.98, "grad_norm": 1.0818616151809692, "learning_rate": 1.600246151114926e-08, "loss": 0.4751, "step": 15394 }, { "epoch": 0.98, "grad_norm": 1.0302743911743164, "learning_rate": 1.5920546338133447e-08, "loss": 0.4924, "step": 15395 }, { "epoch": 0.98, "grad_norm": 1.0723204612731934, "learning_rate": 1.5838841027365215e-08, "loss": 0.5048, "step": 15396 }, { "epoch": 0.98, "grad_norm": 1.052377462387085, "learning_rate": 1.5757345582285144e-08, "loss": 0.5257, "step": 15397 }, { "epoch": 0.98, "grad_norm": 1.065084457397461, "learning_rate": 1.5676060006323267e-08, "loss": 0.5565, "step": 15398 }, { "epoch": 0.98, "grad_norm": 1.114837884902954, "learning_rate": 1.559498430290407e-08, "loss": 0.4829, "step": 15399 }, { "epoch": 0.98, "grad_norm": 1.132448434829712, "learning_rate": 1.5514118475440378e-08, "loss": 0.488, "step": 15400 }, { "epoch": 0.98, "grad_norm": 0.9648688435554504, "learning_rate": 1.5433462527337793e-08, "loss": 0.4395, "step": 15401 }, { "epoch": 0.98, "grad_norm": 1.0886834859848022, "learning_rate": 1.5353016461991387e-08, "loss": 0.5493, "step": 15402 }, { "epoch": 0.98, "grad_norm": 1.0746283531188965, "learning_rate": 1.5272780282789556e-08, "loss": 0.5297, "step": 15403 }, { "epoch": 0.98, "grad_norm": 1.2116729021072388, "learning_rate": 1.5192753993110155e-08, "loss": 0.4665, "step": 15404 }, { "epoch": 0.98, "grad_norm": 1.0766348838806152, "learning_rate": 1.5112937596323263e-08, "loss": 0.5302, "step": 15405 }, { "epoch": 0.98, "grad_norm": 1.0860892534255981, "learning_rate": 1.5033331095788973e-08, "loss": 0.4866, "step": 15406 }, { "epoch": 0.98, "grad_norm": 0.9706477522850037, "learning_rate": 1.4953934494860155e-08, "loss": 0.4174, "step": 15407 }, { "epoch": 0.98, "grad_norm": 1.033424735069275, "learning_rate": 1.4874747796879142e-08, "loss": 0.4624, "step": 15408 }, { "epoch": 0.98, "grad_norm": 1.146001935005188, "learning_rate": 1.4795771005181036e-08, "loss": 0.5519, "step": 15409 }, { "epoch": 0.98, "grad_norm": 1.0574760437011719, "learning_rate": 1.4717004123090406e-08, "loss": 0.4848, "step": 15410 }, { "epoch": 0.98, "grad_norm": 1.0617190599441528, "learning_rate": 1.463844715392404e-08, "loss": 0.5468, "step": 15411 }, { "epoch": 0.98, "grad_norm": 0.9709962010383606, "learning_rate": 1.4560100100989849e-08, "loss": 0.4465, "step": 15412 }, { "epoch": 0.98, "grad_norm": 1.0188851356506348, "learning_rate": 1.448196296758686e-08, "loss": 0.4785, "step": 15413 }, { "epoch": 0.98, "grad_norm": 1.0532549619674683, "learning_rate": 1.4404035757005219e-08, "loss": 0.4913, "step": 15414 }, { "epoch": 0.98, "grad_norm": 1.1245558261871338, "learning_rate": 1.4326318472525635e-08, "loss": 0.4962, "step": 15415 }, { "epoch": 0.98, "grad_norm": 1.0558838844299316, "learning_rate": 1.4248811117421046e-08, "loss": 0.5147, "step": 15416 }, { "epoch": 0.98, "grad_norm": 1.1222167015075684, "learning_rate": 1.4171513694954953e-08, "loss": 0.4893, "step": 15417 }, { "epoch": 0.98, "grad_norm": 1.1024917364120483, "learning_rate": 1.4094426208381972e-08, "loss": 0.5539, "step": 15418 }, { "epoch": 0.98, "grad_norm": 1.1723283529281616, "learning_rate": 1.4017548660947844e-08, "loss": 0.5152, "step": 15419 }, { "epoch": 0.98, "grad_norm": 1.086130976676941, "learning_rate": 1.3940881055889976e-08, "loss": 0.5141, "step": 15420 }, { "epoch": 0.98, "grad_norm": 1.167900562286377, "learning_rate": 1.3864423396436344e-08, "loss": 0.4828, "step": 15421 }, { "epoch": 0.98, "grad_norm": 1.0070527791976929, "learning_rate": 1.3788175685806594e-08, "loss": 0.5074, "step": 15422 }, { "epoch": 0.98, "grad_norm": 1.103872299194336, "learning_rate": 1.3712137927210377e-08, "loss": 0.5252, "step": 15423 }, { "epoch": 0.98, "grad_norm": 1.1018625497817993, "learning_rate": 1.3636310123850694e-08, "loss": 0.5367, "step": 15424 }, { "epoch": 0.98, "grad_norm": 0.981019139289856, "learning_rate": 1.3560692278919429e-08, "loss": 0.4795, "step": 15425 }, { "epoch": 0.98, "grad_norm": 1.0430066585540771, "learning_rate": 1.3485284395600707e-08, "loss": 0.4652, "step": 15426 }, { "epoch": 0.98, "grad_norm": 1.0918257236480713, "learning_rate": 1.3410086477069761e-08, "loss": 0.5407, "step": 15427 }, { "epoch": 0.98, "grad_norm": 1.0566918849945068, "learning_rate": 1.333509852649295e-08, "loss": 0.4817, "step": 15428 }, { "epoch": 0.98, "grad_norm": 1.0845723152160645, "learning_rate": 1.3260320547028305e-08, "loss": 0.5032, "step": 15429 }, { "epoch": 0.98, "grad_norm": 1.0401231050491333, "learning_rate": 1.3185752541823304e-08, "loss": 0.4924, "step": 15430 }, { "epoch": 0.98, "grad_norm": 1.166698932647705, "learning_rate": 1.3111394514018772e-08, "loss": 0.4758, "step": 15431 }, { "epoch": 0.98, "grad_norm": 1.0580391883850098, "learning_rate": 1.3037246466745535e-08, "loss": 0.4882, "step": 15432 }, { "epoch": 0.98, "grad_norm": 1.099125862121582, "learning_rate": 1.2963308403124985e-08, "loss": 0.4994, "step": 15433 }, { "epoch": 0.98, "grad_norm": 1.1514862775802612, "learning_rate": 1.2889580326271301e-08, "loss": 0.5011, "step": 15434 }, { "epoch": 0.98, "grad_norm": 1.0589500665664673, "learning_rate": 1.2816062239288107e-08, "loss": 0.4675, "step": 15435 }, { "epoch": 0.98, "grad_norm": 1.115246057510376, "learning_rate": 1.2742754145271264e-08, "loss": 0.4854, "step": 15436 }, { "epoch": 0.98, "grad_norm": 1.0737168788909912, "learning_rate": 1.2669656047308299e-08, "loss": 0.4983, "step": 15437 }, { "epoch": 0.98, "grad_norm": 1.0704424381256104, "learning_rate": 1.2596767948476196e-08, "loss": 0.533, "step": 15438 }, { "epoch": 0.98, "grad_norm": 1.1217480897903442, "learning_rate": 1.2524089851844168e-08, "loss": 0.4673, "step": 15439 }, { "epoch": 0.98, "grad_norm": 1.004804253578186, "learning_rate": 1.2451621760472544e-08, "loss": 0.4808, "step": 15440 }, { "epoch": 0.98, "grad_norm": 1.1652071475982666, "learning_rate": 1.237936367741277e-08, "loss": 0.4964, "step": 15441 }, { "epoch": 0.98, "grad_norm": 1.1194581985473633, "learning_rate": 1.2307315605707416e-08, "loss": 0.5033, "step": 15442 }, { "epoch": 0.98, "grad_norm": 1.096777081489563, "learning_rate": 1.2235477548390162e-08, "loss": 0.5177, "step": 15443 }, { "epoch": 0.98, "grad_norm": 0.9652280807495117, "learning_rate": 1.2163849508485259e-08, "loss": 0.4436, "step": 15444 }, { "epoch": 0.98, "grad_norm": 1.0869945287704468, "learning_rate": 1.2092431489009738e-08, "loss": 0.5026, "step": 15445 }, { "epoch": 0.98, "grad_norm": 0.9712923169136047, "learning_rate": 1.202122349297008e-08, "loss": 0.4694, "step": 15446 }, { "epoch": 0.98, "grad_norm": 1.1346890926361084, "learning_rate": 1.1950225523365e-08, "loss": 0.5372, "step": 15447 }, { "epoch": 0.98, "grad_norm": 1.0068241357803345, "learning_rate": 1.1879437583183217e-08, "loss": 0.4844, "step": 15448 }, { "epoch": 0.98, "grad_norm": 1.0190941095352173, "learning_rate": 1.1808859675406236e-08, "loss": 0.5136, "step": 15449 }, { "epoch": 0.98, "grad_norm": 1.1415479183197021, "learning_rate": 1.173849180300557e-08, "loss": 0.4895, "step": 15450 }, { "epoch": 0.98, "grad_norm": 1.080260992050171, "learning_rate": 1.16683339689444e-08, "loss": 0.5392, "step": 15451 }, { "epoch": 0.98, "grad_norm": 1.0502970218658447, "learning_rate": 1.1598386176175924e-08, "loss": 0.4963, "step": 15452 }, { "epoch": 0.98, "grad_norm": 1.0288969278335571, "learning_rate": 1.1528648427646671e-08, "loss": 0.4612, "step": 15453 }, { "epoch": 0.98, "grad_norm": 1.0865890979766846, "learning_rate": 1.1459120726292072e-08, "loss": 0.4803, "step": 15454 }, { "epoch": 0.98, "grad_norm": 1.079679012298584, "learning_rate": 1.1389803075039785e-08, "loss": 0.4945, "step": 15455 }, { "epoch": 0.98, "grad_norm": 1.050313949584961, "learning_rate": 1.1320695476809141e-08, "loss": 0.5243, "step": 15456 }, { "epoch": 0.98, "grad_norm": 1.089991807937622, "learning_rate": 1.1251797934509478e-08, "loss": 0.5044, "step": 15457 }, { "epoch": 0.98, "grad_norm": 1.107649326324463, "learning_rate": 1.1183110451042368e-08, "loss": 0.4493, "step": 15458 }, { "epoch": 0.98, "grad_norm": 1.0357794761657715, "learning_rate": 1.1114633029299382e-08, "loss": 0.4632, "step": 15459 }, { "epoch": 0.98, "grad_norm": 0.9970534443855286, "learning_rate": 1.1046365672163772e-08, "loss": 0.4763, "step": 15460 }, { "epoch": 0.98, "grad_norm": 1.08147132396698, "learning_rate": 1.0978308382511016e-08, "loss": 0.5183, "step": 15461 }, { "epoch": 0.98, "grad_norm": 1.112913966178894, "learning_rate": 1.0910461163206043e-08, "loss": 0.4865, "step": 15462 }, { "epoch": 0.98, "grad_norm": 1.2007641792297363, "learning_rate": 1.0842824017105458e-08, "loss": 0.5015, "step": 15463 }, { "epoch": 0.98, "grad_norm": 0.9819358587265015, "learning_rate": 1.0775396947057537e-08, "loss": 0.47, "step": 15464 }, { "epoch": 0.98, "grad_norm": 1.0319911241531372, "learning_rate": 1.0708179955901677e-08, "loss": 0.4666, "step": 15465 }, { "epoch": 0.98, "grad_norm": 1.0933454036712646, "learning_rate": 1.0641173046467833e-08, "loss": 0.5157, "step": 15466 }, { "epoch": 0.98, "grad_norm": 1.100706696510315, "learning_rate": 1.0574376221577642e-08, "loss": 0.4744, "step": 15467 }, { "epoch": 0.98, "grad_norm": 1.0418654680252075, "learning_rate": 1.0507789484043295e-08, "loss": 0.4367, "step": 15468 }, { "epoch": 0.98, "grad_norm": 0.9827783107757568, "learning_rate": 1.0441412836668663e-08, "loss": 0.4627, "step": 15469 }, { "epoch": 0.98, "grad_norm": 1.021515965461731, "learning_rate": 1.037524628224873e-08, "loss": 0.477, "step": 15470 }, { "epoch": 0.98, "grad_norm": 1.0083973407745361, "learning_rate": 1.0309289823569601e-08, "loss": 0.4883, "step": 15471 }, { "epoch": 0.98, "grad_norm": 1.1818205118179321, "learning_rate": 1.02435434634085e-08, "loss": 0.514, "step": 15472 }, { "epoch": 0.98, "grad_norm": 1.253869652748108, "learning_rate": 1.0178007204533768e-08, "loss": 0.5216, "step": 15473 }, { "epoch": 0.98, "grad_norm": 1.1360281705856323, "learning_rate": 1.0112681049704865e-08, "loss": 0.5095, "step": 15474 }, { "epoch": 0.98, "grad_norm": 0.9967944622039795, "learning_rate": 1.004756500167181e-08, "loss": 0.4683, "step": 15475 }, { "epoch": 0.98, "grad_norm": 1.064879059791565, "learning_rate": 9.982659063177413e-09, "loss": 0.4966, "step": 15476 }, { "epoch": 0.98, "grad_norm": 1.049710988998413, "learning_rate": 9.917963236954487e-09, "loss": 0.4757, "step": 15477 }, { "epoch": 0.98, "grad_norm": 1.1126371622085571, "learning_rate": 9.853477525726962e-09, "loss": 0.4715, "step": 15478 }, { "epoch": 0.98, "grad_norm": 1.0898528099060059, "learning_rate": 9.789201932209335e-09, "loss": 0.5257, "step": 15479 }, { "epoch": 0.98, "grad_norm": 1.1489167213439941, "learning_rate": 9.725136459109441e-09, "loss": 0.5702, "step": 15480 }, { "epoch": 0.98, "grad_norm": 1.0590689182281494, "learning_rate": 9.66128110912401e-09, "loss": 0.4812, "step": 15481 }, { "epoch": 0.98, "grad_norm": 0.9705673456192017, "learning_rate": 9.597635884941447e-09, "loss": 0.4751, "step": 15482 }, { "epoch": 0.98, "grad_norm": 0.9974306225776672, "learning_rate": 9.534200789242388e-09, "loss": 0.5068, "step": 15483 }, { "epoch": 0.98, "grad_norm": 1.0707036256790161, "learning_rate": 9.470975824698025e-09, "loss": 0.5301, "step": 15484 }, { "epoch": 0.98, "grad_norm": 1.145838737487793, "learning_rate": 9.407960993969567e-09, "loss": 0.5452, "step": 15485 }, { "epoch": 0.98, "grad_norm": 0.9681414365768433, "learning_rate": 9.345156299711e-09, "loss": 0.4709, "step": 15486 }, { "epoch": 0.98, "grad_norm": 1.0441086292266846, "learning_rate": 9.282561744566321e-09, "loss": 0.4832, "step": 15487 }, { "epoch": 0.98, "grad_norm": 1.0421568155288696, "learning_rate": 9.220177331172309e-09, "loss": 0.4879, "step": 15488 }, { "epoch": 0.98, "grad_norm": 1.0118240118026733, "learning_rate": 9.158003062154642e-09, "loss": 0.4499, "step": 15489 }, { "epoch": 0.98, "grad_norm": 1.0902498960494995, "learning_rate": 9.096038940131225e-09, "loss": 0.5196, "step": 15490 }, { "epoch": 0.98, "grad_norm": 0.9824011325836182, "learning_rate": 9.034284967711637e-09, "loss": 0.4471, "step": 15491 }, { "epoch": 0.98, "grad_norm": 1.0274479389190674, "learning_rate": 8.972741147496023e-09, "loss": 0.5007, "step": 15492 }, { "epoch": 0.98, "grad_norm": 1.0410804748535156, "learning_rate": 8.911407482076196e-09, "loss": 0.4576, "step": 15493 }, { "epoch": 0.98, "grad_norm": 1.0497487783432007, "learning_rate": 8.85028397403398e-09, "loss": 0.4976, "step": 15494 }, { "epoch": 0.98, "grad_norm": 1.106119990348816, "learning_rate": 8.789370625943427e-09, "loss": 0.5142, "step": 15495 }, { "epoch": 0.98, "grad_norm": 1.0978714227676392, "learning_rate": 8.728667440369153e-09, "loss": 0.4892, "step": 15496 }, { "epoch": 0.98, "grad_norm": 1.0782625675201416, "learning_rate": 8.668174419867449e-09, "loss": 0.5343, "step": 15497 }, { "epoch": 0.98, "grad_norm": 1.124019980430603, "learning_rate": 8.60789156698516e-09, "loss": 0.4786, "step": 15498 }, { "epoch": 0.98, "grad_norm": 1.0971437692642212, "learning_rate": 8.547818884260816e-09, "loss": 0.5118, "step": 15499 }, { "epoch": 0.98, "grad_norm": 1.0221912860870361, "learning_rate": 8.48795637422406e-09, "loss": 0.4627, "step": 15500 }, { "epoch": 0.98, "grad_norm": 1.0534876585006714, "learning_rate": 8.428304039395096e-09, "loss": 0.5088, "step": 15501 }, { "epoch": 0.98, "grad_norm": 1.0663495063781738, "learning_rate": 8.368861882285806e-09, "loss": 0.4988, "step": 15502 }, { "epoch": 0.98, "grad_norm": 1.0165753364562988, "learning_rate": 8.309629905399186e-09, "loss": 0.468, "step": 15503 }, { "epoch": 0.98, "grad_norm": 1.038854956626892, "learning_rate": 8.250608111229352e-09, "loss": 0.5189, "step": 15504 }, { "epoch": 0.98, "grad_norm": 1.1807208061218262, "learning_rate": 8.191796502260985e-09, "loss": 0.5431, "step": 15505 }, { "epoch": 0.98, "grad_norm": 1.1552495956420898, "learning_rate": 8.13319508097099e-09, "loss": 0.5579, "step": 15506 }, { "epoch": 0.98, "grad_norm": 1.1485646963119507, "learning_rate": 8.074803849827395e-09, "loss": 0.5058, "step": 15507 }, { "epoch": 0.98, "grad_norm": 1.0058408975601196, "learning_rate": 8.016622811287123e-09, "loss": 0.4613, "step": 15508 }, { "epoch": 0.98, "grad_norm": 0.9777622818946838, "learning_rate": 7.958651967801545e-09, "loss": 0.4582, "step": 15509 }, { "epoch": 0.98, "grad_norm": 1.062703251838684, "learning_rate": 7.900891321810932e-09, "loss": 0.4902, "step": 15510 }, { "epoch": 0.98, "grad_norm": 1.1520791053771973, "learning_rate": 7.843340875747785e-09, "loss": 0.5539, "step": 15511 }, { "epoch": 0.98, "grad_norm": 1.029168725013733, "learning_rate": 7.786000632035163e-09, "loss": 0.4889, "step": 15512 }, { "epoch": 0.98, "grad_norm": 1.0008456707000732, "learning_rate": 7.728870593087246e-09, "loss": 0.5062, "step": 15513 }, { "epoch": 0.98, "grad_norm": 1.185447096824646, "learning_rate": 7.671950761309333e-09, "loss": 0.4949, "step": 15514 }, { "epoch": 0.98, "grad_norm": 1.1570756435394287, "learning_rate": 7.61524113909895e-09, "loss": 0.4382, "step": 15515 }, { "epoch": 0.98, "grad_norm": 1.0614207983016968, "learning_rate": 7.558741728843633e-09, "loss": 0.4664, "step": 15516 }, { "epoch": 0.98, "grad_norm": 1.094365119934082, "learning_rate": 7.502452532922033e-09, "loss": 0.4727, "step": 15517 }, { "epoch": 0.98, "grad_norm": 1.0895835161209106, "learning_rate": 7.446373553705033e-09, "loss": 0.525, "step": 15518 }, { "epoch": 0.98, "grad_norm": 1.09317946434021, "learning_rate": 7.390504793552966e-09, "loss": 0.5406, "step": 15519 }, { "epoch": 0.98, "grad_norm": 1.0369621515274048, "learning_rate": 7.3348462548183955e-09, "loss": 0.5135, "step": 15520 }, { "epoch": 0.98, "grad_norm": 1.1538317203521729, "learning_rate": 7.279397939845556e-09, "loss": 0.5053, "step": 15521 }, { "epoch": 0.98, "grad_norm": 1.189522624015808, "learning_rate": 7.2241598509686926e-09, "loss": 0.515, "step": 15522 }, { "epoch": 0.98, "grad_norm": 1.0553511381149292, "learning_rate": 7.169131990514278e-09, "loss": 0.5348, "step": 15523 }, { "epoch": 0.98, "grad_norm": 1.105460524559021, "learning_rate": 7.114314360798791e-09, "loss": 0.5216, "step": 15524 }, { "epoch": 0.98, "grad_norm": 1.0487605333328247, "learning_rate": 7.0597069641303865e-09, "loss": 0.5039, "step": 15525 }, { "epoch": 0.98, "grad_norm": 1.0938303470611572, "learning_rate": 7.00530980280889e-09, "loss": 0.4823, "step": 15526 }, { "epoch": 0.98, "grad_norm": 1.0104894638061523, "learning_rate": 6.951122879124139e-09, "loss": 0.4778, "step": 15527 }, { "epoch": 0.98, "grad_norm": 0.9984012246131897, "learning_rate": 6.89714619535764e-09, "loss": 0.4719, "step": 15528 }, { "epoch": 0.98, "grad_norm": 1.0359838008880615, "learning_rate": 6.84337975378313e-09, "loss": 0.4591, "step": 15529 }, { "epoch": 0.98, "grad_norm": 1.0148361921310425, "learning_rate": 6.789823556663799e-09, "loss": 0.4995, "step": 15530 }, { "epoch": 0.98, "grad_norm": 1.0572915077209473, "learning_rate": 6.736477606255065e-09, "loss": 0.4604, "step": 15531 }, { "epoch": 0.98, "grad_norm": 1.04763662815094, "learning_rate": 6.683341904802909e-09, "loss": 0.491, "step": 15532 }, { "epoch": 0.98, "grad_norm": 1.1820831298828125, "learning_rate": 6.63041645454443e-09, "loss": 0.4814, "step": 15533 }, { "epoch": 0.98, "grad_norm": 1.1502306461334229, "learning_rate": 6.577701257708957e-09, "loss": 0.5676, "step": 15534 }, { "epoch": 0.98, "grad_norm": 1.0856949090957642, "learning_rate": 6.52519631651527e-09, "loss": 0.4669, "step": 15535 }, { "epoch": 0.98, "grad_norm": 1.1188011169433594, "learning_rate": 6.4729016331749325e-09, "loss": 0.5111, "step": 15536 }, { "epoch": 0.98, "grad_norm": 1.0615599155426025, "learning_rate": 6.420817209888963e-09, "loss": 0.4751, "step": 15537 }, { "epoch": 0.98, "grad_norm": 1.0602856874465942, "learning_rate": 6.368943048851162e-09, "loss": 0.4879, "step": 15538 }, { "epoch": 0.98, "grad_norm": 1.146788477897644, "learning_rate": 6.317279152245892e-09, "loss": 0.4714, "step": 15539 }, { "epoch": 0.98, "grad_norm": 1.1277881860733032, "learning_rate": 6.265825522248082e-09, "loss": 0.5061, "step": 15540 }, { "epoch": 0.98, "grad_norm": 1.0490295886993408, "learning_rate": 6.2145821610243296e-09, "loss": 0.5056, "step": 15541 }, { "epoch": 0.98, "grad_norm": 1.1133843660354614, "learning_rate": 6.163549070732356e-09, "loss": 0.5406, "step": 15542 }, { "epoch": 0.98, "grad_norm": 1.1436570882797241, "learning_rate": 6.1127262535209955e-09, "loss": 0.5567, "step": 15543 }, { "epoch": 0.98, "grad_norm": 1.1176104545593262, "learning_rate": 6.062113711530204e-09, "loss": 0.5033, "step": 15544 }, { "epoch": 0.98, "grad_norm": 1.0910592079162598, "learning_rate": 6.01171144689161e-09, "loss": 0.4846, "step": 15545 }, { "epoch": 0.98, "grad_norm": 1.08412766456604, "learning_rate": 5.96151946172685e-09, "loss": 0.5403, "step": 15546 }, { "epoch": 0.98, "grad_norm": 1.0198564529418945, "learning_rate": 5.911537758149233e-09, "loss": 0.4694, "step": 15547 }, { "epoch": 0.99, "grad_norm": 1.014939785003662, "learning_rate": 5.861766338263741e-09, "loss": 0.4924, "step": 15548 }, { "epoch": 0.99, "grad_norm": 1.121074914932251, "learning_rate": 5.812205204165922e-09, "loss": 0.4822, "step": 15549 }, { "epoch": 0.99, "grad_norm": 1.1740344762802124, "learning_rate": 5.762854357942993e-09, "loss": 0.538, "step": 15550 }, { "epoch": 0.99, "grad_norm": 1.0074889659881592, "learning_rate": 5.7137138016721825e-09, "loss": 0.4615, "step": 15551 }, { "epoch": 0.99, "grad_norm": 0.9784232974052429, "learning_rate": 5.6647835374229465e-09, "loss": 0.5068, "step": 15552 }, { "epoch": 0.99, "grad_norm": 1.0445855855941772, "learning_rate": 5.616063567255859e-09, "loss": 0.5426, "step": 15553 }, { "epoch": 0.99, "grad_norm": 1.1089900732040405, "learning_rate": 5.5675538932220555e-09, "loss": 0.5565, "step": 15554 }, { "epoch": 0.99, "grad_norm": 1.1083252429962158, "learning_rate": 5.519254517364347e-09, "loss": 0.462, "step": 15555 }, { "epoch": 0.99, "grad_norm": 1.0948292016983032, "learning_rate": 5.471165441716108e-09, "loss": 0.4796, "step": 15556 }, { "epoch": 0.99, "grad_norm": 1.0887798070907593, "learning_rate": 5.4232866683023856e-09, "loss": 0.5145, "step": 15557 }, { "epoch": 0.99, "grad_norm": 1.0607532262802124, "learning_rate": 5.375618199139343e-09, "loss": 0.4706, "step": 15558 }, { "epoch": 0.99, "grad_norm": 1.1024744510650635, "learning_rate": 5.328160036234264e-09, "loss": 0.5237, "step": 15559 }, { "epoch": 0.99, "grad_norm": 1.1816823482513428, "learning_rate": 5.280912181584441e-09, "loss": 0.5398, "step": 15560 }, { "epoch": 0.99, "grad_norm": 1.034546971321106, "learning_rate": 5.233874637180503e-09, "loss": 0.5295, "step": 15561 }, { "epoch": 0.99, "grad_norm": 1.04789137840271, "learning_rate": 5.1870474050025325e-09, "loss": 0.5054, "step": 15562 }, { "epoch": 0.99, "grad_norm": 1.0426819324493408, "learning_rate": 5.1404304870222856e-09, "loss": 0.4796, "step": 15563 }, { "epoch": 0.99, "grad_norm": 1.0558671951293945, "learning_rate": 5.094023885203192e-09, "loss": 0.4662, "step": 15564 }, { "epoch": 0.99, "grad_norm": 1.1009888648986816, "learning_rate": 5.0478276014981345e-09, "loss": 0.5328, "step": 15565 }, { "epoch": 0.99, "grad_norm": 1.2031971216201782, "learning_rate": 5.001841637852778e-09, "loss": 0.4895, "step": 15566 }, { "epoch": 0.99, "grad_norm": 1.0120888948440552, "learning_rate": 4.956065996203907e-09, "loss": 0.4846, "step": 15567 }, { "epoch": 0.99, "grad_norm": 1.0949715375900269, "learning_rate": 4.910500678478314e-09, "loss": 0.4925, "step": 15568 }, { "epoch": 0.99, "grad_norm": 1.112021565437317, "learning_rate": 4.865145686595019e-09, "loss": 0.5211, "step": 15569 }, { "epoch": 0.99, "grad_norm": 0.9821246862411499, "learning_rate": 4.820001022463605e-09, "loss": 0.4812, "step": 15570 }, { "epoch": 0.99, "grad_norm": 1.1005215644836426, "learning_rate": 4.77506668798533e-09, "loss": 0.5139, "step": 15571 }, { "epoch": 0.99, "grad_norm": 1.044965386390686, "learning_rate": 4.730342685051459e-09, "loss": 0.5013, "step": 15572 }, { "epoch": 0.99, "grad_norm": 1.0928858518600464, "learning_rate": 4.685829015545485e-09, "loss": 0.501, "step": 15573 }, { "epoch": 0.99, "grad_norm": 1.0987399816513062, "learning_rate": 4.641525681342019e-09, "loss": 0.5113, "step": 15574 }, { "epoch": 0.99, "grad_norm": 1.1467041969299316, "learning_rate": 4.597432684306236e-09, "loss": 0.5519, "step": 15575 }, { "epoch": 0.99, "grad_norm": 1.1859843730926514, "learning_rate": 4.553550026294984e-09, "loss": 0.5583, "step": 15576 }, { "epoch": 0.99, "grad_norm": 1.0004034042358398, "learning_rate": 4.5098777091556745e-09, "loss": 0.4807, "step": 15577 }, { "epoch": 0.99, "grad_norm": 1.0263206958770752, "learning_rate": 4.4664157347273916e-09, "loss": 0.5219, "step": 15578 }, { "epoch": 0.99, "grad_norm": 1.0322223901748657, "learning_rate": 4.423164104840339e-09, "loss": 0.5045, "step": 15579 }, { "epoch": 0.99, "grad_norm": 1.0323245525360107, "learning_rate": 4.38012282131528e-09, "loss": 0.4675, "step": 15580 }, { "epoch": 0.99, "grad_norm": 1.053807258605957, "learning_rate": 4.3372918859652115e-09, "loss": 0.5007, "step": 15581 }, { "epoch": 0.99, "grad_norm": 1.0838310718536377, "learning_rate": 4.294671300592579e-09, "loss": 0.4959, "step": 15582 }, { "epoch": 0.99, "grad_norm": 1.0312323570251465, "learning_rate": 4.252261066993169e-09, "loss": 0.4954, "step": 15583 }, { "epoch": 0.99, "grad_norm": 1.093133807182312, "learning_rate": 4.210061186951664e-09, "loss": 0.542, "step": 15584 }, { "epoch": 0.99, "grad_norm": 1.0975558757781982, "learning_rate": 4.168071662245532e-09, "loss": 0.5134, "step": 15585 }, { "epoch": 0.99, "grad_norm": 1.079088807106018, "learning_rate": 4.1262924946422476e-09, "loss": 0.4907, "step": 15586 }, { "epoch": 0.99, "grad_norm": 1.068571925163269, "learning_rate": 4.084723685901515e-09, "loss": 0.5307, "step": 15587 }, { "epoch": 0.99, "grad_norm": 1.081530213356018, "learning_rate": 4.043365237774155e-09, "loss": 0.5256, "step": 15588 }, { "epoch": 0.99, "grad_norm": 1.0684465169906616, "learning_rate": 4.002217152000443e-09, "loss": 0.4959, "step": 15589 }, { "epoch": 0.99, "grad_norm": 0.9404188990592957, "learning_rate": 3.961279430313991e-09, "loss": 0.4006, "step": 15590 }, { "epoch": 0.99, "grad_norm": 1.0813419818878174, "learning_rate": 3.920552074437867e-09, "loss": 0.4954, "step": 15591 }, { "epoch": 0.99, "grad_norm": 1.1187856197357178, "learning_rate": 3.880035086086808e-09, "loss": 0.4615, "step": 15592 }, { "epoch": 0.99, "grad_norm": 1.0242195129394531, "learning_rate": 3.839728466967785e-09, "loss": 0.4851, "step": 15593 }, { "epoch": 0.99, "grad_norm": 1.1560113430023193, "learning_rate": 3.799632218777216e-09, "loss": 0.5075, "step": 15594 }, { "epoch": 0.99, "grad_norm": 1.1023149490356445, "learning_rate": 3.759746343203751e-09, "loss": 0.4827, "step": 15595 }, { "epoch": 0.99, "grad_norm": 1.1077873706817627, "learning_rate": 3.720070841926604e-09, "loss": 0.4832, "step": 15596 }, { "epoch": 0.99, "grad_norm": 1.0226209163665771, "learning_rate": 3.6806057166166585e-09, "loss": 0.4924, "step": 15597 }, { "epoch": 0.99, "grad_norm": 1.0712997913360596, "learning_rate": 3.6413509689353644e-09, "loss": 0.5191, "step": 15598 }, { "epoch": 0.99, "grad_norm": 1.0410377979278564, "learning_rate": 3.602306600535843e-09, "loss": 0.5057, "step": 15599 }, { "epoch": 0.99, "grad_norm": 1.071582317352295, "learning_rate": 3.5634726130617802e-09, "loss": 0.5255, "step": 15600 }, { "epoch": 0.99, "grad_norm": 0.977677583694458, "learning_rate": 3.5248490081485343e-09, "loss": 0.4713, "step": 15601 }, { "epoch": 0.99, "grad_norm": 0.9692546129226685, "learning_rate": 3.486435787422582e-09, "loss": 0.475, "step": 15602 }, { "epoch": 0.99, "grad_norm": 1.0329508781433105, "learning_rate": 3.4482329525009627e-09, "loss": 0.4914, "step": 15603 }, { "epoch": 0.99, "grad_norm": 1.0116748809814453, "learning_rate": 3.4102405049929455e-09, "loss": 0.5348, "step": 15604 }, { "epoch": 0.99, "grad_norm": 1.1092138290405273, "learning_rate": 3.372458446497251e-09, "loss": 0.5007, "step": 15605 }, { "epoch": 0.99, "grad_norm": 1.1361290216445923, "learning_rate": 3.3348867786059393e-09, "loss": 0.5157, "step": 15606 }, { "epoch": 0.99, "grad_norm": 1.0798801183700562, "learning_rate": 3.2975255028999675e-09, "loss": 0.4856, "step": 15607 }, { "epoch": 0.99, "grad_norm": 1.0976042747497559, "learning_rate": 3.2603746209530774e-09, "loss": 0.4749, "step": 15608 }, { "epoch": 0.99, "grad_norm": 1.063442587852478, "learning_rate": 3.223434134329573e-09, "loss": 0.4877, "step": 15609 }, { "epoch": 0.99, "grad_norm": 1.1981234550476074, "learning_rate": 3.1867040445848764e-09, "loss": 0.5203, "step": 15610 }, { "epoch": 0.99, "grad_norm": 1.0773344039916992, "learning_rate": 3.1501843532649734e-09, "loss": 0.444, "step": 15611 }, { "epoch": 0.99, "grad_norm": 1.059180498123169, "learning_rate": 3.113875061908078e-09, "loss": 0.4834, "step": 15612 }, { "epoch": 0.99, "grad_norm": 1.0928962230682373, "learning_rate": 3.077776172043523e-09, "loss": 0.4997, "step": 15613 }, { "epoch": 0.99, "grad_norm": 1.0543581247329712, "learning_rate": 3.0418876851900924e-09, "loss": 0.4387, "step": 15614 }, { "epoch": 0.99, "grad_norm": 1.069734811782837, "learning_rate": 3.0062096028599108e-09, "loss": 0.4502, "step": 15615 }, { "epoch": 0.99, "grad_norm": 0.9861531257629395, "learning_rate": 2.9707419265551097e-09, "loss": 0.4883, "step": 15616 }, { "epoch": 0.99, "grad_norm": 1.0755091905593872, "learning_rate": 2.9354846577689387e-09, "loss": 0.531, "step": 15617 }, { "epoch": 0.99, "grad_norm": 1.0954896211624146, "learning_rate": 2.900437797986322e-09, "loss": 0.4631, "step": 15618 }, { "epoch": 0.99, "grad_norm": 1.0470771789550781, "learning_rate": 2.8656013486821897e-09, "loss": 0.5038, "step": 15619 }, { "epoch": 0.99, "grad_norm": 0.9958431124687195, "learning_rate": 2.8309753113237025e-09, "loss": 0.4625, "step": 15620 }, { "epoch": 0.99, "grad_norm": 0.9982820153236389, "learning_rate": 2.796559687369138e-09, "loss": 0.4949, "step": 15621 }, { "epoch": 0.99, "grad_norm": 0.9949272871017456, "learning_rate": 2.7623544782673372e-09, "loss": 0.4438, "step": 15622 }, { "epoch": 0.99, "grad_norm": 1.064408779144287, "learning_rate": 2.7283596854588148e-09, "loss": 0.5071, "step": 15623 }, { "epoch": 0.99, "grad_norm": 1.0921839475631714, "learning_rate": 2.6945753103746475e-09, "loss": 0.4698, "step": 15624 }, { "epoch": 0.99, "grad_norm": 1.0886198282241821, "learning_rate": 2.661001354437587e-09, "loss": 0.4907, "step": 15625 }, { "epoch": 0.99, "grad_norm": 1.0785962343215942, "learning_rate": 2.6276378190615016e-09, "loss": 0.4858, "step": 15626 }, { "epoch": 0.99, "grad_norm": 1.1296824216842651, "learning_rate": 2.5944847056508237e-09, "loss": 0.463, "step": 15627 }, { "epoch": 0.99, "grad_norm": 1.1042518615722656, "learning_rate": 2.561542015601659e-09, "loss": 0.5477, "step": 15628 }, { "epoch": 0.99, "grad_norm": 1.1353806257247925, "learning_rate": 2.528809750301231e-09, "loss": 0.4806, "step": 15629 }, { "epoch": 0.99, "grad_norm": 1.1011172533035278, "learning_rate": 2.4962879111278813e-09, "loss": 0.4638, "step": 15630 }, { "epoch": 0.99, "grad_norm": 1.0281448364257812, "learning_rate": 2.4639764994505156e-09, "loss": 0.4835, "step": 15631 }, { "epoch": 0.99, "grad_norm": 1.0197134017944336, "learning_rate": 2.4318755166302668e-09, "loss": 0.4865, "step": 15632 }, { "epoch": 0.99, "grad_norm": 1.008103609085083, "learning_rate": 2.399984964018276e-09, "loss": 0.4972, "step": 15633 }, { "epoch": 0.99, "grad_norm": 0.9819304943084717, "learning_rate": 2.3683048429573587e-09, "loss": 0.4532, "step": 15634 }, { "epoch": 0.99, "grad_norm": 1.0961347818374634, "learning_rate": 2.3368351547820023e-09, "loss": 0.5162, "step": 15635 }, { "epoch": 0.99, "grad_norm": 1.0458500385284424, "learning_rate": 2.3055759008167033e-09, "loss": 0.4518, "step": 15636 }, { "epoch": 0.99, "grad_norm": 1.0598288774490356, "learning_rate": 2.2745270823776312e-09, "loss": 0.4867, "step": 15637 }, { "epoch": 0.99, "grad_norm": 1.1259396076202393, "learning_rate": 2.243688700772628e-09, "loss": 0.538, "step": 15638 }, { "epoch": 0.99, "grad_norm": 1.0349446535110474, "learning_rate": 2.2130607573001006e-09, "loss": 0.5405, "step": 15639 }, { "epoch": 0.99, "grad_norm": 1.058007001876831, "learning_rate": 2.1826432532495724e-09, "loss": 0.4848, "step": 15640 }, { "epoch": 0.99, "grad_norm": 1.0677138566970825, "learning_rate": 2.1524361899016853e-09, "loss": 0.5082, "step": 15641 }, { "epoch": 0.99, "grad_norm": 0.9990156888961792, "learning_rate": 2.1224395685282008e-09, "loss": 0.5306, "step": 15642 }, { "epoch": 0.99, "grad_norm": 0.9851405024528503, "learning_rate": 2.0926533903925516e-09, "loss": 0.4652, "step": 15643 }, { "epoch": 0.99, "grad_norm": 1.1061551570892334, "learning_rate": 2.0630776567492904e-09, "loss": 0.5053, "step": 15644 }, { "epoch": 0.99, "grad_norm": 1.0540603399276733, "learning_rate": 2.033712368842977e-09, "loss": 0.5004, "step": 15645 }, { "epoch": 0.99, "grad_norm": 1.0345008373260498, "learning_rate": 2.004557527909845e-09, "loss": 0.4679, "step": 15646 }, { "epoch": 0.99, "grad_norm": 1.0666303634643555, "learning_rate": 1.975613135178911e-09, "loss": 0.4971, "step": 15647 }, { "epoch": 0.99, "grad_norm": 1.1802502870559692, "learning_rate": 1.9468791918675345e-09, "loss": 0.4951, "step": 15648 }, { "epoch": 0.99, "grad_norm": 0.9935306906700134, "learning_rate": 1.918355699186414e-09, "loss": 0.5036, "step": 15649 }, { "epoch": 0.99, "grad_norm": 1.019480586051941, "learning_rate": 1.8900426583357003e-09, "loss": 0.4298, "step": 15650 }, { "epoch": 0.99, "grad_norm": 0.9573292136192322, "learning_rate": 1.861940070508883e-09, "loss": 0.4635, "step": 15651 }, { "epoch": 0.99, "grad_norm": 1.069796085357666, "learning_rate": 1.8340479368883502e-09, "loss": 0.5041, "step": 15652 }, { "epoch": 0.99, "grad_norm": 1.0155587196350098, "learning_rate": 1.8063662586481622e-09, "loss": 0.4627, "step": 15653 }, { "epoch": 0.99, "grad_norm": 0.9943329095840454, "learning_rate": 1.7788950369551638e-09, "loss": 0.4771, "step": 15654 }, { "epoch": 0.99, "grad_norm": 1.0415226221084595, "learning_rate": 1.751634272964542e-09, "loss": 0.471, "step": 15655 }, { "epoch": 0.99, "grad_norm": 1.1297926902770996, "learning_rate": 1.7245839678259323e-09, "loss": 0.4791, "step": 15656 }, { "epoch": 0.99, "grad_norm": 1.1199589967727661, "learning_rate": 1.6977441226767589e-09, "loss": 0.5399, "step": 15657 }, { "epoch": 0.99, "grad_norm": 0.9969205856323242, "learning_rate": 1.6711147386477833e-09, "loss": 0.4313, "step": 15658 }, { "epoch": 0.99, "grad_norm": 1.0321614742279053, "learning_rate": 1.644695816860331e-09, "loss": 0.4638, "step": 15659 }, { "epoch": 0.99, "grad_norm": 1.067716360092163, "learning_rate": 1.6184873584268457e-09, "loss": 0.5376, "step": 15660 }, { "epoch": 0.99, "grad_norm": 1.0950990915298462, "learning_rate": 1.5924893644503336e-09, "loss": 0.5583, "step": 15661 }, { "epoch": 0.99, "grad_norm": 1.1522343158721924, "learning_rate": 1.566701836026585e-09, "loss": 0.5207, "step": 15662 }, { "epoch": 0.99, "grad_norm": 1.0168970823287964, "learning_rate": 1.541124774240288e-09, "loss": 0.5075, "step": 15663 }, { "epoch": 0.99, "grad_norm": 1.0221366882324219, "learning_rate": 1.5157581801689137e-09, "loss": 0.4748, "step": 15664 }, { "epoch": 0.99, "grad_norm": 1.0731167793273926, "learning_rate": 1.4906020548804968e-09, "loss": 0.503, "step": 15665 }, { "epoch": 0.99, "grad_norm": 1.0286649465560913, "learning_rate": 1.4656563994341898e-09, "loss": 0.47, "step": 15666 }, { "epoch": 0.99, "grad_norm": 1.1274816989898682, "learning_rate": 1.4409212148802643e-09, "loss": 0.5381, "step": 15667 }, { "epoch": 0.99, "grad_norm": 1.0476179122924805, "learning_rate": 1.416396502260664e-09, "loss": 0.4834, "step": 15668 }, { "epoch": 0.99, "grad_norm": 1.0860023498535156, "learning_rate": 1.3920822626078967e-09, "loss": 0.4954, "step": 15669 }, { "epoch": 0.99, "grad_norm": 1.0240345001220703, "learning_rate": 1.3679784969461429e-09, "loss": 0.528, "step": 15670 }, { "epoch": 0.99, "grad_norm": 1.041835069656372, "learning_rate": 1.3440852062890364e-09, "loss": 0.5012, "step": 15671 }, { "epoch": 0.99, "grad_norm": 0.9991360902786255, "learning_rate": 1.3204023916435494e-09, "loss": 0.4802, "step": 15672 }, { "epoch": 0.99, "grad_norm": 1.0960899591445923, "learning_rate": 1.2969300540072171e-09, "loss": 0.4822, "step": 15673 }, { "epoch": 0.99, "grad_norm": 1.0605608224868774, "learning_rate": 1.2736681943675833e-09, "loss": 0.4904, "step": 15674 }, { "epoch": 0.99, "grad_norm": 1.0661795139312744, "learning_rate": 1.2506168137049747e-09, "loss": 0.5213, "step": 15675 }, { "epoch": 0.99, "grad_norm": 0.9501168131828308, "learning_rate": 1.2277759129886158e-09, "loss": 0.4456, "step": 15676 }, { "epoch": 0.99, "grad_norm": 1.0185068845748901, "learning_rate": 1.2051454931816254e-09, "loss": 0.4724, "step": 15677 }, { "epoch": 0.99, "grad_norm": 1.1506839990615845, "learning_rate": 1.1827255552365745e-09, "loss": 0.4651, "step": 15678 }, { "epoch": 0.99, "grad_norm": 1.0617283582687378, "learning_rate": 1.1605161000971532e-09, "loss": 0.5208, "step": 15679 }, { "epoch": 0.99, "grad_norm": 1.061086893081665, "learning_rate": 1.1385171286992791e-09, "loss": 0.5032, "step": 15680 }, { "epoch": 0.99, "grad_norm": 1.1165785789489746, "learning_rate": 1.116728641967768e-09, "loss": 0.5692, "step": 15681 }, { "epoch": 0.99, "grad_norm": 1.0701615810394287, "learning_rate": 1.0951506408213298e-09, "loss": 0.507, "step": 15682 }, { "epoch": 0.99, "grad_norm": 1.0252995491027832, "learning_rate": 1.0737831261686815e-09, "loss": 0.5245, "step": 15683 }, { "epoch": 0.99, "grad_norm": 1.0762993097305298, "learning_rate": 1.052626098907994e-09, "loss": 0.4707, "step": 15684 }, { "epoch": 0.99, "grad_norm": 1.0970128774642944, "learning_rate": 1.0316795599318862e-09, "loss": 0.4847, "step": 15685 }, { "epoch": 0.99, "grad_norm": 1.0381357669830322, "learning_rate": 1.0109435101218757e-09, "loss": 0.4986, "step": 15686 }, { "epoch": 0.99, "grad_norm": 1.1097840070724487, "learning_rate": 9.90417950350042e-10, "loss": 0.4853, "step": 15687 }, { "epoch": 0.99, "grad_norm": 1.0169131755828857, "learning_rate": 9.701028814818047e-10, "loss": 0.5007, "step": 15688 }, { "epoch": 0.99, "grad_norm": 1.1126348972320557, "learning_rate": 9.499983043720351e-10, "loss": 0.5161, "step": 15689 }, { "epoch": 0.99, "grad_norm": 1.0900895595550537, "learning_rate": 9.301042198678334e-10, "loss": 0.5049, "step": 15690 }, { "epoch": 0.99, "grad_norm": 1.1760368347167969, "learning_rate": 9.104206288057527e-10, "loss": 0.4983, "step": 15691 }, { "epoch": 0.99, "grad_norm": 1.0283381938934326, "learning_rate": 8.909475320156846e-10, "loss": 0.4404, "step": 15692 }, { "epoch": 0.99, "grad_norm": 1.0624741315841675, "learning_rate": 8.71684930317529e-10, "loss": 0.4906, "step": 15693 }, { "epoch": 0.99, "grad_norm": 1.0771564245224, "learning_rate": 8.526328245217485e-10, "loss": 0.4918, "step": 15694 }, { "epoch": 0.99, "grad_norm": 1.0345393419265747, "learning_rate": 8.337912154304795e-10, "loss": 0.5149, "step": 15695 }, { "epoch": 0.99, "grad_norm": 1.0324221849441528, "learning_rate": 8.151601038375312e-10, "loss": 0.4918, "step": 15696 }, { "epoch": 0.99, "grad_norm": 1.0914480686187744, "learning_rate": 7.967394905278314e-10, "loss": 0.4865, "step": 15697 }, { "epoch": 0.99, "grad_norm": 0.9753479957580566, "learning_rate": 7.785293762757607e-10, "loss": 0.4864, "step": 15698 }, { "epoch": 0.99, "grad_norm": 1.0722107887268066, "learning_rate": 7.605297618495932e-10, "loss": 0.5488, "step": 15699 }, { "epoch": 0.99, "grad_norm": 1.088541030883789, "learning_rate": 7.427406480059463e-10, "loss": 0.5284, "step": 15700 }, { "epoch": 0.99, "grad_norm": 0.9980146288871765, "learning_rate": 7.251620354942201e-10, "loss": 0.4872, "step": 15701 }, { "epoch": 0.99, "grad_norm": 1.0236068964004517, "learning_rate": 7.077939250549337e-10, "loss": 0.4758, "step": 15702 }, { "epoch": 0.99, "grad_norm": 1.0211738348007202, "learning_rate": 6.906363174191688e-10, "loss": 0.4903, "step": 15703 }, { "epoch": 0.99, "grad_norm": 0.9885705709457397, "learning_rate": 6.736892133091255e-10, "loss": 0.432, "step": 15704 }, { "epoch": 0.99, "grad_norm": 0.9520770311355591, "learning_rate": 6.569526134392324e-10, "loss": 0.4016, "step": 15705 }, { "epoch": 1.0, "grad_norm": 1.0254629850387573, "learning_rate": 6.404265185128155e-10, "loss": 0.5599, "step": 15706 }, { "epoch": 1.0, "grad_norm": 1.090428352355957, "learning_rate": 6.241109292270953e-10, "loss": 0.4845, "step": 15707 }, { "epoch": 1.0, "grad_norm": 1.1226465702056885, "learning_rate": 6.080058462687444e-10, "loss": 0.5108, "step": 15708 }, { "epoch": 1.0, "grad_norm": 1.0290261507034302, "learning_rate": 5.92111270314999e-10, "loss": 0.4717, "step": 15709 }, { "epoch": 1.0, "grad_norm": 1.0893996953964233, "learning_rate": 5.764272020358785e-10, "loss": 0.5063, "step": 15710 }, { "epoch": 1.0, "grad_norm": 1.1903843879699707, "learning_rate": 5.609536420919659e-10, "loss": 0.5099, "step": 15711 }, { "epoch": 1.0, "grad_norm": 1.0996195077896118, "learning_rate": 5.456905911344068e-10, "loss": 0.4883, "step": 15712 }, { "epoch": 1.0, "grad_norm": 1.1661995649337769, "learning_rate": 5.306380498060204e-10, "loss": 0.5083, "step": 15713 }, { "epoch": 1.0, "grad_norm": 1.0439397096633911, "learning_rate": 5.15796018740744e-10, "loss": 0.5051, "step": 15714 }, { "epoch": 1.0, "grad_norm": 1.0537337064743042, "learning_rate": 5.011644985630781e-10, "loss": 0.4833, "step": 15715 }, { "epoch": 1.0, "grad_norm": 1.0489590167999268, "learning_rate": 4.867434898891965e-10, "loss": 0.5071, "step": 15716 }, { "epoch": 1.0, "grad_norm": 1.0084718465805054, "learning_rate": 4.725329933269463e-10, "loss": 0.5157, "step": 15717 }, { "epoch": 1.0, "grad_norm": 1.024583101272583, "learning_rate": 4.5853300947418247e-10, "loss": 0.4831, "step": 15718 }, { "epoch": 1.0, "grad_norm": 1.0930402278900146, "learning_rate": 4.4474353892043356e-10, "loss": 0.5319, "step": 15719 }, { "epoch": 1.0, "grad_norm": 0.9511051774024963, "learning_rate": 4.311645822463462e-10, "loss": 0.4464, "step": 15720 }, { "epoch": 1.0, "grad_norm": 1.1021090745925903, "learning_rate": 4.177961400236852e-10, "loss": 0.5023, "step": 15721 }, { "epoch": 1.0, "grad_norm": 1.030034065246582, "learning_rate": 4.046382128147786e-10, "loss": 0.4818, "step": 15722 }, { "epoch": 1.0, "grad_norm": 1.0933709144592285, "learning_rate": 3.916908011747378e-10, "loss": 0.5577, "step": 15723 }, { "epoch": 1.0, "grad_norm": 1.0729312896728516, "learning_rate": 3.7895390564868237e-10, "loss": 0.513, "step": 15724 }, { "epoch": 1.0, "grad_norm": 1.008466362953186, "learning_rate": 3.664275267717399e-10, "loss": 0.4971, "step": 15725 }, { "epoch": 1.0, "grad_norm": 1.0403852462768555, "learning_rate": 3.541116650723764e-10, "loss": 0.4527, "step": 15726 }, { "epoch": 1.0, "grad_norm": 0.9811885952949524, "learning_rate": 3.4200632106906605e-10, "loss": 0.4783, "step": 15727 }, { "epoch": 1.0, "grad_norm": 0.9582056999206543, "learning_rate": 3.301114952708462e-10, "loss": 0.4753, "step": 15728 }, { "epoch": 1.0, "grad_norm": 1.1376382112503052, "learning_rate": 3.1842718817953755e-10, "loss": 0.537, "step": 15729 }, { "epoch": 1.0, "grad_norm": 1.1202480792999268, "learning_rate": 3.0695340028641383e-10, "loss": 0.568, "step": 15730 }, { "epoch": 1.0, "grad_norm": 1.0908098220825195, "learning_rate": 2.956901320744221e-10, "loss": 0.5215, "step": 15731 }, { "epoch": 1.0, "grad_norm": 1.0361547470092773, "learning_rate": 2.8463738401873776e-10, "loss": 0.5091, "step": 15732 }, { "epoch": 1.0, "grad_norm": 1.0398439168930054, "learning_rate": 2.7379515658398915e-10, "loss": 0.4834, "step": 15733 }, { "epoch": 1.0, "grad_norm": 1.0584646463394165, "learning_rate": 2.6316345022703307e-10, "loss": 0.484, "step": 15734 }, { "epoch": 1.0, "grad_norm": 1.104377269744873, "learning_rate": 2.5274226539584445e-10, "loss": 0.4811, "step": 15735 }, { "epoch": 1.0, "grad_norm": 1.004241704940796, "learning_rate": 2.4253160252840636e-10, "loss": 0.4199, "step": 15736 }, { "epoch": 1.0, "grad_norm": 1.0890564918518066, "learning_rate": 2.3253146205493014e-10, "loss": 0.4789, "step": 15737 }, { "epoch": 1.0, "grad_norm": 1.0498672723770142, "learning_rate": 2.227418443967455e-10, "loss": 0.5008, "step": 15738 }, { "epoch": 1.0, "grad_norm": 1.0728520154953003, "learning_rate": 2.1316274996630026e-10, "loss": 0.5534, "step": 15739 }, { "epoch": 1.0, "grad_norm": 0.9895668029785156, "learning_rate": 2.0379417916605027e-10, "loss": 0.4431, "step": 15740 }, { "epoch": 1.0, "grad_norm": 1.1057844161987305, "learning_rate": 1.946361323912349e-10, "loss": 0.4936, "step": 15741 }, { "epoch": 1.0, "grad_norm": 1.0985236167907715, "learning_rate": 1.856886100276567e-10, "loss": 0.5129, "step": 15742 }, { "epoch": 1.0, "grad_norm": 1.0444600582122803, "learning_rate": 1.7695161245112613e-10, "loss": 0.4916, "step": 15743 }, { "epoch": 1.0, "grad_norm": 1.1546952724456787, "learning_rate": 1.6842514003023724e-10, "loss": 0.5168, "step": 15744 }, { "epoch": 1.0, "grad_norm": 1.0188254117965698, "learning_rate": 1.6010919312359208e-10, "loss": 0.467, "step": 15745 }, { "epoch": 1.0, "grad_norm": 1.0582890510559082, "learning_rate": 1.520037720820211e-10, "loss": 0.4842, "step": 15746 }, { "epoch": 1.0, "grad_norm": 1.0392037630081177, "learning_rate": 1.4410887724580768e-10, "loss": 0.4333, "step": 15747 }, { "epoch": 1.0, "grad_norm": 1.0552963018417358, "learning_rate": 1.3642450894801872e-10, "loss": 0.4416, "step": 15748 }, { "epoch": 1.0, "grad_norm": 1.0796518325805664, "learning_rate": 1.2895066751283935e-10, "loss": 0.4768, "step": 15749 }, { "epoch": 1.0, "grad_norm": 1.0383802652359009, "learning_rate": 1.2168735325335246e-10, "loss": 0.511, "step": 15750 }, { "epoch": 1.0, "grad_norm": 1.0444761514663696, "learning_rate": 1.1463456647708982e-10, "loss": 0.5094, "step": 15751 }, { "epoch": 1.0, "grad_norm": 1.060904622077942, "learning_rate": 1.0779230747992587e-10, "loss": 0.5082, "step": 15752 }, { "epoch": 1.0, "grad_norm": 1.1346057653427124, "learning_rate": 1.0116057654996348e-10, "loss": 0.5678, "step": 15753 }, { "epoch": 1.0, "grad_norm": 1.065916657447815, "learning_rate": 9.473937396697885e-11, "loss": 0.4356, "step": 15754 }, { "epoch": 1.0, "grad_norm": 1.0521526336669922, "learning_rate": 8.85287000013113e-11, "loss": 0.496, "step": 15755 }, { "epoch": 1.0, "grad_norm": 1.0416265726089478, "learning_rate": 8.252855491386325e-11, "loss": 0.4501, "step": 15756 }, { "epoch": 1.0, "grad_norm": 1.0565155744552612, "learning_rate": 7.673893895776551e-11, "loss": 0.4994, "step": 15757 }, { "epoch": 1.0, "grad_norm": 1.0739134550094604, "learning_rate": 7.115985237726719e-11, "loss": 0.5306, "step": 15758 }, { "epoch": 1.0, "grad_norm": 1.0463873147964478, "learning_rate": 6.57912954060702e-11, "loss": 0.5017, "step": 15759 }, { "epoch": 1.0, "grad_norm": 1.0840357542037964, "learning_rate": 6.063326827121518e-11, "loss": 0.4765, "step": 15760 }, { "epoch": 1.0, "grad_norm": 1.083588719367981, "learning_rate": 5.5685771189750714e-11, "loss": 0.4727, "step": 15761 }, { "epoch": 1.0, "grad_norm": 1.0757077932357788, "learning_rate": 5.0948804369843616e-11, "loss": 0.4973, "step": 15762 }, { "epoch": 1.0, "grad_norm": 1.004027247428894, "learning_rate": 4.642236801022382e-11, "loss": 0.5167, "step": 15763 }, { "epoch": 1.0, "grad_norm": 1.1284191608428955, "learning_rate": 4.210646230295989e-11, "loss": 0.4831, "step": 15764 }, { "epoch": 1.0, "grad_norm": 1.0468326807022095, "learning_rate": 3.800108742846309e-11, "loss": 0.508, "step": 15765 }, { "epoch": 1.0, "grad_norm": 1.1716840267181396, "learning_rate": 3.410624356048331e-11, "loss": 0.5461, "step": 15766 }, { "epoch": 1.0, "grad_norm": 1.0520511865615845, "learning_rate": 3.0421930862778446e-11, "loss": 0.5136, "step": 15767 }, { "epoch": 1.0, "grad_norm": 1.0537054538726807, "learning_rate": 2.6948149490224618e-11, "loss": 0.5008, "step": 15768 }, { "epoch": 1.0, "grad_norm": 1.0120649337768555, "learning_rate": 2.3684899589371256e-11, "loss": 0.4472, "step": 15769 }, { "epoch": 1.0, "grad_norm": 1.0789854526519775, "learning_rate": 2.063218129733091e-11, "loss": 0.5109, "step": 15770 }, { "epoch": 1.0, "grad_norm": 1.1495561599731445, "learning_rate": 1.7789994742889448e-11, "loss": 0.5457, "step": 15771 }, { "epoch": 1.0, "grad_norm": 1.0533004999160767, "learning_rate": 1.5158340045395847e-11, "loss": 0.5097, "step": 15772 }, { "epoch": 1.0, "grad_norm": 1.058885931968689, "learning_rate": 1.2737217315872407e-11, "loss": 0.4954, "step": 15773 }, { "epoch": 1.0, "grad_norm": 1.064213514328003, "learning_rate": 1.0526626656459648e-11, "loss": 0.4911, "step": 15774 }, { "epoch": 1.0, "grad_norm": 1.0072990655899048, "learning_rate": 8.526568160416304e-12, "loss": 0.521, "step": 15775 }, { "epoch": 1.0, "grad_norm": 1.1676479578018188, "learning_rate": 6.737041911564213e-12, "loss": 0.5327, "step": 15776 }, { "epoch": 1.0, "grad_norm": 1.1192338466644287, "learning_rate": 5.158047984843428e-12, "loss": 0.4802, "step": 15777 }, { "epoch": 1.0, "grad_norm": 1.1080821752548218, "learning_rate": 3.789586447422444e-12, "loss": 0.472, "step": 15778 }, { "epoch": 1.0, "grad_norm": 1.0763338804244995, "learning_rate": 2.631657356477746e-12, "loss": 0.5215, "step": 15779 }, { "epoch": 1.0, "grad_norm": 1.0144696235656738, "learning_rate": 1.6842607614142582e-12, "loss": 0.4771, "step": 15780 }, { "epoch": 1.0, "grad_norm": 1.0714669227600098, "learning_rate": 9.473967016448982e-13, "loss": 0.5592, "step": 15781 }, { "epoch": 1.0, "grad_norm": 1.1242960691452026, "learning_rate": 4.210652082559108e-13, "loss": 0.4959, "step": 15782 }, { "epoch": 1.0, "grad_norm": 1.0352383852005005, "learning_rate": 1.0526630289664496e-13, "loss": 0.4915, "step": 15783 }, { "epoch": 1.0, "grad_norm": 1.006693720817566, "learning_rate": 0.0, "loss": 0.4885, "step": 15784 }, { "epoch": 1.0, "step": 15784, "total_flos": 3.799196684334504e+19, "train_loss": 0.5349509626951727, "train_runtime": 163223.245, "train_samples_per_second": 24.756, "train_steps_per_second": 0.097 } ], "logging_steps": 1.0, "max_steps": 15784, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 8000, "total_flos": 3.799196684334504e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }